# Group comparison 3T and 7T epilepsy  

Surface-based comparisons  
- vertex-wise t-test : do controls and patients differ at each vertex?
    - Use brainstat
- vertex-wise effect size : how large is the difference between groups at each vertex?
    - Use own function


For figures: 
- Visualize effect size on a brain masked for significant p-values 

In [5]:
import pandas as pd
import numpy as np
import nibabel as nib
import pickle
import datetime
import brainstat as bstat

# Functions

In [7]:
# retrieve surfaces from pt of interest
def load_surf(study, IDss):
    """
    Collect surface data for every subject of a group.

    inputs:
        study: dictionary item with keys 'name', 'dir_root', 'study'
        IDss: not used by the body.
            NOTE(review): the previous docstring described this argument as `IDs`, a
            pd.dataframe with cols IDs and SES listing the participants to extract
            surfaces for -- confirm the intended name and use.

    outputs:
        nested dict: result[subject][surface] holds whatever
        `bstat.get_surface_data(study, subject, surface)` returns.

    NOTE(review): `pt_grp` and `surfaces` are not parameters; they are resolved from the
    enclosing (notebook) scope at call time. `pt_grp` is not assigned anywhere in the
    part of the file reviewed. Also confirm that `bstat` really provides `get_subjects`
    and `get_surface_data`.
    """

    import nibabel as nib

    # subjects that belong to the group
    subjects = bstat.get_subjects(study, pt_grp)

    # one inner dict per subject, one entry per requested surface
    return {
        subject: {
            surf_name: bstat.get_surface_data(study, subject, surf_name)
            for surf_name in surfaces
        }
        for subject in subjects
    }

In [8]:
def chk_pth(pth):
    """
    Tell whether `pth` points to an existing file.

    inputs:
        pth: path to check

    outputs:
        True when the path exists and is a file (not a directory), False otherwise
    """

    import os

    # both conditions must hold; a directory or a missing path yields False
    return bool(os.path.exists(pth) and os.path.isfile(pth))

In [180]:
def mp_mapsPth(dir, sub, ses, hemi, surf, lbl, ft):
    """
    Build the file path of a micapipe map (`.func.gii`) inside the folder `dir`.

    The file name is assembled from subject, session, hemisphere, surface and label.
    The feature `ft` is appended after the label for every label except "thickness",
    whose file name carries no feature suffix.
    """
    # every label other than thickness carries the feature as a suffix
    if lbl != "thickness":
        return f"{dir}/sub-{sub}_ses-{ses}_hemi-{hemi}_surf-{surf}_label-{lbl}_{ft}.func.gii"
    return f"{dir}/sub-{sub}_ses-{ses}_hemi-{hemi}_surf-{surf}_label-{lbl}.func.gii"

# when working, add to Utils scripts
def get_1pth(root, deriv_fldr, sub, ses, feature, label="midthickness", surf="fsLR-5k", space="nativepro", hemi="LR", check_pth=True,silence=True):
    """
    Get the path(s) to the surface map of one subject and session.
    Assumes BIDS format of data storage.

    inputs:
        root: root directory of the study
        deriv_fldr: name of derivative folder containing the surface data.
            Must contain "micapipe"; "hippunfold" is recognised but not implemented.
        sub: subject ID (no `sub-` prefix)
        ses: session ID (with leading zero if applicable; no `ses-` prefix)
        feature: feature name used as file-name suffix (e.g. "FA"); not part of the
            file name when label is "thickness"
        label: surface label, case-insensitive: "pial", "white", "midThick"/"midthickness"
            or "thickness"
        surf: surface type and resolution (e.g., fsLR-32k, fsLR-5k)
        space: space of the surface data (e.g., "nativepro", "fsnative").
            Currently unused: only the (unimplemented) hippunfold naming needs it.
        hemi: hemisphere to extract, case-insensitive: "L"/"left", "R"/"right" or
            "LR" (default) for both hemispheres

        check_pth: whether to check if the path exists (default is True)
        silence: whether to suppress print statements (default is True)

    outputs:
        str for a single hemisphere, or list [path_L, path_R] when hemi is "LR".
        When check_pth is True, every path that is not an existing file is reported
        with a print and returned prefixed with "ERROR:".

    raises:
        ValueError: invalid label, invalid hemisphere, hippunfold derivative
            (not yet implemented) or unrecognised derivative folder.
    """

    # normalise the label: case-insensitive, "midthick" is an alias of "midthickness"
    label = label.lower()
    if label == "midthick":
        label = "midthickness"
    elif label not in ("thickness", "pial", "white", "midthickness"):
        raise ValueError(f"{label} Invalid label type. Choose from 'pial', 'white', 'midThick' or 'thickness'.")

    # normalise the hemisphere to "L", "R" or "LR"
    hemi_aliases = {"LEFT": "L", "L": "L", "RIGHT": "R", "R": "R", "LR": "LR"}
    hemi = hemi.upper()
    if hemi not in hemi_aliases:
        raise ValueError("Invalid hemisphere. Choose from 'L', 'R', or 'LR'.")
    hemi = hemi_aliases[hemi]

    deriv = deriv_fldr.lower()
    if "micapipe" in deriv:
        # micapipe naming convention: feature maps live in the session's `maps` folder
        maps_dir = f"{root}/derivatives/{deriv_fldr}/sub-{sub}/ses-{ses}/maps"
        if hemi == "LR":
            pth = [
                mp_mapsPth(dir=maps_dir, sub=sub, ses=ses, hemi=h, surf=surf, lbl=label, ft=feature)
                for h in ("L", "R")
            ]
            if not silence: print(f"[get_1pth] Returning paths for both hemispheres ([0]: L, [1]: R)")
        else:
            pth = mp_mapsPth(dir=maps_dir, sub=sub, ses=ses, hemi=hemi, surf=surf, lbl=label, ft=feature)
            if not silence: print(f"[get_1pth] Returning path for {hemi} hemisphere")
    elif "hippunfold" in deriv:
        # TODO: implement once feature maps exist on hippunfold surfaces. Notes kept from
        # the earlier draft: space usually "T1w"; surf usually "fsLR"; label options
        # "hipp_outer", "hipp_inner", "hipp_midthickness"; files live under
        # {root}/derivatives/{deriv_fldr}/sub-{sub}/ses-{ses}/surf
        raise ValueError("Hippunfold derivative not yet implemented. Need to create feature maps using hippunfold surfaces.")
    else:
        raise ValueError("Invalid derivative folder. Choose from 'micapipe' or 'hippunfold'.")

    if check_pth:
        # thickness maps have no feature suffix; show a readable tag in the message
        ft_shown = "(thickness)" if label == "thickness" else feature
        if isinstance(pth, list):
            for idx, p in enumerate(pth):
                if not chk_pth(p):
                    print(f"\t[get_1pth] FILE NOT FOUND (ft: {ft_shown}, sub-{sub}_ses-{ses}): {p}")
                    pth[idx] = "ERROR:" + p
        elif not chk_pth(pth):
            print(f"\t[get_1pth] FILE NOT FOUND (ft: {ft_shown}, sub-{sub}_ses-{ses}): {pth}")
            pth = "ERROR:" + pth

    return pth

def get_Npths(demographics, study, groups, feature="FA", derivative="micapipe", label="midthickness", hemi="LR", space="nativepro", surf="fsLR-5k"):
    """
    Get path to surface files for individual groups

    Input:
    demographics: dict  regarding demographics file.
        Required keys:
            'pth'    path to the demographics csv
            'ID_7T'  name of the column holding the 7T subject ID
            'ID_3T'  name of the column holding the 3T subject ID
            'SES'    name of the session column
            'date'   name of the scan-date column
            'grp'    name of the grouping column
        The csv must also contain a column named 'study'.
    study: dict  regarding study.
        Required keys:
            'name'
            'dir_root'
            'study'    "3T" or "7T"; selects which ID column is used
            'dir_mp'
            'dir_hu'
    groups: dict    of groups to extract surfaces for.
        Each key should be a group name, and the value should be a list of labels in the 'grp' column of demographics file assigned to that group.
    feature: str  feature name forwarded to get_1pth (e.g. "FA")
    derivative: str  "micapipe" or "hippunfold"; any other value falls back to micapipe with a warning
    label: str  surface label to extract
    hemi: str  hemisphere to extract. Default is "LR" for both left and right hemispheres.
    space: str  space of the surface data. Default is "nativepro".
    surf: str  surface type and resolution. Default is "fsLR-5k".

    Output:
    list with one dict per group, keys 'study', 'grp', 'grp_labels', 'label', 'feature'
    and 'map_pths' (dataframe with ID, session, study, date and path column(s):
    'pth_L' and 'pth_R' for both hemispheres, otherwise 'pth_{hemi}').

    Raises:
    ValueError if study['study'] is neither "3T" nor "7T".
    """
    import pandas as pd

    demo = pd.read_csv(demographics['pth'], dtype=str)

    # pick the ID column from the study itself instead of relying on a global variable
    id_keys = {"3T": 'ID_3T', "7T": 'ID_7T'}
    if study['study'] not in id_keys:
        raise ValueError(f"[get_Npths] Unknown study code '{study['study']}'. Expected one of {list(id_keys)}.")
    id_col = demographics[id_keys[study['study']]]
    ses_col = demographics['SES']
    date_col = demographics['date']

    if derivative == "hippunfold":
        deriv_fldr = study['dir_hu']
    elif derivative == "micapipe":
        deriv_fldr = study['dir_mp']
    else:
        deriv_fldr = study['dir_mp']
        print(f"[get_Npths] WARNING: derivative not recognized. Defaulting to micapipe.")

    # get_1pth returns a [L, R] list exactly when the (case-insensitive) hemisphere is "LR"
    both_hemis = hemi.upper() == "LR"
    pth_cols = ['pth_L', 'pth_R'] if both_hemis else [f'pth_{hemi}']

    out = []
    for grp_name, grp_labels in groups.items():
        print(f"{study['name']} {grp_name} ({grp_labels})")

        # rows of this study whose group label belongs to the requested group
        in_group = demo[demographics['grp']].isin(grp_labels) & (demo['study'] == study['study'])
        ids = demo.loc[in_group, [id_col, ses_col, 'study', date_col]].copy()

        paths = [
            get_1pth(root=study['dir_root'], deriv_fldr=deriv_fldr, sub=row[id_col], ses=row[ses_col],
                     label=label, surf=surf, feature=feature, space=space, hemi=hemi)
            for _, row in ids.iterrows()
        ]

        # Always create the path columns, even for an empty group, so that
        # drop_duplicates below never fails on a missing column.
        if both_hemis:
            ids['pth_L'] = pd.Series([p[0] for p in paths], index=ids.index, dtype=object)
            ids['pth_R'] = pd.Series([p[1] for p in paths], index=ids.index, dtype=object)
        else:
            ids[pth_cols[0]] = pd.Series(paths, index=ids.index, dtype=object)

        # if paths are duplicated, then keep only one of those rows
        ids = ids.drop_duplicates(subset=pth_cols)

        # create dictionary item for each group, add to output list
        out.append({
            'study': study['name'],
            'grp': grp_name,
            'grp_labels': grp_labels,
            'label': label,
            'feature': feature,
            'map_pths': ids
        })

    return out


In [263]:
def clean_pths(dl, method="newest", silent=True):
    """
    Keeps only one session per ID
    input:
        dl (for dictionary list): List of dictionary items (e.g. outputs from get_Npths).
            These dict should contain a df under the key 'map_pths'
        method: method to use for choosing session.
            "newest": use most recent session
            "oldest": use oldest session in the list
            {number}: session code to use (e.g. '01' or 'a1' etc)
        silent: if False, print the shape and number of unique IDs of each dataframe
            before cleaning

    output:
        dl: List of dictionary items with cleaned dataframes.
            Items whose dataframe is empty before or after cleaning are skipped
            (a warning is printed for each).

    """
    dl_out = []

    for d in dl:
        df = d['map_pths']

        # the ID column name depends on the study the item belongs to
        ID_col = "PNI_ID" if d['study'] == "PNI" else "MICS_ID"

        # report only after df and ID_col are known
        if not silent:
            n_ids = df[ID_col].nunique() if ID_col in df.columns else 0
            print(f"[clean_pths] {d['study']} {d['grp']}: {df.shape}, num unique IDs: {n_ids}")

        if df.empty: # check if the dataframe is empty
            print(f"\t[clean_pths] WARNING: Empty dataframe for {d['study']} {d['grp']}")
            continue

        df_clean = ses_clean(df, ID_col, method=method, silent=True)

        if df_clean is None or df_clean.empty:  # check if the cleaned dataframe is empty
            print(f"\t[clean_pths] WARNING: Cleaned dataframe is empty for {d['study']} {d['grp']}")
            continue

        dl_out.append({
            'study': d['study'],
            'grp': d['grp'],
            'grp_labels': d['grp_labels'],
            'label': d['label'],
            'feature': d['feature'],
            'map_pths': df_clean
        })

    return dl_out


def ses_clean(df, ID_col, method="newest", silent=True):
    """
    Choose the session to use for each subject.
        If subject has multiple sessions with map path should only be using one of these sessions.

    inputs:
        df: pd.dataframe with columns for subject ID, session ('SES'), date ('Date',
            formatted dd.mm.YYYY) and map paths. Rows with error paths are NOT removed.
        ID_col: name of the subject ID column in df
        method: method to use for choosing session.
            "newest": use most recent session
            "oldest": use oldest session in the list
            {number}: session code to use (e.g. '01' or 'a1' etc)
        silent: if False, print progress information

    outputs:
        copy of df restricted to the chosen rows, with an extra 'Date_dt' column holding
        the parsed date. An empty input is returned as an (empty) copy.

    notes:
        - If none of a subject's dates can be parsed, the first listed row is kept
          and a warning is printed.
        - With a session code, a subject with several sessions but none matching the
          code is removed entirely; a warning is printed.
    """

    import pandas as pd

    # nothing to choose from: hand back an empty frame so callers can test `.empty`
    if df.empty:
        print(f"[ses_clean] WARNING: Empty dataframe. Skipping.")
        return df.copy()

    if not silent: print(f"[ses_clean] Choosing session according to method: {method}")

    # Do NOT remove rows with error paths
    df_clean = df.copy()

    # Find repeated IDs (i.e., subjects with multiple sessions)
    repeated_ids = df_clean[df_clean.duplicated(subset=ID_col, keep=False)][ID_col].unique()

    if not silent:
        if len(repeated_ids) > 0:
            print(f"\tIDs with multiple sessions: {repeated_ids}")
        else:
            print(f"\tNo repeated IDs found")

    # Convert 'Date' column to datetime for comparison (unparseable dates become NaT)
    df_clean['Date_dt'] = pd.to_datetime(df_clean['Date'], format='%d.%m.%Y', errors='coerce')

    rows_to_remove = []
    for sub_id in repeated_ids:
        sub_df = df_clean[df_clean[ID_col] == sub_id]
        if sub_df.shape[0] < 2:
            continue

        if method in ("newest", "oldest"):
            valid_dates = sub_df['Date_dt'].dropna()
            if valid_dates.empty:
                # no usable date: cannot rank the sessions, keep the first listed row
                print(f"[ses_clean] WARNING: No valid dates for {sub_id}. Keeping first listed session.")
                idx_to_keep = sub_df.index[0]
            elif method == "newest":
                idx_to_keep = valid_dates.idxmax()
            else:
                idx_to_keep = valid_dates.idxmin()
            rows_to_remove.extend(sub_df.index.difference([idx_to_keep]))
        else:
            # Assume method is a session code (e.g., '01', 'a1', etc)
            idx_to_remove = sub_df[sub_df['SES'] != method].index
            if len(idx_to_remove) == sub_df.shape[0]:
                print(f"[ses_clean] WARNING: {sub_id} has no session '{method}'. All of its rows are removed.")
            rows_to_remove.extend(idx_to_remove)

    # Remove the rows marked for removal
    df_clean = df_clean.drop(rows_to_remove)

    # if num rows =/= to num unique IDs then write warning
    if df_clean.shape[0] != df_clean[ID_col].nunique():
        print(f"[ses_clean] WARNING: Number of rows ({df_clean.shape[0]}) not equal to num unique IDs ({df_clean[ID_col].nunique()})")
        print(f"\tMultiple sessions for IDs: {df_clean[df_clean.duplicated(subset=ID_col, keep=False)][ID_col].unique()}")

    if not silent:
        print(f"\t{df.shape[0] - df_clean.shape[0]} rows removed, Change in unique IDs: {df_clean[ID_col].nunique() - df[ID_col].nunique()}")
        print(f"\t{df_clean.shape[0]} rows remaining")

    return df_clean

def _ses_or_na(row, ses_col, path_cols):
    """Return the row's session code, or "NA" when every path column is missing, empty or flagged ERROR."""
    if not path_cols:
        return row[ses_col]
    unusable = all(
        (not isinstance(row[c], str)) or row[c].startswith("ERROR") or row[c] == ""
        for c in path_cols
    )
    return "NA" if unusable else row[ses_col]


def _pick_session(candidates, subject, column, silent=True):
    """Resolve the candidate session values of one subject to a single value (None if there is none)."""
    import pandas as pd

    vals = {v for v in candidates if pd.notnull(v)}
    if not vals:
        return None
    if len(vals) == 1:
        return vals.pop()

    if not silent:
        print(f"[get_finalSES] WARNING: Multiple values for {subject} in {column}: {vals}. Keeping latest.")

    # a real session code always wins over the "NA" placeholder
    real = {v for v in vals if v != "NA"} or vals
    if all(isinstance(v, str) and v.isdigit() for v in real):
        return max(real, key=int)   # purely numeric codes: compare as integers
    return max(real, key=str)       # otherwise compare as strings


def get_finalSES(dl, demo, save_pth=None, long=False, silent=True):
    """
    From a list of dictionary items, create a DF with sessions retained for each participant and each feature

    input:
        dl: List of dictionary items with cleaned dataframes (keys used: 'study', 'grp',
            'label', 'feature', 'map_pths'). Each dataframe needs a 'SES' column and a
            column whose name contains 'ID'.
        demo: dictionary with demographics file information. Kept for backward
            compatibility; it is not read by this function.
        save_pth: path to save the dataframe to. If None, do not save.
        long: intended to switch between long and wide output. Wide format is not yet
            implemented: both values currently produce the same dataframe, and
            long=False only prints a notice.
        silent: if False, print progress and conflict warnings

    output:
        df: pd.dataframe with one row per subject ('ID') and one column per study /
            label / feature combination, named '{3T|7T|Unknown}-ses_{label}[-{feature}]'.
            Each cell holds the retained session code, or "NA" when all of that
            row's map paths are missing or flagged with "ERROR".
            When the same subject appears with different values for one column,
            a real session code is preferred over "NA", then the largest code is kept.
    """
    import datetime
    import pandas as pd

    study_prefixes = {"PNI": "7T", "MICs": "3T"}
    ses_col = 'SES'
    out = pd.DataFrame()  # Will collect all unique IDs and their session columns

    for d in dl:
        feature = d['feature']
        label = d['label']
        df = d['map_pths']

        # first column whose name contains "ID" (e.g. MICS_ID / PNI_ID)
        id_col = [col for col in df.columns if 'ID' in col.upper()][0]

        if not silent: print(f"[get_finalSES] {d['study']} {d['grp']}: {feature}, {label} ({df.shape[0]} rows)")

        study_prefix = study_prefixes.get(d['study'], "Unknown")
        lbl_ft = f"{label}" if label == "thickness" else f"{label}-{feature}"
        new_col = f'{study_prefix}-ses_{lbl_ft}'

        # Mark SES as NA if all path columns are ERROR or missing
        path_cols = [col for col in df.columns if col.startswith(('pth_', 'surf_', 'map_'))]
        df_tmp = pd.DataFrame({
            "ID": df[id_col].to_numpy(),
            new_col: [_ses_or_na(row, ses_col, path_cols) for _, row in df.iterrows()],
        })

        if new_col in out.columns:
            # Column already exists (same subject can belong to several groups):
            # merge on ID and resolve conflicting values.
            merged = pd.merge(out[['ID', new_col]], df_tmp, on="ID", how="outer", suffixes=('_old', '_new'))
            merged[new_col] = [
                _pick_session((old, new), subject, new_col, silent=silent)
                for subject, old, new in zip(merged['ID'], merged[f"{new_col}_old"], merged[f"{new_col}_new"])
            ]

            # Update out with resolved column
            out = pd.merge(out, merged[['ID', new_col]], on="ID", how="outer", suffixes=('', '_resolved'))
            out[new_col] = out[new_col + '_resolved'].combine_first(out[new_col])
            out = out.drop(columns=[new_col + '_resolved'])
        elif out.empty:
            out = df_tmp
        else:
            out = pd.merge(out, df_tmp, on="ID", how='outer')

    if not long:
        # under construction
        print("[get_finalSES] Wide format not yet implemented. Returning long format instead.")
        long = True

    if save_pth is not None:
        date = datetime.datetime.now().strftime("%d%b%Y-%H%M")
        if long: save = f"{save_pth}/sesXfeat_long_{date}.csv"
        else: save = f"{save_pth}/sesXfeat_final_{date}.csv"
        out.to_csv(save, index=False)
        print(f"[get_finalSES] Saved dataframe to {save}")

    return out

# MAIN

In [None]:
# Study configuration: one dict per dataset, consumed by get_Npths / get_1pth.
#   name     : label used in printed messages and to tell the datasets apart
#   dir_root : root directory of the dataset
#   dir_mp   : name of the micapipe derivative folder
#   dir_hu   : name of the hippunfold derivative folder
#   study    : value matched against the 'study' column of the demographics file
MICs = {
    "name": "MICs",
    "dir_root": "/data/mica3/BIDS_MICs",
    "dir_mp": "micapipe_v0.2.0",
    "dir_hu": "hippunfold_v1.3.0/hippunfold",
    "study": "3T",
    "ID_ctrl" : ["HC"],
    "ID_Pt" : ["PX"]
    }

PNI = {
    "name": "PNI",
    "dir_root": "/data/mica3/BIDS_PNI",
    "dir_mp": "micapipe_v0.2.0",
    "dir_hu": "hippunfold_v1.3.0/hippunfold",
    "study": "7T",
    "ID_col" : ["PNC", "Pilot"], # NOTE(review): originally described as the ID column; values look like ID prefixes -- confirm
    }

studies = [MICs, PNI]

# Demographics file and the names of the columns the functions read from it
demographics = {
    "pth" : "/host/verges/tank/data/daniel/3T7T/z/data/pt/demo_22May2025.csv",
    # column names:
    "ID_7T" : "PNI_ID", 
    "ID_3T" : "MICS_ID",
    "SES" : "SES",
    "date": "Date",
    "grp" : "grp_detailed" # col name for participant grouping variable of interest
}

# Each key is a group name; each value lists the labels of the grouping column
# (demographics['grp']) that are pooled into that group.
# NOTE(review): the 'FCD' group is built from the FLE_* labels -- confirm this is intended.
px_grps = { # specify patient group labels to compare to controls
    'allPX' : ['TLE_U', 'MFCL', 'FLE_R', 'MFCL_bTLE', 'UKN_L', 'mTLE_R', 'mTLE_L', 'FLE_L', 'UKN_U', 'TLE_L', 'TLE_R'],
    'TLE' : ['TLE_L', 'TLE_R', 'TLE_U', 'mTLE_R', 'mTLE_L'],
    'TLE_L': ['TLE_L', 'mTLE_L'],
    'TLE_R': ['TLE_R', 'mTLE_R'],
    'FCD' : ['FLE_R', 'FLE_L'],
    'MFCL' : ['MFCL', 'MFCL_bTLE'],
    'UKN' : ['UKN_L', 'UKN_U']
}

ctrl_grp = {'ctrl' : ['CTRL']}

# patient groups and the control group in a single mapping
groups = {**px_grps, **ctrl_grp}

# features, surface labels and surface resolutions to iterate over
features = ["FA", "MD", "T1map"]
#labels = ["white", "midthick", "thickness", "swm1.0mm", "swm2.0mm", "swm3.0mm"]
labels = ["white", "midthick", "thickness"]

surfaces = ["fsLR-5k"]

# load the demographics table (all columns as strings) and display the key columns
demo = pd.read_csv(demographics['pth'], dtype=str)
demo[["MICS_ID", "PNI_ID", "study", "SES", "grp", "grp_detailed"]]

Unnamed: 0,MICS_ID,PNI_ID,study,SES,grp,grp_detailed
0,HC129,Pilot013,7T,05,CTRL,CTRL
1,HC082,PNC003,7T,01,CTRL,CTRL
2,HC082,PNC003,7T,02,CTRL,CTRL
3,HC082,PNC003,7T,03,CTRL,CTRL
4,HC082,PNC003,7T,04,CTRL,CTRL
...,...,...,...,...,...,...
116,HC130,PNC026,3T,02,CTRL,CTRL
117,HC083,PNC011,3T,02,CTRL,CTRL
118,PX215,PNE020,3T,01,UKN,UKN_U
119,PX216,PNE021,3T,01,TLE,TLE_R


In [None]:
# Collect map paths for every study x surface x label (x feature), keep one session
# per subject, then tabulate the retained session per subject and feature.
studies = [MICs, PNI]
groups = {**px_grps, **ctrl_grp}


map_pths = []
map_pths_clean = []
final_ses = []  # NOTE(review): never filled; the result below is stored in `final_SES`
save_pth = "/host/verges/tank/data/daniel/3T7T/z/outputs/pt"

# keep surf_dfs from a previous run of this cell if it already exists
if 'surf_dfs' not in locals():
    surf_dfs = {}

for study in studies:
    print(f"[main] {study['name']}")

    # ID_col is set at module level: it keeps the value of the last study processed
    # and is also read by later cells. It is left unchanged if the study code is
    # neither "3T" nor "7T".
    if study['study'] == "3T": ID_col = demographics['ID_3T']
    elif study['study'] == "7T": ID_col = demographics['ID_7T']
    for surf in surfaces:
        for label in labels:
            print(f"\tsurf:{surf}, lbl:{label}")
            if label == "thickness":
                # thickness maps carry no feature suffix, so a single call with an empty feature
                map_pths.extend(get_Npths(demographics, study, groups, feature="", derivative="micapipe", label=label, surf=surf))
            else:
                for feature in features:
                    print(f"\t\tft: {feature}, surf: {surf}")
                    map_pths.extend(get_Npths(demographics, study, groups, feature=feature, derivative="micapipe", label=label, surf=surf))
                    
# one session per subject (most recent), then the session-by-feature table (also saved to save_pth)
map_pths_clean = clean_pths(map_pths, method="newest")
final_SES = get_finalSES(map_pths_clean, demo=demographics, save_pth=save_pth, long=False,silent=True)


[main] MICs
	surf:fsLR-5k, lbl:white
		ft: FA, surf: fsLR-5k
MICs allPX (['TLE_U', 'MFCL', 'FLE_R', 'MFCL_bTLE', 'UKN_L', 'mTLE_R', 'mTLE_L', 'FLE_L', 'UKN_U', 'TLE_L', 'TLE_R'])
	[get_1pth] FILE NOT FOUND (ft: FA, sub-PX071_ses-03): /data/mica3/BIDS_MICs/derivatives/micapipe_v0.2.0/sub-PX071/ses-03/maps/sub-PX071_ses-03_hemi-L_surf-fsLR-5k_label-white_FA.func.gii
	[get_1pth] FILE NOT FOUND (ft: FA, sub-PX071_ses-03): /data/mica3/BIDS_MICs/derivatives/micapipe_v0.2.0/sub-PX071/ses-03/maps/sub-PX071_ses-03_hemi-R_surf-fsLR-5k_label-white_FA.func.gii
	[get_1pth] FILE NOT FOUND (ft: FA, sub-PX216_ses-01): /data/mica3/BIDS_MICs/derivatives/micapipe_v0.2.0/sub-PX216/ses-01/maps/sub-PX216_ses-01_hemi-L_surf-fsLR-5k_label-white_FA.func.gii
	[get_1pth] FILE NOT FOUND (ft: FA, sub-PX216_ses-01): /data/mica3/BIDS_MICs/derivatives/micapipe_v0.2.0/sub-PX216/ses-01/maps/sub-PX216_ses-01_hemi-R_surf-fsLR-5k_label-white_FA.func.gii
MICs TLE (['TLE_L', 'TLE_R', 'TLE_U', 'mTLE_R', 'mTLE_L'])
	[get_1pth

In [None]:
# save dictionary item
# Output folder and a date stamp (e.g. 22May2025) for the file name.
save_pth = "/host/verges/tank/data/daniel/3T7T/z/outputs/pt"
date = datetime.datetime.now().strftime("%d%b%Y")
# The pickle dump itself is currently disabled:
#with open(f'{save_pth}/map_paths_{date}.pkl', 'wb') as f:    pickle.dump(map_pths, f)

KeyError: 'ID'

In [253]:
# load list of dict items with surface paths

#with open('map_pths.pkl', 'rb') as f:
#    map_pths = pickle.load(f)

Unnamed: 0,ID,3T-ses_white-FA,3T-ses_white-MD,3T-ses_white-T1map,3T-ses_midthick-FA,3T-ses_midthick-MD,3T-ses_midthick-T1map,3T-ses_thickness,7T-ses_white-FA,7T-ses_white-MD,7T-ses_white-T1map,7T-ses_midthick-FA,7T-ses_midthick-MD,7T-ses_midthick-T1map,7T-ses_thickness
0,PX119,01,,01,01,,01,01,,,,,,,
1,PX071,04,,04,04,,04,04,,,,,,,
2,PX137,01,,01,01,,01,01,,,,,,,
3,PX148,01,,01,01,,01,01,,,,,,,
4,PX153,01,,01,01,,01,01,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
58,PNC025,,,,,,,,,,a1,,,a1,a1
59,PNC026,,,,,,,,,,a3,,,a3,a3
60,PNC031,,,,,,,,,,a1,,,a1,a1
61,PNC037,,,,,,,,,,,,,,


In [None]:
# print maps_pths
# one header line, the dataframe, then a blank line for every collected item
for i, d in enumerate(map_pths):
    print(f"{i}: {d['study']} {d['grp']} ({d['grp_labels']})", d['map_pths'], "", sep="\n")

0: MICs all (['TLE_U', 'MFCL', 'FLE_R', 'MFCL_bTLE', 'UKN_L', 'mTLE_R', 'mTLE_L', 'FLE_L', 'UKN_U', 'TLE_L', 'TLE_R'])
    MICS_ID SES study        Date  \
80    PX071  01    3T  01.06.2022   
86    PX071  02    3T  17.01.2023   
87    PX071  03    3T  11.04.2023   
90    PX119  01    3T  30.08.2023   
91    PX071  04    3T  26.09.2023   
95    PX137  01    3T  06.03.2024   
96    PX148  01    3T  06.05.2024   
98    PX153  01    3T  28.05.2024   
99    PX158  01    3T  11.06.2024   
103   PX168  01    3T  20.08.2024   
104   PX173  01    3T  28.08.2024   
105   PX174  01    3T  04.09.2024   
106   PX176  01    3T  05.09.2024   
107   PX183  01    3T  16.10.2024   
109   PX189  01    3T  12.11.2024   
110   PX190  01    3T  12.11.2024   
111   PX194  01    3T  26.11.2024   
112   PX198  01    3T  03.12.2024   
113   PX199  01    3T  03.12.2024   
114   PX200  01    3T  03.12.2024   
115   PX204  01    3T  09.01.2025   
118   PX215  01    3T  20.03.2025   
119   PX216  01    3T  26.03.2

In [None]:
# Show the first collected item that matches the chosen study and group
get_study = "PNI"
get_grp = "ctrl"

for item in map_pths:
    # skip everything that is not the requested study/group pair
    if item['study'] != get_study or item['grp'] != get_grp:
        continue
    print(f"{item['study']}-{item['grp']} ({item['grp_labels']})")
    # lift the column limit so every column of the dataframe is printed
    with pd.option_context('display.max_columns', None):
        print(item['map_pths'])
    break

PNI-ctrl (['CTRL'])
      PNI_ID SES study        Date  \
0   Pilot013  05    7T  18.04.2024   
1     PNC003  01    7T  06.05.2022   
2     PNC003  02    7T  13.06.2022   
3     PNC003  03    7T  13.03.2023   
4     PNC003  04    7T  24.10.2023   
5     PNC006  01    7T  11.05.2022   
6     PNC006  02    7T  05.11.2022   
7     PNC006  03    7T  20.03.2023   
8     PNC006  04    7T  17.10.2023   
9     PNC011  01    7T  05.10.2022   
10    PNC011  02    7T  11.11.2022   
11    PNC011  03    7T  01.03.2023   
12    PNC011  04    7T  03.08.2023   
13    PNC018  01    7T  19.04.2023   
14    PNC018  02    7T  05.05.2023   
15    PNC018  03    7T  12.05.2023   
16    PNC018  04    7T  28.09.2023   
17    PNC018  a1    7T  10.02.2025   
18    PNC019  01    7T  20.04.2023   
19    PNC019  02    7T  03.05.2023   
20    PNC019  03    7T  17.05.2023   
21    PNC019  04    7T  07.11.2023   
22    PNC019  a1    7T  11.12.2024   
31    PNC024  01    7T  23.04.2024   
32    PNC024  02    7T  07.05.

In [32]:
# Find path for specific participant
study = "PNI"
sub = "PNC018"
ses = "01"

grp = "ctrl"

# Resolve the ID column from the chosen study. The global ID_col only reflects the
# last study processed by the main loop, so it is wrong for any other study.
id_col_by_study = {MICs['name']: demographics['ID_3T'], PNI['name']: demographics['ID_7T']}
lookup_id_col = id_col_by_study[study]

# first collected item for this study and group (None if there is none)
entry = next((item for item in map_pths if item['study'] == study and item['grp'] == grp), None)

if entry is not None:
    df = entry['map_pths']
    row = df[(df[lookup_id_col] == sub) & (df['SES'] == ses)]
    if not row.empty:
        # left-hemisphere path of the matching subject/session
        print(row['pth_L'].values[0])
    else:
        print(f"No entry found for subject {sub} and session {ses}")
else:
    print(f"No entry found for study {study} and group {grp}")


/data/mica3/BIDS_PNI/derivatives/micapipe_v0.2.0/sub-PNC018/ses-01/maps/sub-PNC018_ses-01_hemi-L_surf-fsLR-5k_label-midthickness_FA.func.gii


In [None]:
"""

test = [map_pths[1], map_pths[7]]
for i, d in enumerate(test):
    print(f"{i}: {d['study']} {d['grp']}")
    print()

# clean tests
test_pths_clean = clean_pths(test, method="newest")
for i, d in enumerate(map_pths_clean):
    print(f"\nItem {i}:")
    print(f"  Keys: {list(d.keys())}")
    
"""

0: MICs TLE

1: MICs ctrl



[get_finalSES] MICs all: FA, midthickness (20 rows)
[get_finalSES] MICs TLE: FA, midthickness (10 rows)
[get_finalSES] MICs TLE_L: FA, midthickness (5 rows)
[get_finalSES] MICs TLE_R: FA, midthickness (3 rows)
[get_finalSES] MICs FCD: FA, midthickness (3 rows)
[get_finalSES] MICs MFCL: FA, midthickness (4 rows)
[get_finalSES] MICs UKN: FA, midthickness (3 rows)
[get_finalSES] MICs ctrl: FA, midthickness (11 rows)
[get_finalSES] PNI all: FA, midthickness (20 rows)
[get_finalSES] PNI TLE: FA, midthickness (10 rows)
[get_finalSES] PNI TLE_L: FA, midthickness (5 rows)
[get_finalSES] PNI TLE_R: FA, midthickness (3 rows)
[get_finalSES] PNI FCD: FA, midthickness (3 rows)
[get_finalSES] PNI MFCL: FA, midthickness (4 rows)
[get_finalSES] PNI UKN: FA, midthickness (3 rows)
[get_finalSES] PNI ctrl: FA, midthickness (12 rows)


In [None]:
# Summarise every cleaned item: its keys, then each value (dataframes as shape only,
# unless df_print is switched on)
df_print = False
for i, d in enumerate(map_pths_clean):
    print(f"\nItem {i}:")
    print(f"  Keys: {list(d.keys())}")

    for k, v in d.items():
        if not isinstance(v, pd.DataFrame):
            print(f" {k}: {v}")
            continue
        print(f"  {k}: <DataFrame shape={v.shape}>")
        if df_print:
            print(f"  {k}: {v}")


Item 0:
  Keys: ['study', 'grp', 'grp_labels', 'label', 'feature', 'map_pths']
  study: MICs
  grp: all
  grp_labels: ['TLE_U', 'MFCL', 'FLE_R', 'MFCL_bTLE', 'UKN_L', 'mTLE_R', 'mTLE_L', 'FLE_L', 'UKN_U', 'TLE_L', 'TLE_R']
  label: midthickness
  feature: FA
  map_pths: <DataFrame shape=(20, 7)>

Item 1:
  Keys: ['study', 'grp', 'grp_labels', 'label', 'feature', 'map_pths']
  study: MICs
  grp: TLE
  grp_labels: ['TLE_L', 'TLE_R', 'TLE_U', 'mTLE_R', 'mTLE_L']
  label: midthickness
  feature: FA
  map_pths: <DataFrame shape=(10, 7)>

Item 2:
  Keys: ['study', 'grp', 'grp_labels', 'label', 'feature', 'map_pths']
  study: MICs
  grp: TLE_L
  grp_labels: ['TLE_L', 'mTLE_L']
  label: midthickness
  feature: FA
  map_pths: <DataFrame shape=(5, 7)>

Item 3:
  Keys: ['study', 'grp', 'grp_labels', 'label', 'feature', 'map_pths']
  study: MICs
  grp: TLE_R
  grp_labels: ['TLE_R', 'mTLE_R']
  label: midthickness
  feature: FA
  map_pths: <DataFrame shape=(3, 7)>

Item 4:
  Keys: ['study', 'grp',

[get_finalSES] Processing MICs all: FA, midthickness (df: 20 rows)
[get_finalSES] Processing MICs TLE: FA, midthickness (df: 10 rows)
[get_finalSES] Processing MICs TLE_L: FA, midthickness (df: 5 rows)
[get_finalSES] Processing MICs TLE_R: FA, midthickness (df: 3 rows)
[get_finalSES] Processing MICs FCD: FA, midthickness (df: 3 rows)
[get_finalSES] Processing MICs MFCL: FA, midthickness (df: 4 rows)
[get_finalSES] Processing MICs UKN: FA, midthickness (df: 3 rows)
[get_finalSES] Processing MICs ctrl: FA, midthickness (df: 11 rows)
[get_finalSES] Processing PNI all: FA, midthickness (df: 20 rows)
[get_finalSES] Processing PNI TLE: FA, midthickness (df: 10 rows)
[get_finalSES] Processing PNI TLE_L: FA, midthickness (df: 5 rows)
[get_finalSES] Processing PNI TLE_R: FA, midthickness (df: 3 rows)
[get_finalSES] Processing PNI FCD: FA, midthickness (df: 3 rows)
[get_finalSES] Processing PNI MFCL: FA, midthickness (df: 4 rows)
[get_finalSES] Processing PNI UKN: FA, midthickness (df: 3 rows)
[

In [None]:
# put surface files into dataframes

In [None]:
# flip TLEs --> put all lesions on same side

In [None]:
# get difference maps at 3T (3T ctrl - 3T cases)

In [None]:
# get difference maps at 7T (7T ctrl - 7T cases)

In [None]:
# get difference maps of difference maps (3T dif maps - 7T dif maps)