# Brainstat analysis comparing 3T and 7T epilepsy  

Surface-based comparisons  
- vertex-wise t-test: do controls and patients differ?
    - Use brainstat
- vertex-wise effect size: how large is the difference between groups at each vertex?
    - Use own function


For figures: 
- Visualize effect size on a brain masked for significant p-values 

In [54]:
import pandas as pd
import numpy as np
import nibabel as nib
import pickle
import datetime
import brainstat as bstat

In [86]:
# --- Study definitions: dataset roots and derivative folder names ---
MICs = dict(
    name="MICs",
    dir_root="/data/mica3/BIDS_MICs",
    dir_mp="micapipe_v0.2.0",
    dir_hu="hippunfold_v1.3.0/hippunfold",
    study="3T",
    ID_ctrl=["HC"],
    ID_Pt=["PX"],
)

PNI = dict(
    name="PNI",
    dir_root="/data/mica3/BIDS_PNI",
    dir_mp="micapipe_v0.2.0",
    dir_hu="hippunfold_v1.3.0/hippunfold",
    study="7T",
    # NOTE(review): originally described as the ID column in the demographics
    # file, but the values look like ID prefixes (cf. ID_ctrl / ID_Pt in MICs) — confirm.
    ID_col=["PNC", "Pilot"],
)

studies = [MICs, PNI]

# --- Demographics file: location and the column names used downstream ---
demographics = dict(
    pth="/host/verges/tank/data/daniel/3T7T/z/data/pt/demo_22May2025.csv",
    ID_7T="PNI_ID",
    ID_3T="MICS_ID",
    SES="SES",
    date="Date",
    grp="grp_detailed",  # column holding the participant grouping variable of interest
)

# --- Patient groups: group name -> labels (in the grouping column) that belong to it ---
px_grps = {
    "all": [
        "TLE_U", "MFCL", "FLE_R", "MFCL_bTLE", "UKN_L", "mTLE_R",
        "mTLE_L", "FLE_L", "UKN_U", "TLE_L", "TLE_R",
    ],
    "TLE": ["TLE_L", "TLE_R", "TLE_U", "mTLE_R", "mTLE_L"],
    "TLE_L": ["TLE_L", "mTLE_L"],
    "TLE_R": ["TLE_R", "mTLE_R"],
    "FCD": ["FLE_R", "FLE_L"],
    "MFCL": ["MFCL", "MFCL_bTLE"],
    "UKN": ["UKN_L", "UKN_U"],
}

ctrl_grp = {"ctrl": ["CTRL"]}

# Patient groups first, then controls, in one lookup table
groups = dict(px_grps, **ctrl_grp)

surfaces = ["fsLR-5k"]
labels = ["pial", "white", "midThick"]

In [5]:
# retrieve surfaces from pt of interest
def load_surf(study, IDss):
    """
    Get the surface data for a given group.

    NOTE(review): work-in-progress stub. The body never reads `IDss` and uses a
    name `pt_grp` that is neither a parameter nor a local, so a call fails unless
    `pt_grp` happens to exist as a global. The return value is also a nested
    dict, not the dataframe described under "outputs".

    inputs:
    study: dictionary item with keys 'name', 'dir_root', 'study'
    IDss: pd.dataframe with cols IDs and SES indicating all participants IDs to extract surfaces for
        (currently unused by the body)

    outputs:
    surfs (intended): pd.dataframe with vertices in rows and unique ID_SES in columns
    surf_data (actual): dict keyed by participant, each value a dict keyed by surface name
    """
    
    import nibabel as nib
    
    # get the list of patients in the group
    # NOTE(review): `pt_grp` is undefined here; also confirm that brainstat
    # actually exposes `get_subjects` / `get_surface_data`.
    pt_list = bstat.get_subjects(study, pt_grp)
    
    # get the surface data for each patient
    # (`surfaces` is the module-level list of surface names)
    surf_data = {}
    for pt in pt_list:
        surf_data[pt] = {}
        for surf in surfaces:
            surf_data[pt][surf] = bstat.get_surface_data(study, pt, surf)
    
    return surf_data

In [66]:
def chk_pth(pth):
    """
    Tell whether `pth` points at an existing regular file.

    inputs:
        pth: path to check

    outputs:
        True if the path exists and is a file, False otherwise
    """

    import os

    # Guard clause: nothing at this location at all
    if not os.path.exists(pth):
        return False

    # Something exists; it only counts if it is a file (not a directory)
    return os.path.isfile(pth)

In [126]:
def mp_mapsPth(dir,sub, ses, hemi, surf, lbl, ft):
    """
    Build the micapipe map file path for one subject/session/hemisphere.

    The file name is the underscore-joined sequence
    sub-<sub>, ses-<ses>, hemi-<hemi>, surf-<surf>, label-<lbl>, <ft>.func.gii
    placed inside directory `dir`.
    """
    entities = (
        f"sub-{sub}",
        f"ses-{ses}",
        f"hemi-{hemi}",
        f"surf-{surf}",
        f"label-{lbl}",
        f"{ft}.func.gii",
    )
    return f"{dir}/" + "_".join(entities)

# when working, add to Utils scripts
def get_1pth(root, deriv_fldr, sub, ses, label="midthickness_T1map", surf="fsLR-5k", feature="flair", space="nativepro", hemi="LR", check_pth=True,silence=True):
    """
    Get the path(s) to the feature map of a given subject and session.
    Assumes BIDS format of data storage.

    inputs:
        root: root directory of the study
        deriv_fldr: name of derivative folder containing the surface data.
            Must contain "micapipe"; folders containing "hippunfold" are
            recognised but not implemented yet.
        sub: subject ID (no `sub-` prefix)
        ses: session ID (with leading zero if applicable; no `ses-` prefix)
        label: surface label, case-insensitive: "pial", "white", "midThick" or "midthickness".
            NOTE: the default value ("midthickness_T1map") is not one of these
            and raises ValueError, so callers must pass a label explicitly.
        surf: surface type and resolution (e.g., fsLR-32k, fsLR-5k)
        feature: feature name used as the file-name suffix (e.g., "FA", "flair")
        space: space of the surface data (e.g., "nativepro", "fsnative").
            Currently unused; reserved for the hippunfold naming scheme.
        hemi: hemisphere to extract, case-insensitive: "L"/"left", "R"/"right",
            or "LR" for both hemispheres (default)

        check_pth: whether to check if the path exists (default is True)
        silence: whether to suppress print statements (default is True)
    outputs:
        str for a single hemisphere, or a list [path_L, path_R] for hemi="LR".
        When check_pth is True, every path that is not an existing file is
        reported with a printed message and returned prefixed with "ERROR:".
    raises:
        ValueError: invalid label, invalid hemisphere, hippunfold derivative
            (not implemented), or unrecognised derivative folder.
    """

    # Normalise the label. Matching is done on the lower-cased value, so the
    # alias table must hold lower-case keys ("midThick" -> "midthick").
    label_aliases = {
        "pial": "pial",
        "white": "white",
        "midthick": "midthickness",
        "midthickness": "midthickness",
    }
    label = label_aliases.get(label.lower())
    if label is None:
        raise ValueError("Invalid surface type. Choose from 'pial', 'white', or 'midThick'.")

    # Normalise the hemisphere
    hemi_aliases = {"L": "L", "LEFT": "L", "R": "R", "RIGHT": "R", "LR": "LR"}
    hemi = hemi_aliases.get(hemi.upper())
    if hemi is None:
        raise ValueError("Invalid hemisphere. Choose from 'L', 'R', or 'LR'.")

    # construct the path to the map file(s), depending on the derivative type
    deriv_lower = deriv_fldr.lower()
    if "micapipe" in deriv_lower:
        maps_dir = f"{root}/derivatives/{deriv_fldr}/sub-{sub}/ses-{ses}/maps"
        if hemi == "LR":
            pth = [
                mp_mapsPth(dir=maps_dir, sub=sub, ses=ses, hemi=h, surf=surf, lbl=label, ft=feature)
                for h in ("L", "R")
            ]
            if not silence: print(f"[get_1pth] Returning paths for both hemispheres ([0]: L, [1]: R)")
        else:
            pth = mp_mapsPth(dir=maps_dir, sub=sub, ses=ses, hemi=hemi, surf=surf, lbl=label, ft=feature)
            if not silence: print(f"[get_1pth] Returning path for {hemi} hemisphere")
    elif "hippunfold" in deriv_lower:
        # Draft naming for when this is implemented (space usually "T1w", surf
        # usually "fsLR", labels "hipp_outer" / "hipp_inner" / "hipp_midthickness"):
        #   {root}/derivatives/{deriv_fldr}/sub-{sub}/ses-{ses}/surf/
        #       sub-{sub}_ses-{ses}_hemi-{hemi}_space-{space}_den-{surf}_label-{label}.surf.gii
        raise ValueError("Hippunfold derivative not yet implemented. Need to create feature maps using hippunfold surfaces.")
    else:
        raise ValueError("Invalid derivative folder. Choose from 'micapipe' or 'hippunfold'.")

    # Flag missing files in place rather than raising, so callers can collect
    # paths for many subjects and inspect the failures afterwards.
    if check_pth:
        if isinstance(pth, list):
            for idx, p in enumerate(pth):
                if not chk_pth(p):
                    print(f"\t[get_1pth] FILE NOT FOUND (sub-{sub}_ses-{ses}): {p}")
                    pth[idx] = "ERROR:" + p
        else:
            if not chk_pth(pth):
                print(f"\t[get_1pth] FILE NOT FOUND (sub-{sub}_ses-{ses}): {pth}")
                pth = "ERROR:" + pth

    return pth

def get_Npths(demographics, study, groups, feature="FA", derivative="micapipe", label="midthickness", hemi="LR", space="nativepro", surf="fsLR-5k"):
    """
    Get path to surface files for individual groups

    Reads the demographics CSV, selects the rows of `study` that belong to each
    group, and asks `get_1pth` for the map path(s) of every subject/session.

    Input:
    demographics: dict  regarding demographics file.
        Required keys:
            'pth'
            'ID_7T'
            'ID_3T'
            'SES'
            'date'
            'grp'
        The ID column is looked up as demographics[f"ID_{study['study']}"].
        The CSV must also contain a column literally named 'study'.
    study: dict  regarding study.
        Required keys:
            'name'
            'dir_root'
            'study'
            'dir_mp'
            'dir_hu'
    groups: dict    of groups to extract surfaces for.
        Each key should be a group name, and the value should be a list of labels in the 'grp' column of demographics file assigned to that group.
    feature: str  feature map to extract (file-name suffix, e.g. "FA").
    derivative: str  "micapipe" or "hippunfold"; anything else falls back to micapipe with a warning.
    label: str  surface label to extract
    hemi: str  hemisphere to extract. Default is "LR" for both left and right hemispheres.
    space: str  space of the surface data. Default is "nativepro".
    surf: str  surface type and resolution. Default is "fsLR-5k".

    Output:
    list of dicts, one per group, with keys 'study', 'grp', 'grp_labels',
    'label', 'feature' and 'map_pths'. 'map_pths' is a dataframe holding the
    ID, session, study and date columns plus 'pth_L'/'pth_R' (hemi "LR") or
    f'pth_{hemi}'. It is empty, but still has those columns, when no
    participant matches the group.

    Raises:
    KeyError: if demographics has no ID entry for study['study'].
    """
    import pandas as pd

    demo = pd.read_csv(demographics['pth'], dtype=str)

    # Resolve column names from the arguments rather than from notebook globals
    id_key = f"ID_{study['study']}"
    if id_key not in demographics:
        raise KeyError(f"[get_Npths] demographics has no '{id_key}' entry for study '{study['study']}'")
    ID_col = demographics[id_key]
    ses_col = demographics['SES']
    date_col = demographics['date']
    grp_col = demographics['grp']

    if derivative == "hippunfold":
        deriv_fldr = study['dir_hu']
    else:
        if derivative != "micapipe":
            print(f"[get_Npths] WARNING: derivative not recognized. Defaulting to micapipe.")
        deriv_fldr = study['dir_mp']

    # get_1pth returns [L, R] for "LR" (case-insensitive) and a single path otherwise
    both_hemis = hemi.upper() == "LR"
    pth_cols = ['pth_L', 'pth_R'] if both_hemis else [f'pth_{hemi}']

    in_study = demo['study'] == study['study']  # loop-invariant row filter

    out = []
    for grp_name, grp_labels in groups.items():
        print(f"{study['name']} {grp_name} ({grp_labels})")

        # get IDs for this group
        ids = demo.loc[
            demo[grp_col].isin(grp_labels) & in_study,
            [ID_col, ses_col, 'study', date_col]
        ].copy()

        pths = [
            get_1pth(root=study['dir_root'], deriv_fldr=deriv_fldr, sub=sub_id, ses=ses_id, label=label, surf=surf, feature=feature, space=space, hemi=hemi)
            for sub_id, ses_id in zip(ids[ID_col], ids[ses_col])
        ]

        # Always create the path columns, even for an empty group, so the
        # de-duplication below never hits a missing column.
        if both_hemis:
            ids['pth_L'] = pd.Series([p[0] for p in pths], index=ids.index, dtype=object)
            ids['pth_R'] = pd.Series([p[1] for p in pths], index=ids.index, dtype=object)
        else:
            ids[pth_cols[0]] = pd.Series(pths, index=ids.index, dtype=object)

        # if paths are duplicated, then keep only one of those rows
        ids = ids.drop_duplicates(subset=pth_cols)

        # create dictionary item for each group, add to output list
        out.append({
            'study': study['name'],
            'grp': grp_name,
            'grp_labels': grp_labels,
            'label': label,
            'feature': feature,
            'map_pths': ids
        })

    return out


In [None]:
def ses_clean(df, method="newest", silent=True, id_col=None, ses_col="SES", date_col="Date"):
    """
    Choose the session to use for each subject.
        If a subject has multiple sessions, only one of them is kept, chosen
        among the sessions that have at least one usable map path.

    inputs:
        df: pd.dataframe with columns for subject ID, session, date and map paths.
            Path columns are those whose name starts with 'pth_', 'surf_' or 'map_'.
            A path is considered missing if it is not a string, is empty, or starts with "ERROR".
        method: method to use for choosing session.
            "newest": use most recent session
            "oldest": use oldest session in the list
            {number}: session code to use (e.g. '01' or 'a1' etc)
        silent: whether to suppress warnings (default is True)
        id_col: name of the subject ID column. Default: 'ID' if present, otherwise the first column of df.
        ses_col: name of the session column (default "SES")
        date_col: name of the date column (default "Date"). Values are parsed
            day-first (e.g. "18.04.2024"); unparseable dates are treated as missing.

    outputs:
        pd.dataframe, never None:
            - df itself when it is empty or when no subject ID is repeated
              (rows are then returned untouched, including rows with missing paths)
            - otherwise the selected rows, in their original row order
            - an empty dataframe with the same columns when no session has a usable path
    """
    import pandas as pd

    # check if the dataframe is empty
    if df.empty:
        if not silent: print("\t[clean_pths] WARNING: Empty dataframe")
        return df

    if id_col is None:
        id_col = 'ID' if 'ID' in df.columns else df.columns[0]

    # no repeated IDs -> every subject already has exactly one session
    if not df.duplicated(subset=[id_col]).any():
        return df

    # Find the column(s) that contain the paths
    path_cols = [col for col in df.columns if str(col).startswith(('pth_', 'surf_', 'map_'))]

    def _usable(x):
        return isinstance(x, str) and x != "" and not x.startswith("ERROR")

    # Mark rows where at least one path is valid (all False if there are no path columns)
    valid = [any(_usable(v) for v in row) for row in df[path_cols].to_numpy()]
    df_valid = df.loc[valid].copy()

    if df_valid.empty:
        if not silent: print("\t[clean_pths] WARNING: No valid sessions found for subject(s).")
        return df.iloc[[]]  # return empty dataframe with same columns

    if method in ("newest", "oldest"):
        # Compare real dates: "dd.mm.yyyy" strings do not sort chronologically.
        dates = pd.to_datetime(df_valid[date_col], dayfirst=True, errors="coerce")
        newest = method == "newest"
        # Sort ascending and keep the last (newest) or first (oldest) row per
        # subject; undated rows are placed so they are only picked as a last resort.
        order = dates.reset_index(drop=True).sort_values(
            kind="stable", na_position="first" if newest else "last"
        ).index
        ranked = df_valid.iloc[order]
        df_clean = ranked.drop_duplicates(subset=[id_col], keep="last" if newest else "first").sort_index()
    else:
        df_clean = df_valid[df_valid[ses_col] == method]
    return df_clean



def clean_pths(dl, method="newest"):
    """
    Keeps only one session per ID
    input:
        dl (for dictionary list): List of dictionary items (e.g. outputs from get_Npths). 
            These dict should contain a df under the key 'map_pths'
        method: method to use for choosing session.
            "newest": use most recent session
            "oldest": use oldest session in the list
            {number}: session code to use (e.g. '01' or 'a1' etc)

    output:
        dl: the same list object, with each item's 'map_pths' replaced by the
            cleaned dataframe (the list is modified in place and also returned)

    """

    # iterate through each dictionary item in the list
    for entry in dl:
        cleaned = ses_clean(entry['map_pths'], method=method)
        # Never overwrite a dataframe with None: if ses_clean has nothing to
        # return, the original dataframe is kept as is.
        if cleaned is not None:
            entry['map_pths'] = cleaned

    return dl


def get_finalSes(dl, save_pth=None):
    """
    From a list of dictionary items, create a DF with sessions retained for each participant and each feature 

    NOTE: not implemented yet. The body contains no statements besides this
    docstring, so a call returns None and `save_pth` is ignored.

    input:
        dl: List of dictionary items with cleaned dataframes
        save_pth: path to save the dataframe to. If None, do not save.

    output (planned):
        df: pd.dataframe with columns for subject ID, session_feature, date and map_paths
            TODO: define when a map path counts as missing (the original note was left unfinished).
    """

    # can create a seperate function that collects sessions for different features

In [27]:
# Load the demographics sheet with every column as text (keeps leading zeros in
# session codes), then display the identifier / grouping columns.
demo = pd.read_csv(filepath_or_buffer=demographics['pth'], dtype=str)
demo.loc[:, ["MICS_ID", "PNI_ID", "study", "SES", "grp", "grp_detailed"]]

Unnamed: 0,MICS_ID,PNI_ID,study,SES,grp,grp_detailed
0,HC129,Pilot013,7T,05,CTRL,CTRL
1,HC082,PNC003,7T,01,CTRL,CTRL
2,HC082,PNC003,7T,02,CTRL,CTRL
3,HC082,PNC003,7T,03,CTRL,CTRL
4,HC082,PNC003,7T,04,CTRL,CTRL
...,...,...,...,...,...,...
116,HC130,PNC026,3T,02,CTRL,CTRL
117,HC083,PNC011,3T,02,CTRL,CTRL
118,PX215,PNE020,3T,01,UKN,UKN_U
119,PX216,PNE021,3T,01,TLE,TLE_R


In [127]:
# Surface label and feature to collect; both also end up in the output file name
label = "midthickness"
feature = "FA"

map_pths = []
if 'surf_dfs' not in locals():
    surf_dfs = {}

for study in studies:
    # ID column of the demographics file depends on the field strength.
    # ID_col is kept as a notebook-level variable because other cells read it.
    if study['study'] == "3T":
        ID_col = demographics['ID_3T']
    elif study['study'] == "7T":
        ID_col = demographics['ID_7T']
    else:
        # do not silently reuse the ID column of the previous study
        raise ValueError(f"Unknown study type '{study['study']}' for {study['name']}. Expected '3T' or '7T'.")

    # pass label explicitly so the collected paths always match the file name below
    map_pths.extend(get_Npths(demographics, study, groups, feature=feature, derivative="micapipe", label=label))

# save
save_pth = "/host/verges/tank/data/daniel/3T7T/z/outputs/paths"
date = datetime.datetime.now().strftime("%d%b%Y")
with open(f'{save_pth}/map_paths_{label}-{feature}_{date}.pkl', 'wb') as f:
    pickle.dump(map_pths, f)


MICs all (['TLE_U', 'MFCL', 'FLE_R', 'MFCL_bTLE', 'UKN_L', 'mTLE_R', 'mTLE_L', 'FLE_L', 'UKN_U', 'TLE_L', 'TLE_R'])
	[get_1pth] FILE NOT FOUND (sub-PX071_ses-03): /data/mica3/BIDS_MICs/derivatives/micapipe_v0.2.0/sub-PX071/ses-03/maps/sub-PX071_ses-03_hemi-L_surf-fsLR-5k_label-midthickness_FA.func.gii
	[get_1pth] FILE NOT FOUND (sub-PX071_ses-03): /data/mica3/BIDS_MICs/derivatives/micapipe_v0.2.0/sub-PX071/ses-03/maps/sub-PX071_ses-03_hemi-R_surf-fsLR-5k_label-midthickness_FA.func.gii
	[get_1pth] FILE NOT FOUND (sub-PX216_ses-01): /data/mica3/BIDS_MICs/derivatives/micapipe_v0.2.0/sub-PX216/ses-01/maps/sub-PX216_ses-01_hemi-L_surf-fsLR-5k_label-midthickness_FA.func.gii
	[get_1pth] FILE NOT FOUND (sub-PX216_ses-01): /data/mica3/BIDS_MICs/derivatives/micapipe_v0.2.0/sub-PX216/ses-01/maps/sub-PX216_ses-01_hemi-R_surf-fsLR-5k_label-midthickness_FA.func.gii
MICs TLE (['TLE_L', 'TLE_R', 'TLE_U', 'mTLE_R', 'mTLE_L'])
	[get_1pth] FILE NOT FOUND (sub-PX071_ses-03): /data/mica3/BIDS_MICs/derivati

In [None]:
# load list of dict items with surface paths

#with open('map_pths.pkl', 'rb') as f:
#    map_pths = pickle.load(f)

In [128]:
# Show the path table of one study/group combination (first match only)
get_study = "PNI"
get_grp = "ctrl"

for item in map_pths:
    # skip everything that is not the requested combination
    if (item['study'], item['grp']) != (get_study, get_grp):
        continue

    print(f"{item['study']}-{item['grp']} ({item['grp_labels']})")
    # lift the column limit so the path columns are not elided
    with pd.option_context('display.max_columns', None):
        print(item['map_pths'])
    break

PNI-ctrl (['CTRL'])
      PNI_ID SES study        Date  \
0   Pilot013  05    7T  18.04.2024   
1     PNC003  01    7T  06.05.2022   
2     PNC003  02    7T  13.06.2022   
3     PNC003  03    7T  13.03.2023   
4     PNC003  04    7T  24.10.2023   
5     PNC006  01    7T  11.05.2022   
6     PNC006  02    7T  05.11.2022   
7     PNC006  03    7T  20.03.2023   
8     PNC006  04    7T  17.10.2023   
9     PNC011  01    7T  05.10.2022   
10    PNC011  02    7T  11.11.2022   
11    PNC011  03    7T  01.03.2023   
12    PNC011  04    7T  03.08.2023   
13    PNC018  01    7T  19.04.2023   
14    PNC018  02    7T  05.05.2023   
15    PNC018  03    7T  12.05.2023   
16    PNC018  04    7T  28.09.2023   
17    PNC018  a1    7T  10.02.2025   
18    PNC019  01    7T  20.04.2023   
19    PNC019  02    7T  03.05.2023   
20    PNC019  03    7T  17.05.2023   
21    PNC019  04    7T  07.11.2023   
22    PNC019  a1    7T  11.12.2024   
31    PNC024  01    7T  23.04.2024   
32    PNC024  02    7T  07.05.

In [None]:
# Find path for specific participant
study = "PNI"
sub = "PNC018"
ses = "01"

grp = "ctrl"

entry = next((item for item in map_pths if item['study'] == study and item['grp'] == grp), None)

if entry is None:
    print(f"No entry found for study {study} and group {grp}")
else:
    df = entry['map_pths']
    # get_Npths puts the study-specific ID column first. Reading it from the
    # dataframe avoids relying on the notebook-level ID_col, which only matches
    # whichever study was processed last.
    id_col = df.columns[0]
    row = df[(df[id_col] == sub) & (df[demographics['SES']] == ses)]
    if not row.empty:
        print(row['pth_L'].values[0])
    else:
        print(f"No entry found for subject {sub} and session {ses}")


/data/mica3/BIDS_PNI/derivatives/micapipe_v0.2.0/sub-PNC018/ses-01/maps/sub-PNC018_ses-01_hemi-L_surf-fsLR-5k_label-midthickness_FA.func.gii


In [None]:
# finalize sessions to use
# Placeholder: starts as an empty list; nothing fills it in this cell yet.
map_pths_clean = []
# TODO: pass map_pths into the function that chooses a single session per subject

In [None]:
# put surface files into dataframes

In [None]:
# flip TLEs --> put all lesions on same side

In [None]:
# get difference maps at 3T (3T ctrl - 3T cases)

In [None]:
# get difference maps at 7T (7T ctrl - 7T cases)

In [None]:
# get difference maps of difference maps (3T dif maps - 7T dif maps)