# Group comparison 3T and 7T epilepsy  

Surface-based comparisons  
- vertex-wise T-test : are controls and pts different
    - Use brainstat
- vertex-wise effect size : how big are the distances between the vertex differences?
    - Use own function


For figures: 
- Visualize effect size on a brain masked for significant p-values 

In [1]:
import pandas as pd
import numpy as np
import nibabel as nib
import pickle
import datetime
import brainstat as bstat

import importlib
import vrtx
importlib.reload(vrtx)

<module 'vrtx' from '/host/verges/tank/data/daniel/3T7T/z/code/analyses/vrtx.py'>

# Functions

In [2]:
# Finding paths and checking if they exist

def chk_pth(pth):
    """
    Check if the path exists and is a file.
    
    inputs:
        pth: path to check
    
    outputs:
        True if the path exists and is a file, False otherwise
    """
    
    import os
    
    if os.path.exists(pth) and os.path.isfile(pth):
        return True
    else:
        return False
    
def mp_mapsPth(dir, sub, ses, hemi, surf, lbl, ft):
    """
    Returns path to micapipe maps for given subject, session, hemisphere, surface, label, and feature.
    """
    if lbl == "thickness":
        return f"{dir}/sub-{sub}_ses-{ses}_hemi-{hemi}_surf-{surf}_label-{lbl}.func.gii"
    else:
        return f"{dir}/sub-{sub}_ses-{ses}_hemi-{hemi}_surf-{surf}_label-{lbl}_{ft}.func.gii"

# when working, add to Utils scripts
def get_1pth(root, deriv_fldr, sub, ses, feature, label="midthickness", surf="fsLR-5k", space="nativepro", hemi="LR", check_pth=True,silence=True):
    """
    Get the path to the surface data for a given subject and session.
    Assumes BIDS format of data storage.

    inputs:
        root: root directory of the study
        deriv_fldr: name of derivative folder containing the surface data
        sub: subject ID (no `sub-` prefix)
        ses: session ID (with leading zero if applicable; no `ses-` prefix)
        surf: surface type and resolution (e.g., fsLR-32k, fsLR-5k)
        label: surface label (e.g., "pial", "white", "midThick")
        space: space of the surface data (e.g., "nativepro", "fsnative")
        hemi: hemisphere to extract (default is "LR" for both left and right hemispheres)

        check_pth: whether to check if the path exists (default is True)
        silence: whether to suppress print statements (default is True)
    outputs:
        path to the surface data files
    """

    # make surf to lower case
    label = label.lower()

    # ensure that label is well defined
    if label == "thickness":
        label = "thickness"
    elif label == "pial":
        label = "pial"
    elif label == "white":
        label = "white"
    elif label == "midthick" or label == "midthickness":
        label = "midthickness"
    else:
        raise ValueError(f"{label} Invalid label type. Choose from 'pial', 'white', 'midThick' or 'thickness'.")
    
    # construct the path to the surface data file
    hemi = hemi.upper()
    if hemi == "LEFT" or hemi == "L":
        hemi = "L"
    elif hemi == "RIGHT" or hemi == "R":
        hemi = "R"
    elif hemi != "LR":
        raise ValueError("Invalid hemisphere. Choose from 'L', 'R', or 'LR'.")

    # handle hippunfold naming convention
    if "micapipe" in deriv_fldr.lower():
        pth = f"{root}/derivatives/{deriv_fldr}/sub-{sub}/ses-{ses}/maps"
        if hemi == "L" or hemi == "R":
            pth = mp_mapsPth(dir=pth, sub=sub, ses=ses, hemi=hemi, surf=surf, lbl=label, ft=feature)
            if not silence: print(f"[get_1pth] Returning paths for both hemispheres ([0]: L, [1]: R)")
            
        else:         
            pth_L = mp_mapsPth(dir=pth, sub=sub, ses=ses, hemi="L", surf=surf, lbl=label, ft=feature)
            pth_R = mp_mapsPth(dir=pth, sub=sub, ses=ses, hemi="R", surf=surf, lbl=label, ft=feature)        
            pth = [pth_L, pth_R]
            if not silence: print(f"[get_1pth] Returning paths for both hemispheres ([0]: L, [1]: R)")
    elif "hippunfold" in deriv_fldr.lower():
        raise ValueError("Hippunfold derivative not yet implemented. Need to create feature maps using hippunfold surfaces.")
        
        # space usually: "T1w"
        # surf usually: "fsLR"
        # label options: "hipp_outer", "hipp_inner", "hipp_midthickness"

        pth = f"{root}/derivatives/{deriv_fldr}/sub-{sub}/ses-{ses}/surf"

        if hemi == "L" or hemi == "R":
            pth = f"{pth}/sub-{sub}_ses-{ses}_hemi-{hemi}_space-{space}_den-{surf}_label-{label}.surf.gii"
            if not silence: print(f"[surf_pth] Returning hippunfold path for {hemi} hemisphere")
        else:
            pth = f"{pth}/sub-{sub}_ses-{ses}_hemi-{hemi}_surf-{surf}_label-{label}_{feature}.func.gii"
            pth_L = f"{pth}/sub-{sub}_ses-{ses}_hemi-L_-{surf}_label-{label}.surf.gii"
            pth_R = f"{pth}/sub-{sub}_ses-{ses}_hemi-R_space-{space}_den-{surf}_label-{label}.surf.gii"
            pth = [pth_L, pth_R]
            if not silence: print(f"[surf_pth] Returning hippunfold paths for both hemispheres ([0]: L, [1]: R)")

    else:
        raise ValueError("Invalid derivative folder. Choose from 'micapipe' or 'hippunfold'.")


    if check_pth:
        if isinstance(pth, list):
            for idx, p in enumerate(pth):
                if not chk_pth(p):
                    if label == "thickness": feature = "(thickness)"
                    print(f"\t[get_1pth] FILE NOT FOUND (ft: {feature}, sub-{sub}_ses-{ses}): {p}")
                    pth[idx] = "ERROR:" + p
        else:
            if not chk_pth(pth):
                print(f"\t[get_1pth] FILE NOT FOUND (ft: {feature}, sub-{sub}_ses-{ses}): {pth}")
                pth = "ERROR:" + pth
    
    return pth   

def get_Npths(demographics, study, groups, feature="FA", derivative="micapipe", label="midthickness", hemi="LR", space="nativepro", surf="fsLR-5k"):
    """
    Get path to surface files for individual groups


    Input:
    demographics: dict  regarding demographics file. 
        Required keys: 
            'pth'
            'ID_7T'
            'ID_3T'
            'SES'
            'date'
            'grp'
    study: dict  regarding study.
        Required keys: 
            'name'
            'dir_root'
            'study'
            'dir_mp'
            'dir_hu'
    groups: dict    of groups to extract surfaces for. 
        Each key should be a group name, and the value should be a list of labels in the 'grp' column of demographics file assigned to that group.
    label: str  surface label to extract
    hemi: str  hemisphere to extract. Default is "LR" for both left and right hemispheres.
    space: str  space of the surface data. Default is "nativepro".
    surf: str  surface type and resolution. Default is "fsLR-5k".
    """
    import pandas as pd

    demo = pd.read_csv(demographics['pth'], dtype=str)
    
    out = []

    if derivative == "hippunfold":
        deriv_fldr = study['dir_hu']
    elif derivative == "micapipe":
        deriv_fldr = study['dir_mp']
    else:
        deriv_fldr = study['dir_mp']
        print(f"[get_Npths] WARNING: derivative not recognized. Defaulting to micapipe.")


    for grp_name, grp_labels in groups.items():
        print(f"{study['name']} {grp_name} ({grp_labels})")

        # get IDs for this group
        ids = demo.loc[
            (demo[demographics['grp']].isin(grp_labels)) &
            (demo['study'] == study['study']),
            [ID_col, demographics['SES'], 'study', 'Date']
        ].copy()

        for i, row in ids.iterrows():
            ID = row[ID_col]
            SES = row[demographics['SES']]
            date = row[demographics['date']]
            #print(f"\tsub-{ID}_ses-{SES}")
            pth = get_1pth(root=study['dir_root'], deriv_fldr=deriv_fldr, sub=ID, ses=SES, label=label, surf=surf, feature=feature, space=space, hemi=hemi)
            # add this pth to the dataframe
            if isinstance(pth, list):
                ids.loc[i, f'pth_L'] = pth[0]
                ids.loc[i, f'pth_R'] = pth[1]
            else:
                ids.loc[i, f'pth_{hemi}'] = pth 
        # if paths are duplicated, then keep only one of those rows
        if hemi == "LR":
            ids = ids.drop_duplicates(subset=[f'pth_L', f'pth_R'])
        else:
            ids = ids.drop_duplicates(subset=[f'pth_{hemi}'])

        # create dictionary item for each group, add to output list
        out.append({
            'study': study['name'],
            'grp': grp_name,
            'grp_labels': grp_labels,
            'label': label,
            'feature': feature,
            'map_pths': ids
        })

    return out


In [3]:
# ID and session cleaning
def clean_pths(dl, method="newest", silent=True):
    """
    Keeps only one session per ID
    input:
        dl (for dictionary list): List of dictionary items (e.g. outputs from get_Npths). 
            These dict should contain a df under the key 'map_pths'
        method: method to use for choosing session.
            "newest": use most recent session
            "oldest": use oldest session in the list
            {number}: session code to use (e.g. '01' or 'a1' etc)

    output:
        dl: List of dictionary items with cleaned dataframes

    """
    dl_out = []
    
    for i, d in enumerate(dl):
        if not silent: print(f"[clean_pths] {d['study']} {d['grp']}: {df.shape}, num unique IDs: {df[ID_col].nunique()}")

        df = d['map_pths']
                
        if d['study'] == "PNI": ID_col = "PNI_ID"
        else: ID_col = "MICS_ID"
        #print(ID_col)

        if df.empty: # check if the dataframe is empty
            print(f"\t[clean_pths] WARNING: Empty dataframe for {d['study']} {d['grp']}")
            continue
        else:
            df_clean = ses_clean(df, ID_col, method=method, silent=True)
            #dl[i]['map_pths'] = df_clean

        if df_clean.empty:  # check if the cleaned dataframe is empty
            print(f"\t[clean_pths] WARNING: Cleaned dataframe is empty for {d['study']} {d['grp']}")
            continue

        dl_out.append({
            'study': d['study'],
            'grp': d['grp'],
            'grp_labels': d['grp_labels'],
            'label': d['label'],
            'feature': d['feature'],
            'map_pths': df_clean
        })

    return dl_out


def ses_clean(df, ID_col, method="newest", silent=True):
    """
    Choose the session to use for each subject.
        If subject has multiple sessions with map path should only be using one of these sessions.

    inputs:
        df: pd.dataframe with columns for subject ID, session, date and map_paths
            Assumes map path is missing if either : map_pth
        method: method to use for choosing session. 
            "newest": use most recent session
            "oldest": use oldest session in the list
            {number}: session code to use (e.g. '01' or 'a1' etc)
    """
    
    import pandas as pd
    import datetime

    # check if the dataframe is empty
    if df.empty:
        print(f"[ses_clean] WARNING: Empty dataframe. Skipping.")
        return

    if not silent: print(f"[ses_clean] Choosing session according to method: {method}")
    
    
    df = df.copy()  # Avoid modifying the original dataframe

    # remove rows whose path col is empty or starts with "ERROR:"
    path_cols = [col for col in df.columns if col.startswith('pth_') or col.startswith('surf_') or col.startswith('map_')]
    df_clean = df.dropna(subset=path_cols, how='all')  # Keep rows where at least one path column is not NaN
    df_clean = df_clean[~df_clean[path_cols].apply(lambda x: x.str.startswith("ERROR:")).any(axis=1)]  # Remove rows where any path column starts with "ERROR:"
    if df_clean.empty:
        if not silent:
            print(f"[ses_clean] WARNING: All rows removed due to empty or ERROR paths. Returning empty dataframe.")
        return pd.DataFrame()
    
    # Find repeated IDs (i.e., subjects with multiple sessions)
    repeated_ids = df_clean[df_clean.duplicated(subset=ID_col, keep=False)][ID_col].unique()
    
    if not silent:
        if len(repeated_ids) > 0:
            print(f"\tIDs with multiple sessions: {repeated_ids}")
        else:
            print(f"\tNo repeated IDs found")

    rows_to_remove = []
    
    # Convert 'Date' column to datetime for comparison
    df_clean['Date_dt'] = pd.to_datetime(df_clean['Date'], format='%d.%m.%Y', errors='coerce')
    today = pd.to_datetime('today').normalize()

    if len(repeated_ids) > 0:
        if method == "newest":
            for id in repeated_ids:
                sub_df = df_clean[df_clean[ID_col] == id]
                if sub_df.shape[0] > 1:
                    idx_to_keep = sub_df['Date_dt'].idxmax()
                    idx_to_remove = sub_df.index.difference([idx_to_keep])
                    rows_to_remove.extend(idx_to_remove)
        elif method == "oldest":
            for id in repeated_ids:
                sub_df = df_clean[df_clean[ID_col] == id]
                if sub_df.shape[0] > 1:
                    idx_to_keep = sub_df['Date_dt'].idxmin()
                    idx_to_remove = sub_df.index.difference([idx_to_keep])
                    rows_to_remove.extend(idx_to_remove)
        else:
            # Assume method is a session code (e.g., '01', 'a1', etc)
            for id in repeated_ids:
                sub_df = df_clean[df_clean[ID_col] == id]
                if sub_df.shape[0] > 1:
                    idx_to_remove = sub_df[sub_df['SES'] != method].index
                    rows_to_remove.extend(idx_to_remove)

    # Remove the rows marked for removal
    df_clean = df_clean.drop(rows_to_remove)
    #if not silent: print(df_clean[[ID_col, 'SES']].sort_values(by=ID_col))

    # if num rows =/= to num unique IDs then write warning
    if df_clean.shape[0] != df_clean[ID_col].nunique():
        print(f"[ses_clean] WARNING: Number of rows ({df_clean.shape[0]}) not equal to num unique IDs ({df_clean[ID_col].nunique()})")
        print(f"\tMultiple sessions for IDs: {df_clean[df_clean.duplicated(subset=ID_col, keep=False)][ID_col].unique()}")

    if not silent: 
        print(f"\t{df.shape[0] - df_clean.shape[0]} rows removed, Change in unique IDs: {df_clean[ID_col].nunique() - df[ID_col].nunique()}")
        print(f"\t{df_clean.shape[0]} rows remaining")

    return df_clean

def get_finalSES(dl, demo, save_pth=None, long=False, silent=True): 
    """
    From a list of dictionary items, create a DF with sessions retained for each participant and each feature 

    input:
        dl: List of dictionary items with cleaned dataframes
        demo: dictionary with demographics file information.
        save_pth: path to save the dataframe to. If None, do not save.
        long: if True, return a long format dataframe with one row per subject and session feature. If False, return wide format.

    output:
        df: pd.dataframe with columns for subject ID, session_feature, grp, study and map_paths
            Assumes map path is missing if either : map_pth
    """
    import datetime
    import pandas as pd
    import numpy as np

    demo_df = pd.read_csv(demo['pth'], dtype=str)
    out = pd.DataFrame()  # Will collect all unique IDs and their session columns

    for i, d in enumerate(dl):
        feature = d['feature']
        label = d['label']

        df = d['map_pths']

        id_col = [col for col in df.columns if 'ID' in col.upper()][0]  
        ses_col = 'SES'
        
        if not silent: print(f"[get_finalSES] {d['study']} {d['grp']}: {feature}, {label} ({df.shape[0]} rows)")

        # Use correct study prefix and correct ID column for merge
        if d['study'] == "PNI": 
            study_prefix = "7T"
            merge_id_col = demo['ID_7T']
        elif d['study'] == "MICs":
            study_prefix = "3T"
            merge_id_col = demo['ID_3T']
        else: 
            study_prefix = "Unknown"
            merge_id_col = None

        if label == "thickness": lbl_ft = f"{label}"
        else: lbl_ft = f"{label}-{feature}"

        new_col = f'{study_prefix}-ses_{lbl_ft}'

        # Mark SES as NA if all path columns are ERROR or missing
        path_cols = [col for col in df.columns if col.startswith('pth_') or col.startswith('surf_') or col.startswith('map_')]
        def ses_na_row(row):
            if not path_cols:
                return row[ses_col]
            # If all path columns are missing or start with ERROR
            if all((not isinstance(row[c], str)) or row[c].startswith("ERROR") or row[c] == "" for c in path_cols):
                return "NA"
            return row[ses_col]
        df_tmp = df[[id_col, ses_col] + path_cols].copy()
        df_tmp[new_col] = df_tmp.apply(ses_na_row, axis=1)
        df_tmp = df_tmp.rename(columns={id_col: "ID"})
        df_tmp = df_tmp[["ID", new_col]]

        # If column already exists, add to it
        if new_col in out.columns:
            # Merge on ID, but keep both values for comparison
            merged = pd.merge(out[['ID', new_col]], df_tmp, on="ID", how="outer", suffixes=('_old', '_new'))
            
            def resolve(row): # For each ID, resolve conflicts
                vals = set([row[f"{new_col}_old"], row[f"{new_col}_new"]])
                vals = {v for v in vals if pd.notnull(v)}
                if len(vals) == 1:
                    return vals.pop()
                elif len(vals) > 1:
                    # Print warning and keep the latest value (assuming SES is string, keep max)
                    if not silent:
                        print(f"[get_finalSES] WARNING: Multiple values for {row['ID']} in {new_col}: {vals}. Keeping latest.")
                    # If SES is numeric string, sort as int, else as string
                    try:
                        return sorted(vals, key=lambda x: int(x) if isinstance(x, str) and x.isdigit() else str(x))[-1]
                    except Exception:
                        return sorted(vals)[-1]
                else:
                    return None
                
            merged[new_col] = merged.apply(resolve, axis=1)
            
            # Update out with resolved column
            out = pd.merge(out, merged[['ID', new_col]], on="ID", how="outer", suffixes=('', '_resolved'))
            out[new_col] = out[new_col + '_resolved'].combine_first(out[new_col])
            out = out.drop(columns=[new_col + '_resolved'])
        else:
            if out.empty:
                out = df_tmp
            else:
                out = pd.merge(out, df_tmp, on="ID", how='outer')

    if not long:
        # under construction
        print("[get_finalSES] Wide format not yet implemented. Returning long format instead.")
        long = True

    if save_pth is not None:
        date = datetime.datetime.now().strftime("%d%b%Y-%H%M")
        if long: save = f"{save_pth}/sesXfeat_long_{date}.csv"
        else: save = f"{save_pth}/sesXfeat_{date}.csv"
        out.to_csv(save, index=False)
        print(f"[get_finalSES] Saved dataframe to {save}")

    return out

# support print functions
def print_dict(dict, df_print=False):
    """
    Print the contents of a dictionary with DataFrames in a readable format.
    """
    print(f"\nmap_pths_clean ({len(dict)} items):")
    for i, d in enumerate(dict):
        print(f"\nItem {i}:")
        print(f"  Keys: {list(d.keys())}")

        for k, v in d.items():
            if isinstance(v, pd.DataFrame):
                print(f"  {k}: <DataFrame shape={v.shape}>")
                if df_print == True: print(f"  {k}: {v}")
            else:
                print(f" {k}: {v}")

def print_grpDF(dict, grp, study, df="pth"):

    for item in dict:
        
        if item['study'] == study and item['grp'] == grp:
            if df == "pth":
                df = ['map_pths']
            elif df == "maps":
                # identify keys with prefix 'map'
                df = [k for k in item.keys() if k.startswith('map_') or k.startswith('pth_')]
            else:
                df = 'map_pths'

            print(f"{item['study']}-{item['grp']} ({item['grp_labels']})")
            with pd.option_context('display.max_columns', None):
                for i in df: print(item[i])
            break

In [4]:
# ipsi/contra support functions
def find_paired_TLE_index(maps, idx):
    """
    Given the index of a TLE_L or TLE_R item in maps, find the index of the paired item
    (same study, label, feature, but opposite group).
    """
    item = maps[idx]
    grp = item['grp']
    if grp not in ['TLE_L', 'TLE_R']:
        raise ValueError("Item at idx is not TLE_L or TLE_R")
    paired_grp = 'TLE_R' if grp == 'TLE_L' else 'TLE_L'
    for j, other in enumerate(maps):
        if (
            j != idx and
            other['study'] == item['study'] and
            other['grp'] == paired_grp and
            other['label'] == item['label'] and
            other['feature'] == item['feature']
        ):
            return j
    return None  # Not found


def ctrl_index(maps, comp_idx, ctrl_code='ctrl'):
    """
    Find the index of the control group in the maps list for a given study.
    
    inputs:
        maps: list of dictionary items with keys 'study', 'grp', 'label', 'feature'
        study: name of the study to search in
        comp_idx: index of the item to find the control group for
        ctrl_code: code for control group (default is 'ctrl')
    """

    item = maps[comp_idx]
    study = item['study']
    label = item['label']
    feature = item['feature']

    for i, other in enumerate(maps):
        if (
            other['study'] == study and
            other['grp'] == ctrl_code and
            other['label'] == label and
            other['feature'] == feature
        ):
            return i
    return None  # Not found


# MAIN

In [5]:
# specify root directories
MICs = {
    "name": "MICs",
    "dir_root": "/data/mica3/BIDS_MICs",
    "dir_mp": "micapipe_v0.2.0",
    "dir_hu": "hippunfold_v1.3.0/hippunfold",
    "study": "3T",
    "ID_ctrl" : ["HC"],
    "ID_Pt" : ["PX"]
    }

PNI = {
    "name": "PNI",
    "dir_root": "/data/mica3/BIDS_PNI",
    "dir_mp": "micapipe_v0.2.0",
    "dir_hu": "hippunfold_v1.3.0/hippunfold",
    "study": "7T",
    "ID_col" : ["PNC", "Pilot"], # column for ID in demographics file
    }

studies = [MICs, PNI]

demographics = {
    "pth" : "/host/verges/tank/data/daniel/3T7T/z/data/pt/demo_22May2025.csv",
    # column names:
    "ID_7T" : "PNI_ID", 
    "ID_3T" : "MICS_ID",
    "SES" : "SES",
    "date": "Date",
    "grp" : "grp_detailed" # col name for participant grouping variable of interest
}

px_grps = { # specify patient group labels to compare to controls
    'allPX' : ['TLE_U', 'MFCL', 'FLE_R', 'MFCL_bTLE', 'UKN_L', 'mTLE_R', 'mTLE_L', 'FLE_L', 'UKN_U', 'TLE_L', 'TLE_R'],
    'TLE' : ['TLE_L', 'TLE_R', 'TLE_U', 'mTLE_R', 'mTLE_L'],
    'TLE_L': ['TLE_L', 'mTLE_L'],
    'TLE_R': ['TLE_R', 'mTLE_R'],
    'FCD' : ['FLE_R', 'FLE_L'],
    'MFCL' : ['MFCL', 'MFCL_bTLE'],
    'UKN' : ['UKN_L', 'UKN_U']
}

ctrl_grp = {'ctrl' : ['CTRL']}

groups = {**px_grps, **ctrl_grp}

features = ["FA", "MD", "T1map"]
#labels = ["white", "midthick", "thickness", "swm1.0mm", "swm2.0mm", "swm3.0mm"]
labels = ["white", "midthick", "thickness"]

surfaces = ["fsLR-5k"]

demo = pd.read_csv(demographics['pth'], dtype=str)
demo[["MICS_ID", "PNI_ID", "study", "SES", "grp", "grp_detailed"]]

Unnamed: 0,MICS_ID,PNI_ID,study,SES,grp,grp_detailed
0,HC129,Pilot013,7T,05,CTRL,CTRL
1,HC082,PNC003,7T,01,CTRL,CTRL
2,HC082,PNC003,7T,02,CTRL,CTRL
3,HC082,PNC003,7T,03,CTRL,CTRL
4,HC082,PNC003,7T,04,CTRL,CTRL
...,...,...,...,...,...,...
116,HC130,PNC026,3T,02,CTRL,CTRL
117,HC083,PNC011,3T,02,CTRL,CTRL
118,PX215,PNE020,3T,01,UKN,UKN_U
119,PX216,PNE021,3T,01,TLE,TLE_R


In [6]:
studies = [MICs, PNI]
groups = {**px_grps, **ctrl_grp}


map_pths = []
map_pths_clean = []
final_ses = []
save_pth = "/host/verges/tank/data/daniel/3T7T/z/outputs/pt"

if 'surf_dfs' not in locals():
    surf_dfs = {}

for study in studies:
    print(f"[main] {study['name']}")

    if study['study'] == "3T": ID_col = demographics['ID_3T']
    elif study['study'] == "7T": ID_col = demographics['ID_7T']
    for surf in surfaces:
        for label in labels:
            print(f"\tsurf:{surf}, lbl:{label}")
            if label == "thickness":
                map_pths.extend(get_Npths(demographics, study, groups, feature="", derivative="micapipe", label=label, surf=surf))
            else:
                for feature in features:
                    print(f"\t\tft: {feature}, surf: {surf}")
                    map_pths.extend(get_Npths(demographics, study, groups, feature=feature, derivative="micapipe", label=label, surf=surf))

# save dictionary item
save_pth = "/host/verges/tank/data/daniel/3T7T/z/outputs/pt"
date = datetime.datetime.now().strftime("%d%b%Y")
#with open(f'{save_pth}/map_paths_{date}.pkl', 'wb') as f:    pickle.dump(map_pths, f)
                    
map_pths_clean = clean_pths(map_pths, method="newest") # MAIN list of dict items
final_SES = get_finalSES(map_pths_clean, demo=demographics, save_pth=save_pth, long=False,silent=True) # summary of sessions used by feature for each participant


[main] MICs
	surf:fsLR-5k, lbl:white
		ft: FA, surf: fsLR-5k
MICs allPX (['TLE_U', 'MFCL', 'FLE_R', 'MFCL_bTLE', 'UKN_L', 'mTLE_R', 'mTLE_L', 'FLE_L', 'UKN_U', 'TLE_L', 'TLE_R'])
	[get_1pth] FILE NOT FOUND (ft: FA, sub-PX071_ses-03): /data/mica3/BIDS_MICs/derivatives/micapipe_v0.2.0/sub-PX071/ses-03/maps/sub-PX071_ses-03_hemi-L_surf-fsLR-5k_label-white_FA.func.gii
	[get_1pth] FILE NOT FOUND (ft: FA, sub-PX071_ses-03): /data/mica3/BIDS_MICs/derivatives/micapipe_v0.2.0/sub-PX071/ses-03/maps/sub-PX071_ses-03_hemi-R_surf-fsLR-5k_label-white_FA.func.gii
	[get_1pth] FILE NOT FOUND (ft: FA, sub-PX216_ses-01): /data/mica3/BIDS_MICs/derivatives/micapipe_v0.2.0/sub-PX216/ses-01/maps/sub-PX216_ses-01_hemi-L_surf-fsLR-5k_label-white_FA.func.gii
	[get_1pth] FILE NOT FOUND (ft: FA, sub-PX216_ses-01): /data/mica3/BIDS_MICs/derivatives/micapipe_v0.2.0/sub-PX216/ses-01/maps/sub-PX216_ses-01_hemi-R_surf-fsLR-5k_label-white_FA.func.gii
MICs TLE (['TLE_L', 'TLE_R', 'TLE_U', 'mTLE_R', 'mTLE_L'])
	[get_1pth

# Add ipsi/contra groups

In [7]:
# Create ipsi-contra dict entries
grps = ["TLE_L", "TLE_R"]

for study in studies:
    # find all entries for the groups of interest
    index_list = [i for i, item in enumerate(map_pths_clean) if item['study'] == study['name'] and item['grp'] in grps]
    #print(index_list)
    print(f"[flip_TLE] {len(index_list)} entries for groups {grps} in study {study['name']}")

    paired = set()  # to keep track of paired indices
    for i in index_list:
        if i in paired: # skip indexes already processed
            continue
          
        other_i = find_paired_TLE_index(map_pths_clean, i)
        
        item_i = map_pths_clean[i]
        grp_i = item_i['grp']
        df_i = item_i['map_pths']

        item_other = map_pths_clean[other_i]
        grp_other = item_other['grp']
        df_other = item_other['map_pths']
        
        print(f"\t{i} & {other_i} | {item_i['label']}, {item_i['feature']} | {grp_i} & {grp_other}")

        paired.add(i)
        if other_i is not None:
            paired.add(other_i)
        else:
            print(f"[flip_TLE] WARNING: No paired index found for {i}. Skipping.")
            continue

        out = item_i # copies item at index i

        # Add group as column
        df_i['grp'] = grp_i
        df_other['grp'] = grp_other

        # rename path cols
        if grp_i == 'TLE_L':
            df_i.rename(columns={f'pth_L': 'pth_ipsi', f'pth_R': 'pth_contra'}, inplace=True)
            df_other.rename(columns={f'pth_R': 'pth_ipsi', f'pth_L': 'pth_contra'}, inplace=True)
        else: # i['grp'] == 'TLE_R'
            df_i.rename(columns={f'pth_R': 'pth_ipsi', f'pth_L': 'pth_contra'}, inplace=True)
            df_other.rename(columns={f'pth_L': 'pth_ipsi', f'pth_R': 'pth_contra'}, inplace=True)

        # combine modified dfs
        dfs = pd.concat([df_i, df_other], ignore_index=True)
        
        out['map_pths'] = dfs  # add the combined dataframe to the output item
        out['grp'] = 'TLE_ic'  # ic for ipsi-contra
        out['grp_labels'] = [grp_i, grp_other]

        # add to the list of outputs
        map_pths_clean.append(out)

[flip_TLE] 10 entries for groups ['TLE_L', 'TLE_R'] in study MICs
	2 & 3 | white, FA | TLE_L & TLE_R
	10 & 11 | white, T1map | TLE_L & TLE_R
	18 & 19 | midthick, FA | TLE_L & TLE_R
	26 & 27 | midthick, T1map | TLE_L & TLE_R
	34 & 35 | thickness,  | TLE_L & TLE_R
[flip_TLE] 6 entries for groups ['TLE_L', 'TLE_R'] in study PNI
	45 & 46 | white, T1map | TLE_L & TLE_R
	56 & 57 | midthick, T1map | TLE_L & TLE_R
	64 & 65 | thickness,  | TLE_L & TLE_R


In [8]:
# print structure of map_pths_clean
print_dict(map_pths_clean)


map_pths_clean (78 items):

Item 0:
  Keys: ['study', 'grp', 'grp_labels', 'label', 'feature', 'map_pths']
 study: MICs
 grp: allPX
 grp_labels: ['TLE_U', 'MFCL', 'FLE_R', 'MFCL_bTLE', 'UKN_L', 'mTLE_R', 'mTLE_L', 'FLE_L', 'UKN_U', 'TLE_L', 'TLE_R']
 label: white
 feature: FA
  map_pths: <DataFrame shape=(19, 7)>

Item 1:
  Keys: ['study', 'grp', 'grp_labels', 'label', 'feature', 'map_pths']
 study: MICs
 grp: TLE
 grp_labels: ['TLE_L', 'TLE_R', 'TLE_U', 'mTLE_R', 'mTLE_L']
 label: white
 feature: FA
  map_pths: <DataFrame shape=(9, 7)>

Item 2:
  Keys: ['study', 'grp', 'grp_labels', 'label', 'feature', 'map_pths']
 study: MICs
 grp: TLE_ic
 grp_labels: ['TLE_L', 'TLE_R']
 label: white
 feature: FA
  map_pths: <DataFrame shape=(7, 8)>

Item 3:
  Keys: ['study', 'grp', 'grp_labels', 'label', 'feature', 'map_pths']
 study: MICs
 grp: TLE_R
 grp_labels: ['TLE_R', 'mTLE_R']
 label: white
 feature: FA
  map_pths: <DataFrame shape=(2, 8)>

Item 4:
  Keys: ['study', 'grp', 'grp_labels', 'lab

In [9]:
# print specific study-group combinations
print_grpDF(dict=map_pths_clean, grp='TLE_ic', study='PNI', df="pth")

PNI-TLE_ic (['TLE_L', 'TLE_R'])
   PNI_ID SES study        Date  \
0  PNE010  a1    7T  09.01.2025   
1  PNE012  a1    7T  10.01.2025   
2  PNE017  a1    7T  20.02.2025   
3  PNE018  a1    7T  12.03.2025   
4  PNE019  a1    7T  18.03.2025   
5  PNE006  a1    7T  04.12.2024   
6  PNE013  a1    7T  13.01.2025   
7  PNE021  a1    7T  02.04.2025   

                                            pth_ipsi  \
0  /data/mica3/BIDS_PNI/derivatives/micapipe_v0.2...   
1  /data/mica3/BIDS_PNI/derivatives/micapipe_v0.2...   
2  /data/mica3/BIDS_PNI/derivatives/micapipe_v0.2...   
3  /data/mica3/BIDS_PNI/derivatives/micapipe_v0.2...   
4  /data/mica3/BIDS_PNI/derivatives/micapipe_v0.2...   
5  /data/mica3/BIDS_PNI/derivatives/micapipe_v0.2...   
6  /data/mica3/BIDS_PNI/derivatives/micapipe_v0.2...   
7  /data/mica3/BIDS_PNI/derivatives/micapipe_v0.2...   

                                          pth_contra    Date_dt    grp  
0  /data/mica3/BIDS_PNI/derivatives/micapipe_v0.2... 2025-01-09  TLE_L  
1

In [10]:
# Find path for specific participant
study = "PNI"
sub = "PNC018"
ses = "01"

grp = "ctrl"

entry = next((item for item in map_pths if item['study'] == study and item['grp'] == grp), None)

if entry is not None:
    df = entry['map_pths']
    row = df[(df[ID_col] == sub) & (df['SES'] == ses)]
    if not row.empty:
        print(row['pth_L'].values[0])
    else:
        print(f"No entry found for subject {sub} and session {ses}")
else:
    print(f"No entry found for study {study} and group {grp}")

/data/mica3/BIDS_PNI/derivatives/micapipe_v0.2.0/sub-PNC018/ses-01/maps/sub-PNC018_ses-01_hemi-L_surf-fsLR-5k_label-white_FA.func.gii


# Extract map values

In [11]:
# Each row is a vertex, each column is a subject-session
## note for ipsi/contra groups, the following map values are stored under keys : maps_ipsi, maps_contra
maps = map_pths_clean.copy()

for item in maps:

    # iterate through mmap_pths
    df = item['map_pths']
    
    ID_col = [col for col in df.columns if 'ID' in col.upper()][0]  # Find the ID column
    
    # identify pth columns
    pth_cols = [col for col in df.columns if col.startswith('pth_')]
    
    for pth in pth_cols:
        df_map = pd.DataFrame()
        for i, row in df.iterrows():
            sub = row[ID_col]
            ses = row['SES']
            
            # extract map values
            gii = nib.load(row[pth])
            map = gii.darrays[0].data

            col_name = f"{sub}-{ses}"
            df_map[col_name] = map

        # add the dataframe to dict item
        pth_clean = pth.replace('pth_', '').replace('surf_', '')
        item[f'map_{pth_clean}'] = df_map

# print the structure of the maps
print_dict(maps)


map_pths_clean (78 items):

Item 0:
  Keys: ['study', 'grp', 'grp_labels', 'label', 'feature', 'map_pths', 'map_L', 'map_R']
 study: MICs
 grp: allPX
 grp_labels: ['TLE_U', 'MFCL', 'FLE_R', 'MFCL_bTLE', 'UKN_L', 'mTLE_R', 'mTLE_L', 'FLE_L', 'UKN_U', 'TLE_L', 'TLE_R']
 label: white
 feature: FA
  map_pths: <DataFrame shape=(19, 7)>
  map_L: <DataFrame shape=(4842, 19)>
  map_R: <DataFrame shape=(4842, 19)>

Item 1:
  Keys: ['study', 'grp', 'grp_labels', 'label', 'feature', 'map_pths', 'map_L', 'map_R']
 study: MICs
 grp: TLE
 grp_labels: ['TLE_L', 'TLE_R', 'TLE_U', 'mTLE_R', 'mTLE_L']
 label: white
 feature: FA
  map_pths: <DataFrame shape=(9, 7)>
  map_L: <DataFrame shape=(4842, 9)>
  map_R: <DataFrame shape=(4842, 9)>

Item 2:
  Keys: ['study', 'grp', 'grp_labels', 'label', 'feature', 'map_pths', 'map_ipsi', 'map_contra']
 study: MICs
 grp: TLE_ic
 grp_labels: ['TLE_L', 'TLE_R']
 label: white
 feature: FA
  map_pths: <DataFrame shape=(7, 8)>
  map_ipsi: <DataFrame shape=(4842, 7)>
  

In [12]:
# print specific study-group combinations (prints first feature-label combination)
print_grpDF(dict=maps, grp='ctrl', study='PNI', df="maps")

PNI-ctrl (['CTRL'])
    PNI_ID SES study        Date  \
2   PNC003  02    7T  13.06.2022   
6   PNC006  02    7T  05.11.2022   
10  PNC011  02    7T  11.11.2022   
14  PNC018  02    7T  05.05.2023   
22  PNC019  a1    7T  11.12.2024   
33  PNC024  a1    7T  09.10.2024   
37  PNC025  a1    7T  11.09.2024   
41  PNC026  a1    7T  16.10.2024   
71  PNC037  a1    7T  15.05.2025   
72  PNC038  a1    7T  14.05.2025   

                                                pth_L  \
2   /data/mica3/BIDS_PNI/derivatives/micapipe_v0.2...   
6   /data/mica3/BIDS_PNI/derivatives/micapipe_v0.2...   
10  /data/mica3/BIDS_PNI/derivatives/micapipe_v0.2...   
14  /data/mica3/BIDS_PNI/derivatives/micapipe_v0.2...   
22  /data/mica3/BIDS_PNI/derivatives/micapipe_v0.2...   
33  /data/mica3/BIDS_PNI/derivatives/micapipe_v0.2...   
37  /data/mica3/BIDS_PNI/derivatives/micapipe_v0.2...   
41  /data/mica3/BIDS_PNI/derivatives/micapipe_v0.2...   
71  /data/mica3/BIDS_PNI/derivatives/micapipe_v0.2...   
72  /data/mic

# Statistics

In [13]:
def t_test(df1, df2, fdr=True, equal_var=False):
    """
    Perform a vertex wise t-test between two DataFrames.
    inputs:
        df1: pd.DataFrame with vertex data for group 1. Vertices should be rows, subjects as columns.
        df2: pd.DataFrame with vertex data for group 2.
        fdr: whether to apply FDR correction to the p-values (default is True)
        equal_var: whether to assume equal variance between the two groups (default is False, i.e., Welch's t-test)

    outputs:
        df with shape n_vertices x 2 . Cols:
            't': t-statistic
            'p': p-value
            'p_fdr' < if fdr = True >: FDR-corrected p-value


    Note:
     - Assumes:
      - Values at each vertex are normally distributed
      - Participants are independent
      - Unequal variance
    """
    import pandas as pd
    from scipy import stats
    import numpy as np

    if df1.shape[0] != df2.shape[0]:
        raise ValueError("[t_test] DataFrames must have the same number of vertices (rows).")
    
    if df1.shape[1] < 2 or df2.shape[1] < 2:
        raise ValueError("[t_test] Each DataFrame must have at least two subjects for t-test. {}".format((df1.shape, df2.shape)))
    
    # Initialize output DataFrame
    out = pd.DataFrame(index=df1.index)
    out['t'] = np.nan
    out['p'] = np.nan
    out['p_fdr'] = np.nan

    # Perform t-test for each vertex
    for i in range(df1.shape[0]):
        t_stat, p_val = stats.ttest_ind(df1.iloc[i, :], df2.iloc[i, :], equal_var=equal_var)
        out.at[i, 't'] = t_stat
        out.at[i, 'p'] = p_val

    # FDR correction if requested
    if fdr:
        from statsmodels.stats.multitest import multipletests
        out['p_fdr'] = multipletests(out['p'], method='fdr_bh')[1]  # FDR correction

    return out


def d(df1, df2):
    """
    Calculate Cohen's d for each vertex between two DataFrames.

    inputs:
        df1: pd.DataFrame with vertex data for group 1. Vertices should be rows, subjects as columns.
        df2: pd.DataFrame with vertex data for group 2.

    outputs:
        pd.DataFrame with shape n_vertices x 1. Cols:
            'd': Cohen's d value for each vertex
    """
    import pandas as pd
    import numpy as np

    if df1.shape[0] != df2.shape[0]:
        raise ValueError("[d] DataFrames must have the same number of vertices (rows).")
    
    if df1.shape[1] < 2 or df2.shape[1] < 2:
        raise ValueError("[d] Each DataFrame must have at least two subjects for d calculation. {}".format((df1.shape, df2.shape)))

    # Initialize output DataFrame
    out = pd.DataFrame(index=df1.index)
    out['d'] = np.nan
    print("[d] CHECK FORMULA! - DM 28 May 2025")
    # Calculate Cohen's d for each vertex
    for i in range(df1.shape[0]):
        mean1 = df1.iloc[i, :].mean()
        mean2 = df2.iloc[i, :].mean()
        std1 = df1.iloc[i, :].std(ddof=1)  # Sample standard deviation
        std2 = df2.iloc[i, :].std(ddof=1)
        n1 = df1.shape[1]
        n2 = df2.shape[1]

        pooled_std = np.sqrt(((n1 - 1) * std1**2 + (n2 - 1) * std2**2) / (n1 + n2 - 2))
        out.at[i, 'd'] = (mean1 - mean2) / pooled_std

    return out


In [None]:
comps = []  # List to store comparison results

ipsiTo = "L" # what hemi of controls to compare ipsi group to
ipsiTo = ipsiTo.upper()  # Ensure ipsiTo is uppercase for consistency

for comp_idx in range(len(maps)):
    
    comp = maps[comp_idx]

    if comp['grp'] == 'ctrl': # skip comparing ctrl group to itself
        continue

    ctrl_idx = ctrl_index(maps, comp_idx)
    print(f"Control dict index: {ctrl_idx}")
    ctrl = maps[ctrl_idx]

    # determine map keys present in the item
    comp_keys = [key for key in comp.keys() if key.startswith('map_') and not key.endswith('pths')]
    ctrl_keys = [key for key in ctrl.keys() if key.startswith('map_') and not key.endswith('pths')]
    print(f"\nMap keys: {comp_keys}")

    for key in comp_keys: # compare L and/or R hemisphere maps

        if 'ipsi' in key and ipsiTo == "L":
            ctrl_col = "map_L"
        elif 'ipsi' in key and ipsiTo == "R":
            ctrl_col = "map_R"
        elif 'contra' in key and ipsiTo == "L":
            ctrl_col = "map_R"
        elif 'contra' in key and ipsiTo == "R":
            ctrl_col = "map_L"
        else:
            assert key in ctrl_keys, f"Provided comparison key {key} not in control keys ({ctrl_keys}). Check that ipsiTo is set either to `L` or `R`."
            ctrl_col = key

        df_comp = comp[key]
        df_ctrl = ctrl[ctrl_col]

        print(f"{key}, ctrl: {ctrl_col} | comp: {df_comp.shape}, ctrl: {df_ctrl.shape}")

        # Example: create output DataFrame (as in your template)
        n_ctrl = df_ctrl.shape[1]
        n_comp = df_comp.shape[1]

        if n_ctrl < 2 or n_comp < 2:
            print(f"[main] Skipping case. too few participants for t-test ({n_comp} comp, {n_ctrl} ctrl).")
            continue
        
        out_t_p = t_test(df_comp, df_ctrl, fdr=True, equal_var=False)
        out_d = d(df_comp, df_ctrl)

        # combine results into a single DataFrame
        out_df = pd.DataFrame(index=df_comp.index)
        out_df['t'] = out_t_p['t']
        out_df['p'] = out_t_p['p']
        out_df['p_fdr'] = out_t_p['p_fdr']
        out_df['d'] = out_d['d']
    
        # add out_df to the comp item
        comp['stats'] = out_df
        comp['n_ctrl'] = n_ctrl
        comp['n_comp'] = n_comp
        comp['feature'] = comp['feature']
        comp['label'] = comp['label']
        comp['study'] = comp['study']
        comp['grp'] = comp['grp']
        comp['grp_labels'] = comp['grp_labels']

        print(out_df.shape)

        # add to the list of outputs
        comps.append(comp)


Control dict index: 7

Map keys: ['map_L', 'map_R']
map_L, ctrl: map_L | comp: (4842, 19), ctrl: (4842, 11)
[d] CHECK FORMULA! - DM 28 May 2025
(4842, 4)
map_R, ctrl: map_R | comp: (4842, 19), ctrl: (4842, 11)
[d] CHECK FORMULA! - DM 28 May 2025
(4842, 4)
Control dict index: 7

Map keys: ['map_L', 'map_R']
map_L, ctrl: map_L | comp: (4842, 9), ctrl: (4842, 11)
[d] CHECK FORMULA! - DM 28 May 2025
(4842, 4)
map_R, ctrl: map_R | comp: (4842, 9), ctrl: (4842, 11)
[d] CHECK FORMULA! - DM 28 May 2025
(4842, 4)
Control dict index: 7

Map keys: ['map_ipsi', 'map_contra']
map_ipsi, ctrl: map_L | comp: (4842, 7), ctrl: (4842, 11)
[d] CHECK FORMULA! - DM 28 May 2025
(4842, 4)
map_contra, ctrl: map_R | comp: (4842, 7), ctrl: (4842, 11)
[d] CHECK FORMULA! - DM 28 May 2025
(4842, 4)
Control dict index: 7

Map keys: ['map_contra', 'map_ipsi']
map_contra, ctrl: map_R | comp: (4842, 2), ctrl: (4842, 11)
[d] CHECK FORMULA! - DM 28 May 2025
(4842, 4)
map_ipsi, ctrl: map_L | comp: (4842, 2), ctrl: (4842, 

  t_stat, p_val = stats.ttest_ind(df1.iloc[i, :], df2.iloc[i, :], equal_var=equal_var)


[d] CHECK FORMULA! - DM 28 May 2025
(4842, 4)
map_ipsi, ctrl: map_L | comp: (4842, 2), ctrl: (4842, 10)
[d] CHECK FORMULA! - DM 28 May 2025
(4842, 4)
Control dict index: 15

Map keys: ['map_L', 'map_R']
map_L, ctrl: map_L | comp: (4842, 3), ctrl: (4842, 10)
[d] CHECK FORMULA! - DM 28 May 2025
(4842, 4)
map_R, ctrl: map_R | comp: (4842, 3), ctrl: (4842, 10)
[d] CHECK FORMULA! - DM 28 May 2025
(4842, 4)
Control dict index: 15

Map keys: ['map_L', 'map_R']
map_L, ctrl: map_L | comp: (4842, 4), ctrl: (4842, 10)
[d] CHECK FORMULA! - DM 28 May 2025
(4842, 4)
map_R, ctrl: map_R | comp: (4842, 4), ctrl: (4842, 10)
[d] CHECK FORMULA! - DM 28 May 2025
(4842, 4)
Control dict index: 15

Map keys: ['map_L', 'map_R']
map_L, ctrl: map_L | comp: (4842, 3), ctrl: (4842, 10)
[d] CHECK FORMULA! - DM 28 May 2025
(4842, 4)
map_R, ctrl: map_R | comp: (4842, 3), ctrl: (4842, 10)
[d] CHECK FORMULA! - DM 28 May 2025
(4842, 4)
Control dict index: 23

Map keys: ['map_L', 'map_R']
map_L, ctrl: map_L | comp: (4842

In [None]:
out_df

Unnamed: 0,t,p,p_fdr,d
0,0.0,1.0,,0.0
1,0.0,1.0,,0.0
2,0.0,1.0,,0.0
3,0.0,1.0,,0.0
4,0.0,1.0,,0.0
...,...,...,...,...
4837,0.0,1.0,,0.0
4838,0.0,1.0,,0.0
4839,0.0,1.0,,0.0
4840,0.0,1.0,,0.0


In [17]:
print_dict(comps)


map_pths_clean (80 items):

Item 0:
  Keys: ['study', 'grp', 'grp_labels', 'label', 'feature', 'map_pths', 'map_L', 'map_R', 'stats', 'n_ctrl', 'n_comp']
 study: MICs
 grp: allPX
 grp_labels: ['TLE_U', 'MFCL', 'FLE_R', 'MFCL_bTLE', 'UKN_L', 'mTLE_R', 'mTLE_L', 'FLE_L', 'UKN_U', 'TLE_L', 'TLE_R']
 label: white
 feature: FA
  map_pths: <DataFrame shape=(19, 7)>
  map_L: <DataFrame shape=(4842, 19)>
  map_R: <DataFrame shape=(4842, 19)>
  stats: <DataFrame shape=(4842, 4)>
 n_ctrl: 11
 n_comp: 19

Item 1:
  Keys: ['study', 'grp', 'grp_labels', 'label', 'feature', 'map_pths', 'map_L', 'map_R', 'stats', 'n_ctrl', 'n_comp']
 study: MICs
 grp: allPX
 grp_labels: ['TLE_U', 'MFCL', 'FLE_R', 'MFCL_bTLE', 'UKN_L', 'mTLE_R', 'mTLE_L', 'FLE_L', 'UKN_U', 'TLE_L', 'TLE_R']
 label: white
 feature: FA
  map_pths: <DataFrame shape=(19, 7)>
  map_L: <DataFrame shape=(4842, 19)>
  map_R: <DataFrame shape=(4842, 19)>
  stats: <DataFrame shape=(4842, 4)>
 n_ctrl: 11
 n_comp: 19

Item 2:
  Keys: ['study', '

In [18]:
comps_prelim = comps 

In [None]:
def get_idx_compl(ldict, idx):
    """
    Given a list of dict items and an index, find comparison that is for the same group, label, feature, but different study. 
    Get the comparison dictionary item at a specific index from a list of comparison items.
    
    inputs:
        ldict: list of comparison dictionary items
        idx: index of the item to retrieve

    outputs:
        idx_compl: index of the comparison dictionary item that is complementary to input index

    """
    if idx < 0 or idx >= len(ldict):
        raise IndexError(f"Index {idx} out of bounds for list of length {len(ldict)}")
    
    study = ldict[idx]['study']

    if study == "PNI":
        other_study = "MICs"
    elif study == "MICs":
        other_study = "PNI"
    else:
        raise ValueError(f"[get_idx_compl] Study code {study} not recognized. Only 'PNI' and 'MICs' are currently supported.")
    
    # Find all other list items that differ only in study (ignore values for 'stats')
    idx_compl = [item for item in ldict if item['study'] == other_study and 
                         item['label'] == ldict[idx]['label'] and 
                         item['feature'] == ldict[idx]['feature'] and 
                         item['grp'] == ldict[idx]['grp']]
    if not idx_compl:
        raise ValueError(f"[get_idx_compl] ERROR. No complementary item found for index {idx} in study {study}.")

    return idx_compl

def d_dif(df1, df2, d_col='d'):
    """
    Calculate the difference in Cohen's d between two DataFrames.
    
    inputs:
        df1: pd.DataFrame with statistics for comparisons between grp and ctrl for study 1. Vertices should be rows, subjects as columns.
        df2: pd.DataFrame with vertex data for comparisons between grp and ctrl for study 2.
        d_col: column name for Cohen's d values in df1 and df2 (default is 'd'). (must be present in both DataFrames)

    outputs:
        pd.DataFrame with shape n_vertices x 1. Cols:
            'd_dif': Difference in Cohen's d value for each vertex
    """
    import pandas as pd
    import numpy as np

    if df1.shape[0] != df2.shape[0]:
        raise ValueError("[d_dif] DataFrames must have the same number of vertices (rows).")
    
    # Initialize output DataFrame
    out = pd.DataFrame()
    out['d_dif'] = np.nan

    # Calculate Cohen's d difference for each vertex
    for i in range(df1.shape[0]):
        d1 = df1.iloc[i, :].mean()
        d2 = df2.iloc[i, :].mean()
        out.at[i, 'd_dif'] = d1 - d2

    return out


In [None]:
# get difference maps of difference maps (3T dif maps - 7T dif maps)
# iterate through comps dict. Find correspondence between the index and complementary comparison in the other study

tTsT_comp = []

for idx in range(len(comps)):
    idx_compl = get_idx_compl(comps, idx) # get index for same comparisons in other study
    
    stats_idx = comps[idx]['stats']
    stats_compl = comps[idx_compl]['stats']
    
    


    grp = comps[idx]['grp']
    ft = comps[idx]['feature']
    lbl = comps[idx]['label']

    assert  grp == comps[idx]['grp'], f"[main] ERROR. Different groups: {grp} vs {comps[idx_compl]['grp']}"
    assert ft == comps[idx]['feature'], f"[main] ERROR. Different features: {ft} vs {comps[idx_compl]['feature']}"
    assert lbl == comps[idx]['label'], f"[main] ERROR. Different labels: {lbl} vs {comps[idx_compl]['label']}"
    assert comps[idx]['study'] != comps[idx_compl]['study'], f"[main] ERROR. Studies should differ."

    grp_lbls_idx = comps[idx]['grp_labels']
    grp_lbls_compl = comps[idx_compl]['grp_labels']

    out_item = {

    }


In [None]:
# check correspondence between veretices L and R <optional: thorough>