# Comparisons between studies

Steps:   
--- TO BE MOVED INTO 02_demo -------- 
1. DEMOGRAPHICS  
    - Identify IDs with 3T and 7T    
    - Extract clinical information for epilepsy patients  
    - Extract demographic information for all participants  
1. SMOOTH MAPS
1. CLEAN DATA
-------------------------------------

1. SELECT SESSIONS
1. ANALYSES
    - (visualize unsmoothed, smoothed maps)
    - within study TLE vs CTRL comparison
        - extract smoothed maps
        - compute z, w scores (values per participant)
        - group and flip
        - Cohen's D (compare TLE and control z/w score distributions within each vertex)
    - between study 7T vs 3T comparison      


# 1. DEMOGRAPHICS

In [None]:
# --- Standard-library / third-party imports ---------------------------------
# `sys` has to be imported before sys.path is modified; the previous ordering
# called sys.path.append() first and raised NameError on a fresh kernel.
import datetime
import importlib
import os
import sys

import numpy as np
import pandas as pd

sys.path.append(os.getcwd())  # make modules sitting next to this notebook importable

# --- Run flags ---------------------------------------------------------------
lab = True
save = True
verbose = True
toPrint = True

test = False
test_frac = 0.1 # fraction of demo to use for testing if test=True

includeBL = False # if should include bilateral TLE patients (with one side higher than other) in analyses

# Default so that later cells can reference save_pth even when save is False
# (it used to be left undefined in that case).
save_pth = None

if lab: # define root paths to source files
    src_dir = "/host/verges/tank/data/daniel/3T7T/z/data/sources" # path to directory with source pt sheets
    sys.path.append("/host/verges/tank/data/daniel/")
    if save:
        save_pth = "/host/verges/tank/data/daniel/3T7T/z/outputs"
else:
    src_dir = "/Users/danielmendelson/Library/CloudStorage/OneDrive-McGillUniversity/Documents/PhD/Boris/projects/PT/sources" # path to directory with source pt sheets
    sys.path.append("/Users/danielmendelson/Library/CloudStorage/OneDrive-McGillUniversity/Documents/PhD/Boris/code")
    if save:
        save_pth = "/Users/danielmendelson/Library/CloudStorage/OneDrive-McGillUniversity/Documents/PhD/Boris/projects/3T7T/data/outputs"

# --- Project modules ---------------------------------------------------------
# Imported only after every sys.path entry above has been added, so modules that
# live in the appended code directories can be resolved as well.
import utils_demo
import tTsTGrpUtils as tsutil
# NOTE: importing `id` shadows the builtin id() for the rest of the notebook.
from genUtils import id, gen, t1

# SOURCE SHEET SPECIFICATIONS
# For each sheet, must define NAME, PATH, SHEET, ID_7T, ID_3T. 
# All other keys are those to be extracted.
# The same variables should have the same key names across sheets.
# Values are presumably the column headers as they appear in the sheet -- confirm against the file.
# NOTE: the name `PNI` is rebound further down in this cell to the 7T study-directory dict;
# the `sheets` list (built after the sheet dicts) keeps a reference to this original dict.
PNI = {
    'NAME': 'PNI',
    'PATH': f'{src_dir}/MICA_PNI_06Oct2025.xlsx', # 7T controls
    'SHEET': 'all', # name of sheet in file
    'ID_7T': 'ID_PNI', 
    'ID_3T': 'ID_MICs',
    'Ses_7T': 'session',
    'Date_7T': 'scanDate',
    'study': '7T',
    'DOB': 'dob',
    'Sex': 'sex',
    'Gender': 'gender',
    'Hand': 'handedness',
    'Eth': 'ethnicity',
    'Language': 'language',
    'Job': 'employment',
    'Edu': 'education',
    'LastSz': 'lastSeizure',
}

# Source-sheet specification for the 3T (MICs) participant sheet.
# 'ID_7T' is None: this sheet carries no 7T identifier column.
# NOTE: the name `MICs` is rebound further down in this cell to the 3T study-directory dict;
# the `sheets` list keeps a reference to this original dict.
MICs = {
    'NAME': 'MICs',
    'PATH': f'{src_dir}/MICA-MTL-3T_06Oct2025.xlsx', # 3T controls
    'SHEET': 'Sheet1', # name of sheet in file
    'ID_7T': None, 
    'ID_3T': 'Study_name',
    'Ses_3T': 'Visit',
    'Date_3T': 'Scan_Date (D.M.Y)',
    'study': '3T',
    'Hand': 'Handed', 
    'Sex': 'AssignedSex',
    'Gender': 'GenderIdentity',
    'Height': 'HeightApprox',
    'Weight': 'WeightApprox',
    'Eth': 'Ethnicity',
    'Job': 'Employ',
    'Edu': 'YoE',
    'LastSz': 'Last seizure'
}

# Source-sheet specification for the clinical database sheet.
# 'ID_7T' and 'Date_3T' are None: no such columns are mapped for this sheet.
# The long values are presumably copied verbatim from the spreadsheet headers
# (typos, double spaces and trailing spaces included) -- do not normalise them
# without checking the sheet.
Clin = {
    'NAME': 'Clin',
    'PATH': f'{src_dir}/Clinical_06Oct2025.xlsx',
    'SHEET': 'clinical-database-detailed', # name of sheet in file
    'ID_7T': None, 
    'ID_3T': 'participant_id',
    'Date_3T': None,
    'Gender': 'Gender',
    'Hand': 'Handedness',
    'Language': 'Language',
    'Job': 'Employment',
    'Edu': 'Education',
    'EpilepsyDxILAE': 'Epilepsy diagnosis based on ILAE',
    'EpilepsyClass': 'Epilepsy classification:Focal,Generalized',
    'FocusLat': 'Lateralization of epileptogenic focus',
    'FocusConfirmed': 'Epileptogenic focus confirmed by the information of (sEEG/ site of surgical resection/ Ictal EEG abnormalities +/. MRI findings): FLE=forntal lobe epilepsy and cingulate epilepsy, CLE:central/midline epilepsy,ILE: insular epilepsy, mTLE=mesio.temporal lobe epilepsy, nTLE=neocortical lobe epilepsy, PQLE=posterior quadrant lobe epilepsy , multifocal epilepsy,IGE=ideopathic lobe epilepsy,unclear)',
    'EMUDischargeDx': 'Dx at EMU discharge ',
    'EMUAdmissionDate': 'EMU admission date(dd-mm-yy)',
    'AdmissionDuration': 'Duration of admission',
    'EpilepsyRiskFactors': 'Risk factors for epilepsy',
    'SeizureOnsetYr': 'Seizure onset (yr)',
    'DrugResistant': 'Drug resistant epilepsy at time of EMU admission',
    'NumASMsPrior': '# of ASMs prior current EMU admission',
    'PrevASMs': 'Previous ASMs (name and doses (mg/d)) if applicable prior the current EMU admission',
    'NumASMOnAdmission': '# of ASM on admission',
    'ASMsOnAdmission': 'ASMs  on admission (name, doses (mg per day)',
    'GeneticTest': 'Genetic test (year,results)',
    'FDGPET': 'FDG.PET',
    'BaselineMRI': 'Baseline MRI (year,results)',
    'InvasiveExplorations': 'Invasive explorations (Y/N)',
    'NumSurgicalResections': '# of surgical resection/thermocoagulatin',
    'SurgicalResectionDateSite': 'Surgical resection date and site',
    'Histopathology': 'Histopatholgy',
    'Engel6mo': 'Engel classification (seizure outcomes at the 6 month )',
    'Engel1yr': 'Engel classification (seizure outcomes after 1 year from surgical resection)',
    'ILAEOutcome1yr': 'ILAE outcome after surgical resection by 1 yr',
    'NeuromodDevices': 'Neuromodulation devices'
    }

# All source sheets, in the order PNI (7T), MICs (3T), clinical.
# The list holds references to the sheet dicts above, so it is unaffected when
# the names PNI and MICs are rebound to the study-directory dicts later in this cell.
sheets = [PNI, MICs, Clin]

# QC sheets
# One dict per study describing where the surface-segmentation QC ratings live.
# NOTE(review): neither dict is referenced by name elsewhere in the visible notebook;
# the QC cell passes an undefined `qc_sheets` to mk_qcSheet -- confirm how these are meant to be wired in.
# Keys such as "Session " and "Comments " carry trailing spaces, presumably to match the sheet headers -- confirm.
PNI_QC= { # details of sheet with QC info on 7T surface segmentation
    "PATH":f"{src_dir}/7T_processing_26Sept2025.xlsx",
    "SHEET":"Proc_newDays",
    "ID_7T": "Subjec_ID",
    "SES": "Session ",
    "QC_col": "Comments " # NOTE values are free-form strings. Only present for some rows that should be checked myself. Other row's segmentations can be assumed good.
}

MICs_QC = { # details of sheet with QC info on 3T surface segmentation
    "PATH":f"{src_dir}/BIDS_MICs_QC_logs_26Sept2025.xlsx", 
    "SHEET":"Sheet1",
    "ID_3T": "ID",
    "SES": "ses",
    "QC_col": "surface quality" # NOTE 0 < values < 2 0=unacceptable, 2=acceptable (the strict inequalities contradict the 0/2 anchors; presumably 0 <= value <= 2 -- confirm)
}


##### ANALYSIS SPECIFICATIONS #####

# Demographics details
# Describes the demographics CSV: where it lives and which column holds which variable.
# NOTE(review): a later cell falls back to reading a different file (01b_demo_...) when
# df_demo is missing -- confirm which file is the intended source.
demographics = {
    "pth" : "/host/verges/tank/data/daniel/3T7T/z/outputs/01c_grpSummary_16Sep2025.csv", # path to demographics file produced by 02_demo.ipynb
    # column names (except 'nStudies', which is a flag rather than a column name):
    'nStudies': True, # whether multiple studies are included
    "ID_7T" : "PNI_ID", 
    "ID_3T" : "MICS_ID",
    "SES" : "SES",
    "date": "Date",
    "age": "age",
    "sex": "sex",
    "grp" : "grp_detailed" # col name for participant grouping variable to use
}

# Study details
# From here on `MICs` and `PNI` refer to these study-directory dicts; they replace the
# source-sheet dicts of the same names defined earlier in this cell (still reachable via `sheets`).
# The "dir_*" values are path fragments; presumably concatenated onto "dir_root" by the utilities -- confirm.
MICs = {
    "name": "MICs",
    "dir_root": "/data/mica3/BIDS_MICs",
    "dir_raw": "/rawdata",
    "dir_deriv": "/derivatives",
    "dir_mp": "/micapipe_v0.2.0",
    "dir_hu": "/hippunfold_v1.3.0/hippunfold",
    "dir_zb": "/DM_zb_37comp",
    "study": "3T",
    "ID_ctrl" : ["HC"], # patterns for control IDs in demographics file
    "ID_Pt" : ["PX"] # patterns for patient IDs in demographics file
    }

# NOTE(review): PNI uses the key "ID_col" where MICs uses "ID_ctrl"/"ID_Pt", and its values
# look like ID patterns rather than a column name -- confirm which key the utilities read.
PNI = {
    "name": "PNI",
    "dir_root": "/data/mica3/BIDS_PNI",
    "dir_raw": "/rawdata",
    "dir_deriv": "/derivatives",
    "dir_mp": "/micapipe_v0.2.0",
    "dir_hu": "/hippunfold_v1.3.0/hippunfold",
    "dir_zb": "/DM_zb_37comp",
    "study": "7T",
    "ID_col" : ["PNC", "Pilot"], # column for ID in demographics file
    }

# Studies passed to the tsutil / utils_demo calls below (3T first, then 7T).
studies = [MICs, PNI]

# Labels in the grouping column that identify the control group.
ctrl_grp = {'ctrl' : ['CTRL']}

# Analysis details
# Central configuration read by the analysis cells. The 'prjDir_*' entries are path
# fragments that the cells concatenate onto 'prjDir_root'.
# NOTE(review): the inline remark below says all values are lists, but several entries
# (paths, flags, parcellation names) are scalars -- confirm what the utilities expect.
specs  = { # all spec values to be in lists to allow for iteration across these values
    # directories
    'prjDir_root' : "/host/verges/tank/data/daniel/3T7T/z", 
    'prjDir_outs' : "/outputs",
    'prjDir_out_stats': "/outputs/stats",
    'prjDir_out_figs': "/outputs/figures",
    'prjDir_maps' : "/maps", # output directory for smoothed cortical maps
    'prjDir_dictLists': "/maps/dictLists",
    'prjDir_mapPths' : "/output/paths", # NOTE(review): "/output" (singular) differs from the "/outputs/..." entries -- confirm this is intended
    'prjDir_maps_dfs': "/outputs/dfs/04a_maps_dfs",
    'prjDir_parc_dfs': "/outputs/dfs/04b_maps_parc",
    'prjDir_winComp_dfs': "/outputs/dfs/05a_winComp",
    'prjDir_grpFlip_dfs': "/outputs/dfs/05b_grpFlip",
    'prjDir_winD_dfs': "/outputs/dfs/05c_winD",
    'prjDir_btwD_dfs': "/outputs/dfs/05d_btwComp",

    'ctx': True, # whether to include cortical analyses
    'surf_ctx': ['fsLR-5k'],
    'parcellate_ctx': 'glasser', # parcellation to use, or None if no parcellation.
    'parc_lbl_ctx': 'glasser_int', # what name to fetch for parcellation values
    'lbl_ctx': ['midthickness', 'pial', 'white', 'swm1.0mm'], # pial, midthick, white, etc
    'ft_ctx': ['thickness', 'T1map', 'flair', 'ADC', 'FA'], # features: T1map, flair, thickness, FA, ADC
    'smth_ctx': [5, 10], # in mm
    
    'hipp': True, # whether to include hippocampal analyses
    'surf_hipp': ['den-0p5mm'],
    'parcellate_hipp': 'DK25',
    'parc_lbl_hipp': 'idx',
    'lbl_hipp': ['midthickness', "inner", "outer"], # outer, inner, midthickness, etc
    'ft_hipp': ['thickness', 'T1map', 'flair', 'ADC', 'FA'], # features: T1map, flair, thickness, FA, ADC
    'smth_hipp': [2, 5], # in mm
    
    # within study comparisons
    'col_grp': 'grp_detailed',  # column in df_demo with group labels (same value as demographics['grp'])
    'winComp_stats': ['z'], # what stats to run for within study comparisons ('z' for z-scoring, 'w' for w-scoring)
    'covars': [demographics['age'], demographics['sex']],

    'ipsiTo' : 'L', # what hemisphere for controls ipsi should be mapped to
    'newQC': False # overwritten to True by the (deprecated) QC-sheet cell
}


In [None]:
# TODO. Make proper Table 1
importlib.reload(utils_demo)

# Read the demographics table and hand it to utils_demo.grp_summary.
df_demo = pd.read_csv(demographics['pth'])
utils_demo.grp_summary(df_demo, col_grp='grp_detailed', save_pth=save_pth)

divider = "-" * 100
print(divider)
print("MEDIAN AGE by group")

# Median age per (group, study); left as the last expression so the notebook displays it.
age_by_grp_study = df_demo.groupby(['grp_detailed', 'study'])['age']
age_by_grp_study.median().sort_index(level='grp_detailed')


# ANALYSIS SPECS

In [None]:
# Ensure the demographics table is in memory before the analysis cells run.
# NOTE(review): this fallback reads the 01b_demo file, which is not the file named in
# demographics['pth'] (01c_grpSummary) -- confirm which one is intended.
demo_fallback_pth = "/host/verges/tank/data/daniel/3T7T/z/outputs/01b_demo_16Sep2025-154209.csv"
if 'df_demo' not in globals() or df_demo is None:
    df_demo = pd.read_csv(demo_fallback_pth)
    # Report the file that was actually read; the message previously printed
    # demographics['pth'], which is a different file.
    print(f"[main] Demo file loaded from {demo_fallback_pth}")

# Preview the identifying columns of every participant-session row.
print(df_demo[['UID','MICS_ID', 'PNI_ID', 'study', 'SES', 'Date', 'grp_detailed']])

# 2. SMOOTH MAPS - DEPRECATED. SEE 02_demo.ipynb
Strategy:
Add paths to relevant maps to df containing demographic information. Each row is one participant at a unique session.

Hippocampal maps: identify path to smoothed hippocampal maps, add to row-wise df
Cortical maps: take raw maps from micapipe, apply smoothing then save these maps in project directory and add path of the smoothed map to the df

In [None]:
# DEPRECATED. SEE 02_demo.ipynb
# Runs tsutil.idToMap on the demographics table; the call returns the paths table
# plus the locations of the saved output and log.
importlib.reload(tsutil)

map_pths_root = specs['prjDir_root'] + specs['prjDir_outs']
df_pths, out_pth, log_pth = tsutil.idToMap(
    df_demo=df_demo,
    studies=studies,
    dict_demo=demographics,
    specs=specs,
    save=save,
    save_pth=map_pths_root,
    save_name="02a_mapPths",
    test=test,
    verbose=True,
)


# 3. CLEAN DATA
DEPRECATED. See 02_demo.ipynb

In [None]:
# DEPRECATED. See 02_demo.ipynb
# Ensure the map-paths table (df_pths) is loaded; in test mode replace it with a
# random subset read from a fixed file.
reimport = False
pth = "/host/verges/tank/data/daniel/3T7T/z/outputs/01a_mapPths_12Sep2025-171142.csv"
toPrint = True
if 'df_pths' not in globals() or df_pths is None or reimport:
    # Fixed: the result used to be bound to df_pths_clean_final (and the CSV was read
    # through tsutil.loadPickle), which left df_pths undefined for the code below.
    df_pths = pd.read_csv(pth, dtype=str)
    if toPrint:
        print(f"[main] df_pths loaded from {pth}")

if test:
    filename = "/host/verges/tank/data/daniel/3T7T/z/maps/paths/" + "demo_pths_04Sep2025-135322.csv"
    df_pths = pd.read_csv(filename, dtype=str)
    # take a random 10% subset of demo for testing
    df_pths = df_pths.sample(frac=test_frac).reset_index(drop=True)
    df_pths = df_pths.dropna(axis=1, how='all') # drop empty columns
    print(f"[TEST MODE] Running on random {test_frac *100}% subset of demographics ({df_pths.shape[0]} rows).")

print(f"Unique participants: {df_pths['MICS_ID'].nunique()}")
# NOTE(review): this previews df_demo, not df_pths -- confirm that is intended.
print(df_demo[['UID','MICS_ID', 'PNI_ID', 'study', 'SES', 'Date', 'grp_detailed']])

In [None]:
# TODO. Change method to ensure df not highly fragmented
# Summarise the errors recorded in df_pths.
importlib.reload(tsutil)

# Root directory handed to countErrors via its `save` argument.
ERR_sv_root = specs['prjDir_root'] + specs['prjDir_outs']

# Collect the map columns (unsplit), then count errors across them.
cols = tsutil.get_mapCols(df_pths.columns, split=False, verbose=True)
error_summary, ERR_sv_pth = tsutil.countErrors(
    df_pths,
    cols,
    save=ERR_sv_root,
)

# QC STEPS
1. Check RAW data for each volume related to feature of interest
    - Use the bash script 'qc_vols.sh' with a csv of the ID, SES pairs to check; it automatically loads the corresponding raw volumes
2. Check surface segmentations
    - TODO. Create bash script that loads the T1w volumes and surfaces that are all in a common space [Hippunfold surfaces as well?]
3. Merge QC columns (takes lowest value of raw data and segmentation QC). Use this column in selecting final session by feature 

In [None]:
# DEPRECATED. See 02_demo.ipynb
# Build a new QC sheet from the map-paths table via utils_demo.mk_qcSheet.
import utils_demo as ud
importlib.reload(ud)
importlib.reload(tsutil)

reimport_src = True
specs['newQC'] = True
#pth = dfPths_out_pth
pth = "/host/verges/tank/data/daniel/3T7T/z/outputs/02a_mapPths_06Oct2025-202345.csv"

save_name = "03a_qc_table"
save_pth = specs['prjDir_root'] + specs['prjDir_outs']

# `qc_sheets` was passed to mk_qcSheet without being defined anywhere in the visible
# notebook (NameError on a fresh kernel). Fall back to the two QC-sheet dicts from the
# setup cell.
# NOTE(review): confirm that mk_qcSheet expects a list of these dicts (and in this order).
if 'qc_sheets' not in globals() or qc_sheets is None:
    qc_sheets = [PNI_QC, MICs_QC]

if specs['newQC']: # create new QC sheet

    if 'df_pths' not in globals() or df_pths is None or reimport_src: # load
        df_pths = pd.read_csv(pth, dtype=str)
        print(f"[main] df_pths loaded from {pth}")

    # NOTE: `pth` is rebound here to the second value returned by mk_qcSheet.
    qc_sheet, pth = ud.mk_qcSheet(df = df_pths, fts = specs['ft_ctx'] + specs['ft_hipp'],
                                  studies = studies, ctx_surf_qc = qc_sheets,
                                  save_pth = save_pth, save_name = save_name)

    

In [None]:
# COMBINE SUMMARY QC COLUMN FOR EACH FEATURE
# Take min value of either above column. If one col is a string, assume 1
# Fall back to the completed QC table on disk when no qc_sheet is in memory.
if globals().get('qc_sheet') is None:
    qc_sheet_completed_pth = "02b_qc_table_06Oct2025-205727_surfQC.csv"
    qc_sheet_dir = f"{specs['prjDir_root']}{specs['prjDir_outs']}"
    qc_sheet = pd.read_csv(f"{qc_sheet_dir}/{qc_sheet_completed_pth}", dtype=str)
def convert_qc(val):
    """Coerce a QC cell to float, mapping anything unusable to NaN.

    Missing values (as judged by pd.isna), the placeholders '', 'NA' and 'NaN',
    and values that float() cannot parse all come back as np.nan.
    """
    placeholders = ['', 'NA', 'NaN']
    if pd.isna(val) or val in placeholders:
        return np.nan
    try:
        numeric = float(val)
    except (ValueError, TypeError):
        # free-form text (or an unsupported type) carries no numeric rating
        numeric = np.nan
    return numeric

# Value substituted for a rating that is missing or non-numeric when the other rating is usable.
# NOTE(review): the cell header says such a rating should count as 1, but the code has
# always substituted 2; the behaviour is kept as is -- confirm the intended value.
QC_FILL = 2


def _merge_qc(raw_val, surf_val):
    """Return the lower of the raw-volume and surface QC ratings.

    Each rating is passed through convert_qc once. When neither is numeric the
    string 'NA' is returned; when only one is numeric the other counts as QC_FILL.
    """
    raw_qc, surf_qc = convert_qc(raw_val), convert_qc(surf_val)
    if pd.isna(raw_qc) and pd.isna(surf_qc):
        return 'NA'
    return min(QC_FILL if pd.isna(raw_qc) else raw_qc,
               QC_FILL if pd.isna(surf_qc) else surf_qc)


# One combined column per raw volume: QC_<vol>_surf = min(raw-volume QC, surface QC).
vol_names = tsutil.get_RawVolumeNames(specs['ft_ctx'] + specs['ft_hipp'])
for vol in vol_names:
    if vol in qc_sheet.columns:
        qc_sheet[f"QC_{vol}_surf"] = qc_sheet.apply(
            lambda row, vol=vol: _merge_qc(row[vol], row['surf_QC']),
            axis=1
        )
    else:
        print(f"[main] Volume {vol} not found in QC sheet columns.")
if save:
    # Build the output path once so the log message names the file that was actually
    # written (it used to print a stale, hard-coded filename).
    merged_qc_pth = f"{specs['prjDir_root']}{specs['prjDir_outs']}/02c_qc_table_merge_{datetime.datetime.now().strftime('%d%b%Y-%H%M%S')}.csv"
    qc_sheet.to_csv(merged_qc_pth, index=False, na_rep='NaN')
    print(f"QC sheet with combined QC values saved to {merged_qc_pth}")


# add QC values to df_pths
df_pths_cp = df_pths.copy()
# add vol_pths and vol_QC columns to df_pths
match_on = ['UID', 'study', 'Date']
vol_names = tsutil.get_RawVolumeNames(specs['ft_ctx'] + specs['ft_hipp'])

# Rename QC columns by adding '_raw_QC' suffix to volume names
qc_sheet = qc_sheet.rename(columns={vol: f'{vol}_raw_QC' for vol in vol_names if vol in qc_sheet.columns})

# Merge QC and path columns
# NOTE(review): only '*_raw_QC' and '*_pth' columns are carried over; the combined
# 'QC_<vol>_surf' columns are not, although the reordering below looks for '_surf' -- confirm.
qc_merge_cols = [col for col in qc_sheet.columns if col.endswith(('_raw_QC', '_pth')) or col in match_on]
df_pths_cp = df_pths_cp.merge(qc_sheet[qc_merge_cols], on=match_on, how='left')

# Reorder columns: everything else, then QC/path columns, then the ctx/hipp map columns.
# The three groups are mutually exclusive and use one shared prefix tuple. Previously the
# exclusion used 'hip' while the selection used 'hipp' (columns starting with 'hip' but not
# 'hipp' were silently dropped), and a ctx/hipp column ending in one of the suffixes was
# listed twice, duplicating it in the output.
qc_suffixes = ('_raw_QC', '_pth', '_surf')
map_prefixes = ('ctx', 'hipp')
all_cols = list(df_pths_cp.columns)
ctx_hip_cols = [col for col in all_cols if col.startswith(map_prefixes)]
qc_path_cols = [col for col in all_cols if col.endswith(qc_suffixes) and not col.startswith(map_prefixes)]
other_cols = [col for col in all_cols if not col.endswith(qc_suffixes) and not col.startswith(map_prefixes)]

df_pths_cp = df_pths_cp[other_cols + qc_path_cols + ctx_hip_cols]

print(f"{len(df_pths_cp.columns)} columns: {list(df_pths_cp.columns)}")

# save
if save:
    out_pth = f"{specs['prjDir_root']}{specs['prjDir_outs']}/02c_mapPths_QC_{datetime.datetime.now().strftime('%d%b%Y-%H%M%S')}.csv"
    df_pths_cp.to_csv(out_pth, index=False)
    print(f"[main] df_pths with QC values for raw volumes saved to {out_pth}")

In [None]:
# CLEAN SESSIONS
# Drop incomplete rows from the map-paths table, then keep one session per participant.

importlib.reload(tsutil)

# LOAD
# Fixed: the guard used to test the misspelled name 'df_paths', which is never defined,
# so the table was re-read from disk on every run even when df_pths was already loaded.
if 'df_pths' not in globals() or df_pths is None:
    df_pths_name = "02a_mapPths_06Oct2025-202345.csv"
    df_pths_src = f"{specs['prjDir_root']}{specs['prjDir_outs']}/{df_pths_name}"
    df_pths = pd.read_csv(df_pths_src, dtype=str)
    print(f"[main] df_pths loaded from {df_pths_src}")

# i. Ensure both hemisphere maps present, each subject has data for both studies
clean_sv_pth = f"{specs['prjDir_root']}{specs['prjDir_outs']}"
df_pths_clean, df_pths_rmv = tsutil.clean_demoPths(df_pths, nStudies=2, save=clean_sv_pth, verbose=False) # missing hemisphere pairs, missing study pairs

# TODO. ii. Remove cases marked for exclusion in seperate file or feature-ID-SES combinations to remove (eg., due to imaging artifacts like motion)
# consider merged QC value when optimizing selection of sessions

# iii. Choose a single session per participant
importlib.reload(tsutil)
df_pths_clean_final, paths_clean_pth = tsutil.clean_ses(df_pths_clean, col_ID="UID", save = clean_sv_pth, col_study='study', verbose=True)


# 4. Analysis

In [None]:
# Load maps, save into new list of dictionary items.
# Drop cases with missing values in the current map col

import pickle
importlib.reload(tsutil)

save = True
save_pth = f"{specs['prjDir_root']}{specs['prjDir_outs']}"
save_name = "04a_dl_maps"
test = False
verbose = True
toPrint = False

reimport = True
pth = f"{specs['prjDir_root']}{specs['prjDir_outs']}/03d_ses_clean_06Oct2025-210313.csv"

if 'df_pths_clean_final' not in globals() or df_pths_clean_final is None or reimport:
    df_pths_clean_final = pd.read_csv(pth, dtype=str)

print(f"shape: {df_pths_clean_final.shape}")
print(f"columns: {list(df_pths_clean_final.columns)}")

print(f"Reading in maps, creating dictionary list (each combo of: study-feature-label-surface-smoothing).\n\tNote. Not seperating groups yet.")

# find all map cols
cols_L, cols_R = tsutil.get_mapCols(df_pths_clean_final.columns, verbose=True)

# Arguments shared by the cortical and hippocampal extractMap calls; only `region` differs.
extract_kwargs = dict(
    df_mapPaths = df_pths_clean_final, cols_L = cols_L, cols_R = cols_R,
    studies = studies, demographics = demographics,
    save_name = save_name,
    save_df_pth = specs['prjDir_root'] + specs['prjDir_maps_dfs'],
    log_save_pth = specs['prjDir_root'] + specs['prjDir_outs'],
    verbose = True, test = test,
)

# extract maps as appropriate (a list stays empty when its region is switched off in specs)
ctx_dl = []
hipp_dl = []

if specs['ctx']:
    ctx_dl = tsutil.extractMap(region = "cortex", **extract_kwargs)

if specs['hipp']:
    hipp_dl = tsutil.extractMap(region = "hippocampus", **extract_kwargs)

# Create single dl 
dl = ctx_dl + hipp_dl

# Items whose 'smth' entry is the string 'NA' are the unsmoothed ones.
len_unsmth = len([d for d in dl if d['smth'] == 'NA'])
len_smth = len([d for d in dl if d['smth'] != 'NA'])
# Fixed: the second count used to print len_unsmth again instead of len_smth.
print(f"\n[main] {len(dl)} dictionary items for this study-feature-label-surface pairs\n\t{len_unsmth} with smoothing == NA | {len_smth} with smoothing")

if save:
    out_pth = tsutil.savePickle(obj = dl, root = save_pth, name = save_name, test = test)

if toPrint:
    print("="*100)
    tsutil.print_dict(dl)

# NOTE. columns with no rows are not kept in dictionary list 

In [None]:
# Parcellate maps
importlib.reload(tsutil)

# cell settings
reimport_src = False
test = False
verbose = False
save_name = "04b_dl_maps_parcel"
save_pth = specs['prjDir_root'] + specs['prjDir_outs']

# Reload the un-parcellated dictionary list from disk if it is not in memory.
if reimport_src or globals().get('dl') is None:
    src_pth = "/host/verges/tank/data/daniel/3T7T/z/outputs/04a_dl_maps_08Oct2025-104443.pkl"
    dl = tsutil.loadPickle(src_pth)

#tsutil.print_dict(dl)

# Only run when at least one region has a parcellation configured.
if not (specs['parcellate_ctx'] is None and specs['parcellate_hipp'] is None): # for each item, create a df_parc
    # One parcellation spec per region, read from the matching '<key>_ctx' / '<key>_hipp' entries.
    region_parc = [
        {'region': region_name,
         'parcellate': specs.get(f'parcellate_{suffix}', False),
         'parc_lbl': specs.get(f'parc_lbl_{suffix}', None)}
        for region_name, suffix in (('cortex', 'ctx'), ('hippocampus', 'hipp'))
    ]

    # TODO. Also parcellate without summarizing accross parcels
    dl_parcel, region_parc = tsutil.parcellate_items(
        dl,
        df_keys=['df_maps'],
        parcellationSpecs=region_parc,
        df_save_pth=specs['prjDir_root'] + specs['prjDir_parc_dfs'],
        stats=[None, 'mdn', 'mean'],
        save_pth=save_pth,
        save_name=save_name,
        verbose=verbose,
        test=test,
    )

In [None]:
# Print the contents of the parcellated dictionary list (df_print=False is passed to print_dict).
# Requires dl_parcel from the parcellation cell.
importlib.reload(tsutil)
tsutil.print_dict(dl_parcel, df_print=False)
# Commented-out scratch lines for inspecting a single parcellated df:
#item_test = dl_parcel[0]['df_maps_parc_glsr_mdn']
#item_test.head() 

In [None]:
# STATISTICS about vertices within parcels
# TODO. Appears broken
import importlib
import utils_parc as up
importlib.reload(up)

reimport_src = False
# Reload the parcellated dictionary list from disk if it is not in memory.
if reimport_src or globals().get('dl_parcel') is None:
    pth = "/host/verges/tank/data/daniel/3T7T/z/outputs/04c_dl_maps_parcel_07Oct2025-112622.pkl"
    dl_parcel = tsutil.loadPickle(pth)
    print(f"[main] Dict list with parcellated map values loaded from {pth}")

save_pth = "/host/verges/tank/data/daniel/3T7T/z/outputs/figs/04c_parcel_distr"
# NOTE(review): other cells use keys with a statistic suffix (e.g. 'df_maps_parc_dk25_mdn');
# this key has none, which may be related to the TODO above -- confirm.
koi = "df_maps_parc_dk25"
up.parcel_stats(
    dl=dl_parcel,
    key=koi,
    sv_root=save_pth,
    test=False,
)


In [None]:
# SHOW MAP MATRICES
importlib.reload(tsutil)

# Parcellated (median) map dfs to draw, one per parcellation.
parc_map_keys = ['df_maps_parc_glsr_mdn', 'df_maps_parc_dk25_mdn']

# create pngs
fig_dir = "/host/verges/tank/data/daniel/3T7T/z/outputs/figs/maps_allPt/raw"
tsutil.plotMatrices(dl=dl_parcel, df_keys=parc_map_keys, save_pth=fig_dir, test=False) # visualize smoothed maps

print("Should visually inspect maps, identifying feature-ID-SES combinations that are outliers. Mark for removal [editing <path/to/file name.xlsx> and rerun from step 3.")
print("=" * 75)

# NOTE(review): this call passes `hline_idx`, while the other plotLine calls in the notebook
# pass `hlines`, and it saves to a different directory than fig_dir above -- confirm both.
tsutil.plotLine(
    dl_parcel,
    df_keys=parc_map_keys,
    name_append="line",
    parc=['glasser', 'DK25'],
    stat=['mdn', 'mdn'],
    hline_idx=[[60,120,240,300], None],
    save_pth="/host/verges/tank/data/daniel/3T7T/z/outputs/figs/04c_maps_allPt/raw",
    marks=False,
    alpha=0.6,
    test=False,
)

#tsutil.pngs2pdf(fig_dir, output="/host/verges/tank/data/daniel/3T7T/z/outputs/figs/maps_allPt") # group pngs of same comparisons with different smoothing to single pdf


# Within study, vertex/parcel-wise statistics (z-, w- scores)
- compares _all_ participants to controls 


In [None]:
# Display the directory the within-study comparison dfs are written to.
f"{specs['prjDir_root']}{specs['prjDir_winComp_dfs']}"

In [None]:
# compute z, w scores within studies (all participants vs control distribution)
importlib.reload(tsutil)

# import smoothed maps
reimport_src = False
test = False
save_name = "05a_winStudy"

if 'dl_parcel' not in globals() or dl_parcel is None or reimport_src:
    pth = "/host/verges/tank/data/daniel/3T7T/z/outputs/04c_dl_maps_parcel_07Oct2025-181931.pkl"
    # Fixed: the pickle used to be bound to `dl`, leaving dl_parcel (the name the guard
    # tests and winComp consumes) undefined, and overwriting the un-parcellated `dl`.
    dl_parcel = tsutil.loadPickle(pth, verbose = False)
    print(f"[main] Dict list with parcellated map values loaded from {pth}")

#tsutil.print_dict(dl_parcel, df_print=False)

# calculate statistics
# Map dfs to score: the un-parcellated maps plus each parcellation x summary statistic.
keys_maps = ['df_maps', 'df_maps_parc_glsr_mdn', 'df_maps_parc_glsr_mean', 'df_maps_parc_dk25_mdn', 'df_maps_parc_dk25_mean']
dl_winComp = tsutil.winComp(dl = dl_parcel, demographics = demographics, keys_maps = keys_maps, col_grp = specs['col_grp'], ctrl_grp = ctrl_grp,
                            out_df_save_pth = specs['prjDir_root'] + specs['prjDir_winComp_dfs'],
                            stat=specs['winComp_stats'], covars = specs['covars'], key_demo = 'df_demo',
                            save = True, save_pth = specs['prjDir_root'] + specs['prjDir_outs'], save_name = save_name,
                            verbose = True, dlPrint = False, test=test)


In [None]:
# Print the contents of the within-study comparison dictionary list.
tsutil.print_dict(dl_winComp)

In [None]:
# plot z, w score maps
importlib.reload(tsutil)

reimport_src = True
test = False
fig_dir = "/host/verges/tank/data/daniel/3T7T/z/outputs/figs/05a_winComp/raw"

# Fixed: the guard used to test dl_parcel although this block loads dl_winComp, so with
# reimport_src=False a missing dl_winComp would not have been reloaded.
if 'dl_winComp' not in globals() or dl_winComp is None or reimport_src:
    pth = "/host/verges/tank/data/daniel/3T7T/z/outputs/05a_winStudy_08Oct2025-110338.pkl"
    dl_winComp = tsutil.loadPickle(pth, verbose = False)
    print(f"[main] winComp dict list loaded from {pth}")

# TODO. Add smart plotting based on parameters listed in specs dictionary
# z-score dfs of the parcellated (median) maps, one per parcellation.
dfs_toPlot = ['df_maps_parc_glsr_mdn_z', 'df_maps_parc_dk25_mdn_z']

tsutil.plotMatrices(dl = dl_winComp, df_keys = dfs_toPlot, name_append=True, save_pth=fig_dir, test=test) # visualize winCompStat maps
tsutil.plotLine(dl_winComp, df_keys = dfs_toPlot, 
            parc= ['glasser', 'DK25'], stat = ['z', 'z'],
            hlines = [[60,120,240,300], None],
            save_pth=fig_dir,
            marks = False, alpha = 0.6,
            test=test)

# TODO. Allow integration of pdf for large images
#tsutil.pngs2pdf(fig_dir, output="/host/verges/tank/data/daniel/3T7T/z/outputs/figs/05a_winComp", verbose = True) # group pngs of same comparisons with different smoothing to single pdf


# Select group of interest and ipsi/contra flip

In [None]:
# Create new dictionary list based on previous dl.
# New dl will have the same number of dictionary items (one for each study, ft, label, surf, smth, region combination).
#   Keys of each dictionary items may change. One df for each combination of [group[len(goi)] x lateralization[_R, _L, _ic] + 1 (ctrl)] x stat[<_z>, <_w>]] 
#   If df_{stat} is none, nothing regarding this statistic will be added to dict item.

importlib.reload(tsutil)

# import
reimport_src = False
src_pth = "/host/verges/tank/data/daniel/3T7T/z/outputs/05a_winStudy_08Oct2025-110338.pkl"

# Reload the within-study results from disk if they are not in memory.
if reimport_src or globals().get('dl_winComp') is None:
    dl_winComp = tsutil.loadPickle(src_pth, verbose=True)

importlib.reload(tsutil)

# cell settings
test = False
verbose = True
save_name = "05b_stats_winStudy_grp"
goi = ["TLE"] # group(s) of interest. Store main diagnosis abrev in list to allow for multiple groups
koi = [ # keys of dl_winComp to use
    'df_maps_parc_glsr_mdn_z',
    'df_maps_parc_dk25_mdn_z',
    'df_maps_parc_glsr_mean_z',
    'df_maps_parc_dk25_mean_z',
]

dl_grp_ic = tsutil.grp_flip(
    dl=dl_winComp,
    demographics=demographics,
    goi=goi,
    df_keys=koi,
    col_grp=specs['col_grp'],
    save_pth_df=specs['prjDir_root'] + specs['prjDir_grpFlip_dfs'],
    save_pth=specs['prjDir_root'] + specs['prjDir_outs'],
    save_name=save_name,
    test=test,
    verbose=verbose,
)


In [None]:
# visualize
importlib.reload(tsutil)

# cell settings
reimport_src = False
printDl = True
test = False
src_pth = "/host/verges/tank/data/daniel/3T7T/z/outputs/05b_stats_winStudy_grp_08Oct2025-110654.pkl"
fig_dir = "/host/verges/tank/data/daniel/3T7T/z/outputs/figs/05b_winStudy_grp/raw"

dfs_toPlot = ['df_maps_parc_glsr_mdn_z_TLE_ic', 'df_maps_parc_dk25_mdn_z_TLE_ic']
foi = ["thickness", "T1map", 'flair'] # features of interest
loi = ['midthickness', 'white', 'inner', 'outer'] # surfaces of interest

# Reload the grouped/flipped results from disk if they are not in memory.
if reimport_src or globals().get('dl_grp_ic') is None:
    dl_grp_ic = tsutil.loadPickle(src_pth, verbose=True)

# Keep only the items whose feature and label are both of interest.
dl_interest = [
    entry
    for entry in dl_grp_ic
    if entry['feature'] in foi and entry['label'] in loi
]

if printDl:
    print("=" * 100)
    tsutil.print_dict(dl_interest, df_print=False)

tsutil.plotMatrices(
    dl=dl_interest,
    df_keys=dfs_toPlot,
    name_append=True,
    save_pth=fig_dir,
    test=test,
) # visualize z score maps

#tsutil.pngs2pdf(fig_dir, output="/host/verges/tank/data/daniel/3T7T/z/outputs/figs/05b_winStat_ic", verbose = True) # group pngs of same comparisons with different smoothing to single pdf
tsutil.plotLine(
    dl_interest,
    df_keys=dfs_toPlot,
    name_append="line",
    parc=['glasser', 'DK25'],
    stat=['z', 'z'],
    hlines=[[60,120,240,300], None],
    save_pth=fig_dir,
    spacing=None,
    marks=False,
    alpha=0.6,
    test=test,
)

# Within study Cohen's D

In [None]:
# Within-study Cohen's D on the grouped, ipsi/contra-flipped dictionary list.
importlib.reload(tsutil)

# cell settings (`verbose` is not set here; the value from an earlier cell is used)
reimport = True
test = False
toPrint = False
save = True
save_name = "05c_stats_winD"

koi = ['df_maps_parc_glsr_mdn_z', 'df_maps_parc_dk25_mdn_z']
goi = ['TLE_ic']

# import
pth = "/host/verges/tank/data/daniel/3T7T/z/outputs/05b_stats_winStudy_grp_08Oct2025-110654.pkl"
if reimport == True or globals().get('dl_grp_ic') is None:
    dl_grp_ic = tsutil.loadPickle(pth, dlPrint=toPrint)

winD = tsutil.winD(
    dl=dl_grp_ic,
    df_keys=koi,
    save_pth_df=specs['prjDir_root'] + specs['prjDir_winD_dfs'],
    ipsiTo=specs.get('ipsiTo', 'L'),
    save=save,
    save_pth=specs['prjDir_root'] + specs['prjDir_outs'],
    save_name=save_name,
    verbose=verbose,
    test=test,
)


In [None]:
# Spot-check one winD item: list its keys, then display its 'ipsiTo' entry (None if absent).
df_test = winD[5]
item_keys = list(df_test.keys())
print(item_keys)
df_test.get('ipsiTo')

In [None]:
# visualize matrices
importlib.reload(tsutil)
save_pth = "/host/verges/tank/data/daniel/3T7T/z/outputs/figs/05c_winD/raw"

# Plot both d-score dfs of item 83.
# NOTE(review): these calls pass a single item and `key=`, while the other plotMatrices
# calls in the notebook pass a list and `df_keys=` -- confirm against the signature.
for d_key in ('df_d', 'df_d_ic'):
    tsutil.plotMatrices(dl=winD[83], key=d_key, save_pth=save_pth) # Visualize unsmoothed maps

tsutil.pngs2pdf(
    fig_dir=save_pth,
    output="/host/verges/tank/data/daniel/3T7T/z/outputs/figs/05c_winD",
    verbose=True,
)

# Between study: D-score differences
- Identify pairs of dictionary items
- Extract d scoring statistics and compute:
- raw d dif
- d dif / ctrl d

In [None]:
# Between-study comparison of the within-study d scores.
importlib.reload(tsutil)

# cell settings (`save` is not set here; the value from an earlier cell is used)
reimport = False
test = False
toPrint = False
verbose = True
save_name = "05d_btwD"

# import 
pth = "/host/verges/tank/data/daniel/3T7T/z/outputs/05c_stats_winD_08Oct2025-140529.pkl"
if reimport == True or globals().get('winD') is None:
    winD = tsutil.loadPickle(pth, dlPrint=toPrint)

comps = tsutil.btwD(
    dl=winD,
    save_pth_df=specs['prjDir_root'] + specs['prjDir_btwD_dfs'],
    save=save,
    save_pth=specs['prjDir_root'] + specs['prjDir_outs'],
    save_name=save_name,
    verbose=verbose,
    test=test,
)
