In [None]:
import os
import pandas as pd
import sys
import importlib
import re
import numpy as np
import datetime
import matplotlib.pyplot as plot
import copy
import pickle

# Make modules located in the current working directory importable.
sys.path.append(os.getcwd())

In [None]:
# --- Run flags ---------------------------------------------------------------
lab = True      # True: use the lab-server paths below; False: use the local (OneDrive) paths
save = True     # when True, save_pth is set to the output directory for the chosen environment
verbose = True
toPrint = True

test = False
test_frac = 0.1 # fraction of demo to use for testing if test=True

# Bind save_pth unconditionally: other cells in this notebook read it even when
# `save` is False, which would otherwise raise NameError on a fresh kernel.
save_pth = None

if lab: # define root paths to source files
    src_dir = "/host/verges/tank/data/daniel/3T7T/z/data/sources" # path to directory with source pt sheets
    sys.path.append("/host/verges/tank/data/daniel/")
    if save:
        save_pth = "/host/verges/tank/data/daniel/3T7T/z/outputs"
else:
    src_dir = "/Users/danielmendelson/Library/CloudStorage/OneDrive-McGillUniversity/Documents/PhD/Boris/projects/PT/sources" # path to directory with source pt sheets
    sys.path.append("/Users/danielmendelson/Library/CloudStorage/OneDrive-McGillUniversity/Documents/PhD/Boris/code")
    if save:
        save_pth = "/Users/danielmendelson/Library/CloudStorage/OneDrive-McGillUniversity/Documents/PhD/Boris/projects/3T7T/data/outputs"

# Project-local modules. They are imported here rather than with the other imports,
# presumably because they are only found through the sys.path entries appended above.
import demo
import tTsTGrpUtils as tsutil
from genUtils import id, gen, t1  # NOTE: `id` shadows the builtin id() from this point on

# Re-execute the project modules so that edits made on disk are picked up
# without restarting the kernel.
importlib.reload(demo)
importlib.reload(id)
importlib.reload(gen)
importlib.reload(t1)
importlib.reload(tsutil)

# Source-sheet descriptors.
# Every descriptor must define NAME, PATH, SHEET, ID_7T and ID_3T.
# All remaining keys are the variables to be extracted; the same variable must
# use the same key in every sheet.
# NOTE(review): the values look like the column headers of each spreadsheet
# (typos and stray spaces included), so they are kept verbatim — confirm before editing.
# NOTE: the names PNI and MICs are rebound to study-directory dicts further down
# in this file; `sheets` keeps pointing at the descriptors defined here.
PNI = {
    "NAME": "PNI",
    "PATH": f"{src_dir}/MICA_PNI_01Oct2025.xlsx",  # 7T controls
    "SHEET": "all",  # name of sheet in file
    "ID_7T": "ID_PNI",
    "ID_3T": "ID_MICs",
    "Ses_7T": "session",
    "Date_7T": "scanDate",
    "study": "7T",
    "DOB": "dob",
    "Sex": "sex",
    "Gender": "gender",
    "Hand": "handedness",
    "Eth": "ethnicity",
    "Language": "language",
    "Job": "employment",
    "Edu": "education",
    "LastSz": "lastSeizure",
}

MICs = {
    "NAME": "MICs",
    "PATH": f"{src_dir}/MICA-MTL-3T_01Oct2025.xlsx",  # 3T controls
    "SHEET": "Sheet1",  # name of sheet in file
    "ID_7T": None,
    "ID_3T": "Study_name",
    "Ses_3T": "Visit",
    "Date_3T": "Scan_Date (D.M.Y)",
    "study": "3T",
    "Hand": "Handed",
    "Sex": "AssignedSex",
    "Gender": "GenderIdentity",
    "Height": "HeightApprox",
    "Weight": "WeightApprox",
    "Eth": "Ethnicity",
    "Job": "Employ",
    "Edu": "YoE",
    "LastSz": "Last seizure",
}

Clin = {
    "NAME": "Clin",
    "PATH": f"{src_dir}/Clinical_01Oct2025.xlsx",
    "SHEET": "clinical-database-detailed",  # name of sheet in file
    "ID_7T": None,
    "ID_3T": "participant_id",
    "Date_3T": None,
    # demographics
    "Gender": "Gender",
    "Hand": "Handedness",
    "Language": "Language",
    "Job": "Employment",
    "Edu": "Education",
    # diagnosis
    "EpilepsyDxILAE": "Epilepsy diagnosis based on ILAE",
    "EpilepsyClass": "Epilepsy classification:Focal,Generalized",
    "FocusLat": "Lateralization of epileptogenic focus",
    "FocusConfirmed": "Epileptogenic focus confirmed by the information of (sEEG/ site of surgical resection/ Ictal EEG abnormalities +/. MRI findings): FLE=forntal lobe epilepsy and cingulate epilepsy, CLE:central/midline epilepsy,ILE: insular epilepsy, mTLE=mesio.temporal lobe epilepsy, nTLE=neocortical lobe epilepsy, PQLE=posterior quadrant lobe epilepsy , multifocal epilepsy,IGE=ideopathic lobe epilepsy,unclear)",
    "EMUDischargeDx": "Dx at EMU discharge ",
    # admission and history
    "EMUAdmissionDate": "EMU admission date(dd-mm-yy)",
    "AdmissionDuration": "Duration of admission",
    "EpilepsyRiskFactors": "Risk factors for epilepsy",
    "SeizureOnsetYr": "Seizure onset (yr)",
    # medication
    "DrugResistant": "Drug resistant epilepsy at time of EMU admission",
    "NumASMsPrior": "# of ASMs prior current EMU admission",
    "PrevASMs": "Previous ASMs (name and doses (mg/d)) if applicable prior the current EMU admission",
    "NumASMOnAdmission": "# of ASM on admission",
    "ASMsOnAdmission": "ASMs  on admission (name, doses (mg per day)",
    # investigations
    "GeneticTest": "Genetic test (year,results)",
    "FDGPET": "FDG.PET",
    "BaselineMRI": "Baseline MRI (year,results)",
    "InvasiveExplorations": "Invasive explorations (Y/N)",
    # surgery and outcome
    "NumSurgicalResections": "# of surgical resection/thermocoagulatin",
    "SurgicalResectionDateSite": "Surgical resection date and site",
    "Histopathology": "Histopatholgy",
    "Engel6mo": "Engel classification (seizure outcomes at the 6 month )",
    "Engel1yr": "Engel classification (seizure outcomes after 1 year from surgical resection)",
    "ILAEOutcome1yr": "ILAE outcome after surgical resection by 1 yr",
    "NeuromodDevices": "Neuromodulation devices",
}

# Descriptors in the order PNI, MICs, Clin.
sheets = [PNI, MICs, Clin]

# QC sheets: location and column names of the surface-segmentation QC logs.
PNI_QC = {  # 7T surface segmentation QC
    "PATH": f"{src_dir}/7T_processing_26Sept2025.xlsx",
    "SHEET": "Proc_newDays",
    "ID_7T": "Subjec_ID",
    "SES": "Session ",
    # NOTE: values are free-form strings, present only for rows that should be
    # checked manually; segmentations of all other rows can be assumed good.
    "QC_col": "Comments ",
}

MICs_QC = {  # 3T surface segmentation QC
    "PATH": f"{src_dir}/BIDS_MICs_QC_logs_26Sept2025.xlsx",
    "SHEET": "Sheet1",
    "ID_3T": "ID",
    "SES": "ses",
    # NOTE: values range from 0 (unacceptable) to 2 (acceptable).
    "QC_col": "surface quality",
}


In [None]:
# below parameters may eventually be moved to a config file

# run parameters
# Reuse a demo_csv_pth that is already defined in the kernel; otherwise fall
# back to the default group-summary CSV.
demo_csv_pth = globals().get(
    'demo_csv_pth',
    "/host/verges/tank/data/daniel/3T7T/z/outputs/01c_grpSummary_16Sep2025.csv",
)

# details of demographics file
demographics = dict(
    pth=demo_csv_pth,
    nStudies=True,  # whether multiple studies are included
    # column names:
    ID_7T="PNI_ID",
    ID_3T="MICS_ID",
    SES="SES",
    date="Date",
    age="age",
    sex="sex",
    grp="grp_detailed",  # col name for participant grouping variable to use
)

# specify root directories
# NOTE: MICs and PNI are rebound here; earlier in this file the same names hold
# the source-sheet descriptors.

# Sub-directory layout that is identical for both studies.
_study_dirs = {
    "dir_raw": "/rawdata",
    "dir_deriv": "/derivatives",
    "dir_mp": "/micapipe_v0.2.0",
    "dir_hu": "/hippunfold_v1.3.0/hippunfold",
    "dir_zb": "/DM_zb_37comp",
}

MICs = {
    "name": "MICs",
    "dir_root": "/data/mica3/BIDS_MICs",
    **_study_dirs,
    "study": "3T",
    "ID_ctrl": ["HC"],  # patterns for control IDs in demographics file
    "ID_Pt": ["PX"],  # patterns for patient IDs in demographics file
}

PNI = {
    "name": "PNI",
    "dir_root": "/data/mica3/BIDS_PNI",
    **_study_dirs,
    "study": "7T",
    # column for ID in demographics file
    # NOTE(review): MICs uses ID_ctrl / ID_Pt while this dict uses ID_col —
    # confirm which keys the downstream code reads.
    "ID_col": ["PNC", "Pilot"],
}

studies = [MICs, PNI]

# Control group: group name -> labels belonging to it.
ctrl_grp = {'ctrl': ['CTRL']}

# Patient group labels to compare to controls (group name -> member labels).
px_grps = {
    'allPX': [
        'TLE_U', 'MFCL', 'FLE_R', 'MFCL_bTLE', 'UKN_L', 'mTLE_R',
        'mTLE_L', 'FLE_L', 'UKN_U', 'TLE_L', 'TLE_R',
    ],
    'TLE': ['TLE_L', 'TLE_R', 'TLE_U', 'mTLE_R', 'mTLE_L'],
    'TLE_L': ['TLE_L', 'mTLE_L', 'bTLE_L'],
    'TLE_R': ['TLE_R', 'mTLE_R', 'bTLE_R'],
    'FCD': ['FLE_R', 'FLE_L'],
    'MFCL': ['MFCL', 'bTLE'],
    'UKN': ['UKN_L', 'UKN_U'],
}

# Group definitions as a list of single-entry dicts: the two lateralised TLE
# groups first, then the control group.
groups = [{grp_name: px_grps[grp_name]} for grp_name in ('TLE_L', 'TLE_R')]
groups.append(ctrl_grp)

specs = {  # all spec values to be in lists to allow for iteration across these values
    # directories
    "prjDir_root": "/host/verges/tank/data/daniel/3T7T/z",
    "prjDir_outs": "/outputs",
    "prjDir_out_stats": "/outputs/stats",
    "prjDir_out_figs": "/outputs/figures",
    "prjDir_maps": "/maps",  # output directory for smoothed cortical maps
    "prjDir_dictLists": "/maps/dictLists",
    # NOTE(review): "/output", whereas the entries above use "/outputs" — confirm this is intended
    "prjDir_mapPths": "/output/paths",

    # cortex
    "ctx": True,  # whether to include cortical analyses
    "surf_ctx": ["fsLR-5k"],
    "parcellate": "glasser",  # parcellation to use, or None if no parcellation. Uses fsl32k surfaces
    "lbl_ctx": ["midthickness", "pial", "white", "swm1.0mm"],  # pial, midthick, white, etc
    "ft_ctx": ["thickness", "T1map", "flair", "ADC", "FA"],  # features: T1map, flair, thickness, FA, ADC
    "smth_ctx": [5, 10],  # in mm

    # hippocampus
    "hipp": False,  # whether to include hippocampal analyses
    "surf_hipp": ["0p5mm"],
    "lbl_hipp": ["midthickness", "inner", "outer"],  # outer, inner, midthickness, etc
    "ft_hipp": ["thickness", "T1map", "flair", "ADC", "FA"],  # features: T1map, flair, thickness, FA, ADC
    "smth_hipp": [2, 5],  # in mm

    # within study comparisons
    "col_grp": "grp_detailed",  # column in df_demo with group labels
    "z": True,  # whether to run z-scoring
    "w": True,  # whether to run w-scoring
    # covariate column names, taken from the demographics config (age, then sex)
    "covars": [demographics[key] for key in ("age", "sex")],

    "ipsiTo": "L",  # what hemisphere for controls ipsi should be mapped to
    "newQC": False,
}


# Holds paths to generated outputs; starts with the demographics file path only.
out_pths = dict(df_01b_demo_pth=demographics['pth'])

In [None]:
# Ensure the demographics table is loaded into df_demo.
# reimport=True forces a fresh read from disk even when df_demo already exists in the kernel.
reimport = True

# File that is actually read.
# NOTE(review): this is not demographics['pth'] (whose default is the
# 01c_grpSummary CSV) — confirm which of the two files is intended here.
df_demo_pth = "/host/verges/tank/data/daniel/3T7T/z/outputs/01b_demo_16Sep2025-154209.csv"

if reimport or 'df_demo' not in globals() or df_demo is None:
    df_demo = pd.read_csv(df_demo_pth)
    # Report the path that was read rather than demographics['pth'], which may point elsewhere.
    print(f"[main] Demo file loaded from {df_demo_pth}")

# Overview of the identifying columns.
print(df_demo[['UID','MICS_ID', 'PNI_ID', 'study', 'SES', 'Date', 'grp_detailed']])

In [None]:
# Scratch check of tsutil.appendSeries and tsutil.addToSeries with two hand-built rows.
importlib.reload(tsutil)

# Columns passed to appendSeries as `match_on`.
match_on = ['UID', 'study', 'SES']

# First row: the three match_on fields followed by the test values.
series_test = pd.concat([
    pd.Series({'UID': 'UID0001', 'study': '3T', 'SES': '01'}, dtype="object"),
    pd.Series({'MICS_ID': 'new_value', 'new_key2': 'new_value2', 'language': 'TESTVALUE'}),
])
df_out = tsutil.appendSeries(series_test, df_demo, match_on=match_on)

# Second row, built the same way and then passed through addToSeries.
series_test = pd.concat([
    pd.Series({'UID': 'UID0002', 'study': '7T', 'SES': '01'}, dtype="object"),
    pd.Series({'MICS_ID': 'value2', 'new_key2': 'new_value190', 'language': 'TESTVALUE'}),
])
series_test = tsutil.addToSeries(
    col=['new_key2', 'new_key3'],
    val=['new_valueNEWNEW', 'secondValue'],
    series=series_test,
)

print(series_test)
# The second append goes onto the result of the first one (df_out), not onto df_demo.
df_out = tsutil.appendSeries(series_test, df_out, match_on=match_on)

df_out

In [None]:
# Debug call of tsutil.idToMap; the result is kept in df_idToMap.
# NOTE: save / test / test_frac / verbose are hard-coded for this call and do
# not follow the run flags of the same names defined earlier in this file.
importlib.reload(tsutil)
df_idToMap = tsutil.idToMap(
    df_demo=df_demo,
    studies=studies,
    specs=specs,
    dict_demo=demographics,
    save=True,
    save_pth=save_pth,
    save_name="02a_DEBUG",
    test=True,
    test_frac=0.025,
    verbose=True,
)

In [None]:
# Show the configured output directory as this cell's output.
save_pth