# Get demographics and paths to smoothed maps from sources
(steps common to between-study and between-session comparisons)
1. DEMOGRAPHICS  
    - Identify IDs with 3T and 7T    
    - Extract clinical information for epilepsy patients
    - Extract demographic information for all participants
2. PATHS TO MAPS
    - Smooth maps if necessary; save paths in a master demographics file
    - Create a df specifying the errors encountered with map creation/availability
3. CLEAN
    - Drop NA rows
    - Apply QC
    - Ensure each participant has data for both studies

Note: Do not select sessions at this point. Session selection depends on the nature of the analysis and should therefore occur in the analysis file.

In [None]:
# Initialize
import os
import sys
import pandas as pd
import importlib
import numpy as np
import datetime

sys.path.append(os.getcwd())

import utils_demo as ud
import tTsTGrpUtils as tsutil
from genUtils import id, gen, t1


# Run-time switches for this notebook.
lab = True  # True: use the /host/verges/... paths below; False: use the /Users/... paths
save = True  # True: define an output directory (save_pth) for saved tables
verbose = True
toPrint = True

test = False
test_frac = 0.1 # fraction of demo to use for testing if test=True

includeBL = False # if should include bilateral TLE patients (with one side higher than other) in analyses

# Bind save_pth up front so that later cells which pass `save_pth=save_pth`
# unconditionally do not raise NameError when save is False.
save_pth = None

if lab: # define root paths to source files
    src_dir = "/host/verges/tank/data/daniel/3T7T/z/data/sources" # path to directory with source pt sheets
    sys.path.append("/host/verges/tank/data/daniel/")
    if save:
        save_pth = "/host/verges/tank/data/daniel/3T7T/z/outputs"
else:
    src_dir = "/Users/danielmendelson/Library/CloudStorage/OneDrive-McGillUniversity/Documents/PhD/Boris/projects/PT/sources" # path to directory with source pt sheets
    sys.path.append("/Users/danielmendelson/Library/CloudStorage/OneDrive-McGillUniversity/Documents/PhD/Boris/code")
    if save:
        save_pth = "/Users/danielmendelson/Library/CloudStorage/OneDrive-McGillUniversity/Documents/PhD/Boris/projects/3T7T/data/outputs"



# 1. Demographics

In [None]:
# Specifications
#
# One dict per source spreadsheet. 'NAME', 'PATH' and 'SHEET' identify the
# file; the remaining entries pair a short field label with the column header
# used in that sheet (None where the sheet has no such column).
# NOTE(review): presumably consumed by ud.get_demo via `sheets` -- confirm the
# exact keys it reads against utils_demo.

PNI = {
    "NAME": "PNI",
    "PATH": f"{src_dir}/MICA_PNI_06Oct2025.xlsx",  # 7T controls
    "SHEET": "all",  # name of sheet in file
    "ID_7T": "ID_PNI",
    "ID_3T": "ID_MICs",
    "Ses_7T": "session",
    "Date_7T": "scanDate",
    "study": "7T",
    "DOB": "dob",
    "Sex": "sex",
    "Gender": "gender",
    "Hand": "handedness",
    "Eth": "ethnicity",
    "Language": "language",
    "Job": "employment",
    "Edu": "education",
    "LastSz": "lastSeizure",
}

MICs = {
    "NAME": "MICs",
    "PATH": f"{src_dir}/MICA-MTL-3T_06Oct2025.xlsx",  # 3T controls
    "SHEET": "Sheet1",  # name of sheet in file
    "ID_7T": None,
    "ID_3T": "Study_name",
    "Ses_3T": "Visit",
    "Date_3T": "Scan_Date (D.M.Y)",
    "study": "3T",
    "Hand": "Handed",
    "Sex": "AssignedSex",
    "Gender": "GenderIdentity",
    "Height": "HeightApprox",
    "Weight": "WeightApprox",
    "Eth": "Ethnicity",
    "Job": "Employ",
    "Edu": "YoE",
    "LastSz": "Last seizure",
}

# Clinical database: column headers are reproduced exactly as they appear in
# the sheet (including their spelling and spacing), so do not "correct" them.
Clin = {
    "NAME": "Clin",
    "PATH": f"{src_dir}/Clinical_06Oct2025.xlsx",
    "SHEET": "clinical-database-detailed",  # name of sheet in file
    "ID_7T": None,
    "ID_3T": "participant_id",
    "Date_3T": None,
    "Gender": "Gender",
    "Hand": "Handedness",
    "Language": "Language",
    "Job": "Employment",
    "Edu": "Education",
    "EpilepsyDxILAE": "Epilepsy diagnosis based on ILAE",
    "EpilepsyClass": "Epilepsy classification:Focal,Generalized",
    "FocusLat": "Lateralization of epileptogenic focus",
    "FocusConfirmed": "Epileptogenic focus confirmed by the information of (sEEG/ site of surgical resection/ Ictal EEG abnormalities +/. MRI findings): FLE=forntal lobe epilepsy and cingulate epilepsy, CLE:central/midline epilepsy,ILE: insular epilepsy, mTLE=mesio.temporal lobe epilepsy, nTLE=neocortical lobe epilepsy, PQLE=posterior quadrant lobe epilepsy , multifocal epilepsy,IGE=ideopathic lobe epilepsy,unclear)",
    "EMUDischargeDx": "Dx at EMU discharge ",
    "EMUAdmissionDate": "EMU admission date(dd-mm-yy)",
    "AdmissionDuration": "Duration of admission",
    "EpilepsyRiskFactors": "Risk factors for epilepsy",
    "SeizureOnsetYr": "Seizure onset (yr)",
    "DrugResistant": "Drug resistant epilepsy at time of EMU admission",
    "NumASMsPrior": "# of ASMs prior current EMU admission",
    "PrevASMs": "Previous ASMs (name and doses (mg/d)) if applicable prior the current EMU admission",
    "NumASMOnAdmission": "# of ASM on admission",
    "ASMsOnAdmission": "ASMs  on admission (name, doses (mg per day)",
    "GeneticTest": "Genetic test (year,results)",
    "FDGPET": "FDG.PET",
    "BaselineMRI": "Baseline MRI (year,results)",
    "InvasiveExplorations": "Invasive explorations (Y/N)",
    "NumSurgicalResections": "# of surgical resection/thermocoagulatin",
    "SurgicalResectionDateSite": "Surgical resection date and site",
    "Histopathology": "Histopatholgy",
    "Engel6mo": "Engel classification (seizure outcomes at the 6 month )",
    "Engel1yr": "Engel classification (seizure outcomes after 1 year from surgical resection)",
    "ILAEOutcome1yr": "ILAE outcome after surgical resection by 1 yr",
    "NeuromodDevices": "Neuromodulation devices",
}

# Order of the source sheets as handed to ud.get_demo.
sheets = [PNI, MICs, Clin]

# QC sheets
# Each entry locates a spreadsheet holding surface-segmentation QC information
# for one study, and names its ID, session and QC columns.

# 7T: QC values are free-form comment strings. They are only filled in for rows
# that need a manual check; segmentations on the remaining rows can be assumed good.
PNI_QC = dict(
    STUDY="7T",
    PATH=f"{src_dir}/7T_processing_26Sept2025.xlsx",
    SHEET="Proc_newDays",
    ID="Subjec_ID",
    SES="Session ",
    QC_col="Comments ",
)

# 3T: QC scores lie between 0 (unacceptable) and 2 (acceptable).
MICs_QC = dict(
    STUDY="3T",
    PATH=f"{src_dir}/BIDS_MICs_QC_logs_26Sept2025.xlsx",
    SHEET="Sheet1",
    ID="ID",
    SES="ses",
    QC_col="surface quality",
)

qc_sheets = [PNI_QC, MICs_QC]

In [None]:
# Study details
# Root directories and ID patterns for each study.
# NOTE: this rebinds the names `MICs` and `PNI`, which the specifications cell
# also uses for the source-sheet dicts. The `sheets` list built there keeps its
# own references, but the bare names refer to the dicts below from here on.

# Sub-directory layout that is identical for both studies.
_shared_layout = {
    "dir_raw": "/rawdata",
    "dir_deriv": "/derivatives",
    "dir_mp": "/micapipe_v0.2.0",
    "dir_hu": "/hippunfold_v1.3.0/hippunfold",
    "dir_zb": "/DM_zb_37comp",
}

MICs = {
    "name": "MICs",
    "dir_root": "/data/mica3/BIDS_MICs",
    **_shared_layout,
    "study": "3T",
    "ID_ctrl": ["HC"],  # patterns for control IDs in demographics file
    "ID_Pt": ["PX"],  # patterns for patient IDs in demographics file
}

PNI = {
    "name": "PNI",
    "dir_root": "/data/mica3/BIDS_PNI",
    **_shared_layout,
    "study": "7T",
    # NOTE(review): MICs uses 'ID_ctrl'/'ID_Pt' while PNI uses 'ID_col';
    # confirm which keys the downstream utilities actually read.
    "ID_col": ["PNC", "Pilot"],  # column for ID in demographics file
}

studies = [MICs, PNI]

# Label(s) in the grouping column that identify controls.
ctrl_grp = {"ctrl": ["CTRL"]}

# Patient group labels to compare to controls. Each key is a group name and
# each value lists the detailed group labels that belong to it.
px_grps = {
    'allPX': ['TLE_U', 'MFCL', 'FLE_R', 'MFCL_bTLE', 'UKN_L', 'mTLE_R', 'mTLE_L', 'FLE_L', 'UKN_U', 'TLE_L', 'TLE_R'],
    'TLE': ['TLE_L', 'TLE_R', 'TLE_U', 'mTLE_R', 'mTLE_L'],
    'TLE_L': ['TLE_L', 'mTLE_L'],
    'TLE_R': ['TLE_R', 'mTLE_R'],
    'FCD': ['FLE_R', 'FLE_L'],
    'MFCL': ['MFCL', 'bTLE'],
    'UKN': ['UKN_L', 'UKN_U'],
}

# Including bilateral TLE only extends the two side-specific groups;
# 'allPX' and 'TLE' are the same in both configurations.
if includeBL:
    px_grps['TLE_L'].append('bTLE_L')
    px_grps['TLE_R'].append('bTLE_R')

# Make list of dict items for group definitions: one single-key dict per
# patient group, followed by the control group.
groups = [{label: px_grps[label]} for label in ('TLE_L', 'TLE_R')] + [ctrl_grp]

# Analysis specifications shared by the cells below.
# Values that are meant to be iterated over are given as lists.
specs = {
    # --- project directories (sub-directories are appended to prjDir_root) ---
    "prjDir_root": "/host/verges/tank/data/daniel/3T7T/z",
    "prjDir_outs": "/outputs",
    "prjDir_out_stats": "/outputs/stats",
    "prjDir_out_figs": "/outputs/figures",
    "prjDir_maps": "/maps",  # output directory for smoothed cortical maps
    "prjDir_dictLists": "/maps/dictLists",
    # NOTE(review): every other output path starts with "/outputs"; confirm
    # that "/output" (no trailing "s") is intended here.
    "prjDir_mapPths": "/output/paths",
    "prjDir_maps_dfs": "/outputs/dfs/04a_maps_dfs",
    "prjDir_parc_dfs": "/outputs/dfs/04b_maps_parc",
    "prjDir_winComp_dfs": "/outputs/dfs/05a_winComp",
    "prjDir_grpFlip_dfs": "/outputs/dfs/05b_grpFlip",
    "prjDir_winD_dfs": "/outputs/dfs/05c_winD",
    "prjDir_btwD_dfs": "/outputs/dfs/05d_btwComp",

    # --- cortex ---
    "ctx": True,  # whether to include cortical analyses
    "surf_ctx": ["fsLR-32k"],
    "parcellate_ctx": "glasser",  # parcellation to use, or None if no parcellation.
    "parc_lbl_ctx": "glasser_int",  # what name to fetch for parcellation values
    "lbl_ctx": ["midthickness", "pial", "white"],  # pial, midthick, white, etc
    "ft_ctx": ["thickness", "T1map"],  # features: T1map, flair, thickness, FA, ADC
    "smth_ctx": [5, 10],  # in mm

    # --- hippocampus ---
    "hipp": True,  # whether to include hippocampal analyses
    "surf_hipp": ["den-0p5mm"],
    "parcellate_hipp": "DK25",
    "parc_lbl_hipp": "idx",
    "lbl_hipp": ["midthickness", "inner", "outer"],  # outer, inner, midthickness, etc
    "ft_hipp": ["thickness", "T1map"],  # features: T1map, flair, thickness, FA, ADC
    "smth_hipp": [2, 5],  # in mm

    # --- within study comparisons ---
    "col_grp": "grp_detailed",  # column in df_demo with group labels
    "winComp_stats": ["z"],  # what stats to run for within study comparisons ('z' for z-scoring, 'w' for w-scoring)

    "ipsiTo": "L",  # what hemisphere for controls ipsi should be mapped to

    # --- quality control ---
    "newQC": False,  # True: build a new QC sheet; False: load 'currentQC_table'
    "currentQC_table": "/host/verges/tank/data/daniel/3T7T/z/outputs/03a_qc_table_16Oct2025-170946.csv",
    "completed_qc_pth": "/host/verges/tank/data/daniel/3T7T/z/outputs/03a_qc_table_16Oct2025-170946.csv",
}

In [None]:
# function call
# Build the master demographics table from the source sheets. ud.get_demo
# returns the table together with the path of the csv it refers to.
importlib.reload(ud)  # re-import utils_demo so edits apply without a kernel restart
if save:
    df_demo, demo_csv_pth = ud.get_demo(sheets, save_pth=save_pth)
else:
    df_demo, demo_csv_pth = ud.get_demo(sheets)

# details of demographics file
demographics = {
    "pth" : demo_csv_pth,
    # column names:
    'nStudies': True, # whether multiple studies are included
    "ID_7T" : "PNI_ID",
    "ID_3T" : "MICS_ID",
    "SES" : "SES",
    "date": "Date",
    "age": "age",
    "sex": "sex",
    "grp" : "grp_detailed" # col name for participant grouping variable to use
}

# Covariates as a flat list of column names. A trailing comma after the closing
# bracket would instead store a 1-tuple wrapping the list: (['age', 'sex'],).
specs['covars'] = [demographics['age'], demographics['sex']]

print(f"Unique participants: {df_demo['UID'].nunique()}")
print(df_demo[['UID','MICS_ID', 'PNI_ID', 'study', 'SES', 'Date', 'grp_detailed']])

In [None]:
# TODO. Make proper Table 1
importlib.reload(ud)  # re-import utils_demo so edits apply without a kernel restart

# Per-group summary of the demographics table.
ud.grp_summary(df_demo, col_grp='grp_detailed', save_pth=save_pth)

print("-" * 100)
print("MEDIAN AGE by group")
# Left as the final expression so the notebook displays the resulting Series.
(
    df_demo
    .groupby(['grp_detailed', 'study'])['age']
    .median()
    .sort_index(level='grp_detailed')
)

# 2. Path to smoothed maps
Strategy: 
Add paths to relevant maps to df containing demographic information. Each row is one participant at a unique session.

Hippocampal maps: identify path to smoothed hippocampal maps, add to row-wise df
Cortical maps: take raw maps from micapipe, apply smoothing then save these maps in project directory and add path of the smoothed map to the df

In [None]:
# a. Get map paths
importlib.reload(tsutil)
reimport_src = False  # set True to force re-reading the demographics csv from disk

# (Re)load the demographics table whenever it is missing from the session or a
# reload is explicitly requested. The previous condition,
# `'df_demo' not in globals() and 'demo_csv_pth' not in globals() or reimport_src`,
# skipped the load when df_demo was missing but demo_csv_pth was defined, which
# left df_demo undefined for the statements below.
if 'df_demo' not in globals() or reimport_src:
    if 'demo_csv_pth' not in globals():
        # fall back to a previously saved demographics file
        pth = "/host/verges/tank/data/daniel/3T7T/z/outputs/01b_demo_16Oct2025-163601.csv"
    else:
        pth = demo_csv_pth
    df_demo = pd.read_csv(pth)
    print(f"[main] Demo file loaded from {pth}")

print(df_demo[['UID','MICS_ID', 'PNI_ID', 'study', 'SES', 'Date', 'grp_detailed']])
# TODO. Create proper log file for function
df_pths, dfPths_out_pth, log_pth = tsutil.idToMap(df_demo = df_demo, studies = studies, dict_demo = demographics, specs = specs,
                              save = save, save_pth=f"{specs['prjDir_root']}{specs['prjDir_outs']}", save_name = "02a_mapPths",
                              test=test, verbose=True)


In [None]:
# b. Map error summary
# TODO. Change method to ensure df not highly fragmented
importlib.reload(tsutil)

reimport_src = False  # set True to force re-reading df_pths from disk
pth = dfPths_out_pth  # csv path returned by tsutil.idToMap

# Read the table from disk when a reload is requested, or when df_pths is
# either undefined in this session or set to None.
if reimport_src or globals().get('df_pths') is None:
    df_pths = pd.read_csv(pth)
    print(f"[main] df_pths loaded from {pth}")

cols = tsutil.get_mapCols(df_pths.columns, split=False, verbose=True)
err_summary, ERR_sv_pth = tsutil.countErrors(
    df_pths,
    cols,
    save=f"{specs['prjDir_root']}{specs['prjDir_outs']}",
)


# 3. Quality Control

In [None]:
# Generate QC sheet to be completed. Merge completed QC sheet with df_pths
importlib.reload(ud)
importlib.reload(tsutil)

reimport_src = False  # set True to force re-reading df_pths from disk

save_name = "03a_qc_table"
save_pth = specs['prjDir_root'] + specs['prjDir_outs']  # output directory for both QC tables

# a. Create blank QC sheet to complete manually
if specs['newQC']: # create new QC sheet

    if 'df_pths' not in globals() or df_pths is None or reimport_src: # load
        df_pths_pth = "/host/verges/tank/data/daniel/3T7T/z/outputs/02a_mapPths_16Oct2025-170218.csv"
        df_pths = pd.read_csv(df_pths_pth, dtype=str)
        # Report the file that was actually read. This message previously printed
        # `pth`, a name left over from earlier cells (or undefined in a fresh session).
        print(f"[main] df_pths loaded from {df_pths_pth}")

    qc_sheet, pth = ud.mk_qcSheet(df = df_pths, fts = specs['ft_ctx'] + specs['ft_hipp'],
                                  studies = studies, ctx_surf_qc = qc_sheets,
                                  save_pth = save_pth, save_name = save_name,
                                  currentQC = specs['currentQC_table'])
else: # load existing QC sheet
    qc_sheet_pth = specs['currentQC_table']
    qc_sheet = pd.read_csv(qc_sheet_pth, dtype=str)
    print(f"[main] QC sheet loaded from {qc_sheet_pth}")

# Merge QC values for volumes and surfaces
if 'dfPths_out_pth' not in globals():
    dfPths_out_pth = "/host/verges/tank/data/daniel/3T7T/z/outputs/02a_mapPths_16Oct2025-170218.csv" # should be most recent output of idToMap function

df_pths_qc, pth = ud.qc_combine(qc_pth = specs['completed_qc_pth'],
                                df_pths_pth = dfPths_out_pth,
                                save_pth = save_pth,
                                save_name = "03b_mapPths_QC")