In [2]:
import nibabel as nib
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
from glob import glob
import math

In [3]:
project_dir = '/home/ROBARTS/myousif/projects_inprogress/hippocharts/'
project_data_dir = os.path.join(project_dir,'data')

In [4]:
all_datasets_path = os.path.abspath('/home/ROBARTS/myousif/graham/scratch/norm_models/datasets/')

datasets_path_names = ['snsx','eplink']
datasets_path_list = [os.path.join(all_datasets_path,dset) for dset in datasets_path_names]

datasets_names = ['snsx','eplink']
datasets_paths = dict(zip(datasets_names,datasets_path_list))

In [5]:
hippunfold_path = lambda direc: os.path.join(os.path.abspath(direc),'hippunfold')
diff_path = lambda direc, metric='FA': os.path.join(os.path.abspath(direc),f'hipposample')

master_participant_df = pd.read_csv(os.path.join(project_data_dir,'master_participants_data.csv'),index_col=0)

In [6]:
def glob_metric(hippunfold_dir,den='0p5mm',metric='thickness',space='T1w',label='hipp',ext='.dscalar.nii',desc=None,smooth='1mm'):
    
    if desc == None:
        paths = glob(os.path.join(hippunfold_dir, f'sub-*/surf/sub-*_space-{space}_den-{den}_label-{label}_{metric}{ext}'))
        if paths == []:
            paths = glob(os.path.join(hippunfold_dir, f'sub-*/ses-*1/surf/sub-*_ses-*1_space-{space}_den-{den}_label-{label}_{metric}{ext}'))
    else:
        paths = glob(os.path.join(hippunfold_dir, f'sub-*/surf/sub-*_space-{space}_den-{den}_label-{label}_desc-{desc}_smooth-{smooth}_{metric}{ext}'))
        if paths == []:
            paths = glob(os.path.join(hippunfold_dir, f'sub-*/ses-*1/surf/sub-*_ses-*1_space-{space}_den-{den}_label-{label}_desc-{desc}_smooth-{smooth}_{metric}{ext}'))
    
    try:
        paths = sorted(paths)
        subjects = [ p.split('/')[-1].split('_')[0] for p in paths]
    except:
        subjects = []
    
    return pd.DataFrame({'participant_id':subjects, 'paths':paths})

In [7]:
datasets_paths

{'snsx': '/home/ROBARTS/myousif/graham/scratch/norm_models/datasets/snsx',
 'eplink': '/home/ROBARTS/myousif/graham/scratch/norm_models/datasets/eplink'}

In [8]:
snsx_epi_df = pd.read_csv('/home/ROBARTS/myousif/graham/projects/ctb-akhanf/myousif9/norm_models/datasets/snsx/snsx_data_collection.csv')

In [9]:
snsx_columns = ['participant_id', 'age', 'sex', 'diagnosis',
                'Focal', 'Generalized', 'Unknown', 'Temporal', 'Frontal', 'Parietal','Insular', 'Occipital ', 'Bilobal', 
                'Right ', 'Left', 'Other',
                'Engel_IA', 'Engel_IB', 'Engel_IC', 'Engel_ID', 
                'Engel_IIA', 'Engel_IIB', 'Engel_IIC', 'Engel_IID',
                'Engel_IIIA', 'Engel_IIIB', 'Engel_IVA', 'Engel_IVB', 'Engel_IVC', 'Engel_I=1', 
                'ilae_1', 'ilae_2', 'ilae_3', 'ilae_4', 'ilae_5', 'ilae_6',
                'MRI (0=no/1=yes) Normal','MRI (0=no/1=yes) MTS']

In [10]:
snsx_epi_df = snsx_epi_df[snsx_columns]

In [11]:
snsx_epi_df['site'] = 'snsx_7T'
snsx_epi_df['dataset'] = 'snsx'

In [12]:
snsx_epi_df.head()

Unnamed: 0,participant_id,age,sex,diagnosis,Focal,Generalized,Unknown,Temporal,Frontal,Parietal,...,ilae_1,ilae_2,ilae_3,ilae_4,ilae_5,ilae_6,MRI (0=no/1=yes) Normal,MRI (0=no/1=yes) MTS,site,dataset
0,sub-P008,26,f,Epilepsy,1.0,0.0,0.0,1.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,snsx_7T,snsx
1,sub-P012,28,m,Epilepsy,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,snsx_7T,snsx
2,sub-P013,37,f,Epilepsy,1.0,0.0,0.0,1.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,snsx_7T,snsx
3,sub-P014,33,m,Epilepsy,1.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,snsx_7T,snsx
4,sub-P015,19,f,Epilepsy,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,snsx_7T,snsx


In [14]:
snsx_df = pd.read_csv('/home/ROBARTS/myousif/graham/projects/ctb-akhanf/cfmm-bids/Khan/SNSX/master/participants_master.tsv', sep='\t')

In [15]:
snsx_df.diagnosis = snsx_df.diagnosis.str.replace('Eplepsy','Epilepsy')

In [16]:
snsx_df.scanner.value_counts()

7T    130
3T     17
Name: scanner, dtype: int64

In [17]:
snsx_df[((snsx_df.diagnosis == 'control') | (snsx_df.diagnosis == 'Epilepsy')) & (snsx_df.scanner == '7T')].head()

Unnamed: 0,participant_id,age,sex,diagnosis,handedness,scanner
0,sub-C001,54.0,m,control,right,7T
1,sub-C002,33.0,m,control,right,7T
2,sub-C003,20.0,f,control,right,7T
3,sub-C004,30.0,m,control,right,7T
4,sub-C005,36.0,m,control,right,7T


In [20]:
snsx_df.head()

Unnamed: 0,participant_id,age,sex,diagnosis,handedness,scanner
0,sub-C001,54.0,m,control,right,7T
1,sub-C002,33.0,m,control,right,7T
2,sub-C003,20.0,f,control,right,7T
3,sub-C004,30.0,m,control,right,7T
4,sub-C005,36.0,m,control,right,7T


In [21]:
metrics = ['thickness','gyrification','myelin']
metric_csvs = []

for met_idx, met in enumerate(metrics):

    paths = []
    dset = 'snsx'

    # for dset_idx, dset in enumerate(datasets_paths):
    
    met_sub = glob_metric(hippunfold_path(datasets_paths[dset]),metric=met)
        
    if met_sub.size == 0:
        continue
    else:   
        met_sub['dataset'] = dset
        met_sub['site'] = 'snsx_7T'
        met_sub = pd.merge(met_sub,snsx_df,on=['participant_id'],how='left')
        met_sub.dropna(inplace=True)
        met_sub.reset_index(drop=True,inplace=True)

        metric_csvs.append(met_sub[['participant_id','age','sex','site','dataset','paths']])

    # metric_csvs.append(paths)


KeyboardInterrupt: 

In [89]:
for met_idx, met in enumerate(metrics):
    metric_csvs[met_idx][0].reset_index(drop=True).to_csv(os.path.join(project_data_dir,f'snsx_{met}_participants_data.csv'))


In [24]:
metrics_diff = ['FA','MD']
metric_diff_csvs = []

for met_idx, met in enumerate(metrics_diff):


    dset = 'snsx'

    # for dset_idx, dset in enumerate(datasets_paths):
    
    met_sub = glob_metric(os.path.join(datasets_paths[dset],'hipposample'),metric=met,desc='eddy')
        
    if met_sub.size == 0:
        continue
    else:   
        met_sub['dataset'] = dset
        met_sub['site'] = 'snsx_7T'
        met_sub = pd.merge(met_sub,snsx_df,on=['participant_id'],how='left')
        met_sub.dropna(inplace=True)
        met_sub.reset_index(drop=True,inplace=True)

        metric_diff_csvs.append(met_sub[['participant_id','age','sex','site','dataset','paths']])

    # metric_csvs.append(paths)


In [32]:

for met_idx, met in enumerate(metrics_diff):
    metric_diff_csvs[met_idx].reset_index(drop=True).to_csv(os.path.join(project_data_dir,f'snsx_{met}_participants_data.csv'))


In [13]:
eplink_df = pd.read_csv('/home/ROBARTS/myousif/graham/projects/ctb-akhanf/myousif9/norm_models/datasets/eplink_phase3/eplink_participants_2024_01_09.tsv', sep='\t')

In [14]:
eplink_df.head()

Unnamed: 0,participant_id,age,sex,handedness,focal_seizures,generalized_seizures,mesial_temporal_lobe_epilepsy,temporal_lobe_epilepsy,frontal_lobe_epilepsy,occipital_lobe_epilepsy,...,resective_surgery,temporal_lobe_surgery,side_of_lesionectomy_surgery,lesionectomy,intraoperative_cortical_stimulation,MRGLITT_surgery,pathology_completed,ilae_hippocampal_sclerosis_classification,hippocampal_pathology_details,neocortical_pathology_details
0,sub-HSC0001,10.0,Male,Right,Checked,Unchecked,No,No,Probable,No,...,,,,,,,,,,
1,sub-HSC0002,13.0,Female,Right,Checked,Unchecked,No,Possible,Possible,,...,Yes,,,No,No,Yes,No,,,
2,sub-HSC0003,8.0,Female,Right,Checked,Unchecked,No,Possible,No,No,...,Yes,,,No,No,Yes,No,,,
3,sub-HSC0004,11.0,Female,Right,Checked,Unchecked,No,No,No,No,...,Yes,,,Yes,Yes,No,Yes,,,
4,sub-HSC0005,12.0,Female,Right,Checked,Unchecked,No,Probable,No,Probable,...,Yes,Anterior Temporal Lobectomy With Amygdalohippo...,Right,No,No,No,Yes,Type 1,Sections show poorly oriented hippocampus with...,"A. Resection, lateral temporal lobe - Subpial ..."


In [15]:
eplink_df

Unnamed: 0,participant_id,age,sex,handedness,focal_seizures,generalized_seizures,mesial_temporal_lobe_epilepsy,temporal_lobe_epilepsy,frontal_lobe_epilepsy,occipital_lobe_epilepsy,...,resective_surgery,temporal_lobe_surgery,side_of_lesionectomy_surgery,lesionectomy,intraoperative_cortical_stimulation,MRGLITT_surgery,pathology_completed,ilae_hippocampal_sclerosis_classification,hippocampal_pathology_details,neocortical_pathology_details
0,sub-HSC0001,10.0,Male,Right,Checked,Unchecked,No,No,Probable,No,...,,,,,,,,,,
1,sub-HSC0002,13.0,Female,Right,Checked,Unchecked,No,Possible,Possible,,...,Yes,,,No,No,Yes,No,,,
2,sub-HSC0003,8.0,Female,Right,Checked,Unchecked,No,Possible,No,No,...,Yes,,,No,No,Yes,No,,,
3,sub-HSC0004,11.0,Female,Right,Checked,Unchecked,No,No,No,No,...,Yes,,,Yes,Yes,No,Yes,,,
4,sub-HSC0005,12.0,Female,Right,Checked,Unchecked,No,Probable,No,Probable,...,Yes,Anterior Temporal Lobectomy With Amygdalohippo...,Right,No,No,No,Yes,Type 1,Sections show poorly oriented hippocampus with...,"A. Resection, lateral temporal lobe - Subpial ..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
135,sub-TWH0081,41.0,Female,Right,Checked,Unchecked,Probable,Probable,Probable,No,...,,,,,,,,,,
136,sub-TWH0082,26.0,Male,Right,Checked,Unchecked,Possible,Definite,No,No,...,,,,,,,,,,
137,sub-TWH0084,27.0,Female,Right,Checked,Unchecked,No,No,Definite,No,...,,,,,,,,,,
138,sub-TWH0085,54.0,Male,Right,Checked,Unchecked,No,Definite,No,No,...,,,,,,,,,,


In [16]:
eplink_df

Unnamed: 0,participant_id,age,sex,handedness,focal_seizures,generalized_seizures,mesial_temporal_lobe_epilepsy,temporal_lobe_epilepsy,frontal_lobe_epilepsy,occipital_lobe_epilepsy,...,resective_surgery,temporal_lobe_surgery,side_of_lesionectomy_surgery,lesionectomy,intraoperative_cortical_stimulation,MRGLITT_surgery,pathology_completed,ilae_hippocampal_sclerosis_classification,hippocampal_pathology_details,neocortical_pathology_details
0,sub-HSC0001,10.0,Male,Right,Checked,Unchecked,No,No,Probable,No,...,,,,,,,,,,
1,sub-HSC0002,13.0,Female,Right,Checked,Unchecked,No,Possible,Possible,,...,Yes,,,No,No,Yes,No,,,
2,sub-HSC0003,8.0,Female,Right,Checked,Unchecked,No,Possible,No,No,...,Yes,,,No,No,Yes,No,,,
3,sub-HSC0004,11.0,Female,Right,Checked,Unchecked,No,No,No,No,...,Yes,,,Yes,Yes,No,Yes,,,
4,sub-HSC0005,12.0,Female,Right,Checked,Unchecked,No,Probable,No,Probable,...,Yes,Anterior Temporal Lobectomy With Amygdalohippo...,Right,No,No,No,Yes,Type 1,Sections show poorly oriented hippocampus with...,"A. Resection, lateral temporal lobe - Subpial ..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
135,sub-TWH0081,41.0,Female,Right,Checked,Unchecked,Probable,Probable,Probable,No,...,,,,,,,,,,
136,sub-TWH0082,26.0,Male,Right,Checked,Unchecked,Possible,Definite,No,No,...,,,,,,,,,,
137,sub-TWH0084,27.0,Female,Right,Checked,Unchecked,No,No,Definite,No,...,,,,,,,,,,
138,sub-TWH0085,54.0,Male,Right,Checked,Unchecked,No,Definite,No,No,...,,,,,,,,,,


In [24]:
metrics = ['thickness','gyrification']
metric_eplink_csvs = []

for met_idx, met in enumerate(metrics):

    paths = []
    dset = 'eplink'

    # for dset_idx, dset in enumerate(datasets_paths):
    
    met_sub = glob_metric(hippunfold_path(datasets_paths[dset]),den='2mm',metric=met)
        
    if met_sub.size == 0:
        continue
    else:   
        met_sub['dataset'] = dset
        met_sub['site'] = 'eplink'
        met_sub = pd.merge(met_sub,eplink_df,on=['participant_id'],how='left')
        met_sub.dropna(inplace=True)
        met_sub.reset_index(drop=True,inplace=True)

        metric_eplink_csvs.append(met_sub[['participant_id','age','sex','site','dataset','paths']])

    # metric_csvs.append(paths)


In [22]:
eplink_thickness_df = glob_metric(hippunfold_path(datasets_paths['eplink']),den='2mm')

In [28]:
eplink_gyrification_df = glob_metric(hippunfold_path(datasets_paths['eplink']),den='2mm',metric='gyrification')

In [32]:
eplink_fa_df = glob_metric(os.path.join(datasets_paths['eplink'],'hipposample'), den='2mm', metric='FA', desc='eddy', smooth='2mm')

In [36]:
eplink_md_df = glob_metric(os.path.join(datasets_paths['eplink'],'hipposample'), den='2mm', metric='MD', desc='eddy', smooth='2mm')

In [87]:
eplink_path_dfs = []
for df in [eplink_thickness_df, eplink_gyrification_df, eplink_fa_df, eplink_md_df]:  
    ep_df = pd.merge(df,eplink_df,on='participant_id',how='left')
    eplink_path_dfs.append(ep_df[[ep_df.columns[0]]+ ep_df.columns[2:].tolist() + ['paths']])

In [89]:
eplink_data_dir = os.path.join(project_data_dir,'eplink_data')
os.makedirs(eplink_data_dir,exist_ok=True)

In [90]:
for met_idx, met in enumerate(['thickness','gyrification','FA','MD']):
    eplink_path_dfs[met_idx].to_csv(os.path.join(eplink_data_dir,f'eplink_{met}_participants_data.csv'))

In [91]:
eplink_thick_path_df.head()

Unnamed: 0,participant_id,paths,age,sex,handedness,focal_seizures,generalized_seizures,mesial_temporal_lobe_epilepsy,temporal_lobe_epilepsy,frontal_lobe_epilepsy,...,resective_surgery,temporal_lobe_surgery,side_of_lesionectomy_surgery,lesionectomy,intraoperative_cortical_stimulation,MRGLITT_surgery,pathology_completed,ilae_hippocampal_sclerosis_classification,hippocampal_pathology_details,neocortical_pathology_details
0,sub-HSC0002,/home/ROBARTS/myousif/graham/scratch/norm_mode...,13.0,Female,Right,Checked,Unchecked,No,Possible,Possible,...,Yes,,,No,No,Yes,No,,,
1,sub-HSC0004,/home/ROBARTS/myousif/graham/scratch/norm_mode...,11.0,Female,Right,Checked,Unchecked,No,No,No,...,Yes,,,Yes,Yes,No,Yes,,,
2,sub-HSC0006,/home/ROBARTS/myousif/graham/scratch/norm_mode...,10.0,Male,Right,Checked,Checked,No,Possible,No,...,Yes,Anterior Temporal Lobectomy Without Amygdalohi...,Left,Yes,No,No,Yes,No hippocampal sclerosis,,Pathology demonstrated diffuse low grade astro...
3,sub-HSC0009,/home/ROBARTS/myousif/graham/scratch/norm_mode...,9.0,Female,Right,Checked,Unchecked,No,Probable,No,...,Yes,,,Yes,No,No,Yes,,,
4,sub-HSC0011,/home/ROBARTS/myousif/graham/scratch/norm_mode...,14.0,Male,Right,Checked,Checked,No,No,Possible,...,Yes,,,Yes,Yes,No,Yes,,,


In [64]:
eplink_md_df.head()

Unnamed: 0,participant_id,paths
0,sub-HSC0002,/home/ROBARTS/myousif/graham/scratch/norm_mode...
1,sub-HSC0004,/home/ROBARTS/myousif/graham/scratch/norm_mode...
2,sub-HSC0006,/home/ROBARTS/myousif/graham/scratch/norm_mode...
3,sub-HSC0009,/home/ROBARTS/myousif/graham/scratch/norm_mode...
4,sub-HSC0018,/home/ROBARTS/myousif/graham/scratch/norm_mode...


In [44]:
metrics_diff = ['FA','MD']
metric_diff_eplink_csvs = []

for met_idx, met in enumerate(metrics_diff):


    dset = 'eplink'

    # for dset_idx, dset in enumerate(datasets_paths):
    
    met_sub = glob_metric(os.path.join(datasets_paths[dset],'hipposample'),metric=met,desc='eddy',smooth='2mm')
        
    if met_sub.size == 0:
        continue
    else:   
        met_sub['dataset'] = dset
        met_sub['site'] = 'eplink'
        met_sub = pd.merge(met_sub,eplink_df,on=['participant_id'],how='left')
        met_sub.dropna(inplace=True)
        met_sub.reset_index(drop=True,inplace=True)

        metric_diff_eplink_csvs.append(met_sub[['participant_id','age','sex','site','dataset','paths']])

    # metric_csvs.append(paths)


In [79]:
eplink_path_dfs[3]..value_counts()

No     23
Yes     2
Name: mri_3T_mts, dtype: int64

In [78]:
eplink_path_dfs[3].columns

Index(['participant_id', 'age', 'sex', 'handedness', 'focal_seizures',
       'generalized_seizures', 'mesial_temporal_lobe_epilepsy',
       'temporal_lobe_epilepsy', 'frontal_lobe_epilepsy',
       'occipital_lobe_epilepsy', 'parietal_lobe_epilepsy',
       'unknown_localization_focal_epilepsy', 'pet_abnormality',
       'pet_lateralization_temporal_lobe', 'pet_lateralization_frontal_lobe',
       'pet_lateralization_parietal_lobe', 'pet_lateralization_occipital_lobe',
       'spect_abnormality', 'spect_lateralization_temporal_lobe',
       'spect_lateralization_frontal_lobe',
       'spect_lateralization_parietal_lobe',
       'spect_lateralization_occipital_lobe', 'meg_spike_cluster',
       'mri_1.5T_abnormality', 'mri_1.5T_mts', 'mri_1.5T_mts_location',
       'mri_1.5T_fcd', 'mri_1.5T_fcd_location',
       'mri_1.5T_other_epileptogenic_lesion',
       'mri_1.5T_other_epileptogenic_lesion_location',
       'mri_1.5T_other_nonspecific_findings',
       'mri_1.5T_other_nonspecific_

In [77]:
eplink_path_dfs[3].a.value_counts()

No          28
Possible     9
Definite     7
Probable     5
Name: mesial_temporal_lobe_epilepsy, dtype: int64

In [37]:
for met_idx, met in enumerate(metrics_diff):
    metric_diff_csvs[met_idx].reset_index(drop=True).to_csv(os.path.join(project_data_dir,f'{met}_participants_data.csv'))