### Notebook to convert summary FS data from SP into nipoppy extraction format


In [1]:
import pandas as pd
import seaborn as sns

### Data paths

In [111]:
### Load data
# /home/nikhil/projects/Parkinsons/nimhans/data/PD_YLO/derivatives/freesurfer/7.3.2/IDP/

data_dir = "/home/nikhil/projects/Parkinsons/nimhans/data/PD_YLO/"

tabular_dir = f"{data_dir}/tabular/"

FS_dir = f"{data_dir}/derivatives/freesurfer/7.3.2/IDP/"

save_dir = "/home/nikhil/projects/Parkinsons/neuro_arch/analysis/IDP/nimhans_ylo/agg_dfs/"

session = "ses-01"

redcap_csv = f"{tabular_dir}/redcap_report.csv"
FS_stats_file = f"{FS_dir}/fs_ylopd_0624.xlsx"

### Load demographics data

In [131]:
redcap_df = pd.read_csv(redcap_csv)

demo_df = redcap_df[["record_id","redcap_event_name","age_assessment","gender"]]
demo_df = demo_df[demo_df["redcap_event_name"]=="Intake"]
demo_df["participant_id"] = demo_df["record_id"].str.replace("_","")
demo_df["bids_id"] = "sub-" + demo_df["participant_id"].astype(str)
demo_df["group"] = demo_df["participant_id"].apply(lambda x: "control" if "HC" in x else "PD")
demo_df = demo_df.rename(columns={"age_assessment":"age","gender":"sex"})
demo_df = demo_df.drop(columns=["record_id","redcap_event_name"])

demo_df.head()

Unnamed: 0,age,sex,participant_id,bids_id,group
0,51.0,Male,YLOPD01,sub-YLOPD01,PD
1,66.0,Male,YLOPD02,sub-YLOPD02,PD
2,65.0,Male,YLOPD03,sub-YLOPD03,PD
4,59.0,Male,YLOPD04,sub-YLOPD04,PD
6,43.0,Female,YLOPD05,sub-YLOPD05,PD


### Load FS data

In [132]:
group = "PD"
FS_stats_df = pd.read_excel(FS_stats_file,sheet_name=group, engine='openpyxl')
FS_stats_df = FS_stats_df.rename(columns={"sub":"bids_id"})

FS_stats_df.head()

Unnamed: 0,bids_id,lh_bankssts_thickness,lh_caudalanteriorcingulate_thickness,lh_caudalmiddlefrontal_thickness,lh_cuneus_thickness,lh_entorhinal_thickness,lh_fusiform_thickness,lh_inferiorparietal_thickness,lh_inferiortemporal_thickness,lh_isthmuscingulate_thickness,...,TotalGrayVol,SupraTentorialVol,SupraTentorialVolNotVent,MaskVol,BrainSegVol-to-eTIV,MaskVol-to-eTIV,lhSurfaceHoles,rhSurfaceHoles,SurfaceHoles,EstimatedTotalIntraCranialVol
0,sub-YLOPD03,2.488,2.283,2.366,1.723,3.533,2.816,2.43,2.945,2.161,...,626633.436838,1052666,1001059,1473104,0.72649,0.912172,12,5,17,1614941.0
1,sub-YLOPD04,2.641,2.587,2.658,1.636,3.455,2.545,2.234,2.895,2.487,...,455998.051226,748852,721749,1135645,0.68634,0.922824,2,3,5,1230620.0
2,sub-YLOPD05,2.621,2.456,2.489,1.559,3.243,2.66,2.41,2.729,2.271,...,517062.678291,917655,895570,1262055,0.822859,1.038733,1,4,5,1214994.0
3,sub-YLOPD06,2.415,2.369,2.231,1.765,3.267,2.674,2.172,2.845,2.113,...,482849.406231,808195,771677,1284500,0.651696,0.915908,8,9,17,1402433.0
4,sub-YLOPD07,2.443,2.534,2.282,1.712,3.707,2.82,2.431,2.869,2.336,...,484804.07767,773091,751042,1103126,0.78754,0.9894,5,6,11,1114945.0


### Rename CT and ASEG cols

In [133]:
def get_FS_nipoppy_df(raw_df, column_names_dict, hemi="global"):
    nipoppy_df = raw_df[['bids_id']+ list(column_names_dict.keys())].copy()
    nipoppy_df = nipoppy_df.rename(columns=column_names_dict)
    nipoppy_df.loc[:,"hemi"] = hemi
    return nipoppy_df


In [134]:
ylo_groups = ["PD"] # PD sheet also has HC participants
CT_df_list = []

for group in ylo_groups:

    lh_ct_cols = [col for col in FS_stats_df.columns if (('_thickness' in col) and ('lh_' in col))]
    lh_ct_col_dict = {col:col.split('_')[1] for col in lh_ct_cols}

    rh_ct_cols = [col for col in FS_stats_df.columns if (('_thickness' in col) and ('rh_' in col))]
    rh_ct_col_dict = {col:col.split('_')[1] for col in rh_ct_cols}

    lh_CT_df = get_FS_nipoppy_df(FS_stats_df, lh_ct_col_dict, hemi="lh")
    rh_CT_df = get_FS_nipoppy_df(FS_stats_df, rh_ct_col_dict, hemi="rh")

    n_participants = FS_stats_df["bids_id"].nunique()
    print(f"Group: {group}, Participants: {n_participants}")

    CT_df_list.append(lh_CT_df)
    CT_df_list.append(rh_CT_df)

CT_df = pd.concat(CT_df_list,axis=0)
n_participants = CT_df["bids_id"].nunique()
print(f"Total Participants: {n_participants}")

# Drop ROIs
# TODO  
# drop_ROIs = ["temporalpole","frontalpole","banks of the superior temporal sulcus"]

CT_df.head()

Group: PD, Participants: 298
Total Participants: 298


Unnamed: 0,bids_id,bankssts,caudalanteriorcingulate,caudalmiddlefrontal,cuneus,entorhinal,fusiform,inferiorparietal,inferiortemporal,isthmuscingulate,...,superiorfrontal,superiorparietal,superiortemporal,supramarginal,frontalpole,temporalpole,transversetemporal,insula,MeanThickness,hemi
0,sub-YLOPD03,2.488,2.283,2.366,1.723,3.533,2.816,2.43,2.945,2.161,...,2.792,2.213,2.753,2.497,2.854,3.846,2.227,3.038,2.50462,lh
1,sub-YLOPD04,2.641,2.587,2.658,1.636,3.455,2.545,2.234,2.895,2.487,...,2.895,2.015,2.699,2.444,2.925,3.155,1.926,3.109,2.42192,lh
2,sub-YLOPD05,2.621,2.456,2.489,1.559,3.243,2.66,2.41,2.729,2.271,...,2.597,2.062,2.777,2.445,2.959,3.775,2.348,3.057,2.38384,lh
3,sub-YLOPD06,2.415,2.369,2.231,1.765,3.267,2.674,2.172,2.845,2.113,...,2.315,1.705,2.584,2.064,2.705,3.343,1.897,2.804,2.2228,lh
4,sub-YLOPD07,2.443,2.534,2.282,1.712,3.707,2.82,2.431,2.869,2.336,...,2.831,2.188,2.61,2.374,2.728,3.769,2.002,3.19,2.49378,lh


### Merge demo and FS dataframes and save df

In [135]:
save_agg_CT_df = True

# Merge with demo_df
CT_DKT_df = pd.merge(CT_df, demo_df, on="bids_id", how="inner")

if save_agg_CT_df:
    save_file = f"{save_dir}/CT_DKT_df.csv"
    print(f"Saving CT_DKT_df to {save_file}")
    CT_DKT_df.to_csv(save_file, index=False)

CT_DKT_df.head()

Saving CT_DKT_df to /home/nikhil/projects/Parkinsons/neuro_arch/analysis/IDP/nimhans_ylo/agg_dfs//CT_DKT_df.csv


Unnamed: 0,bids_id,bankssts,caudalanteriorcingulate,caudalmiddlefrontal,cuneus,entorhinal,fusiform,inferiorparietal,inferiortemporal,isthmuscingulate,...,frontalpole,temporalpole,transversetemporal,insula,MeanThickness,hemi,age,sex,participant_id,group
0,sub-YLOPD03,2.488,2.283,2.366,1.723,3.533,2.816,2.43,2.945,2.161,...,2.854,3.846,2.227,3.038,2.50462,lh,65.0,Male,YLOPD03,PD
1,sub-YLOPD03,2.705,2.309,2.329,1.697,3.593,3.09,2.419,3.026,2.042,...,2.682,3.78,1.871,2.865,2.46704,rh,65.0,Male,YLOPD03,PD
2,sub-YLOPD04,2.641,2.587,2.658,1.636,3.455,2.545,2.234,2.895,2.487,...,2.925,3.155,1.926,3.109,2.42192,lh,59.0,Male,YLOPD04,PD
3,sub-YLOPD04,2.554,2.509,2.478,1.822,3.605,2.593,2.281,2.689,2.29,...,2.71,3.875,2.0,2.9,2.4393,rh,59.0,Male,YLOPD04,PD
4,sub-YLOPD05,2.621,2.456,2.489,1.559,3.243,2.66,2.41,2.729,2.271,...,2.959,3.775,2.348,3.057,2.38384,lh,43.0,Female,YLOPD05,PD


### bilateral ASEG data

In [136]:
ylo_groups = ["PD"] # PD sheet also has HC participants
aseg_df_list = []

for group in ylo_groups:

    lh_aseg_cols = [col for col in FS_stats_df.columns if (('_thickness' not in col) and ('Left-' in col))]
    lh_aseg_col_dict = {col:col.split('-',1)[1] for col in lh_aseg_cols}
    
    rh_aseg_cols = [col for col in FS_stats_df.columns if (('_thickness' not in col) and ('Left-' in col))]
    rh_aseg_col_dict = {col:col.split('-',1)[1] for col in lh_aseg_cols}
    
    lh_aseg_df = get_FS_nipoppy_df(FS_stats_df, lh_aseg_col_dict, hemi="lh")
    rh_aseg_df = get_FS_nipoppy_df(FS_stats_df, rh_aseg_col_dict, hemi="rh")

    n_participants = FS_stats_df["bids_id"].nunique()
    print(f"Group: {group}, Participants: {n_participants}")

    aseg_df_list.append(lh_aseg_df)
    aseg_df_list.append(rh_aseg_df)

aseg_df = pd.concat(aseg_df_list,axis=0)
n_participants = aseg_df["bids_id"].nunique()
print(f"Total Participants: {n_participants}")

aseg_df.head()

Group: PD, Participants: 298
Total Participants: 298


Unnamed: 0,bids_id,Lateral-Ventricle,Inf-Lat-Vent,Cerebellum-White-Matter,Cerebellum-Cortex,Thalamus,Caudate,Putamen,Pallidum,Hippocampus,Amygdala,Accumbens-area,VentralDC,vessel,choroid-plexus,WM-hypointensities,non-WM-hypointensities,hemi
0,sub-YLOPD03,19963.8,669.4,14188.6,46697.8,7166.7,4008.2,4630.1,2025.0,4074.7,1870.5,396.0,4049.8,31.6,1242.1,0,0,lh
1,sub-YLOPD04,10746.6,677.4,10071.6,38089.1,4693.3,2555.0,3754.2,1766.4,3414.9,1334.0,206.8,3158.8,0.0,931.8,0,0,lh
2,sub-YLOPD05,9871.0,295.2,8874.7,32588.9,6102.1,2771.3,3283.3,1402.6,4143.8,1657.3,460.2,3734.8,18.7,608.8,0,0,lh
3,sub-YLOPD06,14058.6,808.4,12124.7,41307.8,5237.5,2950.3,3749.6,1667.0,3580.8,1366.1,326.2,3696.6,21.9,1074.6,0,0,lh
4,sub-YLOPD07,9024.6,771.2,12761.6,39659.9,6103.6,4454.2,4655.1,1919.7,3533.0,1257.7,309.2,3413.9,0.0,532.4,0,0,lh


### Save df

In [137]:
save_agg_aseg_df = True

# Merge with demo_df
aseg_DKT_df = pd.merge(aseg_df, demo_df, on="bids_id", how="inner")

if save_agg_aseg_df:
    save_file = f"{save_dir}/bilateral_vol_ASEG_df.csv"
    print(f"Saving aseg_DKT_df to {save_file}")
    aseg_DKT_df.to_csv(save_file, index=False)

aseg_DKT_df.head()

Saving aseg_DKT_df to /home/nikhil/projects/Parkinsons/neuro_arch/analysis/IDP/nimhans_ylo/agg_dfs//bilateral_vol_ASEG_df.csv


Unnamed: 0,bids_id,Lateral-Ventricle,Inf-Lat-Vent,Cerebellum-White-Matter,Cerebellum-Cortex,Thalamus,Caudate,Putamen,Pallidum,Hippocampus,...,VentralDC,vessel,choroid-plexus,WM-hypointensities,non-WM-hypointensities,hemi,age,sex,participant_id,group
0,sub-YLOPD03,19963.8,669.4,14188.6,46697.8,7166.7,4008.2,4630.1,2025.0,4074.7,...,4049.8,31.6,1242.1,0,0,lh,65.0,Male,YLOPD03,PD
1,sub-YLOPD03,19963.8,669.4,14188.6,46697.8,7166.7,4008.2,4630.1,2025.0,4074.7,...,4049.8,31.6,1242.1,0,0,rh,65.0,Male,YLOPD03,PD
2,sub-YLOPD04,10746.6,677.4,10071.6,38089.1,4693.3,2555.0,3754.2,1766.4,3414.9,...,3158.8,0.0,931.8,0,0,lh,59.0,Male,YLOPD04,PD
3,sub-YLOPD04,10746.6,677.4,10071.6,38089.1,4693.3,2555.0,3754.2,1766.4,3414.9,...,3158.8,0.0,931.8,0,0,rh,59.0,Male,YLOPD04,PD
4,sub-YLOPD05,9871.0,295.2,8874.7,32588.9,6102.1,2771.3,3283.3,1402.6,4143.8,...,3734.8,18.7,608.8,0,0,lh,43.0,Female,YLOPD05,PD


### global ASEG data

In [142]:
global_aseg_cols = [col for col in FS_stats_df.columns if (('_thickness' not in col) and ('Left-' not in col) and ('Right-' not in col))]

aseg_global_df = FS_stats_df[global_aseg_cols].copy()
aseg_global_df.loc[:,"hemi"] = "global"

n_participants = aseg_global_df["bids_id"].nunique()
print(f"Total Participants: {n_participants}")

aseg_global_df.head()

Total Participants: 298


Unnamed: 0,bids_id,3rd-Ventricle,4th-Ventricle,Brain-Stem,CSF,5th-Ventricle,WM-hypointensities,non-WM-hypointensities,Optic-Chiasm,CC_Posterior,...,SupraTentorialVol,SupraTentorialVolNotVent,MaskVol,BrainSegVol-to-eTIV,MaskVol-to-eTIV,lhSurfaceHoles,rhSurfaceHoles,SurfaceHoles,EstimatedTotalIntraCranialVol,hemi
0,sub-YLOPD03,1698.3,1913.7,20510.8,1389.8,0.0,1845.6,0.0,111.9,1174.5,...,1052666,1001059,1473104,0.72649,0.912172,12,5,17,1614941.0,global
1,sub-YLOPD04,1277.7,1409.2,16628.1,1439.6,0.0,584.0,0.0,91.0,825.9,...,748852,721749,1135645,0.68634,0.922824,2,3,5,1230620.0,global
2,sub-YLOPD05,1339.6,1632.6,14528.2,843.7,0.0,658.4,0.0,95.9,1147.9,...,917655,895570,1262055,0.822859,1.038733,1,4,5,1214994.0,global
3,sub-YLOPD06,1718.5,1798.6,19649.8,1568.6,0.0,3818.8,0.0,119.0,748.9,...,808195,771677,1284500,0.651696,0.915908,8,9,17,1402433.0,global
4,sub-YLOPD07,1367.1,1386.7,18661.5,821.0,0.0,5620.9,0.0,67.7,876.0,...,773091,751042,1103126,0.78754,0.9894,5,6,11,1114945.0,global


### Save df

In [143]:
save_agg_aseg_df = True

# Merge with demo_df
aseg_global_DKT_df = pd.merge(aseg_global_df, demo_df, on="bids_id", how="inner")

if save_agg_aseg_df:
    save_file = f"{save_dir}/global_vol_ASEG_df.csv"
    print(f"Saving aseg_DKT_df to {save_file}")
    aseg_global_DKT_df.to_csv(save_file, index=False)

aseg_DKT_df.head()

Saving aseg_DKT_df to /home/nikhil/projects/Parkinsons/neuro_arch/analysis/IDP/nimhans_ylo/agg_dfs//global_vol_ASEG_df.csv


Unnamed: 0,bids_id,Lateral-Ventricle,Inf-Lat-Vent,Cerebellum-White-Matter,Cerebellum-Cortex,Thalamus,Caudate,Putamen,Pallidum,Hippocampus,...,VentralDC,vessel,choroid-plexus,WM-hypointensities,non-WM-hypointensities,hemi,age,sex,participant_id,group
0,sub-YLOPD03,19963.8,669.4,14188.6,46697.8,7166.7,4008.2,4630.1,2025.0,4074.7,...,4049.8,31.6,1242.1,0,0,lh,65.0,Male,YLOPD03,PD
1,sub-YLOPD03,19963.8,669.4,14188.6,46697.8,7166.7,4008.2,4630.1,2025.0,4074.7,...,4049.8,31.6,1242.1,0,0,rh,65.0,Male,YLOPD03,PD
2,sub-YLOPD04,10746.6,677.4,10071.6,38089.1,4693.3,2555.0,3754.2,1766.4,3414.9,...,3158.8,0.0,931.8,0,0,lh,59.0,Male,YLOPD04,PD
3,sub-YLOPD04,10746.6,677.4,10071.6,38089.1,4693.3,2555.0,3754.2,1766.4,3414.9,...,3158.8,0.0,931.8,0,0,rh,59.0,Male,YLOPD04,PD
4,sub-YLOPD05,9871.0,295.2,8874.7,32588.9,6102.1,2771.3,3283.3,1402.6,4143.8,...,3734.8,18.7,608.8,0,0,lh,43.0,Female,YLOPD05,PD
