## Notebook to test out FreeSurfer stats output without using FreeSurfer utilities

In [1]:
import pandas as pd
import numpy as np
from freesurfer_stats import CorticalParcellationStats
import os
import glob
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Directory holding the UKBB field lookup tables.
# The trailing "/" is required: the file paths below are built by plain string concatenation.
metadata_dir = "/home/nikhil/projects/brain_changes/brain-diff/metadata/"

# DKT
# Full UKBB DKT field table (read in the "Read UKBB fields" section)
ukbb_dkt_fields = f"{metadata_dir}UKBB_FS_DKT_Fields.csv"
# Cortical-thickness subset with "Field ID", "roi" and "hemi" columns (used to rename DKT ROIs)
ukbb_dkt_ct_fields = f"{metadata_dir}UKBB_DKT_CT_Fields.csv"

# ASEG
# Full UKBB ASEG field table (read in the "Read UKBB fields" section)
ukbb_aseg_fields = f"{metadata_dir}UKBB_FS_ASEG_Fields.csv"
# Volume subset with "Field ID" and "hemi_ROI" columns (used to rename ASEG ROIs)
ukbb_aseg_vol_fields = f"{metadata_dir}UKBB_ASEG_vol_Fields.csv"


# Root of the FreeSurfer output; per-subject stats are read from "<fs_output_dir><subject_id>/stats/".
# The trailing "/" is required here as well.
fs_output_dir = "/home/nikhil/projects/QPN_processing/test_data/fmriprep/output/freesurfer-6.0.1/"

### Read DKT CT stats

In [3]:
# Collate one cortical measure from each subject's per-hemisphere DKT stats file
# into a single wide table (one row per subject, one column per ROI), rename the
# ROI columns to UKBB field IDs, merge both hemispheres and save the result as CSV.
stat_file = "aparc.DKTatlas.stats"
stat_measure = "average_thickness_mm"
save_dir = './'

# Lookup table with (at least) "Field ID", "roi" and "hemi" columns
ukbb_dkt_ct_fields_df = pd.read_csv(ukbb_dkt_ct_fields)

hemispheres = ["lh", "rh"]
roi_ct_field_df_dict = {}

subject_id_list = ["sub-PD01369D713546","sub-NIMHANS001"]

hemi_stat_measures_dict = {}
for hemi in hemispheres:
    # Collect one single-row frame per subject and concatenate once after the
    # loop. Growing a frame with pd.concat inside the loop re-copies every
    # previous row on each iteration, and seeding it with an empty DataFrame
    # triggers the pandas FutureWarning about empty entries in concat.
    subject_rows = []
    for subject_id in subject_id_list:
        fs_stats_dir = f"{fs_output_dir}{subject_id}/stats/"
        stats = CorticalParcellationStats.read(f"{fs_stats_dir}{hemi}.{stat_file}").structural_measurements

        # ROI names become the column labels, the requested measure the row values
        cols = ["subject_id"] + list(stats["structure_name"].values)
        vals = [subject_id] + list(stats[stat_measure].values)

        df = pd.DataFrame(columns=cols)
        df.loc[0] = vals
        subject_rows.append(df)

    stat_measure_df = pd.concat(subject_rows, axis=0)

    # replace columns names with ukbb field IDs
    # .copy() makes the filtered rows an independent frame, so adding the "hemi"
    # column below does not write into a slice of the lookup table
    # (avoids SettingWithCopyWarning).
    is_hemi_row = ukbb_dkt_ct_fields_df["hemi"] == hemi
    roi_ct_field_df = ukbb_dkt_ct_fields_df.loc[is_hemi_row, ["Field ID","roi"]].copy()
    roi_ct_field_df["hemi"] = hemi
    roi_ct_field_id_dict = dict(zip(roi_ct_field_df["roi"], roi_ct_field_df["Field ID"]))
    stat_measure_df = stat_measure_df.rename(columns=roi_ct_field_id_dict)

    hemi_stat_measures_dict[hemi] = stat_measure_df

    roi_ct_field_df_dict[hemi] = roi_ct_field_df

# merge left and right dfs (pd.merge defaults to an inner join on subject_id)
stat_measure_LR_df = pd.merge(hemi_stat_measures_dict["lh"],hemi_stat_measures_dict["rh"], on="subject_id")

# e.g. "aparc.DKTatlas.stats" + "average_thickness_mm" -> "DKTatlas_average_thickness.csv"
save_file = f"{stat_file.split('.')[1]}_{stat_measure.rsplit('_',1)[0]}.csv"

print(f"Saving stat measures here: {save_dir}/{save_file}")
stat_measure_LR_df.to_csv(f"{save_dir}/{save_file}")


Saving stat measures here: .//DKTatlas_average_thickness.csv


### Read ASEG vol stats

In [4]:
def parse_aseg(aseg_file, stat_measure):
    """Read the per-structure table of an ``aseg.stats`` file into a two-column frame.

    The table is expected to have ten whitespace-separated columns; lines
    starting with ``#`` are skipped by ``np.loadtxt``. The fifth column is
    taken as the structure name and the fourth column as the measure.

    Parameters
    ----------
    aseg_file : str or file-like
        Anything ``np.loadtxt`` accepts as its first argument.
    stat_measure : str
        Column name given to the fourth table column in the result
        (the caller in this notebook passes "Volume_mm3").

    Returns
    -------
    pd.DataFrame
        Columns ``"hemi_ROI"`` (structure name as ``str``) and ``stat_measure``
        (float32 values), one row per structure.
    """
    # The first three columns are parsed as 32-bit integers. The previous 8-bit
    # type for the first two could not hold values above 127, and FreeSurfer
    # segmentation ids go higher (e.g. corpus-callosum labels 251-255).
    # ndmin=1 keeps a one-row table as a length-1 array rather than a 0-d
    # record, which pd.DataFrame cannot consume.
    aseg_data = np.loadtxt(
        aseg_file,
        dtype="i4,i4,i4,f4,S32,f4,f4,f4,f4,f4",
        ndmin=1,
    )

    # Structured-array fields are auto-named f0..f9; keep name (f4) and measure (f3)
    aseg_df = pd.DataFrame(data=aseg_data)
    aseg_df = aseg_df[["f4","f3"]].rename(columns={"f3":stat_measure, "f4":"hemi_ROI"})
    # Names are read as fixed-width bytes (S32); convert them to str
    aseg_df["hemi_ROI"] = aseg_df["hemi_ROI"].str.decode('utf-8')

    print(f"number of ROIs in aseg file: {len(aseg_df)}")

    return aseg_df


In [5]:
# Collate one measure from each subject's aseg stats file into a wide table
# (one row per subject, one column per ROI), keep only the ROIs that have a
# UKBB field ID, rename them to those IDs and save the result as CSV.
stat_file = "aseg.stats"
stat_measure = "Volume_mm3"

# Collect one single-row frame per subject and concatenate once after the loop.
# Growing a frame with pd.concat inside the loop re-copies all previous rows on
# every iteration, and seeding it with an empty DataFrame triggers the pandas
# FutureWarning about empty entries in concat.
subject_rows = []
for subject_id in subject_id_list:
    fs_stats_dir = f"{fs_output_dir}{subject_id}/stats/"
    aseg_file = f"{fs_stats_dir}{stat_file}"
    stats = parse_aseg(aseg_file,stat_measure)

    # ROI names become the column labels, the requested measure the row values
    cols = ["subject_id"] + list(stats["hemi_ROI"].values)
    vals = [subject_id] + list(stats[stat_measure].values)

    df = pd.DataFrame(columns=cols)
    df.loc[0] = vals
    subject_rows.append(df)

stat_measure_df = pd.concat(subject_rows, axis=0)

# Grab UKBB field ids lookup table
ukbb_aseg_vol_fields_df = pd.read_csv(ukbb_aseg_vol_fields)

# Only ROIs present both in the lookup table and in the collated stats are kept
roi_vol_field_df = ukbb_aseg_vol_fields_df[ukbb_aseg_vol_fields_df["hemi_ROI"].isin(stat_measure_df.columns)]
common_rois = list(roi_vol_field_df["hemi_ROI"].values)
roi_vol_field_id_dict = dict(zip(roi_vol_field_df["hemi_ROI"], roi_vol_field_df["Field ID"]))

print(f"Number of aseg vol ROIs after UKBB merge: {len(roi_vol_field_id_dict)}")

# Rename ROIs with ukbb ids (remove the ROIs which don't have ukbb ids)
stat_measure_df = stat_measure_df[["subject_id"] + common_rois].copy()
stat_measure_df = stat_measure_df.rename(columns=roi_vol_field_id_dict)

save_file = "aseg_subcortical_volumes.csv"

print(f"Saving stat measures here: {save_dir}/{save_file}")
stat_measure_df.to_csv(f"{save_dir}/{save_file}")


number of ROIs in aseg file: 45
number of ROIs in aseg file: 45
Number of aseg vol ROIs after UKBB merge: 36
Saving stat measures here: .//aseg_subcortical_volumes.csv


### DKT + ASEG columns --> UKBB ID list
- This is the list of input variables for the UKBB and ADNI comparison

In [6]:
# NOTE: this whole cell is intentionally commented out. It appears to be a
# one-off export that combines the DKT and ASEG lookup tables into a single
# "Field ID" / "hemi_ROI" / "stat" table and writes it to the metadata
# directory; uncomment to regenerate that file.
# It relies on roi_vol_field_df and roi_ct_field_df_dict from the cells above.

# aseg_ukbb_ids_df = roi_vol_field_df[["Field ID","hemi_ROI"]].copy()
# aseg_ukbb_ids_df["stat"] = "aseg"

# dkt_ukbb_ids_df = pd.concat([roi_ct_field_df_dict["lh"],roi_ct_field_df_dict["rh"]],axis=0)
# dkt_ukbb_ids_df["hemi_ROI"] = dkt_ukbb_ids_df["hemi"] + "-" + dkt_ukbb_ids_df["roi"]
# dkt_ukbb_ids_df["stat"] = "DKT"

# brainage_fs_ukbb_field_ids_df = pd.concat([dkt_ukbb_ids_df[["Field ID","hemi_ROI","stat"]], 
#                                             aseg_ukbb_ids_df[["Field ID","hemi_ROI","stat"]]],axis=0)


# brainage_fs_ukbb_field_ids_df.to_csv(f"{metadata_dir}/brainage_fs_ukbb_field_ids.csv")

# brainage_fs_ukbb_field_ids_df.head()

Unnamed: 0,Field ID,hemi_ROI,stat
0,27174,lh-caudalanteriorcingulate,DKT
2,27175,lh-caudalmiddlefrontal,DKT
4,27176,lh-cuneus,DKT
6,27177,lh-entorhinal,DKT
8,27178,lh-fusiform,DKT


## Plot CT after collating stats

Desikan-Killiany-Tourville (DKT) Atlas
Cortical regions: the frontal pole, temporal pole, and “banks of the superior temporal sulcus” regions were removed as per the DKT protocol.

In [None]:
# Load the collated baseline (BL) and follow-up (FU) tables and report how many
# subjects have both cortical-thickness (CT) and ASEG features at each visit.
FS_stats_dir = "/home/nikhil/projects/brain_changes/brain-diff/fmriprep/stats/"

# Baseline tables live under "bl/", follow-up tables under "m24/"
BL_CT_file = f"{FS_stats_dir}bl/DKTatlas_average_thickness.csv"
BL_ASEG_file = f"{FS_stats_dir}bl/aseg_subcortical_volumes.csv"
FU_CT_file = f"{FS_stats_dir}m24/DKTatlas_average_thickness.csv"
FU_ASEG_file = f"{FS_stats_dir}m24/aseg_subcortical_volumes.csv"

# Every table gets the same treatment: drop the saved index column
# ("Unnamed: 0") and index the rows by subject_id.
BL_CT_df, BL_ASEG_df, FU_CT_df, FU_ASEG_df = [
    pd.read_csv(csv_path).drop(columns=["Unnamed: 0"]).set_index("subject_id")
    for csv_path in (BL_CT_file, BL_ASEG_file, FU_CT_file, FU_ASEG_file)
]

print(f"Number of BL, CT subjects: {len(BL_CT_df)}, number of ROIs: 2 x {len(BL_CT_df.columns)/2}")
print(f"Number of BL, ASEG subjects: {len(BL_ASEG_df)}, number of ROIs: {len(BL_ASEG_df.columns)}")

print(f"Number of FU, CT subjects: {len(FU_CT_df)}, number of ROIs: 2 x {len(FU_CT_df.columns)/2}")
print(f"Number of FU, ASEG subjects: {len(FU_ASEG_df)}, number of ROIs: {len(FU_ASEG_df.columns)}")

# Subjects that have both feature sets at a given visit
BL_subjects = set(BL_CT_df.index).intersection(BL_ASEG_df.index)
FU_subjects = set(FU_CT_df.index).intersection(FU_ASEG_df.index)

# Subjects with complete features at both visits
BL_and_FU_subjects = BL_subjects.intersection(FU_subjects)

print(
    f"Number unique subjects with CT and ASEG features\n BL:{len(BL_subjects)}, "
    f"FU:{len(FU_subjects)}, BLandFU:{len(BL_and_FU_subjects)}"
)

## Read UKBB fields 
- This is based on the UKBB showcase
- Only done once; the resulting field-ID-to-ROI table is saved as a CSV

### DKT CT 

In [None]:
# Load the UKBB DKT field table from the CSV path held in `ukbb_dkt_fields`
ukbb_dkt_fields_df = pd.read_csv(ukbb_dkt_fields)
# Preview the first rows as the cell output
ukbb_dkt_fields_df.head()

### Grab rows with specific stat measure of interest

In [None]:
# Keep only the UKBB DKT fields whose description starts with the measure of interest
stat_measure = "Mean thickness"
description_matches = ukbb_dkt_fields_df["Description"].str.startswith(stat_measure)
ukbb_dkt_measure_df = ukbb_dkt_fields_df[description_matches].copy()

# Join the two-word measure name with an underscore so that splitting the
# description on " " below keeps it as a single token
joined_description = ukbb_dkt_measure_df["Description"].str.replace("Mean thickness","Mean_thickness")
ukbb_dkt_measure_df["Description"] = joined_description
print(f"number of fields with {stat_measure}: {len(ukbb_dkt_measure_df)}")

# Tokens 0, 2 and 3 of the description become the metric, roi and hemi columns
description_tokens = ukbb_dkt_measure_df["Description"].str.split(" ", expand=True)
ukbb_dkt_measure_df[["metric","roi","hemi"]] = description_tokens[[0,2,3]]

# Translate the raw hemisphere token into the lh / rh codes
hemi_codes = {"(left":"lh", "(right":"rh"}
ukbb_dkt_measure_df["hemi"] = ukbb_dkt_measure_df["hemi"].replace(hemi_codes)
ukbb_dkt_measure_df.head()

In [None]:
## Save 
# UKBB_dkt_ct_fields = "/home/nikhil/projects/brain_changes/brain-diff/metadata/UKBB_DKT_CT_Fields.csv"
# ukbb_dkt_measure_df.to_csv(UKBB_dkt_ct_fields)

### ASEG Vols

In [None]:
# Load the UKBB ASEG field table from the CSV path held in `ukbb_aseg_fields`
ukbb_aseg_fields_df = pd.read_csv(ukbb_aseg_fields)
# Report how many fields (rows) the table contains
print(f"Number of fields: {len(ukbb_aseg_fields_df)}")
# Preview the first rows as the cell output
ukbb_aseg_fields_df.head()

### Grab rows with specific stat measure of interest

In [None]:
# Keep only the UKBB ASEG fields whose description starts with the measure of interest
stat_measure = "Volume of"
is_volume_row = ukbb_aseg_fields_df["Description"].str.startswith(stat_measure)
ukbb_aseg_fields_df = ukbb_aseg_fields_df[is_volume_row].copy()

# The "Mean thickness" -> "Mean_thickness" replacement that used to follow was
# copied from the DKT cell. It was dropped here because the rows kept above all
# start with "Volume of", so it had nothing to rewrite.
print(f"number of fields with {stat_measure}: {len(ukbb_aseg_fields_df)}")

ukbb_aseg_fields_df.head()

In [None]:
# Tokens 0, 2 and 3 of the space-separated description become metric, roi and hemi
description_tokens = ukbb_aseg_fields_df["Description"].str.split(" ", expand=True)
ukbb_aseg_fields_df[["metric","roi","hemi"]] = description_tokens[[0,2,3]]

# Turn the raw hemisphere token into a name prefix; "(whole" maps to an empty
# prefix, so such rows end up with hemi_ROI equal to the bare roi
hemi_prefixes = {"(left":"Left-", "(right":"Right-", "(whole":""}
ukbb_aseg_fields_df["hemi"] = ukbb_aseg_fields_df["hemi"].replace(hemi_prefixes)

# Prefix + roi gives the combined hemi_ROI label
hemi_prefix = ukbb_aseg_fields_df["hemi"]
roi_name = ukbb_aseg_fields_df["roi"]
ukbb_aseg_fields_df["hemi_ROI"] = hemi_prefix + roi_name
ukbb_aseg_fields_df.head()

In [None]:
# # Save 
# ukbb_aseg_vol_fields = "/home/nikhil/projects/brain_changes/brain-diff/metadata/UKBB_ASEG_vol_Fields.csv"
# ukbb_aseg_fields_df.to_csv(ukbb_aseg_vol_fields)