## Analyze FS output

1. DKT CT distributions 
2. ASEG vol distribution
3. Cortical and subcortical brain plots (enigma-toolbox)

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import nibabel as nib
from pathlib import Path
import json
import ptitprince as pt

### Load data (paths are specified in a local config file)

In [None]:
# Read the machine-specific settings (dataset location, plot styles) from a
# JSON file addressed relative to the current working directory.
local_config_f = Path('../../local_config.json')
if not local_config_f.exists():
    # Stop here with an actionable message: every later cell reads
    # local_config, so continuing would only fail with a NameError.
    raise FileNotFoundError(
        f"{local_config_f} not found. "
        "Specify a local_config.json with path to nipoppy DATASET_DIR"
    )

with open(local_config_f) as f:
    local_config = json.load(f)

print('local_config:', local_config)

# Group colours used by all plots, ordered [PD, control].
dx_color_palette = local_config['plot_styles']['DX_COLOR_PALETTE']
palette = [dx_color_palette["PD"], dx_color_palette["control"]]

# Show the palette swatches.
sns.palplot(palette)

In [None]:
# All input/output locations are derived from the dataset root in the local config.
# NOTE: several directory strings end with "/" and are joined with another "/",
# so the resulting paths contain doubled slashes.
dataset_dir = local_config['DATASET_DIR']
current_release = local_config['DATASET_RELEASE']

# Pipeline, version and session that select the derivative sub-directories below.
pipeline = "freesurfer"
pipeline_version = "7.3.2"
session = "ses-01"

# Current nipoppy manifest
manifest_csv = f"{dataset_dir}/manifest.csv"

# tabular data
tabular_dir = f"{dataset_dir}/tabular/"

# demographics
demographics_csv = f"{tabular_dir}/demographics.csv"

# Dx
dx_csv = f"{tabular_dir}/assessments/diagnosis.csv"

# MRI session table (merged into the demographics later in the notebook)
mri_sessions_csv = f"{tabular_dir}/mri_info/mri_sessions.csv"

# derivative data
derivatives_dir = f"{dataset_dir}/derivatives/"

# IDPs
idp_dir = f"{derivatives_dir}/{pipeline}/{pipeline_version}/idp/"

# Per-session DKT and ASEG tables
DKT_csv = f"{idp_dir}/{session}/dkt.csv"
ASEG_csv = f"{idp_dir}/{session}/aseg.csv"

# aparc+aseg (nipoppy extractor)
aparc_aseg_tsv = f"{idp_dir}/{session}/fs_stats-aseg-aparc_thickness.tsv"

# UKB encoding of FS fields (DKT + ASEG) and FS6 vs 7 ROI naming maps
region_field_dir = f"{dataset_dir}/results/region_field_ids/"
ukbb_dkt_ct_fields = f"{region_field_dir}/FS_DKT_UKBB_Fields_ROI_map.csv"
ukbb_aseg_vol_fields = f"{region_field_dir}/FS_ASEG_UKBB_Fields_ROI_map.csv"

# save dirs
figs_dir = f"{dataset_dir}/results/{session}/freesurfer/figs/"

### manifest

In [None]:
# Load the nipoppy manifest and keep only the identifying columns.
manifest_cols = ["participant_id", "visit", "session"]
nipoppy_df = pd.read_csv(manifest_csv)[manifest_cols]

# Distinct participants listed in the manifest.
nipoppy_participants = nipoppy_df["participant_id"].unique()
n_nipoppy_participants = len(nipoppy_participants)
print(f"nipoppy participants: {n_nipoppy_participants}")

nipoppy_df.head()

### Diagnosis info
- as confirmed later by the clinicians

In [None]:
# Diagnosis table, restricted to the baseline event.
dx_df = pd.read_csv(dx_csv)
is_baseline = dx_df["redcap_event_name"] == "Baseline (Arm 1: C-OPN)"
dx_df = dx_df[is_baseline]

# Unique participant IDs per analysis group.
dx_group = dx_df["diagnosis_group_for_analysis"]
control_participants = dx_df.loc[dx_group == "control", "participant_id"].unique()
PD_participants = dx_df.loc[dx_group == "PD", "participant_id"].unique()

# Controls first, then PD.
all_participants = [*control_participants, *PD_participants]

print(f"PD + control: {len(all_participants)}")
print(f"Control: {len(control_participants)}")
print(f"PD: {len(PD_participants)}")

dx_df.head()

### UKB - DKT - ASEG fields and names
- These change based on 1) ukbb names 2) FS6 and 3) FS7.

In [None]:
### DKT metadata
# ROI map for the DKT cortical-thickness columns; "<hemi>.<roi>" is the column
# naming that the DKT table is checked against later in the notebook.
DKT_fields_df = pd.read_csv(ukbb_dkt_ct_fields)
DKT_fields_df["hemi_roi"] = DKT_fields_df["hemi"] + "." + DKT_fields_df["roi"]

lh_DKT_fields_df = DKT_fields_df[DKT_fields_df["hemi"]=="lh"]
rh_DKT_fields_df = DKT_fields_df[DKT_fields_df["hemi"]=="rh"]

# ROI names without the hemisphere prefix (taken from the right-hemisphere rows).
CT_rois = list(rh_DKT_fields_df["roi"])
print("-"*50)
print("Loading CT DKT map")
print(f"n_CT_rois: {len(CT_rois)}")
print("-"*50)

lh_CT_rois = lh_DKT_fields_df["hemi_roi"]
rh_CT_rois = rh_DKT_fields_df["hemi_roi"]

# hemi specific dict with FS ROI names.
# Each hemisphere is paired with the ROI names from its own rows, so the
# mapping stays correct even if the lh and rh rows are ordered differently.
lh_CT_roi_dict = dict(zip(lh_CT_rois, lh_DKT_fields_df["roi"]))
rh_CT_roi_dict = dict(zip(rh_CT_rois, rh_DKT_fields_df["roi"]))

### ASEG metadata
ASEG_fields_df = pd.read_csv(ukbb_aseg_vol_fields)
# Values of the hemi column that mark left / right structures. Despite the
# variable names, they are prepended to the ROI name below (i.e. prefixes).
left_hemi_suffixes = ["Left-","lh","left-"]
right_hemi_suffixes = ["Right-","rh","right-"]

# The major FreeSurfer version (text before the first ".") selects which
# ROI-naming columns of the map are used.
roi_naming_version = pipeline_version.split(".",1)[0]
print("Loading vol ASEG map")
print(f"**roi_naming_version: {roi_naming_version}**")

roi_col = f"FS{roi_naming_version}_roi"
hemi_col = f"FS{roi_naming_version}_hemi"

vol_ROIs = ASEG_fields_df[roi_col].values
print(f"n_vol_ROIs: {len(vol_ROIs)}")
vol_hemis = ASEG_fields_df[hemi_col].values
vol_hemi_counts = ASEG_fields_df[hemi_col].value_counts()
print(f"n_rois per hemi: {vol_hemi_counts}")

lh_vol_rois = list(ASEG_fields_df[ASEG_fields_df[hemi_col].isin(left_hemi_suffixes)][roi_col].values)
rh_vol_rois = list(ASEG_fields_df[ASEG_fields_df[hemi_col].isin(right_hemi_suffixes)][roi_col].values)
# Global ROIs are the rows without a hemisphere. This must run BEFORE the
# fillna("") below, which removes the NaNs that isna() relies on.
global_vol_rois = list(ASEG_fields_df[ASEG_fields_df[hemi_col].isna()][roi_col].dropna().values)


print(f"n_lh_ASEG_rois: {len(lh_vol_rois)}")
print(f"n_rh_ASEG_rois: {len(rh_vol_rois)}")
print(f"n_global_ASEG_rois: {len(global_vol_rois)}")
print("-"*50)

ASEG_fields_df[hemi_col] = ASEG_fields_df[hemi_col].fillna("")
# The delimiter, if any, is already part of the hemi value (e.g. "Left-").
ASEG_fields_df["hemi_roi"] = ASEG_fields_df[hemi_col] + ASEG_fields_df[roi_col]
lh_hemi_ASEG_rois = ASEG_fields_df[ASEG_fields_df[hemi_col].isin(left_hemi_suffixes)]["hemi_roi"]
rh_hemi_ASEG_rois = ASEG_fields_df[ASEG_fields_df[hemi_col].isin(right_hemi_suffixes)]["hemi_roi"]

# Hemisphere-prefixed column name -> bare ROI name (both come from the same rows).
lh_hemi_ASEG_roi_dict = dict(zip(lh_hemi_ASEG_rois,lh_vol_rois))
rh_hemi_ASEG_roi_dict = dict(zip(rh_hemi_ASEG_rois,rh_vol_rois))

ASEG_fields_df.head()

### Read DKT data

In [None]:
CT_DKT_df = pd.read_csv(DKT_csv)
# Keep the second "-"-separated token of the ID
# (presumably strips a "sub-" prefix; confirm against the CSV).
CT_DKT_df["participant_id"] = CT_DKT_df["participant_id"].str.split("-", expand=True)[1]

FS_participants = list(CT_DKT_df["participant_id"].unique())
print(f"n_FS_participants: {len(FS_participants)}")

# Check ROI names: schema columns that are absent from the data.
expected_cols = set(DKT_fields_df["hemi_roi"].unique())
data_cols = set(CT_DKT_df.columns)
missing_schema_cols = expected_cols - data_cols
if not missing_schema_cols:
    print("all expected CT DKT ROI names are in the dataframe")
else:
    print(f"missing ROI names in the dataframe: {missing_schema_cols}")

# Data columns that are neither an expected ROI nor the ID are dropped.
unknown_CT_DKT_cols = data_cols - expected_cols - {"participant_id"}
if unknown_CT_DKT_cols:
    print(f"found extra columns in CT DKT: {unknown_CT_DKT_cols}, dropping extra columns...")
    CT_DKT_df = CT_DKT_df.drop(columns=list(unknown_CT_DKT_cols))

In [None]:
# Preview the first rows of the DKT table after the column clean-up.
CT_DKT_df.head()

### Read aparc data

In [None]:
CT_aparc_df = pd.read_csv(aparc_aseg_tsv, sep="\t")

# Columns whose name mentions "thickness".
ct_cols = [name for name in CT_aparc_df.columns if "thickness" in name]

# Drop the "_thickness" suffix, then turn every remaining "_" into ".".
ct_cols_rename = [name.removesuffix("_thickness").replace("_", ".") for name in ct_cols]

col_rename_map_dict = dict(zip(ct_cols, ct_cols_rename))

# Rename, discard the session column, and keep ID + thickness columns only.
CT_aparc_df = CT_aparc_df.rename(columns=col_rename_map_dict)
CT_aparc_df = CT_aparc_df.drop(columns=["session_id"])
CT_aparc_df = CT_aparc_df[["participant_id"] + ct_cols_rename]


CT_aparc_df.head()

### Merge with demographics

In [None]:
parcelation = "aparc"  # or "DKT"

# Attach the diagnosis group to the chosen cortical-thickness table.
startification_col = "diagnosis_group_for_analysis"
demo_cols = ["participant_id", startification_col]

if parcelation == "DKT":
    print(f"using DKT parcelation")
    CT_source_df = CT_DKT_df
else:
    print(f"using aparc parcelation")
    CT_source_df = CT_aparc_df

# Left join: every thickness row is kept, with or without a diagnosis.
# NOTE(review): the DKT IDs are stripped of their prefix on load, while the
# aparc IDs are used as read. Confirm both match dx_df's participant_id format.
CT_demo_df = pd.merge(CT_source_df, dx_df[demo_cols], on="participant_id", how="left")

participants_per_group = CT_demo_df.groupby([startification_col])["participant_id"].nunique()
print(f"participants per group: {participants_per_group}")

CT_demo_df.head()

### Split DKT data into left and right hemisphere

In [None]:
# Build one table per hemisphere: that hemisphere's ROI columns plus the
# ID / diagnosis columns, tagged with a "hemi" label.
lh_CT_demo_df = CT_demo_df[list(lh_CT_rois) + demo_cols].copy()
lh_CT_demo_df["hemi"] = "lh"
rh_CT_demo_df = CT_demo_df[list(rh_CT_rois) + demo_cols].copy()
rh_CT_demo_df["hemi"] = "rh"

# Strip the hemisphere prefix so both tables share the same ROI column names.
lh_CT_demo_df = lh_CT_demo_df.rename(columns=lh_CT_roi_dict)
rh_CT_demo_df = rh_CT_demo_df.rename(columns=rh_CT_roi_dict)


n_roi = len(lh_CT_rois) + len(rh_CT_rois)
print(f"n_roi={n_roi}")

# Stack the hemispheres. CT_demo_df is overwritten, so re-running this cell
# without re-running the merge cell fails on the prefixed column lookup.
CT_demo_df = pd.concat([lh_CT_demo_df,rh_CT_demo_df], axis=0)

CT_demo_df.head()

### Quick QC before plots

In [None]:
def quick_QC(df, check_cols, min_val, max_val, index_col="participant_id"):
    """Flag rows with missing or out-of-range values in the checked columns.

    Args:
        df: Table holding the measurements and the identifier column.
        check_cols: Iterable of column names to inspect (list, dict view,
            generator, ...).
        min_val: Smallest accepted value; values strictly below are outliers.
        max_val: Largest accepted value; values strictly above are outliers.
        index_col: Column whose values identify the flagged rows.

    Returns:
        ``(nan_ids, outlier_ids)``: two lists of ``index_col`` values, one
        entry per flagged row (an identifier repeats when several of its rows
        are flagged). ``(None, None)`` when ``index_col`` is not a column.
    """
    if index_col not in df.columns:
        print("Provide an index column")
        return None, None

    # Materialise once: the columns are used for two separate checks, and a
    # one-shot iterable would be exhausted after the first.
    cols = list(check_cols)
    values = df[cols]

    # Positional (NumPy) masks avoid any label alignment, so tables with
    # repeated index labels are handled row by row.
    nan_rows = values.isna().any(axis=1).to_numpy()
    nan_participants = df.loc[nan_rows, index_col].values

    # NaN compares False on both sides, so a missing value is reported as a
    # NaN only, never as an outlier.
    out_of_range = (values < min_val) | (values > max_val)
    outlier_rows = out_of_range.any(axis=1).to_numpy()
    outlier_participants = df.loc[outlier_rows, index_col].values

    print(f"found {len(nan_participants)} NaNs and {len(outlier_participants)} outliers")
    return list(nan_participants), list(outlier_participants)

In [None]:
# Accepted cortical-thickness range (presumably mm; confirm units).
min_val, max_val = 0.1, 10

# After the hemisphere split the ROI columns carry the bare ROI names,
# i.e. the values of the rename dict.
check_cols = lh_CT_roi_dict.values()
nan_participants, outlier_participants = quick_QC(
    CT_demo_df, check_cols, min_val, max_val, index_col="participant_id"
)
remove_participants = list(set(nan_participants + outlier_participants))

print(f"removing {len(remove_participants)} participants: {remove_participants}")
# A flagged participant loses all of their rows (both hemispheres).
flagged_rows = CT_demo_df["participant_id"].isin(remove_participants)
CT_demo_df = CT_demo_df[~flagged_rows]

### Plot CT

In [None]:
# too long for plots
CT_col_rename_dict = {
    "caudalanteriorcingulate": "Caudal\nAnterior\nCingulate",
    "caudalmiddlefrontal": "Caudal\nMiddle\nFrontal",
    "cuneus": "Cuneus",
    "entorhinal": "Entorhinal",
    "fusiform": "Fusiform",
    "inferiorparietal": "Inferior\nParietal",
    "inferiortemporal": "Inferior\nTemporal",
    "insula": "Insula",
    "isthmuscingulate": "Isthmus\nCingulate",
    "lateraloccipital": "Lateral\nOccipital",
    "lateralorbitofrontal": "Lateral\nOrbitofrontal",
    "lingual": "Lingual",
    "medialorbitofrontal": "Medial\nOrbitofrontal",
    "middletemporal": "Middle\nTemporal",
    "paracentral": "Paracentral",
    "parahippocampal": "Parahippocampal",
    "parsopercularis": "Pars\nOpercularis",
    "parsorbitalis": "Pars\nOrbitalis",
    "parstriangularis": "Pars\nTriangularis",
    "pericalcarine": "Pericalcarine",
    "postcentral": "Postcentral",
    "posteriorcingulate": "Posterior\nCingulate",
    "precentral": "Precentral",
    "precuneus": "Precuneus",
    "rostralanteriorcingulate": "Rostral\nAnterior\nCingulate",
    "rostralmiddlefrontal": "Rostral\nMiddle\nFrontal",
    "superiorfrontal": "Superior\nFrontal",
    "superiorparietal": "Superior\nParietal",
    "superiortemporal": "Superior\nTemporal",
    "supramarginal": "Supra\nmarginal",
    "transversetemporal": "Transverse\nTemporal"
}

In [None]:
save_fig = True

plot_groups = ["control","PD"]
# .copy(): the filtered frame gets a column assigned right below; working on
# an independent copy avoids pandas' chained-assignment warning.
CT_demo_df = CT_demo_df[CT_demo_df["diagnosis_group_for_analysis"].isin(plot_groups)].copy()
CT_demo_df["hemi"] = CT_demo_df["hemi"].replace({"lh":"Left","rh":"Right"})
CT_demo_df = CT_demo_df.rename(columns=CT_col_rename_dict)

# Long format: one row per (participant, hemisphere, ROI).
CT_demo_df_melt = CT_demo_df.melt(
    id_vars=demo_cols + ["hemi"],
    var_name="ROI",
    value_name="CTh")

plot_df = CT_demo_df_melt.copy()
plot_df["ROI"] = plot_df["ROI"].astype(str)

plot_df["group"] = plot_df["diagnosis_group_for_analysis"] # rename for plotting

n_participants = plot_df["participant_id"].nunique()
print(f"n_participants: {n_participants}")
participants_per_group = plot_df.groupby([startification_col])["participant_id"].nunique()
print(f"participants_per_group: {participants_per_group}")

# Pin the hue levels to the order of `palette` ([PD, control]) and of the
# legend labels below; otherwise the colour of each group depends on which
# group happens to appear first in a facet's data.
hue_order = ["PD", "control"]

sns.set_theme(font_scale=2.5)
with sns.axes_style("whitegrid"):
    # One facet per ROI, hemispheres on the x axis, groups as hue.
    g = sns.FacetGrid(plot_df, col = "ROI", col_wrap=8, height = 10, aspect=0.4, sharex=False, sharey=True)
    g = g.map_dataframe(pt.RainCloud, y = "CTh", x = "hemi", hue="group", hue_order=hue_order,
                        data = plot_df, palette=palette, #bw = 0.2,
                        width_viol = 0.6, width_box = 0.3, box_manage_ticks = False,
                        orient = "v", box_showfliers = False,
                        point_size=10, point_alpha=0.3,
                        box_linewidth = 5, cloud_alpha = 0.7, dodge = True)

    g.set_titles(row_template = '{row_name}', col_template = '{col_name}')
    g.set_ylabels("")

    # legend
    # Put the legend out of the figure; the handles are taken from the
    # artists of the current (last drawn) axes.
    children = plt.gca().get_children()

    plt.legend([children[0], children[2]], ['PD', 'control'], bbox_to_anchor=(1.3, 0.7), loc=2,
               frameon=False, title="Group",
               title_fontsize=30, borderaxespad=0. )



if save_fig:
    # savefig does not create missing directories.
    Path(figs_dir).mkdir(parents=True, exist_ok=True)
    g.savefig(f"{figs_dir}/CT_{parcelation}.png")

### Save IDP df for release

In [None]:
# Release file for the cortical-thickness IDPs.
# NOTE(review): at this point CT_demo_df has been restricted to the plotted
# groups and its ROI columns carry the multi-line display labels from the
# plotting cell. Confirm that this is the intended release format.
CTh_release_csv = f"{idp_dir}/CTh_aparc_R1.csv"

print(f"Saving {CTh_release_csv}")

CT_demo_df.to_csv(CTh_release_csv, index=False)

### Volumetric measures

In [None]:
vol_ASEG_df = pd.read_csv(ASEG_csv)

# Keep the second "-"-separated token of the ID
# (presumably strips a "sub-" prefix; confirm against the CSV).
vol_ASEG_df["participant_id"] = vol_ASEG_df["participant_id"].str.split("-", expand=True)[1]

FS_participants = list(vol_ASEG_df["participant_id"].unique())
print(f"n_FS_participants: {len(FS_participants)}")

# Check the FS version and corresponding ROI
expected_cols = set(ASEG_fields_df["hemi_roi"].dropna().unique())
data_cols = set(vol_ASEG_df.columns)

# Schema columns that are absent from the data.
missing_schema_cols = expected_cols - data_cols
if not missing_schema_cols:
    print("all expected vol ASEG ROI names are in the dataframe")
else:
    print(f"missing columns in vol ASEG dataframe: {missing_schema_cols}")


# Data columns that are neither an expected ROI nor the ID (reported only).
unknown_vol_ASEG_cols = data_cols - expected_cols - {"participant_id"}
if unknown_vol_ASEG_cols:
    print(f"found extra columns: {unknown_vol_ASEG_cols}")


### Merge with demographics

In [None]:
## Attach the diagnosis group to the volumetric table
startification_col = "diagnosis_group_for_analysis"
demo_cols = ["participant_id", startification_col]

# Left join: every volumetric row is kept, with or without a diagnosis.
dx_subset_df = dx_df[demo_cols]
vol_ASEG_df = vol_ASEG_df.merge(dx_subset_df, on="participant_id", how="left")

participants_per_group = vol_ASEG_df.groupby([startification_col])["participant_id"].nunique()
print(f"participants per group: {participants_per_group}")

vol_ASEG_df.head()

### Split bilateral volumetric data into left and right hemisphere

In [None]:
# One table per hemisphere: that hemisphere's ROI columns plus the ID /
# diagnosis columns, renamed to bare ROI names and tagged with "hemi".
lh_cols = list(lh_hemi_ASEG_rois) + demo_cols
rh_cols = list(rh_hemi_ASEG_rois) + demo_cols
global_cols = global_vol_rois + demo_cols

lh_vol_ASEG_df = vol_ASEG_df[lh_cols].rename(columns=lh_hemi_ASEG_roi_dict).assign(hemi="lh")
rh_vol_ASEG_df = vol_ASEG_df[rh_cols].rename(columns=rh_hemi_ASEG_roi_dict).assign(hemi="rh")

# Structures without a hemisphere keep their original column names.
global_vol_ASEG_df = vol_ASEG_df[global_cols].assign(hemi="global")

# Stack left on top of right.
bilateral_vol_ASEG_df = pd.concat([lh_vol_ASEG_df, rh_vol_ASEG_df], axis=0)

bilateral_vol_ASEG_df.head()

### Quick QC before plots

In [None]:
# Accepted volume range, shared by the bilateral and the global check.
min_val, max_val = 0, 3000000

# Bilateral structures: columns carry the bare ROI names (the dict values).
check_cols = lh_hemi_ASEG_roi_dict.values()
nan_participants, outlier_participants = quick_QC(
    bilateral_vol_ASEG_df, check_cols, min_val, max_val, index_col="participant_id"
)
remove_participants = list(set(nan_participants + outlier_participants))

print(f"Bilateral regions: removing {len(remove_participants)} participants")
bilateral_flagged = bilateral_vol_ASEG_df["participant_id"].isin(remove_participants)
bilateral_vol_ASEG_df = bilateral_vol_ASEG_df[~bilateral_flagged]

# Global structures.
check_cols = global_vol_rois
nan_participants, outlier_participants = quick_QC(
    global_vol_ASEG_df, check_cols, min_val, max_val, index_col="participant_id"
)
remove_participants = list(set(nan_participants + outlier_participants))

print(f"Global regions: removing {len(remove_participants)} participants")
global_flagged = global_vol_ASEG_df["participant_id"].isin(remove_participants)
global_vol_ASEG_df = global_vol_ASEG_df[~global_flagged]

### Plot ASEG
- hemi 
- global

In [None]:
save_fig = True

# Harmonise the FSv6 thalamus label with the FSv7 one used in hemi_roi_list.
plot_renaming_dict = {"Thalamus-Proper":"Thalamus"} #FSv6 --> FSv7
# Kept from the original cell for backward compatibility; it only has an
# effect if the global table contains a "Thalamus-Proper" column.
global_vol_ASEG_df = global_vol_ASEG_df.rename(columns=plot_renaming_dict)

plot_groups = ["control","PD"]
bilateral_vol_ASEG_df = bilateral_vol_ASEG_df[bilateral_vol_ASEG_df["diagnosis_group_for_analysis"].isin(plot_groups)]

# Long format: one row per (participant, hemisphere, ROI).
vol_ASEG_df_melt = bilateral_vol_ASEG_df.melt(
    id_vars=demo_cols + ["hemi"],
    var_name="ROI",
    value_name="volume",
)

plot_df = vol_ASEG_df_melt.copy()
# The thalamus is a bilateral ROI, so the rename has to act on the ROI labels
# that are plotted here; without it the "Thalamus" facet is empty for FSv6
# naming. Only the plotting frame is relabelled.
plot_df["ROI"] = plot_df["ROI"].astype(str).replace(plot_renaming_dict)
hemi_roi_list = ['Pallidum', 'Thalamus', 'Putamen',  'Amygdala', 'Caudate', 'Hippocampus', 'Accumbens-area',
                'Cerebellum-Cortex','Cerebellum-White-Matter','VentralDC', 'Lateral-Ventricle','Inf-Lat-Vent']

n_participants = plot_df["participant_id"].nunique()
print(f"n_participants: {n_participants}")
participants_per_group = plot_df.groupby([startification_col])["participant_id"].nunique()
print(f"participants_per_group: {participants_per_group}")

plot_df["group"] = plot_df["diagnosis_group_for_analysis"] # rename for plotting

# Pin the hue levels to the order of `palette` ([PD, control]) and of the
# legend labels below; otherwise the colour of each group depends on which
# group happens to appear first in a facet's data.
hue_order = ["PD", "control"]

sns.set_theme(font_scale=4)
with sns.axes_style("whitegrid"):
    # One facet per ROI (in hemi_roi_list order), hemispheres on the x axis.
    g = sns.FacetGrid(plot_df, col = "ROI", col_wrap=4, col_order=hemi_roi_list, height = 10, aspect=1, sharex=False, sharey=False)

    g = g.map_dataframe(pt.RainCloud, y = "volume", x = "hemi", hue="group", hue_order=hue_order,
                        data = plot_df, palette=palette, #bw = 0.2,
                        width_viol = 0.4, width_box = 0.3, box_manage_ticks = False,
                        orient = "v", box_showfliers = False,
                        point_size=10, point_alpha=0.3,
                        box_linewidth = 5, cloud_alpha = 0.7, dodge = True)

    g.set_titles(row_template = '{row_name}', col_template = '{col_name}')
    g.set_ylabels("")

    # legend
    # Put the legend out of the figure; the handles are taken from the
    # artists of the current (last drawn) axes.
    children = plt.gca().get_children()

    plt.legend([children[0], children[2]], ['PD', 'control'], ncols=2, bbox_to_anchor=(-1.2, 4), #loc=1,
               frameon=False, title="Group",
               title_fontsize=50, borderaxespad=0. )
if save_fig:
    # savefig does not create missing directories.
    Path(figs_dir).mkdir(parents=True, exist_ok=True)
    g.savefig(f"{figs_dir}/ASEG_bilateral.png")

### Save IDP df for release

In [None]:
# Release file for the bilateral volumetric IDPs (QC'd rows, plotted groups only).
bilateral_vol_release_csv = f"{idp_dir}/bilateral_vol_aseg_R1.csv"

print(f"Saving {bilateral_vol_release_csv}")

bilateral_vol_ASEG_df.to_csv(bilateral_vol_release_csv, index=False)

#### Plot global vols

In [None]:
save_fig = True

# Rename global regions for brevity
plot_renaming_dict = {"EstimatedTotalIntraCranialVol":"eTIV"}
global_vol_ASEG_df = global_vol_ASEG_df.rename(columns=plot_renaming_dict)

plot_groups = ["control","PD"]
global_vol_ASEG_df = global_vol_ASEG_df[global_vol_ASEG_df["diagnosis_group_for_analysis"].isin(plot_groups)]


# Long format: one row per (participant, ROI); "hemi" is "global" throughout.
global_vol_ASEG_df_melt = global_vol_ASEG_df.melt(
    id_vars=demo_cols + ["hemi"],
    var_name="ROI",
    value_name="volume",
)

global_roi_list = ["eTIV", "SupraTentorial", "TotalGray", "SubCortGray",
                    "CSF","Brain-Stem","3rd-Ventricle","4th-Ventricle"]
# .copy(): a "group" column is assigned below; working on an independent copy
# of the filtered rows avoids pandas' chained-assignment warning.
plot_df = global_vol_ASEG_df_melt[global_vol_ASEG_df_melt["ROI"].isin(global_roi_list)].copy()

n_participants = plot_df["participant_id"].nunique()
print(f"n_participants: {n_participants}")
participants_per_group = plot_df.groupby([startification_col])["participant_id"].nunique()
print(f"participants_per_group: {participants_per_group}")

plot_df["group"] = plot_df["diagnosis_group_for_analysis"] # rename for plotting

# Pin the hue levels to the order of `palette` ([PD, control]) and of the
# legend labels below; otherwise the colour of each group depends on which
# group happens to appear first in a facet's data.
hue_order = ["PD", "control"]

sns.set_theme(font_scale=4)
with sns.axes_style("whitegrid"):
    # One facet per global ROI, in global_roi_list order.
    g = sns.FacetGrid(plot_df, col = "ROI", col_wrap=4, col_order=global_roi_list, height = 10, aspect=1, sharex=False, sharey=False)
    g = g.map_dataframe(pt.RainCloud, y = "volume", x = "hemi", hue="group", hue_order=hue_order,
                        data = plot_df, palette=palette, #bw = 0.2,
                        width_viol = 0.4, width_box = 0.3, box_manage_ticks = False,
                        orient = "v", box_showfliers = False,
                        point_size=10, point_alpha=0.3,
                        box_linewidth = 5, cloud_alpha = 0.7, dodge = True)

    g.set_titles(row_template = '{row_name}', col_template = '{col_name}')
    g.set_ylabels("")

    # legend
    # Put the legend out of the figure; the handles are taken from the
    # artists of the current (last drawn) axes.
    children = plt.gca().get_children()

    plt.legend([children[0], children[2]], ['PD', 'control'], ncols=2, bbox_to_anchor=(-1.25, 2.8), #loc=1,
               frameon=False, title="Group",
               title_fontsize=50, borderaxespad=0. )

if save_fig:
    # savefig does not create missing directories.
    Path(figs_dir).mkdir(parents=True, exist_ok=True)
    g.savefig(f"{figs_dir}/ASEG_global.png")

### Save IDP df for release

In [None]:
# Release file for the global volumetric IDPs (QC'd rows, plotted groups only).
global_vol_release_csv = f"{idp_dir}/global_vol_aseg_R1.csv"

print(f"Saving {global_vol_release_csv}")

global_vol_ASEG_df.to_csv(global_vol_release_csv, index=False)

### Preliminary analysis on group-wise difference in CTh and ASEG volumes
- Also generate enigma-like plots

In [None]:
from enigmatoolbox.plotting import plot_subcortical, plot_cortical
from enigmatoolbox.utils.parcellation import parcel_to_surface
import statsmodels.api as sm
from statsmodels.stats.multitest import multipletests
import statsmodels.formula.api as smf

In [None]:
# Subcortical structures used for the ENIGMA-style analysis: 8 left, then 8 right.
enigma_rois = [
    'Left-Lateral-Ventricle', 'Left-Thalamus', 'Left-Caudate',
    'Left-Putamen', 'Left-Pallidum', 'Left-Hippocampus', 'Left-Amygdala',
    'Left-Accumbens-area', 'Right-Lateral-Ventricle', 'Right-Thalamus',
    'Right-Caudate', 'Right-Putamen', 'Right-Pallidum', 'Right-Hippocampus',
    'Right-Amygdala', 'Right-Accumbens-area'
]

global_vol_roi  = 'EstimatedTotalIntraCranialVol'

print("-"*50)
print(f"n_enigma_rois: {len(enigma_rois)}, global_vol_roi: {global_vol_roi}")
print("-"*50)

# Positions into enigma_rois giving the plotting order: per hemisphere
# accumbens, amygdala, caudate, hippocampus, pallidum, putamen, thalamus,
# lateral ventricle (left block first, then right).
enigma_order = [7, 6, 2, 5, 4, 3, 1, 0, 15, 14, 10, 13, 12, 11, 9, 8]

# NOTE: demo_cols is (re)defined here; other cells assign it as well.
demo_cols = ["participant_id", "diagnosis_group_for_analysis"]

enigma_vol_df = vol_ASEG_df[demo_cols + enigma_rois + [global_vol_roi]]

plot_groups = ["control","PD"]
enigma_vol_df = enigma_vol_df[enigma_vol_df["diagnosis_group_for_analysis"].isin(plot_groups)]

# Sanity checks: per-group mean volume of each ROI, in plotting order.
group_labels = enigma_vol_df["diagnosis_group_for_analysis"]
enigma_vol_df_control = enigma_vol_df[group_labels == "control"]
enigma_vol_df_PD = enigma_vol_df[group_labels == "PD"]

PD_avg_vol = enigma_vol_df_PD[enigma_rois].mean().to_numpy()[enigma_order]
control_avg_vol = enigma_vol_df_control[enigma_rois].mean().to_numpy()[enigma_order]

print(f"PD_avg_vol: {PD_avg_vol}")
print(f"control_avg_vol: {control_avg_vol}")


### get demographics for analysis

In [None]:
baseline_event = "Baseline (Arm 1: C-OPN)"

# Baseline demographics, reduced to ID and sex.
# NOTE: demo_cols is reassigned here; earlier cells that index with the
# diagnosis version of demo_cols must not be re-run after this one.
demo_cols = ["participant_id", "sex"]
demo_df = pd.read_csv(demographics_csv)
demo_df = demo_df.loc[demo_df["redcap_event_name"] == baseline_event, demo_cols]

# Baseline MRI session info, joined onto the demographics (left join).
mri_sessions_df = pd.read_csv(mri_sessions_csv)
mri_sessions_df = mri_sessions_df[mri_sessions_df["redcap_event_name"] == baseline_event]

demo_df = demo_df.merge(mri_sessions_df, on="participant_id", how="left")

demo_df.head()

In [None]:
# Volumes joined with sex / MRI session info and renamed for the model formula.
# NOTE: despite the variable name, the volumes are NOT divided by the
# intracranial volume here; that step is disabled:
# normalized_enigma_vol_df[enigma_rois] = enigma_vol_df[enigma_rois].div(enigma_vol_df[global_vol_roi], axis=0) * 100
model_col_names = {"diagnosis_group_for_analysis": "group", "MRI_age": "age"}
normalized_enigma_vol_df = (
    enigma_vol_df
    .merge(demo_df, on="participant_id", how="left")
    .rename(columns=model_col_names)
)

normalized_enigma_vol_df.head()

In [None]:
stats_df = normalized_enigma_vol_df.copy()

# Short row label -> prefix of the patsy term name it stands for.
# Terms are looked up by NAME: patsy places categorical terms before numeric
# ones, so the coefficient order is not the order written in the formula and
# positional labelling would mislabel the age and sex rows.
term_prefixes = {
    'const': "Intercept",
    'group': "C(group",
    'age': "age",
    'sex': "C(sex)",
    'ICV': "EstimatedTotalIntraCranialVol",
}


def _pick_terms(fit_values, row_to_prefix):
    """Return fit_values re-labelled with the short row names (NaN if a term is absent)."""
    picked = {}
    for row, prefix in row_to_prefix.items():
        matches = [name for name in fit_values.index if name.startswith(prefix)]
        picked[row] = fit_values[matches[0]] if matches else np.nan
    return pd.Series(picked, dtype=float)


betas = pd.DataFrame(index=list(term_prefixes), columns=enigma_rois)
dvals = pd.DataFrame(index=list(term_prefixes), columns=enigma_rois)
pvals = pd.DataFrame(index=list(term_prefixes), columns=enigma_rois)
for roi in enigma_rois:
    # "-" is not allowed in a formula variable name, so the column is renamed.
    new_roi = roi.replace("-","")
    stats_df = stats_df.rename(columns={roi:new_roi})
    model = smf.ols(formula=f"{new_roi} ~ age + C(group, Treatment(reference='control')) + C(sex) + EstimatedTotalIntraCranialVol", data=stats_df).fit()
    roi_betas = _pick_terms(model.params, term_prefixes)
    betas[roi] = roi_betas
    # Effect size of the group term: coefficient divided by the residual SD.
    # The same value is stored in every row; only the 'group' row is read.
    dvals[roi] = roi_betas['group']/np.std(model.resid, ddof=1)
    pvals[roi] = _pick_terms(model.pvalues, term_prefixes)

# reorder results based on enigma order of ROIs (positional, hence .iloc)
b = betas.loc['group'].iloc[enigma_order]
d = dvals.loc['group'].iloc[enigma_order]
# FDR correction across ROIs, then the same reordering.
p = multipletests(pvals.loc['group'].astype(float), method='fdr_bh')[1][enigma_order]

# Effect sizes that do not survive FDR are blanked out.
d_thresholded = d.astype(float).where(p < 0.05, other=np.nan)

# Summary of the LAST fitted model only (final ROI in enigma_rois).
model.summary()

In [None]:
# Display the group effect sizes with non-significant (FDR >= 0.05) ROIs blanked out.
d_thresholded

In [None]:
use_thresholded = False
# visualize results
# Symmetric colour range taken from the UNthresholded effect sizes: both
# variants of the figure then share one scale, and the range stays defined
# when no ROI survives the FDR threshold (d_thresholded all missing).
# Rounded to 2 decimals, like the cortical plot; integer rounding would
# collapse the range to 0 for |d| < 0.5.
max_color = np.round(np.nanmax(np.abs(np.asarray(d, dtype=float))), 2)
min_color = -max_color

print(f"min_color: {min_color}, max_color: {max_color}")

if use_thresholded:
    print("using thresholded effect sizes")
    array_name = d_thresholded
    save_file = f"{figs_dir}/enigma_vols_thresholded_effect_size.png"
else:
    print("using unthresholded effect sizes")
    array_name = d
    save_file = f"{figs_dir}/enigma_vols_unthresholded_effect_size.png"

print(f"save_file: {save_file}")
plot_subcortical(array_name=array_name, size=(900, 250), color_bar='bottom', zoom=1.25, embed_nb=True,
                    interactive=False, share='both', color_range=(min_color, max_color),
                    nan_color=(255, 255, 255, 1), cmap="coolwarm", transparent_bg=True,
                    screenshot=True, filename=save_file)

### Plot cortical thickness

The example datasets from the ENIGMA toolbox do not load. However, we can look at the list and order of CT structures here:
https://github.com/MICA-MNI/ENIGMA/blob/master/enigmatoolbox/datasets/summary_statistics/gge_case-controls_CortThick.csv

This is the same as the default list from the aparc.stats summary.

In [None]:
# Reload the aparc thickness table without the per-hemisphere mean columns,
# keeping the "<name minus _thickness>" column naming (underscores retained).
mean_thickness_cols = ["lh_MeanThickness_thickness","rh_MeanThickness_thickness"]
CT_aparc_df = pd.read_csv(aparc_aseg_tsv, sep="\t")
CT_aparc_df = CT_aparc_df.drop(columns=mean_thickness_cols)

ct_cols = [name for name in CT_aparc_df.columns if "thickness" in name]
ct_cols_rename = [name.removesuffix("_thickness") for name in ct_cols]

suffix_free_names = dict(zip(ct_cols, ct_cols_rename))
CT_aparc_df = CT_aparc_df.rename(columns=suffix_free_names)[["participant_id"] + ct_cols_rename]
CT_aparc_df.head()

### Merge CT with demographics

In [None]:
baseline_event = "Baseline (Arm 1: C-OPN)"

# Baseline demographics (ID + sex).
# NOTE: demo_cols, demo_df, mri_sessions_df and dx_df are all reassigned here.
demo_cols = ["participant_id", "sex"]
demo_df = pd.read_csv(demographics_csv)
demo_df = demo_df.loc[demo_df["redcap_event_name"] == baseline_event, demo_cols]

# Baseline MRI session info.
mri_sessions_df = pd.read_csv(mri_sessions_csv)
mri_sessions_df = mri_sessions_df[mri_sessions_df["redcap_event_name"] == baseline_event]

# Baseline diagnosis group (ID + group only).
dx_cols = ["participant_id", "diagnosis_group_for_analysis"]
dx_df = pd.read_csv(dx_csv)
dx_df = dx_df.loc[dx_df["redcap_event_name"] == baseline_event, dx_cols]

# demographics <- MRI sessions <- diagnosis, all left joins on the ID.
demo_df = demo_df.merge(mri_sessions_df, on="participant_id", how="left")
demo_df = demo_df.merge(dx_df, on="participant_id", how="left")

# Thickness table with covariates, renamed for the model formula.
CT_aparc_demo_df = CT_aparc_df.merge(demo_df, on="participant_id", how="left")
CT_aparc_demo_df = CT_aparc_demo_df.rename(columns={"diagnosis_group_for_analysis":"group", "MRI_age":"age"})
CT_aparc_demo_df.head()

In [None]:
stats_df = CT_aparc_demo_df[CT_aparc_demo_df["group"].isin(["control","PD"])].copy()

# Short row label -> prefix of the patsy term name it stands for.
# Terms are looked up by NAME: patsy places categorical terms before numeric
# ones, so the coefficient order is not the order written in the formula and
# positional labelling would mislabel the age and sex rows.
term_prefixes = {
    'const': "Intercept",
    'group': "C(group",
    'age': "age",
    'sex': "C(sex)",
}


def _pick_terms(fit_values, row_to_prefix):
    """Return fit_values re-labelled with the short row names (NaN if a term is absent)."""
    picked = {}
    for row, prefix in row_to_prefix.items():
        matches = [name for name in fit_values.index if name.startswith(prefix)]
        picked[row] = fit_values[matches[0]] if matches else np.nan
    return pd.Series(picked, dtype=float)


betas = pd.DataFrame(index=list(term_prefixes), columns=ct_cols_rename)
dvals = pd.DataFrame(index=list(term_prefixes), columns=ct_cols_rename)
pvals = pd.DataFrame(index=list(term_prefixes), columns=ct_cols_rename)
for roi in ct_cols_rename:
    # The model is fitted on an underscore-free copy of the column name;
    # results stay keyed by the original ROI name.
    new_roi = roi.replace("_","")
    stats_df = stats_df.rename(columns={roi:new_roi})
    model = smf.ols(formula=f"{new_roi} ~ age + C(group, Treatment(reference='control')) + C(sex)", data=stats_df).fit()
    roi_betas = _pick_terms(model.params, term_prefixes)
    betas[roi] = roi_betas
    # Effect size of the group term: coefficient divided by the residual SD.
    # The same value is stored in every row; only the 'group' row is read.
    dvals[roi] = roi_betas['group']/np.std(model.resid, ddof=1)
    pvals[roi] = _pick_terms(model.pvalues, term_prefixes)

# Group-term results per ROI, in the column order of the thickness table.
b = betas.loc['group']
d = dvals.loc['group']
# FDR correction across ROIs.
p = multipletests(pvals.loc['group'].astype(float), method='fdr_bh')[1]

# Effect sizes that do not survive FDR are blanked out.
d_thresholded = d.astype(float).where(p < 0.05, other=np.nan)


print(f"max d: {d.max()}, (max, min) thresholded d: {d_thresholded.max()}, {d_thresholded.min()}")
# Summary of the LAST fitted model only (final ROI in ct_cols_rename).
model.summary()

In [None]:
# Project the per-parcel (unthresholded) effect sizes onto the fsa5 surface.
plot_val = parcel_to_surface(d, 'aparc_fsa5')

print(plot_val.shape)

# Symmetric colour range around zero, rounded to two decimals.
max_color = np.abs(plot_val).max().round(2)
min_color = -max_color

save_file = f"{figs_dir}/enigma_CT_effect_size.png"
print(f"min_color: {min_color}, max_color: {max_color}")
print(f"save_file: {save_file}")

cortical_plot_options = dict(
    surface_name="fsa5", size=(900, 250), color_bar='bottom', zoom=1.25, embed_nb=True,
    interactive=True, share='both', color_range=(min_color, max_color),
    nan_color=(255, 255, 255, 1), cmap="coolwarm", transparent_bg=True,
    screenshot=True, filename=save_file,
)
plot_cortical(array_name=plot_val, **cortical_plot_options)