## Code to analyze FS output

1. DKT CT distributions 
2. ASEG vol distribution
3. Surface plots

In [None]:
import sys
import pandas as pd
import scipy.stats
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import nibabel as nib
# from nilearn import datasets, surface, plotting
from pathlib import Path

In [None]:
# Run configuration: dataset, release, FreeSurfer version and session to analyze
dataset = "qpn"
current_release = "Oct_2024"
FS_version = "7.3.2" #"6.0.1" #"7.3.2"
session = "ses-01"

# NOTE: the directory strings below end in "/" and are then joined with another "/",
# so the resulting paths contain "//" separators.
dataset_dir = f"/home/nikhil/projects/Parkinsons/{dataset}/"
release_dir = f"{dataset_dir}/releases/{current_release}/"
tabular_dir = f"{release_dir}/tabular/"

# Current nipoppy manifest
manifest_csv = f"{release_dir}/manifest.csv"

# demographics
demographics_csv = f"{tabular_dir}/demographics.csv"

# Dx
dx_csv = f"{tabular_dir}/assessments/diagnosis.csv"

# mri_info
mri_sessions_csv = f"{tabular_dir}/mri_info/mri_sessions.csv"

# imaging derivatives (per FreeSurfer version and session)
FS_dir = f"{dataset_dir}/derivatives/freesurfer/v{FS_version}"
FS_DKT_dir = f"{FS_dir}/IDP/{session}/"
DKT_csv = f"{FS_DKT_dir}/dkt.csv"
ASEG_csv = f"{FS_DKT_dir}/aseg.csv"

# aparc+aseg (nipoppy extractor); relies on the trailing "/" of FS_DKT_dir
aparc_aseg_tsv = f"{FS_DKT_dir}fs_stats-aseg-aparc_thickness.tsv"

# UKB encoding of FS fields (DKT + asg) and FS6 vs 7 ROI naming maps
region_field_dir = "/home/nikhil/projects/Parkinsons/region_field_ids/"
ukbb_dkt_ct_fields = f"{region_field_dir}/FS_DKT_UKBB_Fields_ROI_map.csv"
ukbb_aseg_vol_fields = f"{region_field_dir}/FS_ASEG_UKBB_Fields_ROI_map.csv"

# save dirs: aggregated dataframes and figures
save_dir = f"/home/nikhil/projects/Parkinsons/neuro_arch/analysis/IDP/{dataset}/{current_release}/agg_dfs/"
figs_dir = f"{dataset_dir}/results/{session}/anat/figs/"

# Create dirs for results if they don't exist
Path(f"{save_dir}").mkdir(parents=True, exist_ok=True)
Path(f"{figs_dir}").mkdir(parents=True, exist_ok=True)

### Colormaps

In [None]:
from enum import Enum


class my_colors(Enum):
    """Hex colors used for the two diagnosis groups in the plots."""

    CONTROL = "#8d99ae"
    PD = "#e63946"


# Palette order is PD first, then control.
color_list = [member.value for member in (my_colors.PD, my_colors.CONTROL)]
palette = sns.color_palette(palette=color_list)

# Show the palette swatches in the notebook.
sns.palplot(palette)

In [None]:
def quick_QC(df, check_cols, min_val, max_val, index_col="participant_id"):
    """Report rows with missing or out-of-range values in the given columns.

    Args:
        df: DataFrame holding ``index_col`` and every column in ``check_cols``.
        check_cols: Iterable of column names to check. It is materialized once,
            so dict views and one-shot iterables (generators) work for both
            the NaN check and the range check.
        min_val: Smallest accepted value; anything strictly below is an outlier.
        max_val: Largest accepted value; anything strictly above is an outlier.
        index_col: Column whose values identify the flagged rows.

    Returns:
        ``(nan_ids, outlier_ids)``: two lists of ``index_col`` values, one entry
        per flagged row (an id repeats when several of its rows are flagged).
        ``(None, None)`` when ``index_col`` is not a column of ``df``.
    """
    if index_col not in df.columns:
        print("Provide an index column")
        return None, None

    # Materialize once: the columns are used twice below.
    cols = list(check_cols)
    values = df[cols]

    # check NaNs
    nan_rows = values.isna().any(axis=1)
    nan_participants = df[nan_rows][index_col].values
    n_nans = len(nan_participants)

    # check range (NaN compares False on both sides, so it is never an outlier)
    out_of_range_rows = ((values < min_val) | (values > max_val)).any(axis=1)
    outlier_participants = df[out_of_range_rows][index_col].values
    n_outliers = len(outlier_participants)

    print(f"found {n_nans} NaNs and {n_outliers} outliers")
    return list(nan_participants), list(outlier_participants)

### manifest

In [None]:
# Load the nipoppy manifest and keep only the identifying columns
manifest_cols = ["participant_id", "visit", "session"]
nipoppy_df = pd.read_csv(manifest_csv)
nipoppy_df = nipoppy_df[manifest_cols] 
# Count distinct participants listed in the manifest
nipoppy_participants = nipoppy_df["participant_id"].unique()
n_nipoppy_participants = len(nipoppy_participants)
print(f"nipoppy participants: {n_nipoppy_participants}")
nipoppy_df.head()

### Diagnosis info
- as confirmed later by the clinicians

In [None]:
# Load diagnosis table and keep only rows of the baseline redcap event
dx_df = pd.read_csv(dx_csv)
dx_df = dx_df[dx_df["redcap_event_name"] == "Baseline (Arm 1: C-OPN)"]

# Unique participant ids per analysis group
control_participants = dx_df[dx_df["diagnosis_group_for_analysis"] == "control"]["participant_id"].unique()
PD_participants = dx_df[dx_df["diagnosis_group_for_analysis"] == "PD"]["participant_id"].unique()

# Controls first, then PD; participants in any other group are not included
all_participants = list(control_participants) + list(PD_participants)

print(f"PD + control: {len(all_participants)}")
print(f"Control: {len(control_participants)}")
print(f"PD: {len(PD_participants)}")

dx_df.head()

### UKB - DKT - ASEG fields and names
- These change based on 1) ukbb names 2) FS6 and 3) FS7.

In [None]:
### DKT metadata
# Build "<hemi>.<roi>" column names from the field map
DKT_fields_df = pd.read_csv(ukbb_dkt_ct_fields)
DKT_fields_df["hemi_roi"] = DKT_fields_df["hemi"] + "." + DKT_fields_df["roi"]
# DKT_field_roi_dict = dict(zip(DKT_fields_df["Field ID"].values.astype("str"),DKT_fields_df["hemi_roi"].values))

# Hemisphere-free ROI names, taken from the right-hemisphere rows
CT_rois = list(DKT_fields_df[DKT_fields_df["hemi"]=="rh"]["roi"])
print("-"*50)
print(f"Loading CT DKT map")
print(f"n_CT_rois: {len(CT_rois)}")
print("-"*50)

lh_CT_rois = DKT_fields_df[DKT_fields_df["hemi"]=="lh"]["hemi_roi"]
rh_CT_rois = DKT_fields_df[DKT_fields_df["hemi"]=="rh"]["hemi_roi"]

# hemi specific dict with FS ROI names
# NOTE(review): both dicts pair by position with CT_rois (rh order), so this assumes
# the lh and rh rows list their ROIs in the same order — confirm against the CSV
lh_CT_roi_dict = dict(zip(lh_CT_rois,CT_rois))
rh_CT_roi_dict = dict(zip(rh_CT_rois,CT_rois))

### ASEG metadata
ASEG_fields_df = pd.read_csv(ukbb_aseg_vol_fields)
# Values of the hemi column that mark a left / right ROI (prepended to the ROI name below)
left_hemi_suffixes = ["Left-","lh","left-"]
right_hemi_suffixes = ["Right-","rh","right-"]

# Major FreeSurfer version selects which naming columns of the map to use
roi_naming_version = FS_version.split(".",1)[0]
print(f"Loading vol ASEG map")
print(f"**roi_naming_version: {roi_naming_version}**")

roi_col = f"FS{roi_naming_version}_roi"
hemi_col = f"FS{roi_naming_version}_hemi"

vol_ROIs = ASEG_fields_df[roi_col].values
print(f"n_vol_ROIs: {len(vol_ROIs)}")
vol_hemis = ASEG_fields_df[hemi_col].values
vol_hemi_counts = ASEG_fields_df[hemi_col].value_counts()
print(f"n_rois per hemi: {vol_hemi_counts}")

# ROI names split by hemisphere; rows with a missing hemi value are treated as global ROIs
lh_vol_rois = list(ASEG_fields_df[ASEG_fields_df[hemi_col].isin(left_hemi_suffixes)][roi_col].values)
rh_vol_rois = list(ASEG_fields_df[ASEG_fields_df[hemi_col].isin(right_hemi_suffixes)][roi_col].values)
global_vol_rois = list(ASEG_fields_df[ASEG_fields_df[hemi_col].isna()][roi_col].dropna().values)


print(f"n_lh_ASEG_rois: {len(lh_vol_rois)}")
print(f"n_rh_ASEG_rois: {len(rh_vol_rois)}")
print(f"n_global_ASEG_rois: {len(global_vol_rois)}")
print("-"*50)

# The global ROI list above must be computed before this fillna, which removes the NaN hemi markers
ASEG_fields_df[hemi_col] = ASEG_fields_df[hemi_col].fillna("")
ASEG_fields_df["hemi_roi"] = ASEG_fields_df[hemi_col] + ASEG_fields_df[roi_col] # the delimiter, if any, is part of the hemi value
lh_hemi_ASEG_rois = ASEG_fields_df[ASEG_fields_df[hemi_col].isin(left_hemi_suffixes)]["hemi_roi"]
rh_hemi_ASEG_rois = ASEG_fields_df[ASEG_fields_df[hemi_col].isin(right_hemi_suffixes)]["hemi_roi"]

# Map hemisphere-prefixed column names to hemisphere-free ROI names
lh_hemi_ASEG_roi_dict = dict(zip(lh_hemi_ASEG_rois,lh_vol_rois))
rh_hemi_ASEG_roi_dict = dict(zip(rh_hemi_ASEG_rois,rh_vol_rois))

ASEG_fields_df.head()

### Read DKT data

In [None]:
CT_DKT_df = pd.read_csv(DKT_csv)
# Keep the second "-"-delimited field of the id
CT_DKT_df["participant_id"] = CT_DKT_df["participant_id"].str.split("-", expand=True)[1]

FS_participants = list(CT_DKT_df["participant_id"].unique())
print(f"n_FS_participants: {len(FS_participants)}")

# Check ROI names: every ROI of the field map should be a column of the data
expected_cols = set(DKT_fields_df["hemi_roi"].unique())
data_cols = set(CT_DKT_df.columns)
# Missing = expected by the map but absent from the data (not the other way around)
extra_schema_cols = expected_cols - data_cols
if not extra_schema_cols:
    print("all expected CT DKT ROI names are in the dataframe")
else:
    print(f"missing ROI names in the dataframe: {extra_schema_cols}")

# Columns present in the data but unknown to the map are dropped
unknown_CT_DKT_cols = data_cols - expected_cols - {"participant_id"}
if unknown_CT_DKT_cols:
    print(f"found extra columns in CT DKT: {unknown_CT_DKT_cols}, dropping extra columns...")
    CT_DKT_df = CT_DKT_df.drop(columns=list(unknown_CT_DKT_cols))

In [None]:
CT_DKT_df.head()

### Read aparc data

In [None]:
# Load the nipoppy-extracted stats table (tab separated)
CT_aparc_df = pd.read_csv(aparc_aseg_tsv, sep="\t")

# Thickness columns are identified by the substring "thickness"
ct_cols = [col for col in CT_aparc_df.columns if "thickness" in col]

# remove thickness suffix
ct_cols_rename = [col.removesuffix("_thickness") for col in ct_cols]

# replace "_" with "." (every underscore left in the name is replaced)
ct_cols_rename = [col.replace("_",".") for col in ct_cols_rename]

col_rename_map_dict = dict(zip(ct_cols, ct_cols_rename))

# rename columns
CT_aparc_df = CT_aparc_df.rename(columns=col_rename_map_dict).drop(columns=["session_id"])
# keep only the participant id and the renamed thickness columns
CT_aparc_df = CT_aparc_df[["participant_id"] + ct_cols_rename]


CT_aparc_df.head()

### Merge with demographics

In [None]:
parcelation = "aparc" # or "DKT"
## Merge with demographics

# Column of dx_df used to stratify participants into groups
startification_col = "diagnosis_group_for_analysis"
demo_cols = ["participant_id", startification_col]

# Left merge: every imaging row is kept, rows without a dx entry get NaN in the group column
if parcelation == "DKT":
    print(f"using DKT parcelation")
    CT_demo_df = pd.merge(CT_DKT_df,dx_df[demo_cols],on="participant_id",how="left")
else:
    print(f"using aparc parcelation")
    CT_demo_df = pd.merge(CT_aparc_df,dx_df[demo_cols],on="participant_id",how="left")

participants_per_group = CT_demo_df.groupby([startification_col])["participant_id"].nunique()
print(f"participants per group: {participants_per_group}")

CT_demo_df.head()

### Split DKT data into left and right hemisphere

In [None]:
save_results = False

# One frame per hemisphere: that hemisphere's ROI columns plus the demographic columns
lh_CT_demo_df = CT_demo_df[list(lh_CT_rois) + demo_cols].copy()
lh_CT_demo_df["hemi"] = "lh"
rh_CT_demo_df = CT_demo_df[list(rh_CT_rois) + demo_cols].copy()
rh_CT_demo_df["hemi"] = "rh"

# Strip the hemisphere prefix so both frames share the same ROI column names
lh_CT_demo_df = lh_CT_demo_df.rename(columns=lh_CT_roi_dict)
rh_CT_demo_df = rh_CT_demo_df.rename(columns=rh_CT_roi_dict)

# Total number of hemisphere-specific ROI columns
n_roi = len(lh_CT_rois) + len(rh_CT_rois)
print(f"n_roi={n_roi}")

# Stack hemispheres row-wise; the "hemi" column tells them apart
CT_demo_df = pd.concat([lh_CT_demo_df,rh_CT_demo_df], axis=0)

if save_results:
    CT_demo_df.to_csv(f"{save_dir}/CT_demo_df.csv")
    
CT_demo_df.head()

### Quick QC before plots

In [None]:
# ROI column names after the hemisphere prefix was stripped
check_cols = lh_CT_roi_dict.values()
# Accepted value range for the checked columns
min_val = 0.1
max_val = 10
# NOTE: quick_QC returns (None, None) when the index column is missing, in which case
# the list concatenation below raises a TypeError
nan_participants, outlier_participants = quick_QC(CT_demo_df, check_cols, min_val, max_val, index_col="participant_id")
remove_participants = list(set(nan_participants + outlier_participants))

# Drop every row of a flagged participant
print(f"removing {len(remove_participants)} participants: {remove_participants}")
CT_demo_df = CT_demo_df[~CT_demo_df["participant_id"].isin(remove_participants)]

### Plot CT

In [None]:
save_fig = True

# Restrict to the two groups being compared (CT_demo_df is overwritten with the subset)
plot_groups = ["control","PD"]
CT_demo_df = CT_demo_df[CT_demo_df["diagnosis_group_for_analysis"].isin(plot_groups)]

# Long format: one row per (participant, hemi, ROI) with the value in "CTh"
CT_demo_df_melt = CT_demo_df.melt(
    id_vars=demo_cols + ["hemi"],
    var_name="ROI", 
    value_name="CTh")

plot_df = CT_demo_df_melt.copy()
plot_df["ROI"] = plot_df["ROI"].astype(str)

plot_df["group"] = plot_df["diagnosis_group_for_analysis"] # rename for plotting

n_participants = plot_df["participant_id"].nunique()
print(f"n_participants: {n_participants}")
participants_per_group = plot_df.groupby([startification_col])["participant_id"].nunique()
print(f"participants_per_group: {participants_per_group}")

# Box plot per ROI, one facet column per hemisphere, colored by group
sns.set_theme(font_scale=2.5)
with sns.axes_style("whitegrid"):
    g = sns.catplot(y="ROI",x="CTh", hue="group", col="hemi",kind="box",palette=palette, data=plot_df, aspect=0.5, height=20)
    # g.tick_params(axis='x', rotation=90, labelsize=14)

if save_fig:
    g.savefig(f"{figs_dir}/CT_{parcelation}.png")

### Save CT DKT data

In [None]:
# Toggle writing the aggregated CT table to save_dir
save_agg_CT_df = False

if save_agg_CT_df:
    save_file = f"{save_dir}/CT_demo_df.csv"
    print(f"Saving CT_demo_df to {save_file}")
    # Written without the DataFrame index
    CT_demo_df.to_csv(save_file, index=False)

CT_demo_df.head()

### Volumetric measures

In [None]:
vol_ASEG_df = pd.read_csv(ASEG_csv)

# Keep the second "-"-delimited field of the id
vol_ASEG_df["participant_id"] = vol_ASEG_df["participant_id"].str.split("-", expand=True)[1]

FS_participants = list(vol_ASEG_df["participant_id"].unique())
print(f"n_FS_participants: {len(FS_participants)}")

# Check the FS version and corresponding ROI

expected_cols = set(ASEG_fields_df["hemi_roi"].dropna().unique())
data_cols = set(vol_ASEG_df.columns)

# Missing = expected by the ASEG field map but absent from the data
extra_schema_cols = expected_cols - data_cols
if not extra_schema_cols:
    print("all expected vol ASEG ROI names are in the dataframe")
else:
    print(f"missing columns in vol ASEG dataframe: {extra_schema_cols}")


# Columns in the data that the field map does not know about (reported, not dropped)
unknown_vol_ASEG_cols = data_cols - expected_cols - {"participant_id"}
if unknown_vol_ASEG_cols:
    print(f"found extra columns: {unknown_vol_ASEG_cols}")


### Merge with demographics

In [None]:
## Merge with demographics
# Column of dx_df used to stratify participants into groups
startification_col = "diagnosis_group_for_analysis"
demo_cols = ["participant_id", startification_col]
# Left merge: every imaging row is kept, rows without a dx entry get NaN in the group column
vol_ASEG_df = pd.merge(vol_ASEG_df,dx_df[demo_cols],on="participant_id",how="left")

participants_per_group = vol_ASEG_df.groupby([startification_col])["participant_id"].nunique()
print(f"participants per group: {participants_per_group}")

vol_ASEG_df.head()

### Split bilateral volumetric data into left and right hemisphere

In [None]:
save_results = False

# One frame per hemisphere plus one for the global (non-lateralized) ROIs
lh_vol_ASEG_df = vol_ASEG_df[list(lh_hemi_ASEG_rois) + demo_cols].copy()
lh_vol_ASEG_df["hemi"] = "lh"
rh_vol_ASEG_df = vol_ASEG_df[list(rh_hemi_ASEG_rois) + demo_cols].copy()
rh_vol_ASEG_df["hemi"] = "rh"
global_vol_ASEG_df = vol_ASEG_df[global_vol_rois + demo_cols].copy()
global_vol_ASEG_df["hemi"] = "global"

# Strip the hemisphere prefix so both hemisphere frames share the same ROI column names
lh_vol_ASEG_df = lh_vol_ASEG_df.rename(columns=lh_hemi_ASEG_roi_dict)
rh_vol_ASEG_df = rh_vol_ASEG_df.rename(columns=rh_hemi_ASEG_roi_dict)
# global_vol_ASEG_df = global_vol_ASEG_df.rename(columns=global_ASEG_roi_dict)

# Stack hemispheres row-wise; the "hemi" column tells them apart
bilateral_vol_ASEG_df = pd.concat([lh_vol_ASEG_df,rh_vol_ASEG_df], axis=0)

if save_results:
    bilateral_vol_ASEG_df.to_csv(f"{save_dir}/bilateral_vol_ASEG_df.csv")
    global_vol_ASEG_df.to_csv(f"{save_dir}/global_vol_ASEG_df.csv")

bilateral_vol_ASEG_df.head()

### Quick QC before plots

In [None]:
# ROI column names after the hemisphere prefix was stripped
check_cols = lh_hemi_ASEG_roi_dict.values()
# Accepted value range, shared by the bilateral and the global check
min_val = 0
max_val = 3000000
nan_participants, outlier_participants = quick_QC(bilateral_vol_ASEG_df, check_cols, min_val, max_val, index_col="participant_id")
remove_participants = list(set(nan_participants + outlier_participants))

# Drop every row of a participant flagged in the bilateral table
print(f"Bilateral regions: removing {len(remove_participants)} participants")
bilateral_vol_ASEG_df = bilateral_vol_ASEG_df[~bilateral_vol_ASEG_df["participant_id"].isin(remove_participants)]

# Same check on the global ROIs; the two tables are filtered independently
check_cols = global_vol_rois
nan_participants, outlier_participants = quick_QC(global_vol_ASEG_df, check_cols, min_val, max_val, index_col="participant_id")
remove_participants = list(set(nan_participants + outlier_participants))

print(f"Global regions: removing {len(remove_participants)} participants")
global_vol_ASEG_df = global_vol_ASEG_df[~global_vol_ASEG_df["participant_id"].isin(remove_participants)]

### Plot ASEG
- hemi 
- global

In [None]:
save_fig = True

# Shorten FreeSurfer 6 ROI names to the names used in hemi_roi_list below
plot_renaming_dict = {"Thalamus-Proper":"Thalamus"} #FSv6 --> FSv7

# Restrict to the two groups being compared
plot_groups = ["control","PD"]
bilateral_vol_ASEG_df = bilateral_vol_ASEG_df[bilateral_vol_ASEG_df["diagnosis_group_for_analysis"].isin(plot_groups)]

# Long format: one row per (participant, hemi, ROI) with the value in "volume"
vol_ASEG_df_melt = bilateral_vol_ASEG_df.melt(
    id_vars=demo_cols + ["hemi"],
    var_name="ROI", 
    value_name="volume",
)

plot_df = vol_ASEG_df_melt.copy()
# Rename the ROI labels of the bilateral plotting frame (the frame that is plotted
# here), so the "Thalamus" facet is populated under FS6 naming as well. Only the
# plotting copy is touched; bilateral_vol_ASEG_df keeps its column names.
plot_df["ROI"] = plot_df["ROI"].astype(str).replace(plot_renaming_dict)
hemi_roi_list = ['Pallidum', 'Thalamus', 'Putamen',  'Amygdala', 'Caudate', 'Hippocampus', 'Accumbens-area', 
                'Cerebellum-Cortex','Cerebellum-White-Matter','VentralDC', 'Lateral-Ventricle','Inf-Lat-Vent']

n_participants = plot_df["participant_id"].nunique()
print(f"n_participants: {n_participants}")
participants_per_group = plot_df.groupby([startification_col])["participant_id"].nunique()
print(f"participants_per_group: {participants_per_group}")

plot_df["group"] = plot_df["diagnosis_group_for_analysis"] # rename for plotting

# One facet per ROI (in hemi_roi_list order), hemispheres on x, colored by group
sns.set_theme(font_scale=4)
with sns.axes_style("whitegrid"):
    g = sns.catplot(y="volume",x="hemi", hue="group", col="ROI",kind="box", col_wrap=6, col_order=hemi_roi_list,
    palette=palette, data=plot_df, aspect=1, height=10, sharey=False)
    # g.tick_params(axis='x', rotation=90, labelsize=14)

if save_fig:
    g.savefig(f"{figs_dir}/ASEG_bilateral.png")

### Save hemi aseg vols

In [None]:
# Toggle writing the aggregated bilateral ASEG table to save_dir
save_agg_aseg_df = False

if save_agg_aseg_df:
    save_file = f"{save_dir}/bilateral_vol_ASEG_df.csv"
    # Log the dataframe that is actually written
    print(f"Saving bilateral_vol_ASEG_df to {save_file}")
    bilateral_vol_ASEG_df.to_csv(save_file, index=False)

bilateral_vol_ASEG_df.head()

In [None]:
save_fig = True

# Rename global regions for brevity
plot_renaming_dict = {"EstimatedTotalIntraCranialVol":"eTIV"}
global_vol_ASEG_df = global_vol_ASEG_df.rename(columns=plot_renaming_dict)

# Restrict to the two groups being compared
plot_groups = ["control","PD"]
global_vol_ASEG_df = global_vol_ASEG_df[global_vol_ASEG_df["diagnosis_group_for_analysis"].isin(plot_groups)]


# Long format: one row per (participant, ROI) with the value in "volume"
global_vol_ASEG_df_melt = global_vol_ASEG_df.melt(
    id_vars=demo_cols + ["hemi"],
    var_name="ROI", 
    value_name="volume",
)

plot_df = global_vol_ASEG_df_melt.copy()

# Only these global ROIs are plotted, in this facet order
global_roi_list = ["eTIV", "SupraTentorial", "TotalGray", "SubCortGray", 
                    "CSF","Brain-Stem","3rd-Ventricle","4th-Ventricle"]
plot_df = plot_df[plot_df["ROI"].isin(global_roi_list)]

n_participants = plot_df["participant_id"].nunique()
print(f"n_participants: {n_participants}")
participants_per_group = plot_df.groupby([startification_col])["participant_id"].nunique()
print(f"participants_per_group: {participants_per_group}")

plot_df["group"] = plot_df["diagnosis_group_for_analysis"] # rename for plotting


sns.set_theme(font_scale=4)
with sns.axes_style("whitegrid"):
    g = sns.catplot(y="volume",x="hemi", hue="group", col="ROI",kind="box", col_wrap=4, col_order=global_roi_list,
    palette=palette, data=plot_df, aspect=1, height=10, sharey=False)
    # "hemi" is the constant "global" here, so the x label and tick labels are blanked
    g.set_xlabels("")
    g.set_xticklabels("")

if save_fig:
    g.savefig(f"{figs_dir}/ASEG_global.png")

### Save global aseg vols

In [None]:
# Toggle writing the aggregated global ASEG table to save_dir
save_agg_aseg_df = False

if save_agg_aseg_df:
    save_file = f"{save_dir}/global_vol_ASEG_df.csv"
    # Log the dataframe that is actually written
    print(f"Saving global_vol_ASEG_df to {save_file}")
    global_vol_ASEG_df.to_csv(save_file, index=False)

global_vol_ASEG_df.head()

### Make enigma-plots

In [None]:
from enigmatoolbox.plotting import plot_subcortical, plot_cortical
from enigmatoolbox.datasets import load_example_data
from enigmatoolbox.utils.parcellation import parcel_to_surface
import statsmodels.api as sm
from statsmodels.stats.multitest import multipletests
import statsmodels.formula.api as smf

In [None]:
# Subcortical ROI columns used for the enigma plots: 8 left, then the same 8 right
enigma_rois = [
    'Left-Lateral-Ventricle', 'Left-Thalamus', 'Left-Caudate', 
    'Left-Putamen', 'Left-Pallidum', 'Left-Hippocampus', 'Left-Amygdala', 
    'Left-Accumbens-area', 'Right-Lateral-Ventricle', 'Right-Thalamus', 
    'Right-Caudate', 'Right-Putamen', 'Right-Pallidum', 'Right-Hippocampus', 
    'Right-Amygdala', 'Right-Accumbens-area'
]

global_vol_roi  = 'EstimatedTotalIntraCranialVol'

print("-"*50)
print(f"n_enigma_rois: {len(enigma_rois)}, global_vol_roi: {global_vol_roi}")
print("-"*50)

# for plotting purposes: positions into enigma_rois, in the order the values are passed on
# NOTE(review): presumably the structure order expected by the enigma plotting call — confirm
enigma_order = [7, 6, 2, 5, 4, 3, 1, 0, 15, 14, 10, 13, 12, 11, 9, 8]

demo_cols = ["participant_id", "diagnosis_group_for_analysis"]

enigma_vol_df = vol_ASEG_df[demo_cols + enigma_rois + [global_vol_roi]]

# Restrict to the two groups being compared
plot_groups = ["control","PD"]
enigma_vol_df = enigma_vol_df[enigma_vol_df["diagnosis_group_for_analysis"].isin(plot_groups)]

# Sanity checks
enigma_vol_df_control = enigma_vol_df[enigma_vol_df["diagnosis_group_for_analysis"] == "control"]
enigma_vol_df_PD = enigma_vol_df[enigma_vol_df["diagnosis_group_for_analysis"] == "PD"]

# Per-ROI group means, reordered with enigma_order
PD_avg_vol = enigma_vol_df_PD[enigma_rois].mean().values[enigma_order]
control_avg_vol = enigma_vol_df_control[enigma_rois].mean().values[enigma_order]

print(f"PD_avg_vol: {PD_avg_vol}")
print(f"control_avg_vol: {control_avg_vol}")


### get demographics for analysis

In [None]:
# Baseline demographics: keep participant id and sex only
demo_df = pd.read_csv(demographics_csv)
demo_df = demo_df[demo_df["redcap_event_name"] == "Baseline (Arm 1: C-OPN)"]

# NOTE: this rebinds demo_cols, which other cells use for the diagnosis columns
demo_cols = ["participant_id", "sex"]
demo_df = demo_df[demo_cols]

# Baseline MRI session info
mri_sessions_df = pd.read_csv(mri_sessions_csv)
mri_sessions_df = mri_sessions_df[mri_sessions_df["redcap_event_name"] == "Baseline (Arm 1: C-OPN)"]


# Left merge: every demographics row is kept
demo_df = pd.merge(demo_df, mri_sessions_df, on="participant_id", how="left")

demo_df.head()

In [None]:
# NOTE: despite the name, the volumes are NOT normalized here — the eTIV scaling
# below is disabled and eTIV is used as a model covariate instead.
normalized_enigma_vol_df = enigma_vol_df.copy()
# normalized_enigma_vol_df[enigma_rois] = enigma_vol_df[enigma_rois].div(enigma_vol_df[global_vol_roi], axis=0) * 100 

# Attach demographics / MRI session info and use short covariate names for the model formulas
normalized_enigma_vol_df = pd.merge(normalized_enigma_vol_df, demo_df, on="participant_id", how="left")
normalized_enigma_vol_df = normalized_enigma_vol_df.rename(columns={"diagnosis_group_for_analysis":"group", "MRI_age":"age"})

# Derive the output directory from the run configuration instead of hard-coding it
# (same path as before for dataset "qpn" / session "ses-01"), and create it so
# that to_csv does not fail on a fresh checkout.
# NOTE: this rebinds save_dir, which earlier cells use for the agg_dfs directory.
save_dir = f"{dataset_dir}/results/{session}/anat/dfs/"
Path(save_dir).mkdir(parents=True, exist_ok=True)
normalized_enigma_vol_df.to_csv(f"{save_dir}/normalized_enigma_vol_df.csv", index=False)
normalized_enigma_vol_df.head()

In [None]:
stats_df = normalized_enigma_vol_df.copy()

# Formula variable names cannot contain "-", so the ROI columns are renamed up front
roi_formula_names = {roi: roi.replace("-", "") for roi in enigma_rois}
stats_df = stats_df.rename(columns=roi_formula_names)

# Row labels follow the order of the fitted parameters: patsy puts the intercept
# first, then the categorical terms (group, sex), then the numerical ones (age, ICV).
coef_labels = ['const', 'group', 'sex', 'age', 'ICV']
betas = pd.DataFrame(index=coef_labels, columns=enigma_rois)
dvals = pd.DataFrame(index=coef_labels, columns=enigma_rois)
pvals = pd.DataFrame(index=coef_labels, columns=enigma_rois)
for roi in enigma_rois:
    new_roi = roi_formula_names[roi]
    # print(f"running model for {roi}-->{new_roi}")
    model = smf.ols(formula=f"{new_roi} ~ age + C(group, Treatment(reference='control')) + C(sex) + EstimatedTotalIntraCranialVol", data=stats_df).fit()
    betas[roi] = model.params.values
    # Effect size: group coefficient (position 1) scaled by the residual standard deviation.
    # .iloc makes the positional lookup explicit on the string-labelled params Series.
    dvals[roi] = model.params.iloc[1]/np.std(model.resid, ddof=1)
    pvals[roi] = model.pvalues.values

# reorder results based on enigma order of ROIs (enigma_order holds positions, hence .iloc)
b = betas.loc['group'].iloc[enigma_order]
d = dvals.loc['group'].iloc[enigma_order]
# FDR (Benjamini-Hochberg) corrected p-values of the group term, in the same order as d
p = multipletests(pvals.loc['group'], method='fdr_bh')[1][enigma_order]

# Keep effect sizes only where the corrected p-value is below 0.05
d_thresholded = d.where(p < 0.05, other=pd.NA)

# Summary of the last fitted model only
model.summary()

In [None]:
d

In [None]:
d_thresholded

In [None]:
use_thresholded = False
# visualize results
if use_thresholded:
    print("using thresholded effect sizes")
    array_name = d_thresholded
    save_file = f"{figs_dir}/enigma_vols_thresholded_effect_size.png"
else:
    print("using unthresholded effect sizes")
    array_name = d
    save_file = f"{figs_dir}/enigma_vols_unthresholded_effect_size.png"

# Symmetric color range taken from the array that is actually plotted; taking it
# from d_thresholded regardless gives a NaN range when no ROI survives thresholding.
color_limit = np.abs(array_name).max().round(2)
min_color = -color_limit
max_color = color_limit

print(f"min_color: {min_color}, max_color: {max_color}")

print(f"save_file: {save_file}")
plot_subcortical(array_name=array_name, size=(900, 250), color_bar='bottom', zoom=1.25, embed_nb=True, 
                    interactive=False, share='both', color_range=(min_color, max_color), 
                    nan_color=(255, 255, 255, 1), cmap="coolwarm", transparent_bg=True,
                    screenshot=True, filename=save_file)

### Plot cortical thickness

The example datasets from the ENIGMA toolbox do not load. However, we can look at the list and order of CT structures here:
https://github.com/MICA-MNI/ENIGMA/blob/master/enigmatoolbox/datasets/summary_statistics/gge_case-controls_CortThick.csv

This is the same as the default list from the aparc.stats summary.

In [None]:
import statsmodels.api as sm
from statsmodels.stats.multitest import multipletests
import statsmodels.formula.api as smf

In [None]:
# Reload the stats table, this time without the two per-hemisphere mean-thickness columns
CT_aparc_df = pd.read_csv(aparc_aseg_tsv, sep="\t").drop(columns=["lh_MeanThickness_thickness","rh_MeanThickness_thickness"])
# Thickness columns are identified by the substring "thickness"
ct_cols = [col for col in CT_aparc_df.columns if "thickness" in col]
# Strip the "_thickness" suffix; underscores are kept in this cell
ct_cols_rename = [col.removesuffix("_thickness") for col in ct_cols]
CT_aparc_df = CT_aparc_df.rename(columns=dict(zip(ct_cols, ct_cols_rename)))
CT_aparc_df = CT_aparc_df[["participant_id"] + ct_cols_rename]
CT_aparc_df.head()

### Merge CT with demographics

In [None]:
# Baseline demographics: keep participant id and sex only
demo_df = pd.read_csv(demographics_csv)
demo_df = demo_df[demo_df["redcap_event_name"] == "Baseline (Arm 1: C-OPN)"]

demo_cols = ["participant_id", "sex"]
demo_df = demo_df[demo_cols]

# Baseline MRI session info
mri_sessions_df = pd.read_csv(mri_sessions_csv)
mri_sessions_df = mri_sessions_df[mri_sessions_df["redcap_event_name"] == "Baseline (Arm 1: C-OPN)"]

# Baseline diagnosis; NOTE: this rebinds dx_df to a two-column frame
dx_df = pd.read_csv(dx_csv)
dx_df = dx_df[dx_df["redcap_event_name"] == "Baseline (Arm 1: C-OPN)"]

dx_cols = ["participant_id", "diagnosis_group_for_analysis"]
dx_df = dx_df[dx_cols]

# Left merges: every demographics row is kept
demo_df = pd.merge(demo_df, mri_sessions_df, on="participant_id", how="left")
demo_df = pd.merge(demo_df, dx_df, on="participant_id", how="left")

# Attach the covariates to the thickness table (every thickness row is kept)
CT_aparc_demo_df = pd.merge(CT_aparc_df, demo_df, on="participant_id", how="left")

# Short covariate names for the model formulas
CT_aparc_demo_df = CT_aparc_demo_df.rename(columns={"diagnosis_group_for_analysis":"group", "MRI_age":"age"})
CT_aparc_demo_df.head()

In [None]:
# Restrict to the two groups being compared
stats_df = CT_aparc_demo_df[CT_aparc_demo_df["group"].isin(["control","PD"])].copy()

# ROI columns are renamed up front to underscore-free names for the formulas
roi_formula_names = {roi: roi.replace("_", "") for roi in ct_cols_rename}
stats_df = stats_df.rename(columns=roi_formula_names)

# Row labels follow the order of the fitted parameters: patsy puts the intercept
# first, then the categorical terms (group, sex), then the numerical one (age).
coef_labels = ['const', 'group', 'sex', 'age']
betas = pd.DataFrame(index=coef_labels, columns=ct_cols_rename)
dvals = pd.DataFrame(index=coef_labels, columns=ct_cols_rename)
pvals = pd.DataFrame(index=coef_labels, columns=ct_cols_rename)
for roi in ct_cols_rename:
    new_roi = roi_formula_names[roi]
    model = smf.ols(formula=f"{new_roi} ~ age + C(group, Treatment(reference='control')) + C(sex)", data=stats_df).fit()
    betas[roi] = model.params.values
    # Effect size: group coefficient (position 1) scaled by the residual standard deviation.
    # .iloc makes the positional lookup explicit on the string-labelled params Series.
    dvals[roi] = model.params.iloc[1]/np.std(model.resid, ddof=1)
    pvals[roi] = model.pvalues.values

# Group-term results per ROI, kept in the column order of ct_cols_rename (no reordering here)
b = betas.loc['group']
d = dvals.loc['group']
# FDR (Benjamini-Hochberg) corrected p-values of the group term
p = multipletests(pvals.loc['group'], method='fdr_bh')[1]

# Keep effect sizes only where the corrected p-value is below 0.05
d_thresholded = d.where(p < 0.05, other=pd.NA)


print(f"max d: {d.max()}, (max, min) thresholded d: {d_thresholded.max()}, {d_thresholded.min()}")
# Summary of the last fitted model only
model.summary()

In [None]:
from enigmatoolbox.plotting import plot_subcortical, plot_cortical
from enigmatoolbox.utils.parcellation import parcel_to_surface

In [None]:
# Convert the per-ROI effect sizes with the 'aparc_fsa5' mapping
# NOTE(review): presumably expands parcel values to per-vertex values on the fsa5 surface — confirm
plot_val = parcel_to_surface(d, 'aparc_fsa5')

print(plot_val.shape)

# Symmetric color range around zero from the largest absolute value
min_color = -np.abs(plot_val).max().round(2)
max_color = np.abs(plot_val).max().round(2)

save_file = f"{figs_dir}/enigma_CT_effect_size.png"
print(f"min_color: {min_color}, max_color: {max_color}")
print(f"save_file: {save_file}")
# NOTE(review): interactive=True is combined with screenshot=True here, while the
# subcortical plot uses interactive=False — confirm this is intended
plot_cortical(array_name=plot_val, surface_name="fsa5", size=(900, 250), color_bar='bottom', zoom=1.25, embed_nb=True, 
                    interactive=True, share='both', color_range=(min_color, max_color), 
                    nan_color=(255, 255, 255, 1), cmap="coolwarm", transparent_bg=True, 
                    screenshot=True, filename=save_file)