In [7]:
import os
import sys
import pandas as pd
import warnings

# NOTE: this silences every warning category for the rest of the session
warnings.filterwarnings('ignore')

# make the project's `src` directory (three levels above the working directory) importable;
# abspath() resolves the relative path against the current working directory
src_path = os.path.abspath(os.path.join(os.pardir, os.pardir, os.pardir, 'src'))
if src_path not in sys.path:
    sys.path.append(src_path)

from pathology.core import (determine_amyloid_status, compute_roi_amyloid)
from pathology.cutoffs import find_gmm_cutoff
from connectomics.atlases import get_dk_rois

# --- inputs ---
# `data` is a sibling of the working directory; abspath() resolves it against the cwd
data_dir = os.path.abspath(os.path.join(os.pardir, 'data'))

# raw BF2 table
df_bf2_fname = os.path.join(data_dir, 'bf2_20240927', 'toomas__20240927_090835.csv')
# longitudinal dataset
df_ld_fname = os.path.join(data_dir, 'datasets', 'bf2_tau_asymmetry_ad_long_tnic_fnc.csv')

# --- outputs ---
# folder receiving the SILA input file; created on first run, reused afterwards
export_dir = os.path.join(data_dir, 'sila', 'in')
os.makedirs(export_dir, exist_ok=True)

In [8]:
# prefix shared by the amyloid column names built in this notebook
amy_prefix = 'fnc'

# raw BF2 table: read as-is
df_bf2 = pd.read_csv(df_bf2_fname, low_memory=False)
print(f"N (raw) = {df_bf2.shape}")

# longitudinal dataset: its first CSV column is used as the row index
df_ld = pd.read_csv(df_ld_fname, index_col=0, low_memory=False)
print(f"N (long) = {df_ld.shape}")

# ROI definitions from the project atlas helper
# (consumed further down via ROIs.items() as name -> regions pairs)
ROIs = get_dk_rois()

N (raw) = (16034, 2896)
N (long) = (1390, 3185)


Estimate GMM cut-offs for the ROIs. We still use the established cut-off of 1.033 for amyloid positivity; this estimation is mainly a sanity check that everything was done correctly.

In [9]:
### cut-offs of amyloid PET

# columns for which a cut-off is estimated below
cutoff_ROIs = [
    f'{amy_prefix}_global',
    f'{amy_prefix}_temporal_meta',
    f'{amy_prefix}_early_amyloid',
    f'{amy_prefix}_intermediate_amyloid',
    f'{amy_prefix}_late_amyloid',
]

# sample selection: drop rows flagged as excluded, keep the listed baseline diagnoses,
# keep age > 50. The filters run first and the result is copied once at the end, so the
# full raw table is never duplicated and df_gmm owns its data before columns are added
# (assigning onto a boolean-filtered frame is what triggers SettingWithCopyWarning).
df_gmm = (
    df_bf2
    .loc[lambda d: d['excluded'] != 1]
    .loc[lambda d: d['diagnosis_baseline_variable'].isin(['AD', 'SCD', 'MCI', 'Normal'])]
    .loc[lambda d: d['age'] > 50]
    .copy()
)
df_gmm['amyloid_positive'] = df_gmm.apply(determine_amyloid_status, axis=1)

# one row per 'mid': the row holding that group's smallest 'Visit' value
df_gmm = df_gmm.loc[df_gmm.groupby('mid')['Visit'].idxmin()]

# PET averages in all defined ROIs: left hemisphere, right hemisphere, and both combined
# (the string lists are the hemisphere markers handed to compute_roi_amyloid)
for ROI_name, ROI_regions in ROIs.items():
    df_gmm[f'{amy_prefix}_{ROI_name}_left'] = compute_roi_amyloid(df_gmm, ROI_regions, ['_lh_', '_Left'])
    df_gmm[f'{amy_prefix}_{ROI_name}_right'] = compute_roi_amyloid(df_gmm, ROI_regions, ['_rh_', '_Right'])
    df_gmm[f'{amy_prefix}_{ROI_name}'] = compute_roi_amyloid(df_gmm, ROI_regions, ['_lh_', '_Left', '_rh_', '_Right'])

# rows missing any of the cut-off columns are not used for the estimation
df_gmm = df_gmm.dropna(subset=cutoff_ROIs)
print(f"N (BF2 for cut-off estimation) = {df_gmm.shape}")

# column name -> value returned by find_gmm_cutoff for that column
cutoffs = {}
for amy_col in cutoff_ROIs:
    cutoffs[amy_col] = find_gmm_cutoff(
        scores_gmm=df_gmm[amy_col].to_numpy(),
        roi_name=amy_col,
        verbose=True,
        plot_gmm=False,
    )

N (BF2 for cut-off estimation) = (1294, 2948)
fnc_global GMM cut-off = 1.037
fnc_temporal_meta GMM cut-off = 1.025
fnc_early_amyloid GMM cut-off = 1.068
fnc_intermediate_amyloid GMM cut-off = 1.031
fnc_late_amyloid GMM cut-off = 1.059


In [10]:
# export longitudinal df for SILA modelling in MATLAB
# columns: 'sid', 'age', then the `_left` and `_right` column of every cut-off ROI
sila_cols = (
    ['sid', 'age']
    + [f'{roi}_left' for roi in cutoff_ROIs]
    + [f'{roi}_right' for roi in cutoff_ROIs]
)

# column selection already returns a new frame, so no upfront copy of df_ld is needed;
# only rows complete in all selected columns are kept
df_ld_ac = df_ld[sila_cols].dropna().reset_index(drop=True)

# remove the literal 'BF' from the ids and store them as integers;
# regex=False pins literal matching instead of relying on the version-dependent default
df_ld_ac['sid'] = df_ld_ac['sid'].str.replace('BF', '', regex=False).astype(int)

# index=False is the documented way to leave the row index out of the CSV
df_ld_ac.to_csv(os.path.join(export_dir, 'bf2_tau_asymmetry_ad_long_SILA_in.csv'), index=False)
print(f"N (long df for SILA) = {df_ld_ac.shape}")

N (long df for SILA) = (961, 12)
