In [1]:
import pandas as pd
import os
import numpy as np
import warnings
from sklearn.preprocessing import MinMaxScaler, StandardScaler

# Silence FutureWarning messages for the rest of the session.
warnings.simplefilter('ignore', FutureWarning)
# Disable pandas' chained-assignment check (None = neither warn nor raise).
pd.options.mode.chained_assignment = None
from statsmodels.stats.multitest import multipletests

In [2]:
# Load every per-subject metrics CSV found in `seg_folder` and stack them into
# one table (`metric_df`) with a 'SUBJECT' column derived from the file name.
seg_folder = 'outputs/metrics_ondri_fs/'

# os.listdir returns entries in arbitrary order, so sort for a reproducible row
# order. Hidden entries and sub-directories (e.g. '.DS_Store',
# '.ipynb_checkpoints') are not metrics tables and are skipped.
seg_files = sorted(
    name for name in os.listdir(seg_folder)
    if not name.startswith('.') and os.path.isfile(os.path.join(seg_folder, name))
)

frames = []
subjects = []
for file in seg_files:
    frame = pd.read_csv(os.path.join(seg_folder, file))
    frames.append(frame)
    # The subject id is the part of the file name before the first '.'.
    # Repeat it once per row so the labels stay aligned with the stacked table
    # even when a file holds zero or several rows.
    subjects.extend([file.split('.')[0]] * len(frame))

if not frames:
    raise ValueError('No metrics files found in %s' % seg_folder)

metric_df = pd.concat(frames)
metric_df['SUBJECT'] = subjects

# Drop columns that are empty for every subject.
metric_df = metric_df.dropna(axis=1, how='all')


In [3]:
# Remove every metrics column whose name contains 'nvox'.
# (A previous, now disabled, variant divided these columns by 'TBV-voxel'
# instead of dropping them.)
is_nvox = metric_df.columns.str.contains('nvox')
nvox_cols = metric_df.columns[is_nvox]
metric_df = metric_df.loc[:, ~is_nvox]

In [4]:
# Remove every metrics column whose name contains 'MII' or 'mean'
# (a single regex alternation matches either substring).
delete_cols = metric_df.columns[metric_df.columns.str.contains('MII|mean')]
metric_df = metric_df.drop(columns=delete_cols)

In [5]:
# Clinical summary table, restricted to rows that have a T1 file name recorded.
clinical_df = (
    pd.read_csv('data/summary/ONDRI_summary.csv')
    .dropna(subset=['NII_FILENAME_T1'])
)

In [6]:


from scipy.stats import pearsonr

# For each cohort: Pearson-correlate the normalised volume columns ('nvox')
# with the 'MAD' metric columns, correct the p-values for multiple testing,
# then print the significant rows and write them to
# outputs/MAD_size_correlation_<cohort>.csv.

# --- Cohort-independent preparation: done once, not once per cohort ---------
fs_t1_vols = pd.read_csv('outputs/fs_t1_volumes.csv')

# Divide every non-SUBJECT column by total intracranial volume and tag the
# result with an '_nvox' suffix.
nvox_cols = fs_t1_vols.columns[~fs_t1_vols.columns.str.contains('SUBJECT')]
fs_t1_vols[nvox_cols] = fs_t1_vols[nvox_cols].to_numpy() / np.expand_dims(
    fs_t1_vols['total_intracranial_volume'].to_numpy(), axis=-1
)
fs_t1_vols = fs_t1_vols.rename(columns={col: col + '_nvox' for col in nvox_cols})

merged_all = pd.merge(clinical_df[['SUBJECT', 'COHORT']], metric_df, on='SUBJECT')
merged_all = pd.merge(merged_all, fs_t1_vols, on='SUBJECT', how='inner')
# The normaliser divided by itself is constant, so it is not a usable variable.
merged_all = merged_all.drop(columns=['total_intracranial_volume_nvox'])

pd.set_option('display.precision', 2)

# --- Per-cohort analysis -----------------------------------------------------
for cohort in ['ADMCI', 'PD', 'FTD', 'ALS']:
    print(cohort)
    merged = merged_all[merged_all['COHORT'] == cohort]
    print(len(merged))

    # A correlation needs at least two subjects; skip the cohort instead of
    # letting the scaler / pearsonr abort the whole loop.
    if len(merged) < 2:
        print('Skipping %s: fewer than 2 subjects' % cohort)
        continue

    cols = merged.columns[merged.columns.str.contains('nvox')]
    mad_cols = merged.columns[merged.columns.str.contains('MAD')]

    # Standardising does not change Pearson's r; kept so results match earlier runs.
    scaler = StandardScaler()
    x = scaler.fit_transform(merged[cols])

    scaler = StandardScaler()
    y = scaler.fit_transform(merged[mad_cols])

    # NOTE(review): volume column i is paired with MAD column i purely by
    # position. This assumes both column sets list the same regions in the
    # same order -- TODO confirm against the column names of both tables.
    rs = [pearsonr(x[:, i], y[:, i]) for i in range(len(cols))]

    df = pd.DataFrame()
    df['region'] = cols
    df['r'] = [res.statistic for res in rs]
    # Holm-Sidak step-down correction (the statsmodels default), stated explicitly.
    test = multipletests([res.pvalue for res in rs], alpha=0.05, method='hs')
    df['p'] = test[1]

    # Work on an explicit copy so the rounding below never touches `df`.
    significant = df[df['p'] < 0.05].copy()
    significant['r'] = np.round(significant['r'], 2)
    # Keep three significant digits of the corrected p-value.
    significant['p'] = [float('%.3g' % p) for p in significant['p']]
    significant = significant.sort_values('r', ascending=False)

    print(significant)
    significant.to_csv('outputs/MAD_size_correlation_%s.csv' % cohort, index=False)

ADMCI
105
                              region     r         p
2             Left-Inf-Lat-Vent_nvox  0.50  5.43e-06
20           Right-Inf-Lat-Vent_nvox  0.36  1.28e-02
31         Right-choroid-plexus_nvox  0.33  3.75e-02
91      ctx-rh-superiortemporal_nvox -0.33  4.25e-02
92         ctx-rh-supramarginal_nvox -0.34  2.93e-02
34   ctx-lh-caudalmiddlefrontal_nvox -0.36  1.50e-02
94                ctx-rh-insula_nvox -0.36  1.58e-02
13                Left-Amygdala_nvox -0.37  8.87e-03
36            ctx-lh-entorhinal_nvox -0.38  4.69e-03
93    ctx-rh-transversetemporal_nvox -0.38  5.24e-03
52           ctx-lh-postcentral_nvox -0.43  3.72e-04
42  ctx-lh-lateralorbitofrontal_nvox -0.44  2.84e-04
29         Right-Accumbens-area_nvox -0.45  1.01e-04
1        Left-Lateral-Ventricle_nvox -0.46  6.16e-05
27            Right-Hippocampus_nvox -0.47  4.72e-05
12             Left-Hippocampus_nvox -0.48  1.92e-05
19      Right-Lateral-Ventricle_nvox -0.49  9.86e-06
28               Right-Amygdala_nvox