In [None]:
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
import networkx as nx

import utils_description as utils
import networkx.algorithms.community as nx_comm
import numpy as np

In [None]:
# Global seaborn theme shared by every figure in this notebook.
sns.set(
    style="whitegrid",
    font='Arial',
    font_scale=1.0,
)

# Marker styling for the mean indicator drawn on boxplots (used with showmeans=True).
meanprops = dict(markeredgecolor='black', markerfacecolor='white', markersize=8)

### Load results data

In [None]:
# Input CSVs, given as paths relative to the notebook's working directory.
# Clustering output; read into `results` and expected to provide a 'cluster_3' column.
infile_results  = 'results/results_clustering.csv'
# Subject-level metadata; read into `metadata` (presumably processed ADNIMERGE data — confirm).
infile_metadata = 'data/ADNI/ADNIMERGE_processed.csv'

In [None]:
# Both tables use their first CSV column as the index; the filter below relies
# on the two indexes referring to the same subjects.
results  = pd.read_csv(infile_results, index_col=0)
metadata = pd.read_csv(infile_metadata, index_col=0)

# Collapse the baseline diagnosis labels into 'MCI' and 'Dementia'.
# The result is assigned back to the column: calling replace(..., inplace=True)
# on `metadata['DX_bl']` acts on an intermediate object, which pandas warns
# about and which does not update `metadata` under copy-on-write.
metadata['DX_bl'] = metadata['DX_bl'].replace({'AD':'Dementia', 'EMCI':'MCI', 'LMCI':'MCI'})

# Cross-tabulate baseline diagnosis against the 3-cluster assignment
# (computed before the diagnosis filter below is applied).
print('Diagnosis and clusters:')
print(pd.crosstab(metadata['DX_bl'], results['cluster_3']))
print()

# Keep only subjects whose baseline diagnosis is MCI or Dementia.
# The boolean mask comes from `metadata` and is aligned to `results` by index.
results = results.loc[metadata['DX_bl'].isin(['Dementia', 'MCI'])]

In [None]:
# Work on an explicit copy: adding columns to a bare column selection of
# `metadata` triggers SettingWithCopyWarning and leaves it ambiguous whether
# a view or a copy is being modified.
age_diagnosis = metadata[['AGE', 'Month', 'Month_Dementia', 'Month_MCI', 'DX_bl', 'DX', 'PTRACCAT']].copy()

# Convert the month-valued columns to years.
# NOTE(review): presumably months elapsed since baseline — confirm against the data dictionary.
age_diagnosis['Year']          = age_diagnosis['Month']/12
age_diagnosis['Year_Dementia'] = age_diagnosis['Month_Dementia']/12
age_diagnosis['Year_MCI']      = age_diagnosis['Month_MCI']/12

# Ages obtained by adding the elapsed years to the 'AGE' column.
age_diagnosis['Age_last']      = age_diagnosis['AGE'] + age_diagnosis['Year']
age_diagnosis['Age_Dementia']  = age_diagnosis['AGE'] + age_diagnosis['Year_Dementia']
age_diagnosis['Age_MCI']       = age_diagnosis['AGE'] + age_diagnosis['Year_MCI']

# Text label combining the two diagnosis columns, e.g. 'MCI to Dementia'.
age_diagnosis['DXCHANGE']      = age_diagnosis['DX_bl'] + ' to ' + age_diagnosis['DX']

# Attach the cluster assignment; the inner join keeps only subjects present in both tables.
age_diagnosis_clust = pd.concat([age_diagnosis, results['cluster_3']], axis=1, join='inner')

### Descriptive statistics of clusters

In [None]:
#### Sociodemographic, clinical, biomarker and cognitive data
sociodemo, cognitive, biomarker = utils.load_adnimerge_data(results, 'cluster_3')

# Columns reported in the LaTeX tables.
cluster_cols = ['cluster1', 'cluster2', 'cluster3']
numerical_cols = cluster_cols + ['F', 'pvalue']
categorical_cols = cluster_cols + ['chstat', 'pvalue']

for diag in ['MCI', 'Dementia']:

    # Boolean mask built from `sociodemo`; the same mask is applied to the other
    # tables, so they are expected to share `sociodemo`'s index.
    in_group = sociodemo['DX_bl'] == diag

    sociodemo_group = sociodemo.loc[in_group]
    biomarker_group = biomarker.loc[in_group]
    cognitive_group = cognitive.loc[in_group]
    age_group = age_diagnosis_clust.loc[in_group]

    # Compute every table first, then print them in a fixed order.
    demo_stats = utils.stats_numerical(sociodemo_group[['PTEDUCAT', 'AGE', 'cluster_3']], 'cluster_3')
    age_stats = utils.stats_numerical(age_group[['Age_Dementia', 'Age_MCI', 'cluster_3']], 'cluster_3')
    sex_apoe_stats = utils.stats_categorical(sociodemo_group[['PTGENDER', 'APOE4', 'cluster_3']], 'cluster_3')
    biomarker_stats = utils.stats_numerical(biomarker_group, 'cluster_3')
    cognitive_stats = utils.stats_numerical(cognitive_group, 'cluster_3')

    for table in (demo_stats, age_stats):
        print(table[numerical_cols].to_latex(), '\n')

    print(sex_apoe_stats[categorical_cols].to_latex())

    for table in (biomarker_stats, cognitive_stats):
        print(table[numerical_cols].to_latex(), '\n')

In [None]:
# Display names for the cluster codes found in the 'cluster_3' column.
cluster_labels = {0:'Cluster 1', 1:'Cluster 2', 2:'Cluster 3'}

# Biomarker table joined with diagnosis and age columns; the inner join keeps
# only subjects present in both tables.
data = pd.concat([biomarker, metadata[['DX_bl', 'DX', 'AGE']]], axis=1, join='inner')
data = data.assign(cluster_3=data['cluster_3'].replace(cluster_labels))

In [None]:
# Category orders for the x axis (clusters) and the hue (baseline diagnosis).
cluster_order = ['Cluster 1', 'Cluster 2', 'Cluster 3']
diagnosis_order = ['MCI', 'Dementia']

# One panel per biomarker, listed in row-major order of the 4x3 grid.
biomarker_columns = [
    'ABETA_bl', 'TAU_bl', 'PTAU_bl',
    'AV45_bl', 'FDG_bl', 'WholeBrain_bl',
    'Ventricles_bl', 'MidTemp_bl', 'Hippocampus_bl',
    'Fusiform_bl', 'Entorhinal_bl',
]

sns.set_palette('magma_r', 3)

fig, ax = plt.subplots(4, 3, figsize=(10, 10))
used_panels = ax.ravel()[:len(biomarker_columns)]

for panel, column in zip(used_panels, biomarker_columns):
    sns.boxplot(data=data, y=column, x='cluster_3', hue='DX_bl',
                hue_order=diagnosis_order, order=cluster_order, ax=panel,
                showmeans=True, meanprops=meanprops)

# Drop the per-panel legends and x labels; a single figure-level legend is added below.
for panel in used_panels:
    panel.get_legend().remove()
    panel.set_xlabel('')

# The grid has 12 slots for 11 biomarkers: remove the unused bottom-right axes.
fig.delaxes(ax[3][2])

# Shared legend built from the first panel's handles.
handles, labels = ax[0][0].get_legend_handles_labels()
leg = fig.legend(handles, labels, bbox_to_anchor=(0.9, 0.2), ncol=1, title='Baseline diagnosis:\n')
leg._legend_box.align = "left"  # private matplotlib attribute, used to left-align the legend title

fig.tight_layout()
plt.savefig('figures/boxplots_biomarkers.pdf', dpi=500)


### Distribution of APOE alleles across clusters

In [None]:
# Load the genotype table, drop the 'SYMBOL' column and transpose it so that
# the two APOE variants below can be selected as columns.
genotype_data = pd.read_csv('results/processed_variants_ADNI_WGS_v2.csv', index_col=0)
genotype_data = genotype_data.drop(columns='SYMBOL').T

# Explicit copy so the new column is added to an independent frame rather than
# to a slice of `genotype_data` (avoids SettingWithCopyWarning).
apoe_data = genotype_data[['rs429358', 'rs7412']].copy()
# Label each row from its two variant values using the project helper.
apoe_data['APOE_alleles'] = apoe_data.apply(utils.label_APOE_alleles, axis=1)

# Add the sociodemographic columns; the inner join keeps only rows present in both tables.
apoe_data = pd.concat([apoe_data, sociodemo], axis=1, join='inner')
# Assign the result back instead of a chained replace(..., inplace=True), which
# acts on an intermediate object and does not update the frame under copy-on-write.
apoe_data['DX_bl'] = apoe_data['DX_bl'].replace({'EMCI':'MCI', 'LMCI':'MCI', 'AD':'Dementia'})

# Dummy columns tested per diagnosis group (get_dummies with an empty prefix
# yields names of the form '_<label>').
# NOTE(review): '_E3/E3' is listed for neither group and '_E2/E4' is not listed
# for Dementia — presumably intentional (reference category / absent in that
# group); confirm.
columns_by_diag = {
    'MCI':      ['_E2/E2', '_E2/E3', '_E2/E4', '_E3/E4', '_E4/E4', 'cluster_3'],
    'Dementia': ['_E2/E2', '_E2/E3', '_E3/E4', '_E4/E4', 'cluster_3'],
}

for diag, columns in columns_by_diag.items():

    apoe_group = apoe_data.loc[apoe_data['DX_bl'] == diag]
    apoe_group = pd.get_dummies(apoe_group, columns=['APOE_alleles'], prefix=[''])

    stats_apoe = utils.stats_categorical(apoe_group[columns], 'cluster_3')

    print(stats_apoe[['cluster1', 'cluster2', 'cluster3', 'chstat', 'pvalue']].to_latex())
    print()

In [None]:
# Age at dementia diagnosis per cluster, split by baseline diagnosis.
dataviz = age_diagnosis_clust
order = ['MCI', 'Dementia']
sns.set_palette('magma_r', len(order))

# Explicit figure/axes objects instead of the implicit pyplot "current figure".
fig, ax = plt.subplots(figsize=(7, 7))
sns.boxplot(data=dataviz, x='cluster_3', y='Age_Dementia', hue='DX_bl', hue_order=order,
            showmeans=True, meanprops=meanprops, ax=ax)
ax.set_ylabel('Age of dementia diagnosis')
ax.set_xlabel('Cluster')
ax.legend(title='Baseline diagnosis', loc='lower right')
# Title grammar corrected ("whom" -> "who").
ax.set_title('Subjects who converted to dementia')
fig.savefig('figures/diagnosis_age.png', dpi=500)
plt.show()