In [1]:
%matplotlib inline

In [2]:
import json
import xml.etree.ElementTree as ET
from glob import glob
from io import StringIO
from itertools import combinations
from os import mkdir
from os.path import basename, exists, join

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scikit_posthocs as sp
import seaborn as sns
import statsmodels
import statsmodels.api as sm
import statsmodels.formula.api as sfa
from scipy import stats
from statsmodels.stats.multitest import multipletests
from tqdm import tqdm

pd.set_option('display.max_rows', None, 'display.max_columns', None)

# Correlation study data

In [None]:
def export_df(directory, psych_test='MMSE'):
    """Build the per-scan summary table and the ROI/score correlations of one data directory.

    Reads ``<directory>/stats/output_<psych_test>.csv``, splits the raw ``PET_ID``
    string (``<subject>~<date>_<time>~<image id>``) into ``PET_ID``
    (``<subject>-<image id>``), ``Scan_Date`` and ``Subject_ID``, fills the
    metadata columns from the XML files in ``<directory>/Metadata/ADNI`` and
    computes the Pearson correlation of every ROI column with ``psych_test``.

    Parameters
    ----------
    directory : str
        Folder holding the ``stats`` and ``Metadata/ADNI`` sub-folders.
    psych_test : str, default 'MMSE'
        Selects the input CSV (lower-cased in the file name) and the score
        column used for the correlations; the notebook calls it with 'MMSE'
        and 'NPIQ'.

    Returns
    -------
    tuple
        ``(stats_df, corr_data_df)``. ``corr_data_df`` is indexed by ROI, holds
        ``<psych_test>_Corr`` and ``<psych_test>_p_value`` and is sorted by the
        correlation; it is ``None`` when ``pearsonr`` rejects the data with a
        ``ValueError`` (a message is printed in that case).
    """
    XML_COLS = ['Age', 'Sex', 'APOE_A1', 'APOE_A2', 'MMSE', 'NPIQ']
    # XPath of the element whose text fills each metadata column (same order as XML_COLS).
    XML_QUERIES = [".//subjectAge",
                   ".//subjectSex",
                   ".//subjectInfo[@item='APOE A1']",
                   ".//subjectInfo[@item='APOE A2']",
                   ".//assessmentScore[@attribute='MMSCORE']",
                   ".//assessmentScore[@attribute='NPISCORE']"]
    # Scan_Date, Subject_ID and the column that followed PET_ID, plus the XML columns.
    N_META_COLS = len(XML_COLS) + 3

    stats_df = pd.read_csv(join(directory, 'stats', 'output_' + psych_test.lower() + '.csv'))

    # Drop a leading column that is not PET_ID, so PET_ID is expected first afterwards.
    if stats_df.columns[0] != 'PET_ID':
        stats_df = stats_df.drop(stats_df.columns[0], axis=1)

    raw_ids = stats_df['PET_ID']
    stats_df['Scan_Date'] = raw_ids.apply(lambda raw: raw.split('~')[1].split('_')[0])
    stats_df['PET_ID'] = raw_ids.apply(lambda raw: raw.split('~')[0] + '-' + raw.split('~')[-1])
    stats_df['Subject_ID'] = stats_df['PET_ID'].apply(lambda pet_id: pet_id.split('-')[0])

    # Move the column that followed PET_ID behind Scan_Date/Subject_ID ...
    col_list = list(stats_df.columns)
    stats_df = stats_df[col_list[0:1] + col_list[2:] + col_list[1:2]].copy()

    for xml_col in XML_COLS:
        stats_df[xml_col] = None

    # ... then pull the whole metadata block right behind PET_ID. Resulting order:
    # PET_ID, Scan_Date, Subject_ID, <former 2nd column>, XML_COLS..., remaining columns.
    # NOTE(review): the former 2nd column therefore never enters the ROI
    # correlations below -- confirm that it is not an ROI.
    col_list = list(stats_df.columns)
    stats_df = stats_df[col_list[0:1] + col_list[-N_META_COLS:] + col_list[1:-N_META_COLS]].copy()

    # sorted() makes the outcome independent of the file-system listing order
    # should two XML files map to the same PET_ID (the last one wins).
    for xml_file in sorted(glob(join(directory, 'Metadata', 'ADNI', '*.xml'))):
        # basename() instead of split('/') so that Windows-style paths work too.
        name_parts = basename(xml_file).split('_')
        unique_id = '_'.join(name_parts[1:4]) + '-' + name_parts[-1].split('.')[0]

        matching_rows = stats_df.index[stats_df['PET_ID'] == unique_id]
        if len(matching_rows) == 0:
            continue
        row = matching_rows[0]

        root = ET.parse(xml_file).getroot()
        for xml_col, query in zip(XML_COLS, XML_QUERIES):
            element = root.find(query)  # first match, or None when absent
            if element is not None:
                stats_df.at[row, xml_col] = element.text

    # Everything but 'Sex' becomes numeric; missing or unparsable text turns into NaN.
    for numeric_col in ['Age', 'APOE_A1', 'APOE_A2', 'MMSE', 'NPIQ']:
        stats_df[numeric_col] = pd.to_numeric(stats_df[numeric_col], errors='coerce')

    # ROI columns are whatever follows PET_ID and the metadata block.
    rois_df = stats_df.drop(stats_df.columns[0:N_META_COLS + 1], axis=1)

    corr_col, p_col = psych_test + '_Corr', psych_test + '_p_value'
    corr_data = {corr_col: [], p_col: []}

    x = stats_df[psych_test].values
    x_missing = np.isnan(x)
    for roi in rois_df:
        y = rois_df[roi].values
        usable = ~np.logical_or(x_missing, np.isnan(y))
        try:
            score_corr, score_p = stats.pearsonr(x[usable], y[usable])
        except ValueError:
            print(directory+' has fewer than 2 values in '+psych_test)
            return (stats_df, None)
        corr_data[corr_col].append(score_corr)
        corr_data[p_col].append(score_p)

    corr_data_df = pd.DataFrame.from_dict(corr_data)
    corr_data_df['ROI'] = rois_df.columns
    corr_data_df = corr_data_df.set_index('ROI').sort_values([corr_col])

    return (stats_df, corr_data_df)

In [None]:
# Analysis folders are <dataset>/<diagnosis>/<radioisotope>. Only entries holding a
# 'stats' folder are processed: the bare '*/*' pattern also matches the CSV files
# that later cells write to '../Data_revision/stats', which made a re-run crash.
directories = [path for path in glob(join('..', 'Data_revision', '*', '*'))
               if exists(join(path, 'stats'))]

for directory in tqdm(directories, desc='Directories analyzed'):
    # The summary table comes from the MMSE pass; the NPIQ pass only adds correlations.
    stats_df, corr_data_df = export_df(directory)
    _, npiq_corr_data_df = export_df(directory, psych_test='NPIQ')

    stats_df.to_csv(join(directory, 'stats', 'summary.csv'), index=False)
    # export_df returns None instead of a correlation table when it had too few values.
    if corr_data_df is not None:
        corr_data_df.to_csv(join(directory, 'stats', 'mmse_corr.csv'))
    if npiq_corr_data_df is not None:
        npiq_corr_data_df.to_csv(join(directory, 'stats', 'npiq_corr.csv'))

# RoI ranking generated from Influential nodes data

In [None]:
def calculate_roi_ranks(directory):
    """Rank ROIs by their average position in the influential-node lists.

    Reads ``<directory>/stats/influential.csv`` (its first column is discarded),
    treats every 'Influential node values' cell as a comma-separated, ordered
    list of ROI names and writes ``<directory>/stats/roi_ranking.csv`` with the
    columns ROI, Occurrences and Rank, where Rank is the floor of the mean
    1-based list position. Rows are ordered by Rank (ascending), then by
    Occurrences (descending).
    """
    stats_dir = join(directory, 'stats')
    table = pd.read_csv(join(stats_dir, 'influential.csv'))
    table = table.drop(table.columns[0], axis=1)

    position_totals = {}    # ROI -> sum of its 1-based positions
    appearance_counts = {}  # ROI -> number of lists containing it

    for row_label in table.index:
        names = (part.strip() for part in table['Influential node values'][row_label].split(','))
        for position, name in enumerate(names, start=1):
            position_totals[name] = position_totals.get(name, 0) + position
            appearance_counts[name] = appearance_counts.get(name, 0) + 1

    roi_names = list(position_totals)
    ranking = pd.DataFrame.from_dict({
        'ROI': roi_names,
        'Occurrences': [appearance_counts[name] for name in roi_names],
        'Rank': [position_totals[name] // appearance_counts[name] for name in roi_names],
    })
    ranking = ranking.sort_values(['Rank', 'Occurrences'], ascending=[True, False], ignore_index=True)

    ranking.to_csv(join(stats_dir, 'roi_ranking.csv'), index=False)

In [None]:
# Only <diagnosis>/<radioisotope> folders that hold a 'stats' folder are ranked; the
# bare '*/*' pattern also matches the CSV files that other cells write to
# '../Data_revision/stats', which are not analysis folders.
directories = [path for path in glob(join('..', 'Data_revision', '*', '*'))
               if exists(join(path, 'stats'))]

for directory in directories:
    calculate_roi_ranks(directory)

# ANOVA statsmodels

In [3]:
def calculate_anova_statsmodels(dir1, dir2, dir3):
    """Print a type-2 ANOVA table per selected ROI, comparing CN, MCI and AD.

    For each radioisotope the ``output_mmse.csv`` and ``output_npiq.csv`` tables
    of the three group folders are merged (duplicate column names and the first
    two columns removed), labelled with their group and stacked. An OLS model
    ``<roi> ~ C(Category)`` is then fitted for every ROI in the radioisotope's
    hard-coded list and its ``anova_lm(..., typ=2)`` table is printed.

    Parameters
    ----------
    dir1, dir2, dir3 : str
        Folders of the groups labelled 'CN', 'MCI' and 'AD', in that order.
    """
    rois_by_radioisotope = {
        'AV45': ['GM Superior parietal lobule 7P L',
                 'GM Medial geniculate body L',
                 'GM Anterior intra-parietal sulcus hIP3 R',
                 'GM Superior parietal lobule 7A L',
                 'GM Superior parietal lobule 5L L'],
        'PiB': ["GM Broca's area BA44 R",
                "GM Amygdala_laterobasal group L",
                "GM Amygdala_laterobasal group R",
                "WM Superior occipito-frontal fascicle R",
                "GM Superior parietal lobule 7A L",
                "GM Visual cortex V3V R",
                "GM Hippocampus hippocampal-amygdaloid transition area R",
                "WM Superior longitudinal fascicle L",
                "GM Primary auditory cortex TE1.1 L"],
    }
    groups = (('CN', dir1), ('MCI', dir2), ('AD', dir3))
    # Spaces, dashes, dots and apostrophes become '_' so that each column name
    # can be spliced into the model formula below.
    to_identifier = str.maketrans({' ': '_', '-': '_', '.': '_', "'": '_'})

    for radioisotope, rois in rois_by_radioisotope.items():
        print(radioisotope)

        labelled_frames = []
        for label, group_dir in groups:
            stats_dir = join(group_dir, radioisotope, 'stats')
            merged = pd.concat(
                [pd.read_csv(join(stats_dir, 'output_mmse.csv')),
                 pd.read_csv(join(stats_dir, 'output_npiq.csv'))],
                axis=1,
            )
            merged = merged.loc[:, ~merged.columns.duplicated()]
            merged = merged.drop(merged.columns[[0, 1]], axis=1)
            merged['Category'] = label
            labelled_frames.append(merged)

        anova_df = pd.concat(labelled_frames, ignore_index=True)
        anova_df.columns = [col.translate(to_identifier) for col in anova_df.columns]

        for roi in [name.translate(to_identifier) for name in rois]:
            print(roi)
            model = sfa.ols(roi + ' ~ C(Category)', data=anova_df[['Category', roi]]).fit()
            print(sm.stats.anova_lm(model, typ=2))
        print()

In [4]:
# Group folders, passed in the order the function labels them: CN, MCI, AD.
directory1 = '../Data_revision/CN'
directory2 = '../Data_revision/MCI'
directory3 = '../Data_revision/AD'

calculate_anova_statsmodels(directory1, directory2, directory3)

AV45
GM_Superior_parietal_lobule_7P_L
                sum_sq     df         F    PR(>F)
C(Category)   0.295529    2.0  3.788142  0.023361
Residual     17.592231  451.0       NaN       NaN
GM_Medial_geniculate_body_L
                sum_sq     df         F    PR(>F)
C(Category)   0.314216    2.0  3.575613  0.028795
Residual     19.816397  451.0       NaN       NaN
GM_Anterior_intra_parietal_sulcus_hIP3_R
                sum_sq     df         F    PR(>F)
C(Category)   0.255406    2.0  3.216638  0.041011
Residual     17.905028  451.0       NaN       NaN
GM_Superior_parietal_lobule_7A_L
                sum_sq     df         F    PR(>F)
C(Category)   0.235190    2.0  3.213123  0.041153
Residual     16.505845  451.0       NaN       NaN
GM_Superior_parietal_lobule_5L_L
                sum_sq     df         F    PR(>F)
C(Category)   0.227484    2.0  3.118451  0.045181
Residual     16.449730  451.0       NaN       NaN

PiB
GM_Broca_s_area_BA44_R
               sum_sq    df         F    PR(>F)
C

# ANOVA Student T

In [None]:
def calculate_anova_1(dir1, dir2, dir3):
    """Print pairwise t-test tables for the ROIs that separate CN, MCI and AD.

    For each radioisotope the ``output_mmse.csv`` and ``output_npiq.csv`` tables
    of the three group folders are merged (duplicate column names and the first
    two columns removed), labelled and stacked. Every ROI is then passed to
    scikit-posthocs ``posthoc_ttest`` with ``p_adjust='holm'``; the resulting
    table is printed when any of its values is below 0.05.

    Parameters
    ----------
    dir1, dir2, dir3 : str
        Folders of the groups labelled 'CN', 'MCI' and 'AD', in that order.
    """
    groups = (('CN', dir1), ('MCI', dir2), ('AD', dir3))
    roi_names = []

    for radioisotope in ('AV45', 'PiB'):
        labelled_frames = []
        for label, group_dir in groups:
            stats_dir = join(group_dir, radioisotope, 'stats')
            merged = pd.concat(
                [pd.read_csv(join(stats_dir, 'output_mmse.csv')),
                 pd.read_csv(join(stats_dir, 'output_npiq.csv'))],
                axis=1,
            )
            merged = merged.loc[:, ~merged.columns.duplicated()]
            merged = merged.drop(merged.columns[[0, 1]], axis=1)
            # The ROI list is taken once, from the first non-empty CN table, and
            # reused for the following radioisotopes.
            if label == 'CN' and len(roi_names) == 0:
                roi_names = merged.columns.tolist()
            merged['Category'] = label
            labelled_frames.append(merged)

        anova_df = pd.concat(labelled_frames, ignore_index=True)

        for roi in roi_names:
            pairwise_p = sp.posthoc_ttest(anova_df, val_col=roi, group_col='Category', p_adjust='holm')
            if any((pairwise_p[label] < 0.05).any() for label, _ in groups):
                print(radioisotope, '\t', roi)
                print(pairwise_p)
                print()

In [None]:
# Group folders, passed in the order the function labels them: CN, MCI, AD.
directory1 = '../Data_revision/CN'
directory2 = '../Data_revision/MCI'
directory3 = '../Data_revision/AD'

calculate_anova_1(directory1, directory2, directory3)

# ANOVA

In [None]:
def calculate_anova_2(dir1, dir2, dir3):
    """Write a one-way ANOVA table per radioisotope comparing three groups.

    For each radioisotope the ``output_mmse.csv`` and ``output_npiq.csv`` tables
    of the three group folders are merged (duplicate column names and the first
    two columns removed). ``scipy.stats.f_oneway`` is evaluated for every column
    of the first group's table and the result is saved, sorted by descending
    F-value and then ascending p-value, to
    ``../Data_revision/stats/anova_<radioisotope>.csv`` (the folder is created
    when missing).

    Parameters
    ----------
    dir1, dir2, dir3 : str
        Folders of the three groups to compare.
    """
    out_dir = join('..', 'Data_revision', 'stats')

    for radioisotope in ('AV45', 'PiB'):
        group_frames = []
        for group_dir in (dir1, dir2, dir3):
            stats_dir = join(group_dir, radioisotope, 'stats')
            merged = pd.concat(
                [pd.read_csv(join(stats_dir, 'output_mmse.csv')),
                 pd.read_csv(join(stats_dir, 'output_npiq.csv'))],
                axis=1,
            )
            merged = merged.loc[:, ~merged.columns.duplicated()]
            group_frames.append(merged.drop(merged.columns[[0, 1]], axis=1))
        first_df, second_df, third_df = group_frames

        roi_names = list(first_df)
        f_values, p_values = [], []
        for roi in roi_names:
            f, p = stats.f_oneway(first_df[roi], second_df[roi], third_df[roi])
            f_values.append(f)
            p_values.append(p)

        anova_df = pd.DataFrame.from_dict({'ROI': roi_names, 'f_value': f_values, 'p_value': p_values})
        anova_df = anova_df.set_index("ROI").sort_values(['f_value', 'p_value'], ascending=[False, True])

        if not exists(out_dir):
            mkdir(out_dir)
        anova_df.to_csv(join(out_dir, 'anova_'+radioisotope.lower()+'.csv'), index=True)

In [None]:
# Group folders, passed in the order CN, MCI, AD.
directory1 = '../Data_revision/CN'
directory2 = '../Data_revision/MCI'
directory3 = '../Data_revision/AD'

# A single call handles every radioisotope. The former loop over `directories`
# (a variable left over from an earlier cell) repeated this identical call once
# per folder and rewrote the same CSV files each time.
calculate_anova_2(directory1, directory2, directory3)

# Scheffe

In [5]:
def scheffe(dir1, dir2, dir3):
    """Print Scheffe post-hoc tables for the ROIs that separate CN, MCI and AD.

    For each radioisotope the ``output_mmse.csv`` and ``output_npiq.csv`` tables
    of the three group folders are merged (duplicate column names and the first
    two columns removed), labelled and stacked. Every ROI is then passed to
    scikit-posthocs ``posthoc_scheffe``; the resulting table is printed when any
    of its values is below 0.05.

    Parameters
    ----------
    dir1, dir2, dir3 : str
        Folders of the groups labelled 'CN', 'MCI' and 'AD', in that order.
    """
    groups = (('CN', dir1), ('MCI', dir2), ('AD', dir3))
    roi_names = []

    for radioisotope in ('AV45', 'PiB'):
        labelled_frames = []
        for label, group_dir in groups:
            stats_dir = join(group_dir, radioisotope, 'stats')
            merged = pd.concat(
                [pd.read_csv(join(stats_dir, 'output_mmse.csv')),
                 pd.read_csv(join(stats_dir, 'output_npiq.csv'))],
                axis=1,
            )
            merged = merged.loc[:, ~merged.columns.duplicated()]
            merged = merged.drop(merged.columns[[0, 1]], axis=1)
            # The ROI list is taken once, from the first non-empty CN table, and
            # reused for the following radioisotopes.
            if label == 'CN' and len(roi_names) == 0:
                roi_names = merged.columns.tolist()
            merged['Category'] = label
            labelled_frames.append(merged)

        anova_df = pd.concat(labelled_frames, ignore_index=True)

        for roi in roi_names:
            pairwise_p = sp.posthoc_scheffe(anova_df, val_col=roi, group_col='Category')
            if any((pairwise_p[label] < 0.05).any() for label, _ in groups):
                print(radioisotope, '\t', roi)
                print(pairwise_p)
                print()

In [6]:
# Group folders, passed in the order the function labels them: CN, MCI, AD.
directory1 = '../Data_revision/CN'
directory2 = '../Data_revision/MCI'
directory3 = '../Data_revision/AD'

scheffe(directory1, directory2, directory3)

AV45 	 GM Anterior intra-parietal sulcus hIP3 R
           CN       MCI        AD
CN   1.000000  0.916342  0.042334
MCI  0.916342  1.000000  0.307361
AD   0.042334  0.307361  1.000000

AV45 	 GM Superior parietal lobule 7P L
           CN       MCI        AD
CN   1.000000  0.043307  0.220884
MCI  0.043307  1.000000  0.664573
AD   0.220884  0.664573  1.000000

PiB 	 GM Broca's area BA44 R
           CN       MCI        AD
CN   1.000000  0.996228  0.070505
MCI  0.996228  1.000000  0.005371
AD   0.070505  0.005371  1.000000

PiB 	 GM Hippocampus hippocampal-amygdaloid transition area R
           CN       MCI        AD
CN   1.000000  0.301853  0.873924
MCI  0.301853  1.000000  0.045042
AD   0.873924  0.045042  1.000000

PiB 	 GM Superior parietal lobule 7A L
           CN       MCI        AD
CN   1.000000  0.041955  0.024760
MCI  0.041955  1.000000  0.700914
AD   0.024760  0.700914  1.000000

PiB 	 GM Visual cortex V3V R
           CN       MCI        AD
CN   1.000000  0.613399  0.490895


# MLR

In [None]:
def perform_mlr(radioisotope, psych_test='MMSE'):
    """Regress a psychometric score on all ROI columns and save the model tables.

    The ROI values come from ``output_<psych_test>.csv`` and the target score
    from ``summary.csv``; both are stacked over the AD, MCI and CN folders (in
    that order) found under the module-level ``dataset_path``. The model is
    ``OLS(...).fit_regularized(alpha=2., L1_wt=0.5, refit=True)`` with rows
    containing missing values dropped. The first two tables of its summary are
    written to ``../Data_revision/stats`` as
    ``mlr_<radioisotope>_<psych_test>_model_summary.csv`` and
    ``..._model_coeffs.csv`` (coefficients sorted by t-value).

    Parameters
    ----------
    radioisotope : str
        Sub-folder name of the tracer, e.g. 'AV45'.
    psych_test : str, default 'MMSE'
        Score column of ``summary.csv`` and suffix of the input CSV.

    Notes
    -----
    Relies on the global ``dataset_path`` being defined before the call.
    NOTE(review): inputs are read from ``dataset_path`` but the outputs go to
    the hard-coded ``../Data_revision/stats``; the two only coincide while
    ``dataset_path`` points at ``../Data_revision``.
    """
    diagnoses = ('AD', 'MCI', 'CN')
    rois_file = 'output_' + psych_test.lower() + '.csv'

    indep_df = pd.concat(
        [pd.read_csv(join(dataset_path, diagnosis, radioisotope, 'stats', rois_file))
         for diagnosis in diagnoses],
        ignore_index=True,
    )
    # The first two columns are not predictors.
    indep_df = indep_df.drop(indep_df.columns[[0, 1]], axis=1)

    target_df = pd.concat(
        [pd.read_csv(join(dataset_path, diagnosis, radioisotope, 'stats', 'summary.csv'))
         for diagnosis in diagnoses],
        ignore_index=True,
    )

    X = indep_df
    y = target_df[psych_test]

    model = statsmodels.regression.linear_model.OLS(y, X, missing='drop').fit_regularized(alpha=2., L1_wt=0.5, refit=True)
    results_summary = model.summary()

    # read_html expects a file-like object; passing the HTML text itself is
    # deprecated in recent pandas releases, hence the StringIO wrappers.
    results_as_html_0 = results_summary.tables[0].as_html()
    res_df_0 = pd.read_html(StringIO(results_as_html_0), header=None, index_col=0)[0]

    results_as_html_1 = results_summary.tables[1].as_html()
    res_df_1 = pd.read_html(StringIO(results_as_html_1), header=0, index_col=0)[0]
    res_df_1 = res_df_1.rename(columns={'P>|t|': 'p'}).sort_values(['t'])

    # Create the output folder when missing so that this function does not
    # depend on another cell having created it first.
    out_dir = join('..', 'Data_revision', 'stats')
    if not exists(out_dir):
        mkdir(out_dir)

    out_prefix = join(out_dir, 'mlr_'+radioisotope.lower()+'_'+psych_test.lower())
    res_df_0.to_csv(out_prefix + '_model_summary.csv')
    res_df_1.to_csv(out_prefix + '_model_coeffs.csv')

In [None]:
# perform_mlr reads this module-level path, so it has to be set before the calls.
dataset_path = join('..', 'Data_revision')

radioisotopes = ['AV45', 'PiB']

df = None
# One regression per radioisotope and psychometric score.
for radioisotope in radioisotopes:
    for psych_test in ('MMSE', 'NPIQ'):
        perform_mlr(radioisotope, psych_test=psych_test)

# df.rename(columns={'P>|t|':'p'}, inplace=True)

# pvals = df['p'].tolist()
# df

# ANOVA results analysis

In [None]:
radioisotopes = ['AV45', 'FDG', 'PiB']
# F-value threshold applied to each radioisotope's ANOVA table.
radioisotope_critical_f = {'AV45': 3.005, 'FDG': 3.00053, 'PiB': 3.042}
stats_path = join('..', 'Data_revision', 'stats')

# NOTE(review): calculate_anova_2 in this notebook only writes the AV45 and PiB
# tables -- confirm where anova_fdg.csv comes from.
anova_dfs = {}
for radioisotope in radioisotopes:
    anova_dfs[radioisotope] = pd.read_csv(join(stats_path, 'anova_'+radioisotope.lower()+'.csv'))

# Keep only the ROIs whose F-value exceeds the radioisotope's threshold.
for radioisotope in radioisotopes:
    anova_table = anova_dfs[radioisotope]
    anova_dfs[radioisotope] = anova_table.loc[anova_table['f_value'] > radioisotope_critical_f[radioisotope]]

av45_rois = set(anova_dfs['AV45']['ROI'].tolist())
fdg_rois = set(anova_dfs['FDG']['ROI'].tolist())
pib_rois = set(anova_dfs['PiB']['ROI'].tolist())

In [None]:
# int1 = av45_rois.intersection(fdg_rois)
# int1

In [None]:
# int2 = av45_rois.intersection(pib_rois)
# int2

In [None]:
# int3 = fdg_rois.intersection(pib_rois)
# int3

### We find that no nodes are shared across the 3 radioisotopes among the ROIs exceeding each radioisotope's critical F-value

# Adjacency matrix heatmap

In [None]:
# Heatmap of the stored adjacency matrix ('adj_mat.npy') of one AD / AV45 scan.
scan = '006_S_4153~2013-09-20_11_20_39.0~I391529'
adj_mat = np.load(join('..', 'Data_revision', 'AD', 'AV45', scan, 'adj_mat.npy'))
np.fill_diagonal(adj_mat, 0)  # zero the diagonal before plotting

heat_map = sns.heatmap(
    adj_mat,
    cmap='viridis',
    xticklabels=False,
    yticklabels=False,
)
plt.show()

In [None]:
# Same scan as above, using the 'adj_mat_thresh.npy' matrix instead.
scan = '006_S_4153~2013-09-20_11_20_39.0~I391529'
adj_mat = np.load(join('..', 'Data_revision', 'AD', 'AV45', scan, 'adj_mat_thresh.npy'))
np.fill_diagonal(adj_mat, 0)  # zero the diagonal before plotting

heat_map = sns.heatmap(
    adj_mat,
    cmap='viridis',
    xticklabels=False,
    yticklabels=False,
)
plt.show()

In [None]:
# import networkx as nx

In [None]:
# net = nx.from_numpy_matrix(adj_mat)
# net.number_of_edges()

In [None]:
# net_thresh = nx.from_numpy_matrix(np.load(join('..', 'Data_revision', 'AD', 'FDG', scan, 'adj_mat_thresh.npy')))
# net_thresh.number_of_edges()

# ANOVA - Influential ranking comparison

In [None]:
# radioisotopes = ['AV45', 'FDG', 'PiB']
# radioisotope_critical_f = {'AV45': 3.005, 'FDG': 3.00053, 'PiB': 3.042}
# stats_path = join('..', 'Data_revision', 'stats')

# anova_dfs = {radioisotope: pd.read_csv(join(stats_path, 'anova_'+radioisotope.lower()+'.csv')) for radioisotope in radioisotopes}
# anova_dfs.update({radioisotope: anova_dfs[radioisotope].loc[anova_dfs[radioisotope]['f_value']>radioisotope_critical_f[radioisotope]] for radioisotope in radioisotopes})

# av45_rois = set(anova_dfs['AV45']['ROI'].tolist())
# fdg_rois = set(anova_dfs['FDG']['ROI'].tolist())
# pib_rois = set(anova_dfs['PiB']['ROI'].tolist())

In [None]:
# dataset_path = join('..', 'Data_revision')

# diagnoses_paths = [join(dataset_path, diagnosis) for diagnosis in ['AD', 'CN', 'MCI']]

# for diagnosis_path in diagnoses_paths:
#     print(diagnosis_path.split('/')[-1])
#     for radioisotope in radioisotopes:
#         print(radioisotope)
#         rois = anova_dfs[radioisotope]['ROI'].tolist()
#         ranking_df = pd.read_csv(join(diagnosis_path, radioisotope, 'stats', 'roi_ranking.csv'))
#         for roi in rois:
#             roi = roi.replace(' ', '.')
#             # print(roi)
#             row = ranking_df.loc[ranking_df['ROI'] == roi]
#             # print(row)
#             ind = ranking_df.loc[ranking_df['ROI'] == roi].index.values[0]
#             print('ROI: ', row.iloc[0]['ROI'], '\t', 'Rank: ', row.iloc[0]['Rank'], '\t', 'Relative list rank: ', ind + 1)
#     print('\n')

In [None]:
def circos(ranklist1_name, ranklist2_name, ranklist1, ranklist2):
    """Export the rank differences between two ROI rankings as circos input files.

    A matrix is built with one row per entry of ``ranklist2`` (``Two_<rank>``)
    and one column per entry of ``ranklist1`` (``One_<rank>``). A cell holds the
    absolute difference of the two 0-based ranks when both entries name the same
    ROI and 0 otherwise. Rows and columns that are entirely 0 are removed and
    the remaining rows are shuffled (unseeded, so their order varies per run).

    Files written to ``circos_files`` (created when missing):

    * ``<name1>_<name2>.tsv`` - the matrix, tab-separated, with ``data`` as the
      label of the index column;
    * ``<name1>_<name2>_ranklist1_legend.json`` / ``..._ranklist2_legend.json`` -
      mappings from the ``One_*`` / ``Two_*`` keys to the ROI names.

    Parameters
    ----------
    ranklist1_name, ranklist2_name : str
        Labels used in the output file names.
    ranklist1, ranklist2 : pandas.DataFrame
        Rankings with an 'ROI' column, best rank first. Any length is accepted.

    Notes
    -----
    An ROI that sits at the same rank in both lists yields a difference of 0
    and is therefore treated like "no match".
    """
    header = {'One_' + str(rank + 1): roi for rank, roi in enumerate(ranklist1['ROI'].tolist())}
    ind = {'Two_' + str(rank + 1): roi for rank, roi in enumerate(ranklist2['ROI'].tolist())}

    header_keys = list(header)
    ind_keys = list(ind)

    df = pd.DataFrame(index=ind_keys, columns=header_keys)

    # Iterate over the actual number of rows: the former hard-coded range(30)
    # raised an IndexError for rankings shorter than 30 entries.
    for i, row_key in enumerate(ind_keys):
        df.iloc[i] = [abs(x - i) if header[col_key] == ind[row_key] else 0
                      for x, col_key in enumerate(header_keys)]

    df = df[(df.T != 0).any()]
    df = df.loc[:, (df != 0).any(axis=0)]
    df = df.sample(frac=1)

    out_dir = 'circos_files'
    if not exists(out_dir):
        mkdir(out_dir)
    out_prefix = join(out_dir, ranklist1_name + '_' + ranklist2_name)

    # index_label writes 'data' into the otherwise empty first header cell,
    # replacing the former re-open-and-prepend step.
    df.to_csv(out_prefix + '.tsv', sep='\t', index_label='data')

    # Context managers close the legend files deterministically.
    with open(out_prefix + '_ranklist1_legend.json', 'w') as legend_file:
        json.dump(header, legend_file)
    with open(out_prefix + '_ranklist2_legend.json', 'w') as legend_file:
        json.dump(ind, legend_file)

In [None]:
# ranklist1 = pd.read_csv(join('..', 'Data_revision', 'AD', 'FDG', 'stats', 'roi_ranking.csv'))
# ranklist1 = ranklist1.head(30)

In [None]:
# ranklist2 = pd.read_csv(join('..', 'Data_revision', 'AD', 'AV45', 'stats', 'roi_ranking.csv'))
# ranklist2 = ranklist2.head(30)

In [None]:
# header = ranklist1['ROI'].tolist()
# header = ['One_'+elem.replace('.', '_').replace('/', 'x').replace('-', '_').replace("'", '_') for elem in header]
# ind = ranklist2['ROI'].tolist()
# ind = ['Two_'+elem.replace('.', '_').replace('/', '_').replace('-', '_').replace("'", '_') for elem in ind]

# df = pd.DataFrame(index=ind, columns=header)
# df

In [None]:
# for i in range(30):
#     df.iloc[i] = [abs(x - i) if header[x][4:] == ind[i][4:] else 0 for x in range(len(header))]

# df

In [None]:
# df = df[(df.T != 0).any()]
# df = df.loc[:, (df != 0).any(axis=0)]
# df = df.sample(frac = 1) 
# df

In [None]:
# df.to_csv('temp.tsv', sep='\t')

In [None]:
dataset_path = join('..', 'Data_revision')
radioisotopes = ['AV45', 'FDG', 'PiB']

# Sorted so that the pairing order, and with it the output file names, is the
# same on every run and file system.
diagnoses_paths = sorted(glob(join(dataset_path, '*')))
ranklists = dict()

for diagnosis_path in diagnoses_paths:
    # basename() instead of split('/') so that Windows-style paths work too.
    diagnosis = basename(diagnosis_path)
    for radioisotope in radioisotopes:
        ranking_file = join(diagnosis_path, radioisotope, 'stats', 'roi_ranking.csv')
        # The '*' pattern also matches entries that are not diagnosis folders
        # (e.g. '../Data_revision/stats'); anything without a ranking is skipped.
        if not exists(ranking_file):
            continue
        # Only the 30 best-ranked ROIs of each list are compared.
        ranklists[diagnosis+'_'+radioisotope] = pd.read_csv(ranking_file).head(30)

# One circos export per unordered pair of rankings.
for (name1, ranklist1), (name2, ranklist2) in combinations(ranklists.items(), 2):
    circos(name1, name2, ranklist1, ranklist2)
