# 04 Comparison of results

Compares the main analyses to the results when using the alternative FDG-PET reference region  
Also compares p-tau217 results in the main analyses to the ratio of p-tau217 to amyloid-beta 42  

In [1]:
# install required packages - commented out so it doesn't install every time
#%conda install -n Lauren openpyxl numpy pandas statsmodels plotnine matplotlib scikit-learn scipy mizani nbconvert pandoc pyreadstat kmodes seaborn

# import required packages
import numpy as np;
import pandas as pd;
from sklearn import preprocessing 
import statsmodels as sm;
import statsmodels.formula.api as smf;
import plotnine as p9;
import pickle

  from pandas.core import (


# read in and clean data

In [2]:
rename_columns = ['model_p_val_baseline', 'model_B_val_baseline',
'model_n_baseline', 
       'model_p_val_Intercept', 'model_B_val_Intercept',
       'model_p_val_x_var', 'model_B_val_x_var',
        'model_p_val_age','model_B_val_age',
       'model_p_val_Education_years', 'model_B_val_Education_years', 
       'model_p_val_sex[T.M]', 'model_B_val_sex[T.M]',
        'model_p_val_apoe4_carrier[T.Yes]', 'model_B_val_apoe4_carrier[T.Yes]',
         'model_p_val_race[T.White/NH]', 'model_B_val_race[T.White/NH]']

covariate_pval_columns = [ 'model_p_val_age',
       'model_p_val_Education_years', 
       'model_p_val_sex[T.M]', 
        'model_p_val_apoe4_carrier[T.Yes]',
         'model_p_val_race[T.White/NH]']


# define biomarker groups
FDG_columns = ['Avg_MedOrbFrontal', 'Graycer_gm', 'Avg_PCC', 'Avg_Hip', 'MTL_gm', 'CO', 'Temp', 'SensMot_gm', 'L_Hip',
 'Vermis_gm', 'FRONTAL_gm', 'AC_gm', 'Precun_gm', 'Par_gm', 'RASref1_gm', 'Temp_gm', 'Paracentral_gm', 'R_Hip', 'PostCing_gm']

plasma_columns = [ 'Ab40', 'Ab42', 'Ab42_40', 'GFAP','NFL',  'pTau181', 'pTau217',  'pTau231', 'pTau181_Ab42', 'GFAP_aa', 'NFL_aa', 'pTau217_Ab42']
log10_plasma_columns = [s + '_log10' for s in plasma_columns]

cog_columns = ['MMSE', 'adascogtotal', 'bvrt', 'dstotal','tma','tmb',  'cowattotal', 'cdrtotal', 'cdrsum', 'adltotal', 'npitotal', 'gds']
vol_columns = ['VOL_Precun_Lz', 'VOL_Precun_Rz', 'VOL_InfPar_Lz', 'VOL_InfPar_Rz', 'VOL_Hip_Lz', 'VOL_Hip_Rz', 'VOL_TotalGrayz', 'VOL_Inf_Mid_Fus_Temp_Lz', 
'VOL_Inf_Mid_Fus_Temp_Rz','VOL_MidFront_Lz', 'VOL_MidFront_Rz']
      

In [3]:
# read in data
results_new_para, data4 = pd.read_pickle('./output/01_data_new_para_rr.pkl')
results_new_para = results_new_para.rename(columns = dict(zip(rename_columns, 
['new_para_' + ele for ele in rename_columns])))

# read in data
results_pons, data2 = pd.read_pickle('./output/01_data_pons_rr.pkl')
results_pons = results_pons.rename(columns = dict(zip(rename_columns, 
['pons_' + ele for ele in rename_columns])))

# Pons vs New Para

In [4]:
merged_results = pd.merge(results_pons, results_new_para, how = 'inner')

## x_var significance

In [5]:
# any relationships that are significant using pons rr but not significant when using main rr
merged_results.loc[(merged_results['pons_model_p_val_x_var'] < 0.05) & (merged_results['new_para_model_p_val_x_var'] >= 0.05), ['x_var', 'y_var', 
'pons_model_p_val_x_var', 'new_para_model_p_val_x_var',
'pons_model_B_val_x_var', 'new_para_model_B_val_x_var', 
'pons_model_n_baseline','new_para_model_n_baseline']]

Unnamed: 0,x_var,y_var,pons_model_p_val_x_var,new_para_model_p_val_x_var,pons_model_B_val_x_var,new_para_model_B_val_x_var,pons_model_n_baseline,new_para_model_n_baseline


In [6]:
# any relationships that are significant using main rr but not significant when using pons rr
merged_results.loc[(merged_results['pons_model_p_val_x_var'] >= 0.05) & (merged_results['new_para_model_p_val_x_var'] < 0.05), 
['x_var', 'y_var',
'pons_model_p_val_x_var', 'new_para_model_p_val_x_var',
'pons_model_B_val_x_var', 'new_para_model_B_val_x_var', 
'pons_model_n_baseline','new_para_model_n_baseline']]

Unnamed: 0,x_var,y_var,pons_model_p_val_x_var,new_para_model_p_val_x_var,pons_model_B_val_x_var,new_para_model_B_val_x_var,pons_model_n_baseline,new_para_model_n_baseline
0,Ab42_40_log10,MTL_gm,0.538983,0.010588,0.114689,0.389317,31,35
2,Ab42_40_log10,Precun_gm,0.269296,0.021403,0.204866,0.373903,31,35
18,pTau181_log10,MTL_gm,0.103894,4.8e-05,0.271625,0.54713,37,41
23,pTau181_log10,PostCing_gm,0.281572,0.015868,0.159634,0.373023,37,41
26,pTau217_log10,Precun_gm,0.240472,0.044767,-0.206857,-0.327454,34,38
30,pTau231_log10,MTL_gm,0.164072,0.032529,0.247989,0.347523,33,38
39,pTau217_Ab42_log10,Par_gm,0.12387,0.011858,-0.292311,-0.432901,29,33
40,pTau217_Ab42_log10,Temp_gm,0.146871,0.016024,-0.27626,-0.416077,29,33


In [7]:
# difference in p-values across all models
(merged_results['pons_model_p_val_x_var']  - merged_results['new_para_model_p_val_x_var']).describe().round(3)

count    707.000
mean       0.011
std        0.090
min       -0.598
25%       -0.000
50%        0.000
75%        0.000
max        0.924
dtype: float64

In [8]:
# relationships where p-value changes more than 0.1
merged_results.loc[(abs(merged_results['pons_model_p_val_x_var']  - merged_results['new_para_model_p_val_x_var'])> 0.1), ['x_var', 'y_var', 'pons_model_p_val_x_var',
       'pons_model_B_val_x_var', 'pons_model_n_baseline', 'new_para_model_p_val_x_var', 'new_para_model_B_val_x_var',
       'new_para_model_n_baseline']].sort_values('new_para_model_p_val_x_var')

Unnamed: 0,x_var,y_var,pons_model_p_val_x_var,pons_model_B_val_x_var,pons_model_n_baseline,new_para_model_p_val_x_var,new_para_model_B_val_x_var,new_para_model_n_baseline
18,pTau181_log10,MTL_gm,0.103894,0.271625,37,4.8e-05,0.54713,41
0,Ab42_40_log10,MTL_gm,0.538983,0.114689,31,0.010588,0.389317,35
39,pTau217_Ab42_log10,Par_gm,0.12387,-0.292311,29,0.011858,-0.432901,33
23,pTau181_log10,PostCing_gm,0.281572,0.159634,37,0.015868,0.373023,41
40,pTau217_Ab42_log10,Temp_gm,0.146871,-0.27626,29,0.016024,-0.416077,33
2,Ab42_40_log10,Precun_gm,0.269296,0.204866,31,0.021403,0.373903,35
30,pTau231_log10,MTL_gm,0.164072,0.247989,33,0.032529,0.347523,38
26,pTau217_log10,Precun_gm,0.240472,-0.206857,34,0.044767,-0.327454,38
10,GFAP_log10,Temp_gm,0.632654,-0.079818,37,0.061341,-0.291202,42
38,pTau217_Ab42_log10,Precun_gm,0.400128,-0.162343,29,0.062075,-0.328377,33


## covariate significance

In [9]:
# those where covariate significance differs between the reference regions
a_sig = merged_results[['pons_' + ele for ele in covariate_pval_columns]].notna().any(axis =1)
b_sig = merged_results[['new_para_' + ele for ele in covariate_pval_columns]].notna().any(axis =1)

temp = merged_results[a_sig != b_sig].drop(columns = merged_results.columns[merged_results.columns.str.contains('B_val|Intercept|baseline|type|p_val_x_var')]).dropna(axis = 1, how = 'all')
temp = temp[temp.columns.sort_values()]
temp = temp.loc[temp.isna().sort_values(temp.columns.to_list()).index, :]
temp

Unnamed: 0,new_para_model_p_val_Education_years,new_para_model_p_val_age,pons_model_p_val_sex[T.M],x_var,y_var
20,0.006586,0.016969,,pTau181_log10,Precun_gm
0,0.002342,,,Ab42_40_log10,MTL_gm
6,0.005543,,,GFAP_log10,MTL_gm
12,0.00188,,,NFL_log10,MTL_gm
18,1.7e-05,,,pTau181_log10,MTL_gm
24,0.013116,,,pTau217_log10,MTL_gm
36,0.006126,,,pTau217_Ab42_log10,MTL_gm
9,,0.028485,,GFAP_log10,Par_gm
15,,0.036415,,NFL_log10,Par_gm
21,,0.041938,,pTau181_log10,Par_gm


# ptau 217 vs ptau 217/Ab42 (Within results when using the main reference region)

In [10]:
temp_a, data4 = pd.read_pickle('./output/01_data_new_para_rr.pkl')
temp_b = temp_a[temp_a['x_var'].isin(['pTau217_log10']) | temp_a['y_var'].isin(['pTau217_log10'])]
temp_a = temp_a[temp_a['x_var'].isin(['pTau217_Ab42_log10']) | temp_a['y_var'].isin(['pTau217_Ab42_log10'])]
temp_a = temp_a.rename(columns = dict(zip(rename_columns, ['nonratio_' + ele for ele in rename_columns])))

temp_a['x_var'] = temp_a['x_var'].str.replace('pTau217_Ab42', 'pTau217') # change the ratio name so that they can be merged with ptau217 alone (ratio instead becomes indicated by the column name prefix)
temp_a['y_var'] = temp_a['y_var'].str.replace('pTau217_Ab42', 'pTau217')
temp_b = temp_b.rename(columns = dict(zip(rename_columns, ['ratioAb42_' + ele for ele in rename_columns])))


In [11]:
merged_results = pd.merge(temp_a, temp_b, how = 'inner').sort_values(['x_var', 'type', 'y_var'])
merged_results

Unnamed: 0,y_var,x_var,nonratio_model_p_val_baseline,nonratio_model_B_val_baseline,nonratio_model_n_baseline,nonratio_model_p_val_Intercept,nonratio_model_p_val_Education_years,nonratio_model_p_val_x_var,nonratio_model_B_val_Intercept,nonratio_model_B_val_Education_years,...,ratioAb42_model_B_val_Education_years,ratioAb42_model_B_val_x_var,ratioAb42_model_p_val_age,ratioAb42_model_B_val_age,ratioAb42_model_p_val_sex[T.M],ratioAb42_model_B_val_sex[T.M],ratioAb42_model_p_val_apoe4_carrier[T.Yes],ratioAb42_model_B_val_apoe4_carrier[T.Yes],ratioAb42_model_B_val_race[T.White/NH],ratioAb42_model_p_val_race[T.White/NH]
8,pTau217_log10,Ab42_40_log10,Intercept 1.000000 Ab42_40_log10 0.7...,Intercept 0.000000 Ab42_40_log10 0.0...,34,1.0,,0.794579,0.000000e+00,,...,,0.057931,,,,,,,,
9,pTau217_log10,GFAP_log10,Intercept 1.000000 GFAP_log10 0.160224 ...,Intercept 0.000000 GFAP_log10 0.246323 ...,34,1.0,,0.160224,0.000000e+00,,...,,0.396230,,,,,,,,
10,pTau217_log10,NFL_log10,Intercept 1.000000 NFL_log10 0.590326 dt...,Intercept 0.000000 NFL_log10 -0.095696 dt...,34,1.0,,0.590326,0.000000e+00,,...,,0.157954,,,,,,,,
11,pTau217_log10,pTau181_log10,Intercept 1.000000 pTau181_log10 0.0...,Intercept 0.000000 pTau181_log10 0.4...,34,1.0,,0.003276,0.000000e+00,,...,,0.449087,,,,,,,,
18,MMSE,pTau217_log10,Intercept 1.000000 pTau217_Ab42_lo...,Intercept 4.479307e-16 pTau217_Ab4...,29,1.0,,0.463867,4.479307e-16,,...,,-0.282524,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
84,VOL_TotalGray_Rz,pTau217_log10,Intercept 1.000000 pTau217_Ab42_lo...,Intercept 9.714451e-17 pTau217_Ab4...,27,1.0,,0.725136,9.714451e-17,,...,,-0.263960,,,,,,,,
82,VOL_TotalGrayz,pTau217_log10,Intercept 1.000000 pTau217_Ab42_lo...,Intercept 5.551115e-17 pTau217_Ab4...,27,1.0,,0.906817,5.551115e-17,,...,,-0.282799,,,,,,,,
30,VOL_Ventricles_Lz,pTau217_log10,Intercept 1.000000 pTau217_Ab42_lo...,Intercept -6.938894e-18 pTau217_Ab4...,27,1.0,,0.621435,-6.938894e-18,,...,,0.187538,,,,,,,,
31,VOL_Ventricles_Rz,pTau217_log10,Intercept 1.000000 pTau217_Ab42_lo...,Intercept 2.636780e-16 pTau217_Ab4...,27,1.0,,0.297138,2.636780e-16,,...,,0.390285,,,,,,,,


## x_var significance

In [12]:
# models where both the ratio and p-tau217 alone are significant
merged_results.loc[(merged_results['ratioAb42_model_p_val_x_var'] < 0.05) & (merged_results['nonratio_model_p_val_x_var'] < 0.05), ['x_var', 'y_var', 
'ratioAb42_model_p_val_x_var', 'nonratio_model_p_val_x_var',
'ratioAb42_model_B_val_x_var', 'nonratio_model_B_val_x_var', 
'ratioAb42_model_n_baseline','nonratio_model_n_baseline']]

Unnamed: 0,x_var,y_var,ratioAb42_model_p_val_x_var,nonratio_model_p_val_x_var,ratioAb42_model_B_val_x_var,nonratio_model_B_val_x_var,ratioAb42_model_n_baseline,nonratio_model_n_baseline
11,pTau181_log10,pTau217_log10,0.004133,0.003276,0.449087,0.489856,39,34
20,pTau217_log10,bvrt,0.002598,0.005452,-0.490103,-0.481772,33,28
22,pTau217_log10,tma,0.013732,0.049854,0.412498,0.381088,31,27
5,pTau217_log10,Par_gm,0.001689,0.011858,-0.492376,-0.432901,38,33
6,pTau217_log10,Temp_gm,0.000963,0.016024,-0.514007,-0.416077,38,33
16,pTau217_log10,pTau181_log10,0.004133,0.003276,0.449087,0.489856,39,34
17,pTau217_log10,pTau231_log10,0.031501,0.00321,0.359071,0.497659,36,33
12,pTau231_log10,pTau217_log10,0.031501,0.00321,0.359071,0.497659,36,33


In [13]:
# models where the ratio is significant but p-tau217 alone is not
merged_results.loc[(merged_results['ratioAb42_model_p_val_x_var'] < 0.05) & (merged_results['nonratio_model_p_val_x_var'] >= 0.05), ['x_var', 'y_var', 
'ratioAb42_model_p_val_x_var', 'nonratio_model_p_val_x_var',
'ratioAb42_model_B_val_x_var', 'nonratio_model_B_val_x_var', 
'ratioAb42_model_n_baseline','nonratio_model_n_baseline']]

Unnamed: 0,x_var,y_var,ratioAb42_model_p_val_x_var,nonratio_model_p_val_x_var,ratioAb42_model_B_val_x_var,nonratio_model_B_val_x_var,ratioAb42_model_n_baseline,nonratio_model_n_baseline
9,GFAP_log10,pTau217_log10,0.012522,0.160224,0.39623,0.246323,39,34
21,pTau217_log10,dstotal,0.044728,0.163501,-0.346448,-0.265756,34,29
4,pTau217_log10,Precun_gm,0.044767,0.062075,-0.327454,-0.328377,38,33
1,pTau217_log10,SensMot_gm,0.033251,0.086498,-0.346181,-0.303021,38,33
14,pTau217_log10,GFAP_log10,0.012522,0.160224,0.39623,0.246323,39,34
105,pTau217_log10,VOL_InfParSupra_Lz,0.004959,0.735875,-0.429074,-0.062011,32,27
106,pTau217_log10,VOL_InfParSupra_Rz,0.035118,0.964642,-0.373721,-0.008955,32,27
46,pTau217_log10,VOL_InfPar_Lz,0.018954,0.969799,-0.376511,0.007081,32,27
47,pTau217_log10,VOL_InfPar_Rz,0.046486,0.75679,-0.354532,-0.062501,32,27
87,pTau217_log10,VOL_Parietal_Lz,0.045699,0.482644,-0.316908,-0.125376,32,27


In [14]:
# models where p-tau217 alone is significant but the ratio is not
merged_results.loc[(merged_results['ratioAb42_model_p_val_x_var'] >= 0.05) & (merged_results['nonratio_model_p_val_x_var'] < 0.05), 
['x_var', 'y_var',
'ratioAb42_model_p_val_x_var', 'nonratio_model_p_val_x_var',
'ratioAb42_model_B_val_x_var', 'nonratio_model_B_val_x_var', 
'ratioAb42_model_n_baseline','nonratio_model_n_baseline']]

Unnamed: 0,x_var,y_var,ratioAb42_model_p_val_x_var,nonratio_model_p_val_x_var,ratioAb42_model_B_val_x_var,nonratio_model_B_val_x_var,ratioAb42_model_n_baseline,nonratio_model_n_baseline
74,pTau217_log10,VOL_AntCingulate_Lz,0.219084,0.043749,0.223382,0.39097,32,27
56,pTau217_log10,VOL_Insula_Lz,0.427073,0.015863,0.145435,0.459634,32,27
57,pTau217_log10,VOL_Insula_Rz,0.276686,0.01984,0.198268,0.445594,32,27
40,pTau217_log10,VOL_MidTemp_Lz,0.570045,0.009827,-0.104281,0.516237,32,27


In [15]:
# p-value difference accross all models
(merged_results['ratioAb42_model_p_val_x_var']  - merged_results['nonratio_model_p_val_x_var']).describe().round(3)

count    107.000
mean      -0.107
std        0.337
min       -0.951
25%       -0.272
50%       -0.088
75%        0.081
max        0.817
dtype: float64

In [16]:
# models where the difference in p-values is over 0.1
merged_results.loc[(abs(merged_results['ratioAb42_model_p_val_x_var']  - merged_results['nonratio_model_p_val_x_var'])> 0.1), ['x_var', 'y_var', 'ratioAb42_model_p_val_x_var',
       'ratioAb42_model_B_val_x_var', 'ratioAb42_model_n_baseline', 'nonratio_model_p_val_x_var', 'nonratio_model_B_val_x_var',
       'nonratio_model_n_baseline']].sort_values('nonratio_model_p_val_x_var')

Unnamed: 0,x_var,y_var,ratioAb42_model_p_val_x_var,ratioAb42_model_B_val_x_var,ratioAb42_model_n_baseline,nonratio_model_p_val_x_var,nonratio_model_B_val_x_var,nonratio_model_n_baseline
40,pTau217_log10,VOL_MidTemp_Lz,0.570045,-0.104281,32,0.009827,0.516237,27
56,pTau217_log10,VOL_Insula_Lz,0.427073,0.145435,32,0.015863,0.459634,27
57,pTau217_log10,VOL_Insula_Rz,0.276686,0.198268,32,0.019840,0.445594,27
74,pTau217_log10,VOL_AntCingulate_Lz,0.219084,0.223382,32,0.043749,0.390970,27
94,pTau217_log10,VOL_InfInsFrontal_Rz,0.919390,-0.018630,32,0.101900,0.321583,27
...,...,...,...,...,...,...,...,...
89,pTau217_log10,VOL_Frontal_Lz,0.462486,-0.134652,32,0.958036,0.010629,27
36,pTau217_log10,VOL_Fusi_Lz,0.191986,-0.236770,32,0.963078,-0.009351,27
106,pTau217_log10,VOL_InfParSupra_Rz,0.035118,-0.373721,32,0.964642,-0.008955,27
46,pTau217_log10,VOL_InfPar_Lz,0.018954,-0.376511,32,0.969799,0.007081,27


## covariate significance

In [17]:
# models where covariate significance differs between p-tau217 and the ratio
a_sig = merged_results[['ratioAb42_' + ele for ele in covariate_pval_columns]].notna().any(axis =1)
b_sig = merged_results[['nonratio_' + ele for ele in covariate_pval_columns]].notna().any(axis =1)

temp = merged_results[a_sig != b_sig].drop(columns = merged_results.columns[merged_results.columns.str.contains('B_val|Intercept|baseline|type|p_val_x_var')]).dropna(axis = 1, how = 'all')
temp = temp[temp.columns.sort_values()]
temp = temp.loc[temp.isna().sort_values(temp.columns.to_list()).index, :]
temp

Unnamed: 0,nonratio_model_p_val_Education_years,nonratio_model_p_val_age,nonratio_model_p_val_sex[T.M],ratioAb42_model_p_val_age,ratioAb42_model_p_val_apoe4_carrier[T.Yes],ratioAb42_model_p_val_sex[T.M],x_var,y_var
40,0.007396,0.017757,,,,,pTau217_log10,VOL_MidTemp_Lz
41,0.046259,,0.031,,,,pTau217_log10,VOL_MidTemp_Rz
60,,0.010897,,,,,pTau217_log10,VOL_MidFront_Lz
95,,0.014246,,,,,pTau217_log10,VOL_MidSupFrontal_Lz
44,,0.021432,,,,,pTau217_log10,VOL_Precun_Lz
22,,,,0.034826,,,pTau217_log10,tma
88,,,,0.017752,,,pTau217_log10,VOL_Parietal_Rz
51,,,,,0.022287,,pTau217_log10,VOL_SupraMarg_Rz
13,,,,,,0.029434,pTau217_log10,Ab42_40_log10
78,,,,,,0.015007,pTau217_log10,VOL_Entorhinal_Lz
