# 06 compare the different FDG runs / reference regions

versions needed to compare:

Data versions:
original reference region + original subject set
Paracentral-gm reference region + original subject set
Paracentral-gm reference region + new subject set
RAS-ref1 reference region + original subject set
RAS-ref1 reference region + new subject set

In [1]:
# install required packages - commented out so it doesn't install every time
#%conda install -n Lauren openpyxl numpy pandas statsmodels plotnine matplotlib scikit-learn scipy mizani nbconvert pandoc pyreadstat kmodes seaborn

# import required packages
import numpy as np;
import pandas as pd;
from sklearn import preprocessing 
import statsmodels as sm;
import statsmodels.formula.api as smf;
import plotnine as p9;
import pickle



# read in and clean data

In [2]:
data_folder_loc = "//admsyn/Primary/ADM/CustomerStudies/Rockefeller/Riluzole_Biomarkers/"
code_folder_loc = "//admsyn/homes/@DH-ADMDX/0/lauren.koenig-1606/code/Riluzole FDG/"

In [3]:
# read in data
results_composite, temp, data1 = pd.read_pickle(code_folder_loc + '/output/04_data_composite_rr.pkl')
results_pons, temp, data2 = pd.read_pickle(code_folder_loc + '/output/04_data_pons_rr.pkl')
results_para, temp, data3 = pd.read_pickle(code_folder_loc + '/output/04_data_para_rr.pkl')
results_new_para, temp, data4 = pd.read_pickle(code_folder_loc + '/output/04_data_new_para_rr.pkl')
results_ras, temp, data5 = pd.read_pickle(code_folder_loc + '/output/04_data_ras_rr.pkl')
results_subset_new_para, temp, data5_subset = pd.read_pickle(code_folder_loc + '/output/04_data_new_para_rr_subset.pkl')


#results_orig_new_para, temp, data6 = pd.read_pickle(code_folder_loc + '/output/with_RIL8_included/04_data_new_para_rr.pkl')


In [4]:
x_var_list = ['Ab42_40', 'GFAP', 'NFL', 'pTau181', 'pTau217', 'pTau231']
y_var_list = ['RASref1_gm', 'Paracentral_gm', 'Ab42_log10', 'pTau181_Ab42_log10', 'Ab40_log10']
results_composite = results_composite[results_composite['x_var'].isin(x_var_list) & ~results_composite['y_var'].isin(y_var_list)]
results_pons = results_pons[results_pons['x_var'].isin(x_var_list) & ~results_pons['y_var'].isin(y_var_list)]
results_para = results_para[results_para['x_var'].isin(x_var_list) & ~results_para['y_var'].isin(y_var_list)]
results_new_para = results_new_para[results_new_para['x_var'].isin(x_var_list) & ~results_new_para['y_var'].isin(y_var_list)]
results_ras = results_ras[results_ras['x_var'].isin(x_var_list) & ~results_ras['y_var'].isin(y_var_list)]
results_subset_new_para = results_subset_new_para[results_subset_new_para['x_var'].isin(x_var_list) & ~results_subset_new_para['y_var'].isin(y_var_list)]
#results_orig_new_para = results_orig_new_para[results_orig_new_para['x_var'].isin(x_var_list) & ~results_orig_new_para['y_var'].isin(y_var_list)]


In [5]:
rename_columns = ['model_p_val_baseline', 'model_B_val_baseline',
'model_n_baseline', 
       'model_p_val_Intercept', 'model_B_val_Intercept',
       'model_p_val_x_var', 'model_B_val_x_var',
        'model_p_val_age','model_B_val_age',
       'model_p_val_Education_years', 'model_B_val_Education_years', 
       'model_p_val_sex[T.M]', 'model_B_val_sex[T.M]',
        'model_p_val_apoe4_carrier[T.Yes]', 'model_B_val_apoe4_carrier[T.Yes]',
         'model_p_val_race[T.White/NH]', 'model_B_val_race[T.White/NH]']

In [6]:
covariate_pval_columns = [ 'model_p_val_age',
       'model_p_val_Education_years', 
       'model_p_val_sex[T.M]', 
        'model_p_val_apoe4_carrier[T.Yes]',
         'model_p_val_race[T.White/NH]']

In [7]:
results_composite = results_composite.rename(columns = dict(zip(rename_columns, 
['composite_' + ele for ele in rename_columns])))

results_pons = results_pons.rename(columns = dict(zip(rename_columns, 
['pons_' + ele for ele in rename_columns])))

results_para = results_para.rename(columns = dict(zip(rename_columns, 
['para_' + ele for ele in rename_columns])))

results_new_para = results_new_para.rename(columns = dict(zip(rename_columns, 
['new_para_' + ele for ele in rename_columns])))

results_ras = results_ras.rename(columns = dict(zip(rename_columns, 
['ras_' + ele for ele in rename_columns])))

results_subset_new_para = results_subset_new_para.rename(columns = dict(zip(rename_columns, 
['subset_new_para_' + ele for ele in rename_columns])))

#results_orig_new_para = results_orig_new_para.rename(columns = dict(zip(rename_columns, 
#['orig_new_para_' + ele for ele in rename_columns])))


In [8]:
# define biomarker groups
FDG_columns = ['Avg_MedOrbFrontal', 'Graycer_gm', 'Avg_PCC', 'Avg_Hip', 'MTL_gm', 'CO', 'Temp', 'SensMot_gm', 'L_Hip',
 'Vermis_gm', 'FRONTAL_gm', 'AC_gm', 'Precun_gm', 'Par_gm', 'RASref1_gm', 'Temp_gm', 'Paracentral_gm', 'R_Hip', 'PostCing_gm']

plasma_columns = [ 'Ab40', 'Ab42', 'Ab42_40', 'GFAP','NFL',  'pTau181', 'pTau217',  'pTau231', 'pTau181_Ab42']
log10_plasma_columns = [s + '_log10' for s in plasma_columns]

cog_columns = ['MMSE', 'adascogtotal', 'bvrt', 'dstotal','tma','tmb',  'cowattotal', 'cdrtotal', 'cdrsum', 'adltotal', 'npitotal', 'gds']
vol_columns = ['VOL_Precun_Lz', 'VOL_Precun_Rz', 'VOL_InfPar_Lz', 'VOL_InfPar_Rz', 'VOL_Hip_Lz', 'VOL_Hip_Rz', 'VOL_TotalGrayz', 'VOL_Inf_Mid_Fus_Temp_Lz', 
'VOL_Inf_Mid_Fus_Temp_Rz','VOL_MidFront_Lz', 'VOL_MidFront_Rz']


# Composite vs New Para

In [9]:
comp_new_para = pd.merge(results_composite, results_new_para, how = 'inner')
comp_new_para

Unnamed: 0,y_var,x_var,composite_model_p_val_baseline,composite_model_B_val_baseline,composite_model_n_baseline,composite_model_p_val_Intercept,composite_model_p_val_x_var,composite_model_B_val_Intercept,composite_model_B_val_x_var,composite_model_p_val_age,...,new_para_model_B_val_Education_years,new_para_model_B_val_x_var,new_para_model_p_val_age,new_para_model_B_val_age,new_para_model_p_val_sex[T.M],new_para_model_B_val_sex[T.M],new_para_model_p_val_apoe4_carrier[T.Yes],new_para_model_B_val_apoe4_carrier[T.Yes],new_para_model_p_val_race[T.White/NH],new_para_model_B_val_race[T.White/NH]
0,MTL_gm,Ab42_40,Intercept 1.000000 Education_years ...,Intercept 1.314920e-15 Education_year...,31,1.0,0.010371,1.314920e-15,0.386573,,...,0.467310,0.389317,,,,,,,,
1,FRONTAL_gm,Ab42_40,Intercept 1.000000 age ...,Intercept 2.914335e-16 age ...,31,1.0,0.955692,2.914335e-16,0.009775,0.020244,...,,-0.056644,,,,,,,,
2,AC_gm,Ab42_40,Intercept 1.000000 Ab42_40_log10 0.8...,Intercept 1.623701e-15 Ab42_40_log10 ...,31,1.0,0.864119,1.623701e-15,0.032045,,...,,0.038558,,,,,,,,
3,Precun_gm,Ab42_40,Intercept 1.000000 Ab42_40_log10 0.0...,Intercept -1.554312e-15 Ab42_40_log10 ...,31,1.0,0.040683,-1.554312e-15,0.369661,,...,0.364864,0.373903,0.007766,0.447645,,,,,,
4,Par_gm,Ab42_40,Intercept 1.000000 Ab42_40_log10 0.2...,Intercept 5.065393e-16 Ab42_40_log10 ...,31,1.0,0.256848,5.065393e-16,0.209998,,...,,0.192024,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
619,VOL_Precun_InfPar_Supramarg_Rz,pTau231,Intercept 1.000000 age ...,Intercept -4.857226e-17 age ...,32,1.0,0.008586,-4.857226e-17,0.390247,0.000075,...,0.355296,0.390247,0.000075,0.642682,,,,,,
620,VOL_LatOccLingCun_Lz,pTau231,Intercept 1.000000 pTau231_log10 0.0...,Intercept -9.714451e-17 pTau231_log10 ...,32,1.0,0.040319,-9.714451e-17,0.364402,,...,,0.364402,,,,,,,,
621,VOL_LatOccLingCun_Rz,pTau231,Intercept 1.000000 pTau231_log10 0.0...,Intercept 4.163336e-17 pTau231_log10 ...,32,1.0,0.012228,4.163336e-17,0.437723,,...,,0.437723,,,,,,,,
622,VOL_InfParSupra_Lz,pTau231,Intercept 1.000000 age 0.0...,Intercept -2.775558e-17 age ...,32,1.0,0.071284,-2.775558e-17,0.249357,0.000019,...,,0.249357,0.000019,0.678954,,,,,,


## x_var significance

In [11]:
comp_new_para.loc[(comp_new_para['composite_model_p_val_x_var'] < 0.05) & (comp_new_para['new_para_model_p_val_x_var'] >= 0.05), ['x_var', 'y_var', 'composite_model_p_val_x_var',
       'composite_model_B_val_x_var', 'composite_model_n_baseline', 'new_para_model_p_val_x_var', 'new_para_model_B_val_x_var',
       'new_para_model_n_baseline']]

Unnamed: 0,x_var,y_var,composite_model_p_val_x_var,composite_model_B_val_x_var,composite_model_n_baseline,new_para_model_p_val_x_var,new_para_model_B_val_x_var,new_para_model_n_baseline
11,GFAP,Par_gm,0.04006,-0.330695,37,0.073024,-0.271715,41


In [12]:
comp_new_para.loc[(comp_new_para['composite_model_p_val_x_var'] >= 0.05) & (comp_new_para['new_para_model_p_val_x_var'] < 0.05), ['x_var', 'y_var', 'composite_model_p_val_x_var',
       'composite_model_B_val_x_var', 'composite_model_n_baseline', 'new_para_model_p_val_x_var', 'new_para_model_B_val_x_var',
       'new_para_model_n_baseline']]

Unnamed: 0,x_var,y_var,composite_model_p_val_x_var,composite_model_B_val_x_var,composite_model_n_baseline,new_para_model_p_val_x_var,new_para_model_B_val_x_var,new_para_model_n_baseline
8,GFAP,FRONTAL_gm,0.066317,-0.305111,37,0.039403,-0.319124,42
22,pTau181,FRONTAL_gm,0.07369,0.298291,37,0.031049,0.343053,41
31,pTau217,Precun_gm,0.063515,-0.321742,34,0.044767,-0.327454,38


In [13]:
(comp_new_para['composite_model_p_val_x_var']  - comp_new_para['new_para_model_p_val_x_var']).describe().round(3)

count    624.000
mean       0.002
std        0.041
min       -0.449
25%       -0.000
50%        0.000
75%        0.000
max        0.616
dtype: float64

In [14]:
comp_new_para.loc[(abs(comp_new_para['composite_model_p_val_x_var']  - comp_new_para['new_para_model_p_val_x_var'])> 0.1), ['x_var', 'y_var', 'composite_model_p_val_x_var',
       'composite_model_B_val_x_var', 'composite_model_n_baseline', 'new_para_model_p_val_x_var', 'new_para_model_B_val_x_var',
       'new_para_model_n_baseline']].sort_values('new_para_model_p_val_x_var')

Unnamed: 0,x_var,y_var,composite_model_p_val_x_var,composite_model_B_val_x_var,composite_model_n_baseline,new_para_model_p_val_x_var,new_para_model_B_val_x_var,new_para_model_n_baseline
24,pTau181,Precun_gm,0.809518,0.041019,37,0.193241,0.204976,41
16,NFL,AC_gm,0.509327,-0.111986,37,0.321215,-0.156847,42
9,GFAP,AC_gm,0.124818,-0.256891,37,0.360539,-0.144708,42
37,pTau231,AC_gm,0.482788,0.12656,33,0.362925,0.151803,38
39,pTau231,Par_gm,0.521908,0.10612,33,0.397674,0.132297,37
26,pTau181,Temp_gm,0.66895,0.072697,37,0.417143,0.128559,42
7,GFAP,MTL_gm,0.728311,-0.052931,37,0.574787,-0.084678,41
15,NFL,FRONTAL_gm,0.812868,0.040285,37,0.6118,0.080615,42
28,pTau217,MTL_gm,0.406204,-0.130767,34,0.615648,-0.080512,37
18,NFL,Par_gm,0.988611,0.00243,37,0.722063,-0.055924,41


## covariate significance

In [15]:
# those where covariate significance differs between a and b
a_sig = comp_new_para[['composite_' + ele for ele in covariate_pval_columns]].notna().any(axis =1)
b_sig = comp_new_para[['new_para_' + ele for ele in covariate_pval_columns]].notna().any(axis =1)

temp = comp_new_para[a_sig != b_sig].drop(columns = comp_new_para.columns[comp_new_para.columns.str.contains('B_val|Intercept|baseline|type|p_val_x_var')]).dropna(axis = 1, how = 'all')
temp = temp[temp.columns.sort_values()]
temp = temp.loc[temp.isna().sort_values(temp.columns.to_list()).index, :]
temp

Unnamed: 0,composite_model_p_val_Education_years,composite_model_p_val_age,new_para_model_p_val_Education_years,new_para_model_p_val_age,x_var,y_var
1,0.041607,0.020244,,,Ab42_40,FRONTAL_gm
3,,,0.024805,0.007766,Ab42_40,Precun_gm
10,,,0.043796,0.038379,GFAP,Precun_gm
24,,,0.006586,0.016969,pTau181,Precun_gm
34,,,0.049965,,pTau217,PostCing_gm
18,,,,0.036415,NFL,Par_gm
25,,,,0.041938,pTau181,Par_gm


# new_para vs subset new_para

In [16]:
new_para_new_para = pd.merge(results_new_para, results_subset_new_para, how = 'inner')
new_para_new_para

Unnamed: 0,y_var,x_var,new_para_model_p_val_baseline,new_para_model_B_val_baseline,new_para_model_n_baseline,new_para_model_p_val_Intercept,new_para_model_p_val_Education_years,new_para_model_p_val_x_var,new_para_model_B_val_Intercept,new_para_model_B_val_Education_years,...,subset_new_para_model_B_val_Education_years,subset_new_para_model_B_val_x_var,subset_new_para_model_p_val_age,subset_new_para_model_B_val_age,subset_new_para_model_p_val_sex[T.M],subset_new_para_model_B_val_sex[T.M],subset_new_para_model_p_val_apoe4_carrier[T.Yes],subset_new_para_model_B_val_apoe4_carrier[T.Yes],subset_new_para_model_p_val_race[T.White/NH],subset_new_para_model_B_val_race[T.White/NH]
0,MTL_gm,Ab42_40,Intercept 0.917548 Education_years ...,Intercept -0.014719 Education_years ...,35,0.917548,0.002342,0.010588,-1.471869e-02,0.467310,...,0.503625,0.386791,,,,,,,,
1,SensMot_gm,Ab42_40,Intercept 1.000000 Ab42_40_log10 0.8...,Intercept 1.151856e-15 Ab42_40_log10 ...,36,1.000000,,0.816615,1.151856e-15,,...,,0.029663,,,,,,,,
2,FRONTAL_gm,Ab42_40,Intercept 1.000000 Ab42_40_log10 0.7...,Intercept -4.475587e-16 Ab42_40_log10 ...,36,1.000000,,0.742813,-4.475587e-16,,...,0.422496,0.008872,0.020670,0.491616,,,,,,
3,AC_gm,Ab42_40,Intercept 1.000000 Ab42_40_log10 0.8...,Intercept -1.360023e-15 Ab42_40_log10 ...,36,1.000000,,0.823329,-1.360023e-15,,...,,0.031606,,,,,,,,
4,Precun_gm,Ab42_40,Intercept 0.911983 age ...,Intercept -0.016633 age ...,35,0.911983,0.024805,0.021403,-1.663288e-02,0.364864,...,,0.370031,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
625,VOL_Precun_InfPar_Supramarg_Rz,pTau231,Intercept 1.000000 age ...,Intercept -5.551115e-17 age ...,32,1.000000,0.017335,0.008586,-5.551115e-17,0.355296,...,0.360586,0.400042,0.000740,0.634709,,,,,,
626,VOL_LatOccLingCun_Lz,pTau231,Intercept 1.000000 pTau231_log10 0.0...,Intercept -9.714451e-17 pTau231_log10 ...,32,1.000000,,0.040319,-9.714451e-17,,...,,0.476322,,,,,,,,
627,VOL_LatOccLingCun_Rz,pTau231,Intercept 1.000000 pTau231_log10 0.0...,Intercept 3.469447e-17 pTau231_log10 ...,32,1.000000,,0.012228,3.469447e-17,,...,,0.547344,,,,,,,,
628,VOL_InfParSupra_Lz,pTau231,Intercept 1.000000 age 0.0...,Intercept -2.775558e-17 age ...,32,1.000000,,0.071284,-2.775558e-17,,...,,0.261424,0.000109,0.657379,,,,,,


## x_var significance

In [17]:
new_para_new_para[(new_para_new_para['new_para_model_p_val_x_var'] < 0.05) & (new_para_new_para['subset_new_para_model_p_val_x_var'] >= 0.05)]

Unnamed: 0,y_var,x_var,new_para_model_p_val_baseline,new_para_model_B_val_baseline,new_para_model_n_baseline,new_para_model_p_val_Intercept,new_para_model_p_val_Education_years,new_para_model_p_val_x_var,new_para_model_B_val_Intercept,new_para_model_B_val_Education_years,...,subset_new_para_model_B_val_Education_years,subset_new_para_model_B_val_x_var,subset_new_para_model_p_val_age,subset_new_para_model_B_val_age,subset_new_para_model_p_val_sex[T.M],subset_new_para_model_B_val_sex[T.M],subset_new_para_model_p_val_apoe4_carrier[T.Yes],subset_new_para_model_B_val_apoe4_carrier[T.Yes],subset_new_para_model_p_val_race[T.White/NH],subset_new_para_model_B_val_race[T.White/NH]
10,FRONTAL_gm,GFAP,Intercept 1.000000 GFAP_log10 0.039403 ...,Intercept 4.510281e-17 GFAP_log10 -3.191...,42,1.0,,0.039403,4.510281e-17,,...,,-0.305564,,,,,,,,
26,FRONTAL_gm,pTau181,Intercept 0.987310 age ...,Intercept 0.002319 age ...,41,0.98731,0.00507,0.031049,0.002318824,0.473807,...,0.501554,0.297385,0.049704,0.373467,,,,,,
36,Precun_gm,pTau217,Intercept 1.000000 pTau217_log10 0.0...,Intercept 5.551115e-16 pTau217_log10 ...,38,1.0,,0.044767,5.551115e-16,,...,,-0.322439,,,,,,,,
86,pTau231,pTau217,Intercept 1.000000 pTau217_log10 0.0...,Intercept 1.110223e-16 pTau217_log10 ...,36,1.0,,0.031501,1.110223e-16,,...,,0.32787,,,,,,,,
94,pTau217,pTau231,Intercept 1.000000 pTau231_log10 0.0...,Intercept -7.979728e-17 pTau231_log10 ...,36,1.0,,0.031501,-7.979728000000001e-17,,...,,0.32787,,,,,,,,
288,VOL_Pericalc_Rz,GFAP,Intercept 1.000000 GFAP_log10 0.025552 ...,Intercept 2.775558e-17 GFAP_log10 3.718...,36,1.0,,0.025552,2.775558e-17,,...,,0.330142,,,,,,,,
320,VOL_InfParSupra_Lz,GFAP,Intercept 1.000000 age 0.000615 ...,Intercept -1.110223e-16 age 5.268...,36,1.0,,0.038333,-1.110223e-16,,...,,-0.288786,0.001751,0.535201,,,,,,
372,VOL_Hip_Lz,NFL,Intercept 1.000000 NFL_log10 0.049581 dt...,Intercept -4.024558e-16 NFL_log10 3.29680...,36,1.0,,0.049581,-4.024558e-16,,...,,0.308772,,,,,,,,
477,VOL_Ventricles_Rz,pTau217,Intercept 1.000000 pTau217_log10 0.0...,Intercept 1.249001e-16 pTau217_log10 ...,32,1.0,,0.027219,1.249001e-16,,...,,0.341141,,,,,,,,
493,VOL_InfPar_Rz,pTau217,Intercept 1.000000 pTau217_log10 0.0...,Intercept -3.469447e-17 pTau217_log10 ...,32,1.0,,0.046486,-3.4694470000000005e-17,,...,,-0.326615,,,,,,,,


In [18]:
new_para_new_para[(new_para_new_para['new_para_model_p_val_x_var'] >= 0.05) & (new_para_new_para['subset_new_para_model_p_val_x_var'] < 0.05)]

Unnamed: 0,y_var,x_var,new_para_model_p_val_baseline,new_para_model_B_val_baseline,new_para_model_n_baseline,new_para_model_p_val_Intercept,new_para_model_p_val_Education_years,new_para_model_p_val_x_var,new_para_model_B_val_Intercept,new_para_model_B_val_Education_years,...,subset_new_para_model_B_val_Education_years,subset_new_para_model_B_val_x_var,subset_new_para_model_p_val_age,subset_new_para_model_B_val_age,subset_new_para_model_p_val_sex[T.M],subset_new_para_model_B_val_sex[T.M],subset_new_para_model_p_val_apoe4_carrier[T.Yes],subset_new_para_model_B_val_apoe4_carrier[T.Yes],subset_new_para_model_p_val_race[T.White/NH],subset_new_para_model_B_val_race[T.White/NH]
13,Par_gm,GFAP,Intercept 0.969664 age 0.028485 ...,Intercept -0.005690 age 0.338568 ...,41,0.969664,,0.073024,-0.005689693,,...,,-0.331333,0.024734,0.363953,,,,,,
70,pTau231,NFL,Intercept 1.000000 NFL_log10 0.086283 dt...,Intercept 1.228184e-15 NFL_log10 2.78273...,39,1.0,,0.086283,1.228184e-15,,...,,0.345334,,,,,,,,
92,NFL,pTau231,Intercept 1.000000 pTau231_log10 0.0...,Intercept 4.787837e-16 pTau231_log10 ...,39,1.0,,0.086283,4.787837e-16,,...,,0.345334,,,,,,,,
205,VOL_LatOcc_Rz,Ab42_40,Intercept 1.000000 Ab42_40_log10 0.0...,Intercept 2.775558e-17 Ab42_40_log10 ...,30,1.0,,0.059005,2.775558e-17,,...,,0.405995,,,,,,,,
206,VOL_Lingual_Lz,Ab42_40,Intercept 1.000000 Ab42_40_log10 0.0...,Intercept -8.326673e-17 Ab42_40_log10 ...,30,1.0,,0.050149,-8.326673e-17,,...,,0.395428,,,,,,,,
242,VOL_LatOccLingCun_Rz,Ab42_40,Intercept 1.00000 Ab42_40_log10 0.05...,Intercept -5.377643e-17 Ab42_40_log10 ...,30,1.0,,0.05056,-5.3776430000000004e-17,,...,,0.414637,,,,,,,,
250,VOL_ParaHip_Rz,GFAP,Intercept 1.000000 GFAP_log10 0.141215 ...,Intercept -1.110223e-16 GFAP_log10 2.501...,36,1.0,,0.141215,-1.110223e-16,,...,,0.394368,,,,,,,,
277,VOL_SupFront_Lz,GFAP,Intercept 1.000000 GFAP_log10 0.058331 ...,Intercept -8.326673e-17 GFAP_log10 -3.185...,36,1.0,,0.058331,-8.326673e-17,,...,,-0.371693,,,0.047504,-0.762841,,,,
289,VOL_AntCingulate_Lz,GFAP,Intercept 1.000000 GFAP_log10 0.111798 ...,Intercept -6.938894e-17 GFAP_log10 2.696...,36,1.0,,0.111798,-6.938894000000001e-17,,...,,0.368967,,,,,,,,
430,VOL_MidFront_Rz,pTau181,Intercept 1.000000 age 0.0...,Intercept -1.387779e-17 age ...,36,1.0,,0.056514,-1.387779e-17,,...,,0.371751,0.048649,0.343032,,,,,,


In [19]:
(new_para_new_para['new_para_model_p_val_x_var']  - new_para_new_para['subset_new_para_model_p_val_x_var']).describe().round(3)

count    630.000
mean      -0.007
std        0.152
min       -0.685
25%       -0.059
50%        0.000
75%        0.043
max        0.622
dtype: float64

## covariate significance

In [33]:
# those where covariate significance differs between a and b
a_sig = new_para_new_para[['new_para_' + ele for ele in covariate_pval_columns]].notna().any(axis =1)
b_sig = new_para_new_para[['subset_new_para_' + ele for ele in covariate_pval_columns]].notna().any(axis =1)

temp = new_para_new_para[a_sig != b_sig].drop(columns = new_para_new_para.columns[new_para_new_para.columns.str.contains('B_val|Intercept|baseline|type|p_val_x_var')]).dropna(axis = 1, how = 'all')
temp = temp[temp.columns.sort_values()]
temp.loc[temp.isna().sort_values(temp.columns.to_list()).index, :]
temp

Unnamed: 0,new_para_model_p_val_Education_years,new_para_model_p_val_age,subset_new_para_model_p_val_Education_years,subset_new_para_model_p_val_age,subset_new_para_model_p_val_sex[T.M],x_var,y_var
2,,,0.042447,0.02067,,Ab42_40,FRONTAL_gm
4,0.024805,0.007766,,,,Ab42_40,Precun_gm
12,0.043796,0.038379,,,,GFAP,Precun_gm
21,,0.036415,,,,NFL,Par_gm
28,0.006586,0.016969,,,,pTau181,Precun_gm
29,,0.041938,,,,pTau181,Par_gm
39,0.049965,,,,,pTau217,PostCing_gm
179,,,,,0.026766,Ab42_40,VOL_MidTemp_Rz
182,,0.010799,,,,Ab42_40,VOL_Precun_Lz
185,0.04152,,,,,Ab42_40,VOL_InfPar_Rz


# Ras vs new para

In [21]:
ras_new_para = pd.merge(results_ras, results_new_para, how = 'inner')
ras_new_para

Unnamed: 0,y_var,x_var,ras_model_p_val_baseline,ras_model_B_val_baseline,ras_model_n_baseline,ras_model_p_val_Intercept,ras_model_p_val_x_var,ras_model_B_val_Intercept,ras_model_B_val_x_var,ras_model_p_val_age,...,new_para_model_B_val_Education_years,new_para_model_B_val_x_var,new_para_model_p_val_age,new_para_model_B_val_age,new_para_model_p_val_sex[T.M],new_para_model_B_val_sex[T.M],new_para_model_p_val_apoe4_carrier[T.Yes],new_para_model_B_val_apoe4_carrier[T.Yes],new_para_model_p_val_race[T.White/NH],new_para_model_B_val_race[T.White/NH]
0,MTL_gm,Ab42_40,Intercept 1.000000 Ab42_40_log10 0.0...,Intercept -3.642919e-16 Ab42_40_log10 ...,36,1.000000,0.009776,-3.642919e-16,0.424976,,...,0.467310,0.389317,,,,,,,,
1,SensMot_gm,Ab42_40,Intercept 1.00000 Ab42_40_log10 0.43...,Intercept -1.471046e-15 Ab42_40_log10 ...,36,1.000000,0.438580,-1.471046e-15,0.133224,,...,,0.040048,,,,,,,,
2,FRONTAL_gm,Ab42_40,Intercept 0.865954 age 0.0...,Intercept 0.027358 age 0.4...,35,0.865954,0.540627,2.735770e-02,0.102758,0.016037,...,,-0.056644,,,,,,,,
3,AC_gm,Ab42_40,Intercept 1.00000 Ab42_40_log10 0.46...,Intercept 4.926615e-16 Ab42_40_log10 ...,36,1.000000,0.466770,4.926615e-16,0.125232,,...,,0.038558,,,,,,,,
4,Precun_gm,Ab42_40,Intercept 0.908221 age 0.0...,Intercept 0.016911 age 0.4...,35,0.908221,0.009156,1.691112e-02,0.417244,0.003591,...,0.364864,0.373903,0.007766,0.447645,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
625,VOL_Precun_InfPar_Supramarg_Rz,pTau231,Intercept 1.000000 age ...,Intercept -5.551115e-17 age ...,32,1.000000,0.008586,-5.551115e-17,0.390247,0.000075,...,0.355296,0.390247,0.000075,0.642682,,,,,,
626,VOL_LatOccLingCun_Lz,pTau231,Intercept 1.000000 pTau231_log10 0.0...,Intercept -9.714451e-17 pTau231_log10 ...,32,1.000000,0.040319,-9.714451e-17,0.364402,,...,,0.364402,,,,,,,,
627,VOL_LatOccLingCun_Rz,pTau231,Intercept 1.000000 pTau231_log10 0.0...,Intercept 3.469447e-17 pTau231_log10 ...,32,1.000000,0.012228,3.469447e-17,0.437723,,...,,0.437723,,,,,,,,
628,VOL_InfParSupra_Lz,pTau231,Intercept 1.000000 age 0.0...,Intercept -2.775558e-17 age ...,32,1.000000,0.071284,-2.775558e-17,0.249357,0.000019,...,,0.249357,0.000019,0.678954,,,,,,


## x_var significance

In [22]:
ras_new_para[(ras_new_para['ras_model_p_val_x_var'] < 0.05) & (ras_new_para['new_para_model_p_val_x_var'] >= 0.05)]

Unnamed: 0,y_var,x_var,ras_model_p_val_baseline,ras_model_B_val_baseline,ras_model_n_baseline,ras_model_p_val_Intercept,ras_model_p_val_x_var,ras_model_B_val_Intercept,ras_model_B_val_x_var,ras_model_p_val_age,...,new_para_model_B_val_Education_years,new_para_model_B_val_x_var,new_para_model_p_val_age,new_para_model_B_val_age,new_para_model_p_val_sex[T.M],new_para_model_B_val_sex[T.M],new_para_model_p_val_apoe4_carrier[T.Yes],new_para_model_B_val_apoe4_carrier[T.Yes],new_para_model_p_val_race[T.White/NH],new_para_model_B_val_race[T.White/NH]


In [23]:
ras_new_para[(ras_new_para['ras_model_p_val_x_var'] >= 0.05) & (ras_new_para['new_para_model_p_val_x_var'] < 0.05)]

Unnamed: 0,y_var,x_var,ras_model_p_val_baseline,ras_model_B_val_baseline,ras_model_n_baseline,ras_model_p_val_Intercept,ras_model_p_val_x_var,ras_model_B_val_Intercept,ras_model_B_val_x_var,ras_model_p_val_age,...,new_para_model_B_val_Education_years,new_para_model_B_val_x_var,new_para_model_p_val_age,new_para_model_B_val_age,new_para_model_p_val_sex[T.M],new_para_model_B_val_sex[T.M],new_para_model_p_val_apoe4_carrier[T.Yes],new_para_model_B_val_apoe4_carrier[T.Yes],new_para_model_p_val_race[T.White/NH],new_para_model_B_val_race[T.White/NH]
10,FRONTAL_gm,GFAP,Intercept 1.000000 GFAP_log10 0.896545 ...,Intercept -4.579670e-16 GFAP_log10 -2.068...,42,1.0,0.896545,-4.57967e-16,-0.020686,,...,,-0.319124,,,,,,,,
26,FRONTAL_gm,pTau181,Intercept 1.00000 pTau181_log10 0.12...,Intercept -4.579670e-16 pTau181_log10 ...,42,1.0,0.12241,-4.57967e-16,0.242102,,...,0.473807,0.343053,0.036655,0.341237,,,,,,
31,PostCing_gm,pTau181,Intercept 1.000000 pTau181_log10 0.0...,Intercept 1.346145e-15 pTau181_log10 ...,42,1.0,0.051008,1.346145e-15,0.303131,,...,0.368301,0.373023,,,,,,,,
33,SensMot_gm,pTau217,Intercept 1.000000 pTau217_log10 0.4...,Intercept -5.967449e-16 pTau217_log10 ...,38,1.0,0.419613,-5.967449e-16,-0.134832,,...,,-0.346181,,,,,,,,
36,Precun_gm,pTau217,Intercept 1.000000 pTau217_log10 0.1...,Intercept -7.771561e-16 pTau217_log10 ...,38,1.0,0.168858,-7.771561e-16,-0.22786,,...,,-0.327454,,,,,,,,


In [24]:
(ras_new_para['ras_model_p_val_x_var']  - ras_new_para['new_para_model_p_val_x_var']).describe().round(3)

count    630.000
mean       0.002
std        0.089
min       -0.760
25%        0.000
50%        0.000
75%        0.000
max        0.857
dtype: float64

## covariate significance

In [25]:
# those where covariate significance differs between a and b
a_sig = ras_new_para[['new_para_' + ele for ele in covariate_pval_columns]].notna().any(axis =1)
b_sig = ras_new_para[['ras_' + ele for ele in covariate_pval_columns]].notna().any(axis =1)

temp = ras_new_para[a_sig != b_sig].drop(columns = ras_new_para.columns[ras_new_para.columns.str.contains('B_val|Intercept|baseline|type|p_val_x_var')]).dropna(axis = 1, how = 'all')
temp = temp[temp.columns.sort_values()]
temp.loc[temp.isna().sort_values(temp.columns.to_list()).index, :]
temp

Unnamed: 0,new_para_model_p_val_Education_years,new_para_model_p_val_age,ras_model_p_val_age,x_var,y_var
0,0.002342,,,Ab42_40,MTL_gm
2,,,0.016037,Ab42_40,FRONTAL_gm
8,0.005543,,,GFAP,MTL_gm
12,0.043796,0.038379,,GFAP,Precun_gm
16,0.00188,,,NFL,MTL_gm
24,1.7e-05,,,pTau181,MTL_gm
26,0.00507,0.036655,,pTau181,FRONTAL_gm
27,0.037417,,,pTau181,AC_gm
28,0.006586,0.016969,,pTau181,Precun_gm
31,0.017593,,,pTau181,PostCing_gm


# Composite vs Old Para

In [26]:
comp_para = pd.merge(results_composite, results_para, how = 'inner')
comp_para

Unnamed: 0,y_var,x_var,composite_model_p_val_baseline,composite_model_B_val_baseline,composite_model_n_baseline,composite_model_p_val_Intercept,composite_model_p_val_x_var,composite_model_B_val_Intercept,composite_model_B_val_x_var,composite_model_p_val_age,...,para_model_p_val_age,para_model_B_val_age,para_model_p_val_Education_years,para_model_B_val_Education_years,para_model_p_val_sex[T.M],para_model_B_val_sex[T.M],para_model_p_val_apoe4_carrier[T.Yes],para_model_B_val_apoe4_carrier[T.Yes],para_model_p_val_race[T.White/NH],para_model_B_val_race[T.White/NH]
0,Avg_PCC,Ab42_40,Intercept 0.104584 sex[T.M] ...,Intercept 0.341689 sex[T.M] ...,31,0.104584,0.010887,3.416895e-01,0.446655,0.005147,...,0.010285,0.443213,,,,,,,,
1,MTL_gm,Ab42_40,Intercept 1.000000 Education_years ...,Intercept 1.314920e-15 Education_year...,31,1.000000,0.010371,1.314920e-15,0.386573,,...,,,,,,,,,,
2,AC_gm,Ab42_40,Intercept 1.000000 Ab42_40_log10 0.8...,Intercept 1.623701e-15 Ab42_40_log10 ...,31,1.000000,0.864119,1.623701e-15,0.032045,,...,,,,,,,,,,
3,Precun_gm,Ab42_40,Intercept 1.000000 Ab42_40_log10 0.0...,Intercept -1.554312e-15 Ab42_40_log10 ...,31,1.000000,0.040683,-1.554312e-15,0.369661,,...,0.008202,0.437075,,,,,,,,
4,Par_gm,Ab42_40,Intercept 1.000000 Ab42_40_log10 0.2...,Intercept 5.065393e-16 Ab42_40_log10 ...,31,1.000000,0.256848,5.065393e-16,0.209998,,...,0.011089,0.446742,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
619,VOL_Precun_InfPar_Supramarg_Rz,pTau231,Intercept 1.000000 age ...,Intercept -4.857226e-17 age ...,32,1.000000,0.008586,-4.857226e-17,0.390247,0.000075,...,0.000075,0.642682,0.017335,0.355296,,,,,,
620,VOL_LatOccLingCun_Lz,pTau231,Intercept 1.000000 pTau231_log10 0.0...,Intercept -9.714451e-17 pTau231_log10 ...,32,1.000000,0.040319,-9.714451e-17,0.364402,,...,,,,,,,,,,
621,VOL_LatOccLingCun_Rz,pTau231,Intercept 1.000000 pTau231_log10 0.0...,Intercept 4.163336e-17 pTau231_log10 ...,32,1.000000,0.012228,4.163336e-17,0.437723,,...,,,,,,,,,,
622,VOL_InfParSupra_Lz,pTau231,Intercept 1.000000 age 0.0...,Intercept -2.775558e-17 age ...,32,1.000000,0.071284,-2.775558e-17,0.249357,0.000019,...,0.000019,0.678954,,,,,,,,


## x_var significance

In [27]:
comp_para[(comp_para['composite_model_p_val_x_var'] < 0.05) & (comp_para['para_model_p_val_x_var'] >= 0.05)]

Unnamed: 0,y_var,x_var,composite_model_p_val_baseline,composite_model_B_val_baseline,composite_model_n_baseline,composite_model_p_val_Intercept,composite_model_p_val_x_var,composite_model_B_val_Intercept,composite_model_B_val_x_var,composite_model_p_val_age,...,para_model_p_val_age,para_model_B_val_age,para_model_p_val_Education_years,para_model_B_val_Education_years,para_model_p_val_sex[T.M],para_model_B_val_sex[T.M],para_model_p_val_apoe4_carrier[T.Yes],para_model_B_val_apoe4_carrier[T.Yes],para_model_p_val_race[T.White/NH],para_model_B_val_race[T.White/NH]
11,Par_gm,GFAP,Intercept 1.000000 age 0.024579 ...,Intercept 3.593780e-17 age 3.643...,37,1.0,0.04006,3.5937800000000006e-17,-0.330695,0.024579,...,0.00331,0.471988,,,,,,,,
36,MTL_gm,pTau231,Intercept 1.000000 pTau231_log10 0.0...,Intercept 1.114743e-15 pTau231_log10 ...,33,1.0,0.033658,1.114743e-15,0.370769,,...,,,,,,,,,,


In [28]:
comp_para[(comp_para['composite_model_p_val_x_var'] >= 0.05) & (comp_para['para_model_p_val_x_var'] < 0.05)]

Unnamed: 0,y_var,x_var,composite_model_p_val_baseline,composite_model_B_val_baseline,composite_model_n_baseline,composite_model_p_val_Intercept,composite_model_p_val_x_var,composite_model_B_val_Intercept,composite_model_B_val_x_var,composite_model_p_val_age,...,para_model_p_val_age,para_model_B_val_age,para_model_p_val_Education_years,para_model_B_val_Education_years,para_model_p_val_sex[T.M],para_model_B_val_sex[T.M],para_model_p_val_apoe4_carrier[T.Yes],para_model_B_val_apoe4_carrier[T.Yes],para_model_p_val_race[T.White/NH],para_model_B_val_race[T.White/NH]
4,Par_gm,Ab42_40,Intercept 1.000000 Ab42_40_log10 0.2...,Intercept 5.065393e-16 Ab42_40_log10 ...,31,1.0,0.256848,5.065393e-16,0.209998,,...,0.011089,0.446742,,,,,,,,


In [29]:
(comp_para['composite_model_p_val_x_var']  - comp_para['para_model_p_val_x_var']).describe().round(3)

count    624.000
mean      -0.003
std        0.061
min       -0.556
25%        0.000
50%        0.000
75%        0.000
max        0.519
dtype: float64

## covariate significance

In [30]:
# those where covariate significance differs between a and b
a_sig = comp_para[['composite_' + ele for ele in covariate_pval_columns]].notna().any(axis =1)
b_sig = comp_para[['para_' + ele for ele in covariate_pval_columns]].notna().any(axis =1)

temp = comp_para[a_sig != b_sig].drop(columns = comp_para.columns[comp_para.columns.str.contains('B_val|Intercept|baseline|type|p_val_x_var')]).dropna(axis = 1, how = 'all')
temp = temp[temp.columns.sort_values()]
temp.loc[temp.isna().sort_values(temp.columns.to_list()).index, :]
temp

Unnamed: 0,composite_model_p_val_Education_years,composite_model_p_val_age,para_model_p_val_age,x_var,y_var
1,0.001304,,,Ab42_40,MTL_gm
3,,,0.008202,Ab42_40,Precun_gm
4,,,0.011089,Ab42_40,Par_gm
7,0.037201,0.041555,,GFAP,Avg_PCC
8,0.002343,,,GFAP,MTL_gm
14,0.026751,0.044073,,NFL,Avg_PCC
15,0.000934,,,NFL,MTL_gm
18,,,0.007013,NFL,Par_gm
21,0.007906,0.018571,,pTau181,Avg_PCC
22,3.8e-05,,,pTau181,MTL_gm


# New Para with and without RIL8

In [31]:
comp_new_para = pd.merge(results_orig_new_para, results_new_para, how = 'inner')
comp_new_para

NameError: name 'results_orig_new_para' is not defined

## x_var significance

In [None]:
comp_new_para.loc[(comp_new_para['orig_new_para_model_p_val_x_var'] < 0.05) & (comp_new_para['new_para_model_p_val_x_var'] >= 0.05), ['x_var', 'y_var', 'orig_new_para_model_p_val_x_var',
       'orig_new_para_model_B_val_x_var', 'orig_new_para_model_n_baseline', 'new_para_model_p_val_x_var', 'new_para_model_B_val_x_var',
       'new_para_model_n_baseline']]

In [None]:
comp_new_para.loc[(comp_new_para['orig_new_para_model_p_val_x_var'] >= 0.05) & (comp_new_para['new_para_model_p_val_x_var'] < 0.05), ['x_var', 'y_var', 'orig_new_para_model_p_val_x_var',
       'orig_new_para_model_B_val_x_var', 'orig_new_para_model_n_baseline', 'new_para_model_p_val_x_var', 'new_para_model_B_val_x_var',
       'new_para_model_n_baseline']]

In [None]:
(comp_new_para['orig_new_para_model_p_val_x_var']  - comp_new_para['new_para_model_p_val_x_var']).describe().round(3)

In [None]:
comp_new_para.loc[(abs(comp_new_para['orig_new_para_model_p_val_x_var']  - comp_new_para['new_para_model_p_val_x_var'])> 0.1), ['x_var', 'y_var', 'orig_new_para_model_p_val_x_var',
       'orig_new_para_model_B_val_x_var', 'orig_new_para_model_n_baseline', 'new_para_model_p_val_x_var', 'new_para_model_B_val_x_var',
       'new_para_model_n_baseline']].sort_values('new_para_model_p_val_x_var')

## covariate significance

In [None]:
# those where covariate significance differs between a and b
a_sig = comp_new_para[['orig_new_para_' + ele for ele in covariate_pval_columns]].notna().any(axis =1)
b_sig = comp_new_para[['new_para_' + ele for ele in covariate_pval_columns]].notna().any(axis =1)

temp = comp_new_para[a_sig != b_sig].drop(columns = comp_new_para.columns[comp_new_para.columns.str.contains('B_val|Intercept|baseline|type|p_val_x_var')]).dropna(axis = 1, how = 'all')
temp = temp[temp.columns.sort_values()]
temp.loc[temp.isna().sort_values(temp.columns.to_list()).index, :]
temp