# 02 Bootstrapping of the significant models to get confidence intervals
Run only on the significant models from script 01  
Gives confidence intervals without assuming an underlying normal distribution  
  
Given the relatively small available sample size, it was noted that the presence of outliers or a change in the statistical setup might lead to different results than those reported for all available data. To better quantify the uncertainty surrounding our findings, we used a bootstrapping approach to further examine the relationships that the previous models had found to be significant (the coefficient of the independent variable having a p-value < 0.05). For each model, we randomly selected subjects with replacement - which allows for an iteration to include a particular subject multiple times or not at all - until reaching a cohort of the same size as the original number of subjects. This new cohort was modeled in the same way, and the process was repeated 1000 times. This iterative process provided a distribution of results from the model, showing how consistent our results were when varying the participants included, and was used to generate 95% confidence intervals (the 2.5th and 97.5th percentiles of that distribution). 

In [1]:
# install required packages - commented out so it doesn't install every time
#%conda install -n Lauren openpyxl numpy pandas statsmodels plotnine matplotlib scikit-learn scipy mizani nbconvert pandoc pyreadstat kmodes seaborn

# import required packages
import numpy as np;
import pandas as pd;
from sklearn import preprocessing 
from sklearn.utils import resample
import statsmodels as sm;
import statsmodels.formula.api as smf;
import plotnine as p9;
import itertools
import pickle

import scipy; # for spearmann correlation

  from pandas.core import (


# read in and clean data

In [2]:
# Dataset tag; it is spliced into the input pickle path and into the output Excel file names.
# Change this value to pons_rr to get the result when using pons as the FDG-PET reference region.
data_name = 'new_para_rr'

In [3]:
# Read in the results from the main analyses (script 01). The pickle holds a pair that is
# unpacked into the table of fitted models (model_results) and the data frame the
# regressions are run on (all_data).
# NOTE(review): unpickling can execute arbitrary code; only load pickles written by this project.
model_results, all_data = pd.read_pickle('./output/01_data_' + data_name + '.pkl')

In [4]:
# Biomarker families. The clean-up cell near the end of the notebook uses these lists
# to tag each result row with a 'type'.
FDG_columns = [
    'Avg_MedOrbFrontal', 'Graycer_gm', 'Avg_PCC', 'Avg_Hip', 'MTL_gm', 'CO',
    'Temp', 'SensMot_gm', 'L_Hip', 'Vermis_gm', 'FRONTAL_gm', 'AC_gm',
    'Precun_gm', 'Par_gm', 'Temp_gm', 'R_Hip', 'PostCing_gm',
]

# Plasma markers, plus the names of their '_log10' counterparts.
plasma_columns = [
    'Ab42_40', 'GFAP', 'NFL', 'pTau181', 'pTau217', 'pTau231', 'pTau217_Ab42',
]
log10_plasma_columns = [marker + '_log10' for marker in plasma_columns]

cog_columns = [
    'MMSE', 'adascogtotal', 'bvrt', 'dstotal', 'tma', 'tmb',
    'cowattotal', 'cdrtotal', 'cdrsum', 'adltotal', 'npitotal', 'gds',
]
# Volumetric columns; result rows whose variable is in this list are tagged 'Volumetric'
# in the clean-up cell near the end of the notebook.
# NOTE(review): the _Lz / _Rz suffixes presumably mean left / right hemisphere z-scores — confirm.
vol_columns = ['VOL_Ventricles_Lz',
       'VOL_Ventricles_Rz', 'VOL_Putamen_Lz', 'VOL_Putamen_Rz',
       'VOL_ParaHip_Lz', 'VOL_ParaHip_Rz', 'VOL_Fusi_Lz', 'VOL_Fusi_Rz',
       'VOL_InfTemp_Lz', 'VOL_InfTemp_Rz', 'VOL_MidTemp_Lz', 'VOL_MidTemp_Rz',
       'VOL_SupTemp_Lz', 'VOL_SupTemp_Rz', 'VOL_Precun_Lz', 'VOL_Precun_Rz',
       'VOL_InfPar_Lz', 'VOL_InfPar_Rz', 'VOL_ParaPostCentr_Lz',
       'VOL_ParaPostCentr_Rz', 'VOL_SupraMarg_Lz', 'VOL_SupraMarg_Rz',
       'VOL_SupPar_Lz', 'VOL_SupPar_Rz', 'VOL_OrbitFront_Lz',
       'VOL_OrbitFront_Rz', 'VOL_Insula_Lz', 'VOL_Insula_Rz',
       'VOL_InfFront_Lz', 'VOL_InfFront_Rz', 'VOL_MidFront_Lz',
       'VOL_MidFront_Rz', 'VOL_SupFront_Lz', 'VOL_SupFront_Rz',
       'VOL_PrecFront_Lz', 'VOL_PrecFront_Rz', 'VOL_LatOcc_Lz',
       'VOL_LatOcc_Rz', 'VOL_Lingual_Lz', 'VOL_Lingual_Rz', 'VOL_Cuneus_Lz',
       'VOL_Cuneus_Rz', 'VOL_Pericalc_Lz', 'VOL_Pericalc_Rz',
       'VOL_AntCingulate_Lz', 'VOL_AntCingulate_Rz', 'VOL_PostCingulate_Lz',
       'VOL_PostCingulate_Rz', 'VOL_Entorhinal_Lz', 'VOL_Entorhinal_Rz',
       'VOL_Hip_Lz', 'VOL_Hip_Rz', 'VOL_TotalGrayz', 'VOL_TotalGray_Lz',
       'VOL_TotalGray_Rz', 'VOL_LatTemp_Lz', 'VOL_LatTemp_Rz',
       'VOL_Parietal_Lz', 'VOL_Parietal_Rz', 'VOL_Frontal_Lz',
       'VOL_Frontal_Rz', 'VOL_InfMidTemp_Lz', 'VOL_InfMidTemp_Rz',
       'VOL_InfInsFrontal_Lz', 'VOL_InfInsFrontal_Rz', 'VOL_MidSupFrontal_Lz',
       'VOL_MidSupFrontal_Rz', 'VOL_Inf_Mid_Fus_Temp_Lz',
       'VOL_Inf_Mid_Fus_Temp_Rz', 'VOL_Precun_InfPar_Lz',
       'VOL_Precun_InfPar_Rz', 'VOL_Precun_InfPar_Supramarg_Lz',
       'VOL_Precun_InfPar_Supramarg_Rz', 'VOL_LatOccLingCun_Lz',
       'VOL_LatOccLingCun_Rz', 'VOL_InfParSupra_Lz', 'VOL_InfParSupra_Rz']

# set up bootstrapping

In [5]:
# Covariates offered to every model before the non-significant ones are pruned.
_CANDIDATE_COVARIATES = ('age', 'Education_years', 'apoe4_carrier', 'sex', 'race_ethnicity')


def _select_model_terms(model_df, x_var, y_var, p_threshold):
    """Prune non-significant covariates; return the surviving terms (x_var is always kept)."""
    terms = list(_CANDIDATE_COVARIATES) + [x_var]
    while True:
        fit = smf.ols(formula=y_var + ' ~ ' + ' + '.join(terms), data=model_df).fit()
        significant = fit.pvalues[fit.pvalues < p_threshold].index.to_list()
        kept = []
        for name in significant:
            # categorical terms are reported as e.g. 'sex[T.level]'; keep only the variable name
            base = name.split('[')[0]
            if base != 'Intercept' and base not in kept:
                kept.append(base)
        if x_var not in kept:
            kept.append(x_var)  # the predictor of interest is forced into the model
        if len(kept) >= len(terms):
            # nothing was dropped in this pass, so the term list is stable
            return kept
        terms = kept


def _bootstrap_coefficients(model_df, formula, n_resamples):
    """Refit `formula` on `n_resamples` resamples; one row of B_<term>/p_<term> per replicate."""
    replicates = []
    for seed in range(n_resamples):
        # the replicate number doubles as the seed, so reruns give identical resamples
        sample = resample(model_df, replace=True, random_state=seed)
        fit = smf.ols(formula=formula, data=sample).fit()
        row = {}
        for term in fit.params.index:
            row['B_' + term] = fit.params[term]
            row['p_' + term] = fit.pvalues[term]
        replicates.append(row)
    return pd.DataFrame(replicates)


def _quantile_row(resampled_df, q, x_var, y_var, model_n_baseline):
    """Return a one-row frame holding quantile `q` of every bootstrap column plus model labels."""
    row = pd.DataFrame(resampled_df.quantile(q=q)).T
    # The outcome is stored under 'x_var' and the predictor under 'y_var' on purpose:
    # the rename cell later in the notebook swaps these two labels back.
    row['x_var'], row['y_var'], row['model_n_baseline'] = y_var, x_var, model_n_baseline
    # Replace the predictor's name inside column names (B_<x_var> -> B_y_var, ...) so the
    # predictor's coefficient lands in the same column for every model.
    return row.rename(columns={col: col.replace(x_var, 'y_var') for col in row.columns})


def _plot_bootstrap_distribution(resampled_df, column, title, stat_label,
                                 label_decimals, break_decimals, break_step):
    """Print a histogram of one bootstrap column with its 2.5/50/97.5 percentiles marked."""
    values = resampled_df[column]
    q025 = values.quantile(q=0.025)
    q500 = values.quantile(q=0.5)
    q975 = values.quantile(q=0.975)
    subtitle = ('bootstrap ' + stat_label + '=' + str(round(q500, label_decimals))
                + ' (' + str(round(q025, label_decimals))
                + ' to ' + str(round(q975, label_decimals)) + ')')
    breaks = np.arange(round(values.min(), break_decimals),
                       round(values.max(), break_decimals),
                       break_step).round(break_decimals)
    p9.options.figure_size = (4, 3)
    plot = (
        p9.ggplot(resampled_df, p9.aes(x=column))
        + p9.theme_bw(base_size=11)
        + p9.geom_histogram(size=0.5, bins=20)
        + p9.geom_vline(xintercept=q025, color='red')
        + p9.geom_vline(xintercept=q975, color='red')
        + p9.geom_vline(xintercept=q500, color='blue')
        + p9.labs(title=title, subtitle=subtitle)
        + p9.scale_x_continuous(breaks=breaks)
    )
    print(plot)


def bootstrap_regression(significant_combos, *, data=None, n_resamples=1000,
                         covariate_p_threshold=0.05, show_plots=False):
    """Bootstrap the regression behind every row of `significant_combos`.

    For each (x_var, y_var) pair the rows of `data` with both values present are taken,
    the outcome, predictor, 'Education_years' and 'age' are z-scored, covariates that are
    not significant are pruned (x_var is always retained), and the resulting model is
    refitted on `n_resamples` resamples drawn with replacement.

    Parameters
    ----------
    significant_combos : pandas.DataFrame
        One row per model, with at least the columns 'x_var' (predictor) and 'y_var'
        (outcome). 'model_B_val_x_var' and 'model_p_val_x_var' are only read when
        `show_plots` is true.
    data : pandas.DataFrame, optional
        Data the models are fitted on. Defaults to the notebook-level `all_data`.
    n_resamples : int, optional
        Number of bootstrap replicates per model (default 1000).
    covariate_p_threshold : float, optional
        p-value below which a covariate is kept (default 0.05).
    show_plots : bool, optional
        When true, print histograms of the bootstrap B and p distributions of the
        predictor. Off by default; the original code built these plots but never showed them.

    Returns
    -------
    tuple of three pandas.DataFrame
        Median, 97.5th-percentile and 2.5th-percentile tables (in that order), one row
        per model. Note that in these tables the column 'x_var' holds the outcome and
        'y_var' the predictor. Three empty frames are returned when `significant_combos`
        has no rows.

    Raises
    ------
    ValueError
        If `n_resamples` is smaller than 1.
    """
    if n_resamples < 1:
        raise ValueError('n_resamples must be at least 1')
    if len(significant_combos) == 0:
        # No significant model for this predictor: nothing to bootstrap. Returning early
        # avoids indexing columns that do not exist on an empty result table.
        return pd.DataFrame(), pd.DataFrame(), pd.DataFrame()
    if data is None:
        data = all_data

    median_rows, upper_rows, lower_rows = [], [], []

    for _, combo in significant_combos.iterrows():
        x_var = combo['x_var']
        y_var = combo['y_var']

        model_df = data.dropna(subset=[y_var, x_var]).copy()

        # z-score data
        scaled_columns = [y_var, x_var, 'Education_years', 'age']
        model_df[scaled_columns] = preprocessing.StandardScaler().fit_transform(model_df[scaled_columns])

        # step 1 - find the covariates worth keeping (the predictor is forced in)
        model_terms = _select_model_terms(model_df, x_var, y_var, covariate_p_threshold)
        # rows with a value for the outcome and every retained term
        model_n_baseline = len(model_df.dropna(subset=model_terms + [y_var]))

        # step 2 - refit that model on every bootstrap resample
        formula = y_var + ' ~ ' + ' + '.join(model_terms)
        resampled_df = _bootstrap_coefficients(model_df, formula, n_resamples)

        # median, 97.5th and 2.5th percentile of every coefficient and p-value
        median_rows.append(_quantile_row(resampled_df, 0.5, x_var, y_var, model_n_baseline))
        upper_rows.append(_quantile_row(resampled_df, 0.975, x_var, y_var, model_n_baseline))
        lower_rows.append(_quantile_row(resampled_df, 0.025, x_var, y_var, model_n_baseline))

        if show_plots and resampled_df['p_' + x_var].notna().any():
            _plot_bootstrap_distribution(
                resampled_df, 'B_' + x_var,
                y_var + ': original B=' + str(round(combo['model_B_val_x_var'], 2)),
                'B', 2, 1, 0.2)
            _plot_bootstrap_distribution(
                resampled_df, 'p_' + x_var,
                y_var + ': original p=' + str(round(combo['model_p_val_x_var'], 3)),
                'p', 3, 2, 0.1)

    tables = []
    for rows in (median_rows, upper_rows, lower_rows):
        table = pd.concat(rows)  # one concat per table instead of one per model
        # remove self-correlations
        tables.append(table[table['y_var'] != table['x_var']])

    return tables[0], tables[1], tables[2]

In [6]:
# Running result tables, filled one biomarker at a time by the cells below.
biomarker_relationship_results_log10 = pd.DataFrame()      # median across replicates
biomarker_relationship_results_log10_975 = pd.DataFrame()  # 97.5th percentile across replicates
biomarker_relationship_results_log10_025 = pd.DataFrame()  # 2.5th percentile across replicates

# Keep only the models whose predictor was significant (p < 0.05) in script 01;
# bootstrapping every model would take too long.
significant_combos = model_results.loc[model_results['model_p_val_x_var'] < 0.05].reset_index(drop=True)

# Ab42/40 bootstrapping

In [7]:
# Bootstrap the significant models whose predictor column is 'Ab42_40_log10' and append the
# median, 97.5th- and 2.5th-percentile rows to the matching running tables.
boot_median, boot_975, boot_025 = bootstrap_regression(
    significant_combos.loc[significant_combos['x_var'] == 'Ab42_40_log10']
)
biomarker_relationship_results_log10 = pd.concat([biomarker_relationship_results_log10, boot_median])
biomarker_relationship_results_log10_975 = pd.concat([biomarker_relationship_results_log10_975, boot_975])
biomarker_relationship_results_log10_025 = pd.concat([biomarker_relationship_results_log10_025, boot_025])

# GFAP bootstrapping

In [8]:
# Bootstrap the significant models whose predictor column is 'GFAP_log10' and append the
# median, 97.5th- and 2.5th-percentile rows to the matching running tables.
boot_median, boot_975, boot_025 = bootstrap_regression(
    significant_combos.loc[significant_combos['x_var'] == 'GFAP_log10']
)
biomarker_relationship_results_log10 = pd.concat([biomarker_relationship_results_log10, boot_median])
biomarker_relationship_results_log10_975 = pd.concat([biomarker_relationship_results_log10_975, boot_975])
biomarker_relationship_results_log10_025 = pd.concat([biomarker_relationship_results_log10_025, boot_025])

# NFL bootstrapping

In [9]:
# Bootstrap the significant models whose predictor column is 'NFL_log10' and append the
# median, 97.5th- and 2.5th-percentile rows to the matching running tables.
boot_median, boot_975, boot_025 = bootstrap_regression(
    significant_combos.loc[significant_combos['x_var'] == 'NFL_log10']
)
biomarker_relationship_results_log10 = pd.concat([biomarker_relationship_results_log10, boot_median])
biomarker_relationship_results_log10_975 = pd.concat([biomarker_relationship_results_log10_975, boot_975])
biomarker_relationship_results_log10_025 = pd.concat([biomarker_relationship_results_log10_025, boot_025])

# pTau217 bootstrapping

In [10]:
# Bootstrap the significant models whose predictor column is 'pTau217_log10' and append the
# median, 97.5th- and 2.5th-percentile rows to the matching running tables.
boot_median, boot_975, boot_025 = bootstrap_regression(
    significant_combos.loc[significant_combos['x_var'] == 'pTau217_log10']
)
biomarker_relationship_results_log10 = pd.concat([biomarker_relationship_results_log10, boot_median])
biomarker_relationship_results_log10_975 = pd.concat([biomarker_relationship_results_log10_975, boot_975])
biomarker_relationship_results_log10_025 = pd.concat([biomarker_relationship_results_log10_025, boot_025])

# pTau181 bootstrapping

In [11]:
# Bootstrap the significant models whose predictor column is 'pTau181_log10' and append the
# median, 97.5th- and 2.5th-percentile rows to the matching running tables.
boot_median, boot_975, boot_025 = bootstrap_regression(
    significant_combos.loc[significant_combos['x_var'] == 'pTau181_log10']
)
biomarker_relationship_results_log10 = pd.concat([biomarker_relationship_results_log10, boot_median])
biomarker_relationship_results_log10_975 = pd.concat([biomarker_relationship_results_log10_975, boot_975])
biomarker_relationship_results_log10_025 = pd.concat([biomarker_relationship_results_log10_025, boot_025])

# pTau231 bootstrapping

In [12]:
# Bootstrap the significant models whose predictor column is 'pTau231_log10' and append the
# median, 97.5th- and 2.5th-percentile rows to the matching running tables.
boot_median, boot_975, boot_025 = bootstrap_regression(
    significant_combos.loc[significant_combos['x_var'] == 'pTau231_log10']
)
biomarker_relationship_results_log10 = pd.concat([biomarker_relationship_results_log10, boot_median])
biomarker_relationship_results_log10_975 = pd.concat([biomarker_relationship_results_log10_975, boot_975])
biomarker_relationship_results_log10_025 = pd.concat([biomarker_relationship_results_log10_025, boot_025])

# Clean up full list of results

In [13]:
# Tag every result row with the family of the variable stored in its 'y_var' column.
# Later assignments overwrite earlier ones, so the order below fixes the precedence
# (FDG PET, Plasma, Cognitive, Volumetric); rows matching none of them stay 'other'.
# NOTE(review): at this point 'y_var' still holds the predictor as written by
# bootstrap_regression (the labels are swapped back in the following cell) — confirm
# that this is the variable meant to be classified.
_fdg_in_data = [col for col in FDG_columns if col in all_data.columns]
_type_members = (
    ('FDG PET', _fdg_in_data),
    ('Plasma', log10_plasma_columns),
    ('Cognitive', cog_columns),
    ('Volumetric', vol_columns),
)
for _table in (
    biomarker_relationship_results_log10,
    biomarker_relationship_results_log10_025,
    biomarker_relationship_results_log10_975,
):
    _table['type'] = 'other'
    for _label, _members in _type_members:
        _table.loc[_table['y_var'].isin(_members), 'type'] = _label

# save results

In [14]:
# bootstrap_regression stores the outcome under 'x_var', the predictor under 'y_var' and the
# predictor's coefficient / p-value as 'B_y_var' / 'p_y_var'. Swap the labels so that, in the
# saved tables, 'x_var' is the predictor and 'y_var' the outcome.
_final_column_names = {
    'B_y_var': 'B_x_var',
    'p_y_var': 'p_x_var',
    'x_var': 'y_var',
    'y_var': 'x_var',
}
biomarker_relationship_results_log10 = biomarker_relationship_results_log10.rename(columns=_final_column_names)
biomarker_relationship_results_log10_025 = biomarker_relationship_results_log10_025.rename(columns=_final_column_names)
biomarker_relationship_results_log10_975 = biomarker_relationship_results_log10_975.rename(columns=_final_column_names)

In [15]:
# Median (50th percentile) bootstrap estimates: one sheet per plasma predictor, plus a
# 'Significant' sheet with the rows whose median bootstrap p-value for the predictor is < 0.05.
# NFL is included so that this workbook has the same sheets as the 2.5% and 97.5% workbooks.
# NOTE(review): no cell in this notebook bootstraps 'pTau217_Ab42_log10', so that sheet
# only gets a header row unless such a run is added — confirm whether that is intended.
with pd.ExcelWriter('./output/02_bootstrap_results_500_' + data_name + '.xlsx') as writer:
    for sheet, predictor in (
        ('Ab42_40', 'Ab42_40_log10'),
        ('pTau181', 'pTau181_log10'),
        ('pTau217', 'pTau217_log10'),
        ('pTau231', 'pTau231_log10'),
        ('GFAP', 'GFAP_log10'),
        ('NFL', 'NFL_log10'),
        ('pTau217_Ab42', 'pTau217_Ab42_log10'),
    ):
        predictor_rows = biomarker_relationship_results_log10[biomarker_relationship_results_log10['x_var'] == predictor]
        predictor_rows.to_excel(writer, sheet_name=sheet, index=False)
    significant_rows = biomarker_relationship_results_log10[biomarker_relationship_results_log10['p_x_var'] < 0.05]
    significant_rows.to_excel(writer, sheet_name='Significant', index=False)

In [16]:
# 2.5th-percentile bootstrap estimates: one sheet per plasma predictor, plus a 'Significant'
# sheet with the rows whose 2.5th-percentile p-value for the predictor is < 0.05.
with pd.ExcelWriter('./output/02_bootstrap_results_025_' + data_name + '.xlsx') as writer:
    for sheet, predictor in (
        ('Ab42_40', 'Ab42_40_log10'),
        ('pTau181', 'pTau181_log10'),
        ('pTau217', 'pTau217_log10'),
        ('pTau231', 'pTau231_log10'),
        ('GFAP', 'GFAP_log10'),
        ('NFL', 'NFL_log10'),
        ('pTau217_Ab42', 'pTau217_Ab42_log10'),
    ):
        predictor_rows = biomarker_relationship_results_log10_025[biomarker_relationship_results_log10_025['x_var'] == predictor]
        predictor_rows.to_excel(writer, sheet_name=sheet, index=False)
    significant_rows = biomarker_relationship_results_log10_025[biomarker_relationship_results_log10_025['p_x_var'] < 0.05]
    significant_rows.to_excel(writer, sheet_name='Significant', index=False)

In [17]:
# 97.5th-percentile bootstrap estimates: one sheet per plasma predictor, plus a 'Significant'
# sheet with the rows whose 97.5th-percentile p-value for the predictor is < 0.05.
with pd.ExcelWriter('./output/02_bootstrap_results_975_' + data_name + '.xlsx') as writer:
    for sheet, predictor in (
        ('Ab42_40', 'Ab42_40_log10'),
        ('pTau181', 'pTau181_log10'),
        ('pTau217', 'pTau217_log10'),
        ('pTau231', 'pTau231_log10'),
        ('GFAP', 'GFAP_log10'),
        ('NFL', 'NFL_log10'),
        ('pTau217_Ab42', 'pTau217_Ab42_log10'),
    ):
        predictor_rows = biomarker_relationship_results_log10_975[biomarker_relationship_results_log10_975['x_var'] == predictor]
        predictor_rows.to_excel(writer, sheet_name=sheet, index=False)
    significant_rows = biomarker_relationship_results_log10_975[biomarker_relationship_results_log10_975['p_x_var'] < 0.05]
    significant_rows.to_excel(writer, sheet_name='Significant', index=False)