# Aggregating Odds Ratios of Logistic Regression Models

In [1]:
import pandas as pd
import numpy as np

In [2]:
def data_process_ors(infection, infection_type, analysis_type):
    """Load one decile odds-ratio table and add a formatted ``or_bounds`` column.

    Reads
    ``../output/hospitalisation_prediction_<infection>/coefs_lr_deciles_<infection>_<infection_type>_<analysis_type>.csv``
    (relative to the current working directory).

    Args:
        infection: Infection name used in the directory and file name,
            e.g. ``'lrti'``.
        infection_type: Second file-name component, e.g. ``'incdt'`` or
            ``'prevl'``.
        analysis_type: Last file-name component, e.g. ``'deciles'``.

    Returns:
        DataFrame with the columns ``bins``, ``OR``, ``Lower CI``,
        ``Upper CI`` and ``or_bounds``. The three numeric columns are
        converted to strings with three decimals; ``or_bounds`` has the form
        ``"OR (lower-upper)"``. Missing values are rendered as ``nan``.

    Raises:
        ValueError: Raised by pandas when the CSV does not have exactly four
            columns to rename.
    """
    csv_path = (
        f'../output/hospitalisation_prediction_{infection}/'
        f'coefs_lr_deciles_{infection}_{infection_type}_{analysis_type}.csv'
    )
    data_ors = pd.read_csv(csv_path)
    # The file's own headers are discarded; the columns are named by position.
    data_ors.columns = ['bins', 'OR', 'Lower CI', 'Upper CI']

    # Round to 3 decimals, then render with exactly 3 digits after the point
    # so trailing zeros are kept (0.65 -> "0.650").
    for column in ('OR', 'Lower CI', 'Upper CI'):
        rounded = np.round(data_ors[column], decimals=3)
        data_ors[column] = rounded.map('{:.3f}'.format)

    # Combine the estimate and its bounds into a single "OR (lower-upper)" cell.
    data_ors['or_bounds'] = (
        data_ors['OR'].astype(str)
        + ' ('
        + data_ors['Lower CI'].astype(str)
        + '-'
        + data_ors['Upper CI'].astype(str)
        + ')'
    )

    return data_ors

In [4]:
# Load the decile odds-ratio tables. For each infection the incident ('incdt')
# table is read first, then the prevalent ('prevl') one.
lr_lrti_incdt_ors, lr_lrti_prevl_ors = [
    data_process_ors('lrti', cohort, 'deciles') for cohort in ('incdt', 'prevl')
]

lr_urti_combined_incdt_ors, lr_urti_combined_prevl_ors = [
    data_process_ors('urti_combined', cohort, 'deciles') for cohort in ('incdt', 'prevl')
]

lr_uti_incdt_ors, lr_uti_prevl_ors = [
    data_process_ors('uti', cohort, 'deciles') for cohort in ('incdt', 'prevl')
]

# Sinusitis: only the incident table is loaded; the prevalent call is disabled.
lr_sinusitis_incdt_ors = data_process_ors(
    infection='sinusitis', infection_type='incdt', analysis_type='deciles'
)
# lr_sinusitis_prevl_ors = data_process_ors('sinusitis', 'prevl', 'deciles')

lr_ot_externa_incdt_ors, lr_ot_externa_prevl_ors = [
    data_process_ors('ot_externa', cohort, 'deciles') for cohort in ('incdt', 'prevl')
]

lr_otmedia_incdt_ors, lr_otmedia_prevl_ors = [
    data_process_ors('otmedia', cohort, 'deciles') for cohort in ('incdt', 'prevl')
]

lr_urti_incdt_ors, lr_urti_prevl_ors = [
    data_process_ors('urti', cohort, 'deciles') for cohort in ('incdt', 'prevl')
]

lr_cough_incdt_ors, lr_cough_prevl_ors = [
    data_process_ors('cough', cohort, 'deciles') for cohort in ('incdt', 'prevl')
]

lr_cough_cold_incdt_ors, lr_cough_cold_prevl_ors = [
    data_process_ors('cough_cold', cohort, 'deciles') for cohort in ('incdt', 'prevl')
]

lr_throat_incdt_ors, lr_throat_prevl_ors = [
    data_process_ors('throat', cohort, 'deciles') for cohort in ('incdt', 'prevl')
]

In [5]:
# Build one wide table: the bin labels of the LRTI incident table followed by
# the formatted "OR (lower-upper)" column of every loaded decile table.
d = {
    'bins': lr_lrti_incdt_ors['bins'],
    **{
        column: table['or_bounds']
        for column, table in (
            ('lrti_incdt', lr_lrti_incdt_ors),
            ('lrti_prevl', lr_lrti_prevl_ors),
            ('urti_combined_incdt', lr_urti_combined_incdt_ors),
            ('urti_combined_prevl', lr_urti_combined_prevl_ors),
            ('uti_incdt', lr_uti_incdt_ors),
            ('uti_prevl', lr_uti_prevl_ors),
            ('sinusitis_incdt', lr_sinusitis_incdt_ors),
            # ('sinusitis_prevl', lr_sinusitis_prevl_ors),  # table not loaded
            ('ot_externa_incdt', lr_ot_externa_incdt_ors),
            ('ot_externa_prevl', lr_ot_externa_prevl_ors),
            ('otmedia_incdt', lr_otmedia_incdt_ors),
            ('otmedia_prevl', lr_otmedia_prevl_ors),
            ('urti_incdt', lr_urti_incdt_ors),
            ('urti_prevl', lr_urti_prevl_ors),
            ('cough_incdt', lr_cough_incdt_ors),
            ('cough_prevl', lr_cough_prevl_ors),
            ('cough_cold_incdt', lr_cough_cold_incdt_ors),
            ('cough_cold_prevl', lr_cough_cold_prevl_ors),
            ('throat_incdt', lr_throat_incdt_ors),
            ('throat_prevl', lr_throat_prevl_ors),
        )
    },
}
data = pd.DataFrame(d)

# Write the aggregated table without the row index, then display it.
data.to_csv(f'../output/aggregate_lr_ors/lr_ors_all_infections.csv', index=False)
data

Unnamed: 0,bins,lrti_incdt,lrti_prevl,urti_combined_incdt,urti_combined_prevl,uti_incdt,uti_prevl
0,"bins_(0.0012499999999999998, 0.00463]",0.111 (0.105-0.118),0.117 (0.098-0.139),0.110 (0.106-0.115),0.111 (0.098-0.126),0.119 (0.112-0.126),0.109 (0.091-0.131)
1,"bins_(0.00463, 0.00514]",0.110 (0.103-0.116),0.105 (0.087-0.126),0.111 (0.106-0.116),0.106 (0.094-0.121),0.109 (0.103-0.116),0.109 (0.091-0.131)
2,"bins_(0.00514, 0.00555]",0.109 (0.103-0.116),0.114 (0.096-0.136),0.109 (0.105-0.114),0.116 (0.102-0.131),0.111 (0.105-0.118),0.122 (0.103-0.145)
3,"bins_(0.00555, 0.00592]",0.115 (0.109-0.122),0.117 (0.098-0.139),0.111 (0.106-0.116),0.123 (0.109-0.139),0.107 (0.101-0.114),0.099 (0.082-0.119)
4,"bins_(0.00592, 0.00629]",0.113 (0.106-0.120),0.111 (0.093-0.133),0.110 (0.105-0.114),0.110 (0.097-0.124),0.114 (0.108-0.121),0.106 (0.088-0.127)
5,"bins_(0.00629, 0.00668]",0.106 (0.100-0.113),0.118 (0.099-0.141),0.115 (0.111-0.120),0.115 (0.102-0.130),0.114 (0.107-0.121),0.130 (0.110-0.154)
6,"bins_(0.00668, 0.00712]",0.112 (0.106-0.119),0.108 (0.090-0.129),0.109 (0.104-0.114),0.103 (0.090-0.118),0.114 (0.107-0.121),0.137 (0.116-0.162)
7,"bins_(0.00712, 0.00767]",0.114 (0.107-0.121),0.114 (0.096-0.136),0.113 (0.108-0.118),0.106 (0.093-0.121),0.110 (0.104-0.117),0.109 (0.091-0.130)
8,"bins_(0.00767, 0.00852]",0.113 (0.107-0.120),0.108 (0.090-0.129),0.108 (0.104-0.113),0.125 (0.111-0.141),0.110 (0.104-0.117),0.109 (0.091-0.131)
9,"bins_(0.00852, 0.0191]",0.109 (0.103-0.116),0.115 (0.096-0.137),0.112 (0.108-0.117),0.113 (0.100-0.128),0.110 (0.103-0.117),0.097 (0.081-0.118)


In [11]:
def data_process_ors_rfs(infection, infection_type, analysis_type):
    """Load one odds-ratio table and add a formatted ``or_bounds`` column.

    Reads
    ``../output/hospitalisation_prediction_<infection>/coefs_lr_<infection>_<infection_type>_<analysis_type>.csv``
    (relative to the current working directory).

    Args:
        infection: Infection name used in the directory and file name,
            e.g. ``'lrti'``.
        infection_type: Second file-name component, e.g. ``'incdt'`` or
            ``'prevl'``.
        analysis_type: Last file-name component, e.g. ``'riskfactors'``.

    Returns:
        DataFrame with the columns ``bins``, ``OR``, ``Lower CI``,
        ``Upper CI`` and ``or_bounds``. The three numeric columns are
        converted to strings with three decimals; ``or_bounds`` has the form
        ``"OR (lower-upper)"``. Missing values are rendered as ``nan``.

    Raises:
        ValueError: Raised by pandas when the CSV does not have exactly four
            columns to rename.
    """
    csv_path = (
        f'../output/hospitalisation_prediction_{infection}/'
        f'coefs_lr_{infection}_{infection_type}_{analysis_type}.csv'
    )
    data_ors = pd.read_csv(csv_path)
    # The file's own headers are discarded; the columns are named by position.
    data_ors.columns = ['bins', 'OR', 'Lower CI', 'Upper CI']

    # Round to 3 decimals, then render with exactly 3 digits after the point
    # so trailing zeros are kept (0.65 -> "0.650").
    for column in ('OR', 'Lower CI', 'Upper CI'):
        rounded = np.round(data_ors[column], decimals=3)
        data_ors[column] = rounded.map('{:.3f}'.format)

    # Combine the estimate and its bounds into a single "OR (lower-upper)" cell.
    data_ors['or_bounds'] = (
        data_ors['OR'].astype(str)
        + ' ('
        + data_ors['Lower CI'].astype(str)
        + '-'
        + data_ors['Upper CI'].astype(str)
        + ')'
    )

    return data_ors

In [16]:
# Load the risk-factor odds-ratio tables. For each infection the incident
# ('incdt') table is read first, then the prevalent ('prevl') one.
lr_lrti_incdt_ors_riskfactors, lr_lrti_prevl_ors_riskfactors = [
    data_process_ors_rfs('lrti', cohort, 'riskfactors') for cohort in ('incdt', 'prevl')
]

lr_urti_combined_incdt_ors_riskfactors, lr_urti_combined_prevl_ors_riskfactors = [
    data_process_ors_rfs('urti_combined', cohort, 'riskfactors') for cohort in ('incdt', 'prevl')
]

lr_uti_incdt_ors_riskfactors, lr_uti_prevl_ors_riskfactors = [
    data_process_ors_rfs('uti', cohort, 'riskfactors') for cohort in ('incdt', 'prevl')
]

# The remaining infections are currently disabled.
# NOTE(review): the disabled calls below pass 'deciles' as analysis_type while
# the active calls above pass 'riskfactors' -- confirm before re-enabling.

# lr_sinusitis_incdt_ors_riskfactors = data_process_ors_rfs('sinusitis', 'incdt', 'deciles')
# # lr_sinusitis_prevl_ors_riskfactors = data_process_ors_rfs('sinusitis', 'prevl', 'deciles')

# lr_ot_externa_incdt_ors_riskfactors = data_process_ors_rfs('ot_externa', 'incdt', 'deciles')
# lr_ot_externa_prevl_ors_riskfactors = data_process_ors_rfs('ot_externa', 'prevl', 'deciles')

# lr_otmedia_incdt_ors_riskfactors = data_process_ors_rfs('otmedia', 'incdt', 'deciles')
# lr_otmedia_prevl_ors_riskfactors = data_process_ors_rfs('otmedia', 'prevl', 'deciles')

# lr_urti_incdt_ors_riskfactors = data_process_ors_rfs('urti', 'incdt', 'deciles')
# lr_urti_prevl_ors_riskfactors = data_process_ors_rfs('urti', 'prevl', 'deciles')

# lr_cough_incdt_ors_riskfactors = data_process_ors_rfs('cough', 'incdt', 'deciles')
# lr_cough_prevl_ors_riskfactors = data_process_ors_rfs('cough', 'prevl', 'deciles')

# lr_cough_cold_incdt_ors_riskfactors = data_process_ors_rfs('cough_cold', 'incdt', 'deciles')
# lr_cough_cold_prevl_ors_riskfactors = data_process_ors_rfs('cough_cold', 'prevl', 'deciles')

# lr_throat_incdt_ors_riskfactors = data_process_ors_rfs('throat', 'incdt', 'deciles')
# lr_throat_prevl_ors_riskfactors = data_process_ors_rfs('throat', 'prevl', 'deciles')

In [17]:
# Show the LRTI incident risk-factor table (bare expression at the end of a notebook cell).
lr_lrti_incdt_ors_riskfactors

Unnamed: 0,bins,OR,Lower CI,Upper CI,or_bounds
0,age_cat_15_24,0.929,,,0.929 (nan-nan)
1,age_cat_25_34,0.841,,,0.841 (nan-nan)
2,age_cat_35_44,0.895,,,0.895 (nan-nan)
3,age_cat_45_54,0.904,,,0.904 (nan-nan)
4,age_cat_55_64,0.882,,,0.882 (nan-nan)
5,age_cat_65_74,0.897,,,0.897 (nan-nan)
6,age_cat_75_more,0.855,,,0.855 (nan-nan)
7,sex_female,0.65,0.0,inf,0.650 (0.000-inf)
8,sex_male,0.657,0.0,inf,0.657 (0.000-inf)
9,CCI_cat_high,0.832,0.0,inf,0.832 (0.000-inf)


In [18]:
# Build one wide table: the row labels of the LRTI incident risk-factor table
# followed by the formatted "OR (lower-upper)" column of every loaded table.
d = {
    'bins': lr_lrti_incdt_ors_riskfactors['bins'],
    **{
        column: table['or_bounds']
        for column, table in (
            ('lrti_incdt', lr_lrti_incdt_ors_riskfactors),
            ('lrti_prevl', lr_lrti_prevl_ors_riskfactors),
            ('urti_combined_incdt', lr_urti_combined_incdt_ors_riskfactors),
            ('urti_combined_prevl', lr_urti_combined_prevl_ors_riskfactors),
            ('uti_incdt', lr_uti_incdt_ors_riskfactors),
            ('uti_prevl', lr_uti_prevl_ors_riskfactors),
            # Disabled until the corresponding tables are loaded:
            # ('sinusitis_incdt', lr_sinusitis_incdt_ors_riskfactors),
            # # ('sinusitis_prevl', lr_sinusitis_prevl_ors_riskfactors),
            # ('ot_externa_incdt', lr_ot_externa_incdt_ors_riskfactors),
            # ('ot_externa_prevl', lr_ot_externa_prevl_ors_riskfactors),
            # ('otmedia_incdt', lr_otmedia_incdt_ors_riskfactors),
            # ('otmedia_prevl', lr_otmedia_prevl_ors_riskfactors),
            # ('urti_incdt', lr_urti_incdt_ors_riskfactors),
            # ('urti_prevl', lr_urti_prevl_ors_riskfactors),
            # ('cough_incdt', lr_cough_incdt_ors_riskfactors),
            # ('cough_prevl', lr_cough_prevl_ors_riskfactors),
            # ('cough_cold_incdt', lr_cough_cold_incdt_ors_riskfactors),
            # ('cough_cold_prevl', lr_cough_cold_prevl_ors_riskfactors),
            # ('throat_incdt', lr_throat_incdt_ors_riskfactors),
            # ('throat_prevl', lr_throat_prevl_ors_riskfactors),
        )
    },
}
data = pd.DataFrame(d)

# Write the aggregated table without the row index, then display it.
data.to_csv(f'../output/aggregate_lr_ors/lr_ors_riskfactors_all_infections.csv', index=False)
data

Unnamed: 0,bins,lrti_incdt,lrti_prevl,urti_combined_incdt,urti_combined_prevl,uti_incdt,uti_prevl
0,age_cat_15_24,0.929 (nan-nan),0.805 (nan-nan),0.889 (nan-nan),0.873 (nan-nan),0.943 (0.000-inf),0.734 (nan-nan)
1,age_cat_25_34,0.841 (nan-nan),0.937 (nan-nan),0.908 (nan-nan),0.849 (nan-nan),0.865 (0.000-inf),1.023 (nan-nan)
2,age_cat_35_44,0.895 (nan-nan),0.871 (nan-nan),0.925 (nan-nan),0.920 (nan-nan),0.869 (0.000-inf),0.876 (nan-nan)
3,age_cat_45_54,0.904 (nan-nan),0.884 (nan-nan),0.862 (nan-nan),0.900 (nan-nan),0.855 (0.000-inf),0.910 (nan-nan)
4,age_cat_55_64,0.882 (nan-nan),0.850 (nan-nan),0.874 (nan-nan),0.880 (nan-nan),0.864 (0.000-inf),0.879 (nan-nan)
5,age_cat_65_74,0.897 (nan-nan),0.958 (nan-nan),0.878 (nan-nan),0.895 (nan-nan),0.905 (0.000-inf),0.939 (nan-nan)
6,age_cat_75_more,0.855 (nan-nan),0.920 (nan-nan),0.859 (nan-nan),0.889 (nan-nan),0.886 (0.000-inf),0.852 (nan-nan)
7,sex_female,0.650 (0.000-inf),0.687 (0.000-inf),0.655 (nan-nan),0.664 (0.000-inf),0.643 (nan-nan),0.673 (nan-nan)
8,sex_male,0.657 (0.000-inf),0.634 (0.000-inf),0.647 (nan-nan),0.647 (0.000-inf),0.653 (nan-nan),0.625 (nan-nan)
9,CCI_cat_high,0.832 (0.000-inf),0.832 (0.000-inf),0.828 (nan-nan),0.818 (nan-nan),0.818 (0.000-inf),0.746 (0.000-inf)
