# Aggregate counts and rates of hospital admissions into a table

In [31]:
import pandas as pd

## 0- Functions and data loading

In [32]:
#round counts to the nearest multiple of `base`, so that with the default
#base of 5 the last digit of a rounded whole count is either 0 or 5
def round_five_mul(x, base=5):
    """Return `x` rounded to the nearest multiple of `base`.

    `x` is divided by `base`, passed through the built-in `round`, and
    scaled back up by `base`.
    """
    n_multiples = round(x / base)
    return base * n_multiples

In [33]:
#preview one of the per-bin input files to check its columns before processing
pd.read_csv(
    '../output/hospitalisation_prediction_urti/prob_pred_bin_urti_incdt.csv'
)
#alternative file to preview (kept disabled):
# pd.read_csv(f'../output/hospitalisation_prediction_lrti/summary_table_deciles_lrti_incdt.csv')

Unnamed: 0,bins,pred_mean,pred_sum,pred_count,ab_prob_mean,ab_prob_sum,ab_prob_count,event_urti_admitted_mean,event_urti_admitted_sum,event_urti_admitted_count
0,"(0.0017599999999999998, 0.00489]",0.004446,72.404488,16287,0.097071,1581,16287,0.005219,85.0,16287
1,"(0.00489, 0.00539]",0.005157,83.9896,16287,0.09658,1573,16287,0.005771,94.0,16287
2,"(0.00539, 0.00576]",0.00558,90.87337,16287,0.100632,1639,16287,0.005219,85.0,16287
3,"(0.00576, 0.0061]",0.005931,96.597526,16287,0.102781,1674,16287,0.005894,96.0,16287
4,"(0.0061, 0.00643]",0.006264,102.009964,16286,0.10156,1654,16286,0.005956,97.0,16286
5,"(0.00643, 0.00678]",0.006605,107.589396,16289,0.099454,1620,16289,0.006139,100.0,16289
6,"(0.00678, 0.00717]",0.006966,113.441033,16284,0.100712,1640,16284,0.005834,95.0,16284
7,"(0.00717, 0.00766]",0.007403,120.57887,16287,0.099589,1622,16287,0.007184,117.0,16287
8,"(0.00766, 0.00837]",0.007979,129.960218,16287,0.096764,1576,16287,0.008534,139.0,16287
9,"(0.00837, 0.0146]",0.009188,149.647576,16287,0.101001,1645,16287,0.008043,131.0,16287


In [34]:
def data_process(infection, infection_type):
    """Build the 'count (rate per 1000)' column for one infection and type.

    Reads ``prob_pred_bin_<infection>_<infection_type>.csv`` from
    ``../output/hospitalisation_prediction_<infection>/`` and, for every row
    of that file, combines the rounded hospital-admission count and the
    admission rate per 1000 into one display string such as ``'85 (5.2)'``.

    Parameters
    ----------
    infection : str
        Infection name used in the directory, file and column names
        (e.g. ``'urti'``).
    infection_type : str
        Suffix used in the file name and in the output column name
        (e.g. ``'incdt'`` or ``'prevl'``).

    Returns
    -------
    pandas.DataFrame
        A single column named ``<infection>_<infection_type>_count_rate``.
    """
    sum_col = f'event_{infection}_admitted_sum'
    mean_col = f'event_{infection}_admitted_mean'
    out_col = f'{infection}_{infection_type}_count_rate'

    #read data
    data = pd.read_csv(
        f'../output/hospitalisation_prediction_{infection}/'
        f'prob_pred_bin_{infection}_{infection_type}.csv'
    )
    #keep relevant columns; take an explicit copy so the column assignments
    #below write to an independent frame instead of a slice of the frame that
    #was read (assigning to the slice raises SettingWithCopyWarning)
    data = data[['bins', sum_col, mean_col]].copy()
    #round hosp counts in each bin to make their last digit either 0 or 5,
    #then keep 0 decimals
    data['count'] = round_five_mul(data[sum_col], base=5).apply("{:.0f}".format)
    #replace small hosp counts (raw, unrounded count <= 5) with 'SN'
    data.loc[data[sum_col] <= 5, 'count'] = 'SN'
    #multiply hosp rate in each bin by 1000 people and keep 1 decimal
    #NOTE(review): the rate comes from the unrounded mean and is still shown
    #for rows whose count is 'SN' -- confirm this is acceptable for the
    #intended small-number suppression
    data['rate_1000'] = (data[mean_col] * 1000).apply("{:.1f}".format)
    #make new column with counts and rates, formatted as 'count (rate)'
    data[out_col] = data['count'].astype(str) + ' (' + data['rate_1000'].astype(str) + ')'

    return data[[out_col]]

## 1- process data to make a table of counts and rates, and save it

In [35]:
#build the count/rate column for the 'incdt' and 'prevl' variants of every infection
def _incdt_and_prevl(infection):
    """Return the processed ('incdt', 'prevl') tables for `infection`, in that order."""
    return data_process(infection, 'incdt'), data_process(infection, 'prevl')


summary_table_cases_lrti_incdt, summary_table_cases_lrti_prevl = _incdt_and_prevl('lrti')
summary_table_cases_urti_combined_incdt, summary_table_cases_urti_combined_prevl = _incdt_and_prevl('urti_combined')
summary_table_cases_uti_incdt, summary_table_cases_uti_prevl = _incdt_and_prevl('uti')
summary_table_cases_sinusitis_incdt, summary_table_cases_sinusitis_prevl = _incdt_and_prevl('sinusitis')
summary_table_cases_otmedia_incdt, summary_table_cases_otmedia_prevl = _incdt_and_prevl('otmedia')
summary_table_cases_ot_externa_incdt, summary_table_cases_ot_externa_prevl = _incdt_and_prevl('ot_externa')
summary_table_cases_urti_incdt, summary_table_cases_urti_prevl = _incdt_and_prevl('urti')
summary_table_cases_cough_incdt, summary_table_cases_cough_prevl = _incdt_and_prevl('cough')
summary_table_cases_cough_cold_incdt, summary_table_cases_cough_cold_prevl = _incdt_and_prevl('cough_cold')
summary_table_cases_throat_incdt, summary_table_cases_throat_prevl = _incdt_and_prevl('throat')

In [37]:
#concatenate all processed tables side by side (one column per infection/type)
data = pd.concat(
    [
        summary_table_cases_lrti_incdt,
        summary_table_cases_lrti_prevl,
        summary_table_cases_urti_combined_incdt,
        summary_table_cases_urti_combined_prevl,
        summary_table_cases_uti_incdt,
        summary_table_cases_uti_prevl,
        summary_table_cases_sinusitis_incdt,
        summary_table_cases_sinusitis_prevl,
        summary_table_cases_otmedia_incdt,
        summary_table_cases_otmedia_prevl,
        summary_table_cases_ot_externa_incdt,
        summary_table_cases_ot_externa_prevl,
        summary_table_cases_urti_incdt,
        summary_table_cases_urti_prevl,
        summary_table_cases_cough_incdt,
        summary_table_cases_cough_prevl,
        summary_table_cases_cough_cold_incdt,
        summary_table_cases_cough_cold_prevl,
        summary_table_cases_throat_incdt,
        summary_table_cases_throat_prevl,
    ],
    axis=1,
)
#save the combined table without the row index
data.to_csv('../output/aggregate_deciles_table/decile_table_all_infections.csv', index=False)
#show the combined table as the cell output
data

Unnamed: 0,lrti_incdt_count_rate,lrti_prevl_count_rate,urti_incdt_count_rate,urti_prevl_count_rate
0,70 (4.4),10 (5.0),85 (5.2),SN (2.8)
1,95 (5.9),10 (4.4),95 (5.8),15 (8.4)
2,90 (5.4),SN (2.2),85 (5.2),5 (3.9)
3,90 (5.6),10 (4.4),95 (5.9),5 (3.9)
4,90 (5.5),15 (7.2),95 (6.0),10 (4.5)
5,115 (7.0),10 (6.1),100 (6.1),5 (3.9)
6,120 (7.4),15 (8.3),95 (5.8),15 (7.8)
7,110 (6.6),20 (9.9),115 (7.2),10 (5.0)
8,130 (8.0),10 (6.1),140 (8.5),20 (10.6)
9,135 (8.4),20 (11.6),130 (8.0),25 (13.4)
