# Aggregate Summary of Statistics and Make a Baseline Table

This notebook aggregates summary stats files of all infection-related hospital admission data and processes them to make a baseline table.

In [1]:
import pandas as pd
from matplotlib import pyplot
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import warnings
# Silence every warning for the rest of the notebook.
# NOTE(review): this blanket filter also hides pandas deprecation warnings;
# consider narrowing it to specific categories.
warnings.filterwarnings('ignore')

## 0- functions and read data

In [2]:
# Row order used for the manuscript tables. Any variable that is not listed here
# becomes NaN when cast to this categorical and therefore sorts to the end.
_VARIABLE_ORDER = [
    "age_cat_15_24", "age_cat_25_34", "age_cat_35_44", "age_cat_45_54",
    "age_cat_55_64", "age_cat_65_74", "age_cat_75_more",
    "sex_male", "sex_female",
    "bmi_cat_underweight", "bmi_cat_healthy_weight", "bmi_cat_overweight",
    "bmi_cat_obese", "bmi_cat_unknown",
    "ethnicity_white", "ethnicity_non-white", "ethnicity_unknown",
    "CCI_cat_very_low", "CCI_cat_low", "CCI_cat_medium", "CCI_cat_high",
    "CCI_cat_very_high",
    "smoking_status_smoker", "smoking_status_ex_smoker",
    "smoking_status_never_smoked", "smoking_status_unknown",
    "imd_very_affluent", "imd_affluent", "imd_medium", "imd_unaffluent",
    "imd_very_unaffluent", "imd_unknown",
    "season_spring", "season_summer", "season_autumn", "season_winter",
    "region_london", "region_north_east", "region_north_west", "region_east",
    "region_west_midlands", "region_yorkshire", "region_south_east",
    "region_east_midlands", "region_south_west",
    "flu_vaccine_yes", "flu_vaccine_no",
    "antibacterial_brit",
]


def _first_row_label(data, variable):
    """Return the index label of the first row whose 'variable' equals `variable`.

    Raises IndexError when no row matches.
    """
    return data.index[data['variable'] == variable][0]


def data_process(data):
    """Turn one summary-stats table into display-ready baseline-table rows.

    Parameters
    ----------
    data : pandas.DataFrame
        Must provide the columns 'variable', 'count', 'mean' and 'std'.
        The frame passed in is not modified.

    Returns
    -------
    pandas.DataFrame
        Rows sorted by the manuscript variable order with a trailing 'Total'
        row (count of 'sex_female' + 'sex_male'). Added columns: 'index' (the
        pre-sort index), 'count_int' (thousands-separated string), 'mean_perc',
        'std_round' and 'count_mean'. 'count_mean' holds "count (percentage)"
        per row, "mean (std)" for 'antibacterial_brit', and the bare total for
        the 'Total' row.

    Raises
    ------
    IndexError
        If there is no 'sex_female', 'sex_male' or 'antibacterial_brit' row.
    ValueError
        If a count cannot be parsed as an integer.
    """
    # work on a copy so the caller's frame is left untouched
    data = data.copy()
    # convert counts into int; only a trailing '.0' is stripped, so a value such
    # as '10.05' fails loudly instead of silently turning into 105
    data['count_int'] = data['count'].astype(str).replace(r'\.0$', '', regex=True).astype(int)
    # convert means into percentages with two decimals
    data['mean_perc'] = np.round(data['mean'] * 100, decimals=2)
    # custom order of variables to fit tables of the manuscript
    data['variable'] = pd.Categorical(data['variable'], _VARIABLE_ORDER)
    data = data.sort_values('variable')
    # keep the raw mean (two decimals); it is displayed for antibacterial_brit
    data['mean'] = np.round(data['mean'], decimals=2)
    # relabel rows 0..n-1; the previous index is kept as an 'index' column
    data = data.reset_index()
    # calculate total from sex_male and sex_female
    total = (
        data.loc[_first_row_label(data, 'sex_female'), 'count']
        + data.loc[_first_row_label(data, 'sex_male'), 'count']
    )
    new_row = {'variable': 'Total', 'count': '', 'mean': '', 'std': '', 'count_int': total, 'mean_perc': '', 'count_mean': ''}
    # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement
    data = pd.concat([data, pd.DataFrame([new_row])], ignore_index=True)
    # thousands separators for display
    data['count_int'] = data['count_int'].astype(int).map("{:,}".format)
    # combine counts and percentages
    data['count_mean'] = data['count_int'].astype(str) + ' (' + data['mean_perc'].astype(str) + ')'
    # the 'Total' row has an empty std, which is coerced to NaN here
    data['std_round'] = np.round(pd.to_numeric(data['std'], errors='coerce'), decimals=2)
    # antibacterial_brit is reported as "mean (std)" rather than "count (percentage)"
    brit = _first_row_label(data, 'antibacterial_brit')
    data.loc[brit, 'count_mean'] = str(data.loc[brit, 'mean']) + ' (' + str(data.loc[brit, 'std_round']) + ')'
    # the Total row shows only the formatted count
    total_row = _first_row_label(data, 'Total')
    data.loc[total_row, 'count_mean'] = data.loc[total_row, 'count_int']

    return data

In [3]:
def _read_summary_table(infection, cohort):
    """Read the summary-stats CSV of one infection/cohort pair.

    The file is looked up at
    ../output/hospitalisation_prediction_<infection>/summary_table_<infection>_<cohort>.csv;
    pd.read_csv raises FileNotFoundError when that file does not exist.
    """
    return pd.read_csv(f'../output/hospitalisation_prediction_{infection}/summary_table_{infection}_{cohort}.csv')


# One module-level name per table: later cells refer to these names directly.
summary_table_lrti_no_abs_incdt = _read_summary_table('lrti', 'no_abs_incdt')
summary_table_lrti_abs_incdt = _read_summary_table('lrti', 'abs_incdt')
summary_table_lrti_no_abs_prevl = _read_summary_table('lrti', 'no_abs_prevl')
summary_table_lrti_abs_prevl = _read_summary_table('lrti', 'abs_prevl')
summary_table_urti_no_abs_incdt = _read_summary_table('urti', 'no_abs_incdt')
summary_table_urti_abs_incdt = _read_summary_table('urti', 'abs_incdt')
summary_table_urti_no_abs_prevl = _read_summary_table('urti', 'no_abs_prevl')
summary_table_urti_abs_prevl = _read_summary_table('urti', 'abs_prevl')
summary_table_uti_no_abs_incdt = _read_summary_table('uti', 'no_abs_incdt')
summary_table_uti_abs_incdt = _read_summary_table('uti', 'abs_incdt')
summary_table_uti_no_abs_prevl = _read_summary_table('uti', 'no_abs_prevl')
summary_table_uti_abs_prevl = _read_summary_table('uti', 'abs_prevl')
summary_table_sinusitis_no_abs_incdt = _read_summary_table('sinusitis', 'no_abs_incdt')
summary_table_sinusitis_abs_incdt = _read_summary_table('sinusitis', 'abs_incdt')
summary_table_sinusitis_no_abs_prevl = _read_summary_table('sinusitis', 'no_abs_prevl')
summary_table_sinusitis_abs_prevl = _read_summary_table('sinusitis', 'abs_prevl')
summary_table_otmedia_no_abs_incdt = _read_summary_table('otmedia', 'no_abs_incdt')
summary_table_otmedia_abs_incdt = _read_summary_table('otmedia', 'abs_incdt')
summary_table_otmedia_no_abs_prevl = _read_summary_table('otmedia', 'no_abs_prevl')
summary_table_otmedia_abs_prevl = _read_summary_table('otmedia', 'abs_prevl')
summary_table_ot_externa_no_abs_incdt = _read_summary_table('ot_externa', 'no_abs_incdt')
summary_table_ot_externa_abs_incdt = _read_summary_table('ot_externa', 'abs_incdt')
summary_table_ot_externa_no_abs_prevl = _read_summary_table('ot_externa', 'no_abs_prevl')
summary_table_ot_externa_abs_prevl = _read_summary_table('ot_externa', 'abs_prevl')
summary_table_pneumonia_no_abs_incdt = _read_summary_table('pneumonia', 'no_abs_incdt')
summary_table_pneumonia_abs_incdt = _read_summary_table('pneumonia', 'abs_incdt')
summary_table_pneumonia_no_abs_prevl = _read_summary_table('pneumonia', 'no_abs_prevl')
summary_table_pneumonia_abs_prevl = _read_summary_table('pneumonia', 'abs_prevl')
summary_table_cough_no_abs_incdt = _read_summary_table('cough', 'no_abs_incdt')
summary_table_cough_abs_incdt = _read_summary_table('cough', 'abs_incdt')
summary_table_cough_no_abs_prevl = _read_summary_table('cough', 'no_abs_prevl')
summary_table_cough_abs_prevl = _read_summary_table('cough', 'abs_prevl')
summary_table_cough_cold_no_abs_incdt = _read_summary_table('cough_cold', 'no_abs_incdt')
summary_table_cough_cold_abs_incdt = _read_summary_table('cough_cold', 'abs_incdt')
summary_table_cough_cold_no_abs_prevl = _read_summary_table('cough_cold', 'no_abs_prevl')
summary_table_cough_cold_abs_prevl = _read_summary_table('cough_cold', 'abs_prevl')
summary_table_throat_no_abs_incdt = _read_summary_table('throat', 'no_abs_incdt')
summary_table_throat_abs_incdt = _read_summary_table('throat', 'abs_incdt')
summary_table_throat_no_abs_prevl = _read_summary_table('throat', 'no_abs_prevl')
summary_table_throat_abs_prevl = _read_summary_table('throat', 'abs_prevl')

FileNotFoundError: [Errno 2] No such file or directory: '../output/hospitalisation_prediction_urti/summary_table_urti_no_abs_incdt.csv'

## 1- process data to make baseline table, and save it

In [4]:
# Run data_process over every summary table and re-bind each module-level name
# (summary_table_<infection>_<cohort>) to its processed frame. The names are
# looked up through globals() because the following cell reads them directly.
# NOTE: each name is overwritten with its processed frame, so this cell must
# run exactly once after the tables are read.
for _infection in ('lrti', 'urti', 'uti', 'sinusitis', 'otmedia', 'ot_externa',
                   'pneumonia', 'cough', 'cough_cold', 'throat'):
    for _cohort in ('no_abs_incdt', 'abs_incdt', 'no_abs_prevl', 'abs_prevl'):
        _table_name = f'summary_table_{_infection}_{_cohort}'
        globals()[_table_name] = data_process(globals()[_table_name])
# do not leave the loop helpers behind in the notebook namespace
del _infection, _cohort, _table_name

In [6]:
# Assemble the baseline table: the row labels come from the first LRTI table,
# followed by one 'count_mean' column per infection/cohort pair, in the order
# infection-major, cohort-minor. pandas lines the Series up on their row index
# when the frame is built.
d = {'variable': summary_table_lrti_no_abs_incdt['variable']}
d.update({
    f'{infection}_{cohort}': globals()[f'summary_table_{infection}_{cohort}']['count_mean']
    for infection in ('lrti', 'urti', 'uti', 'sinusitis', 'otmedia', 'ot_externa',
                      'pneumonia', 'cough', 'cough_cold', 'throat')
    for cohort in ('no_abs_incdt', 'abs_incdt', 'no_abs_prevl', 'abs_prevl')
})
data = pd.DataFrame(data=d)

# persist the combined table without the row index
data.to_csv('../output/aggregate_tables/baseline_table_all_infections.csv', index=False)