# Prediction of Hospital Admission Related to Infections

In this notebook, we develop a Cox proportional hazards regression model to predict the risk of hospital admission for common infections, including urinary tract infection (UTI), upper respiratory tract infection (URTI), lower respiratory tract infection (LRTI), sinusitis, otitis media or middle ear infection (ot media), and ear infection or otitis externa (ot externa).

In [5]:
import pandas as pd
from matplotlib import pyplot
import matplotlib.pyplot as plt
import seaborn as sns
import math
import numpy as np
import os
import glob
import gzip
from matplotlib.ticker import PercentFormatter
from patsy import dmatrices
from lifelines import CoxPHFitter
import statsmodels.api as sm
import statsmodels.formula.api as smf
# import sklearn
# from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
from sklearn.impute import KNNImputer
import pickle
from sklearn.preprocessing import PolynomialFeatures
from lifelines.utils import k_fold_cross_validation
from lifelines.utils import concordance_index
from lifelines.calibration import survival_probability_calibration
import io
import sys
from contextlib import redirect_stdout
# import pyreadr
# import miceforest as mf
%matplotlib inline

In [6]:
def create_histplot(title, df, ax):
    """Draw a histogram of ``df`` on the axes object ``ax``.

    Parameters
    ----------
    title
        Text passed to ``ax.set_title``.
    df
        Values forwarded unchanged to ``ax.hist``.
    ax
        Object exposing ``hist``, ``set_title``, ``set_ylabel`` and
        ``xaxis.set_tick_params`` (presumably a matplotlib Axes).

    Returns
    -------
    None
        The function only draws on ``ax``.
    """
    # Fixed look of the bars: purple fill, black outline, 20 bins.
    bar_options = {'color': 'purple', 'edgecolor': 'black', 'bins': 20}
    # Keep x tick labels at the bottom only, unrotated.
    tick_options = {
        'which': 'both',
        'labelbottom': True,
        'labeltop': False,
        'rotation': 0,
    }

    ax.hist(df, **bar_options)
    ax.set_title(title)
    ax.set_ylabel('Frequency')
    ax.xaxis.set_tick_params(**tick_options)

In [7]:
def create_lineplot(data, var, title, legend_title, ax1):
    """Plot, per ``date``, how many rows fall in each level of ``var``.

    Parameters
    ----------
    data
        DataFrame containing a ``'date'`` column and the column named by ``var``.
    var
        Name of the column whose levels become separate lines (the hue).
    title
        Title set on ``ax1``.
    legend_title
        Title given to the legend on ``ax1``.
    ax1
        Axes object the seaborn line plot is drawn on.

    Returns
    -------
    None
        The function only draws on ``ax1``.
    """
    # Row count for every (date, var) combination, as a flat three-column frame.
    counts = data.groupby(['date', var]).size().reset_index(name='Freq')

    sns.lineplot(data=counts, x='date', y='Freq', hue=var, ax=ax1)

    ax1.set_title(title)
    ax1.set_ylabel('Frequency')
    # Date labels are rotated to vertical.
    ax1.xaxis.set_tick_params(
        which='both',
        labelbottom=True,
        labeltop=False,
        rotation=90,
    )
    ax1.legend(title=legend_title)
#     ax1.axvline(x="2020-01", color='black', ls='--', lw=1.5)
#     ax1.axvline(x="2020-04", color='black', ls='--', lw=1.5)
#     ax1.axvline(x="2021-04", color='black', ls='--', lw=1.5)

In [8]:
# function to capture the text printed by a cph model's HTML-style summary and return it as a string
def GetPrintSummary(model):
    """Return whatever ``model.print_summary(style="html")`` writes to stdout.

    Parameters
    ----------
    model
        Object exposing ``print_summary(style=...)`` (presumably a fitted
        lifelines model).

    Returns
    -------
    str
        The text captured from standard output during the call; an empty
        string if nothing was written.
    """
    captured = io.StringIO()
    # Temporarily route stdout into the in-memory buffer.
    with redirect_stdout(captured):
        model.print_summary(style="html")
    return captured.getvalue()

In [9]:
# data2019_month = {}
# data2019 = []

# i=1
# while  i < 13:
#     # data of 2019
#     # if (i<4):
#     data2019_month["%s" %i] = pd.read_csv(f'../output/hospitalisation_data/input_hospitalisation_2019-'+str(i).zfill(2)+'-01.csv.gz')
#     data2019_month["%s" %i]['date'] = "2019-"+str(i).zfill(2)
#     # if (i>3) and (i<7):
#     #     data2019_month["%s" %i] = pd.read_csv(f'output/hospitalisation_data/input_hospitalisatio2019-'+str(i).zfill(2)+'-01.csv.gz')
#     #     data2019_month["%s" %i]['date'] = "2019-"+str(i).zfill(2)
#     if (i<3) or (i>11):
#         data2019_month["%s" %i]['season'] = "Winter"
#     elif (i>2) and (i<6):
#         data2019_month["%s" %i]['season'] = "Spring"
#     elif (i>5) and (i<9):
#         data2019_month["%s" %i]['season'] = "Summer"
#     elif (i>8) and (i<12):
#         data2019_month["%s" %i]['season'] = "Autumn"

#     data2019.append(data2019_month["%s" %i])
#     i=i+1
    
# data2019 = pd.concat(data2019_month, axis=0, ignore_index=True)

# data = data2019

In [10]:
# data201901 = pd.read_csv(f'../output/hospitalisation_data/input_hospitalisation_2019-01-01.csv.gz')
# data201902 = pd.read_csv(f'../output/hospitalisation_data/input_hospitalisation_2019-02-01.csv.gz')
# data201903 = pd.read_csv(f'../output/hospitalisation_data/input_hospitalisation_2019-03-01.csv.gz')
# data201904 = pd.read_csv(f'../output/hospitalisation_data/input_hospitalisation_2019-04-01.csv.gz')
# data201905 = pd.read_csv(f'../output/hospitalisation_data/input_hospitalisation_2019-05-01.csv.gz')
# data201906 = pd.read_csv(f'../output/hospitalisation_data/input_hospitalisation_2019-06-01.csv.gz')
# data201907 = pd.read_csv(f'../output/hospitalisation_data/input_hospitalisation_2019-07-01.csv.gz')
# data201908 = pd.read_csv(f'../output/hospitalisation_data/input_hospitalisation_2019-08-01.csv.gz')
# data201909 = pd.read_csv(f'../output/hospitalisation_data/input_hospitalisation_2019-09-01.csv.gz')
# data201910 = pd.read_csv(f'../output/hospitalisation_data/input_hospitalisation_2019-10-01.csv.gz')
# data201911 = pd.read_csv(f'../output/hospitalisation_data/input_hospitalisation_2019-11-01.csv.gz')
# data201912 = pd.read_csv(f'../output/hospitalisation_data/input_hospitalisation_2019-12-01.csv.gz')

# data = pd.concat([data201901, data201902, data201903, data201904, data201905, data201906, data201907, data201908, data201909, data201910, data201911, data201912], axis=0)
# # data.shape

In [11]:
# data201901 = pd.read_csv(f'../output/hospitalisation_data/input_hospitalisation_2019-01-01.csv.gz')
# data201901['season'] = "Winter"
# data201902 = pd.read_csv(f'../output/hospitalisation_data/input_hospitalisation_2019-02-01.csv.gz')
# data201902['season'] = "Winter"
# data201903 = pd.read_csv(f'../output/hospitalisation_data/input_hospitalisation_2019-03-01.csv.gz')
# data201903['season'] = "Spring"
# data201904 = pd.read_csv(f'../output/hospitalisation_data/input_hospitalisation_2019-04-01.csv.gz')
# data201904['season'] = "Spring"
# data201905 = pd.read_csv(f'../output/hospitalisation_data/input_hospitalisation_2019-05-01.csv.gz')
# data201905['season'] = "Spring"
# data201906 = pd.read_csv(f'../output/hospitalisation_data/input_hospitalisation_2019-06-01.csv.gz')
# data201906['season'] = "Summer"
# data201907 = pd.read_csv(f'../output/hospitalisation_data/input_hospitalisation_2019-07-01.csv.gz')
# data201907['season'] = "Summer"
# data201908 = pd.read_csv(f'../output/hospitalisation_data/input_hospitalisation_2019-08-01.csv.gz')
# data201908['season'] = "Summer"
# data201909 = pd.read_csv(f'../output/hospitalisation_data/input_hospitalisation_2019-09-01.csv.gz')
# data201909['season'] = "Autumn"
# data201910 = pd.read_csv(f'../output/hospitalisation_data/input_hospitalisation_2019-10-01.csv.gz')
# data201910['season'] = "Autumn"
# data201911 = pd.read_csv(f'../output/hospitalisation_data/input_hospitalisation_2019-11-01.csv.gz')
# data201911['season'] = "Autumn"
# data201912 = pd.read_csv(f'../output/hospitalisation_data/input_hospitalisation_2019-12-01.csv.gz')
# data201912['season'] = "Winter"

In [12]:
# data = pd.concat([data201901, data201902, data201903, data201904, data201905, data201906, data201907, data201908, data201909, data201910, data201911, data201912], axis=0)
# data#.shape

In [13]:
# data = pyreadr.read_r(f'../output/hospitalisation_data/dat2019-12-01.rds')

In [19]:
# data = pd.read_csv(f'../output/hospitalisation_data/data2019.csv.gz')

In [None]:
# Load the 2019 hospitalisation extracts one file at a time and tag every row
# of each frame with a 'season' label. The file names carry a first-of-month
# date, so each file is presumably one month's extract — TODO confirm.
# Season labels used below: Jan-Feb -> Winter, Mar-May -> Spring, Jun -> Summer.
# NOTE: the f-prefix on the paths has no placeholders; the paths are plain literals.

# Winter (January, February)
data201901 = pd.read_csv(f'../output/hospitalisation_data/input_hospitalisation_2019-01-01.csv.gz')
data201901['season'] = "Winter"
data201902 = pd.read_csv(f'../output/hospitalisation_data/input_hospitalisation_2019-02-01.csv.gz')
data201902['season'] = "Winter"
# Spring (March, April, May)
data201903 = pd.read_csv(f'../output/hospitalisation_data/input_hospitalisation_2019-03-01.csv.gz')
data201903['season'] = "Spring"
data201904 = pd.read_csv(f'../output/hospitalisation_data/input_hospitalisation_2019-04-01.csv.gz')
data201904['season'] = "Spring"
data201905 = pd.read_csv(f'../output/hospitalisation_data/input_hospitalisation_2019-05-01.csv.gz')
data201905['season'] = "Spring"
# Summer (June)
data201906 = pd.read_csv(f'../output/hospitalisation_data/input_hospitalisation_2019-06-01.csv.gz')
data201906['season'] = "Summer"