#### Explores relationships between adverse hospital outcomes, rehabilitation activity and demographic history

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import matplotlib.ticker as mtick
from matplotlib.dates import DateFormatter
from datetime import timedelta
from datetime import datetime
from tqdm import tqdm
from tableone import TableOne
from sklearn.preprocessing import KBinsDiscretizer

##### Load cohort and relevant data

In [None]:
# Read the cohort extract (the path is blank in this copy of the notebook).
cohort_path = ''
inp_data = pd.read_csv(cohort_path)
# Frame shape together with the number of distinct ppid values.
print(inp_data.shape, inp_data['ppid'].nunique())

#### Get index admission for observation
# Optional step, currently disabled: keep only the first-sorted row per ppid.
#inp_data = inp_data.sort_values(['ppid', 'EpisodeNumber', 'AdmissionDate', 'AdmissionTime']).drop_duplicates(['ppid'], keep='first')
print(inp_data.shape)
# Optional step, currently disabled: keep only rows where gt_m equals 0.
#inp_data = inp_data[inp_data['gt_m']==0]
print(inp_data.shape)

In [None]:
# Display every column name of the cohort frame as a list.
inp_data.columns.tolist()

In [None]:
## Comorbidities (loading is currently disabled)
#smr_data = pd.read_csv('', sep='\t', low_memory=False, encoding='iso-8859-1')
#gp_data = pd.read_csv('', sep='\t', low_memory=False, encoding='iso-8859-1')
#nrs_data = pd.read_csv('', sep='\t', low_memory=False, encoding='iso-8859-1')

## TRAK Questionnaires
# Every extract is tab-separated and read with low_memory=False; two of them
# additionally need an explicit ISO-8859-1 encoding.
tsv_opts = dict(sep='\t', low_memory=False)
latin1 = 'iso-8859-1'

trq_4at = pd.read_csv('', **tsv_opts)
trq_BBF = pd.read_csv('', encoding=latin1, **tsv_opts)
trq_falls = pd.read_csv('', **tsv_opts)
trq_mobility = pd.read_csv('', **tsv_opts)
trq_MRSA = pd.read_csv('', **tsv_opts)
trq_MUST = pd.read_csv('', **tsv_opts)
trq_nutr = pd.read_csv('', encoding=latin1, **tsv_opts)
trq_RUB = pd.read_csv('', **tsv_opts)
trq_wt = pd.read_csv('', **tsv_opts)

In [None]:
#### Set MM groups
# Total rehabilitation contacts: row-wise sum of the n_PT, n_OT and n_SLT counts.
rehab_count_cols = ['n_PT', 'n_OT', 'n_SLT']
inp_data['total_count_rehab'] = inp_data[rehab_count_cols].sum(axis=1)

# Multimorbidity band from the long-term-condition count:
# 4 or more -> high-count, exactly 2 or 3 -> simple, anything else -> none.
n_ltc = inp_data['total_longterm_conditions']
inp_data['mm_group'] = np.select(
    [n_ltc >= 4, n_ltc.isin([2, 3])],
    ['High-count MM', 'Simple MM'],
    default='No MM',
)

# Physical/mental flag: 'M' when conditions sit in one domain only (more than
# one in that domain, none in the other); otherwise 'Y' when the
# phys_men_multimorbidity indicator is 1, else 'N'. The 'M' rule takes priority.
n_phys = inp_data['total_physlongterm_conditions']
n_men = inp_data['total_menlongterm_conditions']
single_domain = ((n_phys > 1) & (n_men == 0)) | ((n_phys == 0) & (n_men > 1))
inp_data['pm_mm'] = np.select(
    [single_domain, inp_data['phys_men_multimorbidity'] == 1],
    ['M', 'Y'],
    default='N',
)

##### Get demographic and rehab summary data

In [None]:
# Columns carried forward into the rest of the analysis; everything else is dropped.
keep_cols = [
    'ppid', 'EpisodeNumber', 'AdmissionDate', 'DischargeDate', 'triage_code', 'HOSP_adt',
    'AgeAtAdmission', 'Sex', 'simd_gr',
    'gt_m', 'gt_es_hosp', 'gt_cc', 'gt_dd', 'gt_eld',
    'total_count_all', 'total_count_ooh_all',
    'total_count_rehab', 'total_count_ooh_rehab',
    'total_mins_rehab', 'total_mins_ooh_rehab', 'n_PT', 'n_OT', 'n_DT',
    'n_SLT', 'n_NURSE', 'time_to_therapy', 'time_to_therapy_2', 'LOS_ED_hr', 'LOS_hosp',
    'total_n_disciplines', 'n_lead_spec', 'mm_group', 'phys_men_multimorbidity',
    'physltc_active_cancer', 'physltc_ischaemic_heart_disease', 'physltc_arthritis_arthropathy',
    'physltc_prog_neur_disease', 'physltc_hypertension', 'physltc_asthma', 'total_count_all_tf',
    'avg_n_cts_per_day', 'avg_r_cts_per_day', 'avg_cts_per_day',
]
inp_data = inp_data[keep_cols]

# Per-column count of missing values (shown as the cell output).
inp_data.isna().sum()

In [None]:
def discretize(y, nb=5):
    """Assign each value of ``y`` to a quantile bin.

    ``y`` must expose ``.values`` as an array (e.g. a pandas Series). The
    values are reshaped into a single-feature column, binned by a
    ``KBinsDiscretizer`` using the quantile strategy with ``nb`` requested
    bins, and returned as a flat array of ordinal bin codes in input order.
    """
    single_feature = y.values.reshape(-1, 1)
    binner = KBinsDiscretizer(n_bins=nb, encode='ordinal', strategy='quantile')
    bin_codes = binner.fit_transform(single_feature)
    return bin_codes.flatten()

# Bin total_count_all_tf with discretize() (five bins by default) and relabel
# the integer bin codes 0-4 from lowest to highest.
intensity_labels = {
    0: 'Very Low',
    1: 'Low',
    2: 'Medium',
    3: 'Medium-high',
    4: 'High',
}
inp_data['intensity_group'] = discretize(inp_data['total_count_all_tf']).astype(int)
inp_data['intensity_group'] = pd.Categorical(inp_data['intensity_group'].map(intensity_labels))

In [None]:
# Number of rows in each care-intensity category.
inp_data['intensity_group'].value_counts()

##### Link TRAK questionnaires data

In [None]:
# Column labels of the 4AT questionnaire frame.
trq_4at.columns

In [None]:
episode_keys = ['ppid', 'EpisodeNumber']

# Parse the assessment timestamps and strip any timezone so they are tz-naive.
assessed_at = pd.to_datetime(trq_4at['DateOfAssessment'])
trq_4at['DateOfAssessment'] = pd.DatetimeIndex(assessed_at).tz_localize(None)

# Attach each episode's HOSP_adt value to its 4AT records.
admissions = inp_data[episode_keys + ['HOSP_adt']]
trq_4at = trq_4at.merge(admissions, how='left', on=episode_keys)
trq_4at['HOSP_adt'] = pd.to_datetime(trq_4at['HOSP_adt'])

# Optional filter, currently disabled: only assessments within 24h of HOSP_adt.
#trq_4at = trq_4at[(trq_4at['HOSP_adt'] + timedelta(hours=24)) >= trq_4at['DateOfAssessment']].sort_values(['ppid', 'DateOfAssessment']).drop_duplicates('ppid', keep='last')

# Keep the earliest-dated 4AT record per episode.
trq_4at = trq_4at.sort_values(episode_keys + ['DateOfAssessment'])
trq_4at = trq_4at.drop_duplicates(episode_keys, keep='first')

# Bring the score onto the cohort; rows without a score get the sentinel -1.
inp_data = inp_data.merge(trq_4at[episode_keys + ['4AT score']], how='left', on=episode_keys)
inp_data['4AT score'] = inp_data['4AT score'].fillna(-1).astype(int)

In [None]:
# Earliest DateOfAssessment value held in trq_4at.
trq_4at['DateOfAssessment'].min()

In [None]:
# Collapse the integer 4AT score into labels: 4 or more -> '>=4', the -1
# sentinel -> 'Not assessed', every other value -> its own number as text.
score_4at = inp_data['4AT score']
labels_4at = np.where(
    score_4at >= 4,
    '>=4',
    np.where(score_4at == -1, 'Not assessed', score_4at.astype(str)),
)
inp_data['4AT score'] = pd.Categorical(labels_4at)

In [None]:
# Number of rows in each 4AT score category.
inp_data['4AT score'].value_counts()

In [None]:
# Distinct ppid count alongside the frame's (rows, columns) shape.
inp_data.ppid.nunique(), inp_data.shape

In [None]:
# Parse the assessment timestamps and strip any timezone so they are tz-naive.
trq_MUST['DateOfAssessment'] = pd.DatetimeIndex(pd.to_datetime(trq_MUST['DateOfAssessment'])).tz_localize(None)
# Attach each episode's HOSP_adt value to its MUST records.
trq_MUST = pd.merge(trq_MUST, inp_data[['ppid', 'EpisodeNumber', 'HOSP_adt']], how='left', on=['ppid', 'EpisodeNumber'])
trq_MUST['HOSP_adt'] = pd.to_datetime(trq_MUST['HOSP_adt'])
# Optional filter, currently disabled: only assessments within 24h of HOSP_adt.
#trq_MUST = trq_MUST[(trq_MUST['HOSP_adt'] + timedelta(hours=24)) >= trq_MUST['DateOfAssessment']].sort_values(['ppid', 'DateOfAssessment']).drop_duplicates('ppid', keep='last')
# Keep the earliest-dated MUST record per episode. The result must be stored
# back in trq_MUST (not trq_4at): otherwise the merge below sees every MUST
# record, multiplies cohort rows for episodes with repeated assessments, and
# the 4AT frame is overwritten with MUST data.
trq_MUST = trq_MUST.sort_values(['ppid', 'EpisodeNumber', 'DateOfAssessment']).drop_duplicates(['ppid', 'EpisodeNumber'], keep='first')
inp_data = inp_data.merge(trq_MUST[['ppid', 'EpisodeNumber', 'Total score']], how='left', on=['ppid', 'EpisodeNumber'])
# Rows without a MUST total get the sentinel -1.
inp_data['Total score'] = inp_data['Total score'].fillna(-1).astype(int)
inp_data = inp_data.rename(columns={'Total score': 'MUST score'})

In [None]:
# Latest DateOfAssessment value held in trq_MUST.
trq_MUST['DateOfAssessment'].max()

In [None]:
# Collapse the integer MUST score into labels: 2 or more -> '>=2', the -1
# sentinel -> 'Not assessed', every other value -> its own number as text.
score_must = inp_data['MUST score']
labels_must = np.where(
    score_must >= 2,
    '>=2',
    np.where(score_must == -1, 'Not assessed', score_must.astype(str)),
)
inp_data['MUST score'] = pd.Categorical(labels_must)

In [None]:
# Collapse the cohort to one row per (ppid, EpisodeNumber). Where an episode
# has several rows, the one that sorts first by AdmissionDate and then by
# MUST score category is the one retained.
episode_keys = ['ppid', 'EpisodeNumber']
sort_order = episode_keys + ['AdmissionDate', 'MUST score']
inp_data = inp_data.sort_values(sort_order)
inp_data = inp_data.drop_duplicates(episode_keys, keep='first')

In [None]:
# Number of rows in each MUST score category.
inp_data['MUST score'].value_counts()

In [None]:
# Distinct ppid count alongside the frame's (rows, columns) shape.
inp_data.ppid.nunique(), inp_data.shape

In [None]:
episode_keys = ['ppid', 'EpisodeNumber']
falls_col = 'Has_the_patient_fallen_in_the_last_6_months_code'

# Parse the assessment timestamps and strip any timezone so they are tz-naive.
assessed_at = pd.to_datetime(trq_falls['DateOfAssessment'])
trq_falls['DateOfAssessment'] = pd.DatetimeIndex(assessed_at).tz_localize(None)

# Attach each episode's HOSP_adt value to its falls records.
admissions = inp_data[episode_keys + ['HOSP_adt']]
trq_falls = trq_falls.merge(admissions, how='left', on=episode_keys)
trq_falls['HOSP_adt'] = pd.to_datetime(trq_falls['HOSP_adt'])

# Optional filter, currently disabled: only assessments within 24h of HOSP_adt.
#trq_falls = trq_falls[(trq_falls['HOSP_adt'] + timedelta(hours=24)) >= trq_falls['DateOfAssessment']].sort_values(['ppid', 'DateOfAssessment']).drop_duplicates('ppid', keep='last')

# Keep the earliest-dated falls record per episode.
trq_falls = trq_falls.sort_values(episode_keys + ['DateOfAssessment'])
trq_falls = trq_falls.drop_duplicates(episode_keys, keep='first')

# Bring the answer onto the cohort; missing answers become 'Not assessed'.
inp_data = inp_data.merge(trq_falls[episode_keys + [falls_col]], how='left', on=episode_keys)
inp_data[falls_col] = inp_data[falls_col].fillna('Not assessed').astype(str)
inp_data = inp_data.rename(columns={falls_col: 'Fall event (within 6 months of admission)'})

In [None]:
# Earliest DateOfAssessment value held in trq_falls.
trq_falls['DateOfAssessment'].min()

In [None]:
# Number of rows for each fall-history answer.
inp_data['Fall event (within 6 months of admission)'].value_counts()

In [None]:
# Distinct ppid count alongside the frame's (rows, columns) shape.
inp_data.ppid.nunique(), inp_data.shape

In [None]:
# Column labels of the mobility questionnaire frame.
trq_mobility.columns

In [None]:
episode_keys = ['ppid', 'EpisodeNumber']

# Parse the assessment timestamps and strip any timezone so they are tz-naive.
assessed_at = pd.to_datetime(trq_mobility['DateOfAssessment'])
trq_mobility['DateOfAssessment'] = pd.DatetimeIndex(assessed_at).tz_localize(None)

# Attach each episode's HOSP_adt value to its mobility records.
admissions = inp_data[episode_keys + ['HOSP_adt']]
trq_mobility = trq_mobility.merge(admissions, how='left', on=episode_keys)
trq_mobility['HOSP_adt'] = pd.to_datetime(trq_mobility['HOSP_adt'])

# Optional filter, currently disabled: only assessments within 24h of HOSP_adt.
#trq_mobility = trq_mobility[(trq_mobility['HOSP_adt'] + timedelta(hours=24)) >= trq_mobility['DateOfAssessment']].sort_values(['ppid', 'DateOfAssessment']).drop_duplicates('ppid', keep='last')

# Keep the earliest-dated mobility record per episode.
trq_mobility = trq_mobility.sort_values(episode_keys + ['DateOfAssessment'])
trq_mobility = trq_mobility.drop_duplicates(episode_keys, keep='first')

# Bring the walking answer onto the cohort; missing answers become 'Not assessed'.
inp_data = inp_data.merge(trq_mobility[episode_keys + ['Walking']], how='left', on=episode_keys)
inp_data['Walking'] = inp_data['Walking'].fillna('Not assessed').astype(str)
inp_data = inp_data.rename(columns={'Walking': 'Mobility (walking independence)'})

In [None]:
# Bring the bathing/showering answer onto the cohort. The raw
# 'Bathing/showering' column stays in the frame; the cleaned copy (missing
# answers set to 'Not assessed') is stored under the descriptive name.
episode_keys = ['ppid', 'EpisodeNumber']
bathing_answers = trq_mobility[episode_keys + ['Bathing/showering']]
inp_data = inp_data.merge(bathing_answers, how='left', on=episode_keys)
inp_data['Mobility (bathing independence)'] = (
    inp_data['Bathing/showering'].fillna('Not assessed').astype(str)
)

In [None]:
# Latest DateOfAssessment value held in trq_mobility.
trq_mobility['DateOfAssessment'].max()

In [None]:
# Recode the free-text walking and bathing answers, applying the rules in
# this order (a later rule only sees what earlier rules left untouched):
#   1. text containing 'Independent'     -> 'Y'
#   2. text containing 'Not applicable'  -> 'Bed rest (N/A)'
#   3. text naming help, supervision, a walking aid or restraint -> 'N'
# Answers matching none of the rules are kept as they are.
needs_help_pattern = 'Assistance|Supervision|Zimmer / Rollator|Stick|Crutches|Restraint'

for col in ('Mobility (walking independence)', 'Mobility (bathing independence)'):
    answer = inp_data[col]
    answer = answer.mask(answer.str.contains('Independent'), 'Y')
    answer = answer.mask(answer.str.contains('Not applicable'), 'Bed rest (N/A)')
    answer = answer.mask(answer.str.contains(needs_help_pattern), 'N')
    inp_data[col] = answer

In [None]:
# Any bathing answer that is not one of the four recognised labels is set to 'Y'.
# NOTE(review): this treats every unrecognised answer as independent -- confirm that is intended.
bathing_col = 'Mobility (bathing independence)'
recognised_labels = ['Not assessed', 'Y', 'N', 'Bed rest (N/A)']
is_recognised = inp_data[bathing_col].isin(recognised_labels)
inp_data[bathing_col] = inp_data[bathing_col].where(is_recognised, 'Y')

In [None]:
# Number of rows for each recoded walking-independence value.
inp_data['Mobility (walking independence)'].value_counts()

In [None]:
# Number of rows for each recoded bathing-independence value.
inp_data['Mobility (bathing independence)'].value_counts()

In [None]:
# Distinct ppid count alongside the frame's (rows, columns) shape.
inp_data.ppid.nunique(), inp_data.shape

In [None]:
episode_keys = ['ppid', 'EpisodeNumber']
swallow_col = 'Does_the_patient_have_any_swallowing_difficulties?_Code'

# DateOfAssessment is the calendar date (time of day dropped) of DateIdentified.
identified_at = pd.to_datetime(trq_nutr['DateIdentified'])
trq_nutr['DateOfAssessment'] = pd.to_datetime(identified_at.dt.date)

# Attach each episode's AdmissionDate to its nutrition records.
admissions = inp_data[episode_keys + ['AdmissionDate']]
trq_nutr = trq_nutr.merge(admissions, how='left', on=episode_keys)
trq_nutr['AdmissionDate'] = pd.to_datetime(trq_nutr['AdmissionDate'])

# Optional filter, currently disabled: only records within one day of admission.
#trq_nutr = trq_nutr[(trq_nutr['AdmissionDate'] + pd.DateOffset(days=1)) >= trq_nutr['DateIdentified']].sort_values(['ppid', 'DateIdentified']).drop_duplicates('ppid', keep='last')

# Keep the first record per episode when ordered by DateIdentified.
# NOTE(review): the ordering uses the raw DateIdentified column rather than a
# parsed datetime -- confirm its stored format sorts chronologically.
trq_nutr = trq_nutr.sort_values(episode_keys + ['DateIdentified'])
trq_nutr = trq_nutr.drop_duplicates(episode_keys, keep='first')

# Bring the swallowing answer onto the cohort; missing answers become 'Not assessed'.
inp_data = inp_data.merge(trq_nutr[episode_keys + [swallow_col]], how='left', on=episode_keys)
inp_data[swallow_col] = inp_data[swallow_col].fillna('Not assessed').astype(str)
inp_data = inp_data.rename(columns={swallow_col: 'Nutritional assessment (swallowing difficulties)'})

In [None]:
# Earliest DateOfAssessment value held in trq_nutr.
trq_nutr['DateOfAssessment'].min()

In [None]:
# Number of rows for each swallowing-difficulty answer.
inp_data['Nutritional assessment (swallowing difficulties)'].value_counts()

In [None]:
# Distinct ppid count alongside the frame's (rows, columns) shape.
inp_data.ppid.nunique(), inp_data.shape

In [None]:
episode_keys = ['ppid', 'EpisodeNumber']

# Parse the assessment timestamps and strip any timezone so they are tz-naive.
assessed_at = pd.to_datetime(trq_wt['DateOfAssessment'])
trq_wt['DateOfAssessment'] = pd.DatetimeIndex(assessed_at).tz_localize(None)

# Attach each episode's HOSP_adt value to its Waterlow records.
admissions = inp_data[episode_keys + ['HOSP_adt']]
trq_wt = trq_wt.merge(admissions, how='left', on=episode_keys)
trq_wt['HOSP_adt'] = pd.to_datetime(trq_wt['HOSP_adt'])

# Optional filter, currently disabled: only assessments within 24h of HOSP_adt.
#trq_wt = trq_wt[(trq_wt['HOSP_adt'] + timedelta(hours=24)) >= trq_wt['DateOfAssessment']].sort_values(['ppid', 'DateOfAssessment']).drop_duplicates('ppid', keep='last')

# Keep the earliest-dated Waterlow record per episode.
trq_wt = trq_wt.sort_values(episode_keys + ['DateOfAssessment'])
trq_wt = trq_wt.drop_duplicates(episode_keys, keep='first')

# Bring the score onto the cohort. Missing scores are left as NaN here
# (the zero-fill below is disabled).
inp_data = inp_data.merge(trq_wt[episode_keys + ['Score']], how='left', on=episode_keys)
#inp_data['Score'] = inp_data['Score'].fillna(0).astype(int)
inp_data = inp_data.rename(columns={'Score': 'Waterlow score (Pressure ulcer)'})

In [None]:
# Latest DateOfAssessment value held in trq_wt.
trq_wt['DateOfAssessment'].max()

In [None]:
# Band the Waterlow score; the first matching threshold wins. A missing score
# fails every comparison and therefore falls through to 'Not assessed'.
waterlow = inp_data['Waterlow score (Pressure ulcer)']
risk_band = np.select(
    [waterlow >= 20, waterlow >= 15, waterlow >= 10, waterlow < 10],
    ['>=20 (At very high risk)', '>=15 (At high risk)', '>=10 (At risk)', 'Not at risk'],
    default='Not assessed',
)
inp_data['Waterlow score (Pressure ulcer)'] = pd.Categorical(risk_band)

In [None]:
# Number of rows in each Waterlow risk band.
inp_data['Waterlow score (Pressure ulcer)'].value_counts()

In [None]:
# Distinct ppid count alongside the frame's (rows, columns) shape.
inp_data.ppid.nunique(), inp_data.shape

##### Get summary tables

In [None]:
# Number of rows for each simd_gr value.
inp_data.simd_gr.value_counts()

In [None]:
# (rows, columns) of the cohort frame before the summary tables are built.
inp_data.shape

In [None]:
# Display every column name of the cohort frame as a list.
inp_data.columns.tolist()

In [None]:
# Build inp_sel, the frame fed to the summary tables: pick the reporting
# columns, recode indicators to 'Y'/'N', give columns display names and blank
# out sentinel / zero values so they are reported as missing.
summary_columns = [
    'AgeAtAdmission',
    'Sex',
    'simd_gr',
    'LOS_ED_hr',
    'LOS_hosp',
    'n_lead_spec',
    'mm_group',
    'phys_men_multimorbidity',
    'physltc_active_cancer',
    'physltc_ischaemic_heart_disease',
    'physltc_arthritis_arthropathy',
    'physltc_prog_neur_disease',
    'physltc_hypertension',
    'physltc_asthma',
    '4AT score',
    'MUST score',
    'Fall event (within 6 months of admission)',
    'Mobility (walking independence)',
    'Mobility (bathing independence)',
    'Nutritional assessment (swallowing difficulties)',
    'Waterlow score (Pressure ulcer)',
    'total_count_all',
    'total_count_ooh_all',
    'total_count_rehab',
    'total_count_ooh_rehab',
    'total_mins_rehab',
    'total_mins_ooh_rehab',
    'time_to_therapy',
    'time_to_therapy_2',
    'total_n_disciplines',
    'intensity_group',
    'n_NURSE',
    'n_PT',
    'n_OT',
    'n_SLT',
    'avg_n_cts_per_day',
    'avg_r_cts_per_day',
    'avg_cts_per_day',
    'gt_m',
    'gt_es_hosp',
    'gt_cc',
    'gt_dd',
    'gt_eld',
]
# Take an explicit copy: the column assignments below would otherwise write
# into a slice of inp_data and trigger pandas' SettingWithCopyWarning.
inp_sel = inp_data[summary_columns].copy()

# Flag computed before zero rehab counts are blanked further down.
inp_sel['Received rehab'] = np.where(inp_sel['total_count_rehab'] > 0, 'Y', 'N')
## For rehab-only
#inp_sel = inp_sel[(inp_sel['total_count_rehab'] >= 2)]
#inp_sel = inp_sel.drop('LOS_hosp', axis=1)

# Condition indicators: 0 -> 'N', any other value -> 'Y'.
# NOTE(review): a missing indicator is not equal to 0 and so is reported as 'Y' -- confirm.
for col in ['phys_men_multimorbidity',
            'physltc_active_cancer',
            'physltc_ischaemic_heart_disease',
            'physltc_arthritis_arthropathy',
            'physltc_prog_neur_disease',
            'physltc_hypertension',
            'physltc_asthma']:
    inp_sel[col] = np.where(inp_sel[col] == 0, 'N', 'Y')

inp_sel = inp_sel.rename(columns={
    'AgeAtAdmission': 'Age at index admission',
    'simd_gr': 'SIMD (1 - most deprived, 5 - least deprived region)',
    'LOS_hosp': 'Length of hospital stay (days)',
    'LOS_ED_hr': 'Length of stay in ED (hours)',
    'n_lead_spec': '# leading specialties',
    'mm_group': 'Multimorbidity group',
    'phys_men_multimorbidity': 'Physical-mental multimorbidity',
    'physltc_active_cancer': 'Active cancer',
    'physltc_ischaemic_heart_disease': 'Ischaemic Heart Disease',
    'physltc_arthritis_arthropathy': 'Arthritis or other arthropathy',
    'physltc_prog_neur_disease': 'Progressive neurological disease',
    'physltc_hypertension': 'Hypertension',
    'physltc_asthma': 'Asthma',
    'total_count_all': 'Total health contacts',
    'total_count_ooh_all': 'Total out-of-hours health contacts',
    'total_count_rehab': 'Total rehabilitation contacts',
    'total_count_ooh_rehab': 'Total out-of-hours rehabilitation contacts',
    'total_mins_rehab': 'Minutes of rehabilitation therapy',
    'total_mins_ooh_rehab': 'Minutes of out-of-hours rehabilitation therapy',
    'time_to_therapy': 'Time to first rehabilitation contact (hours)',
    'time_to_therapy_2': 'Time to second rehabilitation contact (hours)',
    'total_n_disciplines': '# disciplines involved',
    'intensity_group': 'Care intensity group',
    'n_NURSE': 'Nursing contacts',
    'n_PT': 'Physiotherapy contacts',
    'n_OT': 'Occupational therapy contacts',
    'n_SLT': 'Speech & Language contacts',
    'avg_n_cts_per_day': 'Nursing contacts per admission day',
    'avg_r_cts_per_day': 'Rehab contacts per admission day',
    'avg_cts_per_day': 'Contacts per admission day',
    'gt_m': 'In-hospital death',
    'gt_es_hosp': 'Extended stay',
    'gt_cc': 'ICU/HDU admission',
    'gt_dd': 'Home discharge',
    'gt_eld': 'Admission to MoE',
})

# Replace placeholder values with NaN so they count as missing in the tables.
# np.where (rather than Series.mask) keeps the resulting dtype float in all cases.
# Time-to-contact columns use -1 as the placeholder.
for col in ['Time to first rehabilitation contact (hours)',
            'Time to second rehabilitation contact (hours)']:
    inp_sel[col] = np.where(inp_sel[col] == -1, np.nan, inp_sel[col])

# For these totals an exact zero is blanked.
for col in ['Minutes of rehabilitation therapy',
            'Minutes of out-of-hours rehabilitation therapy',
            'Total rehabilitation contacts',
            'Total out-of-hours health contacts',
            'Total out-of-hours rehabilitation contacts']:
    inp_sel[col] = np.where(inp_sel[col] == 0, np.nan, inp_sel[col])

# For these two, zero and negative values are blanked.
for col in ['# disciplines involved', 'Rehab contacts per admission day']:
    inp_sel[col] = np.where(inp_sel[col] <= 0, np.nan, inp_sel[col])

# Outcome indicators: 1 -> 'Y', anything else -> 'N'.
for col in ['In-hospital death', 'Extended stay', 'ICU/HDU admission',
            'Home discharge', 'Admission to MoE']:
    inp_sel[col] = np.where(inp_sel[col] == 1, 'Y', 'N').astype(str)

# Ordered categories so the intensity groups list from lowest to highest.
inp_sel['Care intensity group'] = pd.Categorical(
    inp_sel['Care intensity group'],
    categories=['Very Low', 'Low', 'Medium', 'Medium-high', 'High'],
    ordered=True,
)

In [None]:
### Summary table stratified by in-hospital death
groupby = ['In-hospital death']

# Continuous variables passed to TableOne as non-normal.
# ('Total long-term conditions' is not listed: inp_sel has no such column.)
t_cols = ['Length of hospital stay (days)', 'Length of stay in ED (hours)',
          'Total health contacts', 'Total rehabilitation contacts',
          'Total out-of-hours health contacts', 'Total out-of-hours rehabilitation contacts',
          'Minutes of out-of-hours rehabilitation therapy', 'Nursing contacts',
          'Minutes of rehabilitation therapy', 'Time to first rehabilitation contact (hours)',
          'Time to second rehabilitation contact (hours)', 'Nursing contacts per admission day',
          'Rehab contacts per admission day', 'Contacts per admission day']

# Categorical variables: patient descriptors plus every outcome except the grouping one.
outcome_cols = ['In-hospital death', 'Extended stay', 'ICU/HDU admission',
                'Home discharge', 'Admission to MoE']
categorical = ['Sex', 'SIMD (1 - most deprived, 5 - least deprived region)', 'Multimorbidity group',
               'Physical-mental multimorbidity',
               'Active cancer', 'Ischaemic Heart Disease', 'Arthritis or other arthropathy',
               'Progressive neurological disease', 'Hypertension', 'Asthma', 'Care intensity group',
               '4AT score', 'MUST score', 'Fall event (within 6 months of admission)',
               'Mobility (walking independence)', 'Mobility (bathing independence)',
               'Nutritional assessment (swallowing difficulties)', 'Waterlow score (Pressure ulcer)',
               'Received rehab']
categorical += [c for c in outcome_cols if c not in groupby]

# Columns reported with zero decimal places. The key for the disciplines
# column must be its renamed label '# disciplines involved'; a key that does
# not match a column name has no effect.
decimals = dict.fromkeys([
    'Age at index admission',
    'Length of hospital stay (days)',
    'Length of stay in ED (hours)',
    '# leading specialties',
    'Total health contacts',
    'Total rehabilitation contacts',
    'Total out-of-hours health contacts',
    'Minutes of out-of-hours rehabilitation therapy',
    'Minutes of rehabilitation therapy',
    '# disciplines involved',
    'Time to first rehabilitation contact (hours)',
    'Time to second rehabilitation contact (hours)',
    'Nursing contacts',
    'Physiotherapy contacts',
    'Occupational therapy contacts',
    'Speech & Language contacts',
], 0)

sum_table = TableOne(inp_sel, columns=inp_sel.columns.tolist(),
                     categorical=categorical, overall=True, missing=True, htest_name=True,
                     decimals=decimals, tukey_test=True,
                     groupby=groupby, nonnormal=t_cols,
                     pval=True)

In [None]:
# Write the current summary table to an HTML file under outputs/.
sum_table.to_html('outputs/sum_table_inh_death.html')

In [None]:
### Summary table stratified by extended stay
groupby = ['Extended stay']

# Continuous variables passed to TableOne as non-normal.
# ('Total long-term conditions' is not listed: inp_sel has no such column.)
t_cols = ['Length of hospital stay (days)', 'Length of stay in ED (hours)',
          'Total health contacts', 'Total rehabilitation contacts',
          'Total out-of-hours health contacts', 'Total out-of-hours rehabilitation contacts',
          'Minutes of out-of-hours rehabilitation therapy',
          'Minutes of rehabilitation therapy', 'Time to first rehabilitation contact (hours)',
          'Time to second rehabilitation contact (hours)', 'Nursing contacts',
          'Nursing contacts per admission day',
          'Rehab contacts per admission day', 'Contacts per admission day']

# Categorical variables: patient descriptors plus every outcome except the grouping one.
outcome_cols = ['In-hospital death', 'Extended stay', 'ICU/HDU admission',
                'Home discharge', 'Admission to MoE']
categorical = ['Sex', 'SIMD (1 - most deprived, 5 - least deprived region)', 'Multimorbidity group',
               'Physical-mental multimorbidity',
               'Active cancer', 'Ischaemic Heart Disease', 'Arthritis or other arthropathy',
               'Progressive neurological disease', 'Hypertension', 'Asthma', 'Care intensity group',
               '4AT score', 'MUST score', 'Fall event (within 6 months of admission)',
               'Mobility (walking independence)', 'Mobility (bathing independence)',
               'Nutritional assessment (swallowing difficulties)', 'Waterlow score (Pressure ulcer)',
               'Received rehab']
categorical += [c for c in outcome_cols if c not in groupby]

# Columns reported with zero decimal places. The key for the disciplines
# column must be its renamed label '# disciplines involved'; a key that does
# not match a column name has no effect.
decimals = dict.fromkeys([
    'Age at index admission',
    'Length of hospital stay (days)',
    'Length of stay in ED (hours)',
    '# leading specialties',
    'Total health contacts',
    'Total rehabilitation contacts',
    'Total out-of-hours health contacts',
    'Minutes of out-of-hours rehabilitation therapy',
    'Minutes of rehabilitation therapy',
    '# disciplines involved',
    'Time to first rehabilitation contact (hours)',
    'Time to second rehabilitation contact (hours)',
    'Nursing contacts',
    'Physiotherapy contacts',
    'Occupational therapy contacts',
    'Speech & Language contacts',
], 0)

sum_table = TableOne(inp_sel, columns=inp_sel.columns.tolist(),
                     categorical=categorical, overall=True, missing=True, htest_name=True,
                     decimals=decimals, tukey_test=True,
                     groupby=groupby, nonnormal=t_cols,
                     pval=True)

In [None]:
# Write the current summary table to HTML.
# NOTE(review): the output path is an empty string -- set a destination before running.
sum_table.to_html('')

In [None]:
### Summary table stratified by ICU/HDU admission
groupby = ['ICU/HDU admission']

# Continuous variables passed to TableOne as non-normal.
# ('Total long-term conditions' is not listed: inp_sel has no such column.)
t_cols = ['Length of hospital stay (days)', 'Length of stay in ED (hours)',
          'Total health contacts', 'Total rehabilitation contacts',
          'Total out-of-hours health contacts', 'Total out-of-hours rehabilitation contacts',
          'Minutes of out-of-hours rehabilitation therapy',
          'Minutes of rehabilitation therapy', 'Time to first rehabilitation contact (hours)',
          'Time to second rehabilitation contact (hours)', 'Nursing contacts',
          'Nursing contacts per admission day',
          'Rehab contacts per admission day', 'Contacts per admission day']

# Categorical variables: patient descriptors plus every outcome except the grouping one.
outcome_cols = ['In-hospital death', 'Extended stay', 'ICU/HDU admission',
                'Home discharge', 'Admission to MoE']
categorical = ['Sex', 'SIMD (1 - most deprived, 5 - least deprived region)', 'Multimorbidity group',
               'Physical-mental multimorbidity',
               'Active cancer', 'Ischaemic Heart Disease', 'Arthritis or other arthropathy',
               'Progressive neurological disease', 'Hypertension', 'Asthma', 'Care intensity group',
               '4AT score', 'MUST score', 'Fall event (within 6 months of admission)',
               'Mobility (walking independence)', 'Mobility (bathing independence)',
               'Nutritional assessment (swallowing difficulties)', 'Waterlow score (Pressure ulcer)',
               'Received rehab']
categorical += [c for c in outcome_cols if c not in groupby]

# Columns reported with zero decimal places. The key for the disciplines
# column must be its renamed label '# disciplines involved'; a key that does
# not match a column name has no effect.
decimals = dict.fromkeys([
    'Age at index admission',
    'Length of hospital stay (days)',
    'Length of stay in ED (hours)',
    '# leading specialties',
    'Total health contacts',
    'Total rehabilitation contacts',
    'Total out-of-hours health contacts',
    'Minutes of out-of-hours rehabilitation therapy',
    'Minutes of rehabilitation therapy',
    '# disciplines involved',
    'Time to first rehabilitation contact (hours)',
    'Time to second rehabilitation contact (hours)',
    'Nursing contacts',
    'Physiotherapy contacts',
    'Occupational therapy contacts',
    'Speech & Language contacts',
], 0)

sum_table = TableOne(inp_sel, columns=inp_sel.columns.tolist(),
                     categorical=categorical, overall=True, missing=True, htest_name=True,
                     decimals=decimals, tukey_test=True,
                     groupby=groupby, nonnormal=t_cols,
                     pval=True)

In [None]:
# Write the current summary table to HTML.
# NOTE(review): the output path is an empty string -- set a destination before running.
sum_table.to_html('')

In [None]:
### Summary table stratified by home discharge
groupby = ['Home discharge']

# Continuous variables passed to TableOne as non-normal.
# ('Total long-term conditions' is not listed: inp_sel has no such column.)
t_cols = ['Length of hospital stay (days)', 'Length of stay in ED (hours)',
          'Total health contacts', 'Total rehabilitation contacts',
          'Total out-of-hours health contacts', 'Total out-of-hours rehabilitation contacts',
          'Minutes of out-of-hours rehabilitation therapy',
          'Minutes of rehabilitation therapy', 'Time to first rehabilitation contact (hours)',
          'Time to second rehabilitation contact (hours)', 'Nursing contacts',
          'Nursing contacts per admission day',
          'Rehab contacts per admission day', 'Contacts per admission day']

# Categorical variables: patient descriptors plus every outcome except the grouping one.
outcome_cols = ['In-hospital death', 'Extended stay', 'ICU/HDU admission',
                'Home discharge', 'Admission to MoE']
categorical = ['Sex', 'SIMD (1 - most deprived, 5 - least deprived region)', 'Multimorbidity group',
               'Physical-mental multimorbidity',
               'Active cancer', 'Ischaemic Heart Disease', 'Arthritis or other arthropathy',
               'Progressive neurological disease', 'Hypertension', 'Asthma', 'Care intensity group',
               '4AT score', 'MUST score', 'Fall event (within 6 months of admission)',
               'Mobility (walking independence)', 'Mobility (bathing independence)',
               'Nutritional assessment (swallowing difficulties)', 'Waterlow score (Pressure ulcer)',
               'Received rehab']
categorical += [c for c in outcome_cols if c not in groupby]

# Columns reported with zero decimal places. The key for the disciplines
# column must be its renamed label '# disciplines involved'; a key that does
# not match a column name has no effect.
decimals = dict.fromkeys([
    'Age at index admission',
    'Length of hospital stay (days)',
    'Length of stay in ED (hours)',
    '# leading specialties',
    'Total health contacts',
    'Total rehabilitation contacts',
    'Total out-of-hours health contacts',
    'Minutes of out-of-hours rehabilitation therapy',
    'Minutes of rehabilitation therapy',
    '# disciplines involved',
    'Time to first rehabilitation contact (hours)',
    'Time to second rehabilitation contact (hours)',
    'Nursing contacts',
    'Physiotherapy contacts',
    'Occupational therapy contacts',
    'Speech & Language contacts',
], 0)

sum_table = TableOne(inp_sel, columns=inp_sel.columns.tolist(),
                     categorical=categorical, overall=True, missing=True, htest_name=True,
                     decimals=decimals, tukey_test=True,
                     groupby=groupby, nonnormal=t_cols,
                     pval=True)

In [None]:
# Export the current `sum_table` as HTML.
# NOTE(review): the destination is an empty string in this copy (presumably a
# scrubbed path) — supply a real output path before running.
sum_table.to_html('')

In [None]:
### Summary table stratified by admission to MoE
# Continuous variables handed to TableOne as `nonnormal`.
t_cols = [
    'Length of hospital stay (days)',
    'Length of stay in ED (hours)',
    'Total long-term conditions',
    'Total health contacts',
    'Total rehabilitation contacts',
    'Total out-of-hours health contacts',
    'Total out-of-hours rehabilitation contacts',
    'Minutes of out-of-hours rehabilitation therapy',
    'Minutes of rehabilitation therapy',
    'Time to first rehabilitation contact (hours)',
    'Time to second rehabilitation contact (hours)',
    'Nursing contacts',
    'Nursing contacts per admission day',
    'Rehab contacts per admission day',
    'Contacts per admission day',
]
# Variables TableOne should treat as categorical. The stratifier
# ('Admission to MoE') is deliberately not listed here.
categorical = [
    'Sex',
    'SIMD (1 - most deprived, 5 - least deprived region)',
    'Multimorbidity group',
    'Physical-mental multimorbidity',
    'Active cancer',
    'Ischaemic Heart Disease',
    'Arthritis or other arthropathy',
    'Progressive neurological disease',
    'Hypertension',
    'Asthma',
    'Care intensity group',
    '4AT score',
    'MUST score',
    'Fall event (within 6 months of admission)',
    'Mobility (walking independence)',
    'Mobility (bathing independence)',
    'Nutritional assessment (swallowing difficulties)',
    'Waterlow score (Pressure ulcer)',
    'Received rehab',
    'In-hospital death',
    'Extended stay',
    'ICU/HDU admission',
    'Home discharge',
]
# Column whose levels define the table's strata.
groupby = ['Admission to MoE']
sum_table = TableOne(
    inp_sel,
    columns=inp_sel.columns.tolist(),
    categorical=categorical,
    nonnormal=t_cols,
    groupby=groupby,
    overall=True,
    missing=True,
    pval=True,
    htest_name=True,
    tukey_test=True,
    # Each variable named below is displayed with 0 decimal places.
    decimals=dict.fromkeys(
        [
            'Age at index admission',
            'Length of hospital stay (days)',
            'Length of stay in ED (hours)',
            '# leading specialties',
            'Total long-term conditions',
            'Total health contacts',
            'Total rehabilitation contacts',
            'Total out-of-hours health contacts',
            'Minutes of out-of-hours rehabilitation therapy',
            'Minutes of rehabilitation therapy',
            'Number of disciplines involved',
            'Time to first rehabilitation contact (hours)',
            'Time to second rehabilitation contact (hours)',
            'Nursing contacts',
            'Physiotherapy contacts',
            'Occupational therapy contacts',
            'Speech & Language contacts',
        ],
        0,
    ),
)

In [None]:
# Export the current `sum_table` as HTML.
# NOTE(review): the destination is an empty string in this copy (presumably a
# scrubbed path) — supply a real output path before running.
sum_table.to_html('')

In [None]:
### Summary table stratified by care intensity group
# Continuous variables handed to TableOne as `nonnormal`.
t_cols = [
    'Length of hospital stay (days)',
    'Length of stay in ED (hours)',
    'Total long-term conditions',
    'Total health contacts',
    'Total rehabilitation contacts',
    'Total out-of-hours health contacts',
    'Total out-of-hours rehabilitation contacts',
    'Minutes of out-of-hours rehabilitation therapy',
    'Minutes of rehabilitation therapy',
    'Time to first rehabilitation contact (hours)',
    'Time to second rehabilitation contact (hours)',
    'Nursing contacts',
    'Nursing contacts per admission day',
    'Rehab contacts per admission day',
    'Contacts per admission day',
]
# Variables TableOne should treat as categorical. The stratifier
# ('Care intensity group') is deliberately not listed here.
categorical = [
    'Sex',
    'SIMD (1 - most deprived, 5 - least deprived region)',
    'Multimorbidity group',
    'Physical-mental multimorbidity',
    'Active cancer',
    'Ischaemic Heart Disease',
    'Arthritis or other arthropathy',
    'Progressive neurological disease',
    'Hypertension',
    'Asthma',
    '4AT score',
    'MUST score',
    'Fall event (within 6 months of admission)',
    'Mobility (walking independence)',
    'Mobility (bathing independence)',
    'Nutritional assessment (swallowing difficulties)',
    'Waterlow score (Pressure ulcer)',
    'Received rehab',
    'In-hospital death',
    'Extended stay',
    'ICU/HDU admission',
    'Home discharge',
    'Admission to MoE',
]
# Column whose levels define the table's strata.
groupby = ['Care intensity group']
sum_table = TableOne(
    inp_sel,
    columns=inp_sel.columns.tolist(),
    categorical=categorical,
    nonnormal=t_cols,
    groupby=groupby,
    overall=True,
    missing=True,
    pval=True,
    htest_name=True,
    tukey_test=True,
    # Each variable named below is displayed with 0 decimal places.
    decimals=dict.fromkeys(
        [
            'Age at index admission',
            'Length of hospital stay (days)',
            'Length of stay in ED (hours)',
            '# leading specialties',
            'Total long-term conditions',
            'Total health contacts',
            'Total rehabilitation contacts',
            'Total out-of-hours health contacts',
            'Minutes of out-of-hours rehabilitation therapy',
            'Minutes of rehabilitation therapy',
            'Number of disciplines involved',
            'Time to first rehabilitation contact (hours)',
            'Time to second rehabilitation contact (hours)',
            'Nursing contacts',
            'Physiotherapy contacts',
            'Occupational therapy contacts',
            'Speech & Language contacts',
        ],
        0,
    ),
)

In [None]:
# Export the current `sum_table` as HTML.
# NOTE(review): the destination is an empty string in this copy (presumably a
# scrubbed path) — supply a real output path before running.
sum_table.to_html('')

In [None]:
### Summary table stratified by care intensity group, restricted to patients
### with 'In-hospital death' == 'N'
# Continuous variables handed to TableOne as `nonnormal`.
t_cols = [
    'Length of hospital stay (days)',
    'Length of stay in ED (hours)',
    'Total long-term conditions',
    'Total health contacts',
    'Total rehabilitation contacts',
    'Total out-of-hours health contacts',
    'Total out-of-hours rehabilitation contacts',
    'Minutes of out-of-hours rehabilitation therapy',
    'Minutes of rehabilitation therapy',
    'Time to first rehabilitation contact (hours)',
    'Time to second rehabilitation contact (hours)',
]
# Variables TableOne should treat as categorical. The stratifier
# ('Care intensity group') is deliberately not listed here.
categorical = [
    'Sex',
    'SIMD (1 - most deprived, 5 - least deprived region)',
    'Multimorbidity group',
    'Physical-mental multimorbidity',
    'Active cancer',
    'Ischaemic Heart Disease',
    'Arthritis or other arthropathy',
    'Progressive neurological disease',
    'Hypertension',
    'Asthma',
    '4AT score',
    'MUST score',
    'Fall event (within 6 months of admission)',
    'Mobility (walking independence)',
    'Mobility (bathing independence)',
    'Nutritional assessment (swallowing difficulties)',
    'Waterlow score (Pressure ulcer)',
    'In-hospital death',
    'Extended stay',
    'ICU/HDU admission',
    'Home discharge',
    'Admission to MoE',
]
# Column whose levels define the table's strata.
groupby = ['Care intensity group']

# Fix: the row filter used to be applied only inside the `columns=` expression
# (`inp_sel[mask].columns.tolist()`), where it has no effect because filtering
# rows never changes the column list; the table was still built from every row
# of `inp_sel`. The filtered frame is now passed as the data argument so the
# restriction actually applies.
# NOTE(review): assumes 'N' is the code for "did not die in hospital", as in
# the original filter expression — confirm against the cohort coding.
inp_no_death = inp_sel[inp_sel['In-hospital death'] == 'N']

sum_table = TableOne(
    inp_no_death,
    columns=inp_no_death.columns.tolist(),
    categorical=categorical,
    nonnormal=t_cols,
    groupby=groupby,
    overall=True,
    missing=True,
    pval=True,
    htest_name=True,
    tukey_test=True,
    # Each variable named below is displayed with 0 decimal places.
    decimals={
        'Age at index admission': 0,
        'Length of hospital stay (days)': 0,
        'Length of stay in ED (hours)': 0,
        '# leading specialties': 0,
        'Total long-term conditions': 0,
        'Total health contacts': 0,
        'Total rehabilitation contacts': 0,
        'Total out-of-hours health contacts': 0,
        'Minutes of out-of-hours rehabilitation therapy': 0,
        'Minutes of rehabilitation therapy': 0,
        'Number of disciplines involved': 0,
        'Time to first rehabilitation contact (hours)': 0,
        'Time to second rehabilitation contact (hours)': 0,
        'Nursing contacts': 0,
        'Physiotherapy contacts': 0,
        'Occupational therapy contacts': 0,
        'Speech & Language contacts': 0,
    },
)

In [None]:
# Export the current `sum_table` as HTML.
# NOTE(review): the destination is an empty string in this copy (presumably a
# scrubbed path) — supply a real output path before running.
sum_table.to_html('')