In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import matplotlib.ticker as mtick
from matplotlib.dates import DateFormatter
from datetime import timedelta
from datetime import datetime
from tqdm import tqdm
from tableone import TableOne
from sklearn.preprocessing import KBinsDiscretizer

In [None]:
# Load the two input tables. The file paths are blank in this copy of the
# notebook and must be filled in before the cell can run.
inp_data = pd.read_csv(filepath_or_buffer='')
feat_data = pd.read_csv(filepath_or_buffer='')

In [None]:
# Maps raw feature column names to the human-readable labels used in the rest
# of the notebook (applied via DataFrame.rename on `feat_data`).
#
# Every key appears exactly once: the literal previously repeated 'urea_v',
# 'basophil_count_rm', 'c-reactive_prot_rs' and
# 'trQ_mobility_bathing_INDEPENDENT' (with identical labels), and in a dict
# literal a repeated key silently overrides the earlier entry.
#
# NOTE(review): labels are runtime column names, so apparent typos are left
# untouched here ('protaime' -> 'protamine'?, 'Opthalmology' ->
# 'Ophthalmology'?). The '*_INDEPENDENT' columns are labelled '... dependence';
# confirm that wording is intended.
feature_names = {
    # Admission context
    'trQ_waterlow_score': 'Waterlow score',
    'AgeAtAdmission': 'Age',
    'arrival_mode_B': 'Arrival - NHSL Bus',
    'arrival_mode_E': 'Arrival - Emergency Ambulance',
    'arrival_mode_O': 'Arrival - Other',
    'arrival_mode_PU': 'Arrival - Public Transport',
    'arrival_mode_U': 'Arrival - GP Ambulance',
    'arrival_mode_Unk': 'Arrival - Unknown',
    'arrival_mode_W': 'Arrival - Walked',
    'simd_dec': 'SIMD (most to least deprived)',
    'arrival_mode_PR': 'Arrival - Private Transport',
    # Admission questionnaire (trQ_*) items
    'trQ_bwm_urinary_catheterisation': 'Urinary Catheterisation',
    'trQ_bwm_urinary_incontinence': 'Urinary Incontinence',
    'trQ_bwm_dysuria': 'Dysuria',
    'trQ_bwm_>6times_per_day': 'Bowel Movement >6 times per day',
    'trQ_bwm_nocturia_>2_per_night': 'Nocturia >2 per night',
    'trQ_bwm_faeces_incontinence': 'Faeces Incontinence',
    'trQ_bwm_constipation': 'Constipation',
    'trQ_bwm_diarrhoea': 'Diarrhoea',
    'trQ_bwm_blood_in_stools': 'Blood in stools',
    'trQ_bwm_medication': 'Bowel movement medication',
    'trQ_falls_within_6_months': 'Fall within last 6 months',
    'trQ_falls_clinical_risk': 'At clinical risk of falls',
    'trQ_nutr_food_allergies': 'Food allergies',
    'trQ_nutr_swallowing_difficulty': 'Swallowing difficulty',
    'trQ_mrsa_infection_prevention': 'Infection prevention measures',
    'trQ_mrsa_transfer_with_norovirus': 'MRSA Norovirus',
    'trQ_mrsa_resp_or_fever': 'MRSA with Respiratory issues or Fever',
    'trQ_mrsa_rash_fever_or_flu': 'MRSA with Rash, Fever or Flu',
    'trQ_mrsa_infectious_diseases_contact': 'MRSA contact with infection diseases',
    'trQ_rub_nursing_falls_risk_assessment': 'Nursing Falls risk assessment',
    'trQ_rub_at_risk_of_bed_fall': 'At risk of bed fall',
    'trQ_MUST_score': 'MUST Score',
    'trQ_mobility_walking_ASSISTANCE': 'Walking assistance',
    'trQ_mobility_walking_BED_REST': 'Walking (Bed rest)',
    'trQ_mobility_walking_INDEPENDENT': 'Walking dependence',
    'trQ_mobility_toileting_ASSISTANCE': 'Toileting assistance',
    'trQ_mobility_toileting_BED_REST': 'Toileting (Bed rest)',
    'trQ_mobility_toileting_INDEPENDENT': 'Toileting dependence',
    'trQ_mobility_bathing_ASSISTANCE': 'Bathing assistance',
    'trQ_mobility_bathing_BED_REST': 'Bathing (Bed rest)',
    'trQ_mobility_bathing_INDEPENDENT': 'Bathing dependence',
    'trQ_mobility_bed_rolling_ASSISTANCE': 'Rolling in bed assistance',
    'trQ_mobility_bed_rolling_INDEPENDENT': 'Rolling in bed dependence',
    'trQ_mobility_bed_moveup_ASSISTANCE': 'Moving up bed assistance',
    'trQ_mobility_bed_moveup_INDEPENDENT': 'Moving up bed dependence',
    'trQ_mobility_bed_out_ASSISTANCE': 'Moving out of bed assistance',
    'trQ_mobility_bed_out_BED_REST': 'Moving out of bed (Bed rest)',
    'trQ_mobility_bed_out_INDEPENDENT': 'Moving out of bed dependence',
    'trQ_mobility_bed_in_ASSISTANCE': 'Moving in bed assistance',
    'trQ_mobility_bed_in_BED_REST': 'Moving in bed (Bed rest)',
    'trQ_mobility_bed_in_INDEPENDENT': 'Moving in bed dependence',
    'trQ_mobility_sss_ASSISTANCE': 'Sit-stand-sit assistance',
    'trQ_mobility_sss_BED_REST': 'Sit-stand-sit (Bed rest)',
    'trQ_mobility_sss_INDEPENDENT': 'Sit-stand-sit dependence',
    'trQ_mobility_lateral_ASSISTANCE': 'Lateral movement assistance',
    'trQ_mobility_lateral_BED_REST': 'Lateral movement (Bed rest)',
    'trQ_mobility_lateral_INDEPENDENT': 'Lateral movement dependence',
    'trQ_mobility_floorup_ASSISTANCE': 'Floor-up movement assistance',
    'trQ_mobility_floorup_BED_REST': 'Floor-up movement (Bed rest)',
    'trQ_mobility_floorup_INDEPENDENT': 'Floor-up movement dependence',
    # Attendance history, long-term conditions, prescriptions and lab results
    'num_inp_attendances_lyr': 'Scheduled inpatient attendances last year',
    'total_longterm_conditions': '# unique long-term conditions',
    'num_outp_att_CB': 'Outpatient visits (Urology)',
    'lactate_v': 'Lactate (mmol/L) - last value',
    'lactate_rm': 'Lactate (mmol/L) - moving average',
    'dsl_outp_att': 'Last outpatient attendance (days)',
    'haemoglobin_nl': 'Haemoglobin - low',
    'dsl_physltc_pulmonary_fibrosis': 'Pulmonary fibrosis (days)',
    'hba1c_(ifcc)_rs': 'HbA1c (IFCC, mmol/mol) - moving std',
    'urea_v': 'Urea (mmol/L) - last value',
    'dsl_antipsychotics': 'Antipsychotics (days since last)',
    'red_cell_count_nl': 'Red Cell Count - low',
    'bilirubin_nh': 'Bilirubin - high',
    'num_outp_att_AR': 'Outpatient visits (Rheumatology)',
    'num_outp_att_F2': 'Outpatient visits (Gynaecology)',
    'hba1c_(ifcc)_v': 'HbA1c (IFCC, mmol/mol) - last value',
    'c-reactive_prot_nh': 'CRP - high',
    'n_presc_anticoagulant_protamine_drugs': 'Anticoagulants and protaime (# prescribed)',
    'num_outp_att_G1': 'Outpatient visits (General Psychiatry)',
    'ggt_v': 'GGT (U/L) - last value',
    'num_outp_att_C11': 'Outpatient visits (General Surgery)',
    'dsl_antidementia_drugs': 'Antidementia drugs (days since last)',
    'dsl_anti_hypertension_hf_drugs': 'Antihypertensive drugs (days since last)',
    'num_outp_att_G4': 'Outpatient visits (Psychiatry Of Old Age)',
    'dsl_antidepressant_drugs': 'Antidepressant drugs (days since last)',
    'num_outp_att_A1': 'Outpatient visits (General Medicine)',
    'phys_men_multimorbidity': 'Physical-mental multimorbidity',
    'num_outp_att_C3': 'Outpatient visits (Anaesthetics)',
    'hba1c_(ifcc)_nh': 'HbA1c (IFCC) - high',
    'n_presc_nausea_vertigo_drugs': 'Nausea and vertigo drugs (# prescribed)',
    'albumin_nl': 'Albumin - low',
    'total_menlongterm_conditions': '# Unique mental chronic conditions',
    'num_outp_att_AB': 'Outpatient visits (Geriatric Medicine)',
    'num_outp_att_R5': 'Outpatient visits (Physiotherapy)',
    'dsl_physltc_prog_neur_disease': 'Progressive neurological disease (days)',
    'ast_rm': 'AST (U/L) - moving average',
    'num_inp_attendances': '# Scheduled inpatient attendances',
    'ferritin_nl': 'Ferritin - low',
    'dsl_physltc_arthritis_arthropathy': 'Arthritis or other arthropathy (days)',
    'num_outp_att_C7': 'Outpatient visits (Opthalmology)',
    'dsl_physltc_heart_failure': 'Heart Failure (days)',
    'num_outp_att_AG': 'Outpatient visits (Renal Medicine)',
    'num_outp_att_A9': 'Outpatient visits (Gastroenterology)',
    'total_drug_categories': '# Unique prescribed drug categories',
    'num_outp_att_A82': 'Outpatient visits (Diabetes)',
    'urea_rs': 'Urea (mmol/L) - moving std',
    'num_outp_att_C8': 'Outpatient visits (Trauma and Orthopaedic Surgery)',
    'ferritin_nh': 'Ferritin - high',
    'bilirubin_nl': 'Bilirubin - low',
    'num_outp_att_A2': 'Outpatient visits (Cardiology)',
    'num_outp_att_C5': 'Outpatient visits (ENT)',
    'n_presc_antidepressant_drugs': 'Antidepressant drugs (# prescribed)',
    'urea_nh': 'Urea - high',
    'num_outp_att_A81': 'Outpatient visits (Endocrine)',
    'dsl_physltc_liver_disease': 'Liver disease (days)',
    'dsl_antiplatelet_drugs': 'Antiplatelet drugs (days since last)',
    'num_inp_att_AG': 'Inpatient visits (Renal Medicine)',
    'white_cell_count_nh': 'White Cell Count - high',
    'ggt_rs': 'GGT (U/L) - moving std',
    'mean_cell_volume_nh': 'MCV - high',
    'dsl_physltc_chronic_renal_disease': 'Chronic renal disease (days)',
    'bilirubin_rs': 'Bilirubin (umol/L) - moving std',
    'c-reactive_prot_rs': 'CRP (mg/L) - moving std',
    'dsl_physltc_stroke': 'Stroke - (days)',
    'dsl_physltc_atrial_fibrillation': 'Atrial fibrillation (days)',
    'dsl_physltc_copd': 'COPD (days)',
    'dsl_physltc_per_vascular_disease': 'Peripheral Vascular Disease (days)',
    'n_presc_parkinsonism_drugs': 'Parkinsonism drugs (# prescribed)',
    'num_outp_att_dna_AG': 'Outpatient failed visits (Renal Medicine)',
    'c-reactive_prot_rm': 'CRP (mg/L) - moving average',
    'n_presc_nitrates_ccb_drugs': 'Nitrates and CCBs (# prescribed)',
    'n_presc_beta_blockers': 'Beta blockers (# prescribed)',
    'ferritin_rs': 'Ferritin (ug/L) - moving std',
    'num_inp_att_C8': 'Inpatient visits (Trauma and Orthopaedic Surgery)',
    'n_presc_antiplatelet_drugs': 'Antiplatelet drugs (# prescribed)',
    'monocyte_count_nh': 'Monocyte count - high',
    'num_outp_att_C9': 'Outpatient visits (Plastic Surgery)',
    'lactate_rs': 'Lactate (mmol/L) - moving std',
    'neutrophil_count_nh': 'Neutrophil count - high',
    'n_presc_diuretics': 'Diuretics (# prescribed)',
    'num_outp_attendances': '# Outpatient visits',
    'dsl_bone_metabolism_affecting_drugs': 'Bone/metabolism-affecting drugs (days since last)',
    'ggt_rm': 'GGT (U/L) - moving average',
    'hba1c_(ifcc)_rm': 'HbA1c (IFCC) - moving average',
    'dsl_menltc_depression': 'Depression (days)',
    'dsl_physltc_hypertension': 'Hypertension (days)',
    'sodium_nl': 'Sodium - low',
    'ggt_nh': 'GGT - high',
    'n_presc_lipid_regulators': 'Lipid regulators (# prescribed)',
    'n_presc_antipsychotics': 'Antipsychotics (# prescribed)',
    'dsl_physltc_inf_bowel_disease': 'Inflammatory Bowel Disease (days)',
    'calcium_nl': 'Calcium - low',
    'dsl_physltc_ischaemic_heart_disease': 'Ischaemic Heart Disease (days)',
    'n_presc_anti_hypertension_hf_drugs': 'Antihypertensive drugs (# prescribed)',
    'n_presc_antidementia_drugs': 'Anti-dementia drugs (# prescribed)',
    'lymphocyte_count_nl': 'Lymphocyte count - low',
    'egfr_(/1.73m2)_nl': 'eGFR (/1.73m2) - low',
    'egfr_(/1.73m2)_v': 'eGFR (/1.73m2) - last value',
    'egfr_(/1.73m2)_rs': 'eGFR (/1.73m2) - moving std',
    'platelet_count_nl': 'Platelet Count - low',
    'dsl_diuretics': 'Diuretics (days since last)',
    'n_presc_bone_metabolism_affecting_drugs': 'Bone/metabolism-affecting drugs (# prescribed)',
    'ferritin_v': 'Ferritin (ug/L) - last value',
    'num_outp_att_AQ': 'Outpatient visits (Respiratory Medicine)',
    'esr_v': 'ESR (mm/hr) - last value',
    'dsl_physltc_epilepsy': 'Epilepsy (days)',
    'num_outp_att_AD': 'Outpatient visits (Medical Oncology)',
    'dsl_physltc_diabetes': 'Diabetes (days)',
    'dsl_menltc_alcohol_substance_misuse': 'Alcohol/substance misuse (days)',
    'dsl_physltc_obesity': 'Obesity (days)',
    'num_outp_att_AP': 'Outpatient visits (Rehabilitation Medicine)',
    'num_outp_att_H2': 'Outpatient visits (Clinical Oncology)',
    'c-reactive_prot_v': 'CRP (mg/L) - last value',
    'ck_v': 'CK (IU/L) - last value',
    'dsl_physltc_asthma': 'Asthma (days)',
    'hs_troponin_i_v': 'HS Troponin I (ng/L) - last value',
    'basophil_count_rm': 'Basophil Count - moving average',
    'num_outp_att_dna_A9': 'Outpatient failed visits (Gastroenterology)',
    'num_outp_att_AH': 'Outpatient visits (Neurology)',
    'alt_nl': 'ALT - low',
    'dsl_menltc_chronic_psychiatry_disorder': 'Chronic Psychiatric Disorder (days)',
    'hs_troponin_i_rm': 'HS Troponin I (ng/L) - moving average',
    'dsl_physltc_osteoporosis': 'Osteoporosis (days)',
    'basophil_count_v': 'Basophil Count - last value',
    'albumin_rm': 'Albumin (g/L) - moving average',
    'creatinine_nl': 'Creatinine - low',
    'ferritin_rm': 'Ferritin (ug/L) - moving average',
    'dsl_physltc_hip_fracture': 'Hip fracture (days)',
    'num_outp_att_dna_C7': 'Outpatient failed visits (Opthalmology)',
    'creatinine_nh': 'Creatinine - high',
    'alk.phos_nh': 'Alkaline Phosphatase - high',
    'eosinophil_count_nl': 'Eosinophil Count - low',
    'dsl_physltc_historical_or_active_cancer': 'Historical or Active Cancer (days)',
    'bilirubin_rm': 'Bilirubin (umol/L) - moving average',
    'Sex_F': 'Sex (Female)',
    'esr_rm': 'ESR (mm/hr) - moving average',
    'urea_rm': 'Urea (mmol/L) - moving average',
    'ck_rm': 'CK (IU/L) - moving average',
    'triage_code': 'ED triage code',
    'tco2_v': 'tCO2 (mmol/L) - last value',
    'arrival_mode_R': 'Arrival - Routine Ambulance',
    'dsl_inp_att': 'Last scheduled inpatient attendance',
    'trQ_4at': '4AT Score',
    'albumin_v': 'Albumin (g/L) - last value',
    'hs_troponin_t_v': 'HS Troponin T (ng/L) - last value',
    'albumin_rs': 'Albumin (g/L) - moving std',
    'tco2_rs': 'tCO2 (mmol/L) - moving std',
    'bilirubin_v': 'Bilirubin (umol/L) - last value',
    'haematocrit_nl': 'Haematocrit - low',
}

In [None]:
# Swap raw column names for their display labels; columns without an entry in
# feature_names keep their original name.
feat_data = feat_data.rename(feature_names, axis='columns')

In [None]:
# Show every column name of inp_data as a plain list.
list(inp_data.columns)

In [None]:
# Show every (renamed) column name of feat_data as a plain list.
list(feat_data.columns)

In [None]:
# (rows, columns) of each input table, inp_data first.
tuple(frame.shape for frame in (inp_data, feat_data))

#### Select top-ranking features from the SHAP analysis and inspect their percentage of missing values

In [None]:
# Distribution of the 'Bathing dependence' indicator.
# This reads from feat_data because feat_sel is only created further down the
# notebook; referencing feat_sel here raised NameError on a fresh
# Restart & Run All. feat_sel takes this column unchanged from feat_data.
feat_data['Bathing dependence'].value_counts()

In [None]:
# Load the three tab-separated questionnaire extracts (mobility, 4AT, and the
# table whose 'Score' column is later compared with the Waterlow score).
# The file paths are blank in this copy of the notebook.
_TSV_OPTIONS = dict(sep='\t', low_memory=False)
trq_mobility = pd.read_csv('', **_TSV_OPTIONS)
trq_4at = pd.read_csv('', **_TSV_OPTIONS)
trq_wt = pd.read_csv('', **_TSV_OPTIONS)

In [None]:
# Inspect the columns available in the 4AT questionnaire extract.
trq_4at.columns

In [None]:
# Inspect the columns available in the trq_wt questionnaire extract.
trq_wt.columns

In [None]:
# Load the tab-separated ED and inpatient extracts. Only the ED file is read
# as ISO-8859-1. The file paths are blank in this copy of the notebook.
_TRAK_TSV_OPTIONS = dict(sep='\t', low_memory=False)
trak_ed = pd.read_csv('', encoding='iso-8859-1', **_TRAK_TSV_OPTIONS)
trak_inp_a = pd.read_csv('', **_TRAK_TSV_OPTIONS)

In [None]:
# Attach the inpatient EpisodeNumber to each ED attendance.
# The ED attendance identifier is renamed to the inpatient table's
# 'AssociatedAEAdmissionIdentifier' so the two can be joined on
# (ppid, identifier). The left join keeps ED rows with no linked episode.
# NOTE(review): this cell overwrites trak_ed, so running it twice without
# reloading fails on the now-renamed column.
_AE_LINK = 'AssociatedAEAdmissionIdentifier'
trak_inp_a = trak_inp_a[['ppid', 'EpisodeNumber', _AE_LINK]]
trak_ed = (
    trak_ed[['ppid', 'UniqueAEAttendanceIdentifier', 'TriageCategoryCode']]
    .rename(columns={'UniqueAEAttendanceIdentifier': _AE_LINK})
    .merge(trak_inp_a, how='left', on=['ppid', _AE_LINK])
)

In [None]:
# Keep only the join keys plus the raw questionnaire answers that are later
# compared against the encoded features.
_EPISODE_KEYS = ['ppid', 'EpisodeNumber']
trq_mobility = trq_mobility[_EPISODE_KEYS + ['Bathing/showering', 'Up from floor']]
trq_4at = trq_4at[_EPISODE_KEYS + ['4AT score']]
trq_wt = trq_wt[_EPISODE_KEYS + ['Score']]

In [None]:
# Columns carried into the missingness analysis, listed in output order.
# Names without a display label (e.g. 'alt_v') have no entry in
# feature_names and are relabelled in the next cell.
_selected_columns = [
    # identifiers
    'ppid', 'EpisodeNumber',
    # admission context
    'SIMD (most to least deprived)', 'Arrival - Unknown', 'Arrival - Other',
    'ED triage code',
    # laboratory last values
    'Albumin (g/L) - last value', 'Bilirubin (umol/L) - last value',
    'Urea (mmol/L) - last value',
    'lymphocyte_count_v', 'Basophil Count - last value',
    'eosinophil_count_v', 'neutrophil_count_v',
    'ESR (mm/hr) - last value', 'alk.phos_v', 'alt_v',
    'HS Troponin I (ng/L) - last value', 'HS Troponin T (ng/L) - last value',
    'CK (IU/L) - last value', 'tCO2 (mmol/L) - last value',
    'CRP (mg/L) - last value', 'GGT (U/L) - last value', 'sodium_v',
    # prescriptions, long-term conditions and attendance history
    'Diuretics (# prescribed)', 'Nausea and vertigo drugs (# prescribed)',
    'Progressive neurological disease (days)', 'Ischaemic Heart Disease (days)',
    'Arthritis or other arthropathy (days)',
    'Outpatient visits (Psychiatry Of Old Age)', 'Outpatient visits (Cardiology)',
    'Last scheduled inpatient attendance',
    # admission questionnaire
    '4AT Score', 'Waterlow score', 'Bathing dependence',
    'Floor-up movement assistance',
    # outcome column (later reported as 'ICU/HDU Admission')
    'gt_cc',
]
feat_sel = feat_data[_selected_columns]

In [None]:
# Give the remaining raw lab columns display labels, then attach the raw
# questionnaire / ED / outcome tables to feat_sel. The shape is printed after
# every step so unexpected row growth is visible.
# NOTE(review): this cell reassigns feat_sel and the source frames, so running
# it a second time merges the same columns again (pandas then adds _x/_y
# suffixes). Re-run from the load cells instead.
feat_sel = feat_sel.rename(columns={
    'alt_v': 'ALT - last value',
    'alk.phos_v': 'Alkaline Phosphatase - last value',
    'lymphocyte_count_v': 'Lymphocyte Count - last value',
    'eosinophil_count_v': 'Eosinophil Count - last value',
    'neutrophil_count_v': 'Neutrophil Count - last value',
    'sodium_v': 'Sodium - last value',
})
print(feat_sel.shape)

_JOIN_KEYS = ['ppid', 'EpisodeNumber']


def _first_cohort_record_per_patient(records):
    """Return one fully populated row per ppid, limited to cohort episodes.

    Rows containing any NaN are dropped, rows whose EpisodeNumber does not
    occur in the current feat_sel are dropped, and of the remainder only the
    row that sorts first by (ppid, EpisodeNumber) is kept for each ppid.
    NOTE(review): a patient with several episodes in feat_sel can therefore
    only ever match on one of them - confirm feat_sel has one row per ppid.
    """
    complete = records.dropna()
    cohort_episodes = feat_sel.EpisodeNumber.unique().tolist()
    in_cohort = complete[complete.EpisodeNumber.isin(cohort_episodes)]
    return in_cohort.sort_values(_JOIN_KEYS).drop_duplicates(['ppid'], keep='first')


trq_mobility = _first_cohort_record_per_patient(trq_mobility)
feat_sel = feat_sel.merge(trq_mobility, how='left', on=_JOIN_KEYS)
print(feat_sel.shape)

trq_4at = _first_cohort_record_per_patient(trq_4at)
feat_sel = feat_sel.merge(trq_4at, how='left', on=_JOIN_KEYS)
print(feat_sel.shape)

trq_wt = _first_cohort_record_per_patient(trq_wt)
feat_sel = feat_sel.merge(trq_wt, how='left', on=_JOIN_KEYS)
print(feat_sel.shape)

trak_ed = _first_cohort_record_per_patient(trak_ed)
feat_sel = feat_sel.merge(trak_ed, how='left', on=_JOIN_KEYS)
print(feat_sel.shape)

# Outcome / rehabilitation columns are joined on ppid alone.
mdata_sel = inp_data[['ppid', 'gt_eld', 'total_count_rehab', 'n_PT', 'n_OT', 'n_SLT']]
feat_sel = feat_sel.merge(mdata_sel, how='left', on='ppid')
print(feat_sel.shape)

In [None]:
# Number of rows left without a triage code after the ED merge.
feat_sel['TriageCategoryCode'].isna().sum()

In [None]:
# Recode every analysed column to 'Y'/'N'. For the feature columns 'Y' marks
# rows holding the placeholder value tested below (-1, 0, 39999, -9999.0).
# NOTE(review): these look like the fill values used for missing data
# upstream - confirm against the feature pipeline.
# NOTE(review): most columns are overwritten in place, so re-running this
# cell compares 'Y'/'N' strings with numbers and turns the flags into 'N'.
def _yes_no(mask):
    """Map a boolean mask to an array of 'Y' (True) / 'N' (False)."""
    return np.where(mask, 'Y', 'N')


feat_sel['SIMD (most to least deprived)'] = _yes_no(
    feat_sel['SIMD (most to least deprived)'] == -1)
feat_sel['Mode of arrival'] = _yes_no(
    (feat_sel['Arrival - Unknown'] == 1) | (feat_sel['Arrival - Other'] == 1))

# Count features: flagged when the count is zero.
for count_col in ['Diuretics (# prescribed)',
                  'Nausea and vertigo drugs (# prescribed)',
                  'Outpatient visits (Psychiatry Of Old Age)',
                  'Outpatient visits (Cardiology)']:
    feat_sel[count_col] = _yes_no(feat_sel[count_col] == 0)

# Day-count features: flagged when equal to 39999. The long-term-condition
# flags go into new columns named without the ' (days)' suffix.
feat_sel['Last scheduled inpatient attendance'] = _yes_no(
    feat_sel['Last scheduled inpatient attendance'] == 39999)
for condition in ['Progressive neurological disease',
                  'Ischaemic Heart Disease',
                  'Arthritis or other arthropathy']:
    feat_sel[condition] = _yes_no(feat_sel[condition + ' (days)'] == 39999)

# Questionnaire features: flagged only when the encoded value equals the
# placeholder AND the merged raw questionnaire answer is null as well.
for encoded_col, placeholder, raw_col in [
        ('4AT Score', -1, '4AT score'),
        ('Waterlow score', -1, 'Score'),
        ('Bathing dependence', 0, 'Bathing/showering'),
        ('Floor-up movement assistance', 0, 'Up from floor')]:
    feat_sel[encoded_col] = _yes_no(
        (feat_sel[encoded_col] == placeholder) & feat_sel[raw_col].isnull())

# The triage flag is taken from the merged raw ED code, not the encoded feature.
feat_sel['ED triage code'] = _yes_no(feat_sel['TriageCategoryCode'].isnull())

# Laboratory last values: flagged when equal to -9999.0.
for lab_col in ['Albumin (g/L) - last value', 'Bilirubin (umol/L) - last value',
                'Urea (mmol/L) - last value',
                'Lymphocyte Count - last value', 'Basophil Count - last value',
                'Eosinophil Count - last value', 'Neutrophil Count - last value',
                'ESR (mm/hr) - last value', 'Alkaline Phosphatase - last value',
                'ALT - last value',
                'HS Troponin I (ng/L) - last value', 'HS Troponin T (ng/L) - last value',
                'CK (IU/L) - last value', 'tCO2 (mmol/L) - last value',
                'CRP (mg/L) - last value', 'GGT (U/L) - last value',
                'Sodium - last value']:
    feat_sel[lab_col] = _yes_no(feat_sel[lab_col] == -9999.0)

# Grouping columns for the summary tables ('Y' = indicator set / count above zero).
feat_sel['ICU/HDU Admission'] = _yes_no(feat_sel['gt_cc'] == 1)
feat_sel['Geriatric Medicine Admission'] = _yes_no(feat_sel['gt_eld'] == 1)
feat_sel['Rehabilitation'] = _yes_no(feat_sel['total_count_rehab'] > 0)

In [None]:
# Number and fraction of rows with n_PT above zero.
n_pt_rows = int((feat_sel['n_PT'] > 0).sum())
print(n_pt_rows, n_pt_rows / len(feat_sel))

In [None]:
# Number and fraction of rows with n_SLT above zero.
n_slt_rows = int((feat_sel['n_SLT'] > 0).sum())
print(n_slt_rows, n_slt_rows / len(feat_sel))

In [None]:
# Number and fraction of rows with n_OT above zero.
n_ot_rows = int((feat_sel['n_OT'] > 0).sum())
print(n_ot_rows, n_ot_rows / len(feat_sel))

In [None]:
# Show every column of feat_sel after the merges and recoding.
list(feat_sel.columns)

#### Get missingness summary

In [None]:
# Columns reported in the summary tables, assembled from thematic groups.
# The concatenation order is the row order handed to TableOne.
_context_cols = [
    'SIMD (most to least deprived)',
    'ED triage code',
]
_lab_cols = [
    'Albumin (g/L) - last value',
    'Bilirubin (umol/L) - last value',
    'Urea (mmol/L) - last value',
    'Lymphocyte Count - last value',
    'Basophil Count - last value',
    'Eosinophil Count - last value',
    'Neutrophil Count - last value',
    'ESR (mm/hr) - last value',
    'Alkaline Phosphatase - last value',
    'ALT - last value',
    'HS Troponin I (ng/L) - last value',
    'HS Troponin T (ng/L) - last value',
    'CK (IU/L) - last value',
    'tCO2 (mmol/L) - last value',
    'CRP (mg/L) - last value',
    'GGT (U/L) - last value',
    'Sodium - last value',
]
_history_cols = [
    'Diuretics (# prescribed)',
    'Nausea and vertigo drugs (# prescribed)',
    'Outpatient visits (Psychiatry Of Old Age)',
    'Outpatient visits (Cardiology)',
    'Last scheduled inpatient attendance',
]
_questionnaire_cols = [
    '4AT Score',
    'Waterlow score',
    'Bathing dependence',
    'Floor-up movement assistance',
]
_derived_cols = [
    'Mode of arrival',
    'Progressive neurological disease',
    'Ischaemic Heart Disease',
    'Arthritis or other arthropathy',
]
_outcome_cols = [
    'ICU/HDU Admission',
    'Geriatric Medicine Admission',
    'Rehabilitation',
]
cols_to_use = [
    *_context_cols,
    *_lab_cols,
    *_history_cols,
    *_questionnaire_cols,
    *_derived_cols,
    *_outcome_cols,
]

In [None]:
# Summary table of the Y/N flags, stratified by ICU/HDU admission.
# All reported columns are treated as categorical.
# NOTE(review): the output path is blank in this copy of the notebook, and the
# stratifying column is itself listed in cols_to_use - confirm how TableOne
# handles that.
groupby = ['ICU/HDU Admission']
categorical = cols_to_use
sum_table = TableOne(
    feat_sel,
    columns=cols_to_use,
    categorical=categorical,
    groupby=groupby,
    overall=True,
    missing=True,
    pval=True,
    htest_name=True,
    tukey_test=True,
)
sum_table.to_html('')

In [None]:
# Summary table of the Y/N flags, stratified by Geriatric Medicine admission.
# All reported columns are treated as categorical.
# NOTE(review): the output path is blank in this copy of the notebook, and the
# stratifying column is itself listed in cols_to_use - confirm how TableOne
# handles that.
groupby = ['Geriatric Medicine Admission']
categorical = cols_to_use
sum_table = TableOne(
    feat_sel,
    columns=cols_to_use,
    categorical=categorical,
    groupby=groupby,
    overall=True,
    missing=True,
    pval=True,
    htest_name=True,
    tukey_test=True,
)
sum_table.to_html('')

In [None]:
# Summary table of the Y/N flags, stratified by the Rehabilitation flag.
# All reported columns are treated as categorical.
# NOTE(review): the output path is blank in this copy of the notebook, and the
# stratifying column is itself listed in cols_to_use - confirm how TableOne
# handles that.
groupby = ['Rehabilitation']
categorical = cols_to_use
sum_table = TableOne(
    feat_sel,
    columns=cols_to_use,
    categorical=categorical,
    groupby=groupby,
    overall=True,
    missing=True,
    pval=True,
    htest_name=True,
    tukey_test=True,
)
sum_table.to_html('')