# ACP Project - Model Evaluation

## Setup

In [2]:
import warnings, pickle, os
import numpy as np
import pandas as pd
# Display every column, and up to 300 rows, whenever a frame is rendered.
pd.options.display.max_columns = None
pd.options.display.max_rows = 300

from IPython.display import display
import matplotlib.pyplot as plt

import seaborn as sns
# Apply seaborn's default theme and make 10x10 inches the default figure size.
sns.set(rc={'figure.figsize':(10,10)})

import shap
import optuna

# Enable IPython's autoreload extension in mode 1: only modules registered
# through %aimport are re-imported before each cell is executed.
# (Keep comments on their own lines; text after a line magic is passed to it.)
%load_ext autoreload
%autoreload 1

In [3]:
from dataset import SCIData, SCICols
# Register the project modules for autoreload so that edits to them are picked
# up without restarting the kernel (this relies on %autoreload 1 being active).
%aimport utils.evaluation, dataset

In [4]:
from sklearn.model_selection import train_test_split

# Load the processed dataset ordered by admission time. The ordering matters
# because the split below is unshuffled: assuming the project helper methods
# preserve row order, the test set is the chronologically latest admissions.
# NOTE(review): mandate() and omit_ae() are project helpers not shown here --
# presumably they require the raw NEWS columns and drop A&E records; confirm.
sci = (
    SCIData(SCIData.quickload("data/sci_processed.h5").sort_values("AdmissionDateTime"))
    .mandate(SCICols.news_data_raw)
    .omit_ae()
)

# Derive the outcome columns and the feature view used for evaluation.
# omit_redundant(), categorize() and onehot_encode_categories() are currently
# disabled and therefore not part of this chain.
scii = (
    sci.derive_critical_event(within=1, return_subcols=True)
    .augment_shmi(onehot=False)
    .raw_news()
)

# Hold out the last 33% of rows. shuffle=False keeps the existing row order,
# which also means random_state has no effect here; it is kept only so that the
# seed is already pinned if shuffling is ever switched on.
# The training-side label splits are discarded; only the test labels are kept.
# NOTE(review): the split is made per record, not per patient. The population
# table in this notebook reports 44,789 + 25,579 patients across the two
# subsets against 62,211 in total, so some patients appear on both sides --
# confirm that this is intended.
sci_train, sci_test, _, y_test_mortality, _, y_test_criticalcare = train_test_split(
    scii,
    scii.DiedWithinThreshold,
    scii.CriticalCare,
    test_size=0.33,
    random_state=42,
    shuffle=False,
)

# Re-wrap both subsets as SCIData.
sci_train, sci_test = SCIData(sci_train), SCIData(sci_test)

# Sanity check: the split must neither lose nor duplicate records.
assert sci_train.shape[0] + sci_test.shape[0] == scii.shape[0], (
    "train/test split does not add up to the full dataset"
)

## Population

In [5]:
# Headline counts for the full cohort and for each side of the split,
# displayed with one column per subset and one row per statistic.
pd.DataFrame(
    [
        {
            "Subset": label,
            "Records": frame.shape[0],
            "Patients": len(frame.PatientNumber.unique()),
            "CriticalEvents": frame.CriticalEvent.sum(),
            "Deaths": frame.DiedWithinThreshold.sum(),
            "CriticalCare": frame.CriticalCare.sum(),
            "Readmissions": frame.Readmission.sum(),
        }
        for label, frame in (
            ("Total", scii),
            ("Training Set", sci_train),
            ("Testing Set", sci_test),
        )
    ]
).set_index("Subset").T

Subset,Total,Training Set,Testing Set
Records,121321,81285,40036
Patients,62211,44789,25579
CriticalEvents,3114,2182,932
Deaths,342,224,118
CriticalCare,2814,1986,828
Readmissions,15438,10764,4674


1. Data size (total, training, testing, total patients)
2. Outcomes (critical events, deaths, ICU)
3. Sex (female %), age (median)
4. Comorbid conditions (median #, max 7 per patient)
5. Presence of vital signs, presence of blood test results
6. Length of stay (LOS) of hospitalisation

In [6]:
# Number of records per value of the Female flag.
scii["Female"].value_counts()

True     63578
False    57743
Name: Female, dtype: int64

In [7]:
# Number of records in each age band.
scii["AgeBand"].value_counts()

80 +            36894
65 - 79         31156
50 - 64         23267
30 - 49         22210
17 - 29          7757
16 and Under       37
Name: AgeBand, dtype: int64

In [8]:
# Number of records in each length-of-stay band.
scii["LOSBand"].value_counts()

More than 72 Hrs    54207
Less than 12 Hrs    25238
12 - 24 Hrs         16569
48 - 72 Hrs          9834
24 - 36 Hrs          7950
36 - 48 Hrs          7479
Still In               44
Name: LOSBand, dtype: int64

In [9]:
# Number of records per admitting ward.
scii["AdmitWard"].value_counts()

EAU     81131
AEC     18964
AAA     10136
AAAC     6132
HH3      2925
HH1M     2033
Name: AdmitWard, dtype: int64

In [11]:
# Load a second dataset file, separate from the processed one used for the
# train/test split.
# NOTE(review): presumably this is the unprocessed extract -- confirm.
sci_full = SCIData.quickload("data/sci.h5")

In [34]:
# Show the first row that has no missing value in any column.
# NOTE(review): this displays a single patient-level record; clear or redact
# the output before sharing the notebook.
sci_full.dropna(axis=0, how="any").iloc[0]

SpellSerial                                                              222967_146
PatientNumber                                                                298990
SEQ                                                                              15
Female                                                                         True
Age                                                                              61
AgeBand                                                                     50 - 64
Area                                                                           M28 
AdmissionDateTime                                               2020-10-14 23:18:00
DischargeDateTime                                               2020-12-18 20:40:00
TotalLOS                                                                  64.890278
LOSBand                                                            More than 72 Hrs
Over7Days                                                                   

In [23]:
# Count how often each ward code appears across all of the columns listed in
# SCICols.wards, by stacking those columns into a single series first.
(
    sci[SCICols.wards]
    .stack()
    .value_counts()
)

EAU     114643
AEC      32618
DISL     20437
AAA      15498
AAAC     11355
L6        8346
M2        7772
HH3       7178
HH2       6290
L3        6101
L2        5985
L4        5877
PNDS      5040
L5        4514
HH1M      4112
HCU       3772
MA3       2794
HH4       2406
L8        2233
L7        1722
HB2       1423
HB1       1418
ANU       1411
CCU       1180
HH5       1151
RU        1129
B6        1056
MIU        982
HG         936
B3         911
LIME       877
ASU        836
B8         737
WPW        735
HH7        625
HAEM       567
ETW        566
M2SS       544
STU        445
B7         403
B5         342
HASU       327
CPIU       320
THE3       247
SBU        233
SRU        230
M2S        210
RCV3       200
THE1       169
NSSU       123
C1         105
C3          81
HH6M        76
M2SU        72
HH8         60
B4          58
TAU         54
DCU         51
HAED        50
L1          47
ML          40
SAL         37
HB2M        33
RTU         23
C2          22
RUSS        21
MET       