In [1]:
import pickle
import numpy as np
import pandas as pd
from tqdm.auto import tqdm
from sklearn.metrics import log_loss, brier_score_loss, accuracy_score, confusion_matrix

import matplotlib.pyplot as plt

In [2]:
round_precision = 4
dtype_dict = {'binary_crossentropy': float, 'brier_score': float, 'accuracy': float, 'TN': int, 'TP': int, 'FN': int, 'FP': int}

In [3]:
with open('../data/data_test.pt', 'rb') as file:
    X_test, y_test = pickle.load(file)

print(f'{X_test.shape = }')
print(f'{y_test.shape = }')

X_test.shape = (630290, 28)
y_test.shape = (630290, 2)


In [4]:
model_name_list = ['LR', 'RF', 'SVC']

In [5]:
y_pred = {}

for model_name in model_name_list:
    with open(f'./{model_name}.pt', 'rb') as file:
        y_pred[model_name] = pickle.load(file)

In [6]:
results = {}

for i, y_label in enumerate(['hosp', 'death']):
    results[y_label] = {}
    
    for model_name in model_name_list:
        results[y_label][model_name] = {}

        results[y_label][model_name]['binary_crossentropy'] = log_loss(y_test[:, i], y_pred[model_name][y_label][:, 1]).round(round_precision)
        results[y_label][model_name]['brier_score'] = brier_score_loss(y_test[:, i], y_pred[model_name][y_label][:, 1]).round(round_precision)
        results[y_label][model_name]['accuracy'] = accuracy_score(y_test[:, i], y_pred[model_name][y_label][:, 1].round()).round(round_precision)

        CM = confusion_matrix(y_test[:, i], (y_pred[model_name][y_label][:, 1] > 0.5).astype(int))
        results[y_label][model_name]['TN'] = CM[0, 0]
        results[y_label][model_name]['TP'] = CM[1, 1]
        results[y_label][model_name]['FN'] = CM[0, 1]
        results[y_label][model_name]['FP'] = CM[1, 0]

# Results

## Hospitalization

In [7]:
pd.DataFrame(results['hosp']).T.astype(dtype_dict).sort_values('binary_crossentropy')

Unnamed: 0,binary_crossentropy,brier_score,accuracy,TN,TP,FN,FP
SVC,0.2133,0.0576,0.9313,585207,1778,415,42890
RF,0.3636,0.0547,0.9343,581891,6960,3731,37708
LR,0.4043,0.1098,0.918,568669,9957,16953,34711


## Fatality

In [8]:
pd.DataFrame(results['death']).T.astype(dtype_dict).sort_values('binary_crossentropy')

Unnamed: 0,binary_crossentropy,brier_score,accuracy,TN,TP,FN,FP
RF,0.0352,0.0093,0.9887,621176,1985,310,6819
SVC,0.0435,0.0111,0.9874,621044,1291,442,7513
LR,0.062,0.0116,0.9876,621466,1020,20,7784
