In [1]:
import pickle
import numpy as np
import pandas as pd
from tqdm.auto import tqdm
from sklearn.metrics import log_loss, brier_score_loss, accuracy_score, confusion_matrix

import matplotlib.pyplot as plt

In [6]:
# Models whose stored predictions are scored in this notebook.
# 'RF' and 'INN' are currently left out of the comparison.
model_name_list = [
    'Prior',
    'LR',
    'SVC',
]

In [7]:
# Number of decimal places kept for the float-valued metrics.
round_precision = 4

# Column -> dtype mapping applied to the results tables before display:
# the three scores are floats, the confusion-matrix counts are integers.
dtype_dict = dict(
    binary_crossentropy=float,
    brier_score=float,
    accuracy=float,
    TN=int,
    TP=int,
    FN=int,
    FP=int,
)

In [8]:
# Held-out evaluation split, stored on disk as a pickled two-element
# (features, targets) pair.
# NOTE(review): pickle can execute arbitrary code on load — only open files
# that come from a trusted source.
with open('../data/data_test.pt', 'rb') as fh:
    test_split = pickle.load(fh)
X_test, y_test = test_split

# Echo both shapes so a row-count mismatch is visible straight away.
print(f'{X_test.shape = }')
print(f'{y_test.shape = }')

X_test.shape = (622230, 33)
y_test.shape = (622230, 2)


In [9]:
def _read_pickle(path):
    """Return the object stored in the pickle file at `path`."""
    with open(path, 'rb') as fh:
        return pickle.load(fh)


# One entry per evaluated model, keyed by model name; each entry is whatever
# object was pickled to ./models/<name>.pt.
y_pred = {}
for model_name in model_name_list:
    y_pred[model_name] = _read_pickle(f'./models/{model_name}.pt')

In [10]:
def _binary_metrics(y_true, prob_pos, threshold=0.5):
    """Score one set of binary predictions against the ground truth.

    Parameters
    ----------
    y_true : 1-D array of 0/1 targets.
    prob_pos : 1-D array of predicted scores for the positive class.
    threshold : scores strictly above this value are counted as class 1.

    Returns
    -------
    dict with the rounded log loss, Brier score and accuracy, followed by the
    four confusion-matrix counts (TN, TP, FP, FN).
    """
    # Derive the hard labels once so accuracy and the confusion matrix are
    # guaranteed to be based on the same decisions.
    y_hat = (prob_pos > threshold).astype(int)

    # labels=[0, 1] pins the matrix to 2x2 even if only one class occurs in
    # both y_true and y_hat; without it the matrix collapses to 1x1 and
    # indexing the second row/column raises IndexError.
    cm = confusion_matrix(y_true, y_hat, labels=[0, 1])

    # np.round accepts both NumPy scalars and plain Python floats. Calling
    # `.round(...)` as a method fails when a metric returns a built-in float,
    # which has no such method.
    return {
        'binary_crossentropy': np.round(log_loss(y_true, prob_pos), round_precision),
        'brier_score': np.round(brier_score_loss(y_true, prob_pos), round_precision),
        'accuracy': np.round(accuracy_score(y_true, y_hat), round_precision),
        'TN': cm[0, 0],
        'TP': cm[1, 1],
        'FP': cm[0, 1],
        'FN': cm[1, 0],
    }


# results[target][model] -> metric dict; column i of y_test is paired with
# the i-th target name below.
results = {}

for i, y_label in enumerate(['hosp', 'death']):
    results[y_label] = {}

    for model_name in model_name_list:
        # Column 1 of each stored prediction array is used as the
        # positive-class score (assumption — confirm against the code that
        # wrote ./models/*.pt).
        results[y_label][model_name] = _binary_metrics(
            y_test[:, i],
            y_pred[model_name][y_label][:, 1],
        )

# Results

## Hospitalization

In [11]:
# Hospitalization metrics: one row per model, lowest log loss first.
(
    pd.DataFrame(results['hosp'])
    .T
    .astype(dtype_dict)
    .sort_values('binary_crossentropy')
)

Unnamed: 0,binary_crossentropy,brier_score,accuracy,TN,TP,FP,FN
SVC,0.2083,0.0559,0.9339,579243,1882,404,40701
Prior,0.2496,0.0638,0.9316,579647,0,0,42583
LR,0.3994,0.1071,0.9257,568582,7438,11065,35145


## Fatality

In [12]:
# Fatality metrics: one row per model, lowest log loss first.
(
    pd.DataFrame(results['death'])
    .T
    .astype(dtype_dict)
    .sort_values('binary_crossentropy')
)

Unnamed: 0,binary_crossentropy,brier_score,accuracy,TN,TP,FP,FN
SVC,0.0411,0.0104,0.9885,613726,1323,252,6929
LR,0.0518,0.0105,0.9886,613963,1145,15,7107
Prior,0.0705,0.0131,0.9867,613978,0,0,8252
