In [6]:
import numpy as np
import pickle
from sklearn.linear_model import SGDClassifier
from sklearn.calibration import CalibratedClassifierCV

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold
from sklearn.metrics import log_loss, brier_score_loss, accuracy_score, confusion_matrix

import matplotlib.pyplot as plt

# Data

In [7]:
with open('../data/data_train.pt', 'rb') as file:
    X_train, y_train = pickle.load(file)

print(f'{X_train.shape = }')
print(f'{y_train.shape = }')

X_train.shape = (192638, 28)
y_train.shape = (192638, 2)


# Preprocessing

In [8]:
sc_X_train = StandardScaler()
X_train_scaled = sc_X_train.fit_transform(X_train)

# Parameter Optimization

## Hospitalization

In [9]:
base_svc_hosp = SGDClassifier()
calibrated_svc_hosp = CalibratedClassifierCV(base_svc_hosp)

calibrated_svc_hosp.fit(X_train_scaled, y_train[:, 0])

CalibratedClassifierCV(base_estimator=SGDClassifier())

In [10]:
base_svc_death = SGDClassifier()
calibrated_svc_death = CalibratedClassifierCV(base_svc_death)

calibrated_svc_death.fit(X_train_scaled, y_train[:, 1])

CalibratedClassifierCV(base_estimator=SGDClassifier())

## Deaths

# Training

# Evaluation on Test Set

In [11]:
with open('../data/data_test.pt', 'rb') as file:
    X_test, y_test = pickle.load(file)

print(f'{X_test.shape = }')
print(f'{y_test.shape = }')

X_test.shape = (48160, 28)
y_test.shape = (48160, 2)


In [21]:
y_pred_hosp = calibrated_svc_hosp.predict(X_test)
y_pred_death = calibrated_svc_death.predict(X_test)

a = calibrated_svc_hosp.predict_proba(X_test)
y_pred_death_proba = calibrated_svc_death.predict_proba(X_test)

In [26]:
print(f'binary cross-entropy: {log_loss(y_test[:, 0], y_pred_hosp_proba[:, 1])}')
print(f'brier loss: {brier_score_loss(y_test[:, 0], y_pred_hosp_proba[:, 1]).round(4)}')
print(f'accuracy: {accuracy_score(y_test[:, 0], y_pred_hosp).round(4)}')
print('confusion matrix:')
print(confusion_matrix(y_test[:, 0], y_pred_hosp))

binary cross-entropy: 0.31185133571250834
brier loss: 0.0782
accuracy: 0.944
confusion matrix:
[[45462     0]
 [ 2698     0]]


In [27]:
print(f'binary cross-entropy: {log_loss(y_test[:, 1], y_pred_death_proba[:, 1])}')
print(f'brier loss: {brier_score_loss(y_test[:, 1], y_pred_death_proba[:, 1]).round(4)}')
print(f'accuracy: {accuracy_score(y_test[:, 1], y_pred_death).round(4)}')
print('confusion matrix:')
print(confusion_matrix(y_test[:, 1], y_pred_death))

binary cross-entropy: 0.44064179922524277
brier loss: 0.1287
accuracy: 0.8202
confusion matrix:
[[39466  8419]
 [  241    34]]
