In [1]:
import os
import gc
import csv
import argparse
import pickle

import torch
from torch.utils.data import DataLoader
import pandas as pd
import numpy as np
from scipy.interpolate import interp1d

from tqdm import tqdm

from dataset import ICUSepsisDataset
from model import SepsisPredictionModel_B1

# Test split read from ./data/test; the evaluation cells further down iterate over it.
icu_test = ICUSepsisDataset('./data/test')

  from .autonotebook import tqdm as notebook_tqdm


Found 10000 files in ./data/test


In [3]:
EPOCHS = 10
LR = 0.005
# Smoke-test switch. When True the cell performs one optimisation step on the
# first valid sample, prints the tensor shapes and stops -- the behaviour this
# cell was saved with. Set to False to train for the full EPOCHS.
DEBUG_SINGLE_STEP = True

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = SepsisPredictionModel_B1(input_size=len(ICUSepsisDataset.features), hidden_dim=200)
model.to(device)
print(model)

icu_train = ICUSepsisDataset(os.path.join('./data', 'train'))
# NOTE(review): train_loader is built but the loop below iterates icu_train
# directly, so samples are visited unshuffled and unbatched -- confirm intent.
train_loader = DataLoader(icu_train, batch_size=1, shuffle=True)
train_size = len(icu_train)

loss = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LR)

# Make training mode explicit so the model's dropout layer is active.
model.train()

for epoch in range(EPOCHS):
    train_loss = 0.0
    i = 0  # number of valid samples processed in this epoch
    for x, y in icu_train:
        # ignore invalid samples
        if x is None:
            continue

        i += 1
        optimizer.zero_grad()

        x = x.to(device)
        if DEBUG_SINGLE_STEP:
            print('x.shape', x.shape)
        y = y.to(device)
        if DEBUG_SINGLE_STEP:
            print('y.shape', y.shape)

        # Forward pass; the loss is computed against the last entry of y only.
        output = model(x)
        if DEBUG_SINGLE_STEP:
            print('output.shape', output.shape)
        L = loss(output, y[-1])
        # NOTE(review): x is unbatched here (the saved run printed [23, 9]), so
        # x.size(0) is the sequence length and the running sum is weighted by
        # it -- confirm this weighting is intended.
        train_loss += L.item() * x.size(0)

        # Backpropagation
        L.backward()
        optimizer.step()

        if i % 40 == 0:
            print(f'L: {train_loss / i: .5}')

        if DEBUG_SINGLE_STEP:
            break

    if DEBUG_SINGLE_STEP:
        break

    # NOTE(review): normalises by the number of files, including samples that
    # were skipped as invalid above -- confirm.
    train_loss /= train_size

    print(f'Epoch {epoch+1}/{EPOCHS}, Loss {train_loss}')

SepsisPredictionModel_B1(
  (lstm): LSTM(9, 200, num_layers=2, batch_first=True)
  (mlp): Sequential(
    (0): Linear(in_features=200, out_features=100, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.2, inplace=False)
    (3): Linear(in_features=100, out_features=20, bias=True)
    (4): ReLU()
    (5): Linear(in_features=20, out_features=2, bias=True)
  )
)
Found 20000 files in ./data/train
x.shape torch.Size([23, 9])
y.shape torch.Size([23])
asd torch.Size([200])
output.shape torch.Size([2])


In [None]:
def calc_accuracy(predicted: np.ndarray, labels: np.ndarray):
    """Return the fraction of positions where ``predicted`` equals ``labels``.

    Args:
        predicted: array-like of predicted class ids.
        labels: array-like of ground-truth class ids, same shape as ``predicted``.

    Returns:
        Accuracy as a float in [0, 1].

    Raises:
        ValueError: if the inputs are empty or their shapes differ.
    """
    predicted = np.asarray(predicted)
    labels = np.asarray(labels)

    if predicted.shape != labels.shape:
        raise ValueError(
            f'shape mismatch: predicted {predicted.shape} vs labels {labels.shape}'
        )
    if labels.size == 0:
        raise ValueError('cannot compute accuracy of an empty label set')

    # Vectorised comparison; the mean of a boolean array is the hit rate.
    return float(np.mean(predicted == labels))

In [None]:
def calc_f1_score(predicted: np.ndarray, labels: np.ndarray):
    """Compute the binary F1-score (positive class = 1) and print the confusion counts.

    Args:
        predicted: array-like of predicted class ids (0 or 1).
        labels: array-like of ground-truth class ids (0 or 1).

    Returns:
        ``2*tp / (2*tp + fp + fn)``, or 0.0 when that denominator is zero
        (no positive appears in either the labels or the predictions).
    """
    predicted = np.asarray(predicted)
    labels = np.asarray(labels)

    tp = int(np.logical_and(labels == 1, predicted == 1).sum())
    fp = int(np.logical_and(labels == 0, predicted == 1).sum())
    fn = int(np.logical_and(labels == 1, predicted == 0).sum())
    tn = int(np.logical_and(labels == 0, predicted == 0).sum())
    print('tp = ', tp)
    print('fp = ', fp)
    print('fn = ', fn)
    print('tn = ', tn)

    denominator = 2 * tp + fp + fn
    if denominator == 0:
        # F1 is undefined without any positive; report 0.0 instead of raising
        # ZeroDivisionError.
        return 0.0
    return 2 * tp / denominator

In [None]:
class TestModel(torch.nn.Module):
    """Scratch LSTM + MLP classifier that prints tensor shapes at every stage.

    A two-layer LSTM feeds an MLP head that ends in two output units.

    Args:
        input_size: number of features per time step.
        hidden_dim: LSTM hidden size, which is also the MLP input width.
    """

    def __init__(self, input_size, hidden_dim=100):
        super().__init__()
        self.lstm = torch.nn.LSTM(
            input_size,
            hidden_size=hidden_dim,
            num_layers=2,
            batch_first=True,
        )
        head_layers = [
            torch.nn.Linear(hidden_dim, 100),
            torch.nn.ReLU(),
            torch.nn.Dropout(p=0.2),
            torch.nn.Linear(100, 20),
            torch.nn.ReLU(),
            torch.nn.Linear(20, 2),
        ]
        self.mlp = torch.nn.Sequential(*head_layers)

    def forward(self, x):
        """Run the LSTM, keep the last slice of its output and classify it."""
        print(1, x.shape)

        lstm_out, _ = self.lstm(x)
        print(2, lstm_out.shape)

        # Index -1 on the leading axis: for an unbatched (seq, features) input
        # this is the last time step.
        # NOTE(review): with batch_first=True a batched input has the batch on
        # the leading axis, so this would select the last batch element
        # instead -- confirm how callers shape x.
        last_slice = lstm_out[-1].squeeze()
        print(3, last_slice.shape)

        scores = self.mlp(last_slice)
        print(4, scores.shape)
        return scores

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Rebinds the notebook-level `model` to the shape-printing TestModel.
model = TestModel(input_size=len(ICUSepsisDataset.features))
model.to(device)
# Checkpoint loading is disabled, so the weights stay randomly initialised.
# model.load_state_dict(torch.load('model_a_1.state', map_location=device))

In [None]:
# Smoke-test the current model on the first test sample.
x, y = next(iter(icu_test))
# The model lives on `device`; inputs and labels must be moved there too,
# otherwise the forward pass (and the later loss on `output` / `y`) fails
# whenever CUDA is in use.
x = x.to(device)
y = y.to(device)
label = int(y[-1])
print(x.shape, y.shape)
output = model(x)
print(output, output.shape)

In [None]:
# Cross-entropy criterion, applied to the single-sample `output` in the next cell.
loss = torch.nn.CrossEntropyLoss()

In [None]:
# Loss of the single-sample prediction against the last entry of y.
loss(output, y[-1])

In [None]:
y_true = []
y_predict = []

# Inference only: switch dropout off and skip autograd bookkeeping so the
# predictions are deterministic and no graph is kept per sample.
model.eval()
with torch.no_grad(), tqdm(total=len(icu_test)) as pbar:
    for x, y in icu_test:
        pbar.update(1)

        y = y.to(device)
        label = int(y[-1])  # ground truth is the last entry of y
        N = len(y)

        # Samples without usable features are not skipped: they are scored
        # as a negative (0) prediction.
        if x is None:
            y_true.append(label)
            y_predict.append(0)
            continue

        x = x.to(device).unsqueeze(0)  # add a leading batch dimension of 1

        # Forward pass
        output = model(x).view(N, 2)
        # argmax over the raw scores; softmax is monotonic, so applying it
        # first would not change the selected class.
        prediction = output.argmax(dim=1)

        assert prediction.shape == y.shape

        y_true.append(label)
        y_predict.append(int(prediction[-1]))

In [None]:
# Convert the collected per-sample labels and predictions to NumPy arrays
# for the metric helpers.
y_true = np.array(y_true)
y_predict = np.array(y_predict)

In [None]:
# Share of test samples whose last predicted class equals the last entry of y.
print('Model A.1 accuracy:', calc_accuracy(y_predict, y_true))

In [None]:
# calc_f1_score also prints the tp/fp/fn/tn counts before returning the score.
print('Model A.1 F1-score:', calc_f1_score(y_predict, y_true))

We observe that model A.1 performs very poorly: it does not produce a single true positive.

# Model A.2

In [None]:
# SepsisPredictionModel_A2 is not among the notebook's top-level imports (only
# SepsisPredictionModel_B1 is), so import it here to avoid a NameError.
# NOTE(review): assumes model.py exports SepsisPredictionModel_A2 -- confirm.
from model import SepsisPredictionModel_A2

model = SepsisPredictionModel_A2(input_size=len(ICUSepsisDataset.features))
model.to(device)
# The model is only used for inference below, so put it in evaluation mode.
model.eval()
# NOTE: torch.load unpickles the file; only load checkpoints from a trusted source.
model.load_state_dict(torch.load('model_a_2.state', map_location=device))

In [None]:
y_true = []
y_predict = []

# Inference only: switch dropout off and skip autograd bookkeeping so the
# predictions are deterministic and no graph is kept per sample.
model.eval()
with torch.no_grad(), tqdm(total=len(icu_test)) as pbar:
    for x, y in icu_test:
        pbar.update(1)

        y = y.to(device)
        label = int(y[-1])  # ground truth is the last entry of y
        N = len(y)

        # Samples without usable features are not skipped: they are scored
        # as a negative (0) prediction.
        if x is None:
            y_true.append(label)
            y_predict.append(0)
            continue

        x = x.to(device).unsqueeze(0)  # add a leading batch dimension of 1

        # Forward pass
        output = model(x).view(N, 2)
        # argmax over the raw scores; softmax is monotonic, so applying it
        # first would not change the selected class.
        prediction = output.argmax(dim=1)

        assert prediction.shape == y.shape

        y_true.append(label)
        y_predict.append(int(prediction[-1]))

In [None]:
# Convert the collected per-sample labels and predictions to NumPy arrays
# for the metric helpers.
y_true = np.array(y_true)
y_predict = np.array(y_predict)

In [None]:
# Share of test samples whose last predicted class equals the last entry of y.
print('Model A.2 accuracy:', calc_accuracy(y_predict, y_true))

In [None]:
# calc_f1_score also prints the tp/fp/fn/tn counts before returning the score.
print('Model A.2 F1-score:', calc_f1_score(y_predict, y_true))

**TODO:** Model A.2 — summarize the accuracy and F1-score here once the evaluation cells above have been run.