# LSTM Model
## Imports

In [1]:
import pickle
import numpy as np
import torch
import torch.nn as nn
from dataloader import dataset, load_data
from sklearn.model_selection import KFold
from torch.utils.data import WeightedRandomSampler, SubsetRandomSampler, SequentialSampler

## LSTM Model Design

In [2]:
class ReadmissionLSTM(nn.Module):
    def __init__(self, input_size, input_len, hidden_size, dropout=0.5):
        super(ReadmissionLSTM, self).__init__()
        self.lstm = nn.LSTM(input_size,
                            hidden_size,
                            num_layers=3,
                            batch_first=True,
                            dropout=dropout,
                            bidirectional=True)
        self.fc = nn.Linear(2* input_len * hidden_size, 2)
        self.softmax = nn.Softmax(dim=1)
    def forward(self, x, masks):
        batch_size, seq_len = x.shape[:2]
        x = torch.reshape(x, (batch_size, seq_len, -1))
        masks = torch.reshape(masks, (batch_size, seq_len, -1))
        x, _ = self.lstm(x)
        x = torch.flatten(x, start_dim=1)
        x = self.fc(x)
        out = self.softmax(x)
        return out

In [4]:
# train_dl, val_dl = load_data(SequentialSampler(dataset), SequentialSampler(dataset), batch_size=10)
# x, feats, masks, y = next(iter(train_dl))
# model = ReadmissionLSTM(input_size=3926, input_len=48, hidden_size=128)
# y_hat = model(x, masks)
# y_hat

tensor([[0.5002, 0.4998],
        [0.5252, 0.4748],
        [0.5023, 0.4977],
        [0.5040, 0.4960],
        [0.5027, 0.4973],
        [0.5123, 0.4877],
        [0.5081, 0.4919],
        [0.5182, 0.4818],
        [0.5237, 0.4763],
        [0.5159, 0.4841]], grad_fn=<SoftmaxBackward>)

## LSTM Training
Define Training and Validation Functions
adapted from: https://medium.com/dataseries/k-fold-cross-validation-with-pytorch-and-sklearn-d094aa00105f

In [3]:
def train_epoch(model, train_dataloader, optimizer, criterion):
    train_loss, train_correct = 0.0, 0
    train_false_pos, train_false_neg, train_true_pos, train_true_neg = 0.0, 0.0, 0.0, 0.0

    model.train()

    for x, feats, masks, y in train_dataloader:
        optimizer.zero_grad()
        y_hat = model(x, masks)
        y_pred = torch.argmax(y_hat, dim=1)
        loss = criterion(y_hat, y)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        train_correct += (y_pred == y).sum().item()
        train_false_pos += ((y_pred == 1) * (y == 0)).sum()
        train_true_pos += ((y_pred == 1) * (y == 1)).sum()
        train_false_neg += ((y_pred == 0) * (y == 1)).sum()
        train_true_neg += ((y_pred == 0) * (y == 0)).sum()

    return train_loss, train_correct, train_false_pos, train_false_neg, train_true_pos, train_true_neg

def valid_epoch(model, valid_dataloader, criterion):
    valid_loss, valid_correct = 0.0, 0
    valid_false_pos, valid_false_neg, valid_true_pos, valid_true_neg = 0.0, 0.0, 0.0, 0.0

    model.eval()

    with torch.no_grad():
        for x, feats, masks, y in valid_dataloader:
            y_hat = model(x, masks)
            y_pred = torch.argmax(y_hat, dim=1)
            loss = criterion(y_hat, y)

            valid_loss += loss.item()
            valid_correct += (y_pred == y).sum().item()
            valid_false_pos += ((y_pred == 1) * (y == 0)).sum()
            valid_true_pos += ((y_pred == 1) * (y == 1)).sum()
            valid_false_neg += ((y_pred == 0) * (y == 1)).sum()
            valid_true_neg += ((y_pred == 0) * (y == 0)).sum()

    return valid_loss, valid_correct, valid_false_pos, valid_false_neg, valid_true_pos, valid_true_neg

def subset_weighted_random_sampler(dataset, idx):
    labels = torch.tensor(dataset.y)
    subset_labels = labels[idx]
    majority_len = int((subset_labels == 0).sum())
    minority_len = int((subset_labels == 1).sum())

    dist = torch.zeros(len(dataset.y))
    dist[idx] = 1
    dist[labels == 1] = dist[labels == 1] * (0.5 / minority_len)
    dist[labels == 0] = dist[labels == 0] * (0.5 / majority_len)

    return WeightedRandomSampler(dist, num_samples=2 * majority_len, replacement=True)

## K-Fold Cross Validation

In [4]:
def kfold(dataset=dataset, k_folds=2, n_epochs=10, batch_size=64):
    history = {'train_loss': [], 'test_loss': [],'train_acc':[],'test_acc':[], 'train_tpr':[], 'test_tpr':[], 'train_fpr':[], 'test_fpr':[]}
    kf = KFold(n_splits=k_folds, shuffle=True)
    for fold, (train_idx, val_idx) in enumerate(kf.split(np.arange(len(dataset)))):
        print('Fold {}'.format(fold + 1))

        train_sampler = subset_weighted_random_sampler(dataset, train_idx)
        test_sampler = subset_weighted_random_sampler(dataset, val_idx)
        train_loader, test_loader = load_data(train_sampler, test_sampler, batch_size=batch_size)

        model = ReadmissionLSTM(input_size=3926, input_len=48, hidden_size=256)
        optimizer = torch.optim.Adam(model.parameters(), lr=0.002)
        #criterion = FocalLoss(gamma=0.2, alpha=0.75)
        criterion = nn.CrossEntropyLoss()

        for epoch in range(n_epochs):
            train_loss, train_correct, train_fpos, train_fneg, train_tpos, train_tneg = train_epoch(model, train_loader, optimizer, criterion)
            test_loss, test_correct, test_fpos, test_fneg, test_tpos, test_tneg = valid_epoch(model, test_loader, criterion)

            train_loss = train_loss / len(train_loader.sampler)
            train_acc = train_correct / len(train_loader.sampler) * 100
            test_loss = test_loss / len(test_loader.sampler)
            test_acc = test_correct / len(test_loader.sampler) * 100
            train_tpr = train_tpos / (train_tpos + train_fneg)
            test_tpr = test_tpos / (test_tpos + test_fneg)
            train_fpr = train_fpos / (train_fpos + train_tneg)
            test_fpr = test_fpos / (test_fpos + test_tneg)

            print("Epoch:{}/{} AVG Training Loss:{:.3f} AVG Test Loss:{:.3f} AVG Training Acc {:.2f} % AVG Test Acc {:.2f}%".format(epoch + 1, n_epochs, train_loss, test_loss, train_acc, test_acc))
            print("Epoch:{}/{} AVG Training TPR:{:.2f} AVG Test TPR:{:.2f} AVG Training FPR:{:.2f} AVG Test FPR:{:.2f}".format(epoch + 1, n_epochs, train_tpr, test_tpr, train_fpr, train_fpr))
            history['train_loss'].append(train_loss)
            history['test_loss'].append(test_loss)
            history['train_acc'].append(train_acc)
            history['test_acc'].append(test_acc)
            history['train_tpr'].append(train_tpr)
            history['test_tpr'].append(test_tpr)
            history['train_fpr'].append(train_fpr)
            history['test_fpr'].append(test_fpr)


    return history

In [5]:
%%time
history = kfold()

Fold 1
Epoch:1/10 AVG Training Loss:0.010 AVG Test Loss:0.012 AVG Training Acc 61.68 % AVG Test Acc 56.15%
Epoch:1/10 AVG Training TPR:0.67 AVG Test TPR:0.33 AVG Training FPR:0.44 AVG Test FPR:0.44
Epoch:2/10 AVG Training Loss:0.010 AVG Test Loss:0.011 AVG Training Acc 66.48 % AVG Test Acc 61.90%
Epoch:2/10 AVG Training TPR:0.69 AVG Test TPR:0.57 AVG Training FPR:0.36 AVG Test FPR:0.36
Epoch:3/10 AVG Training Loss:0.009 AVG Test Loss:0.011 AVG Training Acc 71.19 % AVG Test Acc 62.94%
Epoch:3/10 AVG Training TPR:0.73 AVG Test TPR:0.51 AVG Training FPR:0.31 AVG Test FPR:0.31
Epoch:4/10 AVG Training Loss:0.010 AVG Test Loss:0.011 AVG Training Acc 62.57 % AVG Test Acc 51.85%
Epoch:4/10 AVG Training TPR:0.64 AVG Test TPR:0.63 AVG Training FPR:0.39 AVG Test FPR:0.39
Epoch:5/10 AVG Training Loss:0.010 AVG Test Loss:0.011 AVG Training Acc 60.81 % AVG Test Acc 57.40%
Epoch:5/10 AVG Training TPR:0.67 AVG Test TPR:0.65 AVG Training FPR:0.46 AVG Test FPR:0.46
Epoch:6/10 AVG Training Loss:0.010 AVG

## Storing Results

In [6]:
with open("./lstm_model_history.pickle", "wb") as f:
    pickle.dump(history, f)