# LSTM+CNN Model
## Imports

In [1]:
import pickle
import numpy as np
import torch
import torch.nn as nn
from dataloader import dataset, load_data
from sklearn.model_selection import KFold
from torch.utils.data import WeightedRandomSampler, SubsetRandomSampler, SequentialSampler

## LSTM+CNN Model Design

In [11]:
class LSTMPlusCNN(nn.Module):
    def __init__(self, input_size, input_len, feature_len, hidden_size, dropout=0.5):
        super(LSTMPlusCNN, self).__init__()
        self.lstm = nn.LSTM(input_size,
                            hidden_size,
                            num_layers=3,
                            batch_first=True,
                            dropout=dropout, # dropout percentage not specified in paper
                            bidirectional=True)
        self.bn = nn.BatchNorm1d(2*hidden_size)
        self.cnn = nn.Conv1d(in_channels=2*hidden_size,
                             out_channels=3, # paper refers to "No of filters: n"
                             kernel_size=3)
        self.relu = nn.ReLU()
        self.pool = nn.MaxPool1d(2)
        linear_features = feature_len + 3*(input_len - 2)//2
        self.fc = nn.Linear(in_features=linear_features,
                            out_features=2)
        self.softmax = nn.Softmax(dim=1)
    def forward(self, x, feats, masks):
        batch_size, seq_len = x.shape[:2]
        x = torch.reshape(x, (batch_size, seq_len, -1))
        masks = torch.reshape(masks, (batch_size, seq_len, -1))
        x, _ = self.lstm(x)
        x = torch.movedim(x, 1, 2) # (N,L,C) -> (N,C,L)
        x = self.bn(x)
        x = self.cnn(x)
        x = self.relu(x)
        x = self.pool(x)
        x = torch.flatten(x, start_dim=1)
        x_plus_feats = torch.cat((x, feats), dim=1)
        x = self.fc(x_plus_feats)
        out = self.softmax(x) # (N,C*L) -> (N,2)
        return out

In [5]:
# idx = np.arange(500)
# sampler = subset_weighted_random_sampler(dataset, idx)
# train_dl, val_dl = load_data(sampler, sampler, batch_size=64)
# x, feats, masks, y = next(iter(train_dl))
# our_input_size = 3926
# model = LSTMPlusCNN(input_size=3926, input_len=48, feature_len=4037, hidden_size=128)
# model(x, feats, masks)

tensor([[0.4995, 0.5005],
        [0.4862, 0.5138],
        [0.4920, 0.5080],
        [0.4900, 0.5100],
        [0.4822, 0.5178],
        [0.5069, 0.4931],
        [0.4880, 0.5120],
        [0.4780, 0.5220],
        [0.4579, 0.5421],
        [0.4929, 0.5071]], grad_fn=<SoftmaxBackward>)

## LSTM+CNN Training

### Define Training and Validation Functions
adapted from: https://medium.com/dataseries/k-fold-cross-validation-with-pytorch-and-sklearn-d094aa00105f

In [21]:
def train_epoch(model, train_dataloader, optimizer, criterion):
    train_loss, train_correct = 0.0, 0
    train_false_pos, train_false_neg, train_true_pos, train_true_neg = 0.0, 0.0, 0.0, 0.0

    model.train()

    for x, feats, masks, y in train_dataloader:
        optimizer.zero_grad()
        y_hat = model(x, feats, masks)
        y_pred = torch.argmax(y_hat, dim=1)
        loss = criterion(y_hat, y)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        train_correct += (y_pred == y).sum().item()
        train_false_pos += ((y_pred == 1) * (y == 0)).sum()
        train_true_pos += ((y_pred == 1) * (y == 1)).sum()
        train_false_neg += ((y_pred == 0) * (y == 1)).sum()
        train_true_neg += ((y_pred == 0) * (y == 0)).sum()

    return train_loss, train_correct, train_false_pos, train_false_neg, train_true_pos, train_true_neg

def valid_epoch(model, valid_dataloader, criterion):
    valid_loss, valid_correct = 0.0, 0
    valid_false_pos, valid_false_neg, valid_true_pos, valid_true_neg = 0.0, 0.0, 0.0, 0.0

    model.eval()

    with torch.no_grad():
        for x, feats, masks, y in valid_dataloader:
            y_hat = model(x, feats, masks)
            y_pred = torch.argmax(y_hat, dim=1)
            loss = criterion(y_hat, y)

            valid_loss += loss.item()
            valid_correct += (y_pred == y).sum().item()
            valid_false_pos += ((y_pred == 1) * (y == 0)).sum()
            valid_true_pos += ((y_pred == 1) * (y == 1)).sum()
            valid_false_neg += ((y_pred == 0) * (y == 1)).sum()
            valid_true_neg += ((y_pred == 0) * (y == 0)).sum()

    return valid_loss, valid_correct, valid_false_pos, valid_false_neg, valid_true_pos, valid_true_neg

def subset_weighted_random_sampler(dataset, idx, sample="over"):
    labels = torch.tensor(dataset.y)
    subset_labels = labels[idx]
    majority_len = int((subset_labels == 0).sum())
    minority_len = int((subset_labels == 1).sum())
    if sample == "over":
        sample_size = 2 * majority_len
    elif sample == "under":
        sample_size = majority_len
    else:
        sample_size = len(idx)

    dist = torch.zeros(len(dataset.y))
    dist[idx] = 1
    dist[labels == 1] = dist[labels == 1] * (0.5 / minority_len)
    dist[labels == 0] = dist[labels == 0] * (0.5 / majority_len)

    return WeightedRandomSampler(dist, num_samples=sample_size, replacement=True)

## K Fold Cross-Validation

In [22]:
def kfold(dataset=dataset, k_folds=5, n_epochs=10, batch_size=64):
    history = {'train_loss': [], 'test_loss': [],'train_acc':[],'test_acc':[], 'train_tpr':[], 'test_tpr':[], 'train_fpr':[], 'test_fpr':[]}
    kf = KFold(n_splits=k_folds, shuffle=True)
    for fold, (train_idx, val_idx) in enumerate(kf.split(np.arange(len(dataset)))):
        print('Fold {}'.format(fold + 1))

        train_sampler = subset_weighted_random_sampler(dataset, train_idx)
        test_sampler = subset_weighted_random_sampler(dataset, val_idx)
        train_loader, test_loader = load_data(train_sampler, test_sampler, batch_size=batch_size)

        model = LSTMPlusCNN(input_size=3926, input_len=48, feature_len=4037, hidden_size=128)
        optimizer = torch.optim.Adam(model.parameters(), lr=0.002)
        #criterion = FocalLoss(gamma=0.2, alpha=0.75)
        criterion = nn.CrossEntropyLoss()

        for epoch in range(n_epochs):
            train_loss, train_correct, train_fpos, train_fneg, train_tpos, train_tneg = train_epoch(model, train_loader, optimizer, criterion)
            test_loss, test_correct, test_fpos, test_fneg, test_tpos, test_tneg = valid_epoch(model, test_loader, criterion)

            train_loss = train_loss / len(train_loader.sampler)
            train_acc = train_correct / len(train_loader.sampler) * 100
            test_loss = test_loss / len(test_loader.sampler)
            test_acc = test_correct / len(test_loader.sampler) * 100
            train_tpr = train_tpos / (train_tpos + train_fneg)
            test_tpr = test_tpos / (test_tpos + test_fneg)
            train_fpr = train_fpos / (train_fpos + train_tneg)
            test_fpr = test_fpos / (test_fpos + test_tneg)

            print("Epoch:{}/{} AVG Training Loss:{:.3f} AVG Test Loss:{:.3f} AVG Training Acc {:.2f} % AVG Test Acc {:.2f} %".format(epoch + 1, n_epochs, train_loss, test_loss, train_acc, test_acc))
            print("Epoch:{}/{} AVG Training TPR:{:.3f} AVG Test TPR:{:.3f} AVG Training FPR:{:.3f} AVG Test FPR:{:.3f}".format(epoch + 1, n_epochs, train_tpr, test_tpr, train_fpr, train_fpr))
            history['train_loss'].append(train_loss)
            history['test_loss'].append(test_loss)
            history['train_acc'].append(train_acc)
            history['test_acc'].append(test_acc)
            history['train_tpr'].append(train_tpr)
            history['test_tpr'].append(test_tpr)
            history['train_fpr'].append(train_fpr)
            history['test_fpr'].append(test_fpr)

    return history

In [23]:
%%time
history = kfold()

Fold 1
Epoch:1/10 AVG Training Loss:0.010 AVG Test Loss:0.010 AVG Training Acc 68.41 % AVG Test Acc 69.04 %
Epoch:1/10 AVG Training TPR:0.664 AVG Test TPR:0.785 AVG Training FPR:0.295 AVG Test FPR:0.295
Epoch:2/10 AVG Training Loss:0.009 AVG Test Loss:0.010 AVG Training Acc 76.25 % AVG Test Acc 67.24 %
Epoch:2/10 AVG Training TPR:0.780 AVG Test TPR:0.624 AVG Training FPR:0.255 AVG Test FPR:0.255
Epoch:3/10 AVG Training Loss:0.008 AVG Test Loss:0.009 AVG Training Acc 82.10 % AVG Test Acc 70.08 %
Epoch:3/10 AVG Training TPR:0.841 AVG Test TPR:0.658 AVG Training FPR:0.199 AVG Test FPR:0.199
Epoch:4/10 AVG Training Loss:0.008 AVG Test Loss:0.009 AVG Training Acc 83.50 % AVG Test Acc 74.12 %
Epoch:4/10 AVG Training TPR:0.857 AVG Test TPR:0.714 AVG Training FPR:0.188 AVG Test FPR:0.188
Epoch:5/10 AVG Training Loss:0.007 AVG Test Loss:0.009 AVG Training Acc 86.87 % AVG Test Acc 68.66 %
Epoch:5/10 AVG Training TPR:0.899 AVG Test TPR:0.626 AVG Training FPR:0.165 AVG Test FPR:0.165
Epoch:6/10 AV

## Storing Results

In [24]:
with open("./lstm_cnn_model_history_undersample.pickle", "wb") as f:
    pickle.dump(history, f)