In [None]:
#from torch import nn
#import torch

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn import metrics
from tqdm import tqdm
import numpy as np

# Some experiments with LSTM

In [None]:
torch.manual_seed(1)
lstm = nn.LSTM(3, 3)  # 3 input features -> 3 hidden units
inputs = [torch.randn(1, 3) for _ in range(5)]  # five time steps, one (1, 3) tensor each

# Random initial (h_0, c_0); drawn in this order so the random stream is unchanged.
h0 = torch.randn(1, 1, 3)
c0 = torch.randn(1, 1, 3)
hidden = (h0, c0)

# Variant 1: feed the sequence one time step at a time, carrying the state along.
for step_input in inputs:
    step = step_input.view(1, 1, -1)  # (seq_len=1, batch=1, features=3)
    print(step_input, step)
    out, hidden = lstm(step, hidden)

print(inputs)
print(out)
print(hidden)
print(hidden[0].shape)

print("---------")

torch.manual_seed(1)
# Variant 2: feed the whole sequence in a single call.
# "out" then holds the hidden state of every time step, while "hidden" holds only
# the final (h_n, c_n) pair, so the last slice of "out" equals hidden[0].
# Keeping "hidden" around makes it possible to continue the sequence later by
# passing it back into the LSTM.
inputs = torch.stack(inputs)  # five (1, 3) tensors -> (5, 1, 3), i.e. (seq_len, batch, features)
hidden = (torch.randn(1, 1, 3), torch.randn(1, 1, 3))  # fresh initial state
out, hidden = lstm(inputs, hidden)
print(inputs.shape)
print(out)
print(hidden)

# Define the LSTM+CNN model

In [None]:
#input_size  = 50  # representing the one-hot encoded vector size
#hidden_size = 100 # number of hidden nodes in the LSTM layer
#n_layers    = 2   # number of LSTM layers
#output_size = 50  # output of 50 scores for the next character

#lstm   = nn.LSTM(input_size, hidden_size, n_layers, batch_first=True)
#linear = nn.Linear(hidden_size, output_size)

# Data Flow Protocol
# 1. network input shape: (batch_size, seq_length, num_features)
# 2. LSTM output shape: (batch_size, seq_length, hidden_size)
# 3. Linear input shape:  (batch_size * seq_length, hidden_size)
# 4. Linear output: (batch_size * seq_length, out_size)

#x = get_batches(data)         
#x, hs = lstm(x, hs)
#x = x.reshape(-1, hidden_size) 
#x = linear(x)

class LSTM_CNN(nn.Module):
    """Bidirectional LSTM followed by a ReLU and a single-unit linear layer.

    Despite the name, no convolutional layer is built here: only the LSTM,
    the activation and the final linear projection exist. The output is one
    raw score (logit) per input sequence.

    Args:
        input_dim: number of features per time step.
        hidden_dim: hidden size of the LSTM, per direction.
        lstm_layers: number of stacked LSTM layers.
    """

    def __init__(self, input_dim=390, hidden_dim=8, lstm_layers=1):
        super().__init__()
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.layers = lstm_layers
        self.bidirectional = True

        # Hyper-parameters kept as attributes; only rec_dropout is used below.
        self.dropout = 0.5
        self.rec_dropout = 0.5
        self.depth = lstm_layers
        self.dropout_words = 0.3
        self.dropout_rnn_U = 0.3
        self.drop_conv = 0.5

        # nn.LSTM applies dropout only between stacked layers. Passing a
        # non-zero value with a single layer has no effect and emits a
        # UserWarning, so it is disabled in that case.
        inter_layer_dropout = self.rec_dropout if self.layers > 1 else 0.0

        # batch_first=True: inputs and outputs are (batch, seq_len, features).
        self.lstm = nn.LSTM(input_size=self.input_dim,
                            hidden_size=self.hidden_dim,
                            num_layers=self.layers,
                            dropout=inter_layer_dropout,
                            bidirectional=self.bidirectional,
                            batch_first=True)

        self.act1 = nn.ReLU()

        # A bidirectional LSTM concatenates both directions on the feature axis.
        num_directions = 2 if self.bidirectional else 1
        self.final = nn.Linear(self.hidden_dim * num_directions, 1)

    def forward(self, inputs, labels=None):
        """Compute one logit per sequence.

        Args:
            inputs: tensor of shape (batch, seq_len, input_dim).
            labels: unused; accepted so existing callers keep working.

        Returns:
            Tensor of shape (batch, 1) holding raw, un-squashed scores.
        """
        out, _ = self.lstm(inputs)
        # NOTE(review): at the last time step the backward direction has only
        # processed a single element of the sequence. If a full-sequence summary
        # of both directions is wanted, the final hidden states (h_n) would be
        # the usual choice - confirm against the reference model.
        out = self.act1(out[:, -1])
        return self.final(out)

# Load train and test data from Pickle files

In [None]:
# read train and test data
import pickle

# Skip the load when both datasets are already present in the kernel namespace.
already_loaded = "train_data" in globals() and "test_data" in globals()

if not already_loaded:
    # NOTE: pickle.load can execute arbitrary code - only load files from a trusted source.
    # The context managers make sure the file handles are closed again.
    with open("../readmission/train_data/train_data", "rb") as train_file:
        train_data = pickle.load(train_file)
    with open("../readmission/train_data/test_data", "rb") as test_file:
        test_data = pickle.load(test_file)

In [None]:
# Print the first ten entries of train_data[1] (the part MIMICDataset uses as y) on one line.
for idx in range(10):
    print(train_data[1][idx],end=" ")

In [None]:
# Show the nested lengths (three levels deep) of the train inputs and of the test inputs.
print("Dimensions Train Data: ",len(train_data[0]), len(train_data[0][0]), len(train_data[0][0][0]))
print("Dimensions: ",len(test_data['data'][0]), len(test_data['data'][0][0]), len(test_data['data'][0][0][0]))

In [None]:
# The test pickle is a mapping; its 'data' entry is what gets indexed with [0] / [1] further down.
test_data_ = test_data['data']

# Some analysis of the model and its input

In [None]:
# Instantiate the model with its default hyper-parameters (used for the shape checks below).
model = LSTM_CNN()

In [None]:
# Display the module representation (its registered sub-layers).
model

In [None]:
# Push the first training example through the untrained model as a smoke test.
td = torch.Tensor(train_data[0][0])
# NOTE(review): the LSTM is built with batch_first=True, so unsqueeze(1) turns every row of
# td into its own length-1 sequence. If td is one (timesteps, features) sample, unsqueeze(0)
# was presumably intended - confirm.
result = model(td.unsqueeze(1))

In [None]:
# Shape of the tensor that was passed to the model in the previous cell.
td.unsqueeze(1).shape

In [None]:
# Shape of the raw example before the extra dimension is inserted.
td.shape

In [None]:
# Shape of the model output for that example.
result.shape

# Define Dataset Class

In [None]:
class MIMICDataset(Dataset):
    """Map-style dataset over a pair of (inputs, targets) for the MIMIC data."""

    def __init__(self, data):
        """
        Args:
            data: indexable pair; data[0] holds the inputs (a numpy array) and
                data[1] the matching target values (a list).
        """
        self.x, self.y = data[0], data[1]

    def __len__(self):
        """Number of examples, taken from the inputs."""
        return len(self.x)

    def __getitem__(self, idx):
        """Return [x, y] for position idx, both converted to float32 tensors."""
        # Samplers may hand over tensor indices; turn them into plain Python values.
        index = idx.tolist() if torch.is_tensor(idx) else idx

        features = torch.tensor(self.x[index], dtype=torch.float32)
        target = torch.tensor(self.y[index], dtype=torch.float32)
        return [features, target]

In [None]:
# Number of top-level entries in the loaded test object.
len(test_data)

In [None]:
# Compare the array shapes of the train inputs and the test inputs.
train_data[0].shape, test_data['data'][0].shape

# Instantiate both Datasets

In [None]:
# Wrap both splits; MIMICDataset reads element [0] as inputs and element [1] as targets.
ds_train = MIMICDataset(train_data)
ds_test = MIMICDataset(test_data_)

In [None]:
# Number of examples in each split.
len(ds_train), len(ds_test)

# Create both dataloaders

In [None]:
# Training batches are reshuffled every epoch.
dataloader_train = DataLoader(ds_train, batch_size=4,
                        shuffle=True, num_workers=0)

# Evaluation does not benefit from shuffling; a fixed order keeps the
# predictions aligned with the dataset and the inspection cells reproducible.
dataloader_test = DataLoader(ds_test, batch_size=4,
                        shuffle=False, num_workers=0)

# Check if dataloaders work fine

In [None]:
# Pull a single batch from the training loader and inspect its structure.
# tqdm wraps the loader itself (not enumerate) so it can see the number of batches.
for i_batch, sample_batched in enumerate(tqdm(dataloader_train)):
    print(type(sample_batched), len(sample_batched))
    print(i_batch, sample_batched[0].shape)
    inputs = sample_batched[0]
    targets = sample_batched[1]
    print(inputs.shape, targets.shape)
    print(inputs, targets)

    # only the first batch is inspected, then stop.
    if i_batch == 0:
        break

In [None]:
# Look at one batch of the test loader; `inputs` / `targets` stay available for the next cells.
for i_batch, sample_batched in enumerate(dataloader_test):
    print(type(sample_batched), len(sample_batched))
    print(i_batch, sample_batched[0].shape)
    inputs, targets = sample_batched[0], sample_batched[1]
    print(inputs.shape, targets.shape)
    print(inputs, targets)

    # the first batch is enough - leave the loop right away.
    if not i_batch:
        break

In [None]:
# Forward the batch left over from the loader check through the (untrained) model.
result = model(inputs)

In [None]:
# Output shape for the batch.
result.shape

In [None]:
# Last column of the output; the model's final layer has a single output unit.
result[:,-1]

In [None]:
# Same forward pass as in the earlier cell (duplicate).
result = model(inputs)

In [None]:
# Output shape for the batch (duplicate of the earlier cell).
result.shape

In [None]:
# Last column of the output (duplicate of the earlier cell).
result[:,-1]

# LSTM+CNN from Keras implementation

# Train/Eval functions

In [None]:
# training loop of the LSTM model

def train(dataloader, model, optimizer, loss, device):
    """
    Run one training epoch over the given dataloader.
    Args:
        :param dataloader: torch dataloader yielding (inputs, targets) batches
        :param model: model to train
        :param optimizer: torch optimizer, e.g., adam, sgd, etc.
        :param loss: torch loss, e.g., BCEWithLogitsLoss()
        :param device: the target device, "cuda" or "cpu"
    Returns:
        list with the loss value of every batch, in iteration order
    """
    # switch layers such as dropout into training behaviour
    model.train()

    batch_losses = []

    for batch_inputs, batch_targets in tqdm(dataloader, desc="Train epoch"):
        # move the batch to the target device as float32
        batch_inputs = batch_inputs.to(device, dtype=torch.float32)
        batch_targets = batch_targets.to(device, dtype=torch.float32)

        # drop gradients left over from the previous step
        optimizer.zero_grad()

        # forward pass
        predictions = model(batch_inputs)

        # targets are reshaped to a column so they line up with the predictions
        batch_loss = loss(predictions, batch_targets.view(-1, 1))
        batch_losses.append(batch_loss.item())

        # backward pass: gradients of the loss w.r.t. the trainable parameters
        batch_loss.backward()

        # apply one optimizer update
        optimizer.step()

    return batch_losses
        
def evaluate(dataloader, model, device):
    """
    Collect the model outputs and the targets for every example of a dataloader.
    Args:
        :param dataloader: torch dataloader for test data set
        :param model: model to evaluate
        :param device: the target device, "cuda" or "cpu"
    Returns:
        (predictions, targets) as plain Python lists; predictions are the raw
        model outputs
    """
    # switch layers such as dropout into inference behaviour
    model.eval()

    collected_predictions, collected_targets = [], []

    # no gradients are needed while evaluating
    with torch.no_grad():
        for batch_inputs, batch_targets in tqdm(dataloader, desc="Eval epoch"):
            # move the batch to the target device as float32
            batch_inputs = batch_inputs.to(device, dtype=torch.float32)
            batch_targets = batch_targets.to(device, dtype=torch.float32)

            batch_predictions = model(batch_inputs)

            # bring everything back to the CPU and append it as Python lists
            collected_predictions += batch_predictions.cpu().numpy().tolist()
            collected_targets += batch_targets.cpu().numpy().tolist()

    return collected_predictions, collected_targets
        

# The training/evaluation loop

In [None]:
number_epochs = 20

# create device depending which one is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# fetch model
model = LSTM_CNN()

# send model to device
model.to(device)

# initialize optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# initialize loss function; it expects raw logits and applies the sigmoid internally
loss = nn.BCEWithLogitsLoss()

print("Training Model")

best_accuracy = 0
early_stopping_counter = 0

for epoch in range(number_epochs):
    # train one epoch
    error = train(dataloader_train, model, optimizer, loss, device)
    # validate
    # NOTE(review): validation runs on the test loader; a separate validation
    # split would be needed if this accuracy drives model selection - confirm.
    outputs, targets = evaluate(dataloader_test, model, device)

    # The model emits logits (it is trained with BCEWithLogitsLoss), so they are
    # mapped to probabilities with a sigmoid before thresholding at 0.5.
    # A ReLU is not a probability mapping and would shift the decision boundary.
    probabilities = torch.sigmoid(torch.tensor(outputs)).numpy().reshape(-1)
    predicted_labels = np.where(probabilities > 0.5, 1, 0)

    accuracy = metrics.accuracy_score(targets, predicted_labels)
    epoch_loss = np.asarray(error).mean()
    print(f"Epoch: {epoch}, Accuracy Score = {accuracy}, Loss = {epoch_loss}")

    if accuracy > best_accuracy:
        best_accuracy = accuracy
        # an improvement restarts the patience count
        early_stopping_counter = 0
    else:
        early_stopping_counter += 1

    #if early_stopping_counter > 2:
    #    print("Early stopping done.")
    #    break