In [None]:
import pandas as pd
import pickle
import glob

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn import metrics
from tqdm.auto import tqdm
import numpy as np
import random

import mimic3models.metrics as m
import matplotlib.pyplot as plt

import hiplot as hip
import re

from DataLoader import LoadDataSets
from lstm_cnn import LSTM_CNN4
from lstm_cnn import trainer, evaluate, calcMetrics, plotLoss, plotAUC

In [None]:
#CUDA RNN and LSTM
#In some versions of CUDA, RNNs and LSTM networks may have non-deterministic behavior. See torch.nn.RNN() and torch.nn.LSTM() for details and workarounds.
# https://pytorch.org/docs/stable/notes/randomness.html

def randseed(seed=42):
    """Seed the PyTorch, Python and NumPy random number generators.

    Args:
        seed: Integer seed handed to each generator (default 42).
    """
    # Same three generators, seeded in the same order, via one loop.
    for seed_generator in (torch.manual_seed, random.seed, np.random.seed):
        seed_generator(seed)


randseed()

# MIMIC-III and MIMIC-IV test data

In [None]:
# Drop dataset objects that a previous run of this cell may have left in the
# kernel. Popping from the namespace ignores only the "name was never defined"
# case; the former bare `except:` clauses would also have hidden any other
# error (including KeyboardInterrupt).
for stale_name in ("train_data", "test_data", "val_data"):
    globals().pop(stale_name, None)

already_loaded = False
# Dataset switch forwarded to LoadDataSets; presumably False selects MIMIC-III
# and True selects MIMIC-IV -- confirm against DataLoader.LoadDataSets.
mimic4 = False

dataloader_train, dataloader_val, dataloader_test = LoadDataSets(batch_size=64, mimic4=mimic4)

# Get data from files

In [None]:
# Checkpoint file names are expected to look like
#   model__<n>_<hidden_dim>_<lstm_layers>_<lr>_<dropout>-<dropout_w>-<dropout_conv>__epoch-<e>_loss-<l>_acc-<a>_auc-<auc>.pth
# Compiled once here instead of once per file.
_CHECKPOINT_NAME_RE = re.compile(
    r"(?:^|[\\/])model__(\d+)_(\d+)_(\d+)_(.*)_(\d\.\d)-(\d\.\d)-(\d\.\d)"
    r"__epoch-(\d+)_loss-(.*)_acc-(.*)_auc-(.*)\.pth"
)
# Everything between "model__" and the last "__" of the path; used as a short
# label for a run in the progress output.
_CHECKPOINT_KEY_RE = re.compile(r"(?:^|[\\/])model__(.*)__")
# Hyper-parameters that identify one grid-search configuration.
_HPARAM_COLUMNS = ["hidden_dim", "lstm_layers", "lr", "dropout", "dropout_w", "dropout_conv"]


def _parseCheckpointName(path):
    """Parse one checkpoint path into a metadata dict, or return None if it does not match."""
    name_match = _CHECKPOINT_NAME_RE.search(path)
    key_match = _CHECKPOINT_KEY_RE.search(path)
    if name_match is None or key_match is None:
        return None
    try:
        numbers = [float(group) for group in name_match.groups()]
    except ValueError:
        # A free-form field (lr / loss / acc / auc) did not hold a number.
        return None
    # The leading number of the file name is parsed but not used.
    (_, hidden_dim, lstm_layers, lr, dropout, dropout_w, dropout_conv,
     epoch, loss, acc, auc) = numbers
    return {
        "hidden_dim": int(hidden_dim),
        "lstm_layers": int(lstm_layers),
        "lr": lr,
        "dropout": dropout,
        "dropout_w": dropout_w,
        "dropout_conv": dropout_conv,
        "epoch": int(epoch),
        "loss": loss,
        "acc": acc,
        "auc": auc,
        "filekey": key_match.group(1),
        "path": path,
    }


def computeAnalysisData(listoffiles, max_records=2000, cache_file=None):
    """Evaluate grid-search checkpoints on the test set and collect HiPlot records.

    For every hyper-parameter combination found in ``listoffiles`` the
    checkpoint(s) with the highest epoch number are kept. The ``max_records``
    of them with the highest file-name AUC are rebuilt as ``LSTM_CNN4`` models,
    loaded, and scored on the module-level ``dataloader_test`` through
    ``evaluate``. The resulting records are pickled to ``cache_file``.

    Args:
        listoffiles: Paths of ``.pth`` checkpoints whose names follow the
            ``model__..._auc-<auc>.pth`` scheme. Paths that do not follow it
            are skipped (and counted in a printed message) instead of raising.
        max_records: Upper bound on the number of checkpoints evaluated; the
            ones with the highest file-name AUC are kept. ``None`` or ``0``
            keeps every record.
        cache_file: Where to pickle the result. When ``None`` the name follows
            the notebook's ``mimic4`` flag (``dataforhiplot_mimic4_all.pkl`` if
            it is true, otherwise ``dataforhiplot_mimic3_all.pkl``), so the
            MIMIC-IV run no longer overwrites the MIMIC-III cache.

    Returns:
        A list of dicts, ordered by ascending file-name AUC, with the keys
        ``hidden_dim``, ``lstm_layers``, ``lr``, ``dropout``, ``dropout_w``,
        ``dropout_conv``, ``val_auc`` (AUC parsed from the file name) and
        ``test_auc`` (ROC AUC computed here). An empty list is returned, and
        no cache file is written, when no path could be parsed.
    """
    listoffiles = list(listoffiles)
    parsed = [record for record in map(_parseCheckpointName, listoffiles) if record is not None]

    skipped = len(listoffiles) - len(parsed)
    if skipped:
        print(f"Skipping {skipped} file(s) whose names do not follow the checkpoint naming scheme.")
    if not parsed:
        # Nothing to evaluate; do not write an empty cache that a later run would reuse.
        print("No usable checkpoint files found.")
        return []

    if cache_file is None:
        use_mimic4 = globals().get("mimic4", False)
        cache_file = "dataforhiplot_mimic4_all.pkl" if use_mimic4 else "dataforhiplot_mimic3_all.pkl"

    # Keep, per hyper-parameter combination, only the rows of the last epoch.
    runs = pd.DataFrame.from_records(parsed)
    is_last_epoch = runs.groupby(_HPARAM_COLUMNS)["epoch"].transform("max") == runs["epoch"]
    candidates = sorted(runs[is_last_epoch].to_dict("records"), key=lambda run: run["auc"])
    print(len(candidates))

    topdata = candidates[-max_records:] if max_records else candidates
    print(f"From {len(candidates)} records we will use {len(topdata)} for data analysis.")

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    best_test_auc = 0.0
    dataforhiplot = []

    for idx, run in enumerate(topdata):
        hidden_dim = int(run["hidden_dim"])
        lstm_layers = int(run["lstm_layers"])
        hparams = [hidden_dim, lstm_layers, run["lr"], run["dropout"], run["dropout_w"], run["dropout_conv"]]
        print(idx, " -> ", run["filekey"], run["auc"], hparams)

        # Rebuild the network with the hyper-parameters encoded in the file
        # name so that it matches the configuration the weights were saved from.
        model = LSTM_CNN4(hidden_dim=hidden_dim, lstm_layers=lstm_layers, dropout=run["dropout"],
                          dropout_w=run["dropout_w"], dropout_conv=run["dropout_conv"])
        model.to(device)
        # Load exactly the file this record was parsed from (a wildcard lookup
        # could return a different epoch or run). map_location lets checkpoints
        # saved on a GPU load on a CPU-only machine.
        model.load_state_dict(torch.load(run["path"], map_location=device))
        model.eval()

        # Score the model on the test split.
        outputs, targets = evaluate(dataloader_test, model, device)
        scores = torch.tensor(outputs)

        fpr, tpr, _ = metrics.roc_curve(targets, scores)
        roc_auc = metrics.auc(fpr, tpr)
        print("ROC AUC = ", roc_auc)

        dataforhiplot.append({"hidden_dim": hidden_dim, "lstm_layers": lstm_layers, "lr": run["lr"],
                              "dropout": run["dropout"], "dropout_w": run["dropout_w"],
                              "dropout_conv": run["dropout_conv"],
                              "val_auc": run["auc"], "test_auc": roc_auc})

        if roc_auc > best_test_auc:
            print("***** best...")
            best_test_auc = roc_auc

    # Save the HiPlot data; the context manager closes the file handle.
    with open(cache_file, "wb") as cache_handle:
        pickle.dump(dataforhiplot, cache_handle)

    return dataforhiplot

In [None]:
model_file_path = "model_archive/20210325_mimic3_gridsearch"

# One cache file per test set, selected by the `mimic4` flag.
cache_path = "dataforhiplot_mimic4_all.pkl" if mimic4 else "dataforhiplot_mimic3_all.pkl"

try:
    # NOTE: unpickling can execute arbitrary code -- only load cache files
    # that were produced by this notebook.
    with open(cache_path, "rb") as cache_handle:
        dataforhiplot = pickle.load(cache_handle)
except (FileNotFoundError, EOFError, pickle.UnpicklingError):
    # No usable cache: evaluate every archived checkpoint instead. Other
    # errors are no longer swallowed by a bare `except:`.
    listoffiles = glob.glob(f"{model_file_path}/model__*.pth")
    dataforhiplot = computeAnalysisData(listoffiles)

In [None]:
# Show the per-run records in `dataforhiplot` with HiPlot. They come from the
# MIMIC-IV cache when `mimic4` is True, otherwise from the MIMIC-III cache.
hip.Experiment.from_iterable(dataforhiplot).display()