In [None]:
import pandas as pd
import numpy as np
import os 

import matplotlib.pyplot as plt
import seaborn as sns

import keras
#from tensorflow.python.keras import backend as K
from tensorflow.python.keras import regularizers
from tensorflow.python.keras.models import Sequential, Model
from tensorflow.python.keras.layers import Flatten, Dense, Dropout, Input, concatenate, Activation, Concatenate, LSTM, GRU
from tensorflow.python.keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D, Conv1D, BatchNormalization, GRU, Convolution1D, LSTM
from tensorflow.python.keras.layers import UpSampling1D, MaxPooling1D, GlobalMaxPooling1D, GlobalAveragePooling1D,MaxPool1D
from tensorflow.python.keras.callbacks import EarlyStopping, ModelCheckpoint, History, ReduceLROnPlateau
#from tensorflow.keras.utils import np_utils
import tensorflow as tf

from sklearn.utils import class_weight
from sklearn.metrics import average_precision_score, roc_auc_score, accuracy_score, f1_score
import gc 

In [None]:
# Directory holding the pickled inputs and labels read by this notebook.
timeseries_path = "data/timeseries/"

# Model inputs for the train / dev / test splits (loaded in that order).
x_train, x_dev, x_test = (
    pd.read_pickle(os.path.join(timeseries_path, file_name))
    for file_name in ("x_train_lstm.p", "x_dev_lstm.p", "x_test_lstm.p")
)

# Label tables for the same splits; later cells index them by task name,
# e.g. y_train["mort_hosp"].
y_train, y_dev, y_test = (
    pd.read_pickle(os.path.join(timeseries_path, file_name))
    for file_name in ("y_train.p", "y_dev.p", "y_test.p")
)

# NOTE(review): `ys` is not referenced by any of the cells visible here.
ys = pd.read_pickle(os.path.join(timeseries_path, "ys.p"))

In [None]:
# Reset Keras Session
def reset_keras(model):
    """Drop this function's reference to ``model`` and run the garbage collector.

    ``del`` only unbinds the local parameter name. Any reference the caller
    still holds keeps the object alive, so the caller must delete its own
    reference as well for the memory to actually be reclaimed.

    Args:
        model: The object (e.g. a Keras model) whose local reference is dropped.

    Returns:
        int: The number of unreachable objects found by ``gc.collect()``.
    """
    # Deleting a bound local name cannot raise, so no try/except is needed here.
    del model

    # Return the count so a notebook cell can display how much was collected.
    return gc.collect()

def make_prediction_timeseries(model, test_data, threshold=0.5):
    """Run the model on ``test_data`` and binarise its outputs.

    Args:
        model: Any object exposing ``predict(test_data)``.
        test_data: Input passed straight through to ``model.predict``.
        threshold: Scores greater than or equal to this value are labelled 1,
            everything else 0. Defaults to 0.5.

    Returns:
        tuple: ``(probs, y_pred)`` where ``probs`` is the unmodified output of
        ``model.predict`` and ``y_pred`` is a list of 0/1 ints, one per element
        obtained by iterating over ``probs``.
    """
    probs = model.predict(test_data)
    # assumes each element of `probs` is a single score (a scalar or a
    # one-element row) so the comparison yields one truth value — TODO confirm
    y_pred = [1 if score >= threshold else 0 for score in probs]
    return probs, y_pred

def save_scores_timeseries(predictions, probs, ground_truth):
    """Compute, print and return four classification metrics.

    Args:
        predictions: Hard class labels, used for accuracy and F1.
        probs: Continuous scores, used for ROC AUC and average precision.
        ground_truth: True labels that both of the above are scored against.

    Returns:
        dict: Metric values under the keys ``'auc'``, ``'auprc'``, ``'acc'``
        and ``'F1'``.
    """
    # Score-based metrics first, then the label-based ones.
    result_dict = {
        'auc': roc_auc_score(ground_truth, probs),
        'auprc': average_precision_score(ground_truth, probs),
        'acc': accuracy_score(ground_truth, predictions),
        'F1': f1_score(ground_truth, predictions),
    }

    print("AUC: ", result_dict['auc'], "AUPRC: ", result_dict['auprc'],
          "ACC: ", result_dict['acc'], "F1: ", result_dict['F1'])
    return result_dict

In [None]:
def timeseries_model():
    """Build and compile the GRU-based binary classifier.

    Architecture: Input(24, 104) -> GRU(128) -> Dense(512, relu)
    -> Dropout(0.3) -> Dense(256, relu) -> Dense(1, sigmoid). The final layer
    uses a Glorot-normal kernel initialiser and an L2 kernel penalty of 0.05.

    Returns:
        A Keras ``Model`` compiled with binary cross-entropy loss, the
        ``'adam'`` optimizer and the ``'acc'`` metric.
    """
    # presumably 24 time steps x 104 features per step — confirm against the data
    inputs = Input(shape=(24, 104), name="timeseries_input")

    # Recurrent encoder followed by the fully connected head.
    encoded = GRU(128)(inputs)
    hidden = Dense(512, activation='relu')(encoded)
    hidden = Dropout(0.3)(hidden)
    hidden = Dense(256, activation='relu')(hidden)

    output_penalty = tf.keras.regularizers.L2(l2=0.05)
    output_layer = Dense(
        1,
        activation='sigmoid',
        use_bias=True,
        kernel_initializer=tf.keras.initializers.glorot_normal(),
        kernel_regularizer=output_penalty,
    )
    prediction = output_layer(hidden)

    classifier = Model(inputs=inputs, outputs=prediction)
    classifier.compile(loss='binary_crossentropy', optimizer='adam', metrics=['acc'])
    return classifier

In [None]:
# --- Experiment configuration ---------------------------------------------
epoch_num = 100
model_patience = 4
monitor_criteria = 'val_loss'
batch_size = 32
iteration_number = 10

target_problems = ['mort_hosp', 'mort_icu', 'los_3', 'los_7']

# One result slot per repetition, sized from iteration_number so the two
# cannot drift apart when the number of repetitions is changed.
save_scores = {run_id: [] for run_id in range(iteration_number)}

# Make sure the checkpoint directory exists before the first save.
checkpoint_dir = "models/timeseries/128+hidden/"
os.makedirs(checkpoint_dir, exist_ok=True)

# --- Repeated training / evaluation ----------------------------------------
for iteration in range(0, iteration_number):
    # The iteration index doubles as the random seed of the repetition.
    np.random.seed(iteration)
    tf.random.set_seed(iteration)

    # NOTE(review): the file name depends only on the iteration, so every
    # problem in the inner loop overwrites the previous problem's checkpoint.
    # The scores are unaffected because the weights are reloaded right after
    # each fit, but only the last problem's weights remain on disk.
    best_model_name = checkpoint_dir + "timeseries_best_model_128" + str(iteration) + ".hdf5"

    temp_results = []
    for each_problem in target_problems:
        print (iteration, ": Problem type: ", each_problem)
        print ("__________________")

        # Both callbacks watch the same quantity. mode='auto' resolves to
        # 'min' for a loss, matching EarlyStopping's default mode.
        # The deprecated `period=1` argument is dropped: saving after every
        # epoch is already the default behaviour.
        early_stopping_monitor = EarlyStopping(monitor=monitor_criteria, patience=model_patience)
        checkpoint = ModelCheckpoint(best_model_name, monitor=monitor_criteria, verbose=1,
                                     save_best_only=True, mode='auto')
        callbacks = [early_stopping_monitor, checkpoint]

        model = timeseries_model()

        if each_problem == "mort_icu":
            # Hand-picked weighting for this task instead of 'balanced'.
            class_weight_dict = {0: 1, 1: 5}
        else:
            # `classes` and `y` must be passed by keyword: current
            # scikit-learn releases reject them as positional arguments.
            class_weights = class_weight.compute_class_weight(
                'balanced',
                classes=np.unique(y_train[each_problem]),
                y=y_train[each_problem])
            # assumes the sorted unique labels are 0..k-1 so the position
            # equals the class id — TODO confirm
            class_weight_dict = dict(enumerate(class_weights))

        model.fit(x_train, y_train[each_problem], epochs=epoch_num, verbose=1,
                  validation_data=(x_dev, y_dev[each_problem]), callbacks=callbacks,
                  batch_size=batch_size, class_weight=class_weight_dict)

        # Evaluate the best checkpoint rather than the last-epoch weights.
        model.load_weights(best_model_name)

        probs, predictions = make_prediction_timeseries(model, x_test)
        scores = save_scores_timeseries(predictions, probs, y_test[each_problem].values)

        temp = {each_problem: scores}
        temp_results.append(temp)

        # Release the model and the Keras graph state before the next build.
        del model
        tf.keras.backend.clear_session()
    save_scores[iteration] = temp_results

In [None]:
def compare_result(data, target, metric, max_runs=5):
    """Print and return the mean and standard deviation of a metric across runs.

    Args:
        data: Mapping from run id to a list of single-problem dicts shaped
            like ``{problem_name: {metric_name: value}}``.
        target: Problem name to report, e.g. ``"mort_hosp"``.
        metric: Metric name to report, e.g. ``"auc"``.
        max_runs: Number of leading runs (in ``data`` iteration order) to
            include. Defaults to 5, the cap this function has always used.
            Pass ``None`` to include every run.

    Returns:
        tuple: ``(mean, std)`` of the collected metric values, as computed by
        ``np.mean`` and ``np.std``.

    Raises:
        KeyError: If an included run holds no entry for ``target``.
    """
    runs = list(data.items())
    if max_runs is not None:
        runs = runs[:max_runs]

    res_list = []
    for run_id, run_results in runs:
        # Look the problem up by key instead of by list position, so the
        # result does not depend on the order the problems were trained in.
        for entry in run_results:
            if target in entry:
                res_list.append(entry[target][metric])
                break
        else:
            raise KeyError("no results for target %r in run %r" % (target, run_id))

    mean, std = np.mean(res_list), np.std(res_list)
    print(target, metric, mean, std)
    return mean, std

In [None]:
# Report AUC, AUPRC and F1 for every task (compare_result prints the mean and
# standard deviation), with a blank line after each task's block.
for target in ("mort_hosp", "mort_icu", "los_3", "los_7"):
    compare_result(save_scores, target, "auc")
    compare_result(save_scores, target, "auprc")
    compare_result(save_scores, target, "F1")
    print("")

In [None]:
# Persist the per-run score dictionary next to the saved models so it can be
# reloaded without retraining.
path = "models/timeseries/"
pd.to_pickle(
    save_scores,
    os.path.join(path, "timeseries_score.p"),
)