In [7]:
import pandas as pd
import os
import numpy as np
from gensim.models import Word2Vec, FastText
from mittens import GloVe

# import glove
# from glove import Corpus

import collections
import gc 

import keras
from keras import backend as K
from keras import regularizers
from keras.models import Sequential, Model
from keras.layers import Flatten, Dense, Dropout, Input, concatenate, merge, Activation, Concatenate, LSTM, GRU
from keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D, Conv1D, BatchNormalization, GRU, Convolution1D, LSTM
from keras.layers import UpSampling1D, MaxPooling1D, GlobalMaxPooling1D, GlobalAveragePooling1D,MaxPool1D, merge

from keras.optimizers import Adam


from keras.utils import np_utils
from keras.backend.tensorflow_backend import set_session, clear_session, get_session
import tensorflow as tf

from keras.callbacks import EarlyStopping, ModelCheckpoint, History, ReduceLROnPlateau


from sklearn.utils import class_weight
from sklearn.metrics import average_precision_score, roc_auc_score, accuracy_score, f1_score

import warnings
warnings.filterwarnings('ignore')

In [10]:
# Reset Keras Session
def reset_keras(model):
    """Tear down the current Keras/TensorFlow session and run the garbage collector.

    Parameters
    ----------
    model
        The model that was just trained. Only this function's local reference
        to it is dropped; the caller's own variable still keeps the object
        alive until the caller rebinds or deletes it.

    Returns
    -------
    None
    """
    old_session = get_session()
    clear_session()
    old_session.close()
    # Ask the backend for a session again after the old one has been cleared
    # and closed (the returned handle itself is not needed here).
    get_session()

    # Deleting a parameter name can never fail, so no try/except is needed.
    # Note that this does NOT remove the caller's (e.g. notebook-global) name.
    del model

    # The number of collected objects is intentionally discarded.
    gc.collect()

def make_prediction_timeseries(model, test_data, threshold=0.5):
    """Score ``test_data`` with ``model`` and binarise the scores.

    Parameters
    ----------
    model
        Any object exposing ``predict(test_data)``.
    test_data
        Passed to ``model.predict`` unchanged.
    threshold : float, optional
        Decision cut-off; a score ``>= threshold`` is labelled 1, anything
        else 0. Defaults to 0.5, the value that was previously hard-coded.

    Returns
    -------
    probs
        Exactly what ``model.predict`` returned.
    y_pred : list of int
        One 0/1 label per element obtained by iterating over ``probs``.
    """
    probs = model.predict(test_data)
    y_pred = [1 if score >= threshold else 0 for score in probs]
    return probs, y_pred

def save_scores_timeseries(predictions, probs, ground_truth, model_name,
                problem_type, iteration, hidden_unit_size, type_of_ner,
                result_path="results/"):
    """Compute evaluation metrics, pickle them to disk and print them.

    Parameters
    ----------
    predictions
        Hard labels, used for accuracy and F1.
    probs
        Predicted scores, used for ROC-AUC and average precision (AUPRC).
    ground_truth
        True labels.
    model_name, problem_type, type_of_ner : str
        Components of the output file name (concatenated as strings).
    iteration, hidden_unit_size
        Further file-name components; converted with ``str()``.
    result_path : str, optional
        Directory the pickle is written to. Defaults to ``"results/"``.
        It is created if it does not exist yet, so a fresh checkout no
        longer fails with ``FileNotFoundError`` on the first save.

    Returns
    -------
    None
        The metrics are written to
        ``<hidden_unit_size>-<model_name>-<problem_type>-<iteration>-<type_of_ner>.p``
        as a dict with keys ``'auc'``, ``'auprc'``, ``'acc'`` and ``'F1'``,
        and printed in that order.
    """
    result_dict = {
        'auc': roc_auc_score(ground_truth, probs),
        'auprc': average_precision_score(ground_truth, probs),
        'acc': accuracy_score(ground_truth, predictions),
        'F1': f1_score(ground_truth, predictions),
    }

    file_name = "-".join([str(hidden_unit_size), model_name, problem_type,
                          str(iteration), type_of_ner]) + ".p"

    # An empty result_path means "current directory"; makedirs("") would raise.
    if result_path:
        os.makedirs(result_path, exist_ok=True)
    pd.to_pickle(result_dict, os.path.join(result_path, file_name))

    print(result_dict['auc'], result_dict['auprc'], result_dict['acc'], result_dict['F1'])

In [11]:
def timeseries_model(layer_name, number_of_unit, input_shape=(24, 104)):
    """Build and compile a single-recurrent-layer binary classifier.

    The previous Keras session is cleared first, so any model built earlier
    in the same process is invalidated by calling this function.

    Parameters
    ----------
    layer_name : str
        ``"LSTM"`` selects an LSTM layer; any other value falls back to GRU.
    number_of_unit : int
        Number of units in the recurrent layer.
    input_shape : tuple, optional
        Per-sample input shape (batch dimension excluded) handed to
        ``Input``. Defaults to ``(24, 104)``, the value that was previously
        hard-coded.

    Returns
    -------
    keras.models.Model
        Model compiled with binary cross-entropy loss, the Adam optimizer
        and the ``'acc'`` metric.
    """
    K.clear_session()

    sequence_input = Input(shape=input_shape, name="timeseries_input")

    recurrent_layer = LSTM if layer_name == "LSTM" else GRU
    x = recurrent_layer(number_of_unit)(sequence_input)

    # Keras-native replacements for the deprecated tf.contrib helpers:
    #  * tf.contrib.layers.l2_regularizer(scale=0.01) penalises
    #    0.01 * sum(w**2) / 2, whereas regularizers.l2(c) penalises
    #    c * sum(w**2) -- hence c = 0.005 keeps the same penalty.
    #  * tf.contrib.layers.xavier_initializer() with its default arguments is
    #    Glorot-uniform initialisation.
    sigmoid_pred = Dense(1, activation='sigmoid', use_bias=False,
                         kernel_initializer='glorot_uniform',
                         kernel_regularizer=regularizers.l2(0.005))(x)

    model = Model(inputs=sequence_input, outputs=sigmoid_pred)
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['acc'])
    return model

In [4]:
# Prefix selecting which family of pre-split pickles under data/ is loaded.
type_of_ner = "new"

# NOTE(review): pd.read_pickle unpickles arbitrary objects -- only point this
# at files produced by a trusted pipeline.

# Model inputs for the train / dev / test splits (loaded in that order).
x_train_lstm, x_dev_lstm, x_test_lstm = (
    pd.read_pickle("data/" + type_of_ner + "_x_" + split_name + ".pkl")
    for split_name in ("train", "dev", "test")
)

# Matching targets; later cells index these by problem name
# (e.g. y_train[each_problem]).
y_train, y_dev, y_test = (
    pd.read_pickle("data/" + type_of_ner + "_y_" + split_name + ".pkl")
    for split_name in ("train", "dev", "test")
)

In [5]:
from keras.utils.layer_utils import count_params

def countParams(model):
    """Print the number of trainable and non-trainable parameters of ``model``.

    Both totals are computed with ``count_params`` from the model's
    ``trainable_weights`` and ``non_trainable_weights`` before anything is
    printed. Returns ``None``.
    """
    report = (
        ("Trainable count:", count_params(model.trainable_weights)),
        ("Non-Trainable count:", count_params(model.non_trainable_weights)),
    )
    for label, total in report:
        print(label, total)


In [6]:
# ---- Experiment configuration ----------------------------------------------
epoch_num = 100                  # upper bound on epochs; early stopping usually ends sooner
model_patience = 3               # epochs without improvement before training stops
monitor_criteria = 'val_loss'    # metric watched by BOTH early stopping and checkpointing
batch_size = 128

unit_sizes = [128, 256]
#unit_sizes = [256]
iter_num = 11                    # range(1, iter_num) below -> iterations 1..10
target_problems = ['mort_hosp', 'mort_icu', 'los_3', 'los_7']
layers = ["LSTM", "GRU"]
#layers = ["GRU"]

# ---- Grid: layer type x hidden size x repetition x target problem ----------
for each_layer in layers:
    print("Layer: ", each_layer)
    for each_unit_size in unit_sizes:
        print("Hidden unit: ", each_unit_size)
        for iteration in range(1, iter_num):
            print("Iteration number: ", iteration)
            print("=============================")

            for each_problem in target_problems:
                print ("Problem type: ", each_problem)
                print ("__________________")

                early_stopping_monitor = EarlyStopping(monitor=monitor_criteria, patience=model_patience)

                # The checkpoint name has no iteration component, so every
                # repetition of the same (layer, size, problem) overwrites it.
                best_model_name = str(each_layer)+"-"+str(each_unit_size)+"-"+str(each_problem)+"-"+"best_model.hdf5"

                # Track the same metric as early stopping. mode='auto' lets
                # Keras choose the direction from the metric name (minimise
                # for 'val_loss'), so the two callbacks cannot disagree when
                # monitor_criteria is changed.
                checkpoint = ModelCheckpoint(best_model_name, monitor=monitor_criteria, verbose=1,
                    save_best_only=True, mode='auto', period=1)

                callbacks = [early_stopping_monitor, checkpoint]

                model = timeseries_model(each_layer, each_unit_size)
                model.fit(x_train_lstm, y_train[each_problem], epochs=epoch_num, verbose=1,
                          validation_data=(x_dev_lstm, y_dev[each_problem]), callbacks=callbacks, batch_size= batch_size)

                # Evaluate with the best checkpointed weights rather than the
                # weights from the last epoch.
                model.load_weights(best_model_name)

                probs, predictions = make_prediction_timeseries(model, x_test_lstm)
                save_scores_timeseries(predictions, probs, y_test[each_problem].values,str(each_layer),
                                       each_problem, iteration, each_unit_size,type_of_ner)

                trainable_count = count_params(model.trainable_weights)
                non_trainable_count = count_params(model.non_trainable_weights)
                print("Trainable count:", trainable_count)
                print("Non-Trainable count:", non_trainable_count)

                # reset_keras already clears the session and runs gc.collect(),
                # so no further clean-up calls are needed here.
                reset_keras(model)

Layer:  LSTM
Hidden unit:  128
Iteration number:  1
Problem type:  mort_hosp
__________________





The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where



Train on 15219 samples, validate on 2164 samples
Epoch 1/100







2022-04-23 21:10:41.013876: I tensorflow/core/platform/cpu_feature_guard.cc:142] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
2022-04-23 21:10:41.034143: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x7fec4b161440 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2022-04-23 21:10:41.034175: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): Host, Default Version



Epoch 00001: val_loss improved from inf to 0.24437, saving model to LSTM-128-mort_hosp-best_model.hdf5
Epoch 2/100

Epoch 00002: val_loss improved from 0.24437 to 0.23786, saving model to LSTM-128-mort_hosp-best_model.hdf5
Epoch 3/100

Epoch 00003: val_loss improved from 0.23786 to 0.23298, saving model to LSTM-128-mort_hosp-best_model.hdf5
Epoch 4/100

Epoch 00004: val_loss did not improve from 0.23298
Epoch 5/100

Epoch 00005: val_loss did not improve from 0.23298
Epoch 6/100

Epoch 00006: val_loss did not improve from 0.23298
0.871069587628866 0.5533930244032212 0.9132934682612696 0.43137254901960786
Trainable count: 119424
Non-Trainable count: 0
Problem type:  mort_icu
__________________
Train on 15219 samples, validate on 2164 samples
Epoch 1/100

Epoch 00001: val_loss improved from inf to 0.17890, saving model to LSTM-128-mort_icu-best_model.hdf5
Epoch 2/100

Epoch 00002: val_loss improved from 0.17890 to 0.17662, saving model to LSTM-128-mort_icu-best_model.hdf5
Epoch 3/100

Ep