# Changes made by kts4 / noami2
- `glove` was not available, so `mittens` was installed and used instead
  - `from mittens import GloVe as glove`
- Removed `merge` from the `keras.layers` import
  - No longer available and not used.
- `set_session`, `clear_session` and `get_session` are no longer available from `keras.backend.tensorflow_backend`
  - Loaded from `tf.compat.v1.keras.backend` instead
- `tf.contrib.layers.l2_regularizer` no longer available
  - Used `tf.keras.regularizers.l2` instead
- `tf.contrib.layers.xavier_initializer` no longer available
  - Used `tf.keras.initializers.GlorotUniform` instead 
- Ensured same results as paper
   - Removed unused calls to use `LSTM` models.
   - Calculated using `unit_size` of 256 only
- Changes to newlines / spacings / printing etc.

In [1]:
import pandas as pd
import os
import numpy as np
from gensim.models import Word2Vec, FastText
from mittens import GloVe as glove

import collections
import gc 

import tensorflow as tf
tf.compat.v1.enable_eager_execution()
from tensorflow.keras import backend as K
from tensorflow.keras import regularizers
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Flatten, Dense, Dropout, Input, concatenate, Activation, Concatenate, GRU, Conv2D, MaxPooling2D, UpSampling2D, Conv1D, BatchNormalization, Convolution1D, UpSampling1D, MaxPooling1D, GlobalMaxPooling1D, GlobalAveragePooling1D, MaxPool1D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, History, ReduceLROnPlateau
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.backend import clear_session

from sklearn.utils import class_weight
from sklearn.metrics import average_precision_score, roc_auc_score, accuracy_score, f1_score

import warnings
warnings.filterwarnings('ignore')

In [2]:
import tensorflow as tf

# Ask TensorFlow which physical devices it can see.
cpus = tf.config.list_physical_devices('CPU')
gpus = tf.config.list_physical_devices('GPU')

if gpus:
    # Restrict TensorFlow to the first GPU in the list.
    tf.config.set_visible_devices(gpus[0], 'GPU')
    print("Training on GPU:", gpus[0])
else:
    print("GPU not available. Training on CPU.")

Training on GPU: PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')


In [3]:
# Reset Keras Session
#def reset_keras(model):
#    sess = get_session()
#    clear_session()
#    sess.close()
#    sess = get_session()

def reset_keras(model):
    """Clear the global Keras state and release this function's reference to a model.

    Args:
        model: The model that is no longer needed. Only the local name is
            deleted here; the caller's own reference is untouched, so the
            object can only be reclaimed once the caller also rebinds or
            deletes its variable.

    Returns:
        None.
    """
    tf.keras.backend.clear_session()  # Clear the Keras session

    # Drop the local reference exactly once. A second `del model` would always
    # raise UnboundLocalError, which previously was hidden by a bare `except`.
    del model

    gc.collect()  # Ask the garbage collector to reclaim unreachable objects

def make_prediction_timeseries(model, test_data):
    """Run ``model.predict`` and threshold its scores at 0.5.

    Args:
        model: Any object exposing ``predict(test_data)``.
        test_data: Input forwarded unchanged to ``model.predict``.

    Returns:
        A ``(probs, y_pred)`` tuple: ``probs`` is whatever ``predict``
        returned, and ``y_pred`` is a list with ``1`` for every score
        ``>= 0.5`` and ``0`` otherwise.
    """
    scores = model.predict(test_data)
    labels = [int(bool(score >= 0.5)) for score in scores]
    return scores, labels

def save_scores_timeseries(predictions, probs, ground_truth, model_name,
                problem_type, iteration, hidden_unit_size, type_of_ner,
                result_path="results/07-GRU"):
    """Compute evaluation metrics, pickle them to disk and print them.

    Args:
        predictions: Hard 0/1 predictions, scored with accuracy and F1.
        probs: Predicted scores, used for ROC-AUC and average precision.
        ground_truth: True labels aligned with ``predictions`` / ``probs``.
        model_name: Model identifier used in the output file name.
        problem_type: Target-problem identifier used in the output file name.
        iteration: Run number used in the output file name.
        hidden_unit_size: Hidden size used in the output file name.
        type_of_ner: Data-variant identifier used in the output file name.
        result_path: Directory that receives the pickle. Created when missing
            so a finished training run never loses its scores because the
            folder was absent.

    Returns:
        None. The metrics are written to
        ``<result_path>/<hidden_unit_size>-<model_name>-<problem_type>-<iteration>-<type_of_ner>.p``
        as a dict with keys ``auc``, ``auprc``, ``acc`` and ``F1``.
    """
    auc = roc_auc_score(ground_truth, probs)
    auprc = average_precision_score(ground_truth, probs)
    acc = accuracy_score(ground_truth, predictions)
    F1 = f1_score(ground_truth, predictions)

    result_dict = {
        'auc': auc,
        'auprc': auprc,
        'acc': acc,
        'F1': F1,
    }

    file_name = f"{hidden_unit_size}-{model_name}-{problem_type}-{iteration}-{type_of_ner}.p"

    # Make sure the destination exists before writing.
    os.makedirs(result_path, exist_ok=True)
    pd.to_pickle(result_dict, os.path.join(result_path, file_name))

    print(auc, auprc, acc, F1)

In [4]:
def timeseries_model(layer_name, number_of_unit):
    """Build and compile a single-GRU binary classifier.

    Args:
        layer_name: Accepted for call-site compatibility; not used in the body.
        number_of_unit: Number of units passed to the ``GRU`` layer.

    Returns:
        A compiled Keras ``Model`` that takes an input of shape ``(24, 104)``
        named ``timeseries_input`` and produces one sigmoid output.
    """
    # Reset Keras state before building a new model.
    clear_session()

    inputs = Input(shape=(24, 104), name="timeseries_input")
    hidden = GRU(number_of_unit)(inputs)

    # Bias-free sigmoid head with L2-regularised, Glorot-initialised weights.
    l2_penalty = tf.keras.regularizers.l2(0.01)
    head = Dense(
        1,
        activation='sigmoid',
        use_bias=False,
        kernel_initializer=tf.keras.initializers.GlorotUniform(),
        kernel_regularizer=l2_penalty,
    )
    outputs = head(hidden)

    net = Model(inputs=inputs, outputs=outputs)
    net.compile(loss='binary_crossentropy', optimizer='adam', metrics=['acc'])
    return net

In [5]:
# Identifier that prefixes every pickled split under data/.
type_of_ner = "new"

# NOTE(review): unpickling executes arbitrary code; these files are presumed
# to be locally generated and trusted -- confirm before pointing at other data.

# Feature splits
x_train = pd.read_pickle(f"data/{type_of_ner}_x_train.pkl")
x_dev = pd.read_pickle(f"data/{type_of_ner}_x_dev.pkl")
x_test = pd.read_pickle(f"data/{type_of_ner}_x_test.pkl")

# Label splits
y_train = pd.read_pickle(f"data/{type_of_ner}_y_train.pkl")
y_dev = pd.read_pickle(f"data/{type_of_ner}_y_dev.pkl")
y_test = pd.read_pickle(f"data/{type_of_ner}_y_test.pkl")

In [6]:
# ---- Training settings ----
epoch_num = 100                 # maximum number of epochs per fit
model_patience = 3              # patience handed to EarlyStopping
monitor_criteria = 'val_loss'   # quantity watched by EarlyStopping
batch_size = 128

# ---- Experiment grid ----
unit_sizes = [256]
iter_num = 11                   # range(1, iter_num) yields runs 1 .. iter_num - 1
target_problems = ['mort_hosp', 'mort_icu', 'los_3', 'los_7']
layers = ["GRU"]

for each_layer in layers:
    for each_unit_size in unit_sizes:
        for iteration in range(1, iter_num):
            for each_problem in target_problems:

                # Announce the configuration about to be trained.
                print("Layer: ", each_layer)
                print("Hidden unit: ", each_unit_size)
                print ("Problem type: ", each_problem)
                print("Iteration number: ", iteration)
                print ("__________________")

                # The checkpoint name has no iteration component, so each
                # run overwrites the file left by the previous iteration.
                best_model_name = f"{each_layer}-{each_unit_size}-{each_problem}-best_model.keras"

                checkpoint = ModelCheckpoint(
                    best_model_name,
                    monitor='val_loss',
                    verbose=0,
                    save_best_only=True,
                    mode='min',
                )
                early_stopping_monitor = EarlyStopping(
                    monitor=monitor_criteria,
                    patience=model_patience,
                )
                callbacks = [early_stopping_monitor, checkpoint]

                # Train a fresh model, validating on the dev split.
                model = timeseries_model(each_layer, each_unit_size)
                model.fit(
                    x=x_train,
                    y=y_train[each_problem],
                    batch_size=batch_size,
                    epochs=epoch_num,
                    verbose=0,
                    callbacks=callbacks,
                    validation_data=(x_dev, y_dev[each_problem]),
                )

                # Restore the best checkpointed weights before scoring on test.
                model.load_weights(best_model_name)

                probs, predictions = make_prediction_timeseries(model, x_test)
                save_scores_timeseries(
                    predictions,
                    probs,
                    y_test[each_problem].values,
                    str(each_layer),
                    each_problem,
                    iteration,
                    each_unit_size,
                    type_of_ner,
                )

                # Free Keras state and memory before the next configuration.
                reset_keras(model)
                clear_session()
                gc.collect()

Layer:  GRU
Hidden unit:  256
Problem type:  mort_hosp
Iteration number:  1
__________________


2024-04-08 18:27:15.166105: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M3 Max
2024-04-08 18:27:15.166122: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 64.00 GB
2024-04-08 18:27:15.166126: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 24.00 GB
2024-04-08 18:27:15.166139: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-04-08 18:27:15.166147: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)
2024-04-08 18:27:15.467830: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
0.8773534384491213 0.5533717176681976 0.9141816526291828 0.4081632653061224
Layer:  GRU
Hidden unit:  256
Problem type:  mort_icu
Iteration number:  1
__________________
[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
0.8851879207311646 0.5138554139462548 0.9405872979740496 0.4640657084188911
Layer:  GRU
Hidden unit:  256
Problem type:  los_3
Iteration number:  1
__________________
[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
0.6992651122021863 0.6444658108897028 0.66264511723196 0.5825352112676055
Layer:  GRU
Hidden unit:  256
Problem type:  los_7
Iteration number:  1
__________________
[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
0.7401696453924518 0.19821845992356196 0.9194172547234236 0.022099447513812154
Layer:  GRU
Hidden unit:  256
Problem type:  mort_hosp
Iteration number:  2
__________________
[1m138/138[0m [32m━━━