In [None]:
import itertools
import json
import os
import pickle
import random
import sys
import time

# These must be set before TensorFlow is imported to take effect.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

import numpy as np
import pandas as pd
import sklearn
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Masking, GRU, Dropout, Dense

# Project-local helpers live outside the notebook directory.
sys.path.append("../../../Libraries/")
import utils

# Functions

In [None]:
def build_model(layers, hyperparameters, lr_sch):
    """Assemble and compile the masked single-GRU binary classifier.

    Parameters
    ----------
    layers : sequence
        ``layers[0]`` is the number of input features per time step and
        ``layers[1]`` the number of GRU units. Further entries are not read.
    hyperparameters : dict
        Reads ``"timeStep"``, ``"maskValue"`` and ``"dropout"``; the whole
        dict is also handed to ``utils.weighted_binary_crossentropy``.
    lr_sch
        Value forwarded to Adam as ``learning_rate``.

    Returns
    -------
    tf.keras.Model
        The compiled model with a single sigmoid output unit.
    """
    keras_layers = tf.keras.layers

    n_steps = hyperparameters["timeStep"]
    n_features = layers[0]
    gru_units = layers[1]

    # Input -> mask padded steps -> GRU (last state only) -> sigmoid unit.
    sequence_in = keras_layers.Input(shape=(n_steps, n_features))
    mask_layer = keras_layers.Masking(mask_value=hyperparameters['maskValue'])
    gru_layer = keras_layers.GRU(
        gru_units,
        dropout=hyperparameters['dropout'],
        return_sequences=False,
        activation='tanh',
        use_bias=True,
    )
    gru_state = gru_layer(mask_layer(sequence_in))
    probability = keras_layers.Dense(1, activation="sigmoid")(gru_state)

    network = tf.keras.Model(inputs=sequence_in, outputs=[probability])

    adam = tf.keras.optimizers.Adam(learning_rate=lr_sch)
    weighted_loss = utils.weighted_binary_crossentropy(hyperparameters)
    network.compile(loss=weighted_loss, optimizer=adam)

    return network

In [None]:
def run_network(X_train, y_train, X_val, y_val, hyperparameters, seed):
    """Build a model from ``hyperparameters`` and fit it with early stopping.

    Parameters
    ----------
    X_train, y_train
        Training inputs and targets, passed unchanged to ``model.fit``.
    X_val, y_val
        Validation inputs and targets, passed as ``validation_data``.
    hyperparameters : dict
        Reads ``"layers"``, ``"lr_scheduler"``, ``"mindelta"``,
        ``"patience"``, ``"batch_size"``, ``"epochs"`` and ``"verbose"``;
        the whole dict is also forwarded to ``build_model``.
    seed
        Accepted for interface compatibility but not used here.
        NOTE(review): confirm whether seeding is handled elsewhere
        (e.g. in ``utils.reset_keras``) or should be applied in this function.

    Returns
    -------
    tuple
        ``(model, hist, earlystopping)``: the fitted model, the object
        returned by ``model.fit`` and the ``EarlyStopping`` callback.

    Raises
    ------
    KeyboardInterrupt
        Re-raised after printing the elapsed training time, so that an
        interrupted run stops the caller instead of continuing with a
        partially trained model.
    """
    model = build_model(hyperparameters['layers'], hyperparameters, hyperparameters['lr_scheduler'])

    # Stop on stalled validation loss and roll back to the best epoch's weights.
    earlystopping = tf.keras.callbacks.EarlyStopping(
        monitor='val_loss',
        min_delta=hyperparameters["mindelta"],
        patience=hyperparameters["patience"],
        restore_best_weights=True,
        mode="min",
    )

    start_time = time.time()
    try:
        hist = model.fit(
            X_train, y_train,
            validation_data=(X_val, y_val),
            callbacks=[earlystopping],
            batch_size=hyperparameters['batch_size'],
            epochs=hyperparameters['epochs'],
            verbose=hyperparameters['verbose'],
        )
    except KeyboardInterrupt:
        # The previous handler referenced undefined names and returned a
        # 4-tuple that no caller could unpack; report the duration and let
        # the interrupt propagate instead.
        print('Training duration (s) : ', time.time() - start_time)
        raise

    return model, hist, earlystopping


In [None]:
def myCVGrid(hyperparameters, dropout, lr_scheduler, layers, split, seed):
    """Grid-search dropout, layer layout and learning rate with K-fold validation.

    Every combination of ``dropout`` x ``layers`` x ``lr_scheduler`` is
    trained once per fold via ``run_network``. A combination is scored by the
    mean, over folds, of the minimum validation loss reached during training;
    the lowest score wins, and on ties the first combination tried is kept.

    Parameters
    ----------
    hyperparameters : dict
        Base configuration. It is copied for each candidate, never modified.
        ``"kfold"`` gives the number of folds to read (default 5).
    dropout, lr_scheduler, layers : iterable
        Candidate values for the three searched hyperparameters.
    split
        Identifier of the data split; selects the ``split_<split>`` directory.
    seed
        Forwarded unchanged to ``run_network``.

    Returns
    -------
    tuple
        ``(bestHyperparameters, X_train, y_train, X_val, y_val)`` where
        ``bestHyperparameters`` holds the winning ``'dropout'``, ``'layers'``
        and ``'lr_scheduler'`` (empty if no candidate scored below infinity)
        and the four data objects are those of the last fold.

    Raises
    ------
    ValueError
        If the configured number of folds is smaller than 1.
    """
    split_dir = "../../../ORIGINAL_DATA/splits_14_days/glasso/split_" + str(split)

    n_folds = hyperparameters.get('kfold', 5)
    if n_folds < 1:
        raise ValueError("hyperparameters['kfold'] must be at least 1")

    # The fold files do not depend on the hyperparameters being evaluated, so
    # read them once here instead of once per grid point.
    folds = []
    for n in range(n_folds):
        X_train = np.load(split_dir + "/X_train_tensor_" + str(n) + ".npy")
        y_train = pd.read_csv(split_dir + "/y_train_" + str(n) + ".csv", index_col=0)
        X_val = np.load(split_dir + "/X_val_tensor_" + str(n) + ".npy")
        y_val = pd.read_csv(split_dir + "/y_val_" + str(n) + ".csv", index_col=0)
        folds.append((X_train, y_train, X_val, y_val))

    bestHyperparameters = {}
    bestMetricDev = np.inf

    # product() iterates with the right-most argument varying fastest, i.e.
    # the same order as nested dropout -> layers -> lr_scheduler loops.
    for drop_rate, layer_sizes, learning_rate in itertools.product(dropout, layers, lr_scheduler):
        hyperparameters_copy = hyperparameters.copy()
        hyperparameters_copy['dropout'] = drop_rate
        hyperparameters_copy['layers'] = layer_sizes
        hyperparameters_copy['lr_scheduler'] = learning_rate

        v_val_loss = []
        for X_train, y_train, X_val, y_val in folds:
            utils.reset_keras()
            # Only the history is needed; dropping the model and callback
            # references lets them be reclaimed before the next fold.
            _, hist, _ = run_network(
                X_train,
                y_train,
                X_val,
                y_val,
                hyperparameters_copy,
                seed,
            )
            v_val_loss.append(np.min(hist.history["val_loss"]))

        metric_dev = np.mean(v_val_loss)
        if metric_dev < bestMetricDev:
            bestMetricDev = metric_dev
            bestHyperparameters = {
                'dropout': drop_rate,
                'layers': layer_sizes,
                'lr_scheduler': learning_rate,
            }

    # X_train, y_train, X_val, y_val still refer to the last fold loaded above.
    return bestHyperparameters, X_train, y_train, X_val, y_val

# Hyperparameters

In [None]:
# --- Run-level switches -------------------------------------------------
seeds = [20, 30, 45, 70]
results = ""
debug = True
balance = True

# --- Fixed training settings --------------------------------------------
epochs = 10000
batch_size = 128
inputShape = 16

# --- Search grid --------------------------------------------------------
# Each layout is [input features, GRU units, output units].
layers = [
    [inputShape, units, 1]
    for units in (3, 5, 8, 10, 15, 20, 25, 30, 35, 40, 50)
]
lr_scheduler = [0.0001, 0.001, 0.01, 0.1]
dropout = [0.0, 0.1, 0.2, 0.3]

# --- Loss weights (stored under "w1" / "w2" below) ----------------------
w2 = 0.18
w1 = 0.82

# --- Base configuration shared by every training run --------------------
hyperparameters = {
    "epochs": epochs,
    "batch_size": batch_size,
    "w1": w1,
    "w2": w2,
    "maskValue": 666,
    "patience": 30,
    "monitor": "val_loss",
    "mindelta": 0,
    "balance": balance,
    "timeStep": 14,
    "optimizer": "adam",
    "kfold": 5,
    "level": 3,
    "verbose": 0,
}

tensor = True
tab = "\t" * hyperparameters["level"]


# Code

In [None]:
# Set to True to run the full grid search and final training for each split.
# Left at False so that re-running the notebook skips this expensive cell.
run_model = False

if run_model:
    v_early = []
    loss_dev = []
    v_models = []
    bestHyperparameters_bySplit = {}
    y_pred_by_split = {}

    # Splits are numbered 1..3; seeds[i] therefore never uses seeds[0].
    for i in range(1, 4):
        data_directory = "../../../ORIGINAL_DATA/splits_14_days/glasso/split_" + str(i)
        X_test = np.load(data_directory + "/X_test_tensor.npy")
        y_test = pd.read_csv(data_directory + "/y_test.csv", index_col=0)

        # Hyperparameter search; also hands back the train/validation data
        # that is used for the final fit below.
        bestHyperparameters, X_train, y_train, X_val, y_val = myCVGrid(
            hyperparameters,
            dropout,
            lr_scheduler,
            layers,
            i,
            seeds[i],
        )
        bestHyperparameters_bySplit[str(i)] = bestHyperparameters

        # Save best hyperparameters for current split
        split_directory = './Results_GRU-Glasso/split_' + str(i)
        os.makedirs(split_directory, exist_ok=True)

        with open(os.path.join(split_directory, f"bestHyperparameters_split_{i}.pkl"), 'wb') as f:
            pickle.dump(bestHyperparameters, f)

        # Build the final configuration under its own name: rebinding the
        # global `hyperparameters` here made the cell non-idempotent and
        # changed the base config seen by later splits.
        final_hyperparameters = {
            "w1": hyperparameters["w1"], "w2": hyperparameters["w2"],
            "timeStep": hyperparameters["timeStep"],
            'epochs': hyperparameters["epochs"],
            'batch_size': hyperparameters["batch_size"],
            'maskValue': hyperparameters["maskValue"],
            'earlyStopping': True,
            'kfold': hyperparameters["kfold"],
            'monitor': hyperparameters["monitor"],
            "mindelta": hyperparameters["mindelta"],
            "patience": hyperparameters["patience"],
            'balance': hyperparameters["balance"],
            "dropout": bestHyperparameters["dropout"],
            "layers": bestHyperparameters["layers"],
            "lr_scheduler": bestHyperparameters["lr_scheduler"],
            "level": 3, 'verbose': 0
        }

        # Train with the selected hyperparameters, then evaluate on test.
        utils.reset_keras()

        model, hist, early = run_network(
            X_train, y_train.individualMRGerm.values,
            X_val, y_val.individualMRGerm.values,
            final_hyperparameters,
            seeds[i]
        )

        v_models.append(model)
        loss_dev.append(hist.history['val_loss'])

        y_pred = model.predict(x=[X_test])
        y_pred_by_split[str(i)] = y_pred

        with open(os.path.join(split_directory, f"y_pred_split_{i}.pkl"), 'wb') as f:
            pickle.dump(y_pred, f)

        # Save model for current split
        model_filename = os.path.join(split_directory, f"model_split_{i}.h5")
        model.save(model_filename)

        # Calculate metrics
        metrics_dict = utils.calculate_and_save_metrics(
            y_test.individualMRGerm.values,
            y_pred,
            split_directory,
            split_index=i
        )