In [None]:
import tensorflow as tf
import numpy as np
import random
import pandas as pd
import matplotlib.pyplot as plt
import sklearn
import pickle
from sklearn.model_selection import GridSearchCV

import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

import warnings
warnings.filterwarnings("ignore")

import sys
sys.path.append("../../../libraries/")
import utils

sys.path.append("../../../classification_architectures/")
import lflr

### Auxiliary functions

In [None]:
def save_results(split_directory, best_hyperparameters, y_test_pred, y_train_pred=None, model=None):
    """Persist the artefacts produced for one split inside ``split_directory``.

    Files written (all relative to ``split_directory``):

    * ``bestHyperparameters.pkl`` -- pickle of ``best_hyperparameters``
    * ``y_test_pred.pkl``         -- pickle of ``y_test_pred``
    * ``y_train_pred.pkl``        -- pickle of ``y_train_pred`` (only if not None)
    * ``model.h5``                -- written by ``model.save(...)`` (only if
      ``model`` is not None)

    Parameters
    ----------
    split_directory : str
        Target directory; it is created (including parents) when missing.
    best_hyperparameters : object
        Any picklable object describing the selected hyperparameters.
    y_test_pred : object
        Picklable test-set predictions.
    y_train_pred : object, optional
        Picklable train-set predictions; skipped when None.
    model : object, optional
        Any object exposing ``save(path)``; skipped when None.
    """
    # exist_ok=True replaces the racy "check, then create" pattern and is a
    # no-op when the directory is already there.
    os.makedirs(split_directory, exist_ok=True)

    # (file name, payload) pairs, written in this order.
    pickled_outputs = [
        ("bestHyperparameters.pkl", best_hyperparameters),
        ("y_test_pred.pkl", y_test_pred),
    ]
    if y_train_pred is not None:
        pickled_outputs.append(("y_train_pred.pkl", y_train_pred))

    for filename, payload in pickled_outputs:
        with open(os.path.join(split_directory, filename), 'wb') as f:
            pickle.dump(payload, f)

    if model is not None:
        model.save(os.path.join(split_directory, "model.h5"))


# Hyperparameters

In [None]:
# Random seeds; the training loop indexes this list with the split number.
seeds = [20, 30, 45, 70]

# Run flags.
tensor = True
debug = True
balance = True

# Dataset dimensions.
n_max_num = 5
n_categorical_features = 3
n_numerical_features = 5
n_static_features = n_categorical_features + n_numerical_features
n_dynamic_features = 56
n_timesteps = 14

# Hyperparameters of the network.
epochs = 10000
batch_size = 128

# Candidate values explored by the grid search.
layers = [3, 5, 8, 10, 15, 20, 25, 30, 35, 40, 50]
lr_scheduler = [0.0001, 0.001, 0.01, 0.1]
dropout_rate = [0.0, 0.1, 0.2, 0.3]

# NOTE(review): presumably the two class weights (they sum to 1) -- confirm in lflr.
w1 = 0.82
w2 = 0.18

# Fixed settings handed to the lflr helpers; the training loop later adds the
# selected "dropout_rate", "layers" and "lr_scheduler" to this same dict.
hyperparameters = {
    # data layout
    "n_categorical_features": n_categorical_features,
    "n_numerical_features": n_numerical_features,
    "n_static_features": n_static_features,
    "n_dynamic_features": n_dynamic_features,
    "n_timesteps": n_timesteps,
    # loss weights
    "w1": w1,
    "w2": w2,
    # training loop
    "epochs": epochs,
    "batch_size": batch_size,
    "maskValue": 666,  # NOTE(review): looks like a padding sentinel -- confirm in lflr
    # early stopping
    "monitor": "val_loss",
    "mindelta": 0,
    "patience": 30,
    # misc
    "balance": balance,
    "optimizer": "adam",
    "kfold": 5,
    "level": 3,
    "verbose": 0,
}

# Training and evaluation per split (dynamic, static and LR models)

In [None]:
# Per-split accumulators. Lists are appended to once per split; dicts are keyed
# by the split index as a string.
v_early = []  # NOTE(review): never populated in this cell -- confirm whether still needed
loss_dev_stat = []
loss_dev_dyn = []
loss_dev_LR = []
v_models_stat = []
v_models_dyn = []
v_models_LR = []
bestHyperparameters_bySplit_dyn = {}
bestHyperparameters_bySplit_stat = {}
bestHyperparameters_bySplit_LR = {}
y_pred_by_split_stat = {}
y_pred_by_split_dyn = {}
y_pred_by_split_LR = {}


# One iteration per split; split i is trained with seeds[i], so the number of
# iterations follows the seed list instead of a hard-coded count.
for i in range(len(seeds)):

    # Trailing slash kept on purpose: the same string is handed to lflr.myCVGrid.
    path = f'../../../ORIGINAL_DATA/MDR/splits_14_days/notbalanced/split_{i}/'

    X_test_dynamic = np.load(os.path.join(path, "X_test_tensor.npy"))
    X_test_static = pd.read_csv(os.path.join(path, "X_test_static.csv"), index_col=0)
    y_test = pd.read_csv(os.path.join(path, "y_test.csv"), index_col=0)

    # DYNAMIC #########################################################################################################
    # Grid search for the dynamic branch; also returns the train/validation data it used.
    (
        bestHyperparameters_dyn,
        X_train, y_train, X_train_static,
        X_val, y_val, X_val_static,
    ) = lflr.myCVGrid(
        hyperparameters,
        dropout_rate,
        lr_scheduler,
        layers,
        i,
        seeds[i],
        path,
        model_type="dynamic",
    )

    # Fix: the dynamic result used to be stored in the *static* dictionary.
    bestHyperparameters_bySplit_dyn[str(i)] = bestHyperparameters_dyn

    # NOTE(review): this mutates the shared `hyperparameters` dict, so the
    # selected values remain visible to every later lflr call (including the
    # next split's grid search) -- confirm lflr overrides them.
    hyperparameters.update({
        "dropout_rate": bestHyperparameters_dyn["dropout_rate"],
        "layers": bestHyperparameters_dyn["layers"],
        "lr_scheduler": bestHyperparameters_dyn["lr_scheduler"],
    })

    utils.reset_keras()
    model_dyn, hist, early = lflr.run_network(
        X_train, X_train_static, y_train.individualMRGerm.values,
        X_val, X_val_static, y_val.individualMRGerm.values,
        hyperparameters, seeds[i], model_type="dynamic"
    )

    # Save the hyperparameters and predictions
    split_directory = f'./Results_LFLR/Dynamic/split_{i}'
    y_pred_dynamic = model_dyn.predict(X_test_dynamic)
    y_train_pred_dynamic = model_dyn.predict(X_train)

    save_results(split_directory, bestHyperparameters_dyn, y_pred_dynamic, y_train_pred_dynamic, model_dyn)

    v_models_dyn.append(model_dyn)
    loss_dev_dyn.append(hist.history['val_loss'])
    y_pred_by_split_dyn[str(i)] = y_pred_dynamic

    # STATIC #########################################################################################################
    # Grid search for the static branch; rebinds the train/validation variables.
    (
        bestHyperparameters_stat,
        X_train, y_train, X_train_static,
        X_val, y_val, X_val_static,
    ) = lflr.myCVGrid(
        hyperparameters,
        dropout_rate,
        lr_scheduler,
        layers,
        i,
        seeds[i],
        path,
        model_type="static",
    )

    # Fix: the static result used to be stored in the *dynamic* dictionary.
    bestHyperparameters_bySplit_stat[str(i)] = bestHyperparameters_stat

    hyperparameters.update({
        "dropout_rate": bestHyperparameters_stat["dropout_rate"],
        "layers": bestHyperparameters_stat["layers"],
        "lr_scheduler": bestHyperparameters_stat["lr_scheduler"],
    })

    utils.reset_keras()
    model_stat, hist, early = lflr.run_network(
        X_train, X_train_static, y_train.individualMRGerm.values,
        X_val, X_val_static, y_val.individualMRGerm.values,
        hyperparameters, seeds[i], model_type="static"
    )

    # Save the hyperparameters and predictions
    split_directory = f'./Results_LFLR/Static/split_{i}'
    y_pred_static = model_stat.predict(X_test_static)
    y_train_pred_static = model_stat.predict(X_train_static.values)

    save_results(split_directory, bestHyperparameters_stat, y_pred_static, y_train_pred_static, model_stat)

    v_models_stat.append(model_stat)
    loss_dev_stat.append(hist.history['val_loss'])
    y_pred_by_split_stat[str(i)] = y_pred_static

    # LR #########################################################################################################
    # The combiner is fitted on the two base-model predictions.
    # NOTE(review): `y_train` comes from the static myCVGrid call while
    # `y_train_pred_dynamic` was computed on the dynamic call's X_train; this
    # assumes both calls return the same rows in the same order -- TODO confirm.
    y_train_summary = y_train.reset_index()
    y_train_summary["y_pred_static"] = y_train_pred_static
    y_train_summary["y_pred_dynamic"] = y_train_pred_dynamic
    y_test_summary = y_test.reset_index()
    y_test_summary["y_pred_static"] = y_pred_static
    y_test_summary["y_pred_dynamic"] = y_pred_dynamic

    (
        bestHyperparameters_LR,
        X_train_summary, y_train_summary_2,
        X_val_summary, y_val_summary_2,
    ) = lflr.myCVGrid_LR(
        y_train_summary[["y_pred_static", "y_pred_dynamic"]],
        y_train_summary[["individualMRGerm"]],
        hyperparameters,
        lr_scheduler,
        seeds[i],
    )

    bestHyperparameters_bySplit_LR[str(i)] = bestHyperparameters_LR

    hyperparameters.update({
        "lr_scheduler": bestHyperparameters_LR["lr_scheduler"],
    })

    utils.reset_keras()
    model_LR, hist, earlystopping = lflr.run_network_LR(
        X_train_summary,
        y_train_summary_2,
        X_val_summary,
        y_val_summary_2,
        hyperparameters,
        seeds[i],
    )

    y_pred = model_LR.predict(y_test_summary[["y_pred_static", "y_pred_dynamic"]])

    split_directory = f'./Results_LFLR/LR/split_{i}'
    save_results(split_directory, bestHyperparameters_LR, y_pred, None, model_LR)

    # Fix: the LR model used to be appended to v_models_stat, leaving
    # v_models_LR empty and v_models_stat with two entries per split.
    v_models_LR.append(model_LR)
    loss_dev_LR.append(hist.history['val_loss'])
    y_pred_by_split_LR[str(i)] = y_pred

    # Calculate metrics for the fused (LR) prediction; split_directory is the LR folder here.
    metrics_dict = utils.calculate_and_save_metrics(
        y_test.individualMRGerm.values,
        y_pred,
        split_directory,
        split_index=i
    )