In [1]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import datetime
import warnings
warnings.filterwarnings('ignore')

import sklearn
from sklearn import metrics
from sklearn.metrics import confusion_matrix, f1_score
from sklearn.model_selection import KFold

import random, os, json
# Configurar variables de entorno
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"  # "-1" significa deshabilitar todas las GPUs


import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Masking, GRU, Dropout, Dense
from tensorflow.keras import backend as K

from joblib import Parallel, delayed
import multiprocessing

# FUNCTIONS OF THE MODEL

#### Reset Keras

In [2]:
def reset_keras(seed=42):
    """Function to ensure that results from Keras models
    are consistent and reproducible across different runs"""
    
    K.clear_session()
    # 1. Set `PYTHONHASHSEED` environment variable at a fixed value
    os.environ['PYTHONHASHSEED']=str(seed)
    # 2. Set `python` built-in pseudo-random generator at a fixed value
    random.seed(seed)
    # 3. Set `numpy` pseudo-random generator at a fixed value
    np.random.seed(seed)
    # 4. Set `tensorflow` pseudo-random generator at a fixed value
    tf.random.set_seed(seed)

#### Building GRU

In [3]:
def build_model(hyperparameters):
    """
    Builds a LSTM model based on several hyperparameters.

    Args:
        - hyperparameters: Dictionary containing the hyperparameters. 
    Returns:
        - model: A tf.keras.Model with the compiled model.
    """
    
    dynamic_input = tf.keras.layers.Input(shape=(hyperparameters["n_time_steps"], hyperparameters["layers"][0]))
    masked = tf.keras.layers.Masking(mask_value=hyperparameters['mask_value'])(dynamic_input)

    gru = tf.keras.layers.GRU(
        hyperparameters["layers"][1],
        dropout=hyperparameters['dropout'],
        return_sequences=False,
        activation=hyperparameters['activation'],
        use_bias=True
    )(masked)

    output = tf.keras.layers.Dense(1, use_bias=True, activation="sigmoid")(gru)

    model = tf.keras.Model(dynamic_input, [output])
    model.compile(
        loss='binary_crossentropy',
        optimizer=tf.keras.optimizers.Adam(learning_rate=hyperparameters["lr_scheduler"]),
        metrics=['accuracy', "AUC"]
    )
        
    return model

#### Running the model

In [4]:
def run_network(X_train, X_val, y_train, y_val, hyperparameters, seed):
    """
    Trains and evaluates the built LSTM model based on the provided data and hyperparameters.

    Args:
        - X_train, X_val, y_train, y_val: numpy.ndarray. Training (T) and Validation (V) data labels.
        - sample_weights_train, sample_weights_val: numpy.ndarray. Weights for the T and V data to handle class imbalance.
        - hyperparameters: Dictionary containing the hyperparameters.
        - seed: Integer seed for reproducibility.
    Returns:
        - model: A tf.keras.Model with the trained model.
        - hist:  The training history.
        - earlystopping: The early stopping callback.
    """
    batch_size = hyperparameters['batch_size']
    n_epochs_max = hyperparameters['n_epochs_max']    

    model = None
    model = build_model(hyperparameters)
    earlystopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                                  min_delta=hyperparameters["mindelta"],
                                                  patience=hyperparameters["patience"],
                                                  restore_best_weights=True,
                                                  mode="min")
    hist = model.fit(X_train, y_train,
                     validation_data=(X_val, y_val),
                     callbacks=[earlystopping], batch_size=batch_size, epochs=n_epochs_max,
                     verbose=hyperparameters['verbose'])
    
    return model, hist, earlystopping


#### GridSearch

In [5]:
def evaluate_combination(k, l, m, b, hyperparameters, dropout, layers, lr_scheduler, activation, seed, split, norm, n_time_steps):
    hyperparameters_copy = hyperparameters.copy()
    hyperparameters_copy['dropout'] = dropout[k]
    hyperparameters_copy['layers'] = layers[l]
    hyperparameters_copy['lr_scheduler'] = lr_scheduler[m]
    hyperparameters_copy['activation'] = activation[b]
    
    v_val_loss = []
    
    X_train = np.load("../DATA/s" + str(i) + "/X_train_tensor_normPower2" + ".npy")
    X_val = np.load("../DATA/s" + str(i) + "/X_val_tensor_normPower2" + ".npy")

    y_train = pd.read_csv("../DATA/s" + str(i) + "/y_train_normPower2" + ".csv")
    y_train = y_train[['individualMRGerm_stac']]
    y_train = y_train.iloc[0:y_train.shape[0]:hyperparameters["n_time_steps"]].reset_index(drop=True)

    y_val = pd.read_csv("../DATA/s" + str(i) + "/y_val_normPower2" + ".csv")
    y_val = y_val[['individualMRGerm_stac']]
    y_val = y_val.iloc[0:y_val.shape[0]:hyperparameters["n_time_steps"]].reset_index(drop=True)
    
    reset_keras()

    model, hist, early = run_network(
        X_train, X_val,
        y_train,
        y_val,
        hyperparameters_copy,
        seed
    )

    v_val_loss.append(np.min(hist.history["val_loss"]))

    metric_dev = np.mean(v_val_loss)
    return (metric_dev, k, l, m, b, X_train, y_train, X_val, y_val)

def myCVGridParallel(hyperparameters, dropout, lr_scheduler, layers, activation, seed, split, norm, n_time_steps=14):
    """Parallelized Grid Search. 
       Calculate metricDev based on the evaluation. Compares the metricDev with the current bestMetricDev. 
       If better, updates bestMetricDev and stores those hyperparameters in bestHyperparameters.
       
    Args:
        - hyperparameters: Dictionary containing the hyperparameters.
        - dropout: A list of dropout rates.
        - lr_scheduler: A list of learning rates.
        - layers: A list of layer configurations.
        - seed : Seed value for reproducibility.
        - split: String indicating the data split.
        - norm: String with the type of normalization applied to the data.
    Returns:
        - bestHyperparameters: A dictionary with the best hyperparameters found and Train and Val data.
    """
    bestHyperparameters = {}
    bestMetricDev = np.inf

    
#     for k in range(len(dropout)):
#         for l in range(len(layers)):
#             for m in range(len(lr_scheduler)):
#                 for b in range(len(activation)):
#                     metric_dev, k, l, m, b, X_train, y_train, X_val, y_val = evaluate_combination(k, l, m, b, hyperparameters, dropout, layers, lr_scheduler, activation, seed, split, norm, n_time_steps)
    
    num_cores = multiprocessing.cpu_count()
    results = Parallel(n_jobs=num_cores)(
        delayed(evaluate_combination)(k, l, m, b, hyperparameters, dropout, layers, lr_scheduler, activation, seed, split, norm, n_time_steps)
        for k in range(len(dropout))
        for l in range(len(layers))
        for m in range(len(lr_scheduler))
        for b in range(len(activation))
    )

    for metric_dev, k, l, m, b, X_train, y_train, X_val, y_val in results:
        if metric_dev < bestMetricDev:
            print("\t\t\tCambio the best", bestMetricDev, "por metric dev:", metric_dev)
            bestMetricDev = metric_dev
            bestHyperparameters = {
                'dropout': dropout[k],
                'layers': layers[l],
                'lr_scheduler': lr_scheduler[m],
                'activation': activation[b],
                'X_train': X_train,
                'y_train': y_train,
                'X_val': X_val,
                'y_val': y_val
            }

    return bestHyperparameters


## Hyperparameters
In the dictionary, hyperparameters related to: data, training, evaluation, regularization

In [16]:
seeds = [42, 76, 124, 163, 192, 205]

input_shape = 80
n_time_steps = 14
batch_size = 32
n_epochs_max = 1000

layer_list = [
    [input_shape, 20, 1],  [input_shape, 30, 1], [input_shape, 40, 1], 
    [input_shape, 50, 1], [input_shape, 60, 1]
]

dropout = [0.0, 0.15, 0.3]
lr_scheduler = [1e-1, 1e-2, 1e-3, 1e-4]

activation = ['tanh', 'LeakyReLU']
 
norm = "robustNorm"

hyperparameters = {
    "n_time_steps": n_time_steps,
    "mask_value": 666,
    "batch_size": batch_size,
    "n_epochs_max": n_epochs_max,
    "monitor": "val_loss",
    "mindelta": 0,
    "patience": 25,
    "dropout": 0.0,
    "verbose": 0,
}

## Predictions

In [17]:
import os
import pickle
import time
import numpy as np
import pandas as pd


run_model = True
if run_model:
    loss_train = []
    loss_dev = []
    v_models = []

    bestHyperparameters_bySplit = {}
    y_pred_by_split = {}

    for i in [1,2,3,4,5]:
        init = time.time()
        # LOAD TEST AND PRE-TRAIN
        X_test = np.load("../DATA/s" + str(i) + "/X_test_tensor_normPower2" + ".npy")

        y_test = pd.read_csv("../DATA/s" + str(i) + "/y_test_normPower2" + ".csv")
        y_test = y_test[['individualMRGerm_stac']]
        y_test = y_test.iloc[0:y_test.shape[0]:hyperparameters["n_time_steps"]].reset_index(drop=True)

        # GridSearch of hyperparameters 
        bestHyperparameters = myCVGridParallel(hyperparameters,
                                               dropout,
                                               lr_scheduler,
                                               layer_list,
                                               activation,
                                               seeds[i],
                                               "s"+str(i),
                                               norm)
        fin = time.time()
        X_train = bestHyperparameters["X_train"]
        y_train = bestHyperparameters["y_train"]
        X_val = bestHyperparameters["X_val"]
        y_val = bestHyperparameters["y_val"]

        bestHyperparameters_bySplit[str(i)] = bestHyperparameters

        # Save best hyperparameters for current split
        split_directory = './Results_GRU/split_' + str(i)
        if not os.path.exists(split_directory):
            os.makedirs(split_directory)

        with open(os.path.join(split_directory, f"bestHyperparameters_split_{i}.pkl"), 'wb') as f:
            pickle.dump(bestHyperparameters, f)

        hyperparameters = {
            'n_time_steps': hyperparameters["n_time_steps"],
            'mask_value': hyperparameters["mask_value"],

            'batch_size': hyperparameters["batch_size"],
            'n_epochs_max': hyperparameters["n_epochs_max"],
            'monitor':  hyperparameters["monitor"],
            "mindelta": hyperparameters["mindelta"],
            "patience": hyperparameters["patience"],
            "dropout": bestHyperparameters["dropout"],
            "layers": bestHyperparameters["layers"],
            "lr_scheduler": bestHyperparameters["lr_scheduler"],
            "activation": bestHyperparameters["activation"],
            'verbose': 0
        }

        # --- TRY ON TEST ----------------------------------------------------------------------
        reset_keras()

        model, hist, early = run_network(
            X_train, X_val,
            y_train,
            y_val,
            hyperparameters,
            seeds[i-1]
        )

        v_models.append(model)
        loss_train.append(hist.history['loss'])
        loss_dev.append(hist.history['val_loss'])

        y_pred = model.predict(x=X_test)
        y_pred_by_split[str(i)] = y_pred

        # Save y_pred for current split
        with open(os.path.join(split_directory, f"y_pred_split_{i}.pkl"), 'wb') as f:
            pickle.dump(y_pred, f)

        # Save model for current split
        model_filename = os.path.join(split_directory, f"model_split_{i}.h5")
        model.save(model_filename)

    # END EXECUTION - SAVE AGGREGATED RESULTS
    directory = './Results_GRU'
    if not os.path.exists(directory):
        os.makedirs(directory)

    def save_to_pickle(data, filename):
        with open(filename, 'wb') as f:
            pickle.dump(data, f)

    save_to_pickle(bestHyperparameters_bySplit, os.path.join(directory, "bestHyperparameters_bySplit.pkl"))
    save_to_pickle(y_pred_by_split, os.path.join(directory, "y_pred_by_split.pkl"))
    
    for i, model in enumerate(v_models):
        model_filename = os.path.join(directory, f"model_{i}.h5")
        model.save(model_filename)


			Cambio the best inf por metric dev: 0.36251774430274963
			Cambio the best 0.36251774430274963 por metric dev: 0.3127076327800751
			Cambio the best 0.3127076327800751 por metric dev: 0.25204935669898987
			Cambio the best inf por metric dev: 0.29703181982040405
			Cambio the best 0.29703181982040405 por metric dev: 0.29395484924316406
			Cambio the best 0.29395484924316406 por metric dev: 0.28914982080459595
			Cambio the best 0.28914982080459595 por metric dev: 0.28896594047546387
			Cambio the best 0.28896594047546387 por metric dev: 0.2775346636772156
			Cambio the best inf por metric dev: 0.3293093144893646
			Cambio the best 0.3293093144893646 por metric dev: 0.30943480134010315
			Cambio the best 0.30943480134010315 por metric dev: 0.30369889736175537
			Cambio the best 0.30369889736175537 por metric dev: 0.3033117353916168
			Cambio the best 0.3033117353916168 por metric dev: 0.2621467709541321
			Cambio the best inf por metric dev: 0.34157827496528625
			Cambio the best 0.3

In [18]:
bestHyperparameters_bySplit

{'1': {'dropout': 0.0,
  'layers': [80, 30, 1],
  'lr_scheduler': 0.1,
  'activation': 'tanh',
  'X_train': array([[[ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00, ...,
           -1.72938632e-03, -3.98213791e-04,  9.31827501e-05],
          [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00, ...,
           -1.72938632e-03, -3.98213791e-04,  9.31827501e-05],
          [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00, ...,
           -1.72938632e-03, -3.98213791e-04,  9.31827501e-05],
          ...,
          [ 6.66000000e+02,  6.66000000e+02,  6.66000000e+02, ...,
            6.66000000e+02,  6.66000000e+02,  6.66000000e+02],
          [ 6.66000000e+02,  6.66000000e+02,  6.66000000e+02, ...,
            6.66000000e+02,  6.66000000e+02,  6.66000000e+02],
          [ 6.66000000e+02,  6.66000000e+02,  6.66000000e+02, ...,
            6.66000000e+02,  6.66000000e+02,  6.66000000e+02]],
  
         [[ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00, ...,
           -1.72938632e-03, 

In [19]:
from sklearn.metrics import confusion_matrix, roc_auc_score

def get_metrics_over_time(n_time_steps, y_test_df, y_pred_df):
    """
    Calculate metrics per time step.
    Args:
        - n_time_steps: Number of time steps.
        - y_test_df : DataFrame containing the real values.
        - y_pred_df : DataFrame containing the predicted values.
    Returns:
        - metrics_df: DataFrame containing the metrics for each time step.
    """
    
    # Lists to store the metrics
    tn_list = []
    fp_list = []
    fn_list = []
    tp_list = []
    specificity_list = []
    recall_list = []
    roc_auc_list = []

    # Calculate the metrics for each time step
    for t in range(n_time_steps):
        y_test_valid = y_test_df
        y_pred_valid = y_pred_df

        # Round the predictions
        y_pred_rounded = np.round(y_pred_valid)

        # Calculate the confusion matrix
        tn, fp, fn, tp = confusion_matrix(y_test_valid, y_pred_rounded, labels=[0, 1]).ravel()

        # Calculate specifciity and recall
        specificity = tn / (tn + fp) if (tn + fp) > 0 else np.nan
        recall = tp / (tp + fn) if (tp + fn) > 0 else np.nan

        # Calculate ROC-AUC
        roc = roc_auc_score(y_test_valid, y_pred_valid) if len(np.unique(y_test_valid)) > 1 else np.nan

        tn_list.append(tn)
        fp_list.append(fp)
        fn_list.append(fn)
        tp_list.append(tp)
        specificity_list.append(specificity)
        recall_list.append(recall)
        roc_auc_list.append(roc)

    # Dataframe to store the metrics per time step
    metrics_df = pd.DataFrame({
        'Time Step': range(1, n_time_steps+1),
        'TN': tn_list,
        'FP': fp_list,
        'FN': fn_list,
        'TP': tp_list,
        'Specificity': specificity_list,
        'Recall': recall_list,
        'ROC AUC': roc_auc_list
    })

    return metrics_df

In [23]:
directory = './Results_GRU'
def load_from_pickle(filename):
    with open(filename, 'rb') as f:
        return pickle.load(f)

    
y_pred_by_split = {}
y_pred_by_split['1'] = load_from_pickle(os.path.join('./Results_GRU/split_1', "y_pred_split_1.pkl"))
y_pred_by_split['2'] = load_from_pickle(os.path.join('./Results_GRU/split_2', "y_pred_split_2.pkl"))
y_pred_by_split['3'] = load_from_pickle(os.path.join('./Results_GRU/split_3', "y_pred_split_3.pkl"))
y_pred_by_split['4'] = load_from_pickle(os.path.join('./Results_GRU/split_4', "y_pred_split_4.pkl"))
y_pred_by_split['5'] = load_from_pickle(os.path.join('./Results_GRU/split_5', "y_pred_split_5.pkl"))

In [24]:
all_metrics = []

for i in [1,2,3,4,5]: 
    y_test = pd.read_csv("../DATA/s" + str(i) + "/y_test_normPower2" + ".csv")
    y_test = y_test[['individualMRGerm_stac']]
    y_test = y_test.iloc[0:y_test.shape[0]:hyperparameters["n_time_steps"]].reset_index(drop=True)
    
    y_pred = y_pred_by_split[str(i)].squeeze()
    y_pred_df = pd.DataFrame(y_pred)

    df_metrics = get_metrics_over_time(n_time_steps, y_test, y_pred_df)
    all_metrics.append(df_metrics)

In [25]:
all_metrics

[    Time Step   TN  FP  FN  TP  Specificity    Recall   ROC AUC
 0           1  278  19  22  31     0.936027  0.584906  0.817674
 1           2  278  19  22  31     0.936027  0.584906  0.817674
 2           3  278  19  22  31     0.936027  0.584906  0.817674
 3           4  278  19  22  31     0.936027  0.584906  0.817674
 4           5  278  19  22  31     0.936027  0.584906  0.817674
 5           6  278  19  22  31     0.936027  0.584906  0.817674
 6           7  278  19  22  31     0.936027  0.584906  0.817674
 7           8  278  19  22  31     0.936027  0.584906  0.817674
 8           9  278  19  22  31     0.936027  0.584906  0.817674
 9          10  278  19  22  31     0.936027  0.584906  0.817674
 10         11  278  19  22  31     0.936027  0.584906  0.817674
 11         12  278  19  22  31     0.936027  0.584906  0.817674
 12         13  278  19  22  31     0.936027  0.584906  0.817674
 13         14  278  19  22  31     0.936027  0.584906  0.817674,
     Time Step   TN  FP 

In [None]:
def myCVGrid(hyperparameters, dropout, lr_scheduler, layers, seed):
    """Grid Search. Calculate metricDev based on the evaluation. Compares the metricDev with the current bestMetricDev. 
       If better, updates bestMetricDev and stores those hyperparameters in bestHyperparameters.
       
       Returns:
          - bestHyperparameters (dict)
          - X_train, X_val, y_train, y_val (arrays): Training and validation datasets.
          - v_early (list): Early stopping information for each hyperparameter combination.
          - v_hist (list): Training history for each hyperparameter combination.
    """
    
    print("\tEntra en validación...")
    bestHyperparameters = {'dropout': -1, 'layers': -1, 'lr_scheduler':-1}
    bestHyperparameters = {'dropout': -1, 'layers': -1, 'lr_scheduler':-1}
    bestMetricDev = np.inf

    for k in range(len(dropout)):
        for l in range(len(layers)):
            for m in range(len(lr_scheduler)):
                hyperparameters = {
                    'n_time_steps': hyperparameters["n_time_steps"],
                    'mask_value': hyperparameters["mask_value"],

                    'cost_max': hyperparameters["cost_max"],
                    'cost_start': hyperparameters["cost_start"],
                    'length_infection': hyperparameters["length_infection"],
                    'length_start': hyperparameters["length_start"],
                    'thresh_after_AMR': hyperparameters["thresh_after_AMR"],
                    'thresh_AMR_far': hyperparameters["thresh_AMR_far"],
                    
                    'batch_size': hyperparameters["batch_size"],
                    'n_epochs_max': hyperparameters["n_epochs_max"],
                    'monitor':  hyperparameters["monitor"],
                    "mindelta": hyperparameters["mindelta"],
                    "patience": hyperparameters["patience"],
                    
                    "dropout":dropout[k],
                    "layers":layers[l],
                    "lr_scheduler":lr_scheduler[m],
                    
                    'kfold': hyperparameters["kfold"],
                    "level": 3, 
                    'verbose': 0
                }
                v_early = []
                v_metric_dev = []
                v_hist = []
                v_val_loss = []
                print("\t\tLearning rate:", lr_scheduler[m], ", dropout:", dropout[k], ", layers:", layers[l])

                #Load Train and Validation
                X_train = np.load("../DATA/s" + str(i) + "/X_train_tensor_normPower2" + ".npy")
                X_val = np.load("../DATA/s" + str(i) + "/X_val_tensor_normPower2" + ".npy")

                y_train = pd.read_csv("../DATA/s" + str(i) + "/y_train_normPower2" + ".csv")
                y_train = y_train[['individualMRGerm_stac']]
                y_train = y_train.iloc[0:y_train.shape[0]:hyperparameters["n_time_steps"]].reset_index(drop=True)

                y_val = pd.read_csv("../DATA/s" + str(i) + "/y_val_normPower2" + ".csv")
                y_val = y_val[['individualMRGerm_stac']]
                y_val = y_val.iloc[0:y_train.shape[0]:hyperparameters["n_time_steps"]].reset_index(drop=True)


                # Reset Keras
                reset_keras()
                gpu_options = tf.compat.v1.GPUOptions(per_process_gpu_memory_fraction=0.2)
                sess = tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(gpu_options=gpu_options))
                
                model, hist, early = run_network(
                    X_train, X_val,
                    y_train,
                    y_val,
                    hyperparameters,
                    seed
                )

                v_early.append(early)
                v_hist.append(hist)
                v_val_loss.append(np.min(hist.history["val_loss"]))
            metric_dev = np.mean(v_val_loss)

            if metric_dev < bestMetricDev:
                print("\t\t\tCambio the best", bestMetricDev, "por metric dev:", metric_dev)
                bestMetricDev = metric_dev
                bestHyperparameters['dropout'] = k
                bestHyperparameters['layers'] = l
                bestHyperparameters['lr_scheduler'] = m
            f = open("0_trazas_GRU.txt", "a")
            f.write("Executed in val :" + "con dropout" + str(dropout[k]) + "layers" + str(layers[l]) + 
                    "lr_sch" + str(lr_scheduler[m]) + " y coste medio en val " + str(metric_dev) + "\n")
            f.close()
    f = open("0_trazas_GRU.txt", "a")
    f.write("\n\n\n#####Acabada validación....")
    f.close()
    return bestHyperparameters, X_train, X_val, y_train, y_val, v_early, v_hist

#### Mean and Std of performance metrics

In [None]:
def calculateKPI(parameter):
    """
    This function calculate the mean and deviation of a set of values of
    a given performance indicator.
    
    Returns: Mean and std (float)
    """
    mean = round(np.mean(parameter)*100, 2)
    deviation = round(np.sqrt(np.sum(np.power(parameter - np.mean(parameter), 2) / len(parameter)))*100, 2)
    return mean, deviation

# PREDICTIONS

## Hyperparameters
In the dictionary, hyperparameters related to: data, training, evaluation, regularization

In [None]:
seeds = [143, 45, 67, 98, 120]
input_shape = 80

n_time_steps = 14
mask_value = 666
cost_max = 9
cost_start = 4
length_infection = 14
length_start = 5
thresh_AMR_far = 7
thresh_after_AMR = 2

batch_size = 32
n_epochs_max = 10000
mindelta = 0
patience = 50

hyperparameters = {
    "n_time_steps": n_time_steps,
    'mask_value': mask_value,
    "cost_max": cost_max,
    "cost_start": cost_start,
    "length_infection": length_infection,
    "length_start": length_start,
    "thresh_after_AMR": thresh_after_AMR,
    "thresh_AMR_far": thresh_AMR_far,
    
    'batch_size': batch_size,
    'n_epochs_max': n_epochs_max,
    'monitor': 'val_loss', 
    "mindelta": mindelta,
    "patience": patience,
    
    'kfold':5,
    'level':3,
    
    "dropout": 0.0,
    "verbose": 1
}

layer_list = [
    [input_shape, 3, 1],  [input_shape, 5, 1],  [input_shape, 10, 1], [input_shape, 15, 1], 
    [input_shape, 20, 1], [input_shape, 25, 1], [input_shape, 40, 1]
]

dropout = [0.0, 0.15, 0.3]
lr_scheduler = [0.01, 0.001, 0.0001]

#Output in a structured way
tab = "\t" * hyperparameters["level"]

In [None]:
v_accuracy_test = []
v_specificity = []
v_precision = []
v_recall = []
v_f1score = []
v_roc = []
v_early = []
v_probs = []
loss_train = []
loss_dev = []
results = ""
v_models = []
for i in range(1,6,1):
    #Cargo test y pre_train

    X_test = np.load("../DATA/s" + str(i) + "/X_test_tensor_normPower2" + ".npy")

    y_test = pd.read_csv("../DATA/s" + str(i) + "/y_test_normPower2" + ".csv")
    y_test = y_test[['individualMRGerm_stac']]
    y_test = y_test.iloc[0:y_test.shape[0]:hyperparameters["n_time_steps"]].reset_index(drop=True)
    
    #Busco los hiperparámetros y los imprimo    
    bestHyperparameters, X_train, X_val, y_train, y_val, v_early, v_hist = myCVGrid(
        hyperparameters, dropout, lr_scheduler, layer_list, seeds[i]
    )
    print("\tlr_sch seleccionado:", lr_scheduler[bestHyperparameters["lr_scheduler"]])
    print("\tdropout seleccionado:", dropout[bestHyperparameters["dropout"]])
    print("\tlayers seleccionado:", layer_list[bestHyperparameters["layers"]])
    f = open("0_trazas_GRU.txt", "a")
    f.write("mejor dropout seleccionado: "  + str(dropout[bestHyperparameters["dropout"]]) +
            ", mejor layers seleccionado: " + str(layer_list[bestHyperparameters["layers"]]) + 
            ", mejor lr_sch seleccionado:" + str(lr_scheduler[bestHyperparameters["lr_scheduler"]]) + "\n")
    f.close()
    
    
    hyperparameters = {
        'n_time_steps': hyperparameters["n_time_steps"],
        'mask_value': hyperparameters["mask_value"],

        'cost_max': hyperparameters["cost_max"],
        'cost_start': hyperparameters["cost_start"],
        'length_infection': hyperparameters["length_infection"],
        'length_start': hyperparameters["length_start"],
        'thresh_after_AMR': hyperparameters["thresh_after_AMR"],
        'thresh_AMR_far': hyperparameters["thresh_AMR_far"],
                    
        'batch_size': hyperparameters["batch_size"],
        'n_epochs_max': hyperparameters["n_epochs_max"],
        'monitor':  hyperparameters["monitor"],
        "mindelta": hyperparameters["mindelta"],
        "patience": hyperparameters["patience"],
                    
        "dropout": dropout[bestHyperparameters["dropout"]],
        "layers": layer_list[bestHyperparameters["layers"]],
        "lr_scheduler": lr_scheduler[bestHyperparameters["lr_scheduler"]],
                    
        'kfold': hyperparameters["kfold"],
        "level": 3, 
        'verbose': 0
    }
    

    #Pruebo en test
    reset_keras()
    gpu_options = tf.compat.v1.GPUOptions(per_process_gpu_memory_fraction=0.25)
    sess = tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(gpu_options=gpu_options))

    
    model, hist, early = run_network(
        X_train, X_val,
        y_train, 
        y_val,
        hyperparameters,
        seeds[i]
    )
    
    v_models.append(model)
    loss_train.append(hist.history['loss'])
    loss_dev.append(hist.history['val_loss'])

    
    y_pred = model.predict(x=X_test)
    
    accuracy_test = sklearn.metrics.accuracy_score(y_test, np.round(y_pred))
    tn, fp, fn, tp = confusion_matrix(y_test, np.round(y_pred)).ravel()
    roc = sklearn.metrics.roc_auc_score(y_test, y_pred)


    v_accuracy_test.append(accuracy_test)
    v_specificity.append(tn / (tn + fp))
    v_precision.append(tp / (tp + fp))
    v_recall.append(tp / (tp + fn))
    v_f1score.append((2 * v_recall[i] * v_precision[i]) / (v_recall[i] + v_precision[i]))
    v_roc.append(roc)

    if debug:
        results = results + tab + "\tPositivos bien predichos" + str(tp) + "\n"
        results = results + tab + "\tPositivos mal predichos" + str(fp) + "\n"
        results = results + tab + "\tNegativos bien predichos" + str(tn) + "\n"
        results = results + tab + "\tNegativos mal predichos" + str(fn) + "\n"
        
        
mean_test, deviation_test = calculateKPI(v_accuracy_test)
mean_train, deviation_train = calculateKPI(v_accuracy_train)
mean_specificity, deviation_specificity = calculateKPI(v_specificity)
mean_recall, deviation_recall = calculateKPI(v_recall)
mean_f1, deviation_f1 = calculateKPI(v_f1score)
mean_precision, deviation_precision = calculateKPI(v_precision)
mean_roc, deviation_roc = calculateKPI(v_roc)

results = results + tab + "Accuracy en test:" + str(mean_test) + "+-" + str(deviation_test) + "\n"
results = results + tab + "Accuracy en train: " + str(mean_train) + "+-" + str(deviation_train) + "\n"
results = results + tab + "Especificidad:" + str(mean_specificity) +  "+-" + str(deviation_specificity) + "\n"
results = results + tab + "Sensibilidad:" + str(mean_recall) +  "+-" + str(deviation_recall) + "\n"
results = results + tab + "Precisión:" + str(mean_precision) +  "+-" + str(deviation_precision) + "\n"
results = results + tab + "F1-score:" + str(mean_f1) + "+-" + str(deviation_f1) + "\n"
results = results + tab + "ROC-AUC:" + str(mean_roc) + "+-" + str(deviation_roc) + "\n"

results = (results + tab + str(mean_test) + " +- " + str(deviation_test) +
            ' & ' + str(mean_specificity) +  " +- " + str(deviation_specificity) +
            ' & ' + str(mean_recall) +  " +- " + str(deviation_recall) +
            ' & ' + str(mean_f1) + " +- " + str(deviation_f1) +
            ' & ' + str(mean_roc) + " +- " + str(deviation_roc))