In [None]:
import optuna
import shap
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')
import time

import sklearn
from sklearn import metrics
from sklearn.metrics import confusion_matrix, f1_score

import random, os, json
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Masking, LSTM, Dropout, Dense
from tensorflow.keras import backend as K

import sys
sys.path.append("../../")
import utils_models 
import utils_interpretability
import utils
from joblib import Parallel, delayed
import multiprocessing

import pickle
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import regularizers

In [None]:
def binary_crossentropy(mask_value=666):
    """
    Builds a binary cross-entropy loss that ignores placeholder labels.

    Args:
        - mask_value: Label value marking entries that must not contribute to the
          loss. Defaults to 666, the placeholder value used in this notebook.
    Returns:
        - loss: A callable `loss(y_true, y_pred)` that can be passed to `model.compile`.
    """

    def loss(y_true, y_pred):
        """
        Args:
            - y_true: A tensor containing the true labels (0 or 1); entries equal to
              `mask_value` are treated as invalid.
            - y_pred: A tensor containing the predicted probabilities for the positive class.
        Returns:
            Binary cross-entropy loss computed by TensorFlow's BinaryCrossentropy,
            ignoring invalid labels.
        """
        # Keep only the positions whose label is a real class value.
        valid = tf.not_equal(y_true, mask_value)
        y_true_valid = tf.boolean_mask(y_true, valid)
        y_pred_valid = tf.boolean_mask(y_pred, valid)

        # from_logits=False: y_pred is expected to already be a probability.
        loss_fn = tf.keras.losses.BinaryCrossentropy(from_logits=False)
        return loss_fn(y_true_valid, y_pred_valid)

    return loss

In [None]:
def calculate_metrics(y_true, y_pred_probs):
    """
    Computes binary classification metrics from predicted probabilities.

    Args:
        - y_true: Array-like with the true binary labels (0 or 1).
        - y_pred_probs: Array-like with the predicted probabilities for the positive class.
    Returns:
        - Tuple (accuracy, sensitivity, specificity, roc_auc, f1, tn, fp, fn, tp).
    """
    # Hard predictions: probabilities are rounded to the nearest integer class.
    y_pred = np.round(y_pred_probs).astype(int)

    # labels=[0, 1] keeps the matrix 2x2 even if only one class is present,
    # so the four-way unpacking below always succeeds.
    tn, fp, fn, tp = metrics.confusion_matrix(y_true, y_pred, labels=[0, 1]).ravel()

    # These scorers are not imported by name in this notebook, so they are
    # resolved through the imported `sklearn.metrics` module.
    accuracy = metrics.accuracy_score(y_true, y_pred)
    sensitivity = metrics.recall_score(y_true, y_pred)
    # Guard against a split without negative samples.
    specificity = tn / (tn + fp) if (tn + fp) > 0 else 0
    # The AUC is computed on the raw probabilities, not on the rounded predictions.
    roc_auc = metrics.roc_auc_score(y_true, y_pred_probs)
    f1 = metrics.f1_score(y_true, y_pred)

    return accuracy, sensitivity, specificity, roc_auc, f1, tn, fp, fn, tp

### FUNCTIONS OF THE MODEL

In [None]:
def build_model(hyperparameters):
    """
    Builds and compiles a single-layer LSTM binary classifier.

    Args:
        - hyperparameters: Dictionary containing the model hyperparameters. Keys read:
          'middle_layer_dim', 'n_time_steps', 'lr_scheduler', 'weight_decay',
          'dropout', 'activation' and, optionally, 'l2_lambda' (default 1e-4).
          The dictionary is updated in place with a 'layers' entry.
    Returns:
        - model: A tf.keras.Model with the compiled model.
    """
    # [input features, LSTM units, output units]. Written back into the caller's
    # dictionary, which is the behaviour existing callers already get.
    hyperparameters['layers'] = [87, hyperparameters['middle_layer_dim'], 1]
    l2_lambda = hyperparameters.get("l2_lambda", 1e-4)
    dropout_rate = hyperparameters['dropout']

    # No Masking layer is applied: the input tensor feeds the LSTM directly.
    dynamic_input = tf.keras.layers.Input(
        shape=(hyperparameters["n_time_steps"], hyperparameters["layers"][0])
    )

    lstm_encoder = tf.keras.layers.LSTM(
        hyperparameters["layers"][1],
        dropout=dropout_rate,
        return_sequences=False,
        activation=hyperparameters['activation'],
        kernel_regularizer=regularizers.l2(l2_lambda),
        use_bias=False
    )(dynamic_input)

    # Extra dropout on the encoded sequence, on top of the LSTM's input dropout.
    if dropout_rate > 0.0:
        lstm_encoder = tf.keras.layers.Dropout(dropout_rate)(lstm_encoder)

    output = tf.keras.layers.Dense(
        1,
        use_bias=False,
        activation="sigmoid",
        kernel_regularizer=regularizers.l2(l2_lambda)
    )(lstm_encoder)

    optimizer = Adam(
        learning_rate=hyperparameters["lr_scheduler"],
        weight_decay=hyperparameters["weight_decay"]
    )

    model = tf.keras.Model(dynamic_input, output)
    model.compile(
        loss=binary_crossentropy(),
        optimizer=optimizer,
        # The AUC metric is named explicitly. When the name is left to Keras it is
        # auto-numbered ('auc_1', 'auc_2', ...) as more models are created in the
        # same session, and the 'val_auc' key used for early stopping and read
        # from the training history would no longer exist.
        metrics=['accuracy', tf.keras.metrics.AUC(name="auc")],
        weighted_metrics=[]
    )

    return model



In [None]:
def run_network(X_train, X_val, y_train, y_val,
                hyperparameters, seed):
    """
    Trains the LSTM model built by `build_model` with early stopping on validation AUC.

    Args:
        - X_train, X_val: numpy.ndarray. Training and validation input tensors.
        - y_train, y_val: numpy.ndarray. Training and validation labels.
        - hyperparameters: Dictionary containing training and model hyperparameters.
          Besides the keys used by `build_model`, it must provide 'mindelta',
          'patience', 'batch_size', 'n_epochs_max' and 'verbose'.
        - seed: Random seed for reproducibility. When None, no seeding is done.
    Returns:
        - model (tf.keras.Model): The trained Keras model.
        - hist (tf.keras.callbacks.History): Training history object containing loss and metrics.
        - training_time (float): Total training time in seconds.
    """
    if seed is not None:
        # Seeds Python's `random`, NumPy and TensorFlow before any weight is created,
        # so that the same seed leads to the same initialisation and batch order.
        tf.keras.utils.set_random_seed(seed)

    model = build_model(hyperparameters)

    # Stop once validation AUC stops improving and roll back to the best epoch.
    earlystopping = tf.keras.callbacks.EarlyStopping(
        monitor='val_auc',
        min_delta=hyperparameters["mindelta"],
        patience=hyperparameters["patience"],
        restore_best_weights=True,
        mode="max"
    )

    # Monotonic clock: only the elapsed time matters here.
    start_time = time.perf_counter()

    hist = model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        callbacks=[earlystopping],
        batch_size=hyperparameters['batch_size'],
        epochs=hyperparameters['n_epochs_max'],
        verbose=hyperparameters['verbose']
    )

    training_time = time.perf_counter() - start_time

    return model, hist, training_time


In [None]:
def objective(trial, hyperparameters, seed, X_train, y_train, X_val, y_val, split, norm, n_time_steps):
    """
    Objective function for hyperparameter optimization using Optuna.

    Samples one configuration from the search space, trains a model with it and
    scores it by its best validation AUC.

    Args:
        - trial (optuna.trial.Trial): Optuna trial object.
        - hyperparameters: Dictionary containing the fixed training and model
          hyperparameters. It is copied, not modified.
        - seed: Random seed for reproducibility, forwarded to `run_network`.
        - X_train, X_val: numpy.ndarray. Training and validation input tensors.
        - y_train, y_val: numpy.ndarray. Training and validation labels.
        - split: String indicating the data split (not used by this function).
        - norm: String with the type of normalization applied to the data
          (not used by this function).
        - n_time_steps: Number of time steps in the input (not used by this
          function; the value in `hyperparameters` is the one that is applied).
    Returns:
        - metric_dev (float): Best validation AUC achieved during training.
    """
    print(f"Trial {trial.number} started")

    # Start every trial from a clean Keras state: frees the previous trial's model
    # and resets Keras' automatic layer/metric name counters.
    K.clear_session()

    # Shallow copy: fixed settings (batch size, epochs, verbosity, ...) are
    # inherited and the sampled values below override the tunable ones.
    trial_hyperparameters = hyperparameters.copy()

    trial_hyperparameters["dropout"] = trial.suggest_float('dropout', 0.0, 0.3)
    trial_hyperparameters['middle_layer_dim'] = trial.suggest_int('middle_layer_dim', 2, 20, step=2)
    # suggest_float(..., log=True) is the non-deprecated form of suggest_loguniform.
    trial_hyperparameters["l2_lambda"] = trial.suggest_float('l2_lambda', 1e-6, 1e-2, log=True)
    trial_hyperparameters["lr_scheduler"] = trial.suggest_float('lr_scheduler', 1e-3, 1e-1, log=True)
    trial_hyperparameters["adjustment_factor"] = trial.suggest_categorical('adjustment_factor', [1])
    trial_hyperparameters["activation"] = trial.suggest_categorical('activation', ['tanh', 'LeakyReLU'])
    trial_hyperparameters['patience'] = trial.suggest_int('patience', 3, 20)
    trial_hyperparameters['mindelta'] = trial.suggest_float('mindelta', 1e-10, 1e-5, log=True)
    trial_hyperparameters['weight_decay'] = trial.suggest_float('weight_decay', 1e-6, 1e-2, log=True)

    model, hist, training_time = run_network(
        X_train, X_val,
        y_train,
        y_val,
        trial_hyperparameters,
        seed
    )

    # Score of the trial: the highest validation AUC reached in any epoch.
    metric_dev = float(np.max(hist.history["val_auc"]))
    return metric_dev

In [None]:
def optuna_study(hyperparameters, seed, X_train, y_train, X_val, y_val, split, norm, n_time_steps, n_trials=20):
    """
    Runs an Optuna study to optimize hyperparameters for the model.

    Args:
        - hyperparameters: Dictionary containing the fixed training and model hyperparameters.
        - seed: Random seed for reproducibility. It seeds the Optuna sampler and is
          forwarded to every trial.
        - X_train, X_val: numpy.ndarray. Training and validation input tensors.
        - y_train, y_val: numpy.ndarray. Training and validation labels.
        - split: String indicating the data split.
        - norm: String with the type of normalization applied to the data.
        - n_time_steps: Number of time steps in the input.
        - n_trials: Number of Optuna trials to run. Defaults to 20.
    Returns:
        - best_hyperparameters: Dictionary containing the best hyperparameters found
          after the optimization process.
    """
    # TPE is Optuna's default sampler; it is created explicitly only so that it can
    # be seeded and the same seed reproduces the same sequence of suggestions.
    sampler = optuna.samplers.TPESampler(seed=seed)
    study = optuna.create_study(direction='maximize', sampler=sampler)
    study.optimize(
        lambda trial: objective(trial, hyperparameters, seed, X_train, y_train, X_val, y_val, split, norm, n_time_steps),
        n_trials=n_trials
    )

    best_params = study.best_params
    best_metric = study.best_value

    # Same [input features, LSTM units, output units] layout that build_model uses.
    layers = [87, best_params['middle_layer_dim'], 1]

    # Tuned values come from the study; batch size and epoch budget are fixed
    # settings copied from the input dictionary.
    best_hyperparameters = {
        'dropout': best_params['dropout'],
        'middle_layer_dim': best_params['middle_layer_dim'],
        'layers': layers,
        'lr_scheduler': best_params['lr_scheduler'],
        'l2_lambda': best_params['l2_lambda'],
        'adjustment_factor': best_params['adjustment_factor'],
        'activation': best_params['activation'],
        'batch_size': hyperparameters['batch_size'],
        'n_epochs_max': hyperparameters['n_epochs_max'],
        'patience': best_params['patience'],
        'mindelta': best_params['mindelta'],
        'weight_decay': best_params['weight_decay']
    }

    print(f"Best Hyperparameters: {best_params}")
    print(f"Best Validation Metric: {best_metric}")

    return best_hyperparameters


### HYPERPARAMETERS 

- **seeds**: Seed values to ensure reproducibility.
- **input_shape**: Number of features in each time step of the input data.
- **n_time_steps**: Number of time steps in the input sequence.
- **batch_size**: Number of samples per training batch.
- **norm**: Type of normalization applied to the data.
- **dropout**: Dropout rate to prevent overfitting.
- **l2_lambda**: L2 regularization coefficient.
- **lr_scheduler**: Learning rate assigned to the optimizer.
- **patience**: Number of epochs with no improvement before early stopping is triggered.
- **weight_decay**: Weight decay for the optimizer to apply additional L2 regularization on weights.
- **middle_layer_dim**: Number of units in the middle (LSTM) layer of the model.
- **mindelta**: Minimum delta required to consider as an improvement.

In [None]:
# One seed per data split: split i is trained with seeds[i - 1].
seeds = [9, 76, 227]

# Data dimensions and training budget.
input_shape, n_time_steps = 87, 7
batch_size, n_epochs_max = 4, 100

# Candidate values for the tunable settings.
adjustment_factor = [1]
activation = ['tanh', 'LeakyReLU']

# Preprocessing and early-stopping defaults.
norm = "standardScaler"
patience = 3
monitor = "val_auc"

# Fixed settings shared by every run; the tuned values are added later on.
hyperparameters = dict(
    n_time_steps=n_time_steps,
    mask_value=666,
    batch_size=batch_size,
    n_epochs_max=n_epochs_max,
    monitor=monitor,
    mindelta=0,
    patience=patience,
    dropout=0.2,
    l2_lambda=1e-4,
    verbose=1,
)

### PREDICTIONS

In [None]:
# Set to False to skip the (expensive) tuning, training and SHAP computation.
run_model = True
if run_model:
    loss_train = []
    loss_dev = []  # despite the name, this collects the validation AUC history
    v_models = []
    training_times = []

    bestHyperparameters_bySplit = {}
    y_pred_by_split = {}

    # Display names used as feature labels in the SHAP plots.
    feature_names = [
    'AMG', 'ATF', 'ATI', 'ATP', 'CAR', 'CF1', 'CF2', 'CF3', 'CF4', 'Falta', 
    'GCC', 'GLI', 'LIN', 'LIP', 'MAC', 'MON', 'NTI', 
    'OTR', 'OXA', 'PAP', 'PEN', 'POL', 'QUI', 
    'SUL', 'TTC', 'hoursVM', 'acinet.$_{pc}$', 'enterobac.$_{pc}$', 'enteroc.$_{pc}$',
    'pseud.$_{pc}$', 'staph.$_{pc}$', 'others.$_{pc}$', 'hoursICU', '# pat_atb', '# pat_MR', 
    'CAR.$_{n}$', 'PAP.$_{n}$', 'Falta.$_{n}$', 'QUI.$_{n}$', 
    'ATF.$_{n}$', 'OXA.$_{n}$', 'PEN.$_{n}$', 'CF3.$_{n}$', 
    'GLI.$_{n}$', 'CF4.$_{n}$', 'SUL.$_{n}$', 'NTI.$_{n}$', 
    'LIN.$_{n}$', 'AMG.$_{n}$', 'MAC.$_{n}$', 'CF1.$_{n}$', 'GCC.$_{n}$', 
    'POL.$_{n}$', 'ATI.$_{n}$', 'MON.$_{n}$', 'LIP.$_{n}$', 'TTC.$_{n}$', 
    'OTR.$_{n}$', 'CF2.$_{n}$', 'ATP.$_{n}$', '# pat_ttl', 'posture.$_{change}$', 
    'insulin', 'nutr_art', 'sedation', 'relax', 'hep_fail', 'renal_fail', 
    'coag_fail', 'hemo_fail', 'resp_fail', 'multi_fail', 'n_transf', 
    'vasoactive.$_{drug}$', 'dosis_nems', 'hoursTracheo', 'hoursUlcer', 
    'hoursHemo', 'C01 PIVC 1', 'C01 PIVC 2', 'C02 CVC - YD', 'C02 CVC - SD', 
    'C02 CVC - SI', 'C02 CVC - FD', 'C02 CVC - YI', 'C02 CVC - FI', '# catheters'
    ]

    all_shap_values = []
    all_inputs = []

    for i in [1,2,3]:
        init = time.time()

        # All tensors and label files of split i live in the same folder.
        split_data_dir = f"../../../DATA/w7days/s{i}"

        X_test = np.load(f"{split_data_dir}/X_test_tensor_standardScaler.npy")
        y_test = pd.read_csv(f"{split_data_dir}/y_test_tensor_standardScaler.csv")["individualMRGerm_stac"].values.astype(int)

        X_train = np.load(f"{split_data_dir}/X_train_tensor_standardScaler.npy")
        y_train = pd.read_csv(f"{split_data_dir}/y_train_tensor_standardScaler.csv")["individualMRGerm_stac"].values.astype(int)

        X_val = np.load(f"{split_data_dir}/X_val_tensor_standardScaler.npy")
        y_val = pd.read_csv(f"{split_data_dir}/y_val_tensor_standardScaler.csv")["individualMRGerm_stac"].values.astype(int)

        # 666 is the placeholder value (see hyperparameters["mask_value"]); it is
        # replaced by 0 in the inputs because the model has no Masking layer.
        X_train = np.where(X_train == 666, 0, X_train)
        X_val = np.where(X_val == 666, 0, X_val)
        X_test = np.where(X_test == 666, 0, X_test)

        # --- HYPERPARAMETER SEARCH (train / validation only) -------------------------------------
        bestHyperparameters = optuna_study(
            hyperparameters,
            seeds[i-1],
            X_train, y_train,
            X_val, y_val,
            f"s{i}",
            norm,
            n_time_steps
        )
        print(f"Best layers: {bestHyperparameters['layers']}")

        fin = time.time()

        bestHyperparameters_bySplit[str(i)] = bestHyperparameters

        # Save best hyperparameters for current split
        split_directory = './Results_LSTM_optuna/split_' + str(i)
        os.makedirs(split_directory, exist_ok=True)

        with open(os.path.join(split_directory, f"bestHyperparameters_split_{i}.pkl"), 'wb') as f:
            pickle.dump(bestHyperparameters, f)

        # Overlay the tuned values on the shared configuration for the final fit.
        hyperparameters.update({
            "dropout": bestHyperparameters["dropout"],
            "layers": bestHyperparameters["layers"],
            "lr_scheduler": bestHyperparameters["lr_scheduler"],
            "adjustment_factor": bestHyperparameters["adjustment_factor"],
            "activation": bestHyperparameters["activation"],
            "patience": bestHyperparameters["patience"],
            "weight_decay": bestHyperparameters["weight_decay"],
            "mindelta": bestHyperparameters["mindelta"],
            "l2_lambda": bestHyperparameters["l2_lambda"],
            "middle_layer_dim": bestHyperparameters["middle_layer_dim"]
        })

        # --- TRY ON TEST -----------------------------------------------------------------------
        utils_models.reset_keras()
        print(hyperparameters)

        model, hist, training_time = run_network(
            X_train, X_val,
            y_train,
            y_val,
            hyperparameters,
            seeds[i-1]
        )

        v_models.append(model)
        loss_train.append(hist.history['loss'])
        loss_dev.append(hist.history['val_auc'])
        training_times.append(training_time)

        y_pred = model.predict(x=X_test)
        y_pred_by_split[str(i)] = y_pred

        # Save y_pred for current split
        with open(os.path.join(split_directory, f"y_pred_split_{i}.pkl"), 'wb') as f:
            pickle.dump(y_pred, f)

        # The list holds the training times of all splits processed so far.
        with open(os.path.join(split_directory, "training_times.pkl"), 'wb') as f:
            pickle.dump(training_times, f)

        # -------- SHAP --------
        # Sampling without replacement fails if more rows are requested than exist,
        # so the background size is capped at the size of the test set.
        n_background = min(100, X_test.shape[0])
        background = X_test[np.random.choice(X_test.shape[0], n_background, replace=False)]
        explainer = shap.GradientExplainer(model, background)

        # Depending on the installed shap version, the result is either a list with
        # one array per model output or a single array with the outputs stacked on a
        # trailing axis. Both cases are reduced to the values of the only output.
        shap_raw = explainer.shap_values(X_test)
        if isinstance(shap_raw, list):
            shap_values = shap_raw[0]
        else:
            shap_values = np.asarray(shap_raw)
            if shap_values.ndim == X_test.ndim + 1:
                shap_values = shap_values[..., 0]

        all_shap_values.append(shap_values)
        all_inputs.append(X_test)

    all_shap_concat = np.concatenate(all_shap_values, axis=0)
    all_inputs_concat = np.concatenate(all_inputs, axis=0)

    # 3-D arrays are averaged over axis 1 so that each sample is left with one
    # value per feature, which is the 2-D layout the SHAP plots below receive.
    if all_shap_concat.ndim == 3:
        shap_values_avg = all_shap_concat.mean(axis=1)
        X_test_avg = all_inputs_concat.mean(axis=1)
    else:
        shap_values_avg = all_shap_concat
        X_test_avg = all_inputs_concat

    # Summary plot
    plt.figure(figsize=(12, 8))
    shap.summary_plot(
        shap_values_avg,
        X_test_avg,
        feature_names=feature_names,
        max_display=30,
        show=False
    )
    plt.xticks(rotation=45, fontsize=18)
    plt.yticks(fontsize=19)
    # The last axes of the figure is used as the colour bar.
    cbar = plt.gcf().axes[-1]
    cbar.tick_params(labelsize=20)
    cbar.set_ylabel("Feature value", fontsize=20)
    plt.tight_layout()
    plt.savefig("shap_summary_LSTM7.png", dpi=300, bbox_inches='tight')
    plt.close()

    # Heatmap
    plt.figure(figsize=(12, 8))
    shap_obj = shap.Explanation(values=shap_values_avg, data=X_test_avg, feature_names=feature_names)
    shap.plots.heatmap(shap_obj, show=False)
    plt.xticks(rotation=45, fontsize=15)
    plt.yticks(fontsize=15)
    plt.tight_layout()
    plt.savefig("shap_heatmap_LSTM7.png", dpi=300, bbox_inches='tight')
    plt.close()

    # END EXECUTION

### RESULTS (PERFORMANCE)

## Step 1. Load model and best results

In [None]:
# Root folder where the training cell stores its per-split results.
directory = './Results_LSTM_optuna'


def load_from_pickle(filename):
    """
    Reads one pickled object from disk.

    Args:
        - filename: Path of the pickle file to read.
    Returns:
        - The object stored in the file.
    """
    # NOTE: unpickling can execute arbitrary code; only load files produced by this notebook.
    with open(filename, mode='rb') as stream:
        payload = pickle.load(stream)
    return payload

    
# Reload the test-set predictions pickled for each split, keyed by the split number as a string.
y_pred_by_split = {
    str(split_id): load_from_pickle(
        os.path.join(f'./Results_LSTM_optuna/split_{split_id}', f"y_pred_split_{split_id}.pkl")
    )
    for split_id in (1, 2, 3)
}

## Step 2. Analysis of results

In [None]:
# One metrics table per split, in split order.
all_metrics = []

for i in (1, 2, 3):
    # Ground-truth test labels of the split, as a flat integer vector.
    labels_table = pd.read_csv(f"../../../DATA/w7days/s{i}/y_test_tensor_standardScaler.csv")
    y_test = labels_table["individualMRGerm_stac"].values.astype(int)
    y_test_single = y_test.flatten()

    # Stored predictions for the same split, flattened to match the labels.
    y_test_pred = y_pred_by_split[str(i)].flatten()

    df_metrics = utils.get_metrics_(y_test_single, y_test_pred)
    print(df_metrics)
    utils.plot_metrics(df_metrics)
    utils.plot_roc_curve(y_test_single, y_test_pred)

    all_metrics.append(df_metrics)

print(all_metrics)

## Save results (metrics)

In [None]:
# Stack the per-split metric tables row-wise and persist them without the index column.
metrics_LSTM = pd.concat(all_metrics, axis=0)
metrics_LSTM.to_csv(
    './Results_LSTM_optuna/metrics_LSTM.csv',
    index=False,
)

In [None]:
# Preview the first rows of the combined metrics table.
metrics_LSTM.head(n=5)

In [None]:
# Column-wise mean and standard deviation across the splits.
metrics_mean, metrics_std = metrics_LSTM.mean(), metrics_LSTM.std()

# One row per metric, with its mean and spread side by side.
summary_columns = {
    "Metric": metrics_mean.index,
    "Mean": metrics_mean.values,
    "Standard Deviation": metrics_std.values,
}
summary_df = pd.DataFrame(summary_columns)

summary_df.to_csv('./Results_LSTM_optuna/metrics_summary_LSTM.csv', index=False)

print("\nMean and Standard Deviation of the Splits:")
print(summary_df)


In [None]:
def _format_mean_std(column):
    """Renders a column's 'mean' and 'std' entries as a 'mean ± std' percentage string."""
    return f"{column['mean']*100:.2f} ± {column['std']*100:.2f}"


# Reload the saved per-split metrics so this cell does not depend on the training run.
metrics_LSTM = pd.read_csv('./Results_LSTM_optuna/metrics_LSTM.csv')

# Two rows ('mean', 'std'), one column per metric.
stats_LSTM = metrics_LSTM.agg(["mean", "std"])
formatted_metrics = stats_LSTM.apply(_format_mean_std, axis=0)

formatted_metrics_df = pd.DataFrame(formatted_metrics, columns=["Metrics (Mean ± Std)"])
formatted_metrics_df.to_csv('./Results_LSTM_optuna/metrics_LSTM_formatted.csv', index=True)
print(formatted_metrics_df)