In [1]:
import optuna
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
from datetime import datetime
import time

import warnings
warnings.filterwarnings('ignore')

import sklearn
from sklearn import metrics
from sklearn.metrics import confusion_matrix, f1_score

import random, os, json
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Masking, Dropout, Dense, Dropout, Flatten, Conv1D
from tensorflow.keras import backend as K
from tensorflow.keras import layers, Model, Input
from tensorflow.keras.regularizers import l2
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras import optimizers

from joblib import Parallel, delayed
import multiprocessing

import pickle
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import regularizers

# FUNCTIONS OF THE MODEL

In [None]:
import os
import random
import time

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.layers import (
    Input, Masking, LayerNormalization, MultiHeadAttention,
    Dropout, Dense, GlobalMaxPooling1D
)
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

import optuna
from optuna.samplers import TPESampler

def reset_keras(seed=42):
    """Seed every random number generator in use and clear the Keras session.

    Args:
        seed: Integer seed applied to Python's ``random``, NumPy and
            TensorFlow, and exported as ``PYTHONHASHSEED``.
    """
    # NOTE(review): PYTHONHASHSEED is read by the interpreter at start-up, so
    # setting it here is presumably only effective for child processes.
    os.environ['PYTHONHASHSEED'] = str(seed)

    for apply_seed in (random.seed, np.random.seed, tf.random.set_seed):
        apply_seed(seed)

    K.clear_session()

    # Best effort: request single-threaded op execution. TensorFlow raises
    # RuntimeError when the thread configuration can no longer be changed;
    # in that case the current settings are kept.
    try:
        threading_cfg = tf.config.threading
        threading_cfg.set_inter_op_parallelism_threads(1)
        threading_cfg.set_intra_op_parallelism_threads(1)
    except RuntimeError:
        pass

def get_sinusoidal_encoding(seq_len, dim):
    """Generate a sinusoidal positional encoding.

    Even feature indices hold ``sin(position * freq)`` and odd feature
    indices hold ``cos(position * freq)``, with frequencies decaying
    geometrically from 1 down to roughly 1/10000.

    Args:
        seq_len: Number of positions (time steps) to encode.
        dim: Number of features per position. Both even and odd values
            are supported.

    Returns:
        A ``tf.float32`` constant of shape ``(1, seq_len, dim)``.
    """
    position = np.arange(seq_len)[:, np.newaxis]
    div_term = np.exp(np.arange(0, dim, 2) * -(np.log(10000.0) / dim))
    angles = position * div_term  # shape: (seq_len, ceil(dim / 2))

    pe = np.zeros((seq_len, dim))
    pe[:, 0::2] = np.sin(angles)
    # For odd `dim` there is one fewer odd column than even columns, so the
    # cosine part must be truncated to avoid a shape mismatch.
    pe[:, 1::2] = np.cos(angles[:, : dim // 2])
    return tf.constant(pe[np.newaxis, ...], dtype=tf.float32)

def build_model(hyperparameters):
    """Build and compile the Transformer-style binary classifier.

    The network masks the input, adds a sinusoidal positional encoding,
    applies ``num_transformer_blocks`` pre-norm blocks (self-attention and
    a single dense layer, each with a residual connection), then a dense
    head, max pooling over time and a sigmoid output unit.

    Args:
        hyperparameters: Dict with the keys ``n_time_steps``, ``layers``
            (``[embed_dim, head_dim, ...]``), ``dropout``, ``num_heads``,
            ``num_transformer_blocks``, ``activation``, ``mask_value`` and
            ``lr_scheduler``. ``l2_reg`` (default 1e-4) and ``epsilon``
            (default 1e-6) are optional.

    Returns:
        A compiled ``tf.keras`` ``Model`` (binary cross-entropy loss, Adam
        optimizer, accuracy and AUC metrics).
    """
    hp = hyperparameters
    n_steps = hp["n_time_steps"]
    width = hp["layers"][0]
    drop_rate = hp["dropout"]
    n_heads = hp["num_heads"]
    n_blocks = hp["num_transformer_blocks"]
    act_fn = hp["activation"]
    weight_penalty = hp.get('l2_reg', 1e-4)
    norm_eps = hp.get('epsilon', 1e-6)

    def l2_penalty():
        # A fresh regularizer instance for every layer, as in a manual build.
        return tf.keras.regularizers.l2(weight_penalty)

    def transformer_block(tensor):
        # Self-attention sub-layer with residual connection.
        normed = LayerNormalization(epsilon=norm_eps)(tensor)
        attended = MultiHeadAttention(num_heads=n_heads, key_dim=width)(normed, normed)
        attended = Dropout(drop_rate)(attended)
        tensor = attended + tensor

        # Position-wise dense sub-layer with residual connection.
        normed = LayerNormalization(epsilon=norm_eps)(tensor)
        projected = Dense(
            width,
            activation=act_fn,
            kernel_regularizer=l2_penalty(),
        )(normed)
        return projected + tensor

    # Input, masking and positional information.
    inputs = Input(shape=(n_steps, width))
    hidden = Masking(mask_value=hp["mask_value"])(inputs)
    hidden = hidden + get_sinusoidal_encoding(n_steps, width)

    # Stack of Transformer blocks, chained one after another.
    for _ in range(n_blocks):
        hidden = transformer_block(hidden)

    # Classification head applied per time step, then pooled over time.
    hidden = Dropout(drop_rate)(hidden)
    hidden = Dense(
        hp["layers"][1],
        activation=act_fn,
        kernel_regularizer=l2_penalty(),
    )(hidden)
    hidden = Dropout(drop_rate)(hidden)

    pooled = GlobalMaxPooling1D()(hidden)
    output = Dense(
        1,
        activation='sigmoid',
        kernel_regularizer=l2_penalty(),
    )(pooled)

    model = Model(inputs, output)
    model.compile(
        loss='binary_crossentropy',
        optimizer=Adam(learning_rate=hp["lr_scheduler"]),
        metrics=['accuracy', 'AUC'],
    )
    return model

def run_network(X_train, X_val, y_train, y_val, hyperparameters):
    """Build a model, fit it with early stopping and time the fit.

    Args:
        X_train: Training inputs.
        X_val: Validation inputs.
        y_train: Training targets.
        y_val: Validation targets.
        hyperparameters: Dict consumed by ``build_model``; this function
            additionally reads ``mindelta``, ``patience``, ``batch_size``,
            ``n_epochs_max`` and ``verbose``.

    Returns:
        Tuple ``(model, history, train_time)`` where ``train_time`` is the
        wall-clock duration of ``model.fit`` in seconds.
    """
    model = build_model(hyperparameters)

    # Stop once the validation loss stalls and roll back to the best epoch.
    stopper = tf.keras.callbacks.EarlyStopping(
        monitor="val_loss",
        mode="min",
        min_delta=hyperparameters["mindelta"],
        patience=hyperparameters["patience"],
        restore_best_weights=True,
    )

    # All arrays are fed to Keras as float32 tensors.
    X_train, y_train, X_val, y_val = [
        tf.cast(data, tf.float32)
        for data in (X_train, y_train, X_val, y_val)
    ]

    fit_options = {
        "validation_data": (X_val, y_val),
        "callbacks": [stopper],
        "batch_size": hyperparameters['batch_size'],
        "epochs": hyperparameters['n_epochs_max'],
        "verbose": hyperparameters["verbose"],
    }

    t0 = time.time()
    history = model.fit(X_train, y_train, **fit_options)
    train_time = time.time() - t0

    return model, history, train_time

def objective(trial, base_hparams, seed, X_train, y_train, X_val, y_val):
    """Optuna objective: sample hyperparameters, train, return best val loss.

    Args:
        trial: Optuna trial used to sample the hyperparameters.
        base_hparams: Dict of fixed hyperparameters; it is copied, not
            mutated. ``base_hparams['layers'][0]`` is reused as the
            embedding dimension.
        seed: Seed passed to ``reset_keras`` before the model is built.
        X_train: Training inputs.
        y_train: Training targets.
        X_val: Validation inputs.
        y_val: Validation targets.

    Returns:
        The minimum validation loss observed during training.
    """
    reset_keras(seed)

    # Shallow copy is sufficient: nested values are replaced, never mutated.
    hparams = base_hparams.copy()
    hparams["dropout"] = trial.suggest_float('dropout', 0.0, 0.3)
    # Odd sizes 1, 3, ..., 39. The upper bound is written as 39 because 40 is
    # not reachable from 1 in steps of 2.
    hparams['middle_layer_dim'] = trial.suggest_int('middle_layer_dim', 1, 39, step=2)
    hparams['layers'] = [hparams['layers'][0], hparams['middle_layer_dim'], 1]
    hparams["lr_scheduler"] = trial.suggest_float('lr_scheduler', 1e-3, 1e-1, log=True)
    hparams['l2_reg'] = trial.suggest_float('l2_reg', 1e-6, 1e-2, log=True)
    hparams['num_transformer_blocks'] = trial.suggest_int("num_transformer_blocks", 1, 3)
    hparams['activation'] = trial.suggest_categorical("activation", ['LeakyReLU'])
    hparams['num_heads'] = trial.suggest_int("num_heads", 2, 3)
    # Degenerate range: epsilon is effectively fixed at 1e-6 but is still
    # recorded in the trial parameters.
    hparams['epsilon'] = trial.suggest_float('epsilon', 1e-6, 1e-6, log=True)
    hparams['patience'] = trial.suggest_int('patience', 1, 50)
    hparams['mindelta'] = trial.suggest_float('mindelta', 1e-10, 1e-3, log=True)
    # NOTE: a 'weight_decay' parameter used to be sampled here with the range
    # (1e-5, 0). That range is invalid (low > high, and a log-scaled bound
    # must be positive), and the sampled value was never consumed by the
    # model, so it has been removed.

    model, history, _ = run_network(X_train, X_val, y_train, y_val, hparams)
    return min(history.history["val_loss"])

def optuna_study(base_hparams, seed, X_train, y_train, X_val, y_val, n_trials=30):
    """Run an Optuna study that minimizes the validation loss.

    Args:
        base_hparams: Dict of fixed hyperparameters forwarded to ``objective``.
        seed: Seed for both the TPE sampler and every trial's model.
        X_train: Training inputs.
        y_train: Training targets.
        X_val: Validation inputs.
        y_val: Validation targets.
        n_trials: Number of trials to run (defaults to 30).

    Returns:
        Tuple ``(best_params, best_value)`` of the finished study.
    """
    # A seeded sampler makes the sequence of suggested parameters repeatable.
    sampler = TPESampler(seed=seed)
    study = optuna.create_study(direction='minimize', sampler=sampler)
    study.optimize(
        lambda t: objective(t, base_hparams, seed, X_train, y_train, X_val, y_val),
        n_trials=n_trials,
    )
    return study.best_params, study.best_value

# HYPERPARAMETERS

- **seeds**: Seed values to ensure reproducibility.
- **input_shape**: Number of features in each time step of the input data.
- **n_time_steps**: Number of time steps in the input sequence.
- **batch_size**: Number of samples in each training batch.
- **n_epochs_max**: Maximum number of epochs for training.
- **layer_list**: A list with different configurations for the layers of the model.
- **dropout**: Dropout rates.
- **lr_scheduler**: Learning rates.
- **norm**: Type of normalization applied to the data.
- **num_heads**: Number of attention heads in the multi-head attention mechanism.
- **num_transformer_blocks**: Number of transformer blocks.
- **epsilon**: Small positive constant used by the layer-normalization layers to avoid division by zero.

In [None]:
# Seeds for reproducibility; the run cell uses seeds[i-1] for split i.
seeds = [42, 76, 124, 163, 192, 205]

adjustment_factor = [1]
activation = ['LeakyReLU']
norm = "robustNorm"
patience = 3
monitor = "val_loss"

input_shape    = 80    # number of features per time step
n_time_steps   = 14    # number of time steps per input sequence
batch_size     = 32    # samples per training batch
n_epochs_max   = 1000  # upper bound on epochs; early stopping usually ends sooner


# Base configuration. The Optuna objective overrides dropout, layers,
# lr_scheduler, l2_reg, num_transformer_blocks, activation, num_heads,
# epsilon, patience and mindelta for every trial; "lr_scheduler" and
# "activation" are supplied only by that search.
hyperparameters = {
    "n_time_steps": n_time_steps,
    "mask_value": 666,  # value handed to the Masking layer
    "batch_size": batch_size,
    "n_epochs_max": n_epochs_max,
    "monitor": monitor,
    "layers": [input_shape, input_shape, 1],

    "mindelta": 0,
    "patience": patience,
    "dropout": 0.2,
    "verbose": 1,
    "input_shape": input_shape,
    "num_heads": 7,
    "num_transformer_blocks": 0,
    "l2_reg": 1e-4,
    # Must be strictly positive: it guards against division by zero in the
    # normalization layers. The value matches build_model's fallback and the
    # value used during the hyperparameter search.
    "epsilon": 1e-6
}

# RUNNING AND TRYING ON TEST

In [None]:
# Toggle for the (expensive) optimization + training + evaluation pipeline,
# and for collecting/printing the per-split debug report.
run_model = True
debug = True

from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    roc_auc_score,
    average_precision_score,
)

if run_model:
    # Single source of truth for the evaluated splits (loop + summary table).
    splits = [1, 2, 3]
    results_root = './Results_Transformer_optuna'

    loss_train = []
    loss_dev = []
    v_models = []
    training_times = []
    optimization_times = []
    inference_times = []

    v_accuracy_test = []
    v_specificity = []
    v_precision = []
    v_recall = []
    v_f1score = []
    v_roc = []
    v_aucpr = []

    bestHyperparameters_bySplit = {}
    y_pred_by_split = {}
    results = ""

    for i in splits:
        print(f"\n===== Split {i} =====")

        # ---- Data loading -------------------------------------------------
        X_train = np.load(f"../../DATA/s{i}/X_train_tensor_robustNorm.npy")
        X_val   = np.load(f"../../DATA/s{i}/X_val_tensor_robustNorm.npy")
        X_test  = np.load(f"../../DATA/s{i}/X_test_tensor_robustNorm.npy")

        # Keep every n_time_steps-th label row.
        # NOTE(review): presumably the CSVs hold one row per time step and the
        # label is constant within a sequence - confirm against the data prep.
        y_train = pd.read_csv(f"../../DATA/s{i}/y_train_robustNorm.csv")[['individualMRGerm_stac']]
        y_train = y_train.iloc[0::hyperparameters["n_time_steps"]].reset_index(drop=True)
        y_val   = pd.read_csv(f"../../DATA/s{i}/y_val_robustNorm.csv")[['individualMRGerm_stac']]
        y_val   = y_val.iloc[0::hyperparameters["n_time_steps"]].reset_index(drop=True)
        y_test  = pd.read_csv(f"../../DATA/s{i}/y_test_robustNorm.csv")[['individualMRGerm_stac']]
        y_test  = y_test.iloc[0::hyperparameters["n_time_steps"]].reset_index(drop=True)

        # ---- Hyperparameter search ---------------------------------------
        start_opt = time.time()
        best_params, best_val = optuna_study(
            base_hparams=hyperparameters,
            seed=seeds[i-1],
            X_train=X_train, y_train=y_train,
            X_val=X_val,     y_val=y_val
        )
        end_opt = time.time()
        optimization_times.append(end_opt - start_opt)

        print(f"Best params (val_loss={best_val:.4f}): {best_params}")
        bestHyperparameters_bySplit[str(i)] = best_params

        embed_dim = hyperparameters["layers"][0]
        # Named `layer_dims` (not `layers`) so the imported
        # `tensorflow.keras.layers` module is not shadowed.
        layer_dims = [embed_dim, best_params["middle_layer_dim"], 1]

        train_hparams = hyperparameters.copy()
        train_hparams.update({
            "dropout":             best_params["dropout"],
            "layers":              layer_dims,
            "lr_scheduler":        best_params["lr_scheduler"],
            "l2_reg":              best_params["l2_reg"],
            "num_transformer_blocks": best_params["num_transformer_blocks"],
            "activation":          best_params["activation"],
            "num_heads":           best_params["num_heads"],
            "epsilon":             best_params["epsilon"],
            "patience":            best_params["patience"],
            "mindelta":            best_params["mindelta"],
        })

        split_dir = f'{results_root}/split_{i}'
        os.makedirs(split_dir, exist_ok=True)
        with open(os.path.join(split_dir, f"best_params_split_{i}.pkl"), 'wb') as f:
            pickle.dump(best_params, f)

        # ---- Final training with the best configuration ------------------
        # Re-seed so the retrained model starts from the same state as the
        # trials did.
        reset_keras(seeds[i-1])
        print("Train hyperparameters:", train_hparams)

        # Timed around run_network, so model construction is included.
        start_train = time.time()
        model, hist, _time_train = run_network(
            X_train, X_val,
            y_train,   y_val,
            train_hparams
        )
        end_train = time.time()
        training_times.append(end_train - start_train)

        v_models.append(model)
        loss_train.append(hist.history['loss'])
        loss_dev.append(hist.history['val_loss'])

        # ---- Inference on the test set -----------------------------------
        start_inf = time.time()
        y_pred = model.predict(X_test)
        end_inf = time.time()
        inference_times.append(end_inf - start_inf)

        y_pred_by_split[str(i)] = y_pred
        with open(os.path.join(split_dir, f"y_pred_split_{i}.pkl"), 'wb') as f:
            pickle.dump(y_pred, f)

        model.save(os.path.join(split_dir, f"model_split_{i}.h5"))

        # ---- Metrics -----------------------------------------------------
        # Hard labels from the sigmoid scores, computed once.
        y_pred_label = np.round(y_pred)

        acc   = accuracy_score(y_test, y_pred_label)
        # labels=[0, 1] guarantees a 2x2 matrix even when only one class is
        # present, so the four-way unpacking cannot fail.
        tn, fp, fn, tp = confusion_matrix(y_test, y_pred_label, labels=[0, 1]).ravel()
        roc   = roc_auc_score(y_test, y_pred)
        aucpr = average_precision_score(y_test, y_pred)

        v_accuracy_test.append(acc)
        v_specificity.append(tn / (tn + fp))
        v_precision.append(tp / (tp + fp))
        v_recall.append(tp / (tp + fn))
        v_f1score.append((2 * v_recall[-1] * v_precision[-1]) / (v_recall[-1] + v_precision[-1]))
        v_roc.append(roc)
        v_aucpr.append(aucpr)

        if debug:
            results += (
                f"Split {i} - Times (s): Opt {optimization_times[-1]:.2f}, "
                f"Train {training_times[-1]:.2f}, Inf {inference_times[-1]:.2f}\n"
                f"    TP {tp} | FP {fp} | TN {tn} | FN {fn}\n"
                f"    Acc {acc:.4f} | ROC-AUC {roc:.4f} | AUC-PR {aucpr:.4f}\n"
            )

    # Show the per-split report that was collected above.
    if debug:
        print(results)

    # ---- Persist the per-split summary table ------------------------------
    summary_df = pd.DataFrame({
        "Split":           splits,
        "OptimizationTime": optimization_times,
        "TrainingTime":     training_times,
        "InferenceTime":    inference_times,
        "Accuracy":         v_accuracy_test,
        "Specificity":      v_specificity,
        "Precision":        v_precision,
        "Recall":           v_recall,
        "F1Score":          v_f1score,
        "ROC_AUC":          v_roc,
        "AUC_PR":           v_aucpr
    })
    os.makedirs(results_root, exist_ok=True)
    summary_df.to_csv(f'{results_root}/summary_metrics.csv', index=False)


## RESULTS (PERFORMANCE)

In [2]:
# Reload the per-split metrics table that the run cell wrote to disk, so the
# report below can be regenerated without re-running the training.
directory = './Results_Transformer_optuna'
summary_path = os.path.join(directory, "summary_metrics.csv")
summary_df = pd.read_csv(summary_path)


def calculateKPI(parameter):
    """Return the mean and standard deviation of a metric, as percentages.

    Args:
        parameter: Sequence, NumPy array or pandas Series of metric values
            expressed as fractions (for example 0.81 for 81 %).

    Returns:
        Tuple ``(mean, deviation)``: the mean and the population standard
        deviation (divisor ``n``), each multiplied by 100 and rounded to
        two decimals.
    """
    # Coerce once so lists, arrays and Series are all handled the same way.
    values = np.asarray(parameter, dtype=float)
    mean = round(values.mean() * 100, 2)
    # ndarray.std() defaults to ddof=0, i.e. the population standard deviation.
    deviation = round(values.std() * 100, 2)
    return mean, deviation

def format_metric_line(metric_name, mean_value, deviation_value):
    """Format one report line as ``"<name>: <mean> +- <deviation>\\n"``.

    Both numbers are rendered with two decimals and the line ends with a
    newline character.
    """
    template = "{}: {:.2f} +- {:.2f}\n"
    return template.format(metric_name, mean_value, deviation_value)

# Mean and standard deviation (in %) of every metric across the splits.
mean_test, deviation_test = calculateKPI(summary_df["Accuracy"])
mean_specificity, deviation_specificity = calculateKPI(summary_df["Specificity"])
mean_recall, deviation_recall = calculateKPI(summary_df["Recall"])
mean_f1, deviation_f1 = calculateKPI(summary_df["F1Score"])
mean_precision, deviation_precision = calculateKPI(summary_df["Precision"])
mean_roc, deviation_roc = calculateKPI(summary_df["ROC_AUC"])
mean_aucpr, deviation_aucpr = calculateKPI(summary_df["AUC_PR"])

# Report with accuracy listed first.
results = "".join(
    format_metric_line(label, avg, dev)
    for label, avg, dev in (
        ("Test Accuracy", mean_test, deviation_test),
        ("Specificity", mean_specificity, deviation_specificity),
        ("Sensitivity", mean_recall, deviation_recall),
        ("Precision", mean_precision, deviation_precision),
        ("F1-score", mean_f1, deviation_f1),
        ("ROC-AUC", mean_roc, deviation_roc),
        ("AUC-PR", mean_aucpr, deviation_aucpr),
    )
)

# Printed report: same lines, with sensitivity first and accuracy last.
final_results = "".join(
    format_metric_line(label, avg, dev)
    for label, avg, dev in (
        ("Sensitivity", mean_recall, deviation_recall),
        ("Specificity", mean_specificity, deviation_specificity),
        ("Precision", mean_precision, deviation_precision),
        ("F1-score", mean_f1, deviation_f1),
        ("ROC-AUC", mean_roc, deviation_roc),
        ("AUC-PR", mean_aucpr, deviation_aucpr),
        ("Test Accuracy", mean_test, deviation_test),
    )
)

print(final_results)

Sensitivity: 62.26 +- 6.72
Specificity: 84.96 +- 6.33
Precision: 44.67 +- 7.91
F1-score: 51.06 +- 3.44
ROC-AUC: 78.28 +- 2.01
AUC-PR: 49.30 +- 3.70
Test Accuracy: 81.52 +- 4.38

