In [None]:
import optuna
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
from datetime import datetime
import time
import warnings
from sklearn.utils import resample

warnings.filterwarnings('ignore')

import sklearn
from sklearn import metrics
import random, os, json

import random, os, json
#os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
#os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

import sys
sys.path.append("../")
import utils

from joblib import Parallel, delayed
import multiprocessing

import pickle
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from einops import rearrange, repeat
from torch.utils.data import DataLoader, TensorDataset
import matplotlib.pyplot as plt


from torch.optim import Adam
from torch.nn import BCELoss
import numpy as np
import argparse
import pandas as pd
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, recall_score, confusion_matrix

from transformers import LlamaForCausalLM, LlamaConfig
from transformers import LlamaConfig
from peft import LoraConfig, get_peft_model
import gc
from torch.optim.lr_scheduler import ReduceLROnPlateau

device = torch.device('cuda:1')
from peft import LoraConfig, get_peft_model
from peft import PeftModel, PeftConfig
from transformers import LlamaForCausalLM

In [None]:
def calculate_metrics(y_true, y_pred_probs):
    """
    Compute binary-classification metrics from predicted probabilities.

    Args:
        - y_true: array-like of ground-truth labels. They are cast to int, so
          they are assumed to be encoded as 0/1 (possibly stored as floats).
        - y_pred_probs: array-like of predicted probabilities for class 1.

    Returns:
        - Tuple (accuracy, sensitivity, specificity, roc_auc, f1, tn, fp, fn, tp).
          roc_auc is NaN when y_true contains a single class (AUC is undefined there).
    """
    y_true = np.asarray(y_true).astype(int)
    y_pred_probs = np.asarray(y_pred_probs)

    # np.round rounds half to even, so a probability of exactly 0.5 maps to class 0.
    y_pred = np.round(y_pred_probs).astype(int)

    # Pin the label set so the matrix is always 2x2; without it, data containing
    # a single class yields a 1x1 matrix and the 4-way unpacking below fails.
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred, labels=[0, 1]).ravel()

    accuracy = accuracy_score(y_true, y_pred)
    sensitivity = recall_score(y_true, y_pred)
    specificity = tn / (tn + fp) if (tn + fp) > 0 else 0
    # ROC AUC needs both classes in y_true; report NaN instead of aborting the run.
    roc_auc = roc_auc_score(y_true, y_pred_probs) if np.unique(y_true).size > 1 else float('nan')
    f1 = f1_score(y_true, y_pred)

    return accuracy, sensitivity, specificity, roc_auc, f1, tn, fp, fn, tp

### HYPERPARAMETERS 

- **seeds**: Seed values (one per data split) used to ensure reproducibility.
- **batch_size**: Number of samples per batch used during training.
- **n_epochs_max**: Maximum number of training epochs (training may stop earlier through early stopping).
- **dropout**: Dropout rate applied to prevent overfitting.
- **weight_decay**: Weight decay passed to the optimizer, acting as additional L2 regularization.
- **lr**: Learning rate assigned to the optimizer.
- **patience**: Number of epochs with no improvement before early stopping is triggered.

In [None]:
# One seed per data split (indexed as seeds[i-1] for split i further below).
seeds = [9, 76, 227]

# Shared training constants, also exposed through the dictionary below.
batch_size = 4
n_epochs_max = 100

# Default hyperparameters; lr, weight_decay, dropout and patience are the
# entries that the Optuna search overrides per trial.
hyperparameters = dict(
    batch_size=batch_size,
    n_epochs_max=n_epochs_max,
    weight_decay=1e-4,
    lr=5e-5,
    patience=15,
    dropout=0.15,
)

### FUNCTIONS OF THE MODEL

In [None]:
class MLP(nn.Module):
    """
    Feed-forward stack of linear layers with GELU between consecutive layers.

    Args:
        - input_dim: Size of the last dimension of the input.
        - hidden_dims: List with the width of each hidden layer (may be empty).
        - output_dim: Size of the last dimension of the output.
    """

    def __init__(self, input_dim, hidden_dims, output_dim):
        super().__init__()
        widths = [input_dim] + hidden_dims + [output_dim]
        # Consecutive width pairs define each linear layer: (w0->w1), (w1->w2), ...
        self.linear_layers = nn.ModuleList(
            [nn.Linear(fan_in, fan_out) for fan_in, fan_out in zip(widths[:-1], widths[1:])]
        )

    def forward(self, x):
        # Every layer except the last one is followed by a GELU activation.
        *hidden_layers, output_layer = self.linear_layers
        for layer in hidden_layers:
            x = F.gelu(layer(x))
        return output_layer(x)


class InstructTime(nn.Module): 
    """
    Binary classifier for multivariate time series built on a LoRA-adapted Llama.

    Pipeline: per-time-step MLP projection to the Llama hidden size -> 2-layer
    Transformer encoder over the time axis -> Llama (fed through `inputs_embeds`)
    -> linear head applied to the last position of the last hidden layer.

    Args:
        - config: LlamaConfig describing the backbone. Note that
          `config.output_hidden_states` is set to True on the passed object.
        - input_dim: Number of features per time step.
        - time_steps: Not used by this class; kept for interface compatibility.
        - hidden_dim: Width of the last hidden layer of the projection MLP and
          feed-forward width of the Transformer encoder layers.
        - dropout: Dropout rate of the Transformer encoder layers.
    """

    def __init__(self, config, input_dim=87, time_steps=4, hidden_dim=256, dropout=0.15):
        super().__init__()  

        config.output_hidden_states = True 
        # The backbone is instantiated from the config alone; no weights are
        # loaded in this block.
        # NOTE(review): confirm whether LlamaForCausalLM.from_pretrained(...) was intended.
        llama_model = LlamaForCausalLM(config)

        lora_config = LoraConfig(
            r=8,  
            lora_alpha=32, 
            lora_dropout=0.1, 
            target_modules=["q_proj", "k_proj", "v_proj"],  
        )
        self.model = get_peft_model(llama_model, lora_config)  

        self.temporal_projection = MLP(input_dim, [64, 128, hidden_dim], config.hidden_size)
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=config.hidden_size, nhead=config.num_attention_heads, dim_feedforward=hidden_dim, dropout=dropout 
        )
        self.temporal_encoder = nn.TransformerEncoder(encoder_layer, num_layers=2)
        self.classifier = nn.Linear(config.hidden_size, 1)

    def forward(self, x):
        """
        Args:
            - x: 3-D float tensor whose last dimension has size `input_dim`;
              presumably (batch, time_steps, input_dim) — TODO confirm with the data.

        Returns:
            - Tensor of raw logits (no sigmoid applied) with the last dimension squeezed.
        """
        x = self.temporal_projection(x)         
        # The encoder layer is built with the default batch_first=False, so the
        # first two axes are swapped before the encoder and swapped back after it.
        x = x.permute(1, 0, 2)  
        x = self.temporal_encoder(x)  
        x = x.permute(1, 0, 2)  

        llama_outputs = self.model(inputs_embeds=x, output_hidden_states=True) 
        hidden_states = llama_outputs.hidden_states[-1] 

        # Use the representation at the last sequence position as the summary.
        x = hidden_states[:, -1, :]
        output = self.classifier(x).squeeze(-1) 

        return output


In [None]:
class EarlyStopping:
    """
    Early stops the training if validation loss does not improve after a given patience.

    The instance is called once per epoch with the validation loss and the model.
    Whenever the loss improves, the model's state_dict is saved to `path`.
    """

    def __init__(self, patience=5, delta=0, verbose=False, path='checkpoint.pt'):
        """
        Args:
            - patience: Number of epochs to wait for an improvement before stopping the training. 
            - delta: Minimum change in the monitored metric to qualify as an improvement.
            - verbose: If True, prints detailed messages each time the model improves and when
              the no-improvement counter increases.
            - path: File where the best model state_dict is saved.
        """
        self.patience = patience
        self.delta = delta
        self.verbose = verbose
        self.path = path
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.best_loss = float('inf')

    def __call__(self, val_loss, model):
        score = -val_loss  # Inverting to track improvement 
        # NaN compares False with everything, so without this flag a NaN loss
        # would fall into the "improved" branch and overwrite the best checkpoint.
        loss_is_nan = bool(np.isnan(val_loss))

        if self.best_score is None:
            # Always checkpoint on the first call so a file from this run exists;
            # a NaN first loss is recorded as the worst possible score.
            self.best_score = float('-inf') if loss_is_nan else score
            self.save_checkpoint(val_loss, model)
        elif loss_is_nan or score < self.best_score + self.delta:
            self.counter += 1
            if self.verbose:
                print(f"EarlyStopping counter: {self.counter} out of {self.patience}")
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, model):
        """Saves the model when the validation loss decreases."""
        if self.verbose:
            print(f"Validation loss decreased ({self.best_loss:.4f} --> {val_loss:.4f}). Saving model ...")
        self.best_loss = val_loss
        torch.save(model.state_dict(), self.path)

In [None]:
def set_seed(seed):
    """Seed the Python, NumPy and PyTorch (CPU and all CUDA devices) RNGs and make cuDNN deterministic."""
    seeders = (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed_all)
    for seeder in seeders:
        seeder(seed)

    # Deterministic kernels on, autotuner off.
    cudnn_backend = torch.backends.cudnn
    cudnn_backend.deterministic = True
    cudnn_backend.benchmark = False
    
def run_network(X_train, X_val, y_train, y_val, hyperparameters, seed): 
    """
    Trains and evaluates the built model based on the provided data and hyperparameters.

    Training uses BCEWithLogitsLoss with Adam, halves the learning rate when the
    validation loss plateaus, and stops early according to EarlyStopping. The
    checkpoint written by EarlyStopping ('checkpoint.pt') is reloaded before returning.

    Args:
        - X_train, X_val: numpy.ndarray. Training and validation inputs.
        - y_train, y_val: numpy.ndarray. Training and validation labels.
        - hyperparameters: Dictionary containing training and model hyperparameters
          (keys read here: 'batch_size', 'dropout', 'lr', 'weight_decay', 'patience', 'n_epochs_max').
        - seed: Random seed for reproducibility.

    Returns:
        - model: The trained PyTorch model with the best checkpoint loaded.
        - history: Dictionary with the per-epoch lists 'loss' and 'val_loss'.
    """   
    device = torch.device('cuda:1')
    batch_size = hyperparameters['batch_size']

    # The full datasets are placed on the device once, so batches drawn from the
    # loaders are already there and need no further transfer.
    X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to(device)
    y_train_tensor = torch.tensor(y_train, dtype=torch.float32).to(device)
    X_val_tensor = torch.tensor(X_val, dtype=torch.float32).to(device)
    y_val_tensor = torch.tensor(y_val, dtype=torch.float32).to(device)

    train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
    val_dataset = TensorDataset(X_val_tensor, y_val_tensor)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    # Seed right before building the model so weight initialisation is reproducible.
    set_seed(seed)

    config = LlamaConfig.from_pretrained("meta-llama/Llama-3.2-1B")
    model = InstructTime(config, dropout=hyperparameters['dropout']).to(device)

    print(model)

    criterion = nn.BCEWithLogitsLoss()
    optimizer = optim.Adam(model.parameters(), lr=hyperparameters['lr'], weight_decay=hyperparameters['weight_decay'])
    scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=3, verbose=True)

    train_loss_history = []
    val_loss_history = []
    early_stopping = EarlyStopping(patience=hyperparameters['patience'], delta=0.001, verbose=True)

    for epoch in range(hyperparameters['n_epochs_max']):
        # ---- training pass ----
        model.train()
        train_loss = 0.0

        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            logits = model(X_batch).view(-1)
            loss = criterion(logits, y_batch)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

        train_loss /= len(train_loader)
        train_loss_history.append(train_loss)

        # ---- validation pass ----
        model.eval()
        val_loss = 0.0
        y_val_preds = []
        y_val_true = []
        with torch.no_grad():
            for X_val_batch, y_val_batch in val_loader:
                val_logits = model(X_val_batch).view(-1)
                loss = criterion(val_logits, y_val_batch)
                val_loss += loss.item()
                probs = torch.sigmoid(val_logits)

                y_val_preds.extend(probs.cpu().numpy())
                y_val_true.extend(y_val_batch.cpu().numpy())

        val_loss /= len(val_loader)
        val_loss_history.append(val_loss)

        y_val_preds = np.array(y_val_preds)
        y_val_true = np.array(y_val_true)
        accuracy, sensitivity, specificity, roc_auc, f1, tn, fp, fn, tp  = calculate_metrics(y_val_true, y_val_preds)

        scheduler.step(val_loss)

        print(f"Epoch {epoch + 1}, Training Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}")
        print(f"Validation Metrics - Accuracy: {accuracy:.4f}, f1: {f1:.4f}, sensitivity: {sensitivity:.4f}, Specificity: {specificity:.4f}, ROC AUC: {roc_auc:.4f} ")
        print(f"Confusion Matrix - TN: {tn}, FP: {fp}, FN: {fn}, TP: {tp}")

        early_stopping(val_loss, model)
        if early_stopping.early_stop:
            print("Early stopping triggered.")
            break  

    # Restore the weights saved by EarlyStopping at its best validation loss.
    model.load_state_dict(torch.load('checkpoint.pt')) 

    history = {'loss': train_loss_history, 'val_loss': val_loss_history}
    return model, history

In [None]:
def objective(trial, hyperparameters, seed, X_train, y_train, X_val, y_val):
    """
    Objective function for hyperparameter optimization using Optuna.

    Samples lr, weight_decay, dropout and patience for this trial, trains the
    network once with them, and scores the trial by its lowest validation loss.

    Args:
        - trial (optuna.trial.Trial): Optuna trial object.
        - hyperparameters: Dictionary containing training and model hyperparameters.
          It is copied, so the caller's dictionary is not modified.
        - seed: Base random seed; the trial number is added to it.
        - X_train, X_val: numpy.ndarray. Training and validation inputs.
        - y_train, y_val: numpy.ndarray. Training and validation labels.

    Returns:
        - metric_dev: Best (minimum) validation loss achieved during training.     
    """
    trial_seed = seed + trial.number  
    set_seed(trial_seed)

    hyperparameters_copy = hyperparameters.copy()

    hyperparameters_copy['lr'] = trial.suggest_float("lr", 1e-5, 1e-3, log=True)
    hyperparameters_copy['weight_decay'] = trial.suggest_float("weight_decay", 1e-6, 1e-2, log=True)
    hyperparameters_copy['dropout'] = trial.suggest_float("dropout", 0.0, 0.3)
    hyperparameters_copy['patience'] = trial.suggest_int("patience", 3, 20)

    model, history = run_network(
        X_train, X_val,
        y_train,
        y_val,
        hyperparameters_copy,
        trial_seed,
    )

    # run_network returns a plain dict with per-epoch losses.
    metric_dev = float(np.min(history["val_loss"]))

    # Only the score is needed; drop the trial's model so its memory can be
    # reclaimed before the next trial builds a new one.
    del model
    gc.collect()
    torch.cuda.empty_cache()

    return metric_dev

def optuna_study(hyperparameters, seed, X_train, y_train, X_val, y_val, n_trials=20):
    """
    Runs an Optuna study to optimize hyperparameters for the model.

    Args:
        - hyperparameters: Dictionary containing training and model hyperparameters.
        - seed: Random seed for reproducibility (also seeds the TPE sampler).
        - X_train, X_val: numpy.ndarray. Training and validation inputs.
        - y_train, y_val: numpy.ndarray. Training and validation labels.
        - n_trials: Number of Optuna trials to run (default 20).

    Returns:
        - best_hyperparameters: Dictionary containing the best hyperparameters found 
          after the optimization process (keys: 'lr', 'weight_decay', 'dropout', 'patience').
    """    
    set_seed(seed)

    sampler = optuna.samplers.TPESampler(seed=seed)
    study = optuna.create_study(direction='minimize', sampler=sampler)

    # Trials run sequentially (n_jobs=1).
    study.optimize(
        lambda trial: objective(trial, hyperparameters, seed, X_train, y_train, X_val, y_val),
        n_trials=n_trials,
        n_jobs=1
    )

    best_params = study.best_params
    best_metric = study.best_value

    tuned_keys = ('lr', 'weight_decay', 'dropout', 'patience')
    best_hyperparameters = {key: best_params[key] for key in tuned_keys}

    print(f"Best Hyperparameters: {best_params}")
    print(f"Best Validation Metric: {best_metric}")

    return best_hyperparameters

### PREDICTIONS

In [None]:
import time
import os
import pickle
import torch
import numpy as np
import pandas as pd

# Per-split pipeline: load data, tune hyperparameters with Optuna, retrain with
# the best ones, evaluate on the test set and persist predictions.
run_model = True
results = []
if run_model:
    loss_train = []
    loss_dev = []
    # NOTE(review): every trained model is kept referenced here, so all of them
    # stay in memory until this list is released.
    v_models = []

    y_pred_by_split = {}
    bestHyperparameters_bySplit = {}

    for i in [1,2,3]:
        init = time.time()  # start time of this split (not used further in this cell)

        data_dir = f"../../../DATA/w4days/s{i}"

        X_test = np.load(f"{data_dir}/X_test_tensor_standardScaler.npy")
        y_test = pd.read_csv(f"{data_dir}/y_test_tensor_standardScaler.csv")["individualMRGerm_stac"].values.astype(int)

        X_train = np.load(f"{data_dir}/X_train_tensor_standardScaler.npy")
        y_train = pd.read_csv(f"{data_dir}/y_train_tensor_standardScaler.csv")["individualMRGerm_stac"].values.astype(int)

        X_val = np.load(f"{data_dir}/X_val_tensor_standardScaler.npy")
        y_val = pd.read_csv(f"{data_dir}/y_val_tensor_standardScaler.csv")["individualMRGerm_stac"].values.astype(int)

        # Entries equal to 666 are replaced by 0.
        # NOTE(review): 666 presumably marks missing/padded values — confirm with the data pipeline.
        X_train = np.where(X_train == 666, 0, X_train)
        X_val = np.where(X_val == 666, 0, X_val)
        X_test = np.where(X_test == 666, 0, X_test)

        X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(device)
        y_test_tensor = torch.tensor(y_test, dtype=torch.float32).to(device)
        test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
        test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

        bestHyperparameters = optuna_study(
            hyperparameters,
            seeds[i-1],
            X_train, y_train,
            X_val, y_val
        )

        bestHyperparameters_bySplit[str(i)] = bestHyperparameters

        split_directory = f'./Results_InstructTimeLlama/split_{i}'
        os.makedirs(split_directory, exist_ok=True)

        with open(os.path.join(split_directory, f"bestHyperparameters_split_{i}.pkl"), 'wb') as f:
            pickle.dump(bestHyperparameters, f)

        # Merge into a fresh dict so the shared defaults are not overwritten by
        # one split's tuned values (keeps the cell idempotent on re-run).
        split_hyperparameters = {
            **hyperparameters,
            'lr': bestHyperparameters['lr'],
            'weight_decay': bestHyperparameters['weight_decay'],
            'dropout': bestHyperparameters['dropout'],
            'patience': bestHyperparameters['patience'],
        }

        model, history = run_network(
            X_train, X_val,
            y_train,
            y_val,
            split_hyperparameters,
            seeds[i-1]
        )

        v_models.append(model)
        loss_train.append(history['loss'])
        loss_dev.append(history['val_loss'])

        criterion = nn.BCEWithLogitsLoss()

        # Make inference mode explicit rather than relying on the state in which
        # run_network happened to leave the model.
        model.eval()

        y_pred = []
        y_true = []
        test_loss = 0
        with torch.no_grad():
            for X_batch, y_batch in test_loader:
                X_batch, y_batch = X_batch.to(device), y_batch.to(device)
                logits = model(X_batch).view(-1)
                loss = criterion(logits, y_batch)
                test_loss += loss.item()
                probs = torch.sigmoid(logits)

                y_pred.extend(probs.cpu().numpy().flatten())
                y_true.extend(y_batch.cpu().numpy().flatten())

        test_loss /= len(test_loader)
        y_pred = np.array(y_pred)
        y_true = np.array(y_true)

        accuracy, sensitivity, specificity, roc_auc, f1, tn, fp, fn, tp = calculate_metrics(y_true, y_pred)

        print(f" Test Loss: {test_loss:.4f}")
        print(f"Test Metrics for Split {i} - Accuracy: {accuracy:.4f}, f1: {f1:.4f}, sensitivity: {sensitivity:.4f}, Specificity: {specificity:.4f}, ROC AUC: {roc_auc:.4f}")
        print(f"Confusion Matrix - TN: {tn}, FP: {fp}, FN: {fn}, TP: {tp}")

        results.append([accuracy, sensitivity, specificity, roc_auc, f1, tn, fp, fn, tp])

        y_pred_by_split[str(i)] = y_pred
        print(f"for split {i}:")
        print(y_pred_by_split[str(i)])

        # Persist the predicted probabilities so the results cells can run
        # without retraining.
        y_pred_path = os.path.join(split_directory, f"y_pred_split_{i}.pkl")
        with open(y_pred_path, 'wb') as f:
            pickle.dump(y_pred, f)


### RESULTS (PERFORMANCE)

#### Step 1. Load model and best results

In [None]:
directory = './Results_InstructTimeLlama'

def load_from_pickle(filename):
    """Open `filename` in binary mode and return the unpickled object."""
    with open(filename, 'rb') as handle:
        loaded = pickle.load(handle)
    return loaded

# Predicted probabilities saved by the training cell, keyed by split number as a string.
y_pred_by_split = {
    str(split): load_from_pickle(
        os.path.join(f'./Results_InstructTimeLlama/split_{split}', f"y_pred_split_{split}.pkl")
    )
    for split in (1, 2, 3)
}

#### Step 2. Analysis of results

In [None]:
# Collect one metrics table per split, printing and plotting each as it is computed.
all_metrics = []

for i in [1,2,3]: 
    y_test = (
        pd.read_csv(f"../../../DATA/w4days/s{i}/y_test_tensor_standardScaler.csv")["individualMRGerm_stac"]
        .values
        .astype(int)
    )
    # Flatten labels and stored predictions to 1-D before computing metrics.
    y_test_single = y_test.flatten()
    y_test_pred = y_pred_by_split[str(i)].flatten()

    df_metrics = utils.get_metrics_(y_test_single, y_test_pred)
    print(df_metrics)
    utils.plot_metrics(df_metrics)
    utils.plot_roc_curve(y_test_single, y_test_pred)

    all_metrics.append(df_metrics)
print(all_metrics)

In [None]:
# Stack the per-split metric tables row-wise and store them without the index.
metrics_InstructTimeLlama = pd.concat(all_metrics, axis=0)
metrics_InstructTimeLlama.to_csv(
    './Results_InstructTimeLlama/metrics_InstructTimeLlama.csv',
    index=False,
)

metrics_InstructTimeLlama.head()


In [None]:
# Column-wise mean and standard deviation over the rows of the stacked metrics table.
metrics_mean, metrics_std = metrics_InstructTimeLlama.mean(), metrics_InstructTimeLlama.std()

summary_df = pd.DataFrame(dict([
    ("Metric", metrics_mean.index),
    ("Mean", metrics_mean.values),
    ("Standard Deviation", metrics_std.values),
]))

summary_df.to_csv(
    './Results_InstructTimeLlama/metrics_summary_InstructTimeLlama.csv',
    index=False,
)

print("\nMean and Standard Deviation of the Splits:", summary_df, sep="\n")


In [None]:
# Reload the stored per-split metrics and render each column as "mean ± std" in percent.
metrics_InstructTime = pd.read_csv('./Results_InstructTimeLlama/metrics_InstructTimeLlama.csv')
stats_InstructTime = metrics_InstructTime.agg(["mean", "std"])
formatted_metrics = stats_InstructTime.apply(
    lambda col: f"{col['mean']*100:.2f} ± {col['std']*100:.2f}",
    axis=0,
)
formatted_metrics_df = pd.DataFrame(formatted_metrics, columns=["Metrics (Mean ± Std)"])
formatted_metrics_df.to_csv(
    './Results_InstructTimeLlama/metrics_InstructTime_formatted.csv',
    index=True,
)
print(formatted_metrics_df)