In [None]:
import logging
import os
import argparse
import math
import random
import tqdm
import numpy as np
import pandas as pd
from sklearn import preprocessing

import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils as utils

from script import dataloader, utility, earlystopping
from model import models

from sklearn.metrics import roc_auc_score, confusion_matrix


In [None]:
class Dataset:
    """Attribute-style container for run settings, backed by a plain dict.

    Every attribute other than ``data`` is stored in (and read from) the
    ``data`` dictionary, so ``obj.lr = 0.1`` is equivalent to
    ``obj.data["lr"] = 0.1``.

    Raises:
        AttributeError: when reading a name that was never assigned.
    """

    def __init__(self):
        self.data = {}

    def __getattr__(self, name):
        # __getattr__ only runs after normal lookup has failed. Fetch the
        # backing dict straight from the instance __dict__: going through
        # `self.data` would re-enter __getattr__ whenever `data` itself is
        # missing (bare instance from __new__, copy.copy, unpickling) and end
        # in RecursionError instead of AttributeError.
        data = self.__dict__.get("data")
        if data is not None and name in data:
            return data[name]
        raise AttributeError(f"'Dataset' object has no attribute '{name}'")

    def __setattr__(self, name, value):
        if name == "data":
            # The backing dict itself is a real instance attribute.
            super().__setattr__(name, value)
        else:
            self.data[name] = value
            
# Global run configuration. Every assignment below is stored in `args.data`.
# Several of these values are overwritten per configuration by the grid search.
args = Dataset()

args.enable_cuda = True  # NOTE(review): not consulted by the visible code; the device is set separately
args.seed = 42
args.dataset = 'pemsd7-m'  # NOTE(review): not read in the visible code - confirm it is still needed
args.n_his = 14
args.n_pred = 1
args.time_intvl = 5
args.Kt = 3
args.stblock_num = 2
args.act_func = 'glu'  # choices=['glu', 'gtu']
args.Ks = 3  # choices=[3, 2]
args.graph_conv_type = 'high_order_graph_conv'
args.gso_type = 'sym_norm_lap'  # choices=['sym_norm_lap', 'rw_norm_lap', 'sym_renorm_adj', 'rw_renorm_adj']
args.enable_bias = True
args.droprate = 0.5
args.lr = 0.001
args.weight_decay_rate = 0.0005
args.batch_size = 32
args.epochs = 500
args.opt = 'adam'
args.step_size = 10  # StepLR step size
args.gamma = 0.95  # StepLR decay factor
args.patience = 30  # early-stopping patience

# Apply the seed: it was configured but never used, so weight initialisation
# and dropout differed on every run.
random.seed(args.seed)
torch.manual_seed(args.seed)
np.random.seed(args.seed)

In [None]:
# Compute device. The run is pinned to the CPU; `args.enable_cuda` is not
# consulted here.
# NOTE(review): before switching to CUDA (e.g. torch.device('cuda:0') when
# torch.cuda.is_available()), make sure every batch is moved to `device` in
# the training and evaluation loops.
device = torch.device("cpu")


In [None]:
# Sub-folder names the experiment iterates over; each one is used to build
# the ./dtw_matrices/<folder>/ and ../DATA/<folder>/ file paths.
carpetas = [
    "s1",
    "s2",
    "s3",
]

# NOTE(review): not referenced elsewhere in the visible notebook; it has the
# same value as args.n_his.
numberOfTimeStep = 14

# Tag embedded in the names of the data and DTW-matrix files.
norm = "normPower2"

In [None]:
def exp_kernel(train, sigma):
    """Apply an element-wise exponential kernel to a matrix of values.

    Each entry ``d`` is mapped to ``exp(-d**2 / (2 * sigma**2))`` and the
    result is rounded to 6 decimals.

    Args:
        train: array-like supporting ``**`` (NumPy array or pandas DataFrame).
        sigma: non-zero kernel bandwidth.

    Returns:
        pandas.DataFrame holding the kernel values.

    Raises:
        ValueError: if ``sigma`` is zero (the exponent would divide by zero).
    """
    if sigma == 0:
        raise ValueError("sigma must be non-zero")

    matrix_train = np.exp(-(train**2)/(2*(sigma**2)))

    x = pd.DataFrame(matrix_train).round(6)

    # Preview the first row by position: label-based `.loc[0]` raises KeyError
    # when the index has no label 0 (labelled or empty input).
    if not x.empty:
        print(x.iloc[0])

    return x


# Load the adjacency matrix and the train/val/test splits for one folder
# Value replaced by 0 in the feature arrays.
# NOTE(review): presumably a missing-value marker - confirm with the data owner.
_FILL_SENTINEL = 666

# Bandwidth passed to exp_kernel when turning the DTW matrix into a graph.
_KERNEL_SIGMA = 2.5


def _load_split(folder, split, norm):
    """Load, clean and reshape one data split ("train", "val" or "test").

    Returns:
        (x, y): ``x`` is the 4-D float feature tensor described below and
        ``y`` is the float tensor of the ``individualMRGerm_stac`` column.
    """
    base = "../DATA/" + folder
    X = np.load(f"{base}/X_{split}_tensor_{norm}.npy")
    y = pd.read_csv(f"{base}/y_{split}_tensor_{norm}.csv")["individualMRGerm_stac"].values
    X[X == _FILL_SENTINEL] = 0

    # Step 1 (per the original author's note): reorder P x T x F to P x F x T.
    X = torch.Tensor(X).permute(0, 2, 1)
    # Step 2: add a fourth dimension by repeating the values F times, giving
    # P x F x T x F. `expand` returns a view; no data is copied.
    x = X.unsqueeze(-1).expand(-1, -1, -1, X.size(1))

    return x, torch.Tensor(y)


def load_data(carpetas, c, norm, args):
    """Load the graph operator and the train/val/test loaders for one folder.

    Args:
        carpetas: list of folder names.
        c: index into ``carpetas`` selecting the folder to load.
        norm: tag embedded in the file names.
        args: configuration object; ``args.gso`` is set here and
            ``args.batch_size`` is read.

    Returns:
        Tuple ``(args, train_data, train_iter, val_data, val_iter, test_data,
        test_iter, n_vertex, y_test)``, where ``n_vertex`` is the number of
        rows of the kernel matrix and ``y_test`` is the test label tensor.
    """
    folder = carpetas[c]

    # Graph shift operator: exponential kernel over the stored DTW matrix.
    # NOTE(review): pd.read_csv treats the first CSV row as a header; confirm
    # the file has a header row and no index column, otherwise `adj` will not
    # be square.
    path = "./dtw_matrices/"+folder+"/tr_AMR_"+norm+".csv"
    dtw_X = pd.read_csv(path)
    adj = exp_kernel(dtw_X, _KERNEL_SIGMA)
    n_vertex = adj.shape[0]

    gso = adj.astype(dtype=np.float32)
    args.gso = torch.from_numpy(np.array(gso)).to(device)

    x_train, y_train = _load_split(folder, "train", norm)
    x_val, y_val = _load_split(folder, "val", norm)
    x_test, y_test = _load_split(folder, "test", norm)

    print("x_train:", x_train.shape)
    print("x_val:", x_val.shape)
    print("x_test:", x_test.shape)

    # All loaders keep the stored sample order (shuffle=False).
    train_data = utils.data.TensorDataset(x_train, y_train)
    train_iter = utils.data.DataLoader(dataset=train_data, batch_size=args.batch_size, shuffle=False)
    val_data = utils.data.TensorDataset(x_val, y_val)
    val_iter = utils.data.DataLoader(dataset=val_data, batch_size=args.batch_size, shuffle=False)
    test_data = utils.data.TensorDataset(x_test, y_test)
    test_iter = utils.data.DataLoader(dataset=test_data, batch_size=args.batch_size, shuffle=False)

    return args, train_data, train_iter, val_data, val_iter, test_data, test_iter, n_vertex, y_test

# GRIDSEARCH AND PREDICTIONS

In [None]:
from itertools import product
import torch
import torch.optim as optim
from sklearn.metrics import roc_auc_score, confusion_matrix
import json

param_grid = {
    "lr": [1e-4, 1e-3, 1e-2, 5e-2],
    "weight_decay_rate": [0, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1],
    "droprate": [0.15, 0.3, 0.45],
    "Ks": [3, 2],
    "act_func": ['glu', 'gtu'],
    "graph_conv_type": ["high_order_graph_conv"]
}


def _build_model(args, hyperparams, n_vertex):
    """Write `hyperparams` into `args` and build a fresh model on `device`.

    Raises:
        ValueError: if ``args.graph_conv_type`` is not supported.
    """
    for key, value in hyperparams.items():
        setattr(args, key, value)

    Ko = args.n_his - (args.Kt - 1) * 2 * args.stblock_num
    blocks = [[n_vertex]] + [[32, 8, 32] for _ in range(args.stblock_num)] + ([[64, 64]] if Ko > 0 else [[64]]) + [[1]]

    if args.graph_conv_type == 'high_order_graph_conv':
        return models.STGCNHighOrderGraphConv(args, blocks, n_vertex).to(device)
    raise ValueError(f"Unsupported graph_conv_type: {args.graph_conv_type}")


def _mean_loss(model, data_iter, loss_fn):
    """Return the sample-weighted mean loss over `data_iter` (no gradients)."""
    total, count = 0.0, 0
    with torch.no_grad():
        for x, y in data_iter:
            x, y = x.to(device), y.to(device)
            # reshape(-1) keeps a 1-D output even for a batch of one sample,
            # where squeeze() would produce a 0-d tensor and break BCELoss.
            total += loss_fn(model(x).reshape(-1), y).item() * y.shape[0]
            count += y.shape[0]
    return total / count


def _fit_config(args, hyperparams, n_vertex, train_iter, val_iter):
    """Train one configuration with early stopping.

    Returns:
        (best_val_loss, best_state): the lowest validation loss seen and a
        copy of the weights at that epoch; ``best_state`` is None when no
        epoch produced a finite validation loss.
    """
    # Same seed for every configuration so they differ only by hyperparameters.
    torch.manual_seed(args.seed)
    model = _build_model(args, hyperparams, n_vertex)

    optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay_rate)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=args.step_size, gamma=args.gamma)
    loss_fn = torch.nn.BCELoss()
    es = earlystopping.EarlyStopping(mode='min', patience=args.patience)

    best_loss, best_state = float("inf"), None
    for epoch in range(args.epochs):
        model.train()
        for x, y in train_iter:
            x, y = x.to(device), y.to(device)
            loss = loss_fn(model(x).reshape(-1), y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        scheduler.step()

        model.eval()
        val_loss = _mean_loss(model, val_iter, loss_fn)

        # Snapshot the weights at the best epoch: when early stopping fires,
        # the live model is already `patience` epochs past its best state.
        if val_loss < best_loss:
            best_loss = val_loss
            best_state = {k: v.detach().clone() for k, v in model.state_dict().items()}

        if es.step(torch.tensor(val_loss)):
            print("Early stopping activated.")
            break

    return best_loss, best_state


results = []
best_params_per_folder = {}

for c, folder in enumerate(carpetas):
    print(f"\n GridSearch for folder {folder}/{len(carpetas)}")

    args, train_data, train_iter, val_data, val_iter, test_data, test_iter, n_vertex, y_test = load_data(carpetas, c, norm, args)

    best_val_loss = float("inf")
    best_params = None
    best_model = None  # state dict of the best configuration

    for params in product(*param_grid.values()):
        hyperparams = dict(zip(param_grid.keys(), params))
        print(f"Testing configuration: {hyperparams}")

        val_loss, state = _fit_config(args, hyperparams, n_vertex, train_iter, val_iter)

        if state is not None and val_loss < best_val_loss:
            best_val_loss = val_loss
            best_params = hyperparams
            best_model = state

    if best_model is None:
        raise RuntimeError(f"No configuration produced a finite validation loss for folder {folder}")

    # Save the best hyperparameters for this folder
    best_params_per_folder[f"folder_{folder}"] = {
        "best_params": best_params,
        "best_val_loss": best_val_loss
    }
    print(f" Best configuration for folder {c}: {best_params} with validation loss {best_val_loss:.4f}")

    # Rebuild the architecture of the *best* configuration before loading its
    # weights. The model left over from the grid loop belongs to the last
    # configuration tried, whose Ks / act_func may differ.
    model = _build_model(args, best_params, n_vertex)
    model.load_state_dict(best_model)
    model.eval()

    # Evaluate on the test set, batched and without gradient tracking.
    with torch.no_grad():
        pred_probs = torch.cat([model(x.to(device)).reshape(-1) for x, _ in test_iter]).cpu().numpy()
    y_test = y_test.cpu().numpy()
    pred_bin = (pred_probs >= 0.5).astype(int)

    # ROC AUC is undefined when the test labels contain a single class.
    roc_auc = roc_auc_score(y_test, pred_probs) if np.unique(y_test).size > 1 else float("nan")
    # labels=[0, 1] guarantees a 2x2 matrix even if one class is absent.
    tn, fp, fn, tp = confusion_matrix(y_test.astype(int), pred_bin, labels=[0, 1]).ravel()
    sensitivity = tp / (tp + fn) if (tp + fn) > 0 else float("nan")
    specificity = tn / (tn + fp) if (tn + fp) > 0 else float("nan")

    results.append((roc_auc, sensitivity, specificity))

In [None]:
import csv

filename = "model_results.csv"
header = ["ROC AUC", "Sensitivity", "Specificity"]

# Write the header line followed by one row per entry of `results`.
with open(filename, "w", newline="") as out_file:
    csv.writer(out_file).writerows([header, *results])

print(f"Results saved to {filename}")