In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os
from pathlib import Path
from decimal import Decimal
import pandas as pd
# PyTorch
import torch 
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torch.utils.data import sampler
import torchvision.datasets as datasets
import torchvision.transforms as T
from torch_geometric.data import Data, Batch
import torch.optim as optim
from torch_geometric.nn import MessagePassing
from torch_geometric.utils import add_self_loops, degree
from torch_geometric.nn import GINConv, global_add_pool, GCNConv, global_mean_pool
import torch_geometric
from torch.nn import Linear
from torch_geometric.nn import GCNConv, GATConv, GATv2Conv, TransformerConv
import torch.nn.functional as F


#Sklearn
import sklearn.metrics as metrics
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import KFold

#Networkx
import networkx as nx

In [2]:
import warnings

warnings.filterwarnings("ignore", category=UserWarning)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

## 1. Funciones

In [3]:
def calculate_node_features(graph, experiment):
     
    # Obtener los nodos y sus características
    nodes = list(graph.nodes())
    
     # Calcular las características de los nodos
        
    if experiment == 1: 
        clustering_coeffs = nx.clustering(graph)
        x = torch.tensor([[clustering_coeffs[node]] for node in nodes], dtype=torch.float)
        
    elif experiment == 2: 
        clustering_coeffs = nx.clustering(graph)
        degree = nx.degree(graph)
        x = torch.tensor([[clustering_coeffs[node], degree[node]] for node in nodes], dtype=torch.float)
        
    else:
        clustering_coeffs = nx.clustering(graph)
        degree = nx.degree(graph)
        pagerank = nx.pagerank(graph)
        x = torch.tensor([[clustering_coeffs[node], degree[node],pagerank[node]] for node in nodes], dtype=torch.float)

    return x

def load_gpickle_files(path, experiment):
    X_path = os.path.join(path, "X")  # Ruta de la carpeta con los grafos
    y_path = os.path.join(path, "y")  # Ruta de la carpeta con las etiquetas

    file_list = os.listdir(X_path)  # Obtén la lista de archivos gpickle
    dataset = []

    for file in file_list:
        file_path = os.path.join(X_path, file)  # Genera la ruta del archivo
        graph = nx.read_gpickle(file_path)  # Lee el archivo gpickle con NetworkX

        # Calcular las características de los nodos
        x = calculate_node_features(graph, experiment)
        # Obtiene la matriz dispersa de adyacencia
        adj_matrix = nx.convert_matrix.to_scipy_sparse_matrix(graph)

        # Convierte la matriz dispersa en un tensor de PyTorch
        edge_index = torch.from_numpy(np.vstack(adj_matrix.nonzero()))
#         edge_index = torch.tensor(list(graph.edges()), dtype=torch.long).t().contiguous()  # Índices de las aristas
#         adj_matrix = nx.adjacency_matrix(graph)
#         adj_matrix = adj_matrix.toarray()

        file_number = os.path.splitext(file)[0]
        with open(os.path.join(y_path, f"{file_number}.txt")) as f:
            target = f.read()
            y = torch.tensor(np.float_(target), dtype=torch.float)

        data = Data(x=x, edge_index=edge_index, y=y)  # Crea un objeto Data
        dataset.append(data)

    return dataset

def custom_collate(batch):
    # Extraer los elementos del lote y crear listas separadas para cada atributo
    x_list = []
    edge_index_list = []
    y_list = []

    for data in batch:
        x_list.append(data.x)
        edge_index_list.append(data.edge_index)
        y_list.append(data.y)

    # Convertir las listas en arreglos de numpy
    x_batch = np.stack(x_list)
    edge_index_batch = np.stack(edge_index_list)
    y_batch = np.stack(y_list)

    return Data(x=x_batch, edge_index=edge_index_batch, y=y_batch)

def plot_learning_curves(train_losses, val_losses):
    epochs = range(1, len(train_losses) + 1)

    plt.figure(figsize=(8, 6))

    # Plot losses
    plt.plot(epochs, train_losses, label='Train')
    plt.plot(epochs, val_losses, label='Validation')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.title('Curva de Aprendizaje')
    plt.legend()

    plt.tight_layout()
    plt.show()
    
def train(model, num_epochs, dataset, data_test):
    
    #model3=GNN(input_size=3, hidden_channels=3)
    learning_rate = 0.001
    # Definir la función de pérdida y el optimizador
    loss_fn = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # Listas para almacenar las pérdidas en cada época
    train_losses = []
    val_losses = []

    #num_epochs = 12
    model.train()  # Cambiar al modo de entrenamiento

    for epoch in range(num_epochs):
        running_loss = 0.0

        for data in dataset:
            x = data.x  # Características de los nodos
            edge_index = data.edge_index
            y = data.y#.view(-1)#.to(device)  # Etiquetas o clases

            optimizer.zero_grad()
            logits = model(x=x, edge_index=edge_index)
            loss = loss_fn(logits, y)
            loss.backward()  # Realizar el paso de atrás (backward)
            optimizer.step()  # Actualizar los pesos del modelo

            running_loss += loss.item()

        # Calcular la pérdida promedio en cada época
        epoch_loss = running_loss / len(dataset)
        train_losses.append(epoch_loss)

        # Realizar la validación del modelo en cada época
        model.eval()  # Cambiar al modo de evaluación

        with torch.no_grad():
            running_val_loss = 0.0

            for data in data_test:
                x_val = data.x
                edge_index_val = data.edge_index
                y_val = data.y#.view(-1)#.to(device)

                val_logits = model(x=x_val, edge_index=edge_index_val)
                val_loss = loss_fn(val_logits, y_val)

                running_val_loss += val_loss.item()

            # Calcular la pérdida promedio en la validación
            val_epoch_loss = running_val_loss / len(data_test)
            val_losses.append(val_epoch_loss)
            return epoch_loss, val_epoch_loss

        # Imprimir información del progreso del entrenamiento
        #print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {epoch_loss:.4f}, Val Loss: {val_epoch_loss:.4f}')

    # Crear las curvas de aprendizaje
#     plot_learning_curves(train_losses, val_losses)
    
def graph_testing(model, dataset, path):
    model.eval()  # Set the model in evaluation mode
    total_samples = 0
    total_loss = 0
    predictions_list = []
    labels_list = []

    learning_rate = 0.001
    loss_fn = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    with torch.no_grad():
        for data in dataset:
            x = data.x  # Características de los nodo
            edge_index = data.edge_index
            y = data.y.unsqueeze(0)#.view(-1)#.to(device)  # Etiquetas o clases


            predictions = model(x=x, edge_index=edge_index)  # Forward pass

            loss = loss_fn(predictions, y)
            total_loss += loss.item()

            predictions_list.append(predictions.detach().cpu().numpy())
            labels_list.append(y.detach().cpu().numpy())

        # Calculate accuracy
        predictions_array = np.concatenate(predictions_list, axis=0)
        labels_array = np.concatenate(labels_list, axis=0)
        # Calculate MSE
        mse = mean_squared_error(labels_array, predictions_array)

        # Calculate MAE
        mae = mean_absolute_error(labels_array, predictions_array)

        # Calculate RMSE
        rmse = mean_squared_error(labels_array, predictions_array, squared=False)

        # Calculate R-squared
        r2 = r2_score(labels_array, predictions_array)

        print("MSE:", mse)
        print("MAE:", mae)
        print("RMSE:", rmse)
        print("R-squared:", r2)

        #print("Accuracy: {:.4f}".format(accuracy))
        # print("Mean Squared Error (MSE): {:.4f}".format(mse))
        # print("R-squared (R²): {:.4f}".format(r2))
        fig, ax = plt.subplots()
        ax.scatter(labels_array, predictions_array)
        ax.axline((0, 0), slope=1, color='red')
        # Add labels and title
        plt.xlabel("Etiquetas")
        plt.ylabel("Predicciones")
        plt.title("Predicciones vs. Etiquetas")
        

        # Display the plot
        plt.savefig(path)
        plt.close()
        
        return mse, mae, rmse, r2 
    
    
def testing(model, dataset):
    model.eval()  # Set the model in evaluation mode
    total_samples = 0
    total_loss = 0
    predictions_list = []
    labels_list = []

    learning_rate = 0.001
    loss_fn = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    with torch.no_grad():
        for data in dataset:
            x = data.x  # Características de los nodo
            edge_index = data.edge_index
            y = data.y.unsqueeze(0)#.view(-1)#.to(device)  # Etiquetas o clases


            predictions = model(x=x, edge_index=edge_index)  # Forward pass

            loss = loss_fn(predictions, y)
            total_loss += loss.item()

            predictions_list.append(predictions.detach().cpu().numpy())
            labels_list.append(y.detach().cpu().numpy())

        # Calculate accuracy
        predictions_array = np.concatenate(predictions_list, axis=0)
        labels_array = np.concatenate(labels_list, axis=0)
        # Calculate MSE
        mse = mean_squared_error(labels_array, predictions_array)

        # Calculate MAE
        mae = mean_absolute_error(labels_array, predictions_array)

        # Calculate RMSE
        rmse = mean_squared_error(labels_array, predictions_array, squared=False)

        # Calculate R-squared
        r2 = r2_score(labels_array, predictions_array)

#         print("MSE:", mse)
#         print("MAE:", mae)
#         print("RMSE:", rmse)
#         print("R-squared:", r2)

#         #print("Accuracy: {:.4f}".format(accuracy))
#         # print("Mean Squared Error (MSE): {:.4f}".format(mse))
#         # print("R-squared (R²): {:.4f}".format(r2))
#         fig, ax = plt.subplots()
#         ax.scatter(labels_array, predictions_array)
#         ax.axline((0, 0), slope=1, color='red')
#         # Add labels and title
#         plt.xlabel("Labels")
#         plt.ylabel("Predictions")
#         plt.title("Predictions vs. Labels")
        

#         # Display the plot
#         plt.show()
        
        return mse, mae, rmse, r2
def cross_validate(model, dataset, num_folds=5, num_epochs=20):
    """
    Perform cross-validation for a given model and dataset.

    Args:
        model (torch.nn.Module): The PyTorch model to evaluate.
        dataset (list): The dataset containing data for cross-validation.
        num_folds (int): The number of folds for cross-validation.
    """
    kf = KFold(n_splits=num_folds)

    all_mse = []
    all_mae = []
    all_rmse = []
    all_r2 = []

    for train_idx, val_idx in kf.split(dataset):
        # Split the dataset into training and validation sets for this fold
        train_set = [dataset[i] for i in train_idx] 
        val_set = [dataset[i] for i in val_idx]

        # Train the model on the training set
        train(model, num_epochs, train_set, val_set)

        # Evaluate the model on the validation set
        mse, mae, rmse, r2 = testing(model, val_set)

        all_mse.append(mse)
        all_mae.append(mae)
        all_rmse.append(rmse)
        all_r2.append(r2)

    # Calculate and return the mean of evaluation metrics across all folds
    mean_mse = np.mean(all_mse)
    mean_mae = np.mean(all_mae)
    mean_rmse = np.mean(all_rmse)
    mean_r2 = np.mean(all_r2)

    return mean_mse, mean_mae, mean_rmse, mean_r2

def cross_validate_with_early_stopping(model, dataset, num_folds=5, num_epochs=20, patience=3):
    """
    Perform cross-validation for a given model and dataset with early stopping.

    Args:
        model (torch.nn.Module): The PyTorch model to evaluate.
        dataset (list): The dataset containing data for cross-validation.
        num_folds (int): The number of folds for cross-validation.
        num_epochs (int): The maximum number of epochs for training in each fold.
        patience (int): Number of epochs with no improvement to wait before stopping.
    """
    kf = KFold(n_splits=num_folds)

    all_mse = []
    all_mae = []
    all_rmse = []
    all_r2 = []

    for fold, (train_idx, val_idx) in enumerate(kf.split(dataset)):
        # Split the dataset into training and validation sets for this fold
        train_set = [dataset[i] for i in train_idx]
        val_set = [dataset[i] for i in val_idx]
        print(" K-Fold :", fold)

        # Initialize variables for early stopping
        best_val_loss = float("inf")
        epochs_no_improve = 0
        #model.reset_parameters()  # Reset model parameters at the beginning of each fold

        for epoch in range(num_epochs):
            # Train the model on the training set
            epoch_loss, val_epoch_loss = train(model, 1, train_set, val_set)  # Train for 1 epoch at a time
            print(f"Epoch:  {epoch},  Train Loss: {epoch_loss:.4f}, Val Loss: {val_epoch_loss:.4f}")
            # Evaluate the model on the validation set
            mse, mae, rmse, r2 = testing(model, val_set)

            all_mse.append(mse)
            all_mae.append(mae)
            all_rmse.append(rmse)
            all_r2.append(r2)

            # Check if validation loss has improved
            if mse < best_val_loss:
                best_val_loss = mse
                epochs_no_improve = 0
            else:
                epochs_no_improve += 1

            # Implement early stopping
            if epochs_no_improve >= patience:
                print(f"Early stopping for fold {fold + 1} after {epoch + 1} epochs.")
                break

    # Calculate and return the mean of evaluation metrics across all folds
    mean_mse = np.mean(all_mse)
    mean_mae = np.mean(all_mae)
    mean_rmse = np.mean(all_rmse)
    mean_r2 = np.mean(all_r2)

    return mean_mse, mean_mae, mean_rmse, mean_r2

## 2. Datos

In [4]:
dataset = load_gpickle_files(r"..\data\train", 1)  # Carga los datos con la función load_gpickle_files
test = load_gpickle_files(r"..\data\test", 1)  # Carga los datos con la función load_gpickle_files


The scipy.sparse array containers will be used instead of matrices
in Networkx 3.0. Use `to_scipy_sparse_array` instead.
  adj_matrix = nx.convert_matrix.to_scipy_sparse_matrix(graph)


## 3. Modelos GATConv

### 3.1 One GAT Layer

In [5]:
class OneGATLayer(torch.nn.Module):
    def __init__(self, input_size, hidden_channels):
        super(OneGATLayer, self).__init__()
        torch.manual_seed(12345)
        
        self.conv1 = GATConv(input_size, hidden_channels)
        
        
        self.lin = Linear(hidden_channels, 1)
    
    def forward(self, x, edge_index, batch = None,  edge_col = None):
        
        # Node embedding 
        x = self.conv1(x, edge_index, edge_col)
               
        # Readout layer
        batch = torch.zeros(x.shape[0],dtype=int) if batch is None else batch
        x = global_mean_pool(x, batch)
        
        # Final classifier
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.lin(x)
    
        return x

### 3.2 Two GAT Layer

In [6]:
class TwoGATLayer(torch.nn.Module):
    def __init__(self, input_size, hidden_channels):
        super(TwoGATLayer, self).__init__()
        torch.manual_seed(12345)
        
        self.conv1 = GATConv(input_size, hidden_channels)
        self.conv2 = GATConv(hidden_channels, hidden_channels)
        
        self.lin = Linear(hidden_channels, 1)
    
    def forward(self, x, edge_index, batch = None,  edge_col = None):
        
        # Node embedding 
        x = self.conv1(x, edge_index, edge_col)
        x = x.relu()
        x = self.conv2(x, edge_index, edge_col)
        
        # Readout layer
        batch = torch.zeros(x.shape[0],dtype=int) if batch is None else batch
        x = global_mean_pool(x, batch)
        
        # Final classifier
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.lin(x)
    
        return x

### 3.3 Three GAT Layer

In [7]:
class ThreeGATLayer(torch.nn.Module):
    def __init__(self, input_size, hidden_channels):
        super(ThreeGATLayer, self).__init__()
        torch.manual_seed(12345)
        
        self.conv1 = GATConv(input_size, hidden_channels)
        self.conv2 = GATConv(hidden_channels, hidden_channels)
        self.conv3 = GATConv(hidden_channels, hidden_channels)
        
        self.lin = Linear(hidden_channels, 1)
    
    def forward(self, x, edge_index, batch = None,  edge_col = None):
        
        # Node embedding 
        x = self.conv1(x, edge_index, edge_col)
        x = x.relu()
        x = self.conv2(x, edge_index, edge_col)
        x = x.relu()
        x = self.conv3(x, edge_index, edge_col)
               
        # Readout layer
        batch = torch.zeros(x.shape[0],dtype=int) if batch is None else batch
        x = global_mean_pool(x, batch)
        
        # Final classifier
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.lin(x)
    
        return x

### 3.4 Four GCN Layer

In [8]:
class FourGATLayer(torch.nn.Module):
    def __init__(self, input_size, hidden_channels):
        super(FourGATLayer, self).__init__()
        torch.manual_seed(12345)
        
        self.conv1 = GATConv(input_size, hidden_channels)
        self.conv2 = GATConv(hidden_channels, hidden_channels)
        self.conv3 = GATConv(hidden_channels, hidden_channels)
        self.conv4 = GATConv(hidden_channels, hidden_channels)
        
        
        self.lin = Linear(hidden_channels, 1)
    
    def forward(self, x, edge_index, batch = None,  edge_col = None):
        
        # Node embedding 
        x = self.conv1(x, edge_index, edge_col)
        x = x.relu()
        x = self.conv2(x, edge_index, edge_col)
        x = x.relu()
        x = self.conv3(x, edge_index, edge_col)
        x = x.relu()
        x = self.conv4(x, edge_index, edge_col)
               
        # Readout layer
        batch = torch.zeros(x.shape[0],dtype=int) if batch is None else batch
        x = global_mean_pool(x, batch)
        
        # Final classifier
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.lin(x)
    
        return x

## 4. Experimentos

### 4.1 Experimento 1

#### Dataset

In [9]:
dataset = load_gpickle_files(r"..\data\train", 1)  # Carga los datos con la función load_gpickle_files
test = load_gpickle_files(r"..\data\test", 1)  # Carga los datos con la función load_gpickle_files


The scipy.sparse array containers will be used instead of matrices
in Networkx 3.0. Use `to_scipy_sparse_array` instead.
  adj_matrix = nx.convert_matrix.to_scipy_sparse_matrix(graph)


#### Modelos

In [10]:
model11=OneGATLayer(input_size=1, hidden_channels=1)
model12=TwoGATLayer(input_size=1, hidden_channels=1)
model13=ThreeGATLayer(input_size=1, hidden_channels=1)
model14=FourGATLayer(input_size=1, hidden_channels=1)

models = [("OneGATLayer" , model11), 
          ("TwoGATLayer" , model12), 
          ("ThreeGATLayer" ,model13),
          ("FourGATLayer", model14)]

#### Entrenamiento y cross-validation

In [11]:
results_df = pd.DataFrame(columns=["Model", "MSE", "MAE", "R-squared"])
for model_name, model in models:
    print("Nombre del modelo :", model_name)
    mean_mse, mean_mae, mean_rmse, mean_r2 =cross_validate_with_early_stopping(model, dataset, num_folds=5, num_epochs=40)
    results_df = results_df.append(
        {"Model": model_name, "MSE": mean_mse, "MAE": mean_mae, "R-squared": mean_r2},
        ignore_index=True,
    )

Nombre del modelo : OneGATLayer
 K-Fold : 0
Epoch:  0,  Train Loss: 0.2511, Val Loss: 0.0883
Epoch:  1,  Train Loss: 0.0887, Val Loss: 0.0823
Epoch:  2,  Train Loss: 0.0833, Val Loss: 0.0788
Epoch:  3,  Train Loss: 0.0796, Val Loss: 0.0747
Epoch:  4,  Train Loss: 0.0745, Val Loss: 0.0682
Epoch:  5,  Train Loss: 0.0698, Val Loss: 0.0641
Epoch:  6,  Train Loss: 0.0682, Val Loss: 0.0612
Epoch:  7,  Train Loss: 0.0679, Val Loss: 0.0600
Epoch:  8,  Train Loss: 0.0692, Val Loss: 0.0597
Epoch:  9,  Train Loss: 0.0679, Val Loss: 0.0599
Epoch:  10,  Train Loss: 0.0673, Val Loss: 0.0596
Epoch:  11,  Train Loss: 0.0694, Val Loss: 0.0601
Epoch:  12,  Train Loss: 0.0693, Val Loss: 0.0605
Epoch:  13,  Train Loss: 0.0668, Val Loss: 0.0598
Early stopping for fold 1 after 14 epochs.
 K-Fold : 1
Epoch:  0,  Train Loss: 0.0664, Val Loss: 0.0626
Epoch:  1,  Train Loss: 0.0685, Val Loss: 0.0627
Epoch:  2,  Train Loss: 0.0668, Val Loss: 0.0627
Epoch:  3,  Train Loss: 0.0673, Val Loss: 0.0626
Epoch:  4,  Tra

  results_df = results_df.append(


Epoch:  0,  Train Loss: 0.0844, Val Loss: 0.0798
Epoch:  1,  Train Loss: 0.0827, Val Loss: 0.0797
Epoch:  2,  Train Loss: 0.0824, Val Loss: 0.0796
Epoch:  3,  Train Loss: 0.0819, Val Loss: 0.0786
Epoch:  4,  Train Loss: 0.0809, Val Loss: 0.0773
Epoch:  5,  Train Loss: 0.0795, Val Loss: 0.0767
Epoch:  6,  Train Loss: 0.0772, Val Loss: 0.0744
Epoch:  7,  Train Loss: 0.0764, Val Loss: 0.0723
Epoch:  8,  Train Loss: 0.0765, Val Loss: 0.0714
Epoch:  9,  Train Loss: 0.0752, Val Loss: 0.0707
Epoch:  10,  Train Loss: 0.0751, Val Loss: 0.0704
Epoch:  11,  Train Loss: 0.0736, Val Loss: 0.0706
Epoch:  12,  Train Loss: 0.0763, Val Loss: 0.0703
Epoch:  13,  Train Loss: 0.0752, Val Loss: 0.0700
Epoch:  14,  Train Loss: 0.0748, Val Loss: 0.0706
Epoch:  15,  Train Loss: 0.0755, Val Loss: 0.0705
Epoch:  16,  Train Loss: 0.0748, Val Loss: 0.0701
Early stopping for fold 1 after 17 epochs.
 K-Fold : 1
Epoch:  0,  Train Loss: 0.0752, Val Loss: 0.0738
Epoch:  1,  Train Loss: 0.0744, Val Loss: 0.0736
Epoch: 

  results_df = results_df.append(


Epoch:  0,  Train Loss: 0.0824, Val Loss: 0.0778
Epoch:  1,  Train Loss: 0.0773, Val Loss: 0.0706
Epoch:  2,  Train Loss: 0.0703, Val Loss: 0.0636
Epoch:  3,  Train Loss: 0.0668, Val Loss: 0.0594
Epoch:  4,  Train Loss: 0.0657, Val Loss: 0.0576
Epoch:  5,  Train Loss: 0.0663, Val Loss: 0.0563
Epoch:  6,  Train Loss: 0.0656, Val Loss: 0.0565
Epoch:  7,  Train Loss: 0.0656, Val Loss: 0.0572
Epoch:  8,  Train Loss: 0.0649, Val Loss: 0.0560
Epoch:  9,  Train Loss: 0.0650, Val Loss: 0.0541
Epoch:  10,  Train Loss: 0.0635, Val Loss: 0.0546
Epoch:  11,  Train Loss: 0.0626, Val Loss: 0.0532
Epoch:  12,  Train Loss: 0.0633, Val Loss: 0.0545
Epoch:  13,  Train Loss: 0.0639, Val Loss: 0.0526
Epoch:  14,  Train Loss: 0.0620, Val Loss: 0.0556
Epoch:  15,  Train Loss: 0.0634, Val Loss: 0.0529
Epoch:  16,  Train Loss: 0.0618, Val Loss: 0.0509
Epoch:  17,  Train Loss: 0.0630, Val Loss: 0.0522
Epoch:  18,  Train Loss: 0.0587, Val Loss: 0.0515
Epoch:  19,  Train Loss: 0.0587, Val Loss: 0.0492
Epoch:  20

  results_df = results_df.append(


Epoch:  0,  Train Loss: 0.1448, Val Loss: 0.0814
Epoch:  1,  Train Loss: 0.0846, Val Loss: 0.0798
Epoch:  2,  Train Loss: 0.0827, Val Loss: 0.0797
Epoch:  3,  Train Loss: 0.0825, Val Loss: 0.0797
Epoch:  4,  Train Loss: 0.0825, Val Loss: 0.0797
Epoch:  5,  Train Loss: 0.0824, Val Loss: 0.0797
Epoch:  6,  Train Loss: 0.0824, Val Loss: 0.0797
Epoch:  7,  Train Loss: 0.0824, Val Loss: 0.0797
Epoch:  8,  Train Loss: 0.0824, Val Loss: 0.0797
Epoch:  9,  Train Loss: 0.0824, Val Loss: 0.0797
Epoch:  10,  Train Loss: 0.0825, Val Loss: 0.0797
Epoch:  11,  Train Loss: 0.0824, Val Loss: 0.0797
Epoch:  12,  Train Loss: 0.0824, Val Loss: 0.0797
Epoch:  13,  Train Loss: 0.0824, Val Loss: 0.0797
Early stopping for fold 1 after 14 epochs.
 K-Fold : 1
Epoch:  0,  Train Loss: 0.0813, Val Loss: 0.0842
Epoch:  1,  Train Loss: 0.0813, Val Loss: 0.0842
Epoch:  2,  Train Loss: 0.0813, Val Loss: 0.0842
Epoch:  3,  Train Loss: 0.0813, Val Loss: 0.0842
Early stopping for fold 2 after 4 epochs.
 K-Fold : 2
Epoch

  results_df = results_df.append(


#### Resultados

In [12]:
results1_df = results_df
results1_df

Unnamed: 0,Model,MSE,MAE,R-squared
0,OneGATLayer,0.062792,0.216794,0.231403
1,TwoGATLayer,0.069974,0.233442,0.134577
2,ThreeGATLayer,0.044114,0.177955,0.45379
3,FourGATLayer,0.081207,0.247533,-0.001805


In [13]:
results1_df.to_csv("Experiment 1 GATConv.csv") 

#### Gráficas

In [14]:
models = [("OneGATLayer" , model11), 
          ("TwoGATLayer" , model12), 
          ("ThreeGATLayer" ,model13),
          ("FourGATLayer", model14)]

metrics_df = pd.DataFrame(columns=["Model", "MSE", "MAE", "R-squared"])
for model_name, model in models: 
    path = f"../results/Experiment1/Exp1_PredictedVSLabelled_{model_name}.png"
    mse, mae, rmse, r2 = graph_testing(model, test, path)
    metrics_df = metrics_df.append(
        {"Model": model_name, "MSE": mse, "MAE": mae, "R-squared": r2},
        ignore_index=True,
    )
metrics_df.to_csv(f"../results/Experiment1/Metricas test experimento 1 GATConv.csv")    

MSE: 0.059839103
MAE: 0.21103296
RMSE: 0.24462032
R-squared: 0.27047846855037605


  metrics_df = metrics_df.append(


MSE: 0.06602431
MAE: 0.22949676
RMSE: 0.25695196
R-squared: 0.19507229948182736


  metrics_df = metrics_df.append(


MSE: 0.031282116
MAE: 0.14985806
RMSE: 0.17686751
R-squared: 0.618627678648507


  metrics_df = metrics_df.append(


MSE: 0.0821028
MAE: 0.24699984
RMSE: 0.28653586
R-squared: -0.0009467177549804529


  metrics_df = metrics_df.append(


### 4.2 Experimento 2

#### Dataset

In [15]:
dataset = load_gpickle_files(r"..\data\train", 2)  # Carga los datos con la función load_gpickle_files
test = load_gpickle_files(r"..\data\test", 2)  # Carga los datos con la función load_gpickle_files


The scipy.sparse array containers will be used instead of matrices
in Networkx 3.0. Use `to_scipy_sparse_array` instead.
  adj_matrix = nx.convert_matrix.to_scipy_sparse_matrix(graph)


#### Modelos

In [16]:
model21=OneGATLayer(input_size=2, hidden_channels=2)
model22=TwoGATLayer(input_size=2, hidden_channels=2)
model23=ThreeGATLayer(input_size=2, hidden_channels=2)
model24=FourGATLayer(input_size=2, hidden_channels=2)

models = [("OneGATLayer" , model21), 
          ("TwoGATLayer" , model22), 
          ("ThreeGATLayer" ,model23),
          ("FourGATLayer", model24)]

In [17]:
results_df = pd.DataFrame(columns=["Model", "MSE", "MAE", "R-squared"])
for model_name, model in models:
    print("Nombre del modelo :", model_name)
    mean_mse, mean_mae, mean_rmse, mean_r2 =cross_validate_with_early_stopping(model, dataset, num_folds=5, num_epochs=40)
    results_df = results_df.append(
        {"Model": model_name, "MSE": mean_mse, "MAE": mean_mae, "R-squared": mean_r2},
        ignore_index=True,
    )

Nombre del modelo : OneGATLayer
 K-Fold : 0
Epoch:  0,  Train Loss: 0.3409, Val Loss: 0.0940
Epoch:  1,  Train Loss: 0.0932, Val Loss: 0.0809
Epoch:  2,  Train Loss: 0.0827, Val Loss: 0.0790
Epoch:  3,  Train Loss: 0.0813, Val Loss: 0.0759
Epoch:  4,  Train Loss: 0.0742, Val Loss: 0.0617
Epoch:  5,  Train Loss: 0.0581, Val Loss: 0.0435
Epoch:  6,  Train Loss: 0.0525, Val Loss: 0.0371
Epoch:  7,  Train Loss: 0.0463, Val Loss: 0.0298
Epoch:  8,  Train Loss: 0.0397, Val Loss: 0.0253
Epoch:  9,  Train Loss: 0.0376, Val Loss: 0.0229
Epoch:  10,  Train Loss: 0.0376, Val Loss: 0.0225
Epoch:  11,  Train Loss: 0.0377, Val Loss: 0.0230
Epoch:  12,  Train Loss: 0.0359, Val Loss: 0.0193
Epoch:  13,  Train Loss: 0.0356, Val Loss: 0.0191
Epoch:  14,  Train Loss: 0.0330, Val Loss: 0.0195
Epoch:  15,  Train Loss: 0.0346, Val Loss: 0.0180
Epoch:  16,  Train Loss: 0.0324, Val Loss: 0.0180
Epoch:  17,  Train Loss: 0.0339, Val Loss: 0.0181
Epoch:  18,  Train Loss: 0.0344, Val Loss: 0.0172
Epoch:  19,  Tra

  results_df = results_df.append(


Epoch:  0,  Train Loss: 0.2089, Val Loss: 0.0882
Epoch:  1,  Train Loss: 0.0869, Val Loss: 0.0784
Epoch:  2,  Train Loss: 0.0768, Val Loss: 0.0666
Epoch:  3,  Train Loss: 0.0607, Val Loss: 0.0449
Epoch:  4,  Train Loss: 0.0498, Val Loss: 0.0314
Epoch:  5,  Train Loss: 0.0456, Val Loss: 0.0287
Epoch:  6,  Train Loss: 0.0427, Val Loss: 0.0258
Epoch:  7,  Train Loss: 0.0401, Val Loss: 0.0210
Epoch:  8,  Train Loss: 0.0349, Val Loss: 0.0190
Epoch:  9,  Train Loss: 0.0329, Val Loss: 0.0172
Epoch:  10,  Train Loss: 0.0335, Val Loss: 0.0166
Epoch:  11,  Train Loss: 0.0340, Val Loss: 0.0153
Epoch:  12,  Train Loss: 0.0330, Val Loss: 0.0157
Epoch:  13,  Train Loss: 0.0322, Val Loss: 0.0159
Epoch:  14,  Train Loss: 0.0318, Val Loss: 0.0174
Early stopping for fold 1 after 15 epochs.
 K-Fold : 1
Epoch:  0,  Train Loss: 0.0312, Val Loss: 0.0146
Epoch:  1,  Train Loss: 0.0324, Val Loss: 0.0163
Epoch:  2,  Train Loss: 0.0319, Val Loss: 0.0156
Epoch:  3,  Train Loss: 0.0318, Val Loss: 0.0154
Early sto

  results_df = results_df.append(


Epoch:  0,  Train Loss: 0.0800, Val Loss: 0.0729
Epoch:  1,  Train Loss: 0.0662, Val Loss: 0.0515
Epoch:  2,  Train Loss: 0.0461, Val Loss: 0.0267
Epoch:  3,  Train Loss: 0.0369, Val Loss: 0.0196
Epoch:  4,  Train Loss: 0.0325, Val Loss: 0.0176
Epoch:  5,  Train Loss: 0.0328, Val Loss: 0.0175
Epoch:  6,  Train Loss: 0.0310, Val Loss: 0.0173
Epoch:  7,  Train Loss: 0.0320, Val Loss: 0.0153
Epoch:  8,  Train Loss: 0.0331, Val Loss: 0.0153
Epoch:  9,  Train Loss: 0.0301, Val Loss: 0.0161
Epoch:  10,  Train Loss: 0.0316, Val Loss: 0.0137
Epoch:  11,  Train Loss: 0.0319, Val Loss: 0.0151
Epoch:  12,  Train Loss: 0.0299, Val Loss: 0.0174
Epoch:  13,  Train Loss: 0.0308, Val Loss: 0.0154
Early stopping for fold 1 after 14 epochs.
 K-Fold : 1
Epoch:  0,  Train Loss: 0.0303, Val Loss: 0.0141
Epoch:  1,  Train Loss: 0.0305, Val Loss: 0.0132
Epoch:  2,  Train Loss: 0.0315, Val Loss: 0.0144
Epoch:  3,  Train Loss: 0.0302, Val Loss: 0.0138
Epoch:  4,  Train Loss: 0.0304, Val Loss: 0.0126
Epoch:  5,

  results_df = results_df.append(


Epoch:  0,  Train Loss: 0.1837, Val Loss: 0.0865
Epoch:  1,  Train Loss: 0.0867, Val Loss: 0.0805
Epoch:  2,  Train Loss: 0.0831, Val Loss: 0.0799
Epoch:  3,  Train Loss: 0.0824, Val Loss: 0.0797
Epoch:  4,  Train Loss: 0.0825, Val Loss: 0.0797
Epoch:  5,  Train Loss: 0.0825, Val Loss: 0.0797
Epoch:  6,  Train Loss: 0.0825, Val Loss: 0.0797
Epoch:  7,  Train Loss: 0.0824, Val Loss: 0.0797
Epoch:  8,  Train Loss: 0.0824, Val Loss: 0.0797
Epoch:  9,  Train Loss: 0.0824, Val Loss: 0.0797
Epoch:  10,  Train Loss: 0.0824, Val Loss: 0.0797
Early stopping for fold 1 after 11 epochs.
 K-Fold : 1
Epoch:  0,  Train Loss: 0.0813, Val Loss: 0.0842
Epoch:  1,  Train Loss: 0.0813, Val Loss: 0.0842
Epoch:  2,  Train Loss: 0.0813, Val Loss: 0.0842
Epoch:  3,  Train Loss: 0.0813, Val Loss: 0.0842
Early stopping for fold 2 after 4 epochs.
 K-Fold : 2
Epoch:  0,  Train Loss: 0.0829, Val Loss: 0.0783
Epoch:  1,  Train Loss: 0.0828, Val Loss: 0.0783
Epoch:  2,  Train Loss: 0.0828, Val Loss: 0.0782
Epoch:  

  results_df = results_df.append(


#### Resultados

In [18]:
results2_df = results_df
results2_df

Unnamed: 0,Model,MSE,MAE,R-squared
0,OneGATLayer,0.024177,0.134829,0.698957
1,TwoGATLayer,0.023468,0.131496,0.708125
2,ThreeGATLayer,0.016695,0.112164,0.795198
3,FourGATLayer,0.081158,0.246858,-0.003834


In [19]:
results2_df.to_csv("Experiment 2 GATConv.csv") 

#### Gráficas

In [20]:
models = [("OneGATLayer" , model21), 
          ("TwoGATLayer" , model22), 
          ("ThreeGATLayer" ,model23),
          ("FourGATLayer", model24)]

metrics_df = pd.DataFrame(columns=["Model", "MSE", "MAE", "R-squared"])
for model_name, model in models: 
    path = f"../results/Experiment2/Exp2_PredictedVSLabelled_{model_name}.png"
    mse, mae, rmse, r2 = graph_testing(model, test, path)
    metrics_df = metrics_df.append(
        {"Model": model_name, "MSE": mse, "MAE": mae, "R-squared": r2},
        ignore_index=True,
    )
metrics_df.to_csv(f"../results/Experiment2/Metricas test experimento 2 GATConv.csv")  

MSE: 0.016329149
MAE: 0.117988765
RMSE: 0.12778556
R-squared: 0.8009250653321635


  metrics_df = metrics_df.append(


MSE: 0.015648305
MAE: 0.107104585
RMSE: 0.12509319
R-squared: 0.8092254928429359


  metrics_df = metrics_df.append(


MSE: 0.0135543095
MAE: 0.10427207
RMSE: 0.11642297
R-squared: 0.8347541956603197


  metrics_df = metrics_df.append(


MSE: 0.082105905
MAE: 0.24700335
RMSE: 0.28654128
R-squared: -0.000984632450789391


  metrics_df = metrics_df.append(


### 4.3 Experimento 3

#### Dataset

In [21]:
dataset = load_gpickle_files(r"..\data\train", 3)  # Carga los datos con la función load_gpickle_files
test = load_gpickle_files(r"..\data\test", 3)  # Carga los datos con la función load_gpickle_files


The scipy.sparse array containers will be used instead of matrices
in Networkx 3.0. Use `to_scipy_sparse_array` instead.
  adj_matrix = nx.convert_matrix.to_scipy_sparse_matrix(graph)


#### Modelos

In [22]:
model31=OneGATLayer(input_size=3, hidden_channels=3)
model32=TwoGATLayer(input_size=3, hidden_channels=3)
model33=ThreeGATLayer(input_size=3, hidden_channels=3)
model34=FourGATLayer(input_size=3, hidden_channels=3)

models = [("OneGATLayer" , model31), 
          ("TwoGATLayer" , model32), 
          ("ThreeGATLayer" ,model33),
          ("FourGATLayer", model34)]

In [23]:
results_df = pd.DataFrame(columns=["Model", "MSE", "MAE", "R-squared"])
for model_name, model in models:
    print("Nombre del modelo :", model_name)
    mean_mse, mean_mae, mean_rmse, mean_r2 =cross_validate_with_early_stopping(model, dataset, num_folds=5, num_epochs=40)
    results_df = results_df.append(
        {"Model": model_name, "MSE": mean_mse, "MAE": mean_mae, "R-squared": mean_r2},
        ignore_index=True,
    )

Nombre del modelo : OneGATLayer
 K-Fold : 0
Epoch:  0,  Train Loss: 0.4252, Val Loss: 0.0794
Epoch:  1,  Train Loss: 0.0670, Val Loss: 0.0291
Epoch:  2,  Train Loss: 0.0469, Val Loss: 0.0282
Epoch:  3,  Train Loss: 0.0432, Val Loss: 0.0265
Epoch:  4,  Train Loss: 0.0397, Val Loss: 0.0232
Epoch:  5,  Train Loss: 0.0364, Val Loss: 0.0235
Epoch:  6,  Train Loss: 0.0322, Val Loss: 0.0183
Epoch:  7,  Train Loss: 0.0313, Val Loss: 0.0172
Epoch:  8,  Train Loss: 0.0303, Val Loss: 0.0145
Epoch:  9,  Train Loss: 0.0305, Val Loss: 0.0190
Epoch:  10,  Train Loss: 0.0306, Val Loss: 0.0153
Epoch:  11,  Train Loss: 0.0297, Val Loss: 0.0194
Early stopping for fold 1 after 12 epochs.
 K-Fold : 1
Epoch:  0,  Train Loss: 0.0290, Val Loss: 0.0227
Epoch:  1,  Train Loss: 0.0284, Val Loss: 0.0156
Epoch:  2,  Train Loss: 0.0258, Val Loss: 0.0158
Epoch:  3,  Train Loss: 0.0272, Val Loss: 0.0125
Epoch:  4,  Train Loss: 0.0260, Val Loss: 0.0152
Epoch:  5,  Train Loss: 0.0258, Val Loss: 0.0099
Epoch:  6,  Train

  results_df = results_df.append(


Epoch:  0,  Train Loss: 0.0978, Val Loss: 0.0698
Epoch:  1,  Train Loss: 0.0565, Val Loss: 0.0288
Epoch:  2,  Train Loss: 0.0440, Val Loss: 0.0212
Epoch:  3,  Train Loss: 0.0343, Val Loss: 0.0125
Epoch:  4,  Train Loss: 0.0252, Val Loss: 0.0099
Epoch:  5,  Train Loss: 0.0233, Val Loss: 0.0129
Epoch:  6,  Train Loss: 0.0271, Val Loss: 0.0115
Epoch:  7,  Train Loss: 0.0256, Val Loss: 0.0093
Epoch:  8,  Train Loss: 0.0237, Val Loss: 0.0095
Epoch:  9,  Train Loss: 0.0256, Val Loss: 0.0102
Epoch:  10,  Train Loss: 0.0254, Val Loss: 0.0106
Early stopping for fold 1 after 11 epochs.
 K-Fold : 1
Epoch:  0,  Train Loss: 0.0257, Val Loss: 0.0115
Epoch:  1,  Train Loss: 0.0244, Val Loss: 0.0118
Epoch:  2,  Train Loss: 0.0243, Val Loss: 0.0086
Epoch:  3,  Train Loss: 0.0231, Val Loss: 0.0088
Epoch:  4,  Train Loss: 0.0254, Val Loss: 0.0092
Epoch:  5,  Train Loss: 0.0249, Val Loss: 0.0114
Early stopping for fold 2 after 6 epochs.
 K-Fold : 2
Epoch:  0,  Train Loss: 0.0245, Val Loss: 0.0098
Epoch:  

  results_df = results_df.append(


Epoch:  0,  Train Loss: 0.0642, Val Loss: 0.0226
Epoch:  1,  Train Loss: 0.0268, Val Loss: 0.0094
Epoch:  2,  Train Loss: 0.0238, Val Loss: 0.0057
Epoch:  3,  Train Loss: 0.0239, Val Loss: 0.0077
Epoch:  4,  Train Loss: 0.0215, Val Loss: 0.0095
Epoch:  5,  Train Loss: 0.0236, Val Loss: 0.0138
Early stopping for fold 1 after 6 epochs.
 K-Fold : 1
Epoch:  0,  Train Loss: 0.0232, Val Loss: 0.0072
Epoch:  1,  Train Loss: 0.0230, Val Loss: 0.0100
Epoch:  2,  Train Loss: 0.0232, Val Loss: 0.0087
Epoch:  3,  Train Loss: 0.0234, Val Loss: 0.0082
Early stopping for fold 2 after 4 epochs.
 K-Fold : 2
Epoch:  0,  Train Loss: 0.0240, Val Loss: 0.0080
Epoch:  1,  Train Loss: 0.0228, Val Loss: 0.0094
Epoch:  2,  Train Loss: 0.0235, Val Loss: 0.0072
Epoch:  3,  Train Loss: 0.0245, Val Loss: 0.0091
Epoch:  4,  Train Loss: 0.0234, Val Loss: 0.0085
Epoch:  5,  Train Loss: 0.0237, Val Loss: 0.0066
Epoch:  6,  Train Loss: 0.0234, Val Loss: 0.0090
Epoch:  7,  Train Loss: 0.0238, Val Loss: 0.0073
Epoch:  8,

  results_df = results_df.append(


Epoch:  0,  Train Loss: 0.0662, Val Loss: 0.0233
Epoch:  1,  Train Loss: 0.0294, Val Loss: 0.0112
Epoch:  2,  Train Loss: 0.0249, Val Loss: 0.0093
Epoch:  3,  Train Loss: 0.0257, Val Loss: 0.0091
Epoch:  4,  Train Loss: 0.0223, Val Loss: 0.0073
Epoch:  5,  Train Loss: 0.0230, Val Loss: 0.0094
Epoch:  6,  Train Loss: 0.0236, Val Loss: 0.0075
Epoch:  7,  Train Loss: 0.0242, Val Loss: 0.0069
Epoch:  8,  Train Loss: 0.0232, Val Loss: 0.0085
Epoch:  9,  Train Loss: 0.0241, Val Loss: 0.0079
Epoch:  10,  Train Loss: 0.0251, Val Loss: 0.0101
Early stopping for fold 1 after 11 epochs.
 K-Fold : 1
Epoch:  0,  Train Loss: 0.0243, Val Loss: 0.0101
Epoch:  1,  Train Loss: 0.0246, Val Loss: 0.0078
Epoch:  2,  Train Loss: 0.0233, Val Loss: 0.0116
Epoch:  3,  Train Loss: 0.0229, Val Loss: 0.0083
Epoch:  4,  Train Loss: 0.0240, Val Loss: 0.0074
Epoch:  5,  Train Loss: 0.0227, Val Loss: 0.0083
Epoch:  6,  Train Loss: 0.0226, Val Loss: 0.0117
Epoch:  7,  Train Loss: 0.0234, Val Loss: 0.0113
Early stoppin

  results_df = results_df.append(


#### Resultados

In [24]:
results3_df = results_df
results3_df

Unnamed: 0,Model,MSE,MAE,R-squared
0,OneGATLayer,0.01402,0.095962,0.827316
1,TwoGATLayer,0.011305,0.089271,0.859296
2,ThreeGATLayer,0.008682,0.075269,0.892698
3,FourGATLayer,0.00924,0.078929,0.886374


In [25]:
results3_df.to_csv("Experiment 3 GATConv.csv") 

#### Gráficas

In [26]:
models = [("OneGATLayer" , model31), 
          ("TwoGATLayer" , model32), 
          ("ThreeGATLayer" ,model33),
          ("FourGATLayer", model34)]

metrics_df = pd.DataFrame(columns=["Model", "MSE", "MAE", "R-squared"])
for model_name, model in models: 
    path = f"../results/Experiment3/Exp3_PredictedVSLabelled_{model_name}.png"
    mse, mae, rmse, r2 = graph_testing(model, test, path)
    metrics_df = metrics_df.append(
        {"Model": model_name, "MSE": mse, "MAE": mae, "R-squared": r2},
        ignore_index=True,
    )
metrics_df.to_csv(f"../results/Experiment3/Metricas test experimento 3 GATConv.csv") 

MSE: 0.008281275
MAE: 0.0757471
RMSE: 0.09100151
R-squared: 0.8990397921935801


  metrics_df = metrics_df.append(


MSE: 0.009583597
MAE: 0.07982729
RMSE: 0.097895846
R-squared: 0.8831626851033065


  metrics_df = metrics_df.append(


MSE: 0.008914984
MAE: 0.07924093
RMSE: 0.09441919
R-squared: 0.8913140067354814


  metrics_df = metrics_df.append(


MSE: 0.0075834375
MAE: 0.07355307
RMSE: 0.08708294
R-squared: 0.9075473987590057


  metrics_df = metrics_df.append(
