In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os
from pathlib import Path
from decimal import Decimal
import pandas as pd
# PyTorch
import torch 
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torch.utils.data import sampler
import torchvision.datasets as datasets
import torchvision.transforms as T
from torch_geometric.data import Data, Batch
import torch.optim as optim
from torch_geometric.nn import MessagePassing
from torch_geometric.utils import add_self_loops, degree
from torch_geometric.nn import GINConv, global_add_pool, GCNConv, global_mean_pool
import torch_geometric
from torch.nn import Linear
from torch_geometric.nn import GCNConv, GATConv, GATv2Conv, TransformerConv
import torch.nn.functional as F


#Sklearn
import sklearn.metrics as metrics
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import KFold

#Networkx
import networkx as nx

## 1. Funciones

In [2]:
def calculate_node_features(graph, experiment):
     
    # Obtener los nodos y sus características
    nodes = list(graph.nodes())
    
     # Calcular las características de los nodos
        
    if experiment == 1: 
        clustering_coeffs = nx.clustering(graph)
        x = torch.tensor([[clustering_coeffs[node]] for node in nodes], dtype=torch.float)
        
    elif experiment == 2: 
        clustering_coeffs = nx.clustering(graph)
        degree = nx.degree(graph)
        x = torch.tensor([[clustering_coeffs[node], degree[node]] for node in nodes], dtype=torch.float)
        
    else:
        clustering_coeffs = nx.clustering(graph)
        degree = nx.degree(graph)
        pagerank = nx.pagerank(graph)
        x = torch.tensor([[clustering_coeffs[node], degree[node],pagerank[node]] for node in nodes], dtype=torch.float)

    return x

def load_gpickle_files(path, experiment):
    X_path = os.path.join(path, "X")  # Ruta de la carpeta con los grafos
    y_path = os.path.join(path, "y")  # Ruta de la carpeta con las etiquetas

    file_list = os.listdir(X_path)  # Obtén la lista de archivos gpickle
    dataset = []

    for file in file_list:
        file_path = os.path.join(X_path, file)  # Genera la ruta del archivo
        graph = nx.read_gpickle(file_path)  # Lee el archivo gpickle con NetworkX

        # Calcular las características de los nodos
        x = calculate_node_features(graph, experiment)
        # Obtiene la matriz dispersa de adyacencia
        adj_matrix = nx.convert_matrix.to_scipy_sparse_matrix(graph)

        # Convierte la matriz dispersa en un tensor de PyTorch
        edge_index = torch.from_numpy(np.vstack(adj_matrix.nonzero()))
#         edge_index = torch.tensor(list(graph.edges()), dtype=torch.long).t().contiguous()  # Índices de las aristas
#         adj_matrix = nx.adjacency_matrix(graph)
#         adj_matrix = adj_matrix.toarray()

        file_number = os.path.splitext(file)[0]
        with open(os.path.join(y_path, f"{file_number}.txt")) as f:
            target = f.read()
            y = torch.tensor(np.float_(target), dtype=torch.float)

        data = Data(x=x, edge_index=edge_index, y=y)  # Crea un objeto Data
        dataset.append(data)

    return dataset

def custom_collate(batch):
    # Extraer los elementos del lote y crear listas separadas para cada atributo
    x_list = []
    edge_index_list = []
    y_list = []

    for data in batch:
        x_list.append(data.x)
        edge_index_list.append(data.edge_index)
        y_list.append(data.y)

    # Convertir las listas en arreglos de numpy
    x_batch = np.stack(x_list)
    edge_index_batch = np.stack(edge_index_list)
    y_batch = np.stack(y_list)

    return Data(x=x_batch, edge_index=edge_index_batch, y=y_batch)

def plot_learning_curves(train_losses, val_losses):
    epochs = range(1, len(train_losses) + 1)

    plt.figure(figsize=(8, 6))

    # Plot losses
    plt.plot(epochs, train_losses, label='Train')
    plt.plot(epochs, val_losses, label='Validation')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.title('Curva de Aprendizaje')
    plt.legend()

    plt.tight_layout()
    plt.show()
    
def train(model, num_epochs, dataset, data_test):
    
    #model3=GNN(input_size=3, hidden_channels=3)
    learning_rate = 0.001
    # Definir la función de pérdida y el optimizador
    loss_fn = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # Listas para almacenar las pérdidas en cada época
    train_losses = []
    val_losses = []

    #num_epochs = 12
    model.train()  # Cambiar al modo de entrenamiento

    for epoch in range(num_epochs):
        running_loss = 0.0

        for data in dataset:
            x = data.x  # Características de los nodos
            edge_index = data.edge_index
            y = data.y#.view(-1)#.to(device)  # Etiquetas o clases

            optimizer.zero_grad()
            logits = model(x=x, edge_index=edge_index)
            loss = loss_fn(logits, y)
            loss.backward()  # Realizar el paso de atrás (backward)
            optimizer.step()  # Actualizar los pesos del modelo

            running_loss += loss.item()

        # Calcular la pérdida promedio en cada época
        epoch_loss = running_loss / len(dataset)
        train_losses.append(epoch_loss)

        # Realizar la validación del modelo en cada época
        model.eval()  # Cambiar al modo de evaluación

        with torch.no_grad():
            running_val_loss = 0.0

            for data in data_test:
                x_val = data.x
                edge_index_val = data.edge_index
                y_val = data.y#.view(-1)#.to(device)

                val_logits = model(x=x_val, edge_index=edge_index_val)
                val_loss = loss_fn(val_logits, y_val)

                running_val_loss += val_loss.item()

            # Calcular la pérdida promedio en la validación
            val_epoch_loss = running_val_loss / len(data_test)
            val_losses.append(val_epoch_loss)
            return epoch_loss, val_epoch_loss

        # Imprimir información del progreso del entrenamiento
        #print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {epoch_loss:.4f}, Val Loss: {val_epoch_loss:.4f}')

    # Crear las curvas de aprendizaje
#     plot_learning_curves(train_losses, val_losses)
    
def graph_testing(model, dataset, path):
    model.eval()  # Set the model in evaluation mode
    total_samples = 0
    total_loss = 0
    predictions_list = []
    labels_list = []

    learning_rate = 0.001
    loss_fn = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    with torch.no_grad():
        for data in dataset:
            x = data.x  # Características de los nodo
            edge_index = data.edge_index
            y = data.y.unsqueeze(0)#.view(-1)#.to(device)  # Etiquetas o clases


            predictions = model(x=x, edge_index=edge_index)  # Forward pass

            loss = loss_fn(predictions, y)
            total_loss += loss.item()

            predictions_list.append(predictions.detach().cpu().numpy())
            labels_list.append(y.detach().cpu().numpy())

        # Calculate accuracy
        predictions_array = np.concatenate(predictions_list, axis=0)
        labels_array = np.concatenate(labels_list, axis=0)
        # Calculate MSE
        mse = mean_squared_error(labels_array, predictions_array)

        # Calculate MAE
        mae = mean_absolute_error(labels_array, predictions_array)

        # Calculate RMSE
        rmse = mean_squared_error(labels_array, predictions_array, squared=False)

        # Calculate R-squared
        r2 = r2_score(labels_array, predictions_array)

        print("MSE:", mse)
        print("MAE:", mae)
        print("RMSE:", rmse)
        print("R-squared:", r2)

        #print("Accuracy: {:.4f}".format(accuracy))
        # print("Mean Squared Error (MSE): {:.4f}".format(mse))
        # print("R-squared (R²): {:.4f}".format(r2))
        fig, ax = plt.subplots()
        ax.scatter(labels_array, predictions_array)
        ax.axline((0, 0), slope=1, color='red')
        # Add labels and title
        plt.xlabel("Etiquetas")
        plt.ylabel("Predicciones")
        plt.title("Predicciones vs. Etiquetas")
        

        # Display the plot
        plt.savefig(path)
        plt.close()
        
        return mse, mae, rmse, r2 
    
    
def testing(model, dataset):
    model.eval()  # Set the model in evaluation mode
    total_samples = 0
    total_loss = 0
    predictions_list = []
    labels_list = []

    learning_rate = 0.001
    loss_fn = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    with torch.no_grad():
        for data in dataset:
            x = data.x  # Características de los nodo
            edge_index = data.edge_index
            y = data.y.unsqueeze(0)#.view(-1)#.to(device)  # Etiquetas o clases


            predictions = model(x=x, edge_index=edge_index)  # Forward pass

            loss = loss_fn(predictions, y)
            total_loss += loss.item()

            predictions_list.append(predictions.detach().cpu().numpy())
            labels_list.append(y.detach().cpu().numpy())

        # Calculate accuracy
        predictions_array = np.concatenate(predictions_list, axis=0)
        labels_array = np.concatenate(labels_list, axis=0)
        # Calculate MSE
        mse = mean_squared_error(labels_array, predictions_array)

        # Calculate MAE
        mae = mean_absolute_error(labels_array, predictions_array)

        # Calculate RMSE
        rmse = mean_squared_error(labels_array, predictions_array, squared=False)

        # Calculate R-squared
        r2 = r2_score(labels_array, predictions_array)

#         print("MSE:", mse)
#         print("MAE:", mae)
#         print("RMSE:", rmse)
#         print("R-squared:", r2)

#         #print("Accuracy: {:.4f}".format(accuracy))
#         # print("Mean Squared Error (MSE): {:.4f}".format(mse))
#         # print("R-squared (R²): {:.4f}".format(r2))
#         fig, ax = plt.subplots()
#         ax.scatter(labels_array, predictions_array)
#         ax.axline((0, 0), slope=1, color='red')
#         # Add labels and title
#         plt.xlabel("Labels")
#         plt.ylabel("Predictions")
#         plt.title("Predictions vs. Labels")
        

#         # Display the plot
#         plt.show()
        
        return mse, mae, rmse, r2
def cross_validate(model, dataset, num_folds=5, num_epochs=20):
    """
    Perform cross-validation for a given model and dataset.

    Args:
        model (torch.nn.Module): The PyTorch model to evaluate.
        dataset (list): The dataset containing data for cross-validation.
        num_folds (int): The number of folds for cross-validation.
    """
    kf = KFold(n_splits=num_folds)

    all_mse = []
    all_mae = []
    all_rmse = []
    all_r2 = []

    for train_idx, val_idx in kf.split(dataset):
        # Split the dataset into training and validation sets for this fold
        train_set = [dataset[i] for i in train_idx] 
        val_set = [dataset[i] for i in val_idx]

        # Train the model on the training set
        train(model, num_epochs, train_set, val_set)

        # Evaluate the model on the validation set
        mse, mae, rmse, r2 = testing(model, val_set)

        all_mse.append(mse)
        all_mae.append(mae)
        all_rmse.append(rmse)
        all_r2.append(r2)

    # Calculate and return the mean of evaluation metrics across all folds
    mean_mse = np.mean(all_mse)
    mean_mae = np.mean(all_mae)
    mean_rmse = np.mean(all_rmse)
    mean_r2 = np.mean(all_r2)

    return mean_mse, mean_mae, mean_rmse, mean_r2

def cross_validate_with_early_stopping(model, dataset, num_folds=5, num_epochs=20, patience=3):
    """
    Perform cross-validation for a given model and dataset with early stopping.

    Args:
        model (torch.nn.Module): The PyTorch model to evaluate.
        dataset (list): The dataset containing data for cross-validation.
        num_folds (int): The number of folds for cross-validation.
        num_epochs (int): The maximum number of epochs for training in each fold.
        patience (int): Number of epochs with no improvement to wait before stopping.
    """
    kf = KFold(n_splits=num_folds)

    all_mse = []
    all_mae = []
    all_rmse = []
    all_r2 = []

    for fold, (train_idx, val_idx) in enumerate(kf.split(dataset)):
        # Split the dataset into training and validation sets for this fold
        train_set = [dataset[i] for i in train_idx]
        val_set = [dataset[i] for i in val_idx]
        print(" K-Fold :", fold)

        # Initialize variables for early stopping
        best_val_loss = float("inf")
        epochs_no_improve = 0
        #model.reset_parameters()  # Reset model parameters at the beginning of each fold

        for epoch in range(num_epochs):
            # Train the model on the training set
            epoch_loss, val_epoch_loss = train(model, 1, train_set, val_set)  # Train for 1 epoch at a time
            print(f"Epoch:  {epoch},  Train Loss: {epoch_loss:.4f}, Val Loss: {val_epoch_loss:.4f}")
            # Evaluate the model on the validation set
            mse, mae, rmse, r2 = testing(model, val_set)

            all_mse.append(mse)
            all_mae.append(mae)
            all_rmse.append(rmse)
            all_r2.append(r2)

            # Check if validation loss has improved
            if mse < best_val_loss:
                best_val_loss = mse
                epochs_no_improve = 0
            else:
                epochs_no_improve += 1

            # Implement early stopping
            if epochs_no_improve >= patience:
                print(f"Early stopping for fold {fold + 1} after {epoch + 1} epochs.")
                break

    # Calculate and return the mean of evaluation metrics across all folds
    mean_mse = np.mean(all_mse)
    mean_mae = np.mean(all_mae)
    mean_rmse = np.mean(all_rmse)
    mean_r2 = np.mean(all_r2)

    return mean_mse, mean_mae, mean_rmse, mean_r2

## 2. Datos

In [3]:
dataset = load_gpickle_files(r"..\data\train", 1)  # Carga los datos con la función load_gpickle_files
test = load_gpickle_files(r"..\data\test", 1)  # Carga los datos con la función load_gpickle_files


The scipy.sparse array containers will be used instead of matrices
in Networkx 3.0. Use `to_scipy_sparse_array` instead.
  adj_matrix = nx.convert_matrix.to_scipy_sparse_matrix(graph)


## 3. Modelos GCNConv

### 3.1 One GCN Layer

In [4]:
class OneGCNLayer(torch.nn.Module):
    def __init__(self, input_size, hidden_channels):
        super(OneGCNLayer, self).__init__()
        torch.manual_seed(12345)
        
        self.conv1 = GCNConv(input_size, hidden_channels)
        
        
        self.lin = Linear(hidden_channels, 1)
    
    def forward(self, x, edge_index, batch = None,  edge_col = None):
        
        # Node embedding 
        x = self.conv1(x, edge_index, edge_col)
               
        # Readout layer
        batch = torch.zeros(x.shape[0],dtype=int) if batch is None else batch
        x = global_mean_pool(x, batch)
        
        # Final classifier
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.lin(x)
    
        return x

### 3.2 Two GCN Layer

In [5]:
class TwoGCNLayer(torch.nn.Module):
    def __init__(self, input_size, hidden_channels):
        super(TwoGCNLayer, self).__init__()
        torch.manual_seed(12345)
        
        self.conv1 = GCNConv(input_size, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        
        self.lin = Linear(hidden_channels, 1)
    
    def forward(self, x, edge_index, batch = None,  edge_col = None):
        
        # Node embedding 
        x = self.conv1(x, edge_index, edge_col)
        x = x.relu()
        x = self.conv2(x, edge_index, edge_col)
        
        # Readout layer
        batch = torch.zeros(x.shape[0],dtype=int) if batch is None else batch
        x = global_mean_pool(x, batch)
        
        # Final classifier
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.lin(x)
    
        return x

### 3.3 Three GCN Layer

In [6]:
class ThreeGCNLayer(torch.nn.Module):
    def __init__(self, input_size, hidden_channels):
        super(ThreeGCNLayer, self).__init__()
        torch.manual_seed(12345)
        
        self.conv1 = GCNConv(input_size, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        self.conv3 = GCNConv(hidden_channels, hidden_channels)
        
        self.lin = Linear(hidden_channels, 1)
    
    def forward(self, x, edge_index, batch = None,  edge_col = None):
        
        # Node embedding 
        x = self.conv1(x, edge_index, edge_col)
        x = x.relu()
        x = self.conv2(x, edge_index, edge_col)
        x = x.relu()
        x = self.conv3(x, edge_index, edge_col)
               
        # Readout layer
        batch = torch.zeros(x.shape[0],dtype=int) if batch is None else batch
        x = global_mean_pool(x, batch)
        
        # Final classifier
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.lin(x)
    
        return x

### 3.4 Four GCN Layer

In [7]:
class FourGCNLayer(torch.nn.Module):
    def __init__(self, input_size, hidden_channels):
        super(FourGCNLayer, self).__init__()
        torch.manual_seed(12345)
        
        self.conv1 = GCNConv(input_size, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        self.conv3 = GCNConv(hidden_channels, hidden_channels)
        self.conv4 = GCNConv(hidden_channels, hidden_channels)
        
        
        self.lin = Linear(hidden_channels, 1)
    
    def forward(self, x, edge_index, batch = None,  edge_col = None):
        
        # Node embedding 
        x = self.conv1(x, edge_index, edge_col)
        x = x.relu()
        x = self.conv2(x, edge_index, edge_col)
        x = x.relu()
        x = self.conv3(x, edge_index, edge_col)
        x = x.relu()
        x = self.conv4(x, edge_index, edge_col)
               
        # Readout layer
        batch = torch.zeros(x.shape[0],dtype=int) if batch is None else batch
        x = global_mean_pool(x, batch)
        
        # Final classifier
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.lin(x)
    
        return x

## 4. Experimentos

In [8]:
import warnings

warnings.filterwarnings("ignore", category=UserWarning)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

### 4.1 Experimento 1

#### Dataset

In [9]:
dataset = load_gpickle_files(r"..\data\train", 1)  # Carga los datos con la función load_gpickle_files
test = load_gpickle_files(r"..\data\test", 1)  # Carga los datos con la función load_gpickle_files


The scipy.sparse array containers will be used instead of matrices
in Networkx 3.0. Use `to_scipy_sparse_array` instead.
  adj_matrix = nx.convert_matrix.to_scipy_sparse_matrix(graph)


#### Modelos

In [10]:
model11=OneGCNLayer(input_size=1, hidden_channels=1)
model12=TwoGCNLayer(input_size=1, hidden_channels=1)
model13=ThreeGCNLayer(input_size=1, hidden_channels=1)
model14=FourGCNLayer(input_size=1, hidden_channels=1)

models = [("OneGCNLayer" , model11), 
          ("TwoGCNLayer" , model12), 
          ("ThreeGCNLayer" ,model13),
          ("FourGCNLayer", model14)]

#### Entrenamiento y cross-validation

In [11]:
results_df = pd.DataFrame(columns=["Model", "MSE", "MAE", "R-squared"])
for model_name, model in models:
    print("Nombre del modelo :", model_name)
    mean_mse, mean_mae, mean_rmse, mean_r2 =cross_validate_with_early_stopping(model, dataset, num_folds=5, num_epochs=40)
    results_df = results_df.append(
        {"Model": model_name, "MSE": mean_mse, "MAE": mean_mae, "R-squared": mean_r2},
        ignore_index=True,
    )

Nombre del modelo : OneGCNLayer
 K-Fold : 0
Epoch:  0,  Train Loss: 0.1244, Val Loss: 0.0730
Epoch:  1,  Train Loss: 0.0745, Val Loss: 0.0695
Epoch:  2,  Train Loss: 0.0741, Val Loss: 0.0685
Epoch:  3,  Train Loss: 0.0731, Val Loss: 0.0679
Epoch:  4,  Train Loss: 0.0743, Val Loss: 0.0679
Epoch:  5,  Train Loss: 0.0727, Val Loss: 0.0671
Epoch:  6,  Train Loss: 0.0741, Val Loss: 0.0675
Epoch:  7,  Train Loss: 0.0735, Val Loss: 0.0671
Epoch:  8,  Train Loss: 0.0749, Val Loss: 0.0679
Epoch:  9,  Train Loss: 0.0735, Val Loss: 0.0673
Epoch:  10,  Train Loss: 0.0725, Val Loss: 0.0678
Early stopping for fold 1 after 11 epochs.
 K-Fold : 1
Epoch:  0,  Train Loss: 0.0730, Val Loss: 0.0709
Epoch:  1,  Train Loss: 0.0733, Val Loss: 0.0708
Epoch:  2,  Train Loss: 0.0723, Val Loss: 0.0705
Epoch:  3,  Train Loss: 0.0717, Val Loss: 0.0705
Epoch:  4,  Train Loss: 0.0720, Val Loss: 0.0704
Epoch:  5,  Train Loss: 0.0730, Val Loss: 0.0707
Epoch:  6,  Train Loss: 0.0726, Val Loss: 0.0707
Epoch:  7,  Train 

  results_df = results_df.append(


Epoch:  0,  Train Loss: 0.3578, Val Loss: 0.1098
Epoch:  1,  Train Loss: 0.0994, Val Loss: 0.0796
Epoch:  2,  Train Loss: 0.0816, Val Loss: 0.0783
Epoch:  3,  Train Loss: 0.0811, Val Loss: 0.0775
Epoch:  4,  Train Loss: 0.0802, Val Loss: 0.0773
Epoch:  5,  Train Loss: 0.0795, Val Loss: 0.0766
Epoch:  6,  Train Loss: 0.0792, Val Loss: 0.0765
Epoch:  7,  Train Loss: 0.0802, Val Loss: 0.0768
Epoch:  8,  Train Loss: 0.0791, Val Loss: 0.0756
Epoch:  9,  Train Loss: 0.0789, Val Loss: 0.0752
Epoch:  10,  Train Loss: 0.0787, Val Loss: 0.0751
Epoch:  11,  Train Loss: 0.0791, Val Loss: 0.0746
Epoch:  12,  Train Loss: 0.0789, Val Loss: 0.0751
Epoch:  13,  Train Loss: 0.0786, Val Loss: 0.0743
Epoch:  14,  Train Loss: 0.0775, Val Loss: 0.0741
Epoch:  15,  Train Loss: 0.0779, Val Loss: 0.0734
Epoch:  16,  Train Loss: 0.0772, Val Loss: 0.0740
Epoch:  17,  Train Loss: 0.0784, Val Loss: 0.0732
Epoch:  18,  Train Loss: 0.0777, Val Loss: 0.0727
Epoch:  19,  Train Loss: 0.0774, Val Loss: 0.0724
Epoch:  20

  results_df = results_df.append(


Epoch:  0,  Train Loss: 0.0972, Val Loss: 0.0802
Epoch:  1,  Train Loss: 0.0830, Val Loss: 0.0797
Epoch:  2,  Train Loss: 0.0826, Val Loss: 0.0798
Epoch:  3,  Train Loss: 0.0825, Val Loss: 0.0797
Epoch:  4,  Train Loss: 0.0825, Val Loss: 0.0797
Epoch:  5,  Train Loss: 0.0825, Val Loss: 0.0797
Epoch:  6,  Train Loss: 0.0825, Val Loss: 0.0797
Epoch:  7,  Train Loss: 0.0825, Val Loss: 0.0797
Early stopping for fold 1 after 8 epochs.
 K-Fold : 1
Epoch:  0,  Train Loss: 0.0813, Val Loss: 0.0841
Epoch:  1,  Train Loss: 0.0814, Val Loss: 0.0842
Epoch:  2,  Train Loss: 0.0813, Val Loss: 0.0841
Epoch:  3,  Train Loss: 0.0813, Val Loss: 0.0842
Epoch:  4,  Train Loss: 0.0813, Val Loss: 0.0842
Epoch:  5,  Train Loss: 0.0814, Val Loss: 0.0842
Early stopping for fold 2 after 6 epochs.
 K-Fold : 2
Epoch:  0,  Train Loss: 0.0828, Val Loss: 0.0783
Epoch:  1,  Train Loss: 0.0828, Val Loss: 0.0783
Epoch:  2,  Train Loss: 0.0828, Val Loss: 0.0783
Epoch:  3,  Train Loss: 0.0828, Val Loss: 0.0783
Early stop

  results_df = results_df.append(


Epoch:  0,  Train Loss: 0.3545, Val Loss: 0.0962
Epoch:  1,  Train Loss: 0.0950, Val Loss: 0.0804
Epoch:  2,  Train Loss: 0.0833, Val Loss: 0.0797
Epoch:  3,  Train Loss: 0.0827, Val Loss: 0.0798
Epoch:  4,  Train Loss: 0.0825, Val Loss: 0.0797
Epoch:  5,  Train Loss: 0.0825, Val Loss: 0.0797
Epoch:  6,  Train Loss: 0.0826, Val Loss: 0.0797
Epoch:  7,  Train Loss: 0.0825, Val Loss: 0.0797
Early stopping for fold 1 after 8 epochs.
 K-Fold : 1
Epoch:  0,  Train Loss: 0.0814, Val Loss: 0.0841
Epoch:  1,  Train Loss: 0.0814, Val Loss: 0.0842
Epoch:  2,  Train Loss: 0.0814, Val Loss: 0.0842
Epoch:  3,  Train Loss: 0.0813, Val Loss: 0.0842
Early stopping for fold 2 after 4 epochs.
 K-Fold : 2
Epoch:  0,  Train Loss: 0.0828, Val Loss: 0.0783
Epoch:  1,  Train Loss: 0.0829, Val Loss: 0.0783
Epoch:  2,  Train Loss: 0.0829, Val Loss: 0.0783
Epoch:  3,  Train Loss: 0.0828, Val Loss: 0.0783
Early stopping for fold 3 after 4 epochs.
 K-Fold : 3
Epoch:  0,  Train Loss: 0.0809, Val Loss: 0.0856
Epoch

  results_df = results_df.append(


#### Resultados

In [12]:
results1_df = results_df
print("Experimento 1")
results1_df

Experimento 1


Unnamed: 0,Model,MSE,MAE,R-squared
0,OneGCNLayer,0.068173,0.226956,0.161525
1,TwoGCNLayer,0.052546,0.193473,0.348737
2,ThreeGCNLayer,0.0816,0.248217,-0.001269
3,FourGCNLayer,0.082089,0.248571,-0.010173


In [13]:
results1_df.to_csv(r"..\results\Experiment1\Metricas entrenamiento experimento 1 GCNConv.csv")

#### Gráficas

In [14]:
models = [("OneGCNLayer" , model11), 
          ("TwoGCNLayer" , model12), 
          ("ThreeGCNLayer" ,model13),
          ("FourGCNLayer", model14)]

metrics_df = pd.DataFrame(columns=["Model", "MSE", "MAE", "R-squared"])
for model_name, model in models: 
    path = f"../results/Experiment1/Exp1_PredictedVSLabelled_{model_name}.png"
    mse, mae, rmse, r2 = graph_testing(model, test, path)
    metrics_df = metrics_df.append(
        {"Model": model_name, "MSE": mse, "MAE": mae, "R-squared": r2},
        ignore_index=True,
    )
metrics_df.to_csv(f"../results/Experiment1/Metricas test experimento 1 GCNConv.csv")    
    

MSE: 0.0686613
MAE: 0.2264158
RMSE: 0.26203302
R-squared: 0.16292373453490927


  metrics_df = metrics_df.append(


MSE: 0.029751588
MAE: 0.14442287
RMSE: 0.17248648
R-squared: 0.6372869504867866


  metrics_df = metrics_df.append(


MSE: 0.08211002
MAE: 0.24700785
RMSE: 0.28654847
R-squared: -0.0010348155405350834


  metrics_df = metrics_df.append(


MSE: 0.0821028
MAE: 0.24699984
RMSE: 0.28653586
R-squared: -0.0009468084951660405


  metrics_df = metrics_df.append(


In [15]:
metrics_df

Unnamed: 0,Model,MSE,MAE,R-squared
0,OneGCNLayer,0.068661,0.226416,0.162924
1,TwoGCNLayer,0.029752,0.144423,0.637287
2,ThreeGCNLayer,0.08211,0.247008,-0.001035
3,FourGCNLayer,0.082103,0.247,-0.000947


### 4.2 Experimento 2

#### Dataset

In [16]:
dataset = load_gpickle_files(r"..\data\train", 2)  # Carga los datos con la función load_gpickle_files
test = load_gpickle_files(r"..\data\test", 2)  # Carga los datos con la función load_gpickle_files


The scipy.sparse array containers will be used instead of matrices
in Networkx 3.0. Use `to_scipy_sparse_array` instead.
  adj_matrix = nx.convert_matrix.to_scipy_sparse_matrix(graph)


#### Modelos

In [17]:
model21=OneGCNLayer(input_size=2, hidden_channels=2)
model22=TwoGCNLayer(input_size=2, hidden_channels=2)
model23=ThreeGCNLayer(input_size=2, hidden_channels=2)
model24=FourGCNLayer(input_size=2, hidden_channels=2)

models = [("OneGCNLayer" , model21), 
          ("TwoGCNLayer" , model22), 
          ("ThreeGCNLayer" ,model23),
          ("FourGCNLayer", model24)]

#### Entrenamiento y cross-validation

In [18]:
results_df = pd.DataFrame(columns=["Model", "MSE", "MAE", "R-squared"])
for model_name, model in models:
    print("Nombre del modelo :", model_name)
    mean_mse, mean_mae, mean_rmse, mean_r2 =cross_validate_with_early_stopping(model, dataset, num_folds=5, num_epochs=40)
    results_df = results_df.append(
        {"Model": model_name, "MSE": mean_mse, "MAE": mean_mae, "R-squared": mean_r2},
        ignore_index=True,
    )

Nombre del modelo : OneGCNLayer
 K-Fold : 0
Epoch:  0,  Train Loss: 0.7014, Val Loss: 0.0955
Epoch:  1,  Train Loss: 0.0950, Val Loss: 0.0779
Epoch:  2,  Train Loss: 0.0809, Val Loss: 0.0763
Epoch:  3,  Train Loss: 0.0788, Val Loss: 0.0765
Epoch:  4,  Train Loss: 0.0769, Val Loss: 0.0735
Epoch:  5,  Train Loss: 0.0751, Val Loss: 0.0722
Epoch:  6,  Train Loss: 0.0731, Val Loss: 0.0699
Epoch:  7,  Train Loss: 0.0710, Val Loss: 0.0639
Epoch:  8,  Train Loss: 0.0685, Val Loss: 0.0625
Epoch:  9,  Train Loss: 0.0681, Val Loss: 0.0653
Epoch:  10,  Train Loss: 0.0691, Val Loss: 0.0592
Epoch:  11,  Train Loss: 0.0678, Val Loss: 0.0655
Epoch:  12,  Train Loss: 0.0667, Val Loss: 0.0656
Epoch:  13,  Train Loss: 0.0664, Val Loss: 0.0595
Early stopping for fold 1 after 14 epochs.
 K-Fold : 1
Epoch:  0,  Train Loss: 0.0669, Val Loss: 0.0619
Epoch:  1,  Train Loss: 0.0652, Val Loss: 0.0590
Epoch:  2,  Train Loss: 0.0634, Val Loss: 0.0572
Epoch:  3,  Train Loss: 0.0623, Val Loss: 0.0625
Epoch:  4,  Tra

  results_df = results_df.append(


Epoch:  0,  Train Loss: 0.2063, Val Loss: 0.0827
Epoch:  1,  Train Loss: 0.0885, Val Loss: 0.0808
Epoch:  2,  Train Loss: 0.0833, Val Loss: 0.0798
Epoch:  3,  Train Loss: 0.0825, Val Loss: 0.0798
Epoch:  4,  Train Loss: 0.0825, Val Loss: 0.0799
Epoch:  5,  Train Loss: 0.0827, Val Loss: 0.0799
Early stopping for fold 1 after 6 epochs.
 K-Fold : 1
Epoch:  0,  Train Loss: 0.0814, Val Loss: 0.0842
Epoch:  1,  Train Loss: 0.0814, Val Loss: 0.0842
Epoch:  2,  Train Loss: 0.0813, Val Loss: 0.0842
Epoch:  3,  Train Loss: 0.0813, Val Loss: 0.0842
Epoch:  4,  Train Loss: 0.0814, Val Loss: 0.0842
Epoch:  5,  Train Loss: 0.0813, Val Loss: 0.0842
Epoch:  6,  Train Loss: 0.0813, Val Loss: 0.0842
Early stopping for fold 2 after 7 epochs.
 K-Fold : 2
Epoch:  0,  Train Loss: 0.0828, Val Loss: 0.0783
Epoch:  1,  Train Loss: 0.0828, Val Loss: 0.0783
Epoch:  2,  Train Loss: 0.0829, Val Loss: 0.0783
Epoch:  3,  Train Loss: 0.0828, Val Loss: 0.0783
Epoch:  4,  Train Loss: 0.0828, Val Loss: 0.0783
Early stop

  results_df = results_df.append(


Epoch:  0,  Train Loss: 0.2078, Val Loss: 0.0843
Epoch:  1,  Train Loss: 0.0939, Val Loss: 0.0808
Epoch:  2,  Train Loss: 0.0838, Val Loss: 0.0802
Epoch:  3,  Train Loss: 0.0826, Val Loss: 0.0797
Epoch:  4,  Train Loss: 0.0827, Val Loss: 0.0797
Epoch:  5,  Train Loss: 0.0826, Val Loss: 0.0796
Epoch:  6,  Train Loss: 0.0826, Val Loss: 0.0797
Epoch:  7,  Train Loss: 0.0825, Val Loss: 0.0797
Epoch:  8,  Train Loss: 0.0825, Val Loss: 0.0797
Early stopping for fold 1 after 9 epochs.
 K-Fold : 1
Epoch:  0,  Train Loss: 0.0814, Val Loss: 0.0842
Epoch:  1,  Train Loss: 0.0814, Val Loss: 0.0842
Epoch:  2,  Train Loss: 0.0812, Val Loss: 0.0842
Epoch:  3,  Train Loss: 0.0815, Val Loss: 0.0842
Epoch:  4,  Train Loss: 0.0813, Val Loss: 0.0842
Epoch:  5,  Train Loss: 0.0814, Val Loss: 0.0842
Epoch:  6,  Train Loss: 0.0813, Val Loss: 0.0841
Epoch:  7,  Train Loss: 0.0813, Val Loss: 0.0842
Epoch:  8,  Train Loss: 0.0814, Val Loss: 0.0842
Epoch:  9,  Train Loss: 0.0813, Val Loss: 0.0842
Early stopping 

  results_df = results_df.append(


Epoch:  0,  Train Loss: 0.1992, Val Loss: 0.0843
Epoch:  1,  Train Loss: 0.0897, Val Loss: 0.0798
Epoch:  2,  Train Loss: 0.0842, Val Loss: 0.0799
Epoch:  3,  Train Loss: 0.0828, Val Loss: 0.0798
Epoch:  4,  Train Loss: 0.0825, Val Loss: 0.0798
Early stopping for fold 1 after 5 epochs.
 K-Fold : 1
Epoch:  0,  Train Loss: 0.0814, Val Loss: 0.0841
Epoch:  1,  Train Loss: 0.0814, Val Loss: 0.0841
Epoch:  2,  Train Loss: 0.0813, Val Loss: 0.0841
Epoch:  3,  Train Loss: 0.0815, Val Loss: 0.0842
Early stopping for fold 2 after 4 epochs.
 K-Fold : 2
Epoch:  0,  Train Loss: 0.0829, Val Loss: 0.0783
Epoch:  1,  Train Loss: 0.0829, Val Loss: 0.0783
Epoch:  2,  Train Loss: 0.0829, Val Loss: 0.0783
Epoch:  3,  Train Loss: 0.0828, Val Loss: 0.0783
Early stopping for fold 3 after 4 epochs.
 K-Fold : 3
Epoch:  0,  Train Loss: 0.0809, Val Loss: 0.0857
Epoch:  1,  Train Loss: 0.0810, Val Loss: 0.0857
Epoch:  2,  Train Loss: 0.0809, Val Loss: 0.0857
Epoch:  3,  Train Loss: 0.0809, Val Loss: 0.0857
Early

  results_df = results_df.append(


#### Resultados

In [19]:
results2_df = results_df
results2_df

Unnamed: 0,Model,MSE,MAE,R-squared
0,OneGCNLayer,0.059066,0.210738,0.270866
1,TwoGCNLayer,0.081799,0.248169,-0.003083
2,ThreeGCNLayer,0.081619,0.248356,-0.003318
3,FourGCNLayer,0.081748,0.247856,-0.003688


In [20]:
results2_df.to_csv("Experiment 2 GCNConv.csv") 

#### Gráficas

In [21]:
models = [("OneGCNLayer" , model21), 
          ("TwoGCNLayer" , model22), 
          ("ThreeGCNLayer" ,model23),
          ("FourGCNLayer", model24)]

metrics_df = pd.DataFrame(columns=["Model", "MSE", "MAE", "R-squared"])
for model_name, model in models: 
    path = f"../results/Experiment2/Exp2_PredictedVSLabelled_{model_name}.png"
    mse, mae, rmse, r2 = graph_testing(model, test, path)
    metrics_df = metrics_df.append(
        {"Model": model_name, "MSE": mse, "MAE": mae, "R-squared": r2},
        ignore_index=True,
    )
metrics_df.to_csv(f"../results/Experiment2/Metricas test experimento 2 GCNConv.csv") 

MSE: 0.049753692
MAE: 0.1961623
RMSE: 0.22305536
R-squared: 0.3934335691428591


  metrics_df = metrics_df.append(


MSE: 0.08210283
MAE: 0.24699987
RMSE: 0.28653592
R-squared: -0.000947103387999304


  metrics_df = metrics_df.append(


MSE: 0.08210276
MAE: 0.2469998
RMSE: 0.2865358
R-squared: -0.0009463495034069958


  metrics_df = metrics_df.append(


MSE: 0.08210284
MAE: 0.2469999
RMSE: 0.28653595
R-squared: -0.000947318167379807


  metrics_df = metrics_df.append(


### 4.3 Experimento 3

#### Dataset

In [22]:
dataset = load_gpickle_files(r"..\data\train", 3)  # Carga los datos con la función load_gpickle_files
test = load_gpickle_files(r"..\data\test", 3)  # Carga los datos con la función load_gpickle_files


The scipy.sparse array containers will be used instead of matrices
in Networkx 3.0. Use `to_scipy_sparse_array` instead.
  adj_matrix = nx.convert_matrix.to_scipy_sparse_matrix(graph)


#### Modelos

In [23]:
model31=OneGCNLayer(input_size=3, hidden_channels=3)
model32=TwoGCNLayer(input_size=3, hidden_channels=3)
model33=ThreeGCNLayer(input_size=3, hidden_channels=3)
model34=FourGCNLayer(input_size=3, hidden_channels=3)

models = [("OneGCNLayer" , model31), 
          ("TwoGCNLayer" , model32), 
          ("ThreeGCNLayer" ,model33),
          ("FourGCNLayer", model34)]

#### Entrenamiento y cross-validation

In [24]:
results_df = pd.DataFrame(columns=["Model", "MSE", "MAE", "R-squared"])
for model_name, model in models:
    print("Nombre del modelo :", model_name)
    mean_mse, mean_mae, mean_rmse, mean_r2 =cross_validate_with_early_stopping(model, dataset, num_folds=5, num_epochs=40)
    results_df = results_df.append(
        {"Model": model_name, "MSE": mean_mse, "MAE": mean_mae, "R-squared": mean_r2},
        ignore_index=True,
    )

Nombre del modelo : OneGCNLayer
 K-Fold : 0
Epoch:  0,  Train Loss: 0.4343, Val Loss: 0.0856
Epoch:  1,  Train Loss: 0.0890, Val Loss: 0.0799
Epoch:  2,  Train Loss: 0.0826, Val Loss: 0.0780
Epoch:  3,  Train Loss: 0.0811, Val Loss: 0.0763
Epoch:  4,  Train Loss: 0.0796, Val Loss: 0.0747
Epoch:  5,  Train Loss: 0.0763, Val Loss: 0.0705
Epoch:  6,  Train Loss: 0.0705, Val Loss: 0.0666
Epoch:  7,  Train Loss: 0.0667, Val Loss: 0.0632
Epoch:  8,  Train Loss: 0.0630, Val Loss: 0.0561
Epoch:  9,  Train Loss: 0.0617, Val Loss: 0.0534
Epoch:  10,  Train Loss: 0.0607, Val Loss: 0.0568
Epoch:  11,  Train Loss: 0.0600, Val Loss: 0.0552
Epoch:  12,  Train Loss: 0.0604, Val Loss: 0.0562
Early stopping for fold 1 after 13 epochs.
 K-Fold : 1
Epoch:  0,  Train Loss: 0.0614, Val Loss: 0.0616
Epoch:  1,  Train Loss: 0.0582, Val Loss: 0.0594
Epoch:  2,  Train Loss: 0.0581, Val Loss: 0.0531
Epoch:  3,  Train Loss: 0.0576, Val Loss: 0.0591
Epoch:  4,  Train Loss: 0.0580, Val Loss: 0.0547
Epoch:  5,  Trai

  results_df = results_df.append(


Epoch:  0,  Train Loss: 0.2733, Val Loss: 0.0818
Epoch:  1,  Train Loss: 0.0857, Val Loss: 0.0800
Epoch:  2,  Train Loss: 0.0829, Val Loss: 0.0799
Epoch:  3,  Train Loss: 0.0830, Val Loss: 0.0795
Epoch:  4,  Train Loss: 0.0829, Val Loss: 0.0795
Epoch:  5,  Train Loss: 0.0826, Val Loss: 0.0797
Epoch:  6,  Train Loss: 0.0825, Val Loss: 0.0796
Epoch:  7,  Train Loss: 0.0825, Val Loss: 0.0797
Early stopping for fold 1 after 8 epochs.
 K-Fold : 1
Epoch:  0,  Train Loss: 0.0814, Val Loss: 0.0842
Epoch:  1,  Train Loss: 0.0814, Val Loss: 0.0841
Epoch:  2,  Train Loss: 0.0814, Val Loss: 0.0842
Epoch:  3,  Train Loss: 0.0814, Val Loss: 0.0842
Epoch:  4,  Train Loss: 0.0814, Val Loss: 0.0842
Early stopping for fold 2 after 5 epochs.
 K-Fold : 2
Epoch:  0,  Train Loss: 0.0827, Val Loss: 0.0784
Epoch:  1,  Train Loss: 0.0830, Val Loss: 0.0781
Epoch:  2,  Train Loss: 0.0829, Val Loss: 0.0782
Epoch:  3,  Train Loss: 0.0829, Val Loss: 0.0782
Epoch:  4,  Train Loss: 0.0829, Val Loss: 0.0781
Epoch:  5,

  results_df = results_df.append(


Epoch:  0,  Train Loss: 0.0930, Val Loss: 0.0812
Epoch:  1,  Train Loss: 0.0834, Val Loss: 0.0799
Epoch:  2,  Train Loss: 0.0826, Val Loss: 0.0796
Epoch:  3,  Train Loss: 0.0824, Val Loss: 0.0795
Epoch:  4,  Train Loss: 0.0822, Val Loss: 0.0791
Epoch:  5,  Train Loss: 0.0819, Val Loss: 0.0787
Epoch:  6,  Train Loss: 0.0811, Val Loss: 0.0773
Epoch:  7,  Train Loss: 0.0799, Val Loss: 0.0753
Epoch:  8,  Train Loss: 0.0770, Val Loss: 0.0741
Epoch:  9,  Train Loss: 0.0733, Val Loss: 0.0661
Epoch:  10,  Train Loss: 0.0626, Val Loss: 0.0526
Epoch:  11,  Train Loss: 0.0514, Val Loss: 0.0437
Epoch:  12,  Train Loss: 0.0463, Val Loss: 0.0331
Epoch:  13,  Train Loss: 0.0440, Val Loss: 0.0287
Epoch:  14,  Train Loss: 0.0397, Val Loss: 0.0266
Epoch:  15,  Train Loss: 0.0417, Val Loss: 0.0279
Epoch:  16,  Train Loss: 0.0390, Val Loss: 0.0299
Epoch:  17,  Train Loss: 0.0370, Val Loss: 0.0226
Epoch:  18,  Train Loss: 0.0361, Val Loss: 0.0226
Epoch:  19,  Train Loss: 0.0348, Val Loss: 0.0219
Epoch:  20

  results_df = results_df.append(


Epoch:  0,  Train Loss: 0.0828, Val Loss: 0.0799
Epoch:  1,  Train Loss: 0.0826, Val Loss: 0.0797
Epoch:  2,  Train Loss: 0.0826, Val Loss: 0.0797
Epoch:  3,  Train Loss: 0.0825, Val Loss: 0.0797
Epoch:  4,  Train Loss: 0.0825, Val Loss: 0.0797
Epoch:  5,  Train Loss: 0.0825, Val Loss: 0.0797
Epoch:  6,  Train Loss: 0.0825, Val Loss: 0.0797
Early stopping for fold 1 after 7 epochs.
 K-Fold : 1
Epoch:  0,  Train Loss: 0.0813, Val Loss: 0.0841
Epoch:  1,  Train Loss: 0.0814, Val Loss: 0.0841
Epoch:  2,  Train Loss: 0.0813, Val Loss: 0.0841
Epoch:  3,  Train Loss: 0.0813, Val Loss: 0.0840
Epoch:  4,  Train Loss: 0.0811, Val Loss: 0.0837
Epoch:  5,  Train Loss: 0.0807, Val Loss: 0.0826
Epoch:  6,  Train Loss: 0.0787, Val Loss: 0.0791
Epoch:  7,  Train Loss: 0.0747, Val Loss: 0.0723
Epoch:  8,  Train Loss: 0.0658, Val Loss: 0.0582
Epoch:  9,  Train Loss: 0.0488, Val Loss: 0.0359
Epoch:  10,  Train Loss: 0.0333, Val Loss: 0.0197
Epoch:  11,  Train Loss: 0.0287, Val Loss: 0.0135
Epoch:  12,  

  results_df = results_df.append(


   ##### Resultados

In [25]:
results3_df = results_df
results3_df

Unnamed: 0,Model,MSE,MAE,R-squared
0,OneGCNLayer,0.055331,0.200293,0.315468
1,TwoGCNLayer,0.052397,0.190223,0.342423
2,ThreeGCNLayer,0.030569,0.142628,0.61863
3,FourGCNLayer,0.031798,0.134991,0.612172


In [26]:
results3_df.to_csv("Experiment 3 GCNConv.csv") 

#### Gráficas

In [27]:
models = [("OneGCNLayer" , model31), 
          ("TwoGCNLayer" , model32), 
          ("ThreeGCNLayer" ,model33),
          ("FourGCNLayer", model34)]

metrics_df = pd.DataFrame(columns=["Model", "MSE", "MAE", "R-squared"])
for model_name, model in models: 
    path = f"../results/Experiment3/Exp3_PredictedVSLabelled_{model_name}.png"
    mse, mae, rmse, r2 = graph_testing(model, test, path)
    metrics_df = metrics_df.append(
        {"Model": model_name, "MSE": mse, "MAE": mae, "R-squared": r2},
        ignore_index=True,
    )
metrics_df.to_csv(f"../results/Experiment3/Metricas test experimento 3 GCNConv.csv") 

MSE: 0.045268472
MAE: 0.18709207
RMSE: 0.21276389
R-squared: 0.44811464947924384


  metrics_df = metrics_df.append(


MSE: 0.02384477
MAE: 0.12587602
RMSE: 0.15441751
R-squared: 0.7092992643646259


  metrics_df = metrics_df.append(


MSE: 0.017579166
MAE: 0.10735509
RMSE: 0.13258645
R-squared: 0.7856856342344923


  metrics_df = metrics_df.append(


MSE: 0.009256747
MAE: 0.07896963
RMSE: 0.096211985
R-squared: 0.8871474492190692


  metrics_df = metrics_df.append(
