<a href="https://colab.research.google.com/github/raphaelp-silva/GNN_Pytorch_geometric/blob/main/loading_a_graph_and_training_a_GNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install torch-geometric
!pip install --upgrade skorch
!pip install optuna

In [29]:
import pandas as pd
import numpy as np
import torch
from torch_geometric.data import Data
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, BatchNorm
import torch.nn as nn
from skorch import NeuralNetClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from torch_geometric.data import DataLoader
from sklearn.metrics import accuracy_score
import optuna
from torch_geometric.utils import subgraph

In [None]:
# Carregando o grafo salvo:
graph = torch.load('/content/obesity_graph_data-3.pt', weights_only=False)

# Acessando as variáveis do grafo carregado:
print(graph.x)  # Features dos nós
print(graph.edge_index)  # Arestas do grafo
print(graph.y) # labels

In [31]:
graph = Data(x = graph.x, edge_index=graph.edge_index, y=graph.y)
print(graph)

Data(x=[1567, 26], edge_index=[2, 23505], y=[1567, 6])


In [32]:
# Separando os índices de treino e teste (80% treino, 20% teste)
train_idx, test_idx = train_test_split(range(graph.x.size(0)), test_size=0.2, random_state=42)

# Criando um Data para treino:
train_edge_index, _ = subgraph(train_idx, graph.edge_index, relabel_nodes=True)
train_data = Data(x=graph.x[train_idx],
                  edge_index=train_edge_index,  # Edge index não muda
                  y=graph.y[train_idx])

# Criando um Data para teste:
test_edge_index, _ = subgraph(test_idx, graph.edge_index, relabel_nodes=True)
test_data = Data(x=graph.x[test_idx],
                 edge_index=test_edge_index,  # Edge index não muda
                 y=graph.y[test_idx])

In [38]:
class GNN(torch.nn.Module):
  def __init__(self, activation, neurons, dropout):

    super(GNN, self).__init__()

    self.conv1 = GCNConv(26, neurons)
    self.conv2 = GCNConv(neurons, (neurons//2))
    self.conv3 = GCNConv((neurons//2), (neurons//4))
    self.conv4 = GCNConv((neurons//4), neurons//8)

    self.bn1 = BatchNorm(neurons)
    self.bn2 = BatchNorm((neurons//2))
    self.bn3 = BatchNorm((neurons//4))
    self.bn4 = BatchNorm(neurons//8)

    self.fc1 = nn.Linear(neurons//8, neurons//16)
    self.fc2 = nn.Linear(neurons//16, 6)

    self.dropout = nn.Dropout(dropout)
    self.activation = activation

    # Aplicando a inicialização padrão do PyTorch Geometric nas camadas
    self.conv1.reset_parameters()
    self.conv2.reset_parameters()
    self.conv3.reset_parameters()
    self.conv4.reset_parameters()

  def forward(self, data):
    x = data.x
    edge_index = data.edge_index

    x = self.conv1(x, edge_index)
    x = self.bn1(x)
    x = self.activation(x)
    x = self.dropout(x)

    x = self.conv2(x, edge_index)
    x = self.bn2(x)
    x = self.activation(x)
    x = self.dropout(x)

    x = self.conv3(x, edge_index)
    x = self.bn3(x)
    x = self.activation(x)
    x = self.dropout(x)

    x = self.conv4(x, edge_index)
    x = self.bn4(x)
    x = self.activation(x)
    x = self.dropout(x)

    x = self.fc1(x)
    x = self.activation(x)
    x = self.fc2(x)

    return F.log_softmax(x, dim=1)

In [48]:
# Criando a função 'objective' para otimizar parametros com optuna:

# Definindo a função objetivo do Optuna
def objective(trial):
    # Hiperparâmetros sugeridos pelo Optuna
    neurons = trial.suggest_int("neurons", 128, 256)  # número de neurônios nas camadas ocultas
    dropout = trial.suggest_uniform("dropout", 0.05, 0.4)  # taxa de dropout
    lr = trial.suggest_float("lr", 6e-5, 6e-3, log=True)  # taxa de aprendizado

    # Criar o modelo com os parâmetros sugeridos
    model = GNN(activation=torch.nn.LeakyReLU(),
                neurons=neurons,
                dropout=dropout)

    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = torch.nn.CrossEntropyLoss()

    # Treinamento
    model.train()
    for epoch in range(10):  # Defina o número de épocas de treino
        optimizer.zero_grad()
        out = model(train_data)  # Passando diretamente o Data, sem usar o DataLoader
        loss = criterion(out, train_data.y)
        loss.backward()
        optimizer.step()

    # Validação
    model.eval()
    predictions = []
    true_labels = []

    with torch.no_grad():
        out = model(test_data)  # Passando diretamente o Data, sem usar o DataLoader
        pred = torch.argmax(out, dim=1)
        predictions.append(pred)
        true_labels.append(test_data.y)

    predictions = torch.cat(predictions, dim=0)
    true_labels = torch.cat(true_labels, dim=0)

    # Calculando a acurácia
    accuracy = accuracy_score(true_labels.cpu().numpy(), predictions.cpu().numpy())

    return accuracy  # Retorne a acurácia para o Optuna


In [49]:
# Criando um estudo para maximizar a métrica de desempenho (ex: acurácia)
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=10)  # Defina o número de tentativas

[I 2025-03-25 14:21:50,811] A new study created in memory with name: no-name-80e71292-8e79-4e94-ae98-6657bca0ffd2

suggest_uniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float instead.

[W 2025-03-25 14:21:51,269] Trial 0 failed with parameters: {'neurons': 138, 'dropout': 0.10396596000940309, 'lr': 0.0013129689152463783} because of the following error: ValueError("Classification metrics can't handle a mix of multilabel-indicator and binary targets").
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/optuna/study/_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "<ipython-input-48-ced650d22faf>", line 42, in objective
    accuracy = accuracy_score(true_labels.cpu().numpy(), predictions.cpu().numpy())
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File 

ValueError: Classification metrics can't handle a mix of multilabel-indicator and binary targets

In [41]:
print("Melhores hiperparâmetros encontrados:")
print(study.best_params)

print("Melhor valor da métrica de avaliação:")
print(study.best_value)


Melhores hiperparâmetros encontrados:
{'neurons': 248, 'dropout': 0.2820056527568173, 'lr': 0.0006393069758966887}
Melhor valor da métrica de avaliação:
0.0


In [37]:
import optuna.visualization as vis

# Gráfico da convergência (mostra a evolução da métrica ao longo dos trials)
vis.plot_optimization_history(study).show()

# Importância dos hiperparâmetros
vis.plot_param_importances(study).show()


RuntimeError: Encountered zero total variance in all trees.