<a href="https://colab.research.google.com/github/raphaelp-silva/GNN_Pytorch_geometric/blob/main/loading_a_graph_and_training_a_GNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [89]:
!pip install torch-geometric
!pip install --upgrade skorch
!pip install optuna



In [98]:
import pandas as pd
import numpy as np
import torch
from torch_geometric.data import Data
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
import torch.nn as nn
from skorch import NeuralNetClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from torch_geometric.data import DataLoader
from sklearn.metrics import accuracy_score
import optuna
from torch_geometric.utils import subgraph

In [99]:
# Carregando o grafo salvo:
graph = torch.load('/content/obesity_graph_data-3.pt', weights_only=False)

# Acessando as variáveis do grafo carregado:
print(graph.x)  # Features dos nós
print(graph.edge_index)  # Arestas do grafo
print(graph.y) # labels

tensor([[-1.2729, -0.6167, -0.8956,  ..., -0.0841,  0.6002, -0.1758],
        [-1.2729, -0.6167, -1.9360,  ..., -0.0841,  0.6002, -0.1758],
        [ 0.7856, -0.3259,  0.9770,  ..., -0.0841,  0.6002, -0.1758],
        ...,
        [-1.2729, -0.6167,  0.4568,  ..., -0.0841,  0.6002, -0.1758],
        [-1.2729, -0.6167,  1.0810,  ..., -0.0841,  0.6002, -0.1758],
        [-1.2729, -0.6167,  1.0810,  ..., -0.0841,  0.6002, -0.1758]])
tensor([[   0,    0,    0,  ..., 1566, 1566, 1566],
        [ 859,  721,   45,  ..., 1549, 1541, 1563]])
tensor([[1., 0., 0., 0., 0., 0.],
        [1., 0., 0., 0., 0., 0.],
        [1., 0., 0., 0., 0., 0.],
        ...,
        [0., 0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0., 0.]])


In [100]:
graph = Data(x = graph.x, edge_index=graph.edge_index, y=graph.y)
print(graph)

Data(x=[1567, 26], edge_index=[2, 23505], y=[1567, 6])


In [105]:
# Separando os índices de treino e teste (80% treino, 20% teste)
train_idx, test_idx = train_test_split(range(graph.x.size(0)), test_size=0.2, random_state=42)

# Criando um Data para treino:
train_edge_index, _ = subgraph(train_idx, graph.edge_index, relabel_nodes=True)
train_data = Data(x=graph.x[train_idx],
                  edge_index=train_edge_index,  # Edge index não muda
                  y=graph.y[train_idx])

# Criando um Data para teste:
test_edge_index, _ = subgraph(test_idx, graph.edge_index, relabel_nodes=True)
test_data = Data(x=graph.x[test_idx],
                 edge_index=test_edge_index,  # Edge index não muda
                 y=graph.y[test_idx])

In [106]:
class GNN(torch.nn.Module):
  def __init__(self, activation, neurons, dropout):

    super(GNN, self).__init__()

    self.conv1 = GCNConv(26, neurons)
    self.conv2 = GCNConv(neurons, (neurons//2))
    self.conv3 = GCNConv((neurons//2), (neurons//4))
    self.fc = nn.Linear((neurons//4), 6)
    self.dropout = nn.Dropout(dropout)

    # Aplicando a inicialização padrão do PyTorch Geometric nas camadas
    self.conv1.reset_parameters()
    self.conv2.reset_parameters()
    self.conv3.reset_parameters()

  def forward(self, data):
    x = data.x
    edge_index = data.edge_index

    x = self.conv1(x, edge_index)
    x = torch.relu(x)
    x = self.dropout(x)

    x = self.conv2(x, edge_index)
    x = torch.relu(x)
    x = self.dropout(x)

    x = self.conv3(x, edge_index)
    x = torch.relu(x)
    x = self.dropout(x)

    x = self.fc(x)

    return x

In [111]:
# Criando a função 'objective' para otimizar parametros com optuna:

# Definindo a função objetivo do Optuna
def objective(trial):
    # Hiperparâmetros sugeridos pelo Optuna
    neurons = trial.suggest_int("neurons", 64, 256, step=64)  # número de neurônios nas camadas ocultas
    dropout = trial.suggest_uniform("dropout", 0.0, 0.5)  # taxa de dropout
    lr = trial.suggest_loguniform("lr", 1e-5, 1e-2)  # taxa de aprendizado

    # Criar o modelo com os parâmetros sugeridos
    model = GNN(activation=torch.relu,
                neurons=neurons,
                dropout=dropout)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()

    # Treinamento
    model.train()
    for epoch in range(10):  # Defina o número de épocas de treino
        optimizer.zero_grad()
        out = model(train_data)  # Passando diretamente o Data, sem usar o DataLoader
        loss = criterion(out, train_data.y)
        loss.backward()
        optimizer.step()

    # Validação
    model.eval()
    predictions = []
    true_labels = []
    with torch.no_grad():
        out = model(test_data)  # Passando diretamente o Data, sem usar o DataLoader
        pred = (out > 0.5).type(torch.int64)
        predictions.append(pred)
        true_labels.append(test_data.y)

    predictions = torch.cat(predictions, dim=0)
    true_labels = torch.cat(true_labels, dim=0)

    # Calculando a acurácia
    accuracy = accuracy_score(true_labels.cpu().numpy(), predictions.cpu().numpy())

    return accuracy  # Retorne a acurácia para o Optuna


In [112]:
# Criando um estudo para maximizar a métrica de desempenho (ex: acurácia)
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=50)  # Defina o número de tentativas

[I 2025-03-24 19:15:20,637] A new study created in memory with name: no-name-54fe9ea5-6d44-4a56-9f68-c870e986a242
  dropout = trial.suggest_uniform("dropout", 0.0, 0.5)  # taxa de dropout
  lr = trial.suggest_loguniform("lr", 1e-5, 1e-2)  # taxa de aprendizado
[I 2025-03-24 19:15:20,968] Trial 0 finished with value: 0.0 and parameters: {'neurons': 128, 'dropout': 0.21677119374742532, 'lr': 1.4574669451706914e-05}. Best is trial 0 with value: 0.0.
  dropout = trial.suggest_uniform("dropout", 0.0, 0.5)  # taxa de dropout
  lr = trial.suggest_loguniform("lr", 1e-5, 1e-2)  # taxa de aprendizado
[I 2025-03-24 19:15:21,426] Trial 1 finished with value: 0.2961783439490446 and parameters: {'neurons': 192, 'dropout': 0.1030136910228247, 'lr': 0.0013595527365379424}. Best is trial 1 with value: 0.2961783439490446.
  dropout = trial.suggest_uniform("dropout", 0.0, 0.5)  # taxa de dropout
  lr = trial.suggest_loguniform("lr", 1e-5, 1e-2)  # taxa de aprendizado
[I 2025-03-24 19:15:22,035] Trial 2 f

In [113]:
print("Melhores hiperparâmetros encontrados:")
print(study.best_params)

print("Melhor valor da métrica de avaliação:")
print(study.best_value)


Melhores hiperparâmetros encontrados:
{'neurons': 192, 'dropout': 0.4416386649767178, 'lr': 0.002024800235464539}
Melhor valor da métrica de avaliação:
0.3885350318471338


In [114]:
import optuna.visualization as vis

# Gráfico da convergência (mostra a evolução da métrica ao longo dos trials)
vis.plot_optimization_history(study).show()

# Importância dos hiperparâmetros
vis.plot_param_importances(study).show()
