In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import Linear, BatchNorm1d, Dropout, init
import torch.optim as optim

import numpy as np
import time
import xgboost as xgb
import lightgbm
import pandas as pd
import random
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from geobr import read_municipality
from hyperopt import fmin, tpe, hp

import networkx as nx
import matplotlib.pyplot as plt

from torch_geometric.nn import GraphConv, GCNConv, GATConv, TransformerConv, GENConv
from torch_geometric.data import Data
from torch_geometric.transforms import NodePropertySplit
from torch_geometric.datasets.graph_generator import ERGraph


  Referenced from: <9EC0081F-9728-3675-89F4-4DDF3D476A21> /Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/torch_sparse/_version_cpu.so
  Expected in:     <8715DC6D-DA47-31E5-A317-E75A06D8E436> /Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/torch/lib/libtorch_cpu.dylib


## Importation and processing of the data

In [2]:
# Read the data
data = pd.read_excel("dataset/data.xlsx")

# Read municipality data from the geobr library
mun = read_municipality(code_muni="all", year=2020, simplified="TRUE")
mun['code_muni'] = mun['code_muni'].apply(lambda x: str(x)[:-3])
data['CD_MUN'] = data['CD_MUN'].astype(str)

# Merge the two datasets
municipios = pd.merge(data, mun, left_on='CD_MUN', right_on='code_muni', how='inner')

# Remove NaN values
municipios = municipios.dropna()

# Create an 'id_mun' column
municipios['id_mun'] = list(range(len(municipios)))

## Create a Graph Data Object

In [13]:
## Create a directed graph using NetworkX
#G = nx.DiGraph()
##
## Add nodes to the graph
#for idx, row in municipios.iterrows():
#    G.add_node(row['id_mun'], alunos=row['numero_alunos'], geometry=row['geometry'])
#
## Initialize edge lists
#edge1 = []
#edge2 = []
#
## Add edges to the graph based on the intersection of municipality geometries (neighborhood)
#for i, muni1 in municipios.iterrows():
#    for j, muni2 in municipios.iterrows():
#        if muni1['id_mun'] != muni2['id_mun'] and muni1['geometry'].intersects(muni2['geometry']):
#            edge1.append(muni1['id_mun'])
#            edge2.append(muni2['id_mun'])
#
## Convert edge lists to integers and create a tensor for edge indices
#edge1 = list(map(int, edge1))
#edge2 = list(map(int, edge2))
#edge_index = torch.tensor([edge1, edge2])
#edge_index
#
## Save edge lists
#torch.save(edge_index, "dataset/edge_index.pt")

In [11]:
# Load edge indices from the saved file
edge_index = torch.load("dataset/edge_index.pt")

# Define node features
x = torch.tensor([
    municipios['Índice de Theil-L 2010'].values,
    municipios['Índice de Gini 2010'].values,
    municipios['Rendimento médio dos ocupados 2010'].values,
    municipios['Esperança de vida ao nascer 2010'].values,
    municipios['IDHM 2010'].values,
    municipios['IDHM Renda 2010'].values,
    municipios['IDHM Longevidade 2010'].values,
    municipios['IDHM Educação 2010'].values,
    municipios['Subíndice de frequência escolar - IDHM Educação 2010'].values,
    municipios['Subíndice de escolaridade - IDHM Educação 2010'].values,
    municipios['Taxa de analfabetismo - 25 anos ou mais de idade 2010'].values,
    municipios['Produto Interno Bruto per capita 2013'].values,
    municipios['Produto Interno Bruto per capita 2014'].values,
    municipios['Produto Interno Bruto per capita 2016'].values,
    municipios['Renda per capita 2010'].values,
    municipios['POPULAÇÃO'].values,
    municipios['total_est_saude'].values,
    municipios['n_profissionais'].values
])

# Transpose the tensor for correct shape
x = x.t()

# Get the size of the edge indices
edge_size = edge_index.size()[1]

# Create edge attributes (fictitious attribute with a value of 1.0 for each edge)
edge_attr = torch.tensor([1.0] * edge_size).float()

# Define target values using the 'numero_alunos' column from 'municipios'
y = torch.tensor([municipios['numero_alunos'].values])

# Transpose and reshape the target tensor
y = y.t()
y = y.view(-1)

# Create a PyTorch Geometric Data object
data = Data(x=x, edge_index=edge_index, edge_attr=edge_attr, y=y)
data.x = data.x.float()

  x = torch.tensor([


## Split train/test/validation (Transductive setting)

In [12]:
# Name of the property
property_name = 'popularity'

torch.manual_seed(42)  # Setting the seed for reproducibility

# Ratios to split the nodes (train, validation, and test)
ratios = [0.7, 0, 0.3, 0, 0]

# Create NodePropertySplit transformation
transform = NodePropertySplit(property_name, ratios)

# Apply the transformation to the graph
data = transform(data)

print(data)

Data(x=[5203, 18], edge_index=[2, 28924], edge_attr=[28924], y=[5203], id_train_mask=[5203], id_val_mask=[5203], id_test_mask=[5203], ood_val_mask=[5203], ood_test_mask=[5203])


In [13]:
print(data.id_train_mask)
print(data.id_test_mask)

tensor([False, False,  True,  ...,  True,  True,  True])
tensor([ True,  True, False,  ..., False, False, False])


## Create a tabular dataset with the same train/test/validation split as the graph.

In [22]:
# Selecting specific columns from the 'municipios' DataFrame
df = municipios[['Índice de Theil-L 2010',
                        'Índice de Gini 2010',
                        'Rendimento médio dos ocupados 2010',
                        'Esperança de vida ao nascer 2010',
                        'IDHM 2010',
                        'IDHM Renda 2010',
                        'IDHM Longevidade 2010',
                        'IDHM Educação 2010',
                        'Subíndice de frequência escolar - IDHM Educação 2010',
                        'Subíndice de escolaridade - IDHM Educação 2010',
                        'Taxa de analfabetismo - 25 anos ou mais de idade 2010',
                        'Produto Interno Bruto per capita 2013',
                        'Produto Interno Bruto per capita 2014',
                        'Produto Interno Bruto per capita 2016',
                        'Renda per capita 2010',
                        'POPULAÇÃO',
                        'total_est_saude',
                        'n_profissionais',
                        'numero_alunos']]

# Convert the mask lists to Python lists
train_mask_list, test_mask_list = data.id_train_mask.tolist(), data.id_test_mask.tolist()

# Apply the masks to the DataFrame to get the splits
train_df, test_df = df[train_mask_list], df[test_mask_list]

# Extract labels and features for each split
y_train, x_train = train_df['numero_alunos'], train_df.drop(columns=['numero_alunos'])
y_test, x_test = test_df['numero_alunos'], test_df.drop(columns=['numero_alunos'])



### Setting 30 seeds for repetitions

In [15]:
random.seed(42)

num_seeds = 30
seeds = random.sample(range(0, 100 + 1), num_seeds)
#seeds.append(42)
print(seeds)

[81, 14, 3, 94, 35, 31, 28, 17, 13, 86, 69, 11, 75, 54, 4, 98, 89, 27, 29, 64, 77, 85, 71, 25, 90, 53, 97, 57, 96, 0]


## MLP Model

In [110]:
class MLPModel(nn.Module):
    def __init__(self, hidden_channels, num_hidden_layers):
        super(MLPModel, self).__init__()
        torch.manual_seed(42)

        # Graph Convolutional Layers
        self.fc_in = nn.Linear(len(x_train.columns), hidden_channels)
        self.relu = torch.nn.ReLU()
        
        self.hidden_layers = nn.ModuleList([nn.Linear(hidden_channels, hidden_channels) for _ in range(num_hidden_layers)])

        # Camada de saída
        self.fc_out = nn.Linear(hidden_channels, 1)

    def forward(self, x):
        # Camada de entrada
        x = self.fc_in(x)
        x = self.relu(x)

        # Camadas ocultas
        for layer in self.hidden_layers:
            x = layer(x)
            x = self.relu(x)

        # Camada de saída
        x = self.fc_out(x)

        return x

    def train_model(self, criterion, optimizer, x, y):
        optimizer.zero_grad()
        out = self(torch.FloatTensor(x.values))
        loss = criterion(out, torch.FloatTensor(y.values).view(-1, 1))
        loss.backward()
        optimizer.step()
        return loss.item()

    def test_model(self, criterion, x, y):
        self.eval()
        with torch.no_grad():
            out = self(torch.FloatTensor(x.values))
            acc = criterion(out, torch.FloatTensor(y.values).view(-1, 1))
        return acc.item()


### Hyperopt

In [106]:
# Defina o espaço de busca para os hiperparâmetros
space = {
    'hidden_channels': hp.choice('hidden_channels', [8, 16, 32]),
    'num_hidden_layers': hp.choice('num_hidden_layers', [0,1,2]),
    'lr': hp.uniform('lr', 0.001, 0.01),
    'weight_decay': hp.uniform('weight_decay', 0, 0.1),
    'n_epochs': hp.choice('n_epochs', [1000, 2000])

}

# Função objetivo para otimização
def objective(params):
    hidden_channels = params['hidden_channels']
    num_hidden_layers = params['num_hidden_layers']
    lr = params['lr']
    weight_decay = params['weight_decay']
    n_epochs = params['n_epochs']

    # Criar modelo com os hiperparâmetros
    model = MLPModel(hidden_channels=hidden_channels, num_hidden_layers=num_hidden_layers)

    # Definir loss function e optimizer
    criterion = torch.nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

    # Treinamento e teste
    for epoch in range(1, n_epochs):
        loss = model.train_model(criterion, optimizer, x_train, y_train)
        acc = model.test_model(criterion, x_test, y_test)

    # Retorna a métrica que você deseja otimizar (por exemplo, negativo do desempenho)
    return acc

# Executar a otimização com o Hyperopt
best = fmin(fn=objective, space=space, algo=tpe.suggest, max_evals=50)

# Imprimir os melhores hiperparâmetros encontrados
print("Melhores hiperparâmetros encontrados:", best)


100%|██████████| 50/50 [03:16<00:00,  3.93s/trial, best loss: 1945.4801025390625]
Melhores hiperparâmetros encontrados: {'hidden_channels': 2, 'lr': 0.0017220878393000563, 'n_epochs': 0, 'num_hidden_layers': 2, 'weight_decay': 0.027803056335997903}


In [112]:
resultado = {
    'hidden_channels': 32, 'lr': 0.0017220878393000563, 'n_epochs': 1000, 'num_hidden_layers': 2, 'weight_decay': 0.027803056335997903
}

hidden_channels = resultado['hidden_channels']
lr = resultado['lr']
n_epochs = resultado['n_epochs']
num_hidden_layers = resultado['num_hidden_layers']
weight_decay = resultado['weight_decay']

model = MLPModel(hidden_channels=hidden_channels, num_hidden_layers=num_hidden_layers)
optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
criterion = torch.nn.MSELoss()

# Fazer previsões nos conjuntos de treino e teste
for epoch in range(1, n_epochs):
    loss = model.train_model(criterion, optimizer, x_train, y_train)
    loss_test = model.test_model(criterion, x_test, y_test)

print(f'Loss: {loss:.4f}, Loss Test: {loss_test:.4f}')


Loss: 2175.9446, Loss Test: 1945.4801


### Repeating using another set of 30 seeds for the model with the best configuration found for seed 42

In [113]:
class MLPModel(nn.Module):
    def __init__(self, hidden_channels, num_hidden_layers):
        super(MLPModel, self).__init__()

        # Graph Convolutional Layers
        self.fc_in = nn.Linear(len(x_train.columns), hidden_channels)
        self.relu = torch.nn.ReLU()
        
        self.hidden_layers = nn.ModuleList([nn.Linear(hidden_channels, hidden_channels) for _ in range(num_hidden_layers)])

        # Camada de saída
        self.fc_out = nn.Linear(hidden_channels, 1)

    def forward(self, x):
        # Camada de entrada
        x = self.fc_in(x)
        x = self.relu(x)

        # Camadas ocultas
        for layer in self.hidden_layers:
            x = layer(x)
            x = self.relu(x)

        # Camada de saída
        x = self.fc_out(x)

        return x

    def train_model(self, criterion, optimizer, x, y):
        optimizer.zero_grad()
        out = self(torch.FloatTensor(x.values))
        loss = criterion(out, torch.FloatTensor(y.values).view(-1, 1))
        loss.backward()
        optimizer.step()
        return loss.item()

    def test_model(self, criterion, x, y):
        self.eval()
        with torch.no_grad():
            out = self(torch.FloatTensor(x.values))
            acc = criterion(out, torch.FloatTensor(y.values).view(-1, 1))
        return acc.item()

# Lists to store accuracies for each seed
final_accuracies = []
start_time = time.time()
# Training and Testing loops for each seed
for seed in seeds:
    # Instantiate the model with a different seed
    torch.manual_seed(seed)
    model = MLPModel(hidden_channels=hidden_channels, num_hidden_layers=num_hidden_layers)

    # Fazer previsões nos conjuntos de treino e teste
    # Definir loss function e optimizer
    criterion = torch.nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    
    # Treinamento e teste
    for epoch in range(1, n_epochs):
        loss = model.train_model(criterion, optimizer, x_train, y_train)
        loss_test = model.test_model(criterion, x_test, y_test)

    print(f'Seed: {seed}, Loss: {loss:.4f}, Test Loss: {loss_test:.4f}')

    # Save accuracies for this seed
    final_accuracies.append(loss_test)

end_time = time.time()
final_time = end_time - start_time
# Calculate mean and standard deviation of accuracies
mean_accuracies = np.mean(final_accuracies , axis=0)
std_accuracies = np.std(final_accuracies , axis=0)

# Print or use mean_accuracies and std_accuracies as needed
print("Mean Accuracies:", mean_accuracies) 
print("Standard Deviations:", std_accuracies)
print("Time:", final_time, "s")

Seed: 81, Loss: 2558.4824, Test Loss: 3821.9539
Seed: 14, Loss: 2711.6672, Test Loss: 3257.2507
Seed: 3, Loss: 3004.2317, Test Loss: 3748.4248
Seed: 94, Loss: 2239.6292, Test Loss: 3167.7319
Seed: 35, Loss: 2529.3127, Test Loss: 4228.8726
Seed: 31, Loss: 2586.1294, Test Loss: 4169.3950
Seed: 28, Loss: 2480.7161, Test Loss: 3378.5989
Seed: 17, Loss: 2243.3918, Test Loss: 2764.0532
Seed: 13, Loss: 3142.2876, Test Loss: 4064.6528
Seed: 86, Loss: 2683.3459, Test Loss: 3392.9209
Seed: 69, Loss: 2531.1704, Test Loss: 4283.0835
Seed: 11, Loss: 2587.2373, Test Loss: 4287.4731
Seed: 75, Loss: 2905.9319, Test Loss: 3946.0552
Seed: 54, Loss: 2597.6172, Test Loss: 4146.5386
Seed: 4, Loss: 2719.5525, Test Loss: 3331.4744
Seed: 98, Loss: 2606.5249, Test Loss: 3787.5708
Seed: 89, Loss: 2863.4990, Test Loss: 3639.0435
Seed: 27, Loss: 2312.5920, Test Loss: 3022.8457
Seed: 29, Loss: 2356.7473, Test Loss: 3135.1404
Seed: 64, Loss: 2155.9675, Test Loss: 2740.6614
Seed: 77, Loss: 2634.0508, Test Loss: 3999

## Random Forest Regressor Model

### Hyperopt

In [121]:
# Defina o espaço de busca para os hiperparâmetros
space = {
    'n_estimators': hp.choice('n_estimators', [50,100,150,200,250,300,350,400,450,500]),
    'max_depth': hp.choice('max_depth', [10,20,30,40,50]),
    'min_samples_split': hp.choice('min_samples_split', [2,3,4,5,6,7,8,9,10]),
    'min_samples_leaf': hp.choice('min_samples_leaf', [1,2,3,4,5]),
    'bootstrap': hp.choice('bootstrap', [True, False])

}

# Função objetivo para otimização
def objective(params):
    n_estimators = params['n_estimators']
    max_depth = params['max_depth']
    min_samples_split = params['min_samples_split']
    min_samples_leaf = params['min_samples_leaf']
    bootstrap = params['bootstrap']

    model = RandomForestRegressor(n_estimators=n_estimators, max_depth=max_depth, min_samples_split=min_samples_split,
    min_samples_leaf=min_samples_leaf, bootstrap=bootstrap, random_state=42)

    # Treinamento e teste
    def train_rf(model, x, y):
        model.fit(x, y)
        return mean_squared_error(y, model.predict(x))

    # Testing function
    def test_rf(model, x, y):
        y_pred = model.predict(x)
        return mean_squared_error(y, y_pred)

    train_loss_rf = train_rf(model, x_train, y_train)
    test_loss_rf = test_rf(model, x_test, y_test)

    # Retorna a métrica que você deseja otimizar (por exemplo, negativo do desempenho)
    return test_loss_rf

# Executar a otimização com o Hyperopt
best = fmin(fn=objective, space=space, algo=tpe.suggest, max_evals=50)

# Imprimir os melhores hiperparâmetros encontrados
print("Melhores hiperparâmetros encontrados:", best)

  0%|          | 0/50 [00:00<?, ?trial/s, best loss=?]

100%|██████████| 50/50 [29:19<00:00, 35.20s/trial, best loss: 3753.6211160517532]
Melhores hiperparâmetros encontrados: {'bootstrap': 1, 'max_depth': 1, 'min_samples_leaf': 1, 'min_samples_split': 1, 'n_estimators': 7}


## XGBoosting Model

### Hyperopt

In [120]:
# Defina o espaço de busca para os hiperparâmetros
eta = hp.uniform('eta', 0,1)
gamma = hp.uniform('gamma', 0, 1)
n_estimators = hp.quniform('n_estimators', 50, 200, 1)
max_depth = hp.quniform('max_depth', 3, 10, 1)
alpha = hp.uniform('alpha', 0, 1)

# Create the random grid
space = {'eta': eta,
    'gamma': gamma,
    'n_estimators': n_estimators,
    'max_depth': max_depth,
    'alpha': alpha}

# Função objetivo para otimização
def objective(params):
    eta = params['eta']
    n_estimators = int(params['n_estimators'])
    max_depth = int(params['max_depth'])
    alpha = params['alpha']

    model = xgb.XGBRegressor(random_state=42, eta=eta, n_estimators=n_estimators, max_depth=max_depth, alpha=alpha)

    # Treinamento e teste
    def train_xgb(model, x, y):
        model.fit(x, y)
        return mean_squared_error(y, model.predict(x))

    # Testing function
    def test_xgb(model, x, y):
        y_pred = model.predict(x)
        return mean_squared_error(y, y_pred)

    train_loss = train_xgb(model, x_train, y_train)
    test_loss = test_xgb(model, x_test, y_test)

    # Retorna a métrica que você deseja otimizar (por exemplo, negativo do desempenho)
    return test_loss

# Executar a otimização com o Hyperopt
best = fmin(fn=objective, space=space, algo=tpe.suggest, max_evals=50)

# Imprimir os melhores hiperparâmetros encontrados
print("Melhores hiperparâmetros encontrados:", best)

100%|██████████| 50/50 [00:20<00:00,  2.47trial/s, best loss: 12444.060902736177]
Melhores hiperparâmetros encontrados: {'alpha': 0.4285289444445258, 'eta': 0.6245005932855183, 'gamma': 0.4470965808805642, 'max_depth': 3.0, 'n_estimators': 119.0}


## LightGBM

### Hyperopt

In [138]:
# Defina o espaço de busca para os hiperparâmetros
num_leaves = hp.quniform('num_leaves', 10, 100, 5)
n_estimators = hp.quniform('n_estimators', 50, 200, 1)
max_depth = hp.quniform('max_depth', 3, 10, 1)

# Create the random grid
space = {'eta': eta,
    'n_estimators': n_estimators,
    'max_depth': max_depth,
    'num_leaves': num_leaves}

# Função objetivo para otimização
def objective(params):
    num_leaves = int(params['num_leaves'])
    n_estimators = int(params['n_estimators'])
    max_depth = int(params['max_depth'])

    model = lightgbm.LGBMRegressor(random_state=42, num_leaves=num_leaves, n_estimators=n_estimators, max_depth=max_depth)

    # Treinamento e teste
    def train_xgb(model, x, y):
        model.fit(x, y)
        return mean_squared_error(y, model.predict(x))

    # Testing function
    def test_xgb(model, x, y):
        y_pred = model.predict(x)
        return mean_squared_error(y, y_pred)

    train_loss = train_xgb(model, x_train, y_train)
    test_loss = test_xgb(model, x_test, y_test)

    # Retorna a métrica que você deseja otimizar (por exemplo, negativo do desempenho)
    return test_loss

# Executar a otimização com o Hyperopt
best = fmin(fn=objective, space=space, algo=tpe.suggest, max_evals=50)

# Imprimir os melhores hiperparâmetros encontrados
print("Melhores hiperparâmetros encontrados:", best)

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000746 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4152                     
[LightGBM] [Info] Number of data points in the train set: 3642, number of used features: 18
[LightGBM] [Info] Start training from score 46.423394 
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000473 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4152                                               
[LightGBM] [Info] Number of data points in the train set: 3642, number of used features: 18
[LightGBM] [Info] Start training from score 46.423394                           
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000834 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4152                         

## Model GCN

In [29]:
class GCNModel(torch.nn.Module):
    def __init__(self, hidden_channels, num_hidden_layers, dropout_prob):
        super(GCNModel, self).__init__()
        torch.manual_seed(42)

        # Graph Convolutional Layers
        self.fc_in = GCNConv(data.num_features, hidden_channels)
        self.dropout_layer = Dropout(dropout_prob)
        self.relu = torch.nn.ReLU()
        
        self.hidden_layers = nn.ModuleList([GCNConv(hidden_channels, hidden_channels) for _ in range(num_hidden_layers)])

        # Camada de saída
        self.fc_out = GCNConv(hidden_channels, 1)

    def forward(self, x, edge_index):
        # Camada de entrada
        x = self.fc_in(x, edge_index)
        x = self.relu(x)
        x = self.dropout_layer(x)

        # Camadas ocultas
        for layer in self.hidden_layers:
            x = layer(x, edge_index)
            x = self.relu(x)
            x = self.dropout_layer(x)

        # Camada de saída
        x = self.fc_out(x, edge_index)

        return x

    def train_model(self, data, criterion, optimizer):
        optimizer.zero_grad()  # Clear gradients.
        out = self(data.x, data.edge_index)  # Perform a single forward pass.
        out = out.view(-1)
        loss = criterion(out[data.id_train_mask], data.y[data.id_train_mask].float())  # Compute the loss solely based on the training nodes.
        loss.backward()  # Derive gradients.
        # torch.nn.utils.clip_grad_norm_(self.parameters(), max_norm=2.0)  # gradient clipping
        optimizer.step()  # Update parameters based on gradients.
        return loss.item()

    def test_model(self, data, criterion):
        self.eval()
        out = self(data.x, data.edge_index)
        out = out.view(-1)
        acc = criterion(out[data.id_test_mask], data.y[data.id_test_mask].float())
        return acc.item()


### Hyperopt

In [30]:
# Define the search space for hyperparameters
space = {
    'hidden_channels': hp.choice('hidden_channels', [8, 16, 32]),
    'num_hidden_layers': hp.choice('num_hidden_layers', [0,1,2]),
    'dropout_prob': hp.uniform('dropout_prob', 0, 0.6),
    'lr': hp.uniform('lr', 0.001, 0.01),
    'weight_decay': hp.uniform('weight_decay', 0, 0.1),
    'n_epochs': hp.choice('n_epochs', [1000, 2000])

}

# Objective function for optimization
def objective(params):
    hidden_channels = params['hidden_channels']
    num_hidden_layers = params['num_hidden_layers']
    dropout_prob = params['dropout_prob']
    lr = params['lr']
    weight_decay = params['weight_decay']
    n_epochs = params['n_epochs']

    # Create a model with the hyperparameters
    model = GCNModel(hidden_channels=hidden_channels, num_hidden_layers=num_hidden_layers,dropout_prob=dropout_prob)

    # Define loss function and optimizer
    criterion = torch.nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

    # Training and testing
    for epoch in range(1, n_epochs):
        loss = model.train_model(data, criterion, optimizer)
        loss_test = model.test_model(data, criterion)

    # Return the metric you want to optimize
    return loss_test

# Run the optimization with Hyperopt
best = fmin(fn=objective, space=space, algo=tpe.suggest, max_evals=50)

# Print the best-found hyperparameters
print("Best hyperparameters found:", best)

  0%|          | 0/50 [00:00<?, ?trial/s, best loss=?]

100%|██████████| 50/50 [33:11<00:00, 39.83s/trial, best loss: 15645.3681640625]
Best hyperparameters found: {'dropout_prob': 0.029478617988071918, 'hidden_channels': 0, 'lr': 0.009294042871656454, 'n_epochs': 1, 'num_hidden_layers': 0, 'weight_decay': 0.0006521439459469148}


In [33]:
resultado = {
    'dropout_prob': 0.029478617988071918, 'hidden_channels': 8, 'lr': 0.009294042871656454, 
    'n_epochs': 2000, 'num_hidden_layers': 0, 'weight_decay': 0.0006521439459469148
}

hidden_channels = resultado['hidden_channels']
lr = resultado['lr']
n_epochs = resultado['n_epochs']
num_hidden_layers = resultado['num_hidden_layers']
weight_decay = resultado['weight_decay']
dropout_prob = resultado['dropout_prob']

model = GCNModel(hidden_channels=hidden_channels, num_hidden_layers=num_hidden_layers,dropout_prob=dropout_prob)
optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
criterion = torch.nn.MSELoss()

# Fazer previsões nos conjuntos de treino e teste
for epoch in range(1, n_epochs):
    loss = model.train_model(data, criterion, optimizer)
    loss_test = model.test_model(data, criterion)

print(f'Loss: {loss:.4f}, Loss Test: {loss_test:.4f}')

Loss: 11838.6113, Loss Test: 15645.3682


### Repeating using another set of 30 seeds for the model with the best configuration found for seed 42

In [34]:
class GCNModel(torch.nn.Module):
    def __init__(self, hidden_channels, num_hidden_layers, dropout_prob):
        super(GCNModel, self).__init__()

        # Graph Convolutional Layers
        self.fc_in = GCNConv(data.num_features, hidden_channels)
        self.dropout_layer = Dropout(dropout_prob)
        self.relu = torch.nn.ReLU()
        
        self.hidden_layers = nn.ModuleList([GCNConv(hidden_channels, hidden_channels) for _ in range(num_hidden_layers)])

        # Camada de saída
        self.fc_out = GCNConv(hidden_channels, 1)

    def forward(self, x, edge_index):
        # Camada de entrada
        x = self.fc_in(x, edge_index)
        x = self.relu(x)
        x = self.dropout_layer(x)

        # Camadas ocultas
        for layer in self.hidden_layers:
            x = layer(x, edge_index)
            x = self.relu(x)
            x = self.dropout_layer(x)

        # Camada de saída
        x = self.fc_out(x, edge_index)

        return x

    def train_model(self, data, criterion, optimizer):
        optimizer.zero_grad()  # Clear gradients.
        out = self(data.x, data.edge_index)  # Perform a single forward pass.
        out = out.view(-1)
        loss = criterion(out[data.id_train_mask], data.y[data.id_train_mask].float())  # Compute the loss solely based on the training nodes.
        loss.backward()  # Derive gradients.
        # torch.nn.utils.clip_grad_norm_(self.parameters(), max_norm=2.0)  # gradient clipping
        optimizer.step()  # Update parameters based on gradients.
        return loss.item()

    def test_model(self, data, criterion):
        self.eval()
        out = self(data.x, data.edge_index)
        out = out.view(-1)
        acc = criterion(out[data.id_test_mask], data.y[data.id_test_mask].float())
        return acc.item()


resultado = {
    'dropout_prob': 0.029478617988071918, 'hidden_channels': 8, 'lr': 0.009294042871656454, 
    'n_epochs': 2000, 'num_hidden_layers': 0, 'weight_decay': 0.0006521439459469148
}

hidden_channels = resultado['hidden_channels']
lr = resultado['lr']
n_epochs = resultado['n_epochs']
num_hidden_layers = resultado['num_hidden_layers']
weight_decay = resultado['weight_decay']
dropout_prob = resultado['dropout_prob']

# Lists to store accuracies for each seed
final_accuracies = []
start_time = time.time()
# Training and Testing loops for each seed
for seed in seeds:
    # Instantiate the model with a different seed
    torch.manual_seed(seed)
    model = GCNModel(hidden_channels=hidden_channels, num_hidden_layers=num_hidden_layers,dropout_prob=dropout_prob)

    # Fazer previsões nos conjuntos de treino e teste
    # Definir loss function e optimizer
    criterion = torch.nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    
    # Treinamento e teste
    for epoch in range(1, n_epochs):
        loss = model.train_model(data, criterion, optimizer)
        loss_test = model.test_model(data, criterion)

    print(f'Seed: {seed}, Loss: {loss:.4f}, Test Loss: {loss_test:.4f}')

    # Save accuracies for this seed
    final_accuracies.append(loss_test)

end_time = time.time()
time = end_time - start_time
# Calculate mean and standard deviation of accuracies
mean_accuracies = np.mean(final_accuracies , axis=0)
std_accuracies = np.std(final_accuracies , axis=0)

# Print or use mean_accuracies and std_accuracies as needed
print("Mean Accuracies:", mean_accuracies) 
print("Standard Deviations:", std_accuracies)
print("Time:", time, "s")

Seed: 81, Loss: 11813.8057, Test Loss: 15688.2412
Seed: 14, Loss: 11342.9717, Test Loss: 15870.8516
Seed: 3, Loss: 19929.3066, Test Loss: 22402.1914
Seed: 94, Loss: 11602.2568, Test Loss: 16079.6250
Seed: 35, Loss: 12545.9316, Test Loss: 17668.7578
Seed: 31, Loss: 13235.0889, Test Loss: 16686.7773
Seed: 28, Loss: 13523.0947, Test Loss: 16413.7930
Seed: 17, Loss: 11661.3271, Test Loss: 15879.9824
Seed: 13, Loss: 11669.4639, Test Loss: 15976.1328
Seed: 86, Loss: 11641.0430, Test Loss: 15980.4600
Seed: 69, Loss: 13646.3994, Test Loss: 17222.3477
Seed: 11, Loss: 13135.8867, Test Loss: 16447.4414
Seed: 75, Loss: 19032.9375, Test Loss: 21771.1309
Seed: 54, Loss: 11678.3760, Test Loss: 16096.4219
Seed: 4, Loss: 11727.0938, Test Loss: 16248.4766
Seed: 98, Loss: 11649.8525, Test Loss: 16068.8008
Seed: 89, Loss: 12387.1064, Test Loss: 16300.2832
Seed: 27, Loss: 11714.8320, Test Loss: 15687.3711
Seed: 29, Loss: 12049.5879, Test Loss: 16036.9736
Seed: 64, Loss: 11602.5723, Test Loss: 16154.9443
Se

## Model GCN + Linear

In [45]:
class GCN_MLPModel(torch.nn.Module):
    def __init__(self, hidden_channels, mlp_hidden_channels, num_hidden_layers, mlp_num_hidden_layers, dropout_prob):
        super(GCN_MLPModel, self).__init__()
        torch.manual_seed(42)

        # Graph Convolutional Layers
        self.fc_in = GCNConv(data.num_features, hidden_channels)
        self.dropout_layer = Dropout(dropout_prob)
        self.relu = torch.nn.ReLU()
        
        self.hidden_layers = nn.ModuleList([GCNConv(hidden_channels, hidden_channels) for _ in range(num_hidden_layers)])
        self.hidden_layers_mlp = nn.ModuleList([
            nn.Linear(hidden_channels, mlp_hidden_channels) if i == 0 else nn.Linear(mlp_hidden_channels, mlp_hidden_channels)
            for i in range(mlp_num_hidden_layers)
        ])

        # Camada de saída
        self.fc_out = nn.Linear(mlp_hidden_channels, 1)

    def forward(self, x, edge_index):
        # Camada de entrada
        x = self.fc_in(x, edge_index)
        x = self.relu(x)
        x = self.dropout_layer(x)

        # Camadas ocultas
        for layer in self.hidden_layers:
            x = layer(x, edge_index)
            x = self.relu(x)
            x = self.dropout_layer(x)

        for layer in self.hidden_layers_mlp:
            x = layer(x)
            x = self.relu(x)

        # Camada de saída
        x = self.fc_out(x)

        return x

    def train_model(self, data, criterion, optimizer):
        optimizer.zero_grad()  # Clear gradients.
        out = self(data.x, data.edge_index)  # Perform a single forward pass.
        out = out.view(-1)
        loss = criterion(out[data.id_train_mask], data.y[data.id_train_mask].float())  # Compute the loss solely based on the training nodes.
        loss.backward()  # Derive gradients.
        # torch.nn.utils.clip_grad_norm_(self.parameters(), max_norm=2.0)  # gradient clipping
        optimizer.step()  # Update parameters based on gradients.
        return loss.item()

    def test_model(self, data, criterion):
        self.eval()
        out = self(data.x, data.edge_index)
        out = out.view(-1)
        acc = criterion(out[data.id_test_mask], data.y[data.id_test_mask].float())
        return acc.item()


### Hyperopt

In [41]:
# Defina o espaço de busca para os hiperparâmetros
space = {
    'hidden_channels': hp.choice('hidden_channels', [8, 16, 32]),
    'mlp_hidden_channels': hp.choice('mlp_hidden_channels', [8, 16, 32]),
    'num_hidden_layers': hp.choice('num_hidden_layers', [0,1,2]),
    'mlp_num_hidden_layers': hp.choice('mlp_num_hidden_layers', [1,2,3]),
    'dropout_prob': hp.uniform('dropout_prob', 0, 0.6),
    'lr': hp.uniform('lr', 0.001, 0.01),
    'weight_decay': hp.uniform('weight_decay', 0, 0.1),
    'n_epochs': hp.choice('n_epochs', [1000, 2000])

}

# Função objetivo para otimização
def objective(params):
    hidden_channels = params['hidden_channels']
    mlp_hidden_channels = params['mlp_hidden_channels']
    num_hidden_layers = params['num_hidden_layers']
    mlp_num_hidden_layers = params['mlp_num_hidden_layers']
    dropout_prob = params['dropout_prob']
    lr = params['lr']
    weight_decay = params['weight_decay']
    n_epochs = params['n_epochs']

    # Criar modelo com os hiperparâmetros
    model = GCN_MLPModel(hidden_channels=hidden_channels, mlp_hidden_channels=mlp_hidden_channels, num_hidden_layers=num_hidden_layers, mlp_num_hidden_layers=mlp_num_hidden_layers, dropout_prob=dropout_prob)

    # Definir loss function e optimizer
    criterion = torch.nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

    # Treinamento e teste
    for epoch in range(1, n_epochs):
        loss = model.train_model(data, criterion, optimizer)
        acc = model.test_model(data, criterion)

    # Retorna a métrica que você deseja otimizar (por exemplo, negativo do desempenho)
    return acc

# Executar a otimização com o Hyperopt
best = fmin(fn=objective, space=space, algo=tpe.suggest, max_evals=50)

# Imprimir os melhores hiperparâmetros encontrados
print("Melhores hiperparâmetros encontrados:", best)


  0%|          | 0/50 [00:00<?, ?trial/s, best loss=?]

100%|██████████| 50/50 [1:03:31<00:00, 76.24s/trial, best loss: 14080.3603515625] 
Melhores hiperparâmetros encontrados: {'dropout_prob': 0.2982841973500487, 'hidden_channels': 1, 'lr': 0.0060596510005368216, 'mlp_hidden_channels': 2, 'mlp_num_hidden_layers': 0, 'n_epochs': 1, 'num_hidden_layers': 1, 'weight_decay': 0.060104806996587615}


In [54]:
resultado = {
    'dropout_prob': 0.2982841973500487, 'hidden_channels': 16, 'lr': 0.0060596510005368216, 'mlp_hidden_channels': 32, 'mlp_num_hidden_layers': 1, 
    'n_epochs': 2000, 'num_hidden_layers': 1, 'weight_decay': 0.060104806996587615
}

hidden_channels = resultado['hidden_channels']
lr = resultado['lr']
n_epochs = resultado['n_epochs']
num_hidden_layers = resultado['num_hidden_layers']
weight_decay = resultado['weight_decay']
dropout_prob = resultado['dropout_prob']
mlp_hidden_channels = resultado['mlp_hidden_channels']
mlp_num_hidden_layers = resultado['mlp_num_hidden_layers']

torch.manual_seed(42)
model = GCN_MLPModel(hidden_channels=hidden_channels, mlp_hidden_channels=mlp_hidden_channels, num_hidden_layers=num_hidden_layers, mlp_num_hidden_layers=mlp_num_hidden_layers, dropout_prob=dropout_prob)

# Definir loss function e optimizer
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

# Treinamento e teste
for epoch in range(1, n_epochs):
    loss = model.train_model(data, criterion, optimizer)
    loss_test = model.test_model(data, criterion)

print(f'Loss: {loss:.4f}, Loss Test: {loss_test:.4f}')

Loss: 9093.1436, Loss Test: 14080.3604


### Repeating using another set of 30 seeds for the model with the best configuration found for seed 42

In [61]:
class GCN_MLPModel(torch.nn.Module):
    def __init__(self, hidden_channels, mlp_hidden_channels, num_hidden_layers, mlp_num_hidden_layers, dropout_prob):
        super(GCN_MLPModel, self).__init__()

        # Graph Convolutional Layers
        self.fc_in = GCNConv(data.num_features, hidden_channels)
        self.dropout_layer = Dropout(dropout_prob)
        self.relu = torch.nn.ReLU()
        
        self.hidden_layers = nn.ModuleList([GCNConv(hidden_channels, hidden_channels) for _ in range(num_hidden_layers)])
        self.hidden_layers_mlp = nn.ModuleList([
            nn.Linear(hidden_channels, mlp_hidden_channels) if i == 0 else nn.Linear(mlp_hidden_channels, mlp_hidden_channels)
            for i in range(mlp_num_hidden_layers)
        ])

        # Camada de saída
        self.fc_out = nn.Linear(mlp_hidden_channels, 1)

    def forward(self, x, edge_index):
        # Camada de entrada
        x = self.fc_in(x, edge_index)
        x = self.relu(x)
        x = self.dropout_layer(x)

        # Camadas ocultas
        for layer in self.hidden_layers:
            x = layer(x, edge_index)
            x = self.relu(x)
            x = self.dropout_layer(x)

        for layer in self.hidden_layers_mlp:
            x = layer(x)
            x = self.relu(x)

        # Camada de saída
        x = self.fc_out(x)

        return x

    def train_model(self, data, criterion, optimizer):
        optimizer.zero_grad()  # Clear gradients.
        out = self(data.x, data.edge_index)  # Perform a single forward pass.
        out = out.view(-1)
        loss = criterion(out[data.id_train_mask], data.y[data.id_train_mask].float())  # Compute the loss solely based on the training nodes.
        loss.backward()  # Derive gradients.
        # torch.nn.utils.clip_grad_norm_(self.parameters(), max_norm=2.0)  # gradient clipping
        optimizer.step()  # Update parameters based on gradients.
        return loss.item()

    def test_model(self, data, criterion):
        self.eval()
        out = self(data.x, data.edge_index)
        out = out.view(-1)
        acc = criterion(out[data.id_test_mask], data.y[data.id_test_mask].float())
        return acc.item()


resultado = {
    'dropout_prob': 0.2982841973500487, 'hidden_channels': 16, 'lr': 0.0060596510005368216, 'mlp_hidden_channels': 32, 'mlp_num_hidden_layers': 1, 
    'n_epochs': 2000, 'num_hidden_layers': 1, 'weight_decay': 0.060104806996587615
}

hidden_channels = resultado['hidden_channels']
lr = resultado['lr']
n_epochs = resultado['n_epochs']
num_hidden_layers = resultado['num_hidden_layers']
weight_decay = resultado['weight_decay']
dropout_prob = resultado['dropout_prob']
mlp_hidden_channels = resultado['mlp_hidden_channels']
mlp_num_hidden_layers = resultado['mlp_num_hidden_layers']

# Lists to store accuracies for each seed
final_accuracies = []
start_time = time.time()
# Training and Testing loops for each seed
for seed in seeds:
    # Instantiate the model with a different seed
    torch.manual_seed(seed)
    model = GCN_MLPModel(hidden_channels=hidden_channels, mlp_hidden_channels=mlp_hidden_channels, num_hidden_layers=num_hidden_layers, mlp_num_hidden_layers=mlp_num_hidden_layers, dropout_prob=dropout_prob)

    # Fazer previsões nos conjuntos de treino e teste
    # Definir loss function e optimizer
    criterion = torch.nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    
    # Treinamento e teste
    for epoch in range(1, n_epochs):
        loss = model.train_model(data, criterion, optimizer)
        loss_test = model.test_model(data, criterion)

    print(f'Seed: {seed}, Loss: {loss:.4f}, Test Loss: {loss_test:.4f}')

    # Save accuracies for this seed
    final_accuracies.append(loss_test)

end_time = time.time()
final_time = end_time - start_time
# Calculate mean and standard deviation of accuracies
mean_accuracies = np.mean(final_accuracies , axis=0)
std_accuracies = np.std(final_accuracies , axis=0)

# Print or use mean_accuracies and std_accuracies as needed
print("Mean Accuracies:", mean_accuracies) 
print("Standard Deviations:", std_accuracies)
print("Time:", final_time, "s")

Seed: 81, Loss: 11413.5244, Test Loss: 15872.3154
Seed: 14, Loss: 11846.0117, Test Loss: 15824.7959
Seed: 3, Loss: 12093.7295, Test Loss: 15860.8672
Seed: 94, Loss: 11643.5684, Test Loss: 15889.5156
Seed: 35, Loss: 11612.7656, Test Loss: 15620.4766
Seed: 31, Loss: 12631.0293, Test Loss: 16257.2354
Seed: 28, Loss: 11805.9189, Test Loss: 15711.8105
Seed: 17, Loss: 10128.9941, Test Loss: 14779.0068
Seed: 13, Loss: 9186.8818, Test Loss: 14874.5986
Seed: 86, Loss: 11967.1064, Test Loss: 15702.8174
Seed: 69, Loss: 5974.4287, Test Loss: 13336.7627
Seed: 11, Loss: 9439.8047, Test Loss: 15691.6475
Seed: 75, Loss: 11383.5918, Test Loss: 15889.2432
Seed: 54, Loss: 6799.3862, Test Loss: 24201.1582
Seed: 4, Loss: 9276.7803, Test Loss: 15017.0273
Seed: 98, Loss: 11693.8916, Test Loss: 15708.7979
Seed: 89, Loss: 8929.7393, Test Loss: 14426.7266
Seed: 27, Loss: 12044.1357, Test Loss: 15831.2861
Seed: 29, Loss: 11166.6592, Test Loss: 15811.0752
Seed: 64, Loss: 11289.2246, Test Loss: 15920.3496
Seed: 77

## Model GraphConv

In [49]:
class GraphConvModel(torch.nn.Module):
    def __init__(self, hidden_channels, num_hidden_layers, dropout_prob):
        super(GraphConvModel, self).__init__()
        torch.manual_seed(42)

        # Graph Convolutional Layers
        self.fc_in = GraphConv(data.num_features, hidden_channels, agg="sum")
        self.dropout_layer = Dropout(dropout_prob)
        self.relu = torch.nn.ReLU()
        
        self.hidden_layers = nn.ModuleList([GraphConv(hidden_channels, hidden_channels, agg="mean") for _ in range(num_hidden_layers)])

        # Camada de saída
        self.fc_out = GraphConv(hidden_channels, 1, agg="mean")

    def forward(self, x, edge_index):
        # Camada de entrada
        x = self.fc_in(x, edge_index)
        x = self.relu(x)
        x = self.dropout_layer(x)

        # Camadas ocultas
        for layer in self.hidden_layers:
            x = layer(x, edge_index)
            x = self.relu(x)
            x = self.dropout_layer(x)

        # Camada de saída
        x = self.fc_out(x, edge_index)

        return x

    def train_model(self, data, criterion, optimizer):
        optimizer.zero_grad()  # Clear gradients.
        out = self(data.x, data.edge_index)  # Perform a single forward pass.
        out = out.view(-1)
        loss = criterion(out[data.id_train_mask], data.y[data.id_train_mask].float())  # Compute the loss solely based on the training nodes.
        loss.backward()  # Derive gradients.
        # torch.nn.utils.clip_grad_norm_(self.parameters(), max_norm=2.0)  # gradient clipping
        optimizer.step()  # Update parameters based on gradients.
        return loss.item()

    def test_model(self, data, criterion):
        self.eval()
        out = self(data.x, data.edge_index)
        out = out.view(-1)
        acc = criterion(out[data.id_test_mask], data.y[data.id_test_mask].float())
        return acc.item()

### Hyperopt

In [8]:
# Defina o espaço de busca para os hiperparâmetros
space = {
    'hidden_channels': hp.choice('hidden_channels', [8, 16, 32]),
    'num_hidden_layers': hp.choice('num_hidden_layers', [0,1,2]),
    'dropout_prob': hp.uniform('dropout_prob', 0.1, 0.6),
    'lr': hp.uniform('lr', 0.001, 0.01),
    'weight_decay': hp.uniform('weight_decay', 0, 0.1),
    'n_epochs': hp.choice('n_epochs', [1000, 2000])

}

# Função objetivo para otimização
def objective(params):
    hidden_channels = params['hidden_channels']
    num_hidden_layers = params['num_hidden_layers']
    dropout_prob = params['dropout_prob']
    lr = params['lr']
    weight_decay = params['weight_decay']
    n_epochs = params['n_epochs']

    # Criar modelo com os hiperparâmetros
    model = GraphConvModel(hidden_channels=hidden_channels, num_hidden_layers=num_hidden_layers,dropout_prob=dropout_prob)

    # Definir loss function e optimizer
    criterion = torch.nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

    # Treinamento e teste
    for epoch in range(1, n_epochs):
        loss = model.train_model(data, criterion, optimizer)
        acc = model.test_model(data, criterion)

    # Retorna a métrica que você deseja otimizar (por exemplo, negativo do desempenho)
    return acc

# Executar a otimização com o Hyperopt
best = fmin(fn=objective, space=space, algo=tpe.suggest, max_evals=50)

# Imprimir os melhores hiperparâmetros encontrados
print("Melhores hiperparâmetros encontrados:", best)


  0%|          | 0/50 [00:00<?, ?trial/s, best loss=?]

100%|██████████| 50/50 [27:00<00:00, 32.41s/trial, best loss: 2841.2158203125]
Melhores hiperparâmetros encontrados: {'dropout_prob': 0.25725450852119003, 'hidden_channels': 0, 'lr': 0.008367613497076088, 'n_epochs': 1, 'num_hidden_layers': 2, 'weight_decay': 0.056834474246683066}


In [52]:
resultado = {
    'dropout_prob': 0.25725450852119003, 'hidden_channels': 8, 'lr': 0.008367613497076088, 'n_epochs': 2000, 'num_hidden_layers': 2, 'weight_decay': 0.056834474246683066
}

hidden_channels = resultado['hidden_channels']
lr = resultado['lr']
n_epochs = resultado['n_epochs']
num_hidden_layers = resultado['num_hidden_layers']
weight_decay = resultado['weight_decay']
dropout_prob = resultado['dropout_prob']

torch.manual_seed(42)
model = GraphConvModel(hidden_channels=hidden_channels, num_hidden_layers=num_hidden_layers,dropout_prob=dropout_prob)

# Definir loss function e optimizer
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

# Treinamento e teste
for epoch in range(1, n_epochs):
    loss = model.train_model(data, criterion, optimizer)
    loss_test = model.test_model(data, criterion)

print(f'Loss: {loss:.4f}, Loss Test: {loss_test:.4f}')

Loss: 4013.2642, Loss Test: 2841.2158


### Repeating using another set of 30 seeds for the model with the best configuration found for seed 42

In [63]:
class GraphConvModel(torch.nn.Module):
    def __init__(self, hidden_channels, num_hidden_layers, dropout_prob):
        super(GraphConvModel, self).__init__()

        # Graph Convolutional Layers
        self.fc_in = GraphConv(data.num_features, hidden_channels, agg="sum")
        self.dropout_layer = Dropout(dropout_prob)
        self.relu = torch.nn.ReLU()
        
        self.hidden_layers = nn.ModuleList([GraphConv(hidden_channels, hidden_channels, agg="mean") for _ in range(num_hidden_layers)])

        # Camada de saída
        self.fc_out = GraphConv(hidden_channels, 1, agg="mean")

    def forward(self, x, edge_index):
        # Camada de entrada
        x = self.fc_in(x, edge_index)
        x = self.relu(x)
        x = self.dropout_layer(x)

        # Camadas ocultas
        for layer in self.hidden_layers:
            x = layer(x, edge_index)
            x = self.relu(x)
            x = self.dropout_layer(x)

        # Camada de saída
        x = self.fc_out(x, edge_index)

        return x

    def train_model(self, data, criterion, optimizer):
        optimizer.zero_grad()  # Clear gradients.
        out = self(data.x, data.edge_index)  # Perform a single forward pass.
        out = out.view(-1)
        loss = criterion(out[data.id_train_mask], data.y[data.id_train_mask].float())  # Compute the loss solely based on the training nodes.
        loss.backward()  # Derive gradients.
        # torch.nn.utils.clip_grad_norm_(self.parameters(), max_norm=2.0)  # gradient clipping
        optimizer.step()  # Update parameters based on gradients.
        return loss.item()

    def test_model(self, data, criterion):
        self.eval()
        out = self(data.x, data.edge_index)
        out = out.view(-1)
        acc = criterion(out[data.id_test_mask], data.y[data.id_test_mask].float())
        return acc.item()


resultado = {
    'dropout_prob': 0.25725450852119003, 'hidden_channels': 8, 'lr': 0.008367613497076088, 'n_epochs': 2000, 'num_hidden_layers': 2, 'weight_decay': 0.056834474246683066
}

hidden_channels = resultado['hidden_channels']
lr = resultado['lr']
n_epochs = resultado['n_epochs']
num_hidden_layers = resultado['num_hidden_layers']
weight_decay = resultado['weight_decay']
dropout_prob = resultado['dropout_prob']


# Lists to store accuracies for each seed
final_accuracies = []
start_time = time.time()
# Training and Testing loops for each seed
for seed in seeds:
    # Instantiate the model with a different seed
    torch.manual_seed(seed)
    model = GraphConvModel(hidden_channels=hidden_channels, num_hidden_layers=num_hidden_layers,dropout_prob=dropout_prob)

    # Fazer previsões nos conjuntos de treino e teste
    # Definir loss function e optimizer
    criterion = torch.nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    
    # Treinamento e teste
    for epoch in range(1, n_epochs):
        loss = model.train_model(data, criterion, optimizer)
        loss_test = model.test_model(data, criterion)

    print(f'Seed: {seed}, Loss: {loss:.4f}, Test Loss: {loss_test:.4f}')

    # Save accuracies for this seed
    final_accuracies.append(loss_test)

end_time = time.time()
final_time = end_time - start_time
# Calculate mean and standard deviation of accuracies
mean_accuracies = np.mean(final_accuracies , axis=0)
std_accuracies = np.std(final_accuracies , axis=0)

# Print or use mean_accuracies and std_accuracies as needed
print("Mean Accuracies:", mean_accuracies) 
print("Standard Deviations:", std_accuracies)
print("Time:", final_time, "s")

Seed: 81, Loss: 1858129.1250, Test Loss: 2204273.2500
Seed: 14, Loss: 17947.9258, Test Loss: 12738.7754
Seed: 3, Loss: 19776.6113, Test Loss: 22207.3242
Seed: 94, Loss: 51899.0938, Test Loss: 39035.2383
Seed: 35, Loss: 38155.1484, Test Loss: 24294.6523
Seed: 31, Loss: 19719.7656, Test Loss: 22177.4395
Seed: 28, Loss: 640420.5000, Test Loss: 551425.6250
Seed: 17, Loss: 19860.7207, Test Loss: 22228.5254
Seed: 13, Loss: 1677231.7500, Test Loss: 1198570.1250
Seed: 86, Loss: 434493.4062, Test Loss: 460884.0625
Seed: 69, Loss: 19730.9922, Test Loss: 22187.6562
Seed: 11, Loss: 3724.8264, Test Loss: 5004.2871
Seed: 75, Loss: 18867.7598, Test Loss: 11456.3369
Seed: 54, Loss: 559141.8125, Test Loss: 397253.5625
Seed: 4, Loss: 22643.8867, Test Loss: 49142.9180
Seed: 98, Loss: 29779.3770, Test Loss: 22199.4414
Seed: 89, Loss: 19749.7793, Test Loss: 22204.6816
Seed: 27, Loss: 25873.8750, Test Loss: 32603.3320
Seed: 29, Loss: 19783.2148, Test Loss: 22235.2422
Seed: 64, Loss: 6794.5391, Test Loss: 75

## Model GraphConv + Linear

In [66]:
class GraphConv_MLPModel(torch.nn.Module):
    def __init__(self, hidden_channels, mlp_hidden_channels, num_hidden_layers, mlp_num_hidden_layers, dropout_prob):
        super(GraphConv_MLPModel, self).__init__()
        torch.manual_seed(42)

        # Graph Convolutional Layers
        self.fc_in = GraphConv(data.num_features, hidden_channels, agg="sum")
        self.dropout_layer = Dropout(dropout_prob)
        self.relu = torch.nn.ReLU()
        
        self.hidden_layers = nn.ModuleList([GraphConv(hidden_channels, hidden_channels, agg="mean") for _ in range(num_hidden_layers)])
        self.hidden_layers_mlp = nn.ModuleList([
            nn.Linear(hidden_channels, mlp_hidden_channels) if i == 0 else nn.Linear(mlp_hidden_channels, mlp_hidden_channels)
            for i in range(mlp_num_hidden_layers)
        ])

        # Camada de saída
        self.fc_out = nn.Linear(mlp_hidden_channels, 1)

    def forward(self, x, edge_index):
        # Camada de entrada
        x = self.fc_in(x, edge_index)
        x = self.relu(x)
        x = self.dropout_layer(x)

        # Camadas ocultas
        for layer in self.hidden_layers:
            x = layer(x, edge_index)
            x = self.relu(x)
            x = self.dropout_layer(x)

        for layer in self.hidden_layers_mlp:
            x = layer(x)
            x = self.relu(x)

        # Camada de saída
        x = self.fc_out(x)

        return x

    def train_model(self, data, criterion, optimizer):
        optimizer.zero_grad()  # Clear gradients.
        out = self(data.x, data.edge_index)  # Perform a single forward pass.
        out = out.view(-1)
        loss = criterion(out[data.id_train_mask], data.y[data.id_train_mask].float())  # Compute the loss solely based on the training nodes.
        loss.backward()  # Derive gradients.
        # torch.nn.utils.clip_grad_norm_(self.parameters(), max_norm=2.0)  # gradient clipping
        optimizer.step()  # Update parameters based on gradients.
        return loss.item()

    def test_model(self, data, criterion):
        self.eval()
        out = self(data.x, data.edge_index)
        out = out.view(-1)
        acc = criterion(out[data.id_test_mask], data.y[data.id_test_mask].float())
        return acc.item()

### Hyperopt

In [614]:
# Defina o espaço de busca para os hiperparâmetros
space = {
    'hidden_channels': hp.choice('hidden_channels', [8, 16, 32]),
    'mlp_hidden_channels': hp.choice('mlp_hidden_channels', [8, 16, 32]),
    'num_hidden_layers': hp.choice('num_hidden_layers', [0,1,2]),
    'mlp_num_hidden_layers': hp.choice('mlp_num_hidden_layers', [1,2,3]),
    'dropout_prob': hp.uniform('dropout_prob', 0.1, 0.6),
    'lr': hp.uniform('lr', 0.001, 0.01),
    'weight_decay': hp.uniform('weight_decay', 0, 0.1),
    'n_epochs': hp.choice('n_epochs', [1000, 2000])

}

# Função objetivo para otimização
def objective(params):
    hidden_channels = params['hidden_channels']
    mlp_hidden_channels = params['mlp_hidden_channels']
    num_hidden_layers = params['num_hidden_layers']
    mlp_num_hidden_layers = params['mlp_num_hidden_layers']
    dropout_prob = params['dropout_prob']
    lr = params['lr']
    weight_decay = params['weight_decay']
    n_epochs = params['n_epochs']

    # Criar modelo com os hiperparâmetros
    model = GraphConv_MLPModel(hidden_channels=hidden_channels, mlp_hidden_channels=mlp_hidden_channels, num_hidden_layers=num_hidden_layers, mlp_num_hidden_layers=mlp_num_hidden_layers, dropout_prob=dropout_prob)

    # Definir loss function e optimizer
    criterion = torch.nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

    # Treinamento e teste
    for epoch in range(1, n_epochs):
        loss = model.train_model(data, criterion, optimizer)
        acc = model.test_model(data, criterion)

    # Retorna a métrica que você deseja otimizar (por exemplo, negativo do desempenho)
    return acc

# Executar a otimização com o Hyperopt
best = fmin(fn=objective, space=space, algo=tpe.suggest, max_evals=50)

# Imprimir os melhores hiperparâmetros encontrados
print("Melhores hiperparâmetros encontrados:", best)


  0%|          | 0/50 [00:00<?, ?trial/s, best loss=?]

100%|██████████| 50/50 [15:28<00:00, 18.57s/trial, best loss: 877.4991455078125] 
Melhores hiperparâmetros encontrados: {'dropout_prob': 0.4175376164831556, 'hidden_channels': 0, 'lr': 0.0014044047133238025, 'mlp_hidden_channels': 2, 'mlp_num_hidden_layers': 1, 'n_epochs': 0, 'num_hidden_layers': 0, 'weight_decay': 0.04396996191444456}


In [68]:
resultado = {
    'dropout_prob': 0.4175376164831556, 'hidden_channels': 8, 'lr': 0.0014044047133238025, 'mlp_hidden_channels': 32, 'mlp_num_hidden_layers': 2,
     'n_epochs': 1000, 'num_hidden_layers': 0, 'weight_decay': 0.04396996191444456
}

hidden_channels = resultado['hidden_channels']
lr = resultado['lr']
n_epochs = resultado['n_epochs']
num_hidden_layers = resultado['num_hidden_layers']
weight_decay = resultado['weight_decay']
dropout_prob = resultado['dropout_prob']
mlp_hidden_channels = resultado['mlp_hidden_channels']
mlp_num_hidden_layers = resultado['mlp_num_hidden_layers']

torch.manual_seed(42)
model = GraphConv_MLPModel(hidden_channels=hidden_channels, mlp_hidden_channels=mlp_hidden_channels, num_hidden_layers=num_hidden_layers, mlp_num_hidden_layers=mlp_num_hidden_layers, dropout_prob=dropout_prob)

# Definir loss function e optimizer
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

# Treinamento e teste
for epoch in range(1, n_epochs):
    loss = model.train_model(data, criterion, optimizer)
    loss_test = model.test_model(data, criterion)

print(f'Loss: {loss:.4f}, Loss Test: {loss_test:.4f}')

Loss: 1481.4171, Loss Test: 877.4991


### Repeating using another set of 30 seeds for the model with the best configuration found for seed 42

In [69]:
class GraphConv_MLPModel(torch.nn.Module):
    def __init__(self, hidden_channels, mlp_hidden_channels, num_hidden_layers, mlp_num_hidden_layers, dropout_prob):
        super(GraphConv_MLPModel, self).__init__()
        
        # Graph Convolutional Layers
        self.fc_in = GraphConv(data.num_features, hidden_channels, agg="sum")
        self.dropout_layer = Dropout(dropout_prob)
        self.relu = torch.nn.ReLU()
        
        self.hidden_layers = nn.ModuleList([GraphConv(hidden_channels, hidden_channels, agg="mean") for _ in range(num_hidden_layers)])
        self.hidden_layers_mlp = nn.ModuleList([
            nn.Linear(hidden_channels, mlp_hidden_channels) if i == 0 else nn.Linear(mlp_hidden_channels, mlp_hidden_channels)
            for i in range(mlp_num_hidden_layers)
        ])

        # Camada de saída
        self.fc_out = nn.Linear(mlp_hidden_channels, 1)

    def forward(self, x, edge_index):
        # Camada de entrada
        x = self.fc_in(x, edge_index)
        x = self.relu(x)
        x = self.dropout_layer(x)

        # Camadas ocultas
        for layer in self.hidden_layers:
            x = layer(x, edge_index)
            x = self.relu(x)
            x = self.dropout_layer(x)

        for layer in self.hidden_layers_mlp:
            x = layer(x)
            x = self.relu(x)

        # Camada de saída
        x = self.fc_out(x)

        return x

    def train_model(self, data, criterion, optimizer):
        optimizer.zero_grad()  # Clear gradients.
        out = self(data.x, data.edge_index)  # Perform a single forward pass.
        out = out.view(-1)
        loss = criterion(out[data.id_train_mask], data.y[data.id_train_mask].float())  # Compute the loss solely based on the training nodes.
        loss.backward()  # Derive gradients.
        # torch.nn.utils.clip_grad_norm_(self.parameters(), max_norm=2.0)  # gradient clipping
        optimizer.step()  # Update parameters based on gradients.
        return loss.item()

    def test_model(self, data, criterion):
        self.eval()
        out = self(data.x, data.edge_index)
        out = out.view(-1)
        acc = criterion(out[data.id_test_mask], data.y[data.id_test_mask].float())
        return acc.item()


resultado = {
    'dropout_prob': 0.4175376164831556, 'hidden_channels': 8, 'lr': 0.0014044047133238025, 'mlp_hidden_channels': 32, 'mlp_num_hidden_layers': 2,
     'n_epochs': 1000, 'num_hidden_layers': 0, 'weight_decay': 0.04396996191444456
}

hidden_channels = resultado['hidden_channels']
lr = resultado['lr']
n_epochs = resultado['n_epochs']
num_hidden_layers = resultado['num_hidden_layers']
weight_decay = resultado['weight_decay']
dropout_prob = resultado['dropout_prob']
mlp_hidden_channels = resultado['mlp_hidden_channels']
mlp_num_hidden_layers = resultado['mlp_num_hidden_layers']

# Lists to store accuracies for each seed
final_accuracies = []
start_time = time.time()
# Training and Testing loops for each seed
for seed in seeds:
    # Instantiate the model with a different seed
    torch.manual_seed(seed)
    model =  GraphConv_MLPModel(hidden_channels=hidden_channels, mlp_hidden_channels=mlp_hidden_channels, num_hidden_layers=num_hidden_layers, mlp_num_hidden_layers=mlp_num_hidden_layers, dropout_prob=dropout_prob)

    # Fazer previsões nos conjuntos de treino e teste
    # Definir loss function e optimizer
    criterion = torch.nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    
    # Treinamento e teste
    for epoch in range(1, n_epochs):
        loss = model.train_model(data, criterion, optimizer)
        loss_test = model.test_model(data, criterion)

    print(f'Seed: {seed}, Loss: {loss:.4f}, Test Loss: {loss_test:.4f}')

    # Save accuracies for this seed
    final_accuracies.append(loss_test)

end_time = time.time()
final_time = end_time - start_time
# Calculate mean and standard deviation of accuracies
mean_accuracies = np.mean(final_accuracies , axis=0)
std_accuracies = np.std(final_accuracies , axis=0)

# Print or use mean_accuracies and std_accuracies as needed
print("Mean Accuracies:", mean_accuracies) 
print("Standard Deviations:", std_accuracies)
print("Time:", final_time, "s")

Seed: 81, Loss: 2924.8474, Test Loss: 2643.8459
Seed: 14, Loss: 2338.4224, Test Loss: 1265.3497
Seed: 3, Loss: 2375.2729, Test Loss: 2119.2371
Seed: 94, Loss: 2004.6334, Test Loss: 1503.8364
Seed: 35, Loss: 1959.5521, Test Loss: 1412.3280
Seed: 31, Loss: 2490.2666, Test Loss: 1533.6951
Seed: 28, Loss: 2474.1428, Test Loss: 1165.9363
Seed: 17, Loss: 2877.4419, Test Loss: 3539.2581
Seed: 13, Loss: 2776.5154, Test Loss: 2659.3630
Seed: 86, Loss: 3400.8806, Test Loss: 3739.5649
Seed: 69, Loss: 1940.6858, Test Loss: 1563.8118
Seed: 11, Loss: 2464.1082, Test Loss: 1540.2214
Seed: 75, Loss: 2252.7146, Test Loss: 2088.2273
Seed: 54, Loss: 1513.8409, Test Loss: 1506.9268
Seed: 4, Loss: 2293.3054, Test Loss: 1669.0616
Seed: 98, Loss: 2497.7791, Test Loss: 1715.6282
Seed: 89, Loss: 2245.4517, Test Loss: 1289.4271
Seed: 27, Loss: 3179.9436, Test Loss: 4376.4575
Seed: 29, Loss: 2221.4993, Test Loss: 1650.5371
Seed: 64, Loss: 2423.9668, Test Loss: 1488.4430
Seed: 77, Loss: 2508.7378, Test Loss: 1218

## Model GAT

In [75]:
class GATModel(torch.nn.Module):
    def __init__(self, hidden_channels, num_hidden_layers, dropout_prob, heads):
        super(GATModel, self).__init__()
        torch.manual_seed(42)

        # Graph Convolutional Layers
        self.fc_in = GATConv(data.num_features, hidden_channels, heads=heads)
        self.dropout_layer = Dropout(dropout_prob)
        self.relu = torch.nn.ReLU()
        
        self.hidden_layers = nn.ModuleList([GATConv(hidden_channels*heads, hidden_channels, heads=heads) for _ in range(num_hidden_layers)])

        # Camada de saída
        self.fc_out = GATConv(hidden_channels*heads, 1)

    def forward(self, x, edge_index):
        # Camada de entrada
        x = self.fc_in(x, edge_index)
        x = self.relu(x)
        x = self.dropout_layer(x)

        # Camadas ocultas
        for layer in self.hidden_layers:
            x = layer(x, edge_index)
            x = self.relu(x)
            x = self.dropout_layer(x)

        # Camada de saída
        x = self.fc_out(x, edge_index)

        return x

    def train_model(self, data, criterion, optimizer):
        optimizer.zero_grad()  # Clear gradients.
        out = self(data.x, data.edge_index)  # Perform a single forward pass.
        out = out.view(-1)
        loss = criterion(out[data.id_train_mask], data.y[data.id_train_mask].float())  # Compute the loss solely based on the training nodes.
        loss.backward()  # Derive gradients.
        # torch.nn.utils.clip_grad_norm_(self.parameters(), max_norm=2.0)  # gradient clipping
        optimizer.step()  # Update parameters based on gradients.
        return loss.item()

    def test_model(self, data, criterion):
        self.eval()
        out = self(data.x, data.edge_index)
        out = out.view(-1)
        acc = criterion(out[data.id_test_mask], data.y[data.id_test_mask].float())
        return acc.item()

### Hyperopt

In [16]:
# Defina o espaço de busca para os hiperparâmetros
space = {
    'hidden_channels': hp.choice('hidden_channels', [8, 16, 32]),
    'num_hidden_layers': hp.choice('num_hidden_layers', [0,1,2]),
    'dropout_prob': hp.uniform('dropout_prob', 0, 0.6),
    'lr': hp.uniform('lr', 0.001, 0.01),
    'weight_decay': hp.uniform('weight_decay', 0, 0.1),
    'n_epochs': hp.choice('n_epochs', [1000, 2000]),
    'heads': hp.choice('heads', [1,2,5])

}

# Função objetivo para otimização
def objective(params):
    hidden_channels = params['hidden_channels']
    num_hidden_layers = params['num_hidden_layers']
    dropout_prob = params['dropout_prob']
    lr = params['lr']
    weight_decay = params['weight_decay']
    n_epochs = params['n_epochs']
    heads= params['heads']

    # Criar modelo com os hiperparâmetros
    model = GATModel(hidden_channels=hidden_channels, num_hidden_layers=num_hidden_layers,dropout_prob=dropout_prob, heads=heads)

    # Definir loss function e optimizer
    criterion = torch.nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

    # Treinamento e teste
    for epoch in range(1, n_epochs):
        loss = model.train_model(data, criterion, optimizer)
        acc = model.test_model(data, criterion)

    # Retorna a métrica que você deseja otimizar (por exemplo, negativo do desempenho)
    return acc

# Executar a otimização com o Hyperopt
best = fmin(fn=objective, space=space, algo=tpe.suggest, max_evals=50)

# Imprimir os melhores hiperparâmetros encontrados
print("Melhores hiperparâmetros encontrados:", best)

  0%|          | 0/50 [00:00<?, ?trial/s, best loss=?]

100%|██████████| 50/50 [2:24:00<00:00, 172.82s/trial, best loss: 17725.23828125]
Melhores hiperparâmetros encontrados: {'dropout_prob': 0.23228546886688933, 'heads': 1, 'hidden_channels': 1, 'lr': 0.0025105558422511857, 'n_epochs': 1, 'num_hidden_layers': 2, 'weight_decay': 0.03559844148620763}


In [78]:
resultado = {
    'dropout_prob': 0.23228546886688933, 'heads': 2, 'hidden_channels': 16, 'lr': 0.0025105558422511857, 
    'n_epochs': 2000, 'num_hidden_layers': 2, 'weight_decay': 0.03559844148620763
}

hidden_channels = resultado['hidden_channels']
lr = resultado['lr']
n_epochs = resultado['n_epochs']
num_hidden_layers = resultado['num_hidden_layers']
weight_decay = resultado['weight_decay']
dropout_prob = resultado['dropout_prob']
heads = resultado['heads']

torch.manual_seed(42)
model = GATModel(hidden_channels=hidden_channels, num_hidden_layers=num_hidden_layers,dropout_prob=dropout_prob, heads=heads)

# Definir loss function e optimizer
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

# Treinamento e teste
for epoch in range(1, n_epochs):
    loss = model.train_model(data, criterion, optimizer)
    loss_test = model.test_model(data, criterion)

print(f'Loss: {loss:.4f}, Loss Test: {loss_test:.4f}')

Loss: 15470.7402, Loss Test: 17725.2383


### Repeating using another set of 30 seeds for the model with the best configuration found for seed 42

In [79]:
class GATModel(torch.nn.Module):

    def __init__(self, hidden_channels, num_hidden_layers, dropout_prob, heads):
        super(GATModel, self).__init__()
        # Graph Convolutional Layers
        self.fc_in = GATConv(data.num_features, hidden_channels, heads=heads)
        self.dropout_layer = Dropout(dropout_prob)
        self.relu = torch.nn.ReLU()
        
        self.hidden_layers = nn.ModuleList([GATConv(hidden_channels*heads, hidden_channels, heads=heads) for _ in range(num_hidden_layers)])

        # Camada de saída
        self.fc_out = GATConv(hidden_channels*heads, 1)

    def forward(self, x, edge_index):
        # Camada de entrada
        x = self.fc_in(x, edge_index)
        x = self.relu(x)
        x = self.dropout_layer(x)

        # Camadas ocultas
        for layer in self.hidden_layers:
            x = layer(x, edge_index)
            x = self.relu(x)
            x = self.dropout_layer(x)

        # Camada de saída
        x = self.fc_out(x, edge_index)

        return x

    def train_model(self, data, criterion, optimizer):
        optimizer.zero_grad()  # Clear gradients.
        out = self(data.x, data.edge_index)  # Perform a single forward pass.
        out = out.view(-1)
        loss = criterion(out[data.id_train_mask], data.y[data.id_train_mask].float())  # Compute the loss solely based on the training nodes.
        loss.backward()  # Derive gradients.
        # torch.nn.utils.clip_grad_norm_(self.parameters(), max_norm=2.0)  # gradient clipping
        optimizer.step()  # Update parameters based on gradients.
        return loss.item()

    def test_model(self, data, criterion):
        self.eval()
        out = self(data.x, data.edge_index)
        out = out.view(-1)
        acc = criterion(out[data.id_test_mask], data.y[data.id_test_mask].float())
        return acc.item()

resultado = {
    'dropout_prob': 0.23228546886688933, 'heads': 2, 'hidden_channels': 16, 'lr': 0.0025105558422511857, 
    'n_epochs': 2000, 'num_hidden_layers': 2, 'weight_decay': 0.03559844148620763
}

hidden_channels = resultado['hidden_channels']
lr = resultado['lr']
n_epochs = resultado['n_epochs']
num_hidden_layers = resultado['num_hidden_layers']
weight_decay = resultado['weight_decay']
dropout_prob = resultado['dropout_prob']
heads = resultado['heads']

# Lists to store accuracies for each seed
final_accuracies = []
start_time = time.time()
# Training and Testing loops for each seed
for seed in seeds:
    # Instantiate the model with a different seed
    torch.manual_seed(seed)
    model = GATModel(hidden_channels=hidden_channels, num_hidden_layers=num_hidden_layers,dropout_prob=dropout_prob, heads=heads)

    # Fazer previsões nos conjuntos de treino e teste
    # Definir loss function e optimizer
    criterion = torch.nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    
    # Treinamento e teste
    for epoch in range(1, n_epochs):
        loss = model.train_model(data, criterion, optimizer)
        loss_test = model.test_model(data, criterion)

    print(f'Seed: {seed}, Loss: {loss:.4f}, Test Loss: {loss_test:.4f}')

    # Save accuracies for this seed
    final_accuracies.append(loss_test)

end_time = time.time()
final_time = end_time - start_time
# Calculate mean and standard deviation of accuracies
mean_accuracies = np.mean(final_accuracies , axis=0)
std_accuracies = np.std(final_accuracies , axis=0)

# Print or use mean_accuracies and std_accuracies as needed
print("Mean Accuracies:", mean_accuracies) 
print("Standard Deviations:", std_accuracies)
print("Time:", final_time, "s")


Seed: 81, Loss: 16638.6973, Test Loss: 18994.0508
Seed: 14, Loss: 13397.1416, Test Loss: 20939.2637
Seed: 3, Loss: 14695.9258, Test Loss: 16326.9697
Seed: 94, Loss: 16437.6270, Test Loss: 19528.8672
Seed: 35, Loss: 16773.4766, Test Loss: 20434.1074
Seed: 31, Loss: 23605.9199, Test Loss: 30572.4629
Seed: 28, Loss: 20122.9258, Test Loss: 22687.9668
Seed: 17, Loss: 14547.5791, Test Loss: 19315.9766
Seed: 13, Loss: 20407.9062, Test Loss: 22514.0723
Seed: 86, Loss: 15060.5186, Test Loss: 16377.5186
Seed: 69, Loss: 96176.0234, Test Loss: 103130.3047
Seed: 11, Loss: 16605.9863, Test Loss: 20524.6094
Seed: 75, Loss: 125275.5000, Test Loss: 199120.5625
Seed: 54, Loss: 17078.5273, Test Loss: 19760.4863
Seed: 4, Loss: 9982.6670, Test Loss: 19002.4199
Seed: 98, Loss: 14412.0723, Test Loss: 18771.9727
Seed: 89, Loss: 21386.0645, Test Loss: 29867.6562
Seed: 27, Loss: 15255.6416, Test Loss: 16552.2324
Seed: 29, Loss: 14025.8936, Test Loss: 25221.9199
Seed: 64, Loss: 14704.8789, Test Loss: 17956.0957


## Model GAT + Linear

In [80]:
class GAT_MLPModel(torch.nn.Module):
    def __init__(self, hidden_channels, mlp_hidden_channels, num_hidden_layers, mlp_num_hidden_layers, dropout_prob, heads):
        super(GAT_MLPModel, self).__init__()
        torch.manual_seed(42)

        # Graph Convolutional Layers
        self.fc_in = GATConv(data.num_features, hidden_channels, heads=heads)
        self.dropout_layer = Dropout(dropout_prob)
        self.relu = torch.nn.ReLU()
        
        self.hidden_layers = nn.ModuleList([GATConv(hidden_channels*heads, hidden_channels, heads=heads) for _ in range(num_hidden_layers)])
        self.hidden_layers_mlp = nn.ModuleList([
            nn.Linear(hidden_channels*heads, mlp_hidden_channels) if i == 0 else nn.Linear(mlp_hidden_channels, mlp_hidden_channels)
            for i in range(mlp_num_hidden_layers)
        ])

        # Camada de saída
        self.fc_out = nn.Linear(mlp_hidden_channels, 1)

    def forward(self, x, edge_index):
        # Camada de entrada
        x = self.fc_in(x, edge_index)
        x = self.relu(x)
        x = self.dropout_layer(x)

        # Camadas ocultas
        for layer in self.hidden_layers:
            x = layer(x, edge_index)
            x = self.relu(x)
            x = self.dropout_layer(x)

        for layer in self.hidden_layers_mlp:
            x = layer(x)
            x = self.relu(x)

        # Camada de saída
        x = self.fc_out(x)

        return x

    def train_model(self, data, criterion, optimizer):
        optimizer.zero_grad()  # Clear gradients.
        out = self(data.x, data.edge_index)  # Perform a single forward pass.
        out = out.view(-1)
        loss = criterion(out[data.id_train_mask], data.y[data.id_train_mask].float())  # Compute the loss solely based on the training nodes.
        loss.backward()  # Derive gradients.
        # torch.nn.utils.clip_grad_norm_(self.parameters(), max_norm=2.0)  # gradient clipping
        optimizer.step()  # Update parameters based on gradients.
        return loss.item()

    def test_model(self, data, criterion):
        self.eval()
        out = self(data.x, data.edge_index)
        out = out.view(-1)
        acc = criterion(out[data.id_test_mask], data.y[data.id_test_mask].float())
        return acc.item()

### Hyperopt

In [19]:
# Defina o espaço de busca para os hiperparâmetros
space = {
    'hidden_channels': hp.choice('hidden_channels', [8, 16, 32]),
    'mlp_hidden_channels': hp.choice('mlp_hidden_channels', [8, 16, 32]),
    'num_hidden_layers': hp.choice('num_hidden_layers', [0,1,2]),
    'mlp_num_hidden_layers': hp.choice('mlp_num_hidden_layers', [1,2,3]),
    'dropout_prob': hp.uniform('dropout_prob', 0, 0.6),
    'lr': hp.uniform('lr', 0.001, 0.01),
    'weight_decay': hp.uniform('weight_decay', 0, 0.1),
    'n_epochs': hp.choice('n_epochs', [1000, 2000]),
    'heads': hp.choice('heads', [1,2,5])

}

# Função objetivo para otimização
def objective(params):
    hidden_channels = params['hidden_channels']
    mlp_hidden_channels = params['mlp_hidden_channels']
    num_hidden_layers = params['num_hidden_layers']
    mlp_num_hidden_layers = params['mlp_num_hidden_layers']
    dropout_prob = params['dropout_prob']
    lr = params['lr']
    weight_decay = params['weight_decay']
    n_epochs = params['n_epochs']
    heads = params['heads']

    # Criar modelo com os hiperparâmetros
    model = GAT_MLPModel(hidden_channels=hidden_channels, mlp_hidden_channels=mlp_hidden_channels, num_hidden_layers=num_hidden_layers, 
    mlp_num_hidden_layers=mlp_num_hidden_layers, dropout_prob=dropout_prob, heads=heads)

    # Definir loss function e optimizer
    criterion = torch.nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

    # Treinamento e teste
    for epoch in range(1, n_epochs):
        loss = model.train_model(data, criterion, optimizer)
        acc = model.test_model(data, criterion)

    # Retorna a métrica que você deseja otimizar (por exemplo, negativo do desempenho)
    return acc

# Executar a otimização com o Hyperopt
best = fmin(fn=objective, space=space, algo=tpe.suggest, max_evals=50)

# Imprimir os melhores hiperparâmetros encontrados
print("Melhores hiperparâmetros encontrados:", best)

  0%|          | 0/50 [00:00<?, ?trial/s, best loss=?]

100%|██████████| 50/50 [2:37:23<00:00, 188.86s/trial, best loss: 8993.6337890625]   
Melhores hiperparâmetros encontrados: {'dropout_prob': 0.2618600657898271, 'heads': 2, 'hidden_channels': 0, 'lr': 0.002594449690229989, 'mlp_hidden_channels': 1, 'mlp_num_hidden_layers': 0, 'n_epochs': 0, 'num_hidden_layers': 2, 'weight_decay': 0.027195784609336235}


In [82]:
resultado = {
    'dropout_prob': 0.2618600657898271, 'heads': 5, 'hidden_channels': 8, 'lr': 0.002594449690229989, 'mlp_hidden_channels': 16, 'mlp_num_hidden_layers': 1, 
    'n_epochs': 1000, 'num_hidden_layers': 2, 'weight_decay': 0.027195784609336235
}

hidden_channels = resultado['hidden_channels']
lr = resultado['lr']
n_epochs = resultado['n_epochs']
num_hidden_layers = resultado['num_hidden_layers']
weight_decay = resultado['weight_decay']
dropout_prob = resultado['dropout_prob']
heads = resultado['heads']
mlp_hidden_channels = resultado['mlp_hidden_channels']
mlp_num_hidden_layers = resultado['mlp_num_hidden_layers']

torch.manual_seed(42)
model = GAT_MLPModel(hidden_channels=hidden_channels, mlp_hidden_channels=mlp_hidden_channels, num_hidden_layers=num_hidden_layers, 
    mlp_num_hidden_layers=mlp_num_hidden_layers, dropout_prob=dropout_prob, heads=heads)

# Definir loss function e optimizer
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

# Treinamento e teste
for epoch in range(1, n_epochs):
    loss = model.train_model(data, criterion, optimizer)
    loss_test = model.test_model(data, criterion)

print(f'Loss: {loss:.4f}, Loss Test: {loss_test:.4f}')

Loss: 11508.0264, Loss Test: 8993.6338


### Repeating using another set of 30 seeds for the model with the best configuration found for seed 42

In [84]:
class GAT_MLPModel(torch.nn.Module):
    def __init__(self, hidden_channels, mlp_hidden_channels, num_hidden_layers, mlp_num_hidden_layers, dropout_prob, heads):
        super(GAT_MLPModel, self).__init__()

        # Graph Convolutional Layers
        self.fc_in = GATConv(data.num_features, hidden_channels, heads=heads)
        self.dropout_layer = Dropout(dropout_prob)
        self.relu = torch.nn.ReLU()
        
        self.hidden_layers = nn.ModuleList([GATConv(hidden_channels*heads, hidden_channels, heads=heads) for _ in range(num_hidden_layers)])
        self.hidden_layers_mlp = nn.ModuleList([
            nn.Linear(hidden_channels*heads, mlp_hidden_channels) if i == 0 else nn.Linear(mlp_hidden_channels, mlp_hidden_channels)
            for i in range(mlp_num_hidden_layers)
        ])

        # Camada de saída
        self.fc_out = nn.Linear(mlp_hidden_channels, 1)

    def forward(self, x, edge_index):
        # Camada de entrada
        x = self.fc_in(x, edge_index)
        x = self.relu(x)
        x = self.dropout_layer(x)

        # Camadas ocultas
        for layer in self.hidden_layers:
            x = layer(x, edge_index)
            x = self.relu(x)
            x = self.dropout_layer(x)

        for layer in self.hidden_layers_mlp:
            x = layer(x)
            x = self.relu(x)

        # Camada de saída
        x = self.fc_out(x)

        return x

    def train_model(self, data, criterion, optimizer):
        optimizer.zero_grad()  # Clear gradients.
        out = self(data.x, data.edge_index)  # Perform a single forward pass.
        out = out.view(-1)
        loss = criterion(out[data.id_train_mask], data.y[data.id_train_mask].float())  # Compute the loss solely based on the training nodes.
        loss.backward()  # Derive gradients.
        # torch.nn.utils.clip_grad_norm_(self.parameters(), max_norm=2.0)  # gradient clipping
        optimizer.step()  # Update parameters based on gradients.
        return loss.item()

    def test_model(self, data, criterion):
        self.eval()
        out = self(data.x, data.edge_index)
        out = out.view(-1)
        acc = criterion(out[data.id_test_mask], data.y[data.id_test_mask].float())
        return acc.item()

# Lists to store accuracies for each seed
final_accuracies = []
start_time = time.time()
# Training and Testing loops for each seed
for seed in seeds:
    # Instantiate the model with a different seed
    torch.manual_seed(seed)
    model = GAT_MLPModel(hidden_channels=hidden_channels, mlp_hidden_channels=mlp_hidden_channels, num_hidden_layers=num_hidden_layers, 
    mlp_num_hidden_layers=mlp_num_hidden_layers, dropout_prob=dropout_prob, heads=heads)

    # Fazer previsões nos conjuntos de treino e teste
    # Definir loss function e optimizer
    criterion = torch.nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    
    # Treinamento e teste
    for epoch in range(1, n_epochs):
        loss = model.train_model(data, criterion, optimizer)
        loss_test = model.test_model(data, criterion)

    print(f'Seed: {seed}, Loss: {loss:.4f}, Test Loss: {loss_test:.4f}')

    # Save accuracies for this seed
    final_accuracies.append(loss_test)

end_time = time.time()
final_time = end_time - start_time
# Calculate mean and standard deviation of accuracies
mean_accuracies = np.mean(final_accuracies , axis=0)
std_accuracies = np.std(final_accuracies , axis=0)

# Print or use mean_accuracies and std_accuracies as needed
print("Mean Accuracies:", mean_accuracies) 
print("Standard Deviations:", std_accuracies)
print("Time:", final_time, "s")


Seed: 81, Loss: 13978.1152, Test Loss: 16962.1250
Seed: 14, Loss: 14368.7334, Test Loss: 18753.8496
Seed: 3, Loss: 15863.7412, Test Loss: 18796.6543
Seed: 94, Loss: 17753.6016, Test Loss: 21538.4785
Seed: 35, Loss: 15854.4463, Test Loss: 18476.7148
Seed: 31, Loss: 16304.9092, Test Loss: 19007.7930
Seed: 28, Loss: 14397.0098, Test Loss: 18652.9746
Seed: 17, Loss: 14338.6221, Test Loss: 17528.7324
Seed: 13, Loss: 14172.2627, Test Loss: 17758.2734
Seed: 86, Loss: 16608.5488, Test Loss: 19797.9199
Seed: 69, Loss: 14372.7666, Test Loss: 16759.0625
Seed: 11, Loss: 19029.2520, Test Loss: 21864.3672
Seed: 75, Loss: 14442.7783, Test Loss: 15805.7861
Seed: 54, Loss: 18903.3105, Test Loss: 23313.5332
Seed: 4, Loss: 15473.3408, Test Loss: 18581.8750
Seed: 98, Loss: 14048.4990, Test Loss: 15574.0488
Seed: 89, Loss: 24305.1680, Test Loss: 22981.5605
Seed: 27, Loss: 14978.1113, Test Loss: 19544.2246
Seed: 29, Loss: 14931.1895, Test Loss: 17838.4121
Seed: 64, Loss: 14958.3252, Test Loss: 20023.3867
Se

## Model TransformerConv

In [88]:
class TransformerConvModel(torch.nn.Module):
    def __init__(self, hidden_channels, num_hidden_layers, dropout_prob, heads):
        super(TransformerConvModel, self).__init__()
        torch.manual_seed(42)

        # Graph Convolutional Layers
        self.fc_in = TransformerConv(data.num_features, hidden_channels, heads=heads)
        self.dropout_layer = Dropout(dropout_prob)
        self.relu = torch.nn.ReLU()
        
        self.hidden_layers = nn.ModuleList([TransformerConv(hidden_channels*heads, hidden_channels, heads=heads) for _ in range(num_hidden_layers)])

        # Camada de saída
        self.fc_out = TransformerConv(hidden_channels*heads, 1)

    def forward(self, x, edge_index):
        # Camada de entrada
        x = self.fc_in(x, edge_index)
        x = self.relu(x)
        x = self.dropout_layer(x)

        # Camadas ocultas
        for layer in self.hidden_layers:
            x = layer(x, edge_index)
            x = self.relu(x)
            x = self.dropout_layer(x)

        # Camada de saída
        x = self.fc_out(x, edge_index)

        return x

    def train_model(self, data, criterion, optimizer):
        optimizer.zero_grad()  # Clear gradients.
        out = self(data.x, data.edge_index)  # Perform a single forward pass.
        out = out.view(-1)
        loss = criterion(out[data.id_train_mask], data.y[data.id_train_mask].float())  # Compute the loss solely based on the training nodes.
        loss.backward()  # Derive gradients.
        # torch.nn.utils.clip_grad_norm_(self.parameters(), max_norm=2.0)  # gradient clipping
        optimizer.step()  # Update parameters based on gradients.
        return loss.item()

    def test_model(self, data, criterion):
        self.eval()
        out = self(data.x, data.edge_index)
        out = out.view(-1)
        acc = criterion(out[data.id_test_mask], data.y[data.id_test_mask].float())
        return acc.item()

### Hyperopt

In [21]:
# Defina o espaço de busca para os hiperparâmetros
space = {
    'hidden_channels': hp.choice('hidden_channels', [8, 16, 32]),
    'num_hidden_layers': hp.choice('num_hidden_layers', [0,1,2]),
    'dropout_prob': hp.uniform('dropout_prob', 0, 0.6),
    'lr': hp.uniform('lr', 0.001, 0.01),
    'weight_decay': hp.uniform('weight_decay', 0, 0.1),
    'n_epochs': hp.choice('n_epochs', [1000, 2000]),
    'heads': hp.choice('heads', [1,2,5])

}

# Função objetivo para otimização
def objective(params):
    hidden_channels = params['hidden_channels']
    num_hidden_layers = params['num_hidden_layers']
    dropout_prob = params['dropout_prob']
    lr = params['lr']
    weight_decay = params['weight_decay']
    n_epochs = params['n_epochs']
    heads= params['heads']

    # Criar modelo com os hiperparâmetros
    model = TransformerConvModel(hidden_channels=hidden_channels, num_hidden_layers=num_hidden_layers,dropout_prob=dropout_prob, heads=heads)

    # Definir loss function e optimizer
    criterion = torch.nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

    # Treinamento e teste
    for epoch in range(1, n_epochs):
        loss = model.train_model(data, criterion, optimizer)
        acc = model.test_model(data, criterion)

    # Retorna a métrica que você deseja otimizar (por exemplo, negativo do desempenho)
    return acc

# Executar a otimização com o Hyperopt
best = fmin(fn=objective, space=space, algo=tpe.suggest, max_evals=50)

# Imprimir os melhores hiperparâmetros encontrados
print("Melhores hiperparâmetros encontrados:", best)

  0%|          | 0/50 [00:00<?, ?trial/s, best loss=?]

100%|██████████| 50/50 [2:25:20<00:00, 174.42s/trial, best loss: 2081.659912109375]  
Melhores hiperparâmetros encontrados: {'dropout_prob': 0.3168179447358561, 'heads': 1, 'hidden_channels': 2, 'lr': 0.00949616792035446, 'n_epochs': 1, 'num_hidden_layers': 1, 'weight_decay': 0.04278289743105026}


In [89]:
resultado = {
    'dropout_prob': 0.3168179447358561, 'heads': 2, 'hidden_channels': 32, 'lr': 0.00949616792035446, 'n_epochs': 2000, 
    'num_hidden_layers': 1, 'weight_decay': 0.04278289743105026
}

hidden_channels = resultado['hidden_channels']
lr = resultado['lr']
n_epochs = resultado['n_epochs']
num_hidden_layers = resultado['num_hidden_layers']
weight_decay = resultado['weight_decay']
dropout_prob = resultado['dropout_prob']
heads = resultado['heads']


torch.manual_seed(42)
model = TransformerConvModel(hidden_channels=hidden_channels, num_hidden_layers=num_hidden_layers,dropout_prob=dropout_prob, heads=heads)


# Definir loss function e optimizer
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

# Treinamento e teste
for epoch in range(1, n_epochs):
    loss = model.train_model(data, criterion, optimizer)
    loss_test = model.test_model(data, criterion)

print(f'Loss: {loss:.4f}, Loss Test: {loss_test:.4f}')

Loss: 3058.8945, Loss Test: 2081.6599


### Repeating using another set of 30 seeds for the model with the best configuration found for seed 42

In [90]:
class TransformerConvModel(torch.nn.Module):
    def __init__(self, hidden_channels, num_hidden_layers, dropout_prob, heads):
        super(TransformerConvModel, self).__init__()

        # Graph Convolutional Layers
        self.fc_in = TransformerConv(data.num_features, hidden_channels, heads=heads)
        self.dropout_layer = Dropout(dropout_prob)
        self.relu = torch.nn.ReLU()
        
        self.hidden_layers = nn.ModuleList([TransformerConv(hidden_channels*heads, hidden_channels, heads=heads) for _ in range(num_hidden_layers)])

        # Camada de saída
        self.fc_out = TransformerConv(hidden_channels*heads, 1)

    def forward(self, x, edge_index):
        # Camada de entrada
        x = self.fc_in(x, edge_index)
        x = self.relu(x)
        x = self.dropout_layer(x)

        # Camadas ocultas
        for layer in self.hidden_layers:
            x = layer(x, edge_index)
            x = self.relu(x)
            x = self.dropout_layer(x)

        # Camada de saída
        x = self.fc_out(x, edge_index)

        return x

    def train_model(self, data, criterion, optimizer):
        optimizer.zero_grad()  # Clear gradients.
        out = self(data.x, data.edge_index)  # Perform a single forward pass.
        out = out.view(-1)
        loss = criterion(out[data.id_train_mask], data.y[data.id_train_mask].float())  # Compute the loss solely based on the training nodes.
        loss.backward()  # Derive gradients.
        # torch.nn.utils.clip_grad_norm_(self.parameters(), max_norm=2.0)  # gradient clipping
        optimizer.step()  # Update parameters based on gradients.
        return loss.item()

    def test_model(self, data, criterion):
        self.eval()
        out = self(data.x, data.edge_index)
        out = out.view(-1)
        acc = criterion(out[data.id_test_mask], data.y[data.id_test_mask].float())
        return acc.item()

# Lists to store accuracies for each seed
final_accuracies = []
start_time = time.time()
# Training and Testing loops for each seed
for seed in seeds:
    # Instantiate the model with a different seed
    torch.manual_seed(seed)
    model = TransformerConvModel(hidden_channels=hidden_channels, num_hidden_layers=num_hidden_layers,dropout_prob=dropout_prob, heads=heads)

    # Fazer previsões nos conjuntos de treino e teste
    # Definir loss function e optimizer
    criterion = torch.nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    
    # Treinamento e teste
    for epoch in range(1, n_epochs):
        loss = model.train_model(data, criterion, optimizer)
        loss_test = model.test_model(data, criterion)

    print(f'Seed: {seed}, Loss: {loss:.4f}, Test Loss: {loss_test:.4f}')

    # Save accuracies for this seed
    final_accuracies.append(loss_test)

end_time = time.time()
final_time = end_time - start_time
# Calculate mean and standard deviation of accuracies
mean_accuracies = np.mean(final_accuracies , axis=0)
std_accuracies = np.std(final_accuracies , axis=0)

# Print or use mean_accuracies and std_accuracies as needed
print("Mean Accuracies:", mean_accuracies) 
print("Standard Deviations:", std_accuracies)
print("Time:", final_time, "s")

Seed: 81, Loss: 47288.5898, Test Loss: 35095.3750
Seed: 14, Loss: 19519.6191, Test Loss: 22214.7031
Seed: 3, Loss: 3126.2749, Test Loss: 3241.8904
Seed: 94, Loss: 7990.5210, Test Loss: 13392.5420
Seed: 35, Loss: 2920.1807, Test Loss: 3700.0737
Seed: 31, Loss: 10771.9170, Test Loss: 49593.0391
Seed: 28, Loss: 4677.5342, Test Loss: 9567.5762
Seed: 17, Loss: 18151.1816, Test Loss: 22025.1758
Seed: 13, Loss: 4762.3877, Test Loss: 8123.0444
Seed: 86, Loss: 4356.0723, Test Loss: 7035.0703
Seed: 69, Loss: 19558.1719, Test Loss: 21998.7832
Seed: 11, Loss: 20590.1328, Test Loss: 39143.1484
Seed: 75, Loss: 3100.5132, Test Loss: 7796.5962
Seed: 54, Loss: 3989.9248, Test Loss: 5997.9277
Seed: 4, Loss: 3130.8384, Test Loss: 11019.6006
Seed: 98, Loss: 5473.5273, Test Loss: 27556.4746
Seed: 89, Loss: 6386.3091, Test Loss: 5675.5806
Seed: 27, Loss: 6484.8896, Test Loss: 10861.0312
Seed: 29, Loss: 13754.1914, Test Loss: 30110.2852
Seed: 64, Loss: 183216.7656, Test Loss: 500416.0312
Seed: 77, Loss: 3597

## Model TransformerConv + Linear

In [91]:
class TransformerConv_MLPModel(torch.nn.Module):
    def __init__(self, hidden_channels, mlp_hidden_channels, num_hidden_layers, mlp_num_hidden_layers, dropout_prob, heads):
        super(TransformerConv_MLPModel, self).__init__()
        torch.manual_seed(42)

        # Graph Convolutional Layers
        self.fc_in = TransformerConv(data.num_features, hidden_channels, heads=heads)
        self.dropout_layer = Dropout(dropout_prob)
        self.relu = torch.nn.ReLU()
        
        self.hidden_layers = nn.ModuleList([TransformerConv(hidden_channels*heads, hidden_channels, heads=heads) for _ in range(num_hidden_layers)])
        self.hidden_layers_mlp = nn.ModuleList([
            nn.Linear(hidden_channels*heads, mlp_hidden_channels) if i == 0 else nn.Linear(mlp_hidden_channels, mlp_hidden_channels)
            for i in range(mlp_num_hidden_layers)
        ])

        # Camada de saída
        self.fc_out = nn.Linear(mlp_hidden_channels, 1)

    def forward(self, x, edge_index):
        # Camada de entrada
        x = self.fc_in(x, edge_index)
        x = self.relu(x)
        x = self.dropout_layer(x)

        # Camadas ocultas
        for layer in self.hidden_layers:
            x = layer(x, edge_index)
            x = self.relu(x)
            x = self.dropout_layer(x)

        for layer in self.hidden_layers_mlp:
            x = layer(x)
            x = self.relu(x)

        # Camada de saída
        x = self.fc_out(x)

        return x

    def train_model(self, data, criterion, optimizer):
        optimizer.zero_grad()  # Clear gradients.
        out = self(data.x, data.edge_index)  # Perform a single forward pass.
        out = out.view(-1)
        loss = criterion(out[data.id_train_mask], data.y[data.id_train_mask].float())  # Compute the loss solely based on the training nodes.
        loss.backward()  # Derive gradients.
        # torch.nn.utils.clip_grad_norm_(self.parameters(), max_norm=2.0)  # gradient clipping
        optimizer.step()  # Update parameters based on gradients.
        return loss.item()

    def test_model(self, data, criterion):
        self.eval()
        out = self(data.x, data.edge_index)
        out = out.view(-1)
        acc = criterion(out[data.id_test_mask], data.y[data.id_test_mask].float())
        return acc.item()

### Hyperopt

In [23]:
# Defina o espaço de busca para os hiperparâmetros
space = {
    'hidden_channels': hp.choice('hidden_channels', [8, 16, 32]),
    'mlp_hidden_channels': hp.choice('mlp_hidden_channels', [8, 16, 32]),
    'num_hidden_layers': hp.choice('num_hidden_layers', [0,1,2]),
    'mlp_num_hidden_layers': hp.choice('mlp_num_hidden_layers', [1,2,3]),
    'dropout_prob': hp.uniform('dropout_prob', 0, 0.6),
    'lr': hp.uniform('lr', 0.001, 0.01),
    'weight_decay': hp.uniform('weight_decay', 0, 0.1),
    'n_epochs': hp.choice('n_epochs', [1000, 2000]),
    'heads': hp.choice('heads', [1,2,5])

}

# Função objetivo para otimização
def objective(params):
    hidden_channels = params['hidden_channels']
    mlp_hidden_channels = params['mlp_hidden_channels']
    num_hidden_layers = params['num_hidden_layers']
    mlp_num_hidden_layers = params['mlp_num_hidden_layers']
    dropout_prob = params['dropout_prob']
    lr = params['lr']
    weight_decay = params['weight_decay']
    n_epochs = params['n_epochs']
    heads = params['heads']

    # Criar modelo com os hiperparâmetros
    model = TransformerConv_MLPModel(hidden_channels=hidden_channels, mlp_hidden_channels=mlp_hidden_channels, num_hidden_layers=num_hidden_layers, 
    mlp_num_hidden_layers=mlp_num_hidden_layers, dropout_prob=dropout_prob, heads=heads)

    # Definir loss function e optimizer
    criterion = torch.nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

    # Treinamento e teste
    for epoch in range(1, n_epochs):
        loss = model.train_model(data, criterion, optimizer)
        acc = model.test_model(data, criterion)

    # Retorna a métrica que você deseja otimizar (por exemplo, negativo do desempenho)
    return acc

# Executar a otimização com o Hyperopt
best = fmin(fn=objective, space=space, algo=tpe.suggest, max_evals=50)

# Imprimir os melhores hiperparâmetros encontrados
print("Melhores hiperparâmetros encontrados:", best)

  0%|          | 0/50 [00:00<?, ?trial/s, best loss=?]

100%|██████████| 50/50 [2:25:04<00:00, 174.09s/trial, best loss: 1373.9088134765625]  
Melhores hiperparâmetros encontrados: {'dropout_prob': 0.07464854317163591, 'heads': 1, 'hidden_channels': 0, 'lr': 0.006868358897158035, 'mlp_hidden_channels': 1, 'mlp_num_hidden_layers': 2, 'n_epochs': 0, 'num_hidden_layers': 2, 'weight_decay': 0.026614970242972606}


In [93]:
resultado = {
    'dropout_prob': 0.07464854317163591, 'heads': 2, 'hidden_channels': 8, 'lr': 0.006868358897158035, 'mlp_hidden_channels': 16, 
    'mlp_num_hidden_layers': 3, 'n_epochs': 1000, 'num_hidden_layers': 2, 'weight_decay': 0.026614970242972606
}

hidden_channels = resultado['hidden_channels']
lr = resultado['lr']
n_epochs = resultado['n_epochs']
num_hidden_layers = resultado['num_hidden_layers']
weight_decay = resultado['weight_decay']
dropout_prob = resultado['dropout_prob']
heads = resultado['heads']
mlp_num_hidden_layers = resultado['mlp_num_hidden_layers']
mlp_hidden_channels = resultado['mlp_hidden_channels']


torch.manual_seed(42)
model = TransformerConv_MLPModel(hidden_channels=hidden_channels, mlp_hidden_channels=mlp_hidden_channels, num_hidden_layers=num_hidden_layers, 
    mlp_num_hidden_layers=mlp_num_hidden_layers, dropout_prob=dropout_prob, heads=heads)


# Definir loss function e optimizer
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

# Treinamento e teste
for epoch in range(1, n_epochs):
    loss = model.train_model(data, criterion, optimizer)
    loss_test = model.test_model(data, criterion)

print(f'Loss: {loss:.4f}, Loss Test: {loss_test:.4f}')

Loss: 1935.4923, Loss Test: 1373.9088


### Repeating using another set of 30 seeds for the model with the best configuration found for seed 42

In [94]:
class TransformerConv_MLPModel(torch.nn.Module):
    def __init__(self, hidden_channels, mlp_hidden_channels, num_hidden_layers, mlp_num_hidden_layers, dropout_prob, heads):
        super(TransformerConv_MLPModel, self).__init__()

        # Graph Convolutional Layers
        self.fc_in = TransformerConv(data.num_features, hidden_channels, heads=heads)
        self.dropout_layer = Dropout(dropout_prob)
        self.relu = torch.nn.ReLU()
        
        self.hidden_layers = nn.ModuleList([TransformerConv(hidden_channels*heads, hidden_channels, heads=heads) for _ in range(num_hidden_layers)])
        self.hidden_layers_mlp = nn.ModuleList([
            nn.Linear(hidden_channels*heads, mlp_hidden_channels) if i == 0 else nn.Linear(mlp_hidden_channels, mlp_hidden_channels)
            for i in range(mlp_num_hidden_layers)
        ])

        # Camada de saída
        self.fc_out = nn.Linear(mlp_hidden_channels, 1)

    def forward(self, x, edge_index):
        # Camada de entrada
        x = self.fc_in(x, edge_index)
        x = self.relu(x)
        x = self.dropout_layer(x)

        # Camadas ocultas
        for layer in self.hidden_layers:
            x = layer(x, edge_index)
            x = self.relu(x)
            x = self.dropout_layer(x)

        for layer in self.hidden_layers_mlp:
            x = layer(x)
            x = self.relu(x)

        # Camada de saída
        x = self.fc_out(x)

        return x

    def train_model(self, data, criterion, optimizer):
        optimizer.zero_grad()  # Clear gradients.
        out = self(data.x, data.edge_index)  # Perform a single forward pass.
        out = out.view(-1)
        loss = criterion(out[data.id_train_mask], data.y[data.id_train_mask].float())  # Compute the loss solely based on the training nodes.
        loss.backward()  # Derive gradients.
        # torch.nn.utils.clip_grad_norm_(self.parameters(), max_norm=2.0)  # gradient clipping
        optimizer.step()  # Update parameters based on gradients.
        return loss.item()

    def test_model(self, data, criterion):
        self.eval()
        out = self(data.x, data.edge_index)
        out = out.view(-1)
        acc = criterion(out[data.id_test_mask], data.y[data.id_test_mask].float())
        return acc.item()


# Lists to store accuracies for each seed
final_accuracies = []
start_time = time.time()
# Training and Testing loops for each seed
for seed in seeds:
    # Instantiate the model with a different seed
    torch.manual_seed(seed)
    model = TransformerConv_MLPModel(hidden_channels=hidden_channels, mlp_hidden_channels=mlp_hidden_channels, num_hidden_layers=num_hidden_layers, 
    mlp_num_hidden_layers=mlp_num_hidden_layers, dropout_prob=dropout_prob, heads=heads)

    # Fazer previsões nos conjuntos de treino e teste
    # Definir loss function e optimizer
    criterion = torch.nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    
    # Treinamento e teste
    for epoch in range(1, n_epochs):
        loss = model.train_model(data, criterion, optimizer)
        loss_test = model.test_model(data, criterion)

    print(f'Seed: {seed}, Loss: {loss:.4f}, Test Loss: {loss_test:.4f}')

    # Save accuracies for this seed
    final_accuracies.append(loss_test)

end_time = time.time()
final_time = end_time - start_time
# Calculate mean and standard deviation of accuracies
mean_accuracies = np.mean(final_accuracies , axis=0)
std_accuracies = np.std(final_accuracies , axis=0)

# Print or use mean_accuracies and std_accuracies as needed
print("Mean Accuracies:", mean_accuracies) 
print("Standard Deviations:", std_accuracies)
print("Time:", final_time, "s")

Seed: 81, Loss: 3246.6433, Test Loss: 3848.2937
Seed: 14, Loss: 1854.8154, Test Loss: 2038.2278
Seed: 3, Loss: 3683.3958, Test Loss: 4788.7837
Seed: 94, Loss: 1365.8429, Test Loss: 1144.8635
Seed: 35, Loss: 4676.1641, Test Loss: 3710.1497
Seed: 31, Loss: 2421.3767, Test Loss: 2473.3662
Seed: 28, Loss: 2336.0244, Test Loss: 3415.4292
Seed: 17, Loss: 1926.0275, Test Loss: 2874.6367
Seed: 13, Loss: 2425.9580, Test Loss: 2217.7839
Seed: 86, Loss: 1075.6631, Test Loss: 996.1031
Seed: 69, Loss: 2848.9385, Test Loss: 2276.0835
Seed: 11, Loss: 3400.9541, Test Loss: 3870.3289
Seed: 75, Loss: 2155.5469, Test Loss: 1667.0992
Seed: 54, Loss: 2883.0396, Test Loss: 3787.4958
Seed: 4, Loss: 1827.5552, Test Loss: 1654.1377
Seed: 98, Loss: 3231.4785, Test Loss: 3637.9360
Seed: 89, Loss: 1943.6440, Test Loss: 2489.4956
Seed: 27, Loss: 2257.5981, Test Loss: 4721.1567
Seed: 29, Loss: 2795.3130, Test Loss: 4187.0527
Seed: 64, Loss: 2726.9172, Test Loss: 4181.2500
Seed: 77, Loss: 3143.3328, Test Loss: 4011.

## Model GENConv

In [98]:
class GENConvModel(torch.nn.Module):
    def __init__(self, hidden_channels, num_hidden_layers, dropout_prob):
        super(GENConvModel, self).__init__()
        torch.manual_seed(42)

        # Graph Convolutional Layers
        self.fc_in = GENConv(data.num_features, hidden_channels)
        self.dropout_layer = Dropout(dropout_prob)
        self.relu = torch.nn.ReLU()
        
        self.hidden_layers = nn.ModuleList([GENConv(hidden_channels, hidden_channels) for _ in range(num_hidden_layers)])

        # Camada de saída
        self.fc_out = GENConv(hidden_channels, 1)

    def forward(self, x, edge_index):
        # Camada de entrada
        x = self.fc_in(x, edge_index)
        x = self.relu(x)
        x = self.dropout_layer(x)

        # Camadas ocultas
        for layer in self.hidden_layers:
            x = layer(x, edge_index)
            x = self.relu(x)
            x = self.dropout_layer(x)

        # Camada de saída
        x = self.fc_out(x, edge_index)

        return x

    def train_model(self, data, criterion, optimizer):
        optimizer.zero_grad()  # Clear gradients.
        out = self(data.x, data.edge_index)  # Perform a single forward pass.
        out = out.view(-1)
        loss = criterion(out[data.id_train_mask], data.y[data.id_train_mask].float())  # Compute the loss solely based on the training nodes.
        loss.backward()  # Derive gradients.
        # torch.nn.utils.clip_grad_norm_(self.parameters(), max_norm=2.0)  # gradient clipping
        optimizer.step()  # Update parameters based on gradients.
        return loss.item()

    def test_model(self, data, criterion):
        self.eval()
        out = self(data.x, data.edge_index)
        out = out.view(-1)
        acc = criterion(out[data.id_test_mask], data.y[data.id_test_mask].float())
        return acc.item()

### Hyperopt

In [644]:
# Defina o espaço de busca para os hiperparâmetros
space = {
    'hidden_channels': hp.choice('hidden_channels', [8, 16, 32]),
    'num_hidden_layers': hp.choice('num_hidden_layers', [0,1,2]),
    'dropout_prob': hp.uniform('dropout_prob', 0, 0.6),
    'lr': hp.uniform('lr', 0.001, 0.01),
    'weight_decay': hp.uniform('weight_decay', 0, 0.1),
    'n_epochs': hp.choice('n_epochs', [1000, 2000])

}

# Função objetivo para otimização
def objective(params):
    hidden_channels = params['hidden_channels']
    num_hidden_layers = params['num_hidden_layers']
    dropout_prob = params['dropout_prob']
    lr = params['lr']
    weight_decay = params['weight_decay']
    n_epochs = params['n_epochs']

    # Criar modelo com os hiperparâmetros
    model = GENConvModel(hidden_channels=hidden_channels, num_hidden_layers=num_hidden_layers,dropout_prob=dropout_prob)

    # Definir loss function e optimizer
    criterion = torch.nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

    # Treinamento e teste
    for epoch in range(1, n_epochs):
        loss = model.train_model(data, criterion, optimizer)
        acc = model.test_model(data, criterion)

    # Retorna a métrica que você deseja otimizar (por exemplo, negativo do desempenho)
    return acc

# Executar a otimização com o Hyperopt
best = fmin(fn=objective, space=space, algo=tpe.suggest, max_evals=50)

# Imprimir os melhores hiperparâmetros encontrados
print("Melhores hiperparâmetros encontrados:", best)

  0%|          | 0/50 [00:00<?, ?trial/s, best loss=?]

100%|██████████| 50/50 [1:26:12<00:00, 103.45s/trial, best loss: 707.3665771484375] 
Melhores hiperparâmetros encontrados: {'dropout_prob': 0.34996435171754614, 'hidden_channels': 2, 'lr': 0.008075490789724454, 'n_epochs': 1, 'num_hidden_layers': 0, 'weight_decay': 0.09939941333474925}


In [99]:
resultado = {
    'dropout_prob': 0.34996435171754614, 'hidden_channels': 32, 'lr': 0.008075490789724454, 'n_epochs': 2000, 'num_hidden_layers': 0, 'weight_decay': 0.09939941333474925
}

hidden_channels = resultado['hidden_channels']
lr = resultado['lr']
n_epochs = resultado['n_epochs']
num_hidden_layers = resultado['num_hidden_layers']
weight_decay = resultado['weight_decay']
dropout_prob = resultado['dropout_prob']

torch.manual_seed(42)
model = GENConvModel(hidden_channels=hidden_channels, num_hidden_layers=num_hidden_layers,dropout_prob=dropout_prob)


# Definir loss function e optimizer
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

# Treinamento e teste
for epoch in range(1, n_epochs):
    loss = model.train_model(data, criterion, optimizer)
    loss_test = model.test_model(data, criterion)

print(f'Loss: {loss:.4f}, Loss Test: {loss_test:.4f}')

Loss: 1277.1487, Loss Test: 707.3666


### Repeating using another set of 30 seeds for the model with the best configuration found for seed 42

In [100]:
class GENConvModel(torch.nn.Module):
    def __init__(self, hidden_channels, num_hidden_layers, dropout_prob):
        super(GENConvModel, self).__init__()

        # Graph Convolutional Layers
        self.fc_in = GENConv(data.num_features, hidden_channels)
        self.dropout_layer = Dropout(dropout_prob)
        self.relu = torch.nn.ReLU()
        
        self.hidden_layers = nn.ModuleList([GENConv(hidden_channels, hidden_channels) for _ in range(num_hidden_layers)])

        # Camada de saída
        self.fc_out = GENConv(hidden_channels, 1)

    def forward(self, x, edge_index):
        # Camada de entrada
        x = self.fc_in(x, edge_index)
        x = self.relu(x)
        x = self.dropout_layer(x)

        # Camadas ocultas
        for layer in self.hidden_layers:
            x = layer(x, edge_index)
            x = self.relu(x)
            x = self.dropout_layer(x)

        # Camada de saída
        x = self.fc_out(x, edge_index)

        return x

    def train_model(self, data, criterion, optimizer):
        optimizer.zero_grad()  # Clear gradients.
        out = self(data.x, data.edge_index)  # Perform a single forward pass.
        out = out.view(-1)
        loss = criterion(out[data.id_train_mask], data.y[data.id_train_mask].float())  # Compute the loss solely based on the training nodes.
        loss.backward()  # Derive gradients.
        # torch.nn.utils.clip_grad_norm_(self.parameters(), max_norm=2.0)  # gradient clipping
        optimizer.step()  # Update parameters based on gradients.
        return loss.item()

    def test_model(self, data, criterion):
        self.eval()
        out = self(data.x, data.edge_index)
        out = out.view(-1)
        acc = criterion(out[data.id_test_mask], data.y[data.id_test_mask].float())
        return acc.item()


# Lists to store accuracies for each seed
final_accuracies = []
start_time = time.time()
# Training and Testing loops for each seed
for seed in seeds:
    # Instantiate the model with a different seed
    torch.manual_seed(seed)
    model = GENConvModel(hidden_channels=hidden_channels, num_hidden_layers=num_hidden_layers,dropout_prob=dropout_prob)

    # Fazer previsões nos conjuntos de treino e teste
    # Definir loss function e optimizer
    criterion = torch.nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    
    # Treinamento e teste
    for epoch in range(1, n_epochs):
        loss = model.train_model(data, criterion, optimizer)
        loss_test = model.test_model(data, criterion)

    print(f'Seed: {seed}, Loss: {loss:.4f}, Test Loss: {loss_test:.4f}')

    # Save accuracies for this seed
    final_accuracies.append(loss_test)

end_time = time.time()
final_time = end_time - start_time
# Calculate mean and standard deviation of accuracies
mean_accuracies = np.mean(final_accuracies , axis=0)
std_accuracies = np.std(final_accuracies , axis=0)

# Print or use mean_accuracies and std_accuracies as needed
print("Mean Accuracies:", mean_accuracies) 
print("Standard Deviations:", std_accuracies)
print("Time:", final_time, "s")

Seed: 81, Loss: 1908.3005, Test Loss: 3523.7515
Seed: 14, Loss: 1021.8497, Test Loss: 2839.7144
Seed: 3, Loss: 1283.9503, Test Loss: 3286.7739
Seed: 94, Loss: 999.2529, Test Loss: 745.5527
Seed: 35, Loss: 1254.2456, Test Loss: 4123.4131
Seed: 31, Loss: 1607.1255, Test Loss: 3812.6716
Seed: 28, Loss: 677.4896, Test Loss: 3223.7996
Seed: 17, Loss: 1095.4258, Test Loss: 623.7651
Seed: 13, Loss: 1762.3258, Test Loss: 2796.3955
Seed: 86, Loss: 19761.2461, Test Loss: 22213.1445
Seed: 69, Loss: 1146.7682, Test Loss: 701.1161
Seed: 11, Loss: 875.7896, Test Loss: 3994.9919
Seed: 75, Loss: 592.3432, Test Loss: 5761.5034
Seed: 54, Loss: 701.2982, Test Loss: 3124.4001
Seed: 4, Loss: 1852.1292, Test Loss: 3883.6702
Seed: 98, Loss: 1321.8444, Test Loss: 809.7974
Seed: 89, Loss: 878.9097, Test Loss: 2071.3103
Seed: 27, Loss: 19761.2461, Test Loss: 22213.1445
Seed: 29, Loss: 1709.0060, Test Loss: 4296.9771
Seed: 64, Loss: 1330.2941, Test Loss: 2712.0068
Seed: 77, Loss: 19761.2461, Test Loss: 22213.144

## Model GENConv + Linear

In [101]:
class GENConv_MLPModel(torch.nn.Module):
    def __init__(self, hidden_channels, mlp_hidden_channels, num_hidden_layers, mlp_num_hidden_layers, dropout_prob):
        super(GENConv_MLPModel, self).__init__()
        torch.manual_seed(42)

        # Graph Convolutional Layers
        self.fc_in = GENConv(data.num_features, hidden_channels)
        self.dropout_layer = Dropout(dropout_prob)
        self.relu = torch.nn.ReLU()
        
        self.hidden_layers = nn.ModuleList([GENConv(hidden_channels, hidden_channels) for _ in range(num_hidden_layers)])
        self.hidden_layers_mlp = nn.ModuleList([
            nn.Linear(hidden_channels, mlp_hidden_channels) if i == 0 else nn.Linear(mlp_hidden_channels, mlp_hidden_channels)
            for i in range(mlp_num_hidden_layers)
        ])

        # Camada de saída
        self.fc_out = nn.Linear(mlp_hidden_channels, 1)

    def forward(self, x, edge_index):
        # Camada de entrada
        x = self.fc_in(x, edge_index)
        x = self.relu(x)
        x = self.dropout_layer(x)

        # Camadas ocultas
        for layer in self.hidden_layers:
            x = layer(x, edge_index)
            x = self.relu(x)
            x = self.dropout_layer(x)

        for layer in self.hidden_layers_mlp:
            x = layer(x)
            x = self.relu(x)

        # Camada de saída
        x = self.fc_out(x)

        return x

    def train_model(self, data, criterion, optimizer):
        optimizer.zero_grad()  # Clear gradients.
        out = self(data.x, data.edge_index)  # Perform a single forward pass.
        out = out.view(-1)
        loss = criterion(out[data.id_train_mask], data.y[data.id_train_mask].float())  # Compute the loss solely based on the training nodes.
        loss.backward()  # Derive gradients.
        # torch.nn.utils.clip_grad_norm_(self.parameters(), max_norm=2.0)  # gradient clipping
        optimizer.step()  # Update parameters based on gradients.
        return loss.item()

    def test_model(self, data, criterion):
        self.eval()
        out = self(data.x, data.edge_index)
        out = out.view(-1)
        acc = criterion(out[data.id_test_mask], data.y[data.id_test_mask].float())
        return acc.item()

### Hyperopt

In [643]:
# Defina o espaço de busca para os hiperparâmetros
space = {
    'hidden_channels': hp.choice('hidden_channels', [8, 16, 32]),
    'mlp_hidden_channels': hp.choice('mlp_hidden_channels', [8, 16, 32]),
    'num_hidden_layers': hp.choice('num_hidden_layers', [0,1,2]),
    'mlp_num_hidden_layers': hp.choice('mlp_num_hidden_layers', [1,2,3]),
    'dropout_prob': hp.uniform('dropout_prob', 0, 0.6),
    'lr': hp.uniform('lr', 0.001, 0.01),
    'weight_decay': hp.uniform('weight_decay', 0, 0.1),
    'n_epochs': hp.choice('n_epochs', [1000, 2000])
}

# Função objetivo para otimização
def objective(params):
    hidden_channels = params['hidden_channels']
    mlp_hidden_channels = params['mlp_hidden_channels']
    num_hidden_layers = params['num_hidden_layers']
    mlp_num_hidden_layers = params['mlp_num_hidden_layers']
    dropout_prob = params['dropout_prob']
    lr = params['lr']
    weight_decay = params['weight_decay']
    n_epochs = params['n_epochs']

    # Criar modelo com os hiperparâmetros
    model = GENConv_MLPModel(hidden_channels=hidden_channels, mlp_hidden_channels=mlp_hidden_channels, num_hidden_layers=num_hidden_layers, 
    mlp_num_hidden_layers=mlp_num_hidden_layers, dropout_prob=dropout_prob)

    # Definir loss function e optimizer
    criterion = torch.nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

    # Treinamento e teste
    for epoch in range(1, n_epochs):
        loss = model.train_model(data, criterion, optimizer)
        acc = model.test_model(data, criterion)

    # Retorna a métrica que você deseja otimizar (por exemplo, negativo do desempenho)
    return acc

# Executar a otimização com o Hyperopt
best = fmin(fn=objective, space=space, algo=tpe.suggest, max_evals=50)

# Imprimir os melhores hiperparâmetros encontrados
print("Melhores hiperparâmetros encontrados:", best)

100%|██████████| 50/50 [1:48:50<00:00, 130.61s/trial, best loss: 824.2120361328125]   
Melhores hiperparâmetros encontrados: {'dropout_prob': 0.30223267275005294, 'hidden_channels': 2, 'lr': 0.004107697470660071, 'mlp_hidden_channels': 0, 'mlp_num_hidden_layers': 1, 'n_epochs': 1, 'num_hidden_layers': 1, 'weight_decay': 0.07859428926983367}


In [103]:
resultado = {
    'dropout_prob': 0.30223267275005294, 'hidden_channels': 32, 'lr': 0.004107697470660071, 'mlp_hidden_channels': 8, 'mlp_num_hidden_layers': 2, 
    'n_epochs': 2000, 'num_hidden_layers': 1, 'weight_decay': 0.07859428926983367
}

hidden_channels = resultado['hidden_channels']
lr = resultado['lr']
n_epochs = resultado['n_epochs']
num_hidden_layers = resultado['num_hidden_layers']
weight_decay = resultado['weight_decay']
dropout_prob = resultado['dropout_prob']
mlp_hidden_channels = resultado['mlp_hidden_channels']
mlp_num_hidden_layers = resultado['mlp_num_hidden_layers']

torch.manual_seed(42)
model = GENConv_MLPModel(hidden_channels=hidden_channels, mlp_hidden_channels=mlp_hidden_channels, num_hidden_layers=num_hidden_layers, 
    mlp_num_hidden_layers=mlp_num_hidden_layers, dropout_prob=dropout_prob)


# Definir loss function e optimizer
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

# Treinamento e teste
for epoch in range(1, n_epochs):
    loss = model.train_model(data, criterion, optimizer)
    loss_test = model.test_model(data, criterion)

print(f'Loss: {loss:.4f}, Loss Test: {loss_test:.4f}')

Loss: 1186.9745, Loss Test: 824.2120


### Repeating using another set of 30 seeds for the model with the best configuration found for seed 42

In [104]:
class GENConv_MLPModel(torch.nn.Module):
    def __init__(self, hidden_channels, mlp_hidden_channels, num_hidden_layers, mlp_num_hidden_layers, dropout_prob):
        super(GENConv_MLPModel, self).__init__()

        # Graph Convolutional Layers
        self.fc_in = GENConv(data.num_features, hidden_channels)
        self.dropout_layer = Dropout(dropout_prob)
        self.relu = torch.nn.ReLU()
        
        self.hidden_layers = nn.ModuleList([GENConv(hidden_channels, hidden_channels) for _ in range(num_hidden_layers)])
        self.hidden_layers_mlp = nn.ModuleList([
            nn.Linear(hidden_channels, mlp_hidden_channels) if i == 0 else nn.Linear(mlp_hidden_channels, mlp_hidden_channels)
            for i in range(mlp_num_hidden_layers)
        ])

        # Camada de saída
        self.fc_out = nn.Linear(mlp_hidden_channels, 1)

    def forward(self, x, edge_index):
        # Camada de entrada
        x = self.fc_in(x, edge_index)
        x = self.relu(x)
        x = self.dropout_layer(x)

        # Camadas ocultas
        for layer in self.hidden_layers:
            x = layer(x, edge_index)
            x = self.relu(x)
            x = self.dropout_layer(x)

        for layer in self.hidden_layers_mlp:
            x = layer(x)
            x = self.relu(x)

        # Camada de saída
        x = self.fc_out(x)

        return x

    def train_model(self, data, criterion, optimizer):
        optimizer.zero_grad()  # Clear gradients.
        out = self(data.x, data.edge_index)  # Perform a single forward pass.
        out = out.view(-1)
        loss = criterion(out[data.id_train_mask], data.y[data.id_train_mask].float())  # Compute the loss solely based on the training nodes.
        loss.backward()  # Derive gradients.
        # torch.nn.utils.clip_grad_norm_(self.parameters(), max_norm=2.0)  # gradient clipping
        optimizer.step()  # Update parameters based on gradients.
        return loss.item()

    def test_model(self, data, criterion):
        self.eval()
        out = self(data.x, data.edge_index)
        out = out.view(-1)
        acc = criterion(out[data.id_test_mask], data.y[data.id_test_mask].float())
        return acc.item()

# Lists to store accuracies for each seed
final_accuracies = []
start_time = time.time()
# Training and Testing loops for each seed
for seed in seeds:
    # Instantiate the model with a different seed
    torch.manual_seed(seed)
    model = GENConv_MLPModel(hidden_channels=hidden_channels, mlp_hidden_channels=mlp_hidden_channels, num_hidden_layers=num_hidden_layers, 
    mlp_num_hidden_layers=mlp_num_hidden_layers, dropout_prob=dropout_prob)

    # Fazer previsões nos conjuntos de treino e teste
    # Definir loss function e optimizer
    criterion = torch.nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    
    # Treinamento e teste
    for epoch in range(1, n_epochs):
        loss = model.train_model(data, criterion, optimizer)
        loss_test = model.test_model(data, criterion)

    print(f'Seed: {seed}, Loss: {loss:.4f}, Test Loss: {loss_test:.4f}')

    # Save accuracies for this seed
    final_accuracies.append(loss_test)

end_time = time.time()
final_time = end_time - start_time
# Calculate mean and standard deviation of accuracies
mean_accuracies = np.mean(final_accuracies , axis=0)
std_accuracies = np.std(final_accuracies , axis=0)

# Print or use mean_accuracies and std_accuracies as needed
print("Mean Accuracies:", mean_accuracies) 
print("Standard Deviations:", std_accuracies)
print("Time:", final_time, "s")

Seed: 81, Loss: 1022.1675, Test Loss: 1944.9821
Seed: 14, Loss: 1900.4095, Test Loss: 2048.5435
Seed: 3, Loss: 2113.4297, Test Loss: 2519.6387
Seed: 94, Loss: 2445.0461, Test Loss: 1622.6274
Seed: 35, Loss: 1393.4845, Test Loss: 2814.7224
Seed: 31, Loss: 1276.8402, Test Loss: 4047.1006
Seed: 28, Loss: 836.2770, Test Loss: 2207.9526
Seed: 17, Loss: 1380.2852, Test Loss: 1170.6349
Seed: 13, Loss: 2436.6064, Test Loss: 2699.9529
Seed: 86, Loss: 2045.0684, Test Loss: 1313.8167
Seed: 69, Loss: 2048.1526, Test Loss: 2244.4285
Seed: 11, Loss: 1037.4315, Test Loss: 1240.2747
Seed: 75, Loss: 1535.6123, Test Loss: 2612.0393
Seed: 54, Loss: 1168.5288, Test Loss: 789.0708
Seed: 4, Loss: 1065.8157, Test Loss: 2315.1074
Seed: 98, Loss: 1445.9508, Test Loss: 2799.2666
Seed: 89, Loss: 1054.7639, Test Loss: 1373.9862
Seed: 27, Loss: 1041.9806, Test Loss: 2317.2065
Seed: 29, Loss: 1324.9193, Test Loss: 1161.4122
Seed: 64, Loss: 1522.8273, Test Loss: 2035.3951
Seed: 77, Loss: 829.9117, Test Loss: 1356.24