# Geração dos Dados

In [1]:
# Torch import
import torch
import torch.nn.functional as F
from torch.nn import CrossEntropyLoss

from torch_geometric.nn import GCNConv

from ogb.nodeproppred import Evaluator

from torch_geometric.data import Data
from torch_geometric.loader import DataLoader
from torch_geometric.utils import erdos_renyi_graph

import random

In [2]:
random.seed(7)

device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [3]:
# Generate a random graph with n nodes and p probability of edge creation and a clique of size clique_size
def generate_graph(n, p, clique_size):
    edge_index = erdos_renyi_graph(n, edge_prob=p)
    clique_nodes = random.sample(range(n), clique_size)

    x = torch.ones(n, 64).type(torch.float32)
    class_label = torch.zeros(n).type(torch.int64)
    class_label[clique_nodes] = 1

    for i, node_i in enumerate(clique_nodes):
        for node_j in clique_nodes[i + 1:]:
            edge_to_add = torch.tensor([[node_i, node_j], [node_j, node_i]])
            edge_index = torch.cat((edge_index, edge_to_add), 1)
    
    return Data(x=x, adj_t=torch.transpose(edge_index, 0, 1), y=class_label)

train_data = [generate_graph(200, 0.5, 50) for _ in range(5000)]
valid_data = [generate_graph(200, 0.5, 50) for _ in range(1000)]
test_data = [generate_graph(200, 0.5, 50) for _ in range(1000)]

class_weights = torch.tensor([50.0, 150.0])

train_loader = DataLoader(train_data, batch_size=32, num_workers=0)
valid_loader = DataLoader(valid_data, batch_size=32, num_workers=0)
test_loader = DataLoader(test_data, batch_size=32, num_workers=0)

# GCN - Graph Convolutional Network

In [4]:
GCN_args = {
    'device': device,
    'num_layers': 3,
    'hidden_dim': 256,
    'dropout': 0.5,
    'lr': 0.01,
    'epochs': 20,
}

class GCN(torch.nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, num_layers,
                 dropout, return_embeds=False):
        super(GCN, self).__init__()

        self.convs = None
        self.bns = None
        self.softmax = None

        def get_in_channels(idx):
            return hidden_dim if idx > 0 else input_dim

        def get_out_channels(idx):
            return hidden_dim if idx < num_layers - 1 else output_dim

        self.convs = torch.nn.ModuleList([
            GCNConv(in_channels=get_in_channels(i), out_channels=get_out_channels(i))
            for i in range(num_layers)
        ])

        self.bns = torch.nn.ModuleList([
            torch.nn.BatchNorm1d(num_features=get_out_channels(i))
            for i in range(num_layers - 1)
        ])

        self.softmax = torch.nn.LogSoftmax(dim=1)
        self.dropout = dropout
        self.return_embeds = return_embeds

    def reset_parameters(self):
        for conv in self.convs:
            conv.reset_parameters()
        for bn in self.bns:
            bn.reset_parameters()

    def forward(self, batched_data):
        x, adj_t = batched_data.x, torch.transpose(batched_data.adj_t,0,1)

        out = None

        for gcn, bn in zip(self.convs[:-1], self.bns):
            x = gcn(x, adj_t)
            x = bn(x)
            x = F.relu(x)
            x = F.dropout(x, p=self.dropout, training=self.training)

        out = self.convs[-1](x, adj_t)
        if not self.return_embeds:
            out = self.softmax(out)

        return out

In [5]:
def train(model, data_loader, optimizer, loss_fn):
    model.train()
    loss = 0

    for batch in data_loader:
        batch = batch.to(device)

        if batch.x.shape[0] == 1 or batch.batch[-1] == 0:
            pass
        else:
            optimizer.zero_grad()

            preds = model(batch)
            labels = batch.y.to(device)
            loss = loss_fn(preds, labels.squeeze())

            loss.backward()
            optimizer.step()

    return loss.item()


# The evaluation function
def eval(model, loader, evaluator):
    model.eval()
    y_true = []
    y_pred = []

    for batch in loader:
        batch = batch.to(device)

        if batch.x.shape[0] == 1:
            pass
        else:
            with torch.no_grad():
                pred = model(batch).argmax(dim=-1, keepdim=True)

            y_true.append(batch.y.view(pred.shape).detach().cpu())
            y_pred.append(pred.detach().cpu())

    y_true = torch.cat(y_true, dim = 0).numpy()
    y_pred = torch.cat(y_pred, dim = 0).numpy()

    input_dict = {"y_true": y_true, "y_pred": y_pred}

    return evaluator.eval(input_dict)["acc"]

In [6]:
num_features = 64
num_labels = 2

model = GCN(num_features, GCN_args['hidden_dim'],
            num_labels, GCN_args['num_layers'],
            GCN_args['dropout']).to(device)

evaluator = Evaluator(name='ogbn-arxiv')

In [8]:
import copy

model.reset_parameters()

optimizer = torch.optim.Adam(model.parameters(), lr=GCN_args['lr'])
loss_fn = CrossEntropyLoss(class_weights.to(device))

best_model = None
best_valid_acc = 0

for epoch in range(1, 1 + GCN_args["epochs"]):
  print('Training...')
  loss = train(model, train_loader, optimizer, loss_fn)

  print('Evaluating...')
  train_result = eval(model, train_loader, evaluator)
  val_result = eval(model, valid_loader, evaluator)
  test_result = eval(model, test_loader, evaluator)

  train_acc, valid_acc, test_acc = train_result, val_result, test_result
  if valid_acc > best_valid_acc:
      best_valid_acc = valid_acc
      best_model = copy.deepcopy(model)
      
  print(f'Epoch: {epoch:02d}, '
        f'Loss: {loss:.4f}, '
        f'Train: {100 * train_acc:.2f}%, '
        f'Valid: {100 * valid_acc:.2f}% '
        f'Test: {100 * test_acc:.2f}%')

Training...
Evaluating...
Epoch: 01, Loss: 0.6958, Train: 25.00%, Valid: 25.00% Test: 25.00%
Training...
