In [1]:
import os 
import torch
from torch.nn.functional import nll_loss
from tqdm.notebook import tqdm
import copy

from ogb.nodeproppred import PygNodePropPredDataset, Evaluator
import torch_geometric.transforms as T

from models import GCN_Classifier
from utils import train, test

# Run on the GPU when PyTorch reports one as available, otherwise on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

  from .autonotebook import tqdm as notebook_tqdm


**TODO:**
- Resolve the softmax warning emitted by the model (`return self.softmax(z3)`).
- Understand what `data.adj_t` is.
- Standardize the code so it can be run with several transition matrices and the results compared.
- Prepare the different matrices.
- Run everything on Colab.

## Load Data

In [2]:
if 'IS_GRADESCOPE_ENV' not in os.environ:
  # Load ogbn-arxiv with the ToSparseTensor transform and take its first
  # (index 0) graph object.
  dataset_name = 'ogbn-arxiv'
  dataset = PygNodePropPredDataset(name=dataset_name, transform=T.ToSparseTensor())
  data = dataset[0]

  # Symmetrize the adjacency matrix stored on the graph object.
  data.adj_t = data.adj_t.to_symmetric()

  # Keep only the (row, col) coordinates of the stored entries; whatever
  # values the adjacency carries are ignored and every entry gets weight 1.0.
  row, col = data.adj_t.coo()[:2]
  value = torch.ones_like(row, dtype=torch.float)

  # Re-express the adjacency as a torch sparse COO tensor of shape
  # (num_nodes, num_nodes), then move it to the selected device.
  indices = torch.stack((row, col))
  adj_t = torch.sparse_coo_tensor(indices, value,
                                  size=(data.num_nodes, data.num_nodes))
  adj_t = adj_t.to(device)

  # When a GPU is in use, this should print "cuda".
  print('Device: {}'.format(device))

  # Move the graph data and the training indices to the same device.
  data = data.to(device)
  split_idx = dataset.get_idx_split()
  train_idx = split_idx['train'].to(device)

Device: cpu


In [14]:
# Symmetric normalization of the adjacency matrix `adj_t`:
#     Tsym = D^{-1/2} @ adj_t @ D^{-1/2}
# where D is the diagonal matrix of row sums (node degrees) of `adj_t`.

# Degree vector: row sums of the sparse adjacency, as a dense 1-D tensor.
D = torch.sparse.sum(adj_t, dim=1).to_dense()

# Element-wise D^{-1/2}. A row that sums to 0 would give 0 ** -0.5 == inf;
# zero those entries so that no inf is stored in the normalizer and no
# inf/NaN can leak into Tsym.
deg_inv_sqrt = torch.pow(D, -0.5).masked_fill(D == 0, 0.0)

# Coordinates (i, i) of the diagonal, created on the same device as the
# values instead of relying on implicit device inference. A separate name is
# used so the edge `indices` built when loading the data are not overwritten.
n_diag = deg_inv_sqrt.numel()
diag_indices = torch.arange(n_diag, device=deg_inv_sqrt.device).unsqueeze(0).repeat(2, 1)

# Sparse diagonal matrix D^{-1/2}.
D_sqrt_inv = torch.sparse_coo_tensor(diag_indices, deg_inv_sqrt,
                                     size=(n_diag, n_diag))

# Compute symmetrically normalized adjacency matrix Tsym
Tsym = D_sqrt_inv.matmul(adj_t).matmul(D_sqrt_inv)

## Define model

In [15]:
# Run configuration shared by the model-construction and training code.
# NOTE(review): 'num_layers' is not read by any cell visible in this notebook
# excerpt — confirm whether GCN_Classifier is meant to receive it.
args = dict(
    device=device,          # torch.device selected in the import cell
    num_layers=5,
    hidden_dim=256,
    dropout=0.5,
    lr=0.001,               # learning rate passed to the Adam optimizer
    epochs=1,               # number of passes of the training loop
    trans_matrix=Tsym,      # matrix handed to train() and test()
)

In [5]:
# Build the classifier from the dataset dimensions and the configured
# hyper-parameters, then move it to the configured device.
model = GCN_Classifier(
    input_dim=dataset.num_features,
    hidden_dim=args['hidden_dim'],
    output_dim=dataset.num_classes,
    dropout=args['dropout'],
)
model = model.to(args['device'])

# Optimizer, OGB evaluator and loss function handed to train()/test().
optimizer = torch.optim.Adam(model.parameters(), lr=args['lr'])
evaluator = Evaluator(name='ogbn-arxiv')
loss_fn = nll_loss

# Snapshot of the model with the highest validation accuracy seen so far.
best_model, best_valid_acc = None, 0

# Loop-invariant settings, looked up once.
num_epochs = args["epochs"]
trans_matrix = args['trans_matrix']

for epoch in range(1, num_epochs + 1):
  # One training step followed by an evaluation on the three splits.
  loss = train(model, data, train_idx, trans_matrix, optimizer, loss_fn)
  result = test(model, data, split_idx, trans_matrix, evaluator)
  train_acc, valid_acc, test_acc = result

  # Keep a deep copy only when validation accuracy strictly improves.
  if valid_acc > best_valid_acc:
    best_valid_acc, best_model = valid_acc, copy.deepcopy(model)

  epoch_summary = (f'Epoch: {epoch:02d}, '
                   f'Loss: {loss:.4f}, '
                   f'Train: {100 * train_acc:.2f}%, '
                   f'Valid: {100 * valid_acc:.2f}% '
                   f'Test: {100 * test_acc:.2f}%')
  print(epoch_summary)

  return self.softmax(z3)


Epoch: 01, Loss: 24.1443, Train: 17.52%, Valid: 16.19% Test: 17.24%
Epoch: 02, Loss: 11.7148, Train: 14.53%, Valid: 26.74% Test: 25.34%
Epoch: 03, Loss: 6.9290, Train: 14.60%, Valid: 24.30% Test: 22.56%
Epoch: 04, Loss: 6.4572, Train: 18.98%, Valid: 27.37% Test: 25.90%
Epoch: 05, Loss: 6.0113, Train: 20.22%, Valid: 27.64% Test: 25.97%
Epoch: 06, Loss: 5.1360, Train: 22.88%, Valid: 30.59% Test: 29.86%
Epoch: 07, Loss: 5.1167, Train: 25.39%, Valid: 34.52% Test: 36.19%
Epoch: 08, Loss: 4.6262, Train: 25.47%, Valid: 35.10% Test: 38.38%
Epoch: 09, Loss: 5.5842, Train: 27.30%, Valid: 37.36% Test: 37.56%
Epoch: 10, Loss: 5.0533, Train: 26.33%, Valid: 36.08% Test: 34.87%


KeyboardInterrupt: 