In [7]:
import os 
import torch
from torch.nn.functional import nll_loss
import copy

from ogb.nodeproppred import PygNodePropPredDataset, Evaluator
import torch_geometric.transforms as T

from models import GCN_Classifier
from utils import train, test
from matrix_format import compute_Tsym

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
# Later cells move the data, the adjacency matrix and the model onto this device.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

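**TODO**
- Fix the warning reported at `return self.softmax(z3)` during training (see the output of the training cells below).
- Standardize the code so the same training run can be repeated with several transition matrices and the results compared.
- Prepare the different transition matrices.
- Run everything on Colab.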

## Load Data

In [8]:
# Load ogbn-arxiv and build the adjacency matrix used by the rest of the notebook.
# Everything is skipped when IS_GRADESCOPE_ENV is set in the environment.
# NOTE(review): `dataset`, `data`, `A`, `split_idx` and `train_idx` are only defined
# inside this guard, but later cells use them unconditionally.
if 'IS_GRADESCOPE_ENV' not in os.environ:
  dataset_name = 'ogbn-arxiv'
  # The ToSparseTensor transform is what provides `data.adj_t` used below.
  dataset = PygNodePropPredDataset(name=dataset_name,
                                  transform=T.ToSparseTensor())
  # The first (index 0) graph of the dataset is the one used throughout.
  data = dataset[0]

  # Symmetrize the adjacency matrix
  data.adj_t = data.adj_t.to_symmetric()
  # Only the row/col indices are kept; the `value` returned by coo() is
  # overwritten on the next line so that every stored edge has weight 1.0.
  row, col, value = data.adj_t.coo()
  value = torch.ones_like(row,  dtype=torch.float)

  # Create a sparse tensor from the COO format
  # A is a (num_nodes x num_nodes) torch sparse COO tensor placed on `device`.
  indices = torch.stack([row, col])
  A = torch.sparse_coo_tensor(indices, value, 
                              size=[data.num_nodes, data.num_nodes]).to(device)

  # If you use GPU, the device should be cuda
  print('Device: {}'.format(device))

  data = data.to(device)
  # Index split supplied by the dataset; only the 'train' indices are moved
  # to `device` here.
  split_idx = dataset.get_idx_split()
  train_idx = split_idx['train'].to(device)

Device: cpu


In [14]:
# Transition matrix derived from the adjacency A by the project helper
# `compute_Tsym` (imported from matrix_format), moved onto `device`.
# NOTE(review): presumably the symmetrically normalized adjacency -- confirm
# against matrix_format.compute_Tsym.
Tsym = compute_Tsym(A).to(device)

In [10]:
def gdc(A, alpha, eps):
    """Exact Graph Diffusion Convolution (GDC) with PPR weights, SciPy version.

    Steps:
      1. ``A_loop = I + A``                              (add self-loops)
      2. ``T_sym = D^{-1/2} A_loop D^{-1/2}``            (D = column sums of A_loop)
      3. ``S = alpha * (I - (1 - alpha) * T_sym)^{-1}``  (closed-form PPR diffusion)
      4. ``S_tilde``: entries of ``S`` smaller than ``eps`` are dropped
      5. ``T_S``: ``S_tilde`` with every column rescaled to sum to 1

    Args:
        A: square (N, N) adjacency matrix; anything ``scipy.sparse.csr_matrix``
            accepts (SciPy sparse matrix/array or a dense 2-D array).
        alpha: coefficient of the PPR formula in step 3.
        eps: sparsification threshold; entries ``< eps`` are removed.

    Returns:
        SciPy sparse matrix of shape (N, N). Columns that lose all their
        entries to the threshold are left as all-zero instead of becoming
        inf/nan.

    Note:
        The matrix inverse is dense in general, so time and memory grow
        quadratically (or worse) with N; only use this on small graphs.
    """
    # Imported locally because the notebook's import cell provides neither
    # NumPy nor SciPy; without this the function raises NameError on `sp`/`np`.
    import numpy as np
    import scipy.sparse as sp
    import scipy.sparse.linalg  # registers the `sp.linalg` submodule

    N = A.shape[0]

    # Self-loops (A is normalized to CSR so any sparse/dense input works)
    A_loop = sp.eye(N, format="csr") + sp.csr_matrix(A)

    # Symmetric transition matrix
    # np.asarray(...).ravel() works for both sparse matrices and sparse arrays,
    # unlike the matrix-only `.A1` attribute.
    D_loop_vec = np.asarray(A_loop.sum(axis=0)).ravel()
    D_loop_invsqrt = sp.diags(1.0 / np.sqrt(D_loop_vec))
    T_sym = D_loop_invsqrt @ A_loop @ D_loop_invsqrt

    # PPR-based diffusion (CSC is the format the sparse solver expects)
    system = (sp.eye(N, format="csc") - (1 - alpha) * T_sym).tocsc()
    S = sp.csr_matrix(alpha * sp.linalg.inv(system))

    # Sparsify using threshold epsilon
    S_tilde = S.multiply(S >= eps)

    # Column-normalized transition matrix on graph S_tilde
    D_tilde_vec = np.asarray(S_tilde.sum(axis=0)).ravel()
    # Guard against columns that became empty after thresholding.
    D_tilde_inv = np.divide(1.0, D_tilde_vec,
                            out=np.zeros_like(D_tilde_vec, dtype=float),
                            where=D_tilde_vec != 0)
    T_S = S_tilde @ sp.diags(D_tilde_inv)

    return T_S

In [41]:
def gdc(A, alpha, eps):
    """Exact Graph Diffusion Convolution (GDC) with PPR weights, PyTorch version.

    Steps:
      1. ``A_loop = A + I``                              (add self-loops)
      2. ``T_sym = D^{-1/2} A_loop D^{-1/2}``            (D = row sums of A_loop)
      3. ``S = alpha * (I - (1 - alpha) * T_sym)^{-1}``  (closed-form PPR diffusion)
      4. ``S_tilde``: entries of ``S`` smaller than ``eps`` are set to zero
      5. ``T_S``: ``S_tilde`` with every column rescaled to sum to 1

    Args:
        A: square (N, N) adjacency tensor, sparse COO or dense, on any device.
        alpha: coefficient of the PPR formula in step 3.
        eps: sparsification threshold; entries ``< eps`` are removed.

    Returns:
        Sparse COO tensor of shape (N, N) on the same device as ``A``. Columns
        that lose all their entries to the threshold stay all-zero.

    Note:
        Step 3 is a true matrix inverse. ``torch.pow(M, -1)`` on a sparse
        tensor only takes the reciprocal of each stored value, which is not an
        inverse and yields negative entries for the negative off-diagonal
        values of ``I - (1 - alpha) * T_sym``. The inverse is dense, so this
        needs O(N^2) memory and is only usable on small graphs.
    """
    N = A.shape[0]

    # Work densely: the inverse below is dense anyway. `to_dense()` returns the
    # tensor itself when A is already strided.
    A_dense = A.to_dense()
    if not A_dense.is_floating_point():
        A_dense = A_dense.to(torch.float)

    # Self-loops (identity created on A's device/dtype to avoid mismatches)
    identity = torch.eye(N, dtype=A_dense.dtype, device=A_dense.device)
    A_loop = A_dense + identity

    # Symmetric transition matrix
    D_loop = A_loop.sum(dim=1)
    D_sqrt_inv = torch.pow(D_loop, -0.5)
    T_sym = D_sqrt_inv.unsqueeze(1) * A_loop * D_sqrt_inv.unsqueeze(0)

    # PPR-based diffusion
    S = alpha * torch.linalg.inv(identity - (1 - alpha) * T_sym)

    # Sparsify using threshold epsilon
    S_tilde = torch.where(S >= eps, S, torch.zeros_like(S))

    # Column-normalized transition matrix on graph S_tilde
    # (dim=0 sums over rows, i.e. gives one total per column)
    D_tilde_vec = S_tilde.sum(dim=0)
    # Replace zero totals by 1 so empty columns stay zero instead of inf/nan.
    D_tilde_safe = torch.where(D_tilde_vec != 0, D_tilde_vec,
                               torch.ones_like(D_tilde_vec))
    T_S = S_tilde / D_tilde_safe.unsqueeze(0)

    # Hand back a sparse COO tensor, like the adjacency that was passed in.
    return T_S.to_sparse()

# Diffusion matrix computed from the unweighted adjacency A with
# alpha = 0.05 and sparsification threshold eps = 1e-4.
S = gdc(A, 0.05, 1e-4)

## Define model

In [35]:
# Run configuration: training on the raw adjacency matrix A.
args = {
    # where the model is placed
    'device': device,

    # architecture settings ('num_layers' is not passed to GCN_Classifier
    # by the training cell of this notebook)
    'num_layers': 5,
    'hidden_dim': 256,
    'dropout': 0.5,

    # optimisation settings
    'lr': 0.001,
    'epochs': 10,

    # matrix handed to train()/test() for this run
    'trans_matrix': A,
}

In [5]:
# Fresh classifier, optimizer and evaluator for the configuration held in `args`.
model = GCN_Classifier(
    input_dim=dataset.num_features,
    hidden_dim=args['hidden_dim'],
    output_dim=dataset.num_classes,
    dropout=args['dropout'],
).to(args['device'])

optimizer = torch.optim.Adam(model.parameters(), lr=args['lr'])
evaluator = Evaluator(name='ogbn-arxiv')
loss_fn = nll_loss

# Best checkpoint so far, selected on validation accuracy.
best_model, best_valid_acc = None, 0

for epoch in range(1, args["epochs"] + 1):
    # One optimisation pass, then accuracy on the train/valid/test splits.
    loss = train(model, data, train_idx, args['trans_matrix'], optimizer, loss_fn)
    result = test(model, data, split_idx, args['trans_matrix'], evaluator)
    train_acc, valid_acc, test_acc = result

    # Snapshot the model whenever validation accuracy strictly improves.
    if valid_acc > best_valid_acc:
        best_valid_acc = valid_acc
        best_model = copy.deepcopy(model)

    print(f'Epoch: {epoch:02d}, '
          f'Loss: {loss:.4f}, '
          f'Train: {100 * train_acc:.2f}%, '
          f'Valid: {100 * valid_acc:.2f}% '
          f'Test: {100 * test_acc:.2f}%')

  return self.softmax(z3)


Epoch: 01, Loss: 24.1443, Train: 17.52%, Valid: 16.19% Test: 17.24%
Epoch: 02, Loss: 11.7148, Train: 14.53%, Valid: 26.74% Test: 25.34%
Epoch: 03, Loss: 6.9290, Train: 14.60%, Valid: 24.30% Test: 22.56%
Epoch: 04, Loss: 6.4572, Train: 18.98%, Valid: 27.37% Test: 25.90%
Epoch: 05, Loss: 6.0113, Train: 20.22%, Valid: 27.64% Test: 25.97%
Epoch: 06, Loss: 5.1360, Train: 22.88%, Valid: 30.59% Test: 29.86%
Epoch: 07, Loss: 5.1167, Train: 25.39%, Valid: 34.52% Test: 36.19%
Epoch: 08, Loss: 4.6262, Train: 25.47%, Valid: 35.10% Test: 38.38%
Epoch: 09, Loss: 5.5842, Train: 27.30%, Valid: 37.36% Test: 37.56%
Epoch: 10, Loss: 5.0533, Train: 26.33%, Valid: 36.08% Test: 34.87%


KeyboardInterrupt: 

In [None]:
# Run configuration: training on the transition matrix returned by compute_Tsym.
args = {
    'device': device,
    # Not passed to GCN_Classifier by the training cell of this notebook.
    'num_layers': 5,
    'hidden_dim': 256,
    'dropout': 0.5,
    'lr': 0.001,
    'epochs': 10,
    # The global defined by `Tsym = compute_Tsym(A).to(device)` is named `Tsym`;
    # `T_sym` exists only as a local inside gdc(), so referencing it here
    # raises NameError on a fresh kernel.
    'trans_matrix': Tsym
}

In [37]:
# Fresh classifier, optimizer and evaluator for the configuration held in `args`.
model = GCN_Classifier(
    input_dim=dataset.num_features,
    hidden_dim=args['hidden_dim'],
    output_dim=dataset.num_classes,
    dropout=args['dropout'],
).to(args['device'])

optimizer = torch.optim.Adam(model.parameters(), lr=args['lr'])
evaluator = Evaluator(name='ogbn-arxiv')
loss_fn = nll_loss

# Best checkpoint so far, selected on validation accuracy.
best_model, best_valid_acc = None, 0

for epoch in range(1, args["epochs"] + 1):
    # One optimisation pass, then accuracy on the train/valid/test splits.
    loss = train(model, data, train_idx, args['trans_matrix'], optimizer, loss_fn)
    result = test(model, data, split_idx, args['trans_matrix'], evaluator)
    train_acc, valid_acc, test_acc = result

    # Snapshot the model whenever validation accuracy strictly improves.
    if valid_acc > best_valid_acc:
        best_valid_acc = valid_acc
        best_model = copy.deepcopy(model)

    print(f'Epoch: {epoch:02d}, '
          f'Loss: {loss:.4f}, '
          f'Train: {100 * train_acc:.2f}%, '
          f'Valid: {100 * valid_acc:.2f}% '
          f'Test: {100 * test_acc:.2f}%')

  return self.softmax(z3)


Epoch: 01, Loss: 3.8077, Train: 11.00%, Valid: 22.97% Test: 21.55%
Epoch: 02, Loss: 3.5011, Train: 11.05%, Valid: 22.99% Test: 21.56%
Epoch: 03, Loss: 3.2281, Train: 12.70%, Valid: 23.27% Test: 21.78%
Epoch: 04, Loss: 3.0117, Train: 19.01%, Valid: 25.74% Test: 23.78%
Epoch: 05, Loss: 2.8382, Train: 25.22%, Valid: 29.60% Test: 28.26%
Epoch: 06, Loss: 2.7014, Train: 29.33%, Valid: 33.68% Test: 33.43%
Epoch: 07, Loss: 2.5909, Train: 32.02%, Valid: 37.57% Test: 38.34%
Epoch: 08, Loss: 2.4956, Train: 33.54%, Valid: 39.55% Test: 41.39%
Epoch: 09, Loss: 2.4109, Train: 33.84%, Valid: 39.96% Test: 42.40%
Epoch: 10, Loss: 2.3391, Train: 33.50%, Valid: 39.55% Test: 42.28%


In [None]:
# Run configuration: training on the diffusion matrix S returned by gdc().
args = {
    # where the model is placed
    'device': device,

    # architecture settings ('num_layers' is not passed to GCN_Classifier
    # by the training cell of this notebook)
    'num_layers': 5,
    'hidden_dim': 256,
    'dropout': 0.5,

    # optimisation settings
    'lr': 0.001,
    'epochs': 10,

    # matrix handed to train()/test() for this run
    'trans_matrix': S,
}

In [42]:
# Fresh classifier, optimizer and evaluator for the configuration held in `args`.
model = GCN_Classifier(
    input_dim=dataset.num_features,
    hidden_dim=args['hidden_dim'],
    output_dim=dataset.num_classes,
    dropout=args['dropout'],
).to(args['device'])

optimizer = torch.optim.Adam(model.parameters(), lr=args['lr'])
evaluator = Evaluator(name='ogbn-arxiv')
loss_fn = nll_loss

# Best checkpoint so far, selected on validation accuracy.
best_model, best_valid_acc = None, 0

for epoch in range(1, args["epochs"] + 1):
    # One optimisation pass, then accuracy on the train/valid/test splits.
    loss = train(model, data, train_idx, args['trans_matrix'], optimizer, loss_fn)
    result = test(model, data, split_idx, args['trans_matrix'], evaluator)
    train_acc, valid_acc, test_acc = result

    # Snapshot the model whenever validation accuracy strictly improves.
    if valid_acc > best_valid_acc:
        best_valid_acc = valid_acc
        best_model = copy.deepcopy(model)

    print(f'Epoch: {epoch:02d}, '
          f'Loss: {loss:.4f}, '
          f'Train: {100 * train_acc:.2f}%, '
          f'Valid: {100 * valid_acc:.2f}% '
          f'Test: {100 * test_acc:.2f}%')

NameError: name 'args' is not defined