In [None]:
colab = False

if colab:
  import torch
  import os
  print("PyTorch has version {}".format(torch.__version__))

  # Install torch geometric
  if 'IS_GRADESCOPE_ENV' not in os.environ:
    !pip install torch-scatter -f https://pytorch-geometric.com/whl/torch-1.13.1+cu116.html
    !pip install torch-sparse -f https://pytorch-geometric.com/whl/torch-1.13.1+cu116.html
    !pip install torch-geometric
    !pip install ogb

  !git clone https://github.com/thibautvalour/Graph-Diffusion-Convolution.git
  %cd Graph-Diffusion-Convolution

In [None]:
import os 
import math
import torch
from torch.nn.functional import nll_loss
import copy

from ogb.nodeproppred import PygNodePropPredDataset, Evaluator
import torch_geometric.transforms as T

from models import GCN_Classifier
from utils import train, test
from matrix_format import compute_Tsym, gdc_pagerank

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

## Load Data

In [None]:
if 'IS_GRADESCOPE_ENV' not in os.environ:
  # Load the dataset with the ToSparseTensor transform; the adjacency is then
  # read from data.adj_t.
  dataset_name = 'ogbn-arxiv'
  dataset = PygNodePropPredDataset(name=dataset_name, transform=T.ToSparseTensor())
  data = dataset[0]

  # Symmetrise the adjacency matrix.
  data.adj_t = data.adj_t.to_symmetric()

  # Rebuild the graph as a torch sparse COO matrix with unit edge weights:
  # only the row/column indices of adj_t are kept.
  row, col = data.adj_t.coo()[:2]
  value = torch.ones_like(row, dtype=torch.float)
  indices = torch.stack([row, col])
  A = torch.sparse_coo_tensor(
      indices, value, size=[data.num_nodes, data.num_nodes]
  ).to(device)

  # If you use GPU, the device should be cuda
  print('Device: {}'.format(device))

  # Move the graph data and the training node indices to the compute device.
  data = data.to(device)
  split_idx = dataset.get_idx_split()
  train_idx = split_idx['train'].to(device)

In [None]:
# Transition matrix derived from A by the project's compute_Tsym helper
# (presumably the symmetrically normalised adjacency — confirm in matrix_format).
T_sym = compute_Tsym(A)

# Define model

In [None]:
# Hyper-parameters for the run that propagates with the raw adjacency matrix A.
# NOTE(review): 'num_layers' is not read by any cell visible in this notebook —
# confirm whether GCN_Classifier is meant to receive it.
args = dict(
    device=device,
    num_layers=5,
    hidden_dim=256,
    dropout=0.5,
    lr=0.001,
    epochs=20,
    trans_matrix=A,
)

In [None]:
# Fresh classifier for this run; layer sizes come from the dataset and args.
model = GCN_Classifier(input_dim=dataset.num_features,
                       hidden_dim=args['hidden_dim'],
                       output_dim=dataset.num_classes,
                       dropout=args['dropout']).to(args['device'])

optimizer = torch.optim.Adam(model.parameters(), lr=args['lr'])
evaluator = Evaluator(name='ogbn-arxiv')
loss_fn = nll_loss

# Snapshot of the model with the highest validation accuracy seen so far.
best_model = None
best_valid_acc = 0

for epoch in range(1, 1 + args["epochs"]):
  # One optimisation step, then evaluation on all three splits, both using
  # the transition matrix selected in args.
  loss = train(model, data, train_idx, args['trans_matrix'], optimizer, loss_fn)
  result = test(model, data, split_idx, args['trans_matrix'], evaluator)
  train_acc, valid_acc, test_acc = result
  if valid_acc > best_valid_acc:
    best_valid_acc = valid_acc
    # Deep copy so later epochs do not overwrite the kept weights.
    best_model = copy.deepcopy(model)
  print(f'Epoch: {epoch:02d}, '
        f'Loss: {loss:.4f}, '
        f'Train: {100 * train_acc:.2f}%, '
        f'Valid: {100 * valid_acc:.2f}%, '
        f'Test: {100 * test_acc:.2f}%')

# Report the figure the checkpoint selection is based on.
print(f'Best valid: {100 * best_valid_acc:.2f}%')

In [None]:
# Hyper-parameters for the run that propagates with T_sym instead of A.
# NOTE(review): 'num_layers' is not read by any cell visible in this notebook —
# confirm whether GCN_Classifier is meant to receive it.
args = dict(
    device=device,
    num_layers=5,
    hidden_dim=256,
    dropout=0.5,
    lr=0.001,
    epochs=50,
    trans_matrix=T_sym,
)

In [None]:
# Fresh classifier for this run; layer sizes come from the dataset and args.
model = GCN_Classifier(input_dim=dataset.num_features,
                       hidden_dim=args['hidden_dim'],
                       output_dim=dataset.num_classes,
                       dropout=args['dropout']).to(args['device'])

optimizer = torch.optim.Adam(model.parameters(), lr=args['lr'])
evaluator = Evaluator(name='ogbn-arxiv')
loss_fn = nll_loss

# Snapshot of the model with the highest validation accuracy seen so far.
best_model = None
best_valid_acc = 0

for epoch in range(1, 1 + args["epochs"]):
  # One optimisation step, then evaluation on all three splits, both using
  # the transition matrix selected in args.
  loss = train(model, data, train_idx, args['trans_matrix'], optimizer, loss_fn)
  result = test(model, data, split_idx, args['trans_matrix'], evaluator)
  train_acc, valid_acc, test_acc = result
  if valid_acc > best_valid_acc:
    best_valid_acc = valid_acc
    # Deep copy so later epochs do not overwrite the kept weights.
    best_model = copy.deepcopy(model)
  print(f'Epoch: {epoch:02d}, '
        f'Loss: {loss:.4f}, '
        f'Train: {100 * train_acc:.2f}%, '
        f'Valid: {100 * valid_acc:.2f}%, '
        f'Test: {100 * test_acc:.2f}%')

# Report the figure the checkpoint selection is based on.
print(f'Best valid: {100 * best_valid_acc:.2f}%')

# Diffusion: PageRank

In [None]:
# PageRank-based diffusion matrix built from A by the project's gdc_pagerank
# helper. NOTE(review): 0.15 and 1e-4 are presumably the teleport probability
# and the sparsification threshold — confirm against matrix_format.gdc_pagerank.
S_pr = gdc_pagerank(A, 0.15, 1e-4)

In [None]:
# Hyper-parameters for the run that propagates with the PageRank diffusion
# matrix S_pr.
# NOTE(review): 'num_layers' is not read by any cell visible in this notebook —
# confirm whether GCN_Classifier is meant to receive it.
args = dict(
    device=device,
    num_layers=5,
    hidden_dim=256,
    dropout=0.5,
    lr=0.001,
    epochs=60,
    trans_matrix=S_pr,
)

In [None]:
# Fresh classifier for this run; layer sizes come from the dataset and args.
model = GCN_Classifier(input_dim=dataset.num_features,
                       hidden_dim=args['hidden_dim'],
                       output_dim=dataset.num_classes,
                       dropout=args['dropout']).to(args['device'])

optimizer = torch.optim.Adam(model.parameters(), lr=args['lr'])
evaluator = Evaluator(name='ogbn-arxiv')
loss_fn = nll_loss

# Snapshot of the model with the highest validation accuracy seen so far.
best_model = None
best_valid_acc = 0

for epoch in range(1, 1 + args["epochs"]):
  # One optimisation step, then evaluation on all three splits, both using
  # the transition matrix selected in args.
  loss = train(model, data, train_idx, args['trans_matrix'], optimizer, loss_fn)
  result = test(model, data, split_idx, args['trans_matrix'], evaluator)
  train_acc, valid_acc, test_acc = result
  if valid_acc > best_valid_acc:
    best_valid_acc = valid_acc
    # Deep copy so later epochs do not overwrite the kept weights.
    best_model = copy.deepcopy(model)
  print(f'Epoch: {epoch:02d}, '
        f'Loss: {loss:.4f}, '
        f'Train: {100 * train_acc:.2f}%, '
        f'Valid: {100 * valid_acc:.2f}%, '
        f'Test: {100 * test_acc:.2f}%')

# Report the figure the checkpoint selection is based on.
print(f'Best valid: {100 * best_valid_acc:.2f}%')

# Diffusion: heat kernel

In [None]:
# Scratch expression: builds an empty tensor of shape (2, 0) and displays it.
# The value is not assigned, so no other cell depends on it.
# NOTE(review): looks like leftover debugging — candidate for removal.
torch.tensor([[],[]])

In [None]:
def gdc_heat(A, t, sum_limit, eps):
    """Build a sparsified, column-normalised heat-kernel diffusion matrix.

    Steps:
      1. add self-loops to ``A`` and form
         ``T_sym = D^{-1/2} (A + I) D^{-1/2}`` with ``D`` the row sums of
         ``A + I``;
      2. accumulate the truncated series
         ``S = sum_{k=0}^{sum_limit-1} exp(-t) * t**k / k! * T_sym**k``;
      3. remove every entry of ``S`` that is smaller than ``eps``;
      4. divide each remaining entry by the sum of its column, so every
         non-empty column of the result sums to one (for a symmetric ``A``
         the row and column sums coincide).

    Args:
        A: square sparse COO adjacency tensor of shape (N, N). All
            intermediate tensors are created on ``A.device``.
        t: diffusion time used in the heat-kernel coefficients.
        sum_limit: number of series terms to accumulate.
        eps: sparsification threshold; entries of ``S`` below it are dropped.

    Returns:
        A coalesced sparse COO tensor of shape (N, N) that stores only the
        entries that survived the threshold.
    """
    N = A.shape[0]
    dev = A.device

    # Sparse identity: supplies the self-loops and serves as T_sym**0.
    diag = torch.arange(N, device=dev)
    identity = torch.sparse_coo_tensor(
        torch.stack([diag, diag]),
        torch.ones(N, dtype=torch.float, device=dev),
        size=(N, N),
    ).coalesce()

    # Coalesce before reading indices/values: sparse addition may leave
    # duplicate coordinates, and .indices() rejects uncoalesced tensors.
    A_loop = (A + identity).coalesce()
    rows, cols = A_loop.indices()
    weights = A_loop.values()

    # Symmetric transition matrix. Scaling the stored values directly is
    # equivalent to D^{-1/2} @ A_loop @ D^{-1/2} without two sparse matmuls.
    degree = torch.zeros(N, dtype=weights.dtype, device=dev).index_add_(0, rows, weights)
    inv_sqrt_degree = degree.pow(-0.5)
    trans_sym = torch.sparse_coo_tensor(
        A_loop.indices(),
        inv_sqrt_degree[rows] * weights * inv_sqrt_degree[cols],
        size=(N, N),
    ).coalesce()

    # Truncated heat-kernel series.
    S = torch.sparse_coo_tensor(
        torch.empty((2, 0), dtype=torch.long, device=dev),
        torch.empty(0, dtype=weights.dtype, device=dev),
        size=(N, N),
    )
    T_k = identity
    for k in range(sum_limit):
        heat_coeff = math.exp(-t) * t**k / math.factorial(k)
        S = S + heat_coeff * T_k
        # The next power is only needed if another term follows.
        if k + 1 < sum_limit:
            T_k = torch.sparse.mm(T_k, trans_sym)

    # TODO: negative entries in S have been observed; every term of the series
    # is non-negative when A is, so check A's values if this recurs.
    # Sparsify: keep only the coordinates whose value reaches eps, so that
    # dropped entries are removed rather than stored as explicit zeros.
    S = S.coalesce()
    keep = S.values() >= eps
    kept_idx = S.indices()[:, keep]
    kept_val = S.values()[keep]

    # Column normalisation. A column that lost all of its entries has nothing
    # left to divide, so no division by zero can occur for a positive eps.
    kept_cols = kept_idx[1]
    col_sum = torch.zeros(N, dtype=kept_val.dtype, device=dev).index_add_(0, kept_cols, kept_val)
    T_S = torch.sparse_coo_tensor(
        kept_idx, kept_val / col_sum[kept_cols], size=(N, N)
    )

    return T_S.coalesce()

# Heat-kernel diffusion of A with t = 3, 25 series terms and threshold 1e-4.
S = gdc_heat(A, 3, 25, 1e-4)