In [49]:
colab = False

import torch
import os 

if colab:
  print("PyTorch has version {}".format(torch.__version__))

  # Install torch geometric
  if 'IS_GRADESCOPE_ENV' not in os.environ:
    !pip install torch-scatter -f https://pytorch-geometric.com/whl/torch-1.13.1+cu116.html
    !pip install torch-sparse -f https://pytorch-geometric.com/whl/torch-1.13.1+cu116.html
    !pip install torch-geometric

  !git clone https://github.com/thibautvalour/Graph-Diffusion-Convolution.git
  %cd Graph-Diffusion-Convolution

import numpy as np
import networkx as nx
import pandas as pd
import math as math
from torch_geometric.datasets import Planetoid
from torch_geometric.transforms.gdc import GDC
from torch.nn.functional import nll_loss

from matrix_format import gdc_pagerank, gdc_heat, compute_Lsym, compute_Lrw
from models import GCN_Classifier
from utils import train, test

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('device:', device)

device: cpu


# 1 Cora

In [2]:
dataset = Planetoid(root='../cora', name='Cora')
data = dataset[0]

test_prop = 0.2
train_idx = torch.tensor(np.random.binomial(1, 1-test_prop, size=data.y.shape[0])).to(bool)
test_idx = torch.tensor(np.logical_not(train_idx)).to(bool).to(device)
train_idx = train_idx.to(device)

Gnx = nx.from_pandas_edgelist(pd.DataFrame(data['edge_index'].T,
                                           columns=['source', 'target']))
Gnx = Gnx.to_undirected()

# Exctract adjacency matrix
A = nx.adjacency_matrix(Gnx).toarray()
data = data.to(device)

  test_idx = torch.tensor(np.logical_not(train_idx)).to(bool).to(device)


### 1.1 Laplacian matrix

In [51]:
Lsym = compute_Lsym(A)
Lsym = torch.from_numpy(Lsym).float().to(device)

In [52]:
args = {
    'device': device,
    'hidden_layers': 1,
    'hidden_dim': 264,
    'dropout': 0.3,
    'lr': 3e-4,
    'epochs': 80,
    'trans_matrix': Lsym
}

In [None]:
model = GCN_Classifier(input_dim=dataset.num_features,
                       hidden_dim=args['hidden_dim'],
                       hidden_layers=args['hidden_layers'],
                       output_dim=dataset.num_classes,
                       dropout=args['dropout']).to(args['device'])

optimizer = torch.optim.Adam(model.parameters(), lr=args['lr'])
loss_fn = nll_loss

for epoch in range(1, 1 + args["epochs"]):
  loss = train(model, data, train_idx, args['trans_matrix'], optimizer, loss_fn)
  train_acc, test_acc = test(model, data, train_idx, test_idx, args['trans_matrix'])

  print(f'Epoch: {epoch:02d}, '
        f'Loss: {loss:.4f}, '
        f'Train: {100*train_acc:.2f}%, '
        f'Test: {100*test_acc:.2f}%')

In [None]:
Lrw = compute_Lrw(A)
Lrw = torch.from_numpy(Lrw).float().to(device)
args['trans_matrix'] = Lrw

In [None]:
model = GCN_Classifier(input_dim=dataset.num_features,
                       hidden_dim=args['hidden_dim'],
                       hidden_layers=args['hidden_layers'],
                       output_dim=dataset.num_classes,
                       dropout=args['dropout']).to(args['device'])

optimizer = torch.optim.Adam(model.parameters(), lr=args['lr'])
loss_fn = nll_loss

for epoch in range(1, 1 + args["epochs"]):
  loss = train(model, data, train_idx, args['trans_matrix'], optimizer, loss_fn)
  train_acc, test_acc = test(model, data, train_idx, test_idx, args['trans_matrix'])

  print(f'Epoch: {epoch:02d}, '
        f'Loss: {loss:.4f}, '
        f'Train: {100*train_acc:.2f}%, '
        f'Test: {100*test_acc:.2f}%')

### 1.2 Page rank

In [56]:
# pagerank = gdc_pagerank(A, 0.05, 1e-4)
# pagerank = torch.from_numpy(pagerank).float().to(device)

gdc = GDC(diffusion_kwargs = dict(method='ppr', alpha=0.15))
edge_index, edge_weight = gdc.transition_matrix(edge_index=data.edge_index, edge_weight=data.edge_weight,
                                                num_nodes=data.num_nodes, normalization='sym')    

trans_matrix = torch.zeros((data.num_nodes, data.num_nodes)).to(device)
trans_matrix[edge_index[0], edge_index[1]] = edge_weight

trans_matrix.fill_diagonal_(1)
pagerank = trans_matrix.to(device)

args['trans_matrix'] = pagerank

In [57]:
model = GCN_Classifier(input_dim=dataset.num_features,
                       hidden_dim=args['hidden_dim'],
                       hidden_layers=args['hidden_layers'],
                       output_dim=dataset.num_classes,
                       dropout=args['dropout']).to(args['device'])

optimizer = torch.optim.Adam(model.parameters(), lr=args['lr'])
loss_fn = nll_loss

for epoch in range(1, 1 + args["epochs"]):
  loss = train(model, data, train_idx, args['trans_matrix'], optimizer, loss_fn)
  train_acc, test_acc = test(model, data, train_idx, test_idx, args['trans_matrix'])

  print(f'Epoch: {epoch:02d}, '
        f'Loss: {loss:.4f}, '
        f'Train: {100*train_acc:.2f}%, '
        f'Test: {100*test_acc:.2f}%')

Epoch: 01, Loss: 2.2898, Train: 21.56%, Test: 20.83%
Epoch: 02, Loss: 1.9891, Train: 34.33%, Test: 33.40%
Epoch: 03, Loss: 1.7195, Train: 49.70%, Test: 47.54%


KeyboardInterrupt: 

### 1.3 Heat

In [None]:
heat = gdc_heat(A, 3, 10, 1e-4)
heat = torch.from_numpy(heat).float().to(device)

gdc = GDC(diffusion_kwargs = dict(method='heat', t=3))
edge_index, edge_weight = gdc.transition_matrix(edge_index=data.edge_index, edge_weight=data.edge_weight,
                                                num_nodes=data.num_nodes, normalization='sym')    

trans_matrix = torch.zeros((data.num_nodes, data.num_nodes)).to(device)
trans_matrix[edge_index[0], edge_index[1]] = edge_weight

trans_matrix.fill_diagonal_(1)
heat = trans_matrix.to(device)

args['trans_matrix'] = heat

In [None]:
model = GCN_Classifier(input_dim=dataset.num_features,
                       hidden_dim=args['hidden_dim'],
                       hidden_layers=args['hidden_layers'],
                       output_dim=dataset.num_classes,
                       dropout=args['dropout']).to(args['device'])

optimizer = torch.optim.Adam(model.parameters(), lr=args['lr'])
loss_fn = nll_loss

for epoch in range(1, 1 + args["epochs"]):
  loss = train(model, data, train_idx, args['trans_matrix'], optimizer, loss_fn)
  train_acc, test_acc = test(model, data, train_idx, test_idx, args['trans_matrix'])

  print(f'Epoch: {epoch:02d}, '
        f'Loss: {loss:.4f}, '
        f'Train: {100*train_acc:.2f}%, '
        f'Test: {100*test_acc:.2f}%')

# Amazon

In [None]:
from torch_geometric.datasets import Amazon

dataset = Amazon(root='../cora', name='Computers')
data = dataset[0]

Gnx = nx.Graph()
Gnx.add_nodes_from(range(data.x.shape[0]))

# Convert edge_index tensor to a list of edge tuples
edges = torch.t(data.edge_index).tolist()
Gnx.add_edges_from(edges)
Gnx = Gnx.to_undirected()

isolated_nodes = list(nx.isolates(Gnx))
Gnx.remove_nodes_from(isolated_nodes)

mapping = {node: idx for idx, node in enumerate(Gnx.nodes)}
Gnx = nx.relabel_nodes(Gnx, mapping)

A = nx.adjacency_matrix(Gnx).toarray()

data.x = data.x[list(mapping.values())]
data.y = data.y[list(mapping.values())]
data = data.to(device)

test_prop = 0.2
train_idx = torch.tensor(np.random.binomial(1, 1-test_prop, size=data.y.shape[0])).to(bool)
test_idx = torch.tensor(np.logical_not(train_idx)).to(bool).to(device)
train_idx = train_idx.to(device)

In [25]:
A.shape

(13471, 13471)

## 2.1 Laplacian

In [None]:
Lsym = compute_Lsym(A)
Lsym = torch.from_numpy(Lsym).float().to(device)
args['trans_matrix'] = Lsym

In [None]:
model = GCN_Classifier(input_dim=dataset.num_features,
                       hidden_dim=args['hidden_dim'],
                       hidden_layers=args['hidden_layers'],
                       output_dim=dataset.num_classes,
                       dropout=args['dropout']).to(args['device'])

optimizer = torch.optim.Adam(model.parameters(), lr=args['lr'])
loss_fn = nll_loss

for epoch in range(1, 1 + args["epochs"]):
  loss = train(model, data, train_idx, args['trans_matrix'], optimizer, loss_fn)
  train_acc, test_acc = test(model, data, train_idx, test_idx, args['trans_matrix'])

  print(f'Epoch: {epoch:02d}, '
        f'Loss: {loss:.4f}, '
        f'Train: {100*train_acc:.2f}%, '
        f'Test: {100*test_acc:.2f}%')

## 2.2 page rank

In [None]:
pagerank = gdc_pagerank(A, 0.05, 1e-4)
pagerank = torch.from_numpy(pagerank).float().to(device)
args['trans_matrix'] = pagerank

model = GCN_Classifier(input_dim=dataset.num_features,
                       hidden_dim=args['hidden_dim'],
                       hidden_layers=args['hidden_layers'],
                       output_dim=dataset.num_classes,
                       dropout=args['dropout']).to(args['device'])

optimizer = torch.optim.Adam(model.parameters(), lr=args['lr'])
loss_fn = nll_loss

for epoch in range(1, 1 + args["epochs"]):
  loss = train(model, data, train_idx, args['trans_matrix'], optimizer, loss_fn)
  train_acc, test_acc = test(model, data, train_idx, test_idx, args['trans_matrix'])

  print(f'Epoch: {epoch:02d}, '
        f'Loss: {loss:.4f}, '
        f'Train: {100*train_acc:.2f}%, '
        f'Test: {100*test_acc:.2f}%')

## 2.3 Heat Kernel

In [None]:
heat = gdc_heat(A, 3, 10, 1e-4)
heat = torch.from_numpy(heat).float().to(device)

args['trans_matrix'] = heat

model = GCN_Classifier(input_dim=dataset.num_features,
                       hidden_dim=args['hidden_dim'],
                       hidden_layers=args['hidden_layers'],
                       output_dim=dataset.num_classes,
                       dropout=args['dropout']).to(args['device'])

optimizer = torch.optim.Adam(model.parameters(), lr=args['lr'])
loss_fn = nll_loss

for epoch in range(1, 1 + args["epochs"]):
  loss = train(model, data, train_idx, args['trans_matrix'], optimizer, loss_fn)
  train_acc, test_acc = test(model, data, train_idx, test_idx, args['trans_matrix'])

  print(f'Epoch: {epoch:02d}, '
        f'Loss: {loss:.4f}, '
        f'Train: {100*train_acc:.2f}%, '
        f'Test: {100*test_acc:.2f}%')