In [None]:
# https://uvadlc-notebooks.readthedocs.io/en/latest/tutorial_notebooks/tutorial7/GNN_overview.html
# https://pytorch-lightning.readthedocs.io/en/stable/notebooks/course_UvA-DL/06-graph-neural-networks.html

In [2]:
## Standard libraries
import os
import json
import math
import numpy as np
import time

## Imports for plotting
import matplotlib.pyplot as plt
%matplotlib inline
# Inline figure export formats for the notebook.
# NOTE(review): the recorded cell output echoes the set_matplotlib_formats line,
# which looks like a deprecation warning; newer IPython versions presumably expect
# this helper to come from matplotlib_inline.backend_inline - confirm before upgrading.
from IPython.display import set_matplotlib_formats
set_matplotlib_formats('svg', 'pdf') # For export
from matplotlib.colors import to_rgb
import matplotlib
# Default line width for all matplotlib plots in this notebook.
matplotlib.rcParams['lines.linewidth'] = 2.0
import seaborn as sns
# Reset matplotlib rc params to their originals, then apply the seaborn default theme.
sns.reset_orig()
sns.set()

  set_matplotlib_formats('svg', 'pdf') # For export


In [3]:
## Progress bar
from tqdm.notebook import tqdm

## PyTorch
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as data
import torch.optim as optim
# Torchvision
import torchvision
from torchvision.datasets import CIFAR10
from torchvision import transforms

In [4]:
import pytorch_lightning as pl
from pytorch_lightning.callbacks import LearningRateMonitor, ModelCheckpoint

In [5]:
# Path to the folder where the datasets are/should be downloaded (e.g. CIFAR10);
# also passed as `root` when the Cora dataset is loaded further down.
DATASET_PATH = "./data"
# Path to the folder where the pretrained models are saved; the checkpoint
# download cell creates this folder if it does not exist yet.
CHECKPOINT_PATH = "./checkpoint"

# Setting the global seed (fixed at 42) so that runs are repeatable
pl.seed_everything(42)

# Ensure that all operations are deterministic on GPU (if used) for reproducibility.
# The attribute must be spelled `deterministic`: assigning to a misspelled name is
# silently accepted and leaves cuDNN in its default (non-deterministic) mode.
torch.backends.cudnn.deterministic = True
# Disable the cuDNN auto-tuner, which may select different kernels between runs.
torch.backends.cudnn.benchmark = False

# Use the first CUDA device when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
print(device)

Global seed set to 42


cuda:0


In [6]:
import urllib.request
from urllib.error import HTTPError

# Remote folder holding the pretrained checkpoints, and the files expected in it.
base_url = "https://raw.githubusercontent.com/phlippe/saved_models/main/tutorial7/"
pretrained_files = ["NodeLevelMLP.ckpt", "NodeLevelGNN.ckpt", "GraphLevelGraphConv.ckpt"]

# Make sure the local checkpoint folder is present before writing into it.
os.makedirs(CHECKPOINT_PATH, exist_ok=True)

# Fetch every checkpoint that is not on disk yet; files already present are left alone.
for file_name in pretrained_files:
    file_path = os.path.join(CHECKPOINT_PATH, file_name)
    if "/" in file_name:
        # Names containing a sub-folder need that folder created first.
        os.makedirs(file_path.rpartition("/")[0], exist_ok=True)
    if os.path.isfile(file_path):
        continue
    file_url = base_url + file_name
    print(f"Downloading {file_url}...")
    try:
        urllib.request.urlretrieve(file_url, file_path)
    except HTTPError as e:
        # Report the failure and keep going with the remaining files.
        print("Something went wrong. Please try to download the file from the GDrive folder, or contact the author with the full output including the following error:\n", e)

In [18]:
class GCNLayer(nn.Module):
  """Graph convolution on dense adjacency matrices with mean aggregation.

  Every node's features are linearly projected, then each node receives the
  average of the projected features of the nodes its adjacency row points to.
  """

  def __init__(self, c_in, c_out):
    """
    c_in - Dimension of the input node features
    c_out - Dimension of the output node features
    """
    super().__init__()
    self.projection = nn.Linear(c_in, c_out)

  def forward(self, node_feats, adj_matrix):
    """
    node_feats [batch_size, num_nodes, c_in]
    adj_matrix [batch_size, num_nodes, num_nodes]
    non-symmetric matrices => directed edges
    Assume already have added the identity connections

    Returns a tensor [batch_size, num_nodes, c_out]. A node whose adjacency
    row sums to zero (no neighbours and no self-connection) gets an all-zero
    output instead of NaN.
    """
    num_neighbours = adj_matrix.sum(dim=-1, keepdim=True)
    # A zero row aggregates to zeros below; dividing by 1 instead of 0 keeps
    # that result at zero rather than producing 0/0 = NaN.
    num_neighbours = num_neighbours.masked_fill(num_neighbours == 0, 1)
    node_feats = self.projection(node_feats)
    node_feats = torch.bmm(adj_matrix, node_feats)
    node_feats = node_feats / num_neighbours
    return node_feats

In [27]:
# Toy graph: one batch element, four nodes with two features each (values 0..7).
node_feats = torch.arange(8, dtype=torch.float32).reshape(1, 4, 2)
# Dense adjacency with self-connections already included; a batch axis is added in front.
adj_matrix = torch.tensor(
    [[1, 1, 0, 0],
     [1, 1, 1, 1],
     [0, 1, 1, 1],
     [0, 1, 1, 1]],
    dtype=torch.float32,
).unsqueeze(0)
print("Node features :\n", node_feats)
print("\nAdjacency metrix:\n", adj_matrix)

Node features :
 tensor([[[0., 1.],
         [2., 3.],
         [4., 5.],
         [6., 7.]]])

Adjacency metrix:
 tensor([[[1., 1., 0., 0.],
         [1., 1., 1., 1.],
         [0., 1., 1., 1.],
         [0., 1., 1., 1.]]])


In [31]:
# Use an identity projection (identity weight, zero bias) so the output shows
# the pure neighbourhood average of the input features.
layer = GCNLayer(c_in=2, c_out=2)
layer.projection.weight.data = torch.eye(2)
layer.projection.bias.data = torch.zeros(2)

# Plain forward pass; no gradients are needed for this demonstration.
with torch.no_grad():
    out_feats = layer(node_feats, adj_matrix)

print("Adjacency matrix", adj_matrix)
print("Input features", node_feats)
print("Output features", out_feats)

Adjacency matrix tensor([[[1., 1., 0., 0.],
         [1., 1., 1., 1.],
         [0., 1., 1., 1.],
         [0., 1., 1., 1.]]])
Input features tensor([[[0., 1.],
         [2., 3.],
         [4., 5.],
         [6., 7.]]])
Output features tensor([[[1., 2.],
         [3., 4.],
         [4., 5.],
         [4., 5.]]])


PyTorch Geometric (PyG)

In [36]:
import torch_geometric.nn as pyg_nn
import torch_geometric.data as pyg_data
import torch_geometric

In [35]:
# Lookup table from a short layer name to the PyTorch Geometric layer class it selects.
gnn_layer_by_name = dict(
    GCN=pyg_nn.GCNConv,
    GAT=pyg_nn.GATConv,
    GraphConv=pyg_nn.GraphConv,
)

In [37]:
# Load the Cora dataset from the Planetoid collection, storing it under DATASET_PATH.
cora_dataset = torch_geometric.datasets.Planetoid(
    root=DATASET_PATH,
    name='Cora',
)

Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.test.index
Processing...
Done!


In [38]:
# Display the first (index 0) entry of the dataset to inspect its fields.
cora_dataset[0]

Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708])

In [39]:
class GNNModel(nn.Module):
  """Stack of graph layers for node feature prediction.

  The network consists of `num_layers - 1` hidden blocks (graph layer -> ReLU ->
  Dropout) followed by one final graph layer that maps to `c_out` features.
  """

  def __init__(self, c_in, c_hidden, c_out, num_layers=2, layer_name='GCN',
               dp_rate=0.1, **kwargs):
    """
    c_in - Dimension of input features
    c_hidden - Dimension of hidden features
    c_out - Dimension of the output features. Usually number of classes in classification
    num_layers - Number of "hidden" graph layers
    layer_name - Key into gnn_layer_by_name selecting the graph layer class
    dp_rate - Dropout rate applied after every hidden block
    kwargs - Additional arguments forwarded to the graph layer constructor
    """
    super().__init__()
    gnn_layer = gnn_layer_by_name[layer_name]

    layers = []
    in_channels = c_in
    for _ in range(num_layers-1):
      layers += [gnn_layer(in_channels=in_channels,
                           out_channels=c_hidden,
                           **kwargs),
                 nn.ReLU(inplace=True),
                 nn.Dropout(dp_rate)]
      in_channels = c_hidden

    # Output layer: appended to the same `layers` list as the hidden blocks.
    layers += [gnn_layer(in_channels=in_channels,
                         out_channels=c_out,
                         **kwargs)]
    self.layers = nn.ModuleList(layers)

  def forward(self, x, edge_index):
    """
    x - Input node features
    edge_index - Graph connectivity, handed to every message-passing layer

    Returns the output of the last layer.
    """
    for layer in self.layers:
      # Graph layers take the connectivity as a second input; the other
      # modules (ReLU, Dropout) operate on the features alone.
      if isinstance(layer, pyg_nn.MessagePassing):
        x = layer(x, edge_index)
      else:
        x = layer(x)

    return x

In [None]:
class MLPModel(nn.Module):
  """Graph-agnostic baseline: an MLP applied to every node independently.

  Mirrors the layout of GNNModel: `num_layers - 1` hidden blocks
  (Linear -> ReLU -> Dropout) followed by one final Linear layer.
  """

  def __init__(self, c_in, c_hidden, c_out, num_layers=2, dp_rate=0.1):
    """
    c_in - Dimension of input features
    c_hidden - Dimension of hidden features
    c_out - Dimension of the output features. Usually number of classes in classification
    num_layers - Number of hidden layers
    dp_rate - Dropout rate applied after every hidden block
    """
    super().__init__()
    in_channels, out_channels = c_in, c_hidden
    layers = []
    for _ in range(num_layers-1):
      layers += [nn.Linear(in_channels, out_channels),
                 nn.ReLU(inplace=True),
                 nn.Dropout(dp_rate)]
      in_channels = c_hidden

    layers += [nn.Linear(in_channels, c_out)]
    self.layers = nn.Sequential(*layers)

  def forward(self, x, *args, **kwargs):
    """
    x - Input node features

    Extra arguments (e.g. an edge index) are accepted and ignored so the model
    can be called with the same signature as a graph model.
    """
    return self.layers(x)

In [None]:
# merge the models into a PL module
class NodeLevelGNN(pl.LightningModule):
  """Lightning module for node classification with either an MLP or a GNN.

  The wrapped model is evaluated on all nodes; loss and accuracy are then
  computed only on the nodes selected by the train/val/test mask of the data.
  """

  def __init__(self, model_name, **model_kwargs):
    """
    model_name - 'MLP' selects MLPModel, anything else selects GNNModel
    model_kwargs - Arguments forwarded to the selected model's constructor
    """
    super().__init__()
    self.save_hyperparameters()

    model_cls = MLPModel if model_name == 'MLP' else GNNModel
    self.model = model_cls(**model_kwargs)
    self.loss_module = nn.CrossEntropyLoss()

  def forward(self, data, mode='train'):
    """
    data - Graph object providing x, edge_index, y and the three node masks
    mode - One of 'train', 'val', 'test'; picks which mask is applied

    Returns the tuple (loss, accuracy) over the masked nodes.
    """
    logits = self.model(data.x, data.edge_index)

    # Only the mask that belongs to the requested mode is read from `data`.
    mask_attr_by_mode = {'train': 'train_mask', 'val': 'val_mask', 'test': 'test_mask'}
    assert mode in mask_attr_by_mode, f'Unknown forward mode : {mode}'
    mask = getattr(data, mask_attr_by_mode[mode])

    masked_logits = logits[mask]
    targets = data.y[mask]
    loss = self.loss_module(masked_logits, targets)
    num_correct = (masked_logits.argmax(dim=-1) == targets).sum().float()
    acc = num_correct/mask.sum()
    return loss, acc

  def configure_optimizers(self):
    """Plain SGD with momentum and weight decay over all parameters."""
    return optim.SGD(self.parameters(), lr=0.1, momentum=0.9, weight_decay=2e-3)

  def training_step(self, batch, batch_idx):
    """Log loss and accuracy on the training nodes; the loss is returned."""
    loss, acc = self.forward(batch, mode='train')
    self.log('train_loss', loss)
    self.log('train_acc', acc)
    return loss

  def validation_step(self, batch, batch_idx):
    """Log the accuracy on the validation nodes."""
    acc = self.forward(batch, mode="val")[1]
    self.log('val_acc', acc)

  def test_step(self, batch, batch_idx):
    """Log the accuracy on the test nodes."""
    acc = self.forward(batch, mode="test")[1]
    self.log('test_acc', acc)

In [None]:
def train_node_classifier(model_name, dataset, **model_kwargs):
  pl.seed_everything(42)
  node_data_loader = pyg_data.DataLoader(dataset, batch_size=1)

  root_dir = os.path.join(CHECKPOINT_PATH, 'NodeLevel'+model_name)
  os.makedirs(root_dir, exist_ok=True)

  trainer = pl.Trainer(default_root_dir=root_dir,
                       callbacks=)