In [1]:
## Standard libraries
import os
import json
import math
import numpy as np
import time

## Imports for plotting
import matplotlib.pyplot as plt
%matplotlib inline
from matplotlib_inline.backend_inline import set_matplotlib_formats
set_matplotlib_formats('svg', 'pdf')  # For export
from matplotlib.colors import to_rgb
import matplotlib
matplotlib.rcParams['lines.linewidth'] = 2.0
import seaborn as sns
sns.reset_orig()
sns.set()

## Progress bar
from tqdm.notebook import tqdm

## PyTorch
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as data
import torch.optim as optim
# Torchvision
import torchvision
from torchvision.datasets import CIFAR10
from torchvision import transforms
# PyTorch Lightning
try:
    import pytorch_lightning as pl
except ModuleNotFoundError: # Google Colab does not have PyTorch Lightning installed by default. Hence, we do it here if necessary
    !pip install --quiet pytorch-lightning>=1.4
    import pytorch_lightning as pl
from pytorch_lightning.callbacks import LearningRateMonitor, ModelCheckpoint

# Path to the folder where the datasets are/should be downloaded (e.g. CIFAR10)
DATASET_PATH = "../data"
# Path to the folder where the pretrained models are saved
CHECKPOINT_PATH = "../saved_models/tutorial7"

# Setting the seed
pl.seed_everything(42)

# Ensure that all operations are deterministic on GPU (if used) for reproducibility
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
print(device)

Seed set to 42


cpu


In [2]:
# Import the necessary Python libraries
import urllib.request              # Used for downloading files from the internet (via HTTP requests)
from urllib.error import HTTPError # Specific error class for handling download-related issues

# Base URL where the pretrained model checkpoints are stored (GitHub raw files)
base_url = "https://raw.githubusercontent.com/phlippe/saved_models/main/tutorial7/"

# List of pretrained model checkpoint files we want to download
pretrained_files = ["NodeLevelMLP.ckpt", "NodeLevelGNN.ckpt", "GraphLevelGraphConv.ckpt"]

# Create a directory to store the downloaded checkpoint files if it doesn't already exist.
# CHECKPOINT_PATH should be a variable (string) you defined earlier, e.g., "./checkpoints/"
os.makedirs(CHECKPOINT_PATH, exist_ok=True)

# Loop through each file in the pretrained_files list
for file_name in pretrained_files:
    # Construct the full local path where the file should be saved
    file_path = os.path.join(CHECKPOINT_PATH, file_name)
    
    # If the file name includes a subdirectory (contains "/"), make sure that subdirectory exists
    if "/" in file_name:
        os.makedirs(file_path.rsplit("/", 1)[0], exist_ok=True)
    
    # If the file is not already present locally, then download it
    if not os.path.isfile(file_path):
        # Create the full URL by combining the base_url with the specific file name
        file_url = base_url + file_name
        print(f"Downloading {file_url}...")  # Inform the user which file is being downloaded
        try:
            # Download the file and save it to the specified local path
            urllib.request.urlretrieve(file_url, file_path)
        except HTTPError as e:
            # If there's an error during download (e.g., file not found, no internet),
            # print a helpful message along with the error details.
            print("Something went wrong. Please try to download the file from the GDrive folder, "
                  "or contact the author with the full output including the following error:\n", e)


In [3]:
# Define a custom Graph Convolutional Layer (GCNLayer) by subclassing nn.Module
class GCNLayer(nn.Module):

    def __init__(self, c_in, c_out):
        """
        Initializes the GCN layer.

        Args:
            c_in  - Number of input features per node (feature dimensionality of input)
            c_out - Number of output features per node (feature dimensionality of output)
        """
        super().__init__()
        # Linear transformation to project node features from c_in -> c_out dimensions
        self.projection = nn.Linear(c_in, c_out)

    def forward(self, node_feats, adj_matrix):
        """
        Perform a forward pass of the GCN layer.

        Inputs:
            node_feats  - Tensor of node features with shape [batch_size, num_nodes, c_in].
                          Each node has a feature vector of length c_in.
            adj_matrix  - Tensor of adjacency matrices (graph structure) with shape
                          [batch_size, num_nodes, num_nodes].
                          If there is an edge from node i to j, adj_matrix[b, i, j] = 1.
                          Identity/self-connections (diagonal ones) should already be added.

        Output:
            Updated node feature representations of shape [batch_size, num_nodes, c_out].
        """

        # Step 1: Compute number of neighbors for each node
        # This is used for normalization so each node's feature update is averaged
        num_neighbours = adj_matrix.sum(dim=-1, keepdims=True)  # Shape: [batch_size, num_nodes, 1]

        # Step 2: Apply a linear transformation (feature projection) to each node
        node_feats = self.projection(node_feats)  # Shape: [batch_size, num_nodes, c_out]

        # Step 3: Aggregate neighbor features using matrix multiplication
        # torch.bmm = batch matrix multiplication
        # adj_matrix [b, n, n] x node_feats [b, n, c_out] -> [b, n, c_out]
        node_feats = torch.bmm(adj_matrix, node_feats)

        # Step 4: Normalize by dividing by number of neighbors
        # Ensures that the scale of features does not explode with more neighbors
        node_feats = node_feats / num_neighbours

        # Return the updated node features
        return node_feats


In [4]:
# Create a toy example of node features for a graph with 4 nodes
# torch.arange(8) -> generates values [0, 1, 2, 3, 4, 5, 6, 7]
# .view(1, 4, 2) -> reshapes into [batch_size=1, num_nodes=4, features_per_node=2]
# So each node has 2 features, and we have 1 graph (batch size = 1)
node_feats = torch.arange(8, dtype=torch.float32).view(1, 4, 2)

# Define an adjacency matrix for the same graph
# Shape = [batch_size=1, num_nodes=4, num_nodes=4]
# Row i corresponds to connections *from node i to other nodes*
# A 1 means "there is an edge", 0 means "no edge"
# The diagonal entries are 1 -> these are the self-loops (identity connections)
adj_matrix = torch.Tensor([[
    [1, 1, 0, 0],  # Node 0 is connected to itself and node 1
    [1, 1, 1, 1],  # Node 1 is connected to all nodes (0,1,2,3)
    [0, 1, 1, 1],  # Node 2 is connected to nodes 1,2,3
    [0, 1, 1, 1]   # Node 3 is connected to nodes 1,2,3
]])

# Print the node features for clarity
print("Node features:\n", node_feats)

# Print the adjacency matrix to visualize graph connections
print("\nAdjacency matrix:\n", adj_matrix)


Node features:
 tensor([[[0., 1.],
         [2., 3.],
         [4., 5.],
         [6., 7.]]])

Adjacency matrix:
 tensor([[[1., 1., 0., 0.],
         [1., 1., 1., 1.],
         [0., 1., 1., 1.],
         [0., 1., 1., 1.]]])


In [5]:
# Initialize the GCN layer
# c_in=2 -> input feature dimension per node = 2
# c_out=2 -> output feature dimension per node = 2
layer = GCNLayer(c_in=2, c_out=2)

# Manually set the weights of the linear projection to an identity matrix
# [[1,0],[0,1]] means the projection won't change the input features
# Bias is set to zero -> no shift in features
layer.projection.weight.data = torch.Tensor([[1., 0.], [0., 1.]])
layer.projection.bias.data = torch.Tensor([0., 0.])

# Turn off gradient calculation since this is just a test (no training needed)
with torch.no_grad():
    # Pass the toy node features and adjacency matrix through the GCN layer
    out_feats = layer(node_feats, adj_matrix)

# Print inputs and outputs
print("Adjacency matrix", adj_matrix)
print("Input features", node_feats)
print("Output features", out_feats)


Adjacency matrix tensor([[[1., 1., 0., 0.],
         [1., 1., 1., 1.],
         [0., 1., 1., 1.],
         [0., 1., 1., 1.]]])
Input features tensor([[[0., 1.],
         [2., 3.],
         [4., 5.],
         [6., 7.]]])
Output features tensor([[[1., 2.],
         [3., 4.],
         [4., 5.],
         [4., 5.]]])


In [6]:
class GATLayer(nn.Module):

    def __init__(self, c_in, c_out, num_heads=1, concat_heads=True, alpha=0.2):
        """
        Inputs:
            c_in - Dimensionality of input features
            c_out - Dimensionality of output features
            num_heads - Number of heads, i.e. attention mechanisms to apply in parallel. The
                        output features are equally split up over the heads if concat_heads=True.
            concat_heads - If True, the output of the different heads is concatenated instead of averaged.
            alpha - Negative slope of the LeakyReLU activation.
        """
        super().__init__()
        self.num_heads = num_heads
        self.concat_heads = concat_heads
        if self.concat_heads:
            assert c_out % num_heads == 0, "Number of output features must be a multiple of the count of heads."
            c_out = c_out // num_heads

        # Sub-modules and parameters needed in the layer
        self.projection = nn.Linear(c_in, c_out * num_heads)
        self.a = nn.Parameter(torch.Tensor(num_heads, 2 * c_out)) # One per head
        self.leakyrelu = nn.LeakyReLU(alpha)

        # Initialization from the original implementation
        nn.init.xavier_uniform_(self.projection.weight.data, gain=1.414)
        nn.init.xavier_uniform_(self.a.data, gain=1.414)

    def forward(self, node_feats, adj_matrix, print_attn_probs=False):
        """
        Inputs:
            node_feats - Input features of the node. Shape: [batch_size, c_in]
            adj_matrix - Adjacency matrix including self-connections. Shape: [batch_size, num_nodes, num_nodes]
            print_attn_probs - If True, the attention weights are printed during the forward pass (for debugging purposes)
        """
        batch_size, num_nodes = node_feats.size(0), node_feats.size(1)

        # Apply linear layer and sort nodes by head
        node_feats = self.projection(node_feats)
        node_feats = node_feats.view(batch_size, num_nodes, self.num_heads, -1)

        # We need to calculate the attention logits for every edge in the adjacency matrix
        # Doing this on all possible combinations of nodes is very expensive
        # => Create a tensor of [W*h_i||W*h_j] with i and j being the indices of all edges
        edges = adj_matrix.nonzero(as_tuple=False) # Returns indices where the adjacency matrix is not 0 => edges
        node_feats_flat = node_feats.view(batch_size * num_nodes, self.num_heads, -1)
        edge_indices_row = edges[:,0] * num_nodes + edges[:,1]
        edge_indices_col = edges[:,0] * num_nodes + edges[:,2]
        a_input = torch.cat([
            torch.index_select(input=node_feats_flat, index=edge_indices_row, dim=0),
            torch.index_select(input=node_feats_flat, index=edge_indices_col, dim=0)
        ], dim=-1) # Index select returns a tensor with node_feats_flat being indexed at the desired positions along dim=0

        # Calculate attention MLP output (independent for each head)
        attn_logits = torch.einsum('bhc,hc->bh', a_input, self.a)
        attn_logits = self.leakyrelu(attn_logits)

        # Map list of attention values back into a matrix
        attn_matrix = attn_logits.new_zeros(adj_matrix.shape+(self.num_heads,)).fill_(-9e15)
        attn_matrix[adj_matrix[...,None].repeat(1,1,1,self.num_heads) == 1] = attn_logits.reshape(-1)

        # Weighted average of attention
        attn_probs = F.softmax(attn_matrix, dim=2)
        if print_attn_probs:
            print("Attention probs\n", attn_probs.permute(0, 3, 1, 2))
        node_feats = torch.einsum('bijh,bjhc->bihc', attn_probs, node_feats)

        # If heads should be concatenated, we can do this by reshaping. Otherwise, take mean
        if self.concat_heads:
            node_feats = node_feats.reshape(batch_size, num_nodes, -1)
        else:
            node_feats = node_feats.mean(dim=2)

        return node_feats

In [7]:
# Initialize a Graph Attention Layer
# c_in = 2 -> input feature dimension per node = 2
# c_out = 2 -> output feature dimension per node = 2
# num_heads = 2 -> multi-head attention (the layer will compute attention twice in parallel)
layer = GATLayer(2, 2, num_heads=2)

# Manually set the linear projection weights to identity
# This ensures the projection does not change node features
layer.projection.weight.data = torch.Tensor([[1., 0.], [0., 1.]])
layer.projection.bias.data = torch.Tensor([0., 0.])

# Manually set the attention parameters
# Each head has its own attention vector `a` used in computing attention coefficients
layer.a.data = torch.Tensor([
    [-0.2, 0.3],   # Attention vector for head 1
    [0.1, -0.1]    # Attention vector for head 2
])

# Run the GAT layer without gradients (we're just testing, not training)
with torch.no_grad():
    # Forward pass with attention printing enabled
    out_feats = layer(node_feats, adj_matrix, print_attn_probs=True)

# Print inputs and outputs for clarity
print("Adjacency matrix", adj_matrix)
print("Input features", node_feats)
print("Output features", out_feats)


Attention probs
 tensor([[[[0.3543, 0.6457, 0.0000, 0.0000],
          [0.1096, 0.1450, 0.2642, 0.4813],
          [0.0000, 0.1858, 0.2885, 0.5257],
          [0.0000, 0.2391, 0.2696, 0.4913]],

         [[0.5100, 0.4900, 0.0000, 0.0000],
          [0.2975, 0.2436, 0.2340, 0.2249],
          [0.0000, 0.3838, 0.3142, 0.3019],
          [0.0000, 0.4018, 0.3289, 0.2693]]]])
Adjacency matrix tensor([[[1., 1., 0., 0.],
         [1., 1., 1., 1.],
         [0., 1., 1., 1.],
         [0., 1., 1., 1.]]])
Input features tensor([[[0., 1.],
         [2., 3.],
         [4., 5.],
         [6., 7.]]])
Output features tensor([[[1.2913, 1.9800],
         [4.2344, 3.7725],
         [4.6798, 4.8362],
         [4.5043, 4.7351]]])


In [8]:
# Try importing PyTorch Geometric
try:
    import torch_geometric
except ModuleNotFoundError:
    # If it's not installed, we install the necessary packages.
    # PyTorch Geometric has several dependencies that must match the PyTorch + CUDA version.

    # Extract the installed PyTorch version (without "+cuXXX" suffix).
    TORCH = torch.__version__.split('+')[0]

    # Extract the CUDA version from torch (e.g., "11.8") and format as "cu118".
    CUDA = 'cu' + torch.version.cuda.replace('.','')

    # Install core PyTorch Geometric dependencies:
    # - torch-scatter
    # - torch-sparse
    # - torch-cluster
    # - torch-spline-conv
    # These packages are provided from special wheels that match the current PyTorch+CUDA version.
    !pip install torch-scatter     -f https://pytorch-geometric.com/whl/torch-{TORCH}+{CUDA}.html
    !pip install torch-sparse      -f https://pytorch-geometric.com/whl/torch-{TORCH}+{CUDA}.html
    !pip install torch-cluster     -f https://pytorch-geometric.com/whl/torch-{TORCH}+{CUDA}.html
    !pip install torch-spline-conv -f https://pytorch-geometric.com/whl/torch-{TORCH}+{CUDA}.html

    # Finally, install the main torch-geometric package.
    !pip install torch-geometric

    # Import torch_geometric after installation
    import torch_geometric

# Import useful PyG submodules:
import torch_geometric.nn as geom_nn     # Neural network layers (GCN, GAT, GraphSAGE, etc.)
import torch_geometric.data as geom_data # Data handling (datasets, data loaders for graphs)


In [9]:
# Create a dictionary that maps string names to specific GNN layer classes from PyTorch Geometric
gnn_layer_by_name = {
    "GCN": geom_nn.GCNConv,        # Graph Convolutional Network (GCN) layer
    "GAT": geom_nn.GATConv,        # Graph Attention Network (GAT) layer
    "GraphConv": geom_nn.GraphConv # A more general graph convolution layer
}


In [10]:
# Load the Cora citation network dataset using PyTorch Geometric's built-in Planetoid class
cora_dataset = torch_geometric.datasets.Planetoid(
    root=DATASET_PATH,  # Directory where the dataset should be stored (downloaded if not found)
    name="Cora"         # Name of the dataset to load ("Cora", "CiteSeer", "PubMed" are available)
)

In [11]:
cora_dataset[0]

Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708])

In [12]:
class GNNModel(nn.Module):

    def __init__(self, c_in, c_hidden, c_out, num_layers=2, layer_name="GCN", dp_rate=0.1, **kwargs):
        """
        Inputs:
            c_in - Dimension of input features
            c_hidden - Dimension of hidden features
            c_out - Dimension of the output features. Usually number of classes in classification
            num_layers - Number of "hidden" graph layers
            layer_name - String of the graph layer to use
            dp_rate - Dropout rate to apply throughout the network
            kwargs - Additional arguments for the graph layer (e.g. number of heads for GAT)
        """
        super().__init__()
        gnn_layer = gnn_layer_by_name[layer_name]

        layers = []
        in_channels, out_channels = c_in, c_hidden
        for l_idx in range(num_layers-1):
            layers += [
                gnn_layer(in_channels=in_channels,
                          out_channels=out_channels,
                          **kwargs),
                nn.ReLU(inplace=True),
                nn.Dropout(dp_rate)
            ]
            in_channels = c_hidden
        layers += [gnn_layer(in_channels=in_channels,
                             out_channels=c_out,
                             **kwargs)]
        self.layers = nn.ModuleList(layers)

    def forward(self, x, edge_index):
        """
        Inputs:
            x - Input features per node
            edge_index - List of vertex index pairs representing the edges in the graph (PyTorch geometric notation)
        """
        for l in self.layers:
            # For graph layers, we need to add the "edge_index" tensor as additional input
            # All PyTorch Geometric graph layer inherit the class "MessagePassing", hence
            # we can simply check the class type.
            if isinstance(l, geom_nn.MessagePassing):
                x = l(x, edge_index)
            else:
                x = l(x)
        return x

In [13]:
class MLPModel(nn.Module):
    def __init__(self, c_in, c_hidden, c_out, num_layers=2, dp_rate=0.1):
        """
        Initialize a simple feedforward neural network (MLP).

        Args:
            c_in      - Number of input features (dimension of input vector per node)
            c_hidden  - Size of hidden layers
            c_out     - Number of output units (e.g., number of classes for classification)
            num_layers- Number of layers (including input -> hidden layers). 
                        The last layer will always project to c_out.
            dp_rate   - Dropout probability (to prevent overfitting)
        """
        super().__init__()

        layers = []  # Collect all layers in a list

        # Build (num_layers-1) hidden layers
        in_channels, out_channels = c_in, c_hidden
        for l_idx in range(num_layers - 1):
            layers += [
                nn.Linear(in_channels, out_channels),  # Fully connected layer
                nn.ReLU(inplace=True),                 # Nonlinear activation
                nn.Dropout(dp_rate)                    # Dropout for regularization
            ]
            in_channels = c_hidden  # Next layer input = hidden size

        # Final output layer (maps hidden -> c_out, e.g., class scores)
        layers += [nn.Linear(in_channels, c_out)]

        # Store the sequence of layers
        self.layers = nn.Sequential(*layers)

    def forward(self, x, *args, **kwargs):
        """
        Forward pass of the MLP.

        Args:
            x - Input features (tensor of shape [num_nodes, c_in] or [batch_size, c_in])
        Returns:
            Output tensor of shape [num_nodes, c_out] (e.g., class logits)
        """
        return self.layers(x)


In [14]:
class NodeLevelGNN(pl.LightningModule):
    def __init__(self, model_name, **model_kwargs):
        super().__init__()

        # Save all hyperparameters (so Lightning can log them automatically)
        self.save_hyperparameters()

        # Choose which model to use:
        # - "MLP": a baseline without graph structure
        # - otherwise: use a GNN model (e.g., GCN, GAT, GraphConv)
        if model_name == "MLP":
            self.model = MLPModel(**model_kwargs)
        else:
            self.model = GNNModel(**model_kwargs)

        # Define loss function for classification
        self.loss_module = nn.CrossEntropyLoss()

    def forward(self, data, mode="train"):
        """
        Forward pass through the model and compute loss/accuracy.

        Args:
            data - A PyG Data object containing:
                   - x: node features
                   - edge_index: graph structure
                   - y: node labels
                   - train/val/test masks
            mode - Determines which mask to use ("train", "val", "test")
        """
        x, edge_index = data.x, data.edge_index
        x = self.model(x, edge_index)  # Run through MLP or GNN

        # Select nodes for loss/accuracy based on mask
        if mode == "train":
            mask = data.train_mask
        elif mode == "val":
            mask = data.val_mask
        elif mode == "test":
            mask = data.test_mask
        else:
            assert False, f"Unknown forward mode: {mode}"

        # Compute loss only on masked nodes
        loss = self.loss_module(x[mask], data.y[mask])

        # Compute accuracy: compare predicted class vs. true label
        acc = (x[mask].argmax(dim=-1) == data.y[mask]).sum().float() / mask.sum()
        return loss, acc

    def configure_optimizers(self):
        # Define optimizer: Stochastic Gradient Descent with momentum + weight decay
        optimizer = optim.SGD(self.parameters(), lr=0.1, momentum=0.9, weight_decay=2e-3)
        return optimizer

    def training_step(self, batch, batch_idx):
        # Compute training loss + accuracy
        loss, acc = self.forward(batch, mode="train")
        self.log('train_loss', loss)  # log for visualization
        self.log('train_acc', acc)
        return loss

    def validation_step(self, batch, batch_idx):
        # Compute validation accuracy
        _, acc = self.forward(batch, mode="val")
        self.log('val_acc', acc)

    def test_step(self, batch, batch_idx):
        # Compute test accuracy
        _, acc = self.forward(batch, mode="test")
        self.log('test_acc', acc)


In [15]:
def train_node_classifier(model_name, dataset, **model_kwargs):
    pl.seed_everything(42)
    node_data_loader = geom_data.DataLoader(dataset, batch_size=1)

    # Create a PyTorch Lightning trainer with the generation callback
    root_dir = os.path.join(CHECKPOINT_PATH, "NodeLevel" + model_name)
    os.makedirs(root_dir, exist_ok=True)
    trainer = pl.Trainer(default_root_dir=root_dir,
                         callbacks=[ModelCheckpoint(save_weights_only=True, mode="max", monitor="val_acc")],
                         accelerator="gpu" if str(device).startswith("cuda") else "cpu",
                         devices=1,
                         max_epochs=200,
                         enable_progress_bar=False) # False because epoch size is 1
    trainer.logger._default_hp_metric = None # Optional logging argument that we don't need

    # Check whether pretrained model exists. If yes, load it and skip training
    pretrained_filename = os.path.join(CHECKPOINT_PATH, f"NodeLevel{model_name}.ckpt")
    if os.path.isfile(pretrained_filename):
        print("Found pretrained model, loading...")
        model = NodeLevelGNN.load_from_checkpoint(pretrained_filename)
    else:
        pl.seed_everything()
        model = NodeLevelGNN(model_name=model_name, c_in=dataset.num_node_features, c_out=dataset.num_classes, **model_kwargs)
        trainer.fit(model, node_data_loader, node_data_loader)
        model = NodeLevelGNN.load_from_checkpoint(trainer.checkpoint_callback.best_model_path)

    # Test best model on the test set
    test_result = trainer.test(model, node_data_loader, verbose=False)
    batch = next(iter(node_data_loader))
    batch = batch.to(model.device)
    _, train_acc = model.forward(batch, mode="train")
    _, val_acc = model.forward(batch, mode="val")
    result = {"train": train_acc,
              "val": val_acc,
              "test": test_result[0]['test_acc']}
    return model, result

In [16]:
# Small function for printing the test scores
def print_results(result_dict):
    if "train" in result_dict:
        print(f"Train accuracy: {(100.0*result_dict['train']):4.2f}%")
    if "val" in result_dict:
        print(f"Val accuracy:   {(100.0*result_dict['val']):4.2f}%")
    print(f"Test accuracy:  {(100.0*result_dict['test']):4.2f}%")

In [17]:
# Train a node classification model on the Cora dataset
node_mlp_model, node_mlp_result = train_node_classifier(
    model_name="MLP",       # Choose the model type (MLP baseline in this case)
    dataset=cora_dataset,   # Dataset to use (Cora citation network)
    c_hidden=16,            # Dimension of hidden layer features
    num_layers=2,           # Number of layers in the MLP
    dp_rate=0.1             # Dropout rate (for regularization)
)

# Print the results (accuracy, loss, etc.)
print_results(node_mlp_result)


Seed set to 42
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
/opt/anaconda3/lib/python3.13/site-packages/pytorch_lightning/trainer/setup.py:177: GPU available but not used. You can set it by doing `Trainer(accelerator='gpu')`.
/opt/anaconda3/lib/python3.13/site-packages/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py:76: Starting from v1.9.0, `tensorboardX` has been removed as a dependency of the `pytorch_lightning` package, due to potential conflicts with other packages in the ML ecosystem. For this reason, `logger=True` will use `CSVLogger` as the default logger, unless the `tensorboard` or `tensorboardX` packages are found. Please `pip install lightning[extra]` or one of them to enable TensorBoard support by default
Lightning automatically upgraded your loaded checkpoint from v1.0.2 to v2.5.5. To apply the upgrade to your files permanently, run `python -m pytorch_lightning.utilities.upgra

Found pretrained model, loading...
Train accuracy: 97.86%
Val accuracy:   52.80%
Test accuracy:  60.60%


In [18]:
# Train a node classification model on the Cora dataset using a GNN
node_gnn_model, node_gnn_result = train_node_classifier(
    model_name="GNN",        # Specify we want a graph neural network (not MLP)
    layer_name="GCN",        # Choose GCNConv as the GNN layer type
    dataset=cora_dataset,    # Dataset: Cora citation network
    c_hidden=16,             # Hidden layer dimension
    num_layers=2,            # Number of layers in the GNN
    dp_rate=0.1              # Dropout rate
)

# Print evaluation results (train/val/test accuracy)
print_results(node_gnn_result)


Seed set to 42
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
/opt/anaconda3/lib/python3.13/site-packages/pytorch_lightning/trainer/setup.py:177: GPU available but not used. You can set it by doing `Trainer(accelerator='gpu')`.
Lightning automatically upgraded your loaded checkpoint from v1.0.2 to v2.5.5. To apply the upgrade to your files permanently, run `python -m pytorch_lightning.utilities.upgrade_checkpoint ../saved_models/tutorial7/NodeLevelGNN.ckpt`


Found pretrained model, loading...
Train accuracy: 100.00%
Val accuracy:   77.80%
Test accuracy:  82.40%


In [19]:
# Load the MUTAG dataset using PyTorch Geometric's TUDataset class
tu_dataset = torch_geometric.datasets.TUDataset(
    root=DATASET_PATH,   # Directory where dataset should be stored (downloaded if missing)
    name="MUTAG"         # Dataset name from the TUDataset collection
)


In [20]:
print("Data object:", tu_dataset.data)
print("Length:", len(tu_dataset))
print(f"Average label: {tu_dataset.data.y.float().mean().item():4.2f}")

Data object: Data(x=[3371, 7], edge_index=[2, 7442], edge_attr=[7442, 4], y=[188])
Length: 188
Average label: 0.66




In [21]:
torch.manual_seed(42)
tu_dataset.shuffle()
train_dataset = tu_dataset[:150]
test_dataset = tu_dataset[150:]

In [22]:
# Create a DataLoader for the training dataset
graph_train_loader = geom_data.DataLoader(
    train_dataset,   # Training split of graphs
    batch_size=64,   # Number of graphs per batch
    shuffle=True     # Shuffle order each epoch (important for training)
)

# Create a DataLoader for validation dataset
# (Here, it's reusing test_dataset as a placeholder, but usually you'd split off a validation set)
graph_val_loader = geom_data.DataLoader(
    test_dataset,    # Validation split of graphs
    batch_size=64
)

# Create a DataLoader for test dataset
graph_test_loader = geom_data.DataLoader(
    test_dataset,    # Test split of graphs
    batch_size=64
)


In [23]:
# Take the first batch from the test DataLoader
batch = next(iter(graph_test_loader))

# Print the whole batched object (will show sizes of x, edge_index, y, batch, etc.)
print("Batch:", batch)

# Print the labels of the first 10 graphs in this batch
print("Labels:", batch.y[:10])

# Print the batch indices of the first 40 nodes
# (This tells us which graph each node belongs to)
print("Batch indices:", batch.batch[:40])


Batch: DataBatch(edge_index=[2, 1512], x=[687, 7], edge_attr=[1512, 4], y=[38], batch=[687], ptr=[39])
Labels: tensor([1, 1, 1, 0, 0, 0, 1, 1, 1, 0])
Batch indices: tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2])


In [24]:
class GraphGNNModel(nn.Module):

    def __init__(self, c_in, c_hidden, c_out, dp_rate_linear=0.5, **kwargs):
        """
        Initialize a graph-level GNN model.

        Args:
            c_in            - Number of input features per node
            c_hidden        - Hidden feature dimension inside the GNN layers
            c_out           - Output feature dimension (number of graph classes)
            dp_rate_linear  - Dropout probability before the final linear layer 
                              (usually higher than in the GNN to avoid overfitting)
            kwargs          - Extra arguments passed to GNNModel (e.g., layer type, num_layers, dropout rate)
        """
        super().__init__()

        # Core GNN backbone: maps node features -> hidden features
        # Output size is c_hidden (not the final prediction yet)
        self.GNN = GNNModel(
            c_in=c_in,
            c_hidden=c_hidden,
            c_out=c_hidden,  # intermediate features
            **kwargs
        )

        # Classification head: turns pooled graph representation into class logits
        self.head = nn.Sequential(
            nn.Dropout(dp_rate_linear),   # Regularization
            nn.Linear(c_hidden, c_out)    # Final classifier (graph-level output)
        )

    def forward(self, x, edge_index, batch_idx):
        """
        Forward pass for graph classification.

        Args:
            x         - Node features tensor [num_nodes, c_in]
            edge_index- Graph connectivity (PyG COO format) [2, num_edges]
            batch_idx - Batch vector mapping each node to its graph index
                        (e.g., [0,0,0,1,1,1,...] means first 3 nodes in graph 0, next 3 in graph 1, etc.)

        Returns:
            Graph-level predictions (logits) of shape [num_graphs, c_out]
        """
        # Step 1: Apply GNN layers (node-level feature updates)
        x = self.GNN(x, edge_index)  # Shape: [num_nodes, c_hidden]

        # Step 2: Pool node embeddings into graph embeddings
        # global_mean_pool: averages node features for each graph in the batch
        x = geom_nn.global_mean_pool(x, batch_idx)  # Shape: [num_graphs, c_hidden]

        # Step 3: Apply classification head to get logits
        x = self.head(x)  # Shape: [num_graphs, c_out]

        return x


In [25]:
class GraphLevelGNN(pl.LightningModule):

    def __init__(self, **model_kwargs):
        super().__init__()

        # Save all hyperparameters (for logging/reproducibility in Lightning)
        self.save_hyperparameters()

        # Core GNN model (node → pooled graph embedding → classification head)
        self.model = GraphGNNModel(**model_kwargs)

        # Choose appropriate loss function:
        # - BCEWithLogitsLoss for binary classification (c_out == 1)
        # - CrossEntropyLoss for multi-class classification (c_out > 1)
        self.loss_module = nn.BCEWithLogitsLoss() if self.hparams.c_out == 1 else nn.CrossEntropyLoss()

    def forward(self, data, mode="train"):
        """
        Forward pass for graph classification.

        Args:
            data - A batch of graphs from DataLoader with attributes:
                   - x: node features
                   - edge_index: graph edges
                   - batch: node-to-graph assignment
                   - y: graph labels
            mode - (Optional) "train" / "val" / "test" for logging

        Returns:
            loss, acc - loss value and accuracy metric
        """
        x, edge_index, batch_idx = data.x, data.edge_index, data.batch

        # Run through GNN backbone + pooling + classification head
        x = self.model(x, edge_index, batch_idx)

        # For binary classification, remove trailing dimension
        x = x.squeeze(dim=-1)

        # Convert logits into predictions
        if self.hparams.c_out == 1:
            # Binary classification → threshold at 0
            preds = (x > 0).float()
            data.y = data.y.float()  # Ensure labels are float for BCE loss
        else:
            # Multi-class classification → argmax over logits
            preds = x.argmax(dim=-1)

        # Compute loss (logits vs labels)
        loss = self.loss_module(x, data.y)

        # Compute accuracy (predictions vs true labels)
        acc = (preds == data.y).sum().float() / preds.shape[0]

        return loss, acc

    def configure_optimizers(self):
        # Use AdamW optimizer (good default for GNNs)
        # High learning rate because MUTAG is a small dataset with a small model
        optimizer = optim.AdamW(self.parameters(), lr=1e-2, weight_decay=0.0)
        return optimizer

    def training_step(self, batch, batch_idx):
        # Run forward pass in training mode
        loss, acc = self.forward(batch, mode="train")
        # Log training loss + accuracy
        self.log('train_loss', loss)
        self.log('train_acc', acc)
        return loss

    def validation_step(self, batch, batch_idx):
        # Evaluate validation accuracy
        _, acc = self.forward(batch, mode="val")
        self.log('val_acc', acc)

    def test_step(self, batch, batch_idx):
        # Evaluate test accuracy
        _, acc = self.forward(batch, mode="test")
        self.log('test_acc', acc)


In [26]:
def train_graph_classifier(model_name, **model_kwargs):
    # Set random seed for reproducibility
    pl.seed_everything(42)

    # Define a checkpoint directory for this model
    root_dir = os.path.join(CHECKPOINT_PATH, "GraphLevel" + model_name)
    os.makedirs(root_dir, exist_ok=True)

    # Create PyTorch Lightning Trainer
    trainer = pl.Trainer(
        default_root_dir=root_dir,   # Where logs/checkpoints are stored
        callbacks=[ModelCheckpoint(  # Save only the best model (based on val_acc)
            save_weights_only=True,
            mode="max",
            monitor="val_acc"
        )],
        accelerator="gpu" if str(device).startswith("cuda") else "cpu",  # Auto GPU/CPU
        devices=1,                 # Use 1 device
        max_epochs=500,            # Train for max 500 epochs
        enable_progress_bar=False  # Disable progress bar (cleaner output)
    )

    # Disable default "hp_metric" logging (not needed here)
    trainer.logger._default_hp_metric = None

    # Check if a pretrained checkpoint already exists
    pretrained_filename = os.path.join(CHECKPOINT_PATH, f"GraphLevel{model_name}.ckpt")
    if os.path.isfile(pretrained_filename):
        print("Found pretrained model, loading...")
        # Load model directly from checkpoint
        model = GraphLevelGNN.load_from_checkpoint(pretrained_filename)
    else:
        # Otherwise, initialize and train a new model
        pl.seed_everything(42)  # Re-set seed to ensure reproducibility
        model = GraphLevelGNN(
            c_in=tu_dataset.num_node_features,                    # Input size = node feature dim
            c_out=1 if tu_dataset.num_classes == 2 else tu_dataset.num_classes,  # Output = 1 (binary) or num_classes
            **model_kwargs
        )
        trainer.fit(model, graph_train_loader, graph_val_loader)   # Train model
        # Reload the best model (based on validation accuracy)
        model = GraphLevelGNN.load_from_checkpoint(trainer.checkpoint_callback.best_model_path)

    # Evaluate the best model on train and test sets
    train_result = trainer.test(model, graph_train_loader, verbose=False)
    test_result = trainer.test(model, graph_test_loader, verbose=False)

    # Collect results into a dictionary
    result = {
        "test": test_result[0]['test_acc'],
        "train": train_result[0]['test_acc']
    }

    return model, result


In [27]:
model, result = train_graph_classifier(model_name="GraphConv",
                                       c_hidden=256,
                                       layer_name="GraphConv",
                                       num_layers=3,
                                       dp_rate_linear=0.5,
                                       dp_rate=0.0)

Seed set to 42
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Lightning automatically upgraded your loaded checkpoint from v1.0.2 to v2.5.5. To apply the upgrade to your files permanently, run `python -m pytorch_lightning.utilities.upgrade_checkpoint ../saved_models/tutorial7/GraphLevelGraphConv.ckpt`


Found pretrained model, loading...


/opt/anaconda3/lib/python3.13/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:484: Your `test_dataloader`'s sampler has shuffling enabled, it is strongly recommended that you turn shuffling off for val/test dataloaders.
/opt/anaconda3/lib/python3.13/site-packages/pytorch_lightning/utilities/data.py:79: Trying to infer the `batch_size` from an ambiguous collection. The batch size we found is 2. To avoid any miscalculations, use `self.log(..., batch_size=batch_size)`.


In [28]:
print(f"Train performance: {100.0*result['train']:4.2f}%")
print(f"Test performance:  {100.0*result['test']:4.2f}%")

Train performance: 93.28%
Test performance:  92.11%
