In [1]:
import torch
import torch_cluster
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.datasets import Planetoid
from torch_geometric.nn import Node2Vec, PNAConv
from torch_geometric.transforms import NormalizeFeatures
from torch_geometric.utils import degree

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [2]:
# Load the Cora dataset
dataset = Planetoid(root='data/Cora', name='Cora', transform=NormalizeFeatures())
data = dataset[0].to(device)

Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.test.index
Processing...
Done!


In [3]:
# Configure Node2Vec parameters
embedding_dim = 64
walk_length = 20
context_size = 10
walks_per_node = 10
num_negative_samples = 1

node2vec = Node2Vec(
    data.edge_index,
    embedding_dim=embedding_dim,
    walk_length=walk_length,
    context_size=context_size,
    walks_per_node=walks_per_node,
    num_negative_samples=num_negative_samples,
    sparse=True
).to(device)

In [4]:
# Train the Node2Vec embeddings
loader = node2vec.loader(batch_size=128, shuffle=True)
optimizer_n2v = torch.optim.SparseAdam(list(node2vec.parameters()), lr=0.01)

def train_node2vec():
    node2vec.train()
    total_loss = 0
    for pos_rw, neg_rw in loader:
        optimizer_n2v.zero_grad()
        loss = node2vec.loss(pos_rw.to(device), neg_rw.to(device))
        loss.backward()
        optimizer_n2v.step()
        total_loss += loss.item()
    return total_loss / len(loader)

print("Training Node2Vec embeddings...")
for epoch in range(1, 51):
    loss = train_node2vec()
    if epoch % 10 == 0:
        print(f'Epoch: {epoch:02d}, Loss: {loss:.4f}')

Training Node2Vec embeddings...
Epoch: 10, Loss: 1.2265
Epoch: 20, Loss: 0.9070
Epoch: 30, Loss: 0.8587
Epoch: 40, Loss: 0.8425
Epoch: 50, Loss: 0.8356


In [5]:
# Extract the learned Node2Vec embeddings
node2vec.eval()
node_embeddings = node2vec().detach()  # [num_nodes, embedding_dim]

In [None]:
# Compute node degrees for PNAConv
deg = degree(data.edge_index[0], data.num_nodes).to(device)

# Define aggregators and scalers for PNAConv
aggregators = ['mean', 'min', 'max', 'std']
scalers = ['identity', 'amplification', 'attenuation']

hidden_dim = 64
num_layers = 2

In [None]:
# Define a multi-layer PNA-based GNN model that incorporates Node2Vec embeddings as residual connections
class PNAModel(nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels, aggregators, scalers, deg, node_embeddings, num_layers=2):
        super(PNAModel, self).__init__()
        
        self.num_layers = num_layers
        
        # First PNA layer: from input features to hidden
        self.convs = nn.ModuleList()
        self.convs.append(PNAConv(in_channels, hidden_channels, aggregators=aggregators, scalers=scalers, deg=deg))
        
        # Additional PNA layers (all hidden -> hidden)
        for _ in range(num_layers - 1):
            self.convs.append(PNAConv(hidden_channels, hidden_channels, aggregators=aggregators, scalers=scalers, deg=deg))
        
        # Linear classifier after PNA layers
        self.lin = nn.Linear(hidden_channels, out_channels)
        
        # Store Node2Vec embeddings as a buffer (not learnable)
        self.register_buffer('n2v_emb', node_embeddings)
        
    def forward(self, x, edge_index):
        h = x
        for conv in self.convs:
            # Apply PNA convolution
            h = conv(h, edge_index)
            # Add Node2Vec embeddings as residual connection after each layer
            h = h + self.n2v_emb
            h = F.relu(h)
        # Classification layer
        h = self.lin(h)
        return h

model = PNAModel(
    in_channels=dataset.num_node_features,
    hidden_channels=hidden_dim,
    out_channels=dataset.num_classes,
    aggregators=aggregators,
    scalers=scalers,
    deg=deg,
    node_embeddings=node_embeddings,
    num_layers=num_layers
).to(device)

In [8]:
# Setup training components
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
criterion = nn.CrossEntropyLoss()

x = data.x

In [9]:
def train():
    model.train()
    optimizer.zero_grad()
    out = model(x, data.edge_index)
    loss = criterion(out[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()
    return loss.item()

@torch.no_grad()
def test():
    model.eval()
    out = model(x, data.edge_index)
    pred = out.argmax(dim=1)
    accs = []
    for mask in [data.train_mask, data.val_mask, data.test_mask]:
        correct = pred[mask].eq(data.y[mask]).sum().item()
        accs.append(correct / mask.sum().item())
    return accs  # [train_acc, val_acc, test_acc]

In [10]:
# Train and evaluate
best_val_acc = 0
test_acc_at_best_val = 0

print("Training PNA model with Node2Vec embeddings...")
for epoch in range(1, 201):
    loss = train()
    train_acc, val_acc, test_acc = test()
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        test_acc_at_best_val = test_acc

    if epoch % 20 == 0:
        print(f"Epoch: {epoch:03d}, Loss: {loss:.4f}, "
              f"Train: {train_acc:.4f}, Val: {val_acc:.4f}, Test: {test_acc:.4f}")

print(f"\nBest Validation Accuracy: {best_val_acc:.4f}")
print(f"Test Accuracy at Best Val: {test_acc_at_best_val:.4f}")

Training PNA model with Node2Vec embeddings...
Epoch: 020, Loss: 0.0015, Train: 1.0000, Val: 0.5860, Test: 0.6090
Epoch: 040, Loss: 0.0002, Train: 1.0000, Val: 0.5940, Test: 0.5930
Epoch: 060, Loss: 0.0003, Train: 1.0000, Val: 0.5160, Test: 0.4940
Epoch: 080, Loss: 0.0010, Train: 1.0000, Val: 0.5940, Test: 0.5990
Epoch: 100, Loss: 0.0020, Train: 1.0000, Val: 0.6200, Test: 0.6340
Epoch: 120, Loss: 0.0026, Train: 1.0000, Val: 0.6600, Test: 0.6620
Epoch: 140, Loss: 0.0029, Train: 1.0000, Val: 0.6720, Test: 0.6820
Epoch: 160, Loss: 0.0032, Train: 1.0000, Val: 0.6900, Test: 0.6970
Epoch: 180, Loss: 0.0032, Train: 1.0000, Val: 0.6860, Test: 0.7040
Epoch: 200, Loss: 0.0033, Train: 1.0000, Val: 0.7000, Test: 0.7130

Best Validation Accuracy: 0.7000
Test Accuracy at Best Val: 0.7130
