In [3]:
!pip install torch-cluster -f https://data.pyg.org/whl/torch-2.5.0+cpu.html
!pip install --upgrade pip setuptools wheel
!pip install --upgrade torch torch-geometric

Looking in links: https://data.pyg.org/whl/torch-2.5.0+cpu.html
Collecting torch-geometric
  Using cached torch_geometric-2.6.1-py3-none-any.whl.metadata (63 kB)
Using cached torch_geometric-2.6.1-py3-none-any.whl (1.1 MB)
Installing collected packages: torch-geometric
Successfully installed torch-geometric-2.6.1


In [1]:
pip install certifi



In [4]:
import torch
import torch_cluster
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.datasets import Planetoid
from torch_geometric.nn import Node2Vec, PNAConv
from torch_geometric.transforms import NormalizeFeatures
from torch_geometric.utils import degree

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [5]:
# Load the Cora dataset
dataset = Planetoid(root='data/Cora', name='Cora', transform=NormalizeFeatures())
data = dataset[0].to(device)

Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.test.index
Processing...
Done!


In [6]:
# Configure Node2Vec parameters
embedding_dim = 64
walk_length = 20
context_size = 10
walks_per_node = 10
num_negative_samples = 1

node2vec = Node2Vec(
    data.edge_index,
    embedding_dim=embedding_dim,
    walk_length=walk_length,
    context_size=context_size,
    walks_per_node=walks_per_node,
    num_negative_samples=num_negative_samples,
    sparse=True
).to(device)

In [7]:
# Train the Node2Vec embeddings
loader = node2vec.loader(batch_size=128, shuffle=True)
optimizer_n2v = torch.optim.SparseAdam(list(node2vec.parameters()), lr=0.01)

def train_node2vec():
    node2vec.train()
    total_loss = 0
    for pos_rw, neg_rw in loader:
        optimizer_n2v.zero_grad()
        loss = node2vec.loss(pos_rw.to(device), neg_rw.to(device))
        loss.backward()
        optimizer_n2v.step()
        total_loss += loss.item()
    return total_loss / len(loader)

print("Training Node2Vec embeddings...")
for epoch in range(1, 51):
    loss = train_node2vec()
    if epoch % 10 == 0:
        print(f'Epoch: {epoch:02d}, Loss: {loss:.4f}')

Training Node2Vec embeddings...
Epoch: 10, Loss: 1.2228
Epoch: 20, Loss: 0.9053
Epoch: 30, Loss: 0.8589
Epoch: 40, Loss: 0.8422
Epoch: 50, Loss: 0.8359


In [8]:
# Extract the learned Node2Vec embeddings
node2vec.eval()
node_embeddings = node2vec().detach()  # [num_nodes, embedding_dim]

In [9]:
# Compute node degrees for PNAConv
deg = degree(data.edge_index[0], data.num_nodes).to(device)

# Define aggregators and scalers for PNAConv
aggregators = ['mean', 'min', 'max', 'std']
scalers = ['identity', 'amplification', 'attenuation']

hidden_dim = 64

In [10]:
class PNAModelWithAttention(nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels, aggregators, scalers, deg, node_embeddings):
        super(PNAModelWithAttention, self).__init__()
        self.conv = PNAConv(in_channels, hidden_channels, aggregators=aggregators,
                            scalers=scalers, deg=deg)
        self.lin = nn.Linear(hidden_channels, out_channels)

        self.attention_weights = nn.Parameter(torch.rand(node_embeddings.size(0), 1))
        self.register_buffer('n2v_emb', node_embeddings)

    def forward(self, x, edge_index):
        # PNA aggregation step
        h = self.conv(x, edge_index)

        # Compute attention weights
        alpha = torch.sigmoid(self.attention_weights)  # Ensure values are between 0 and 1

        # Apply attention-based fusion
        h_fused = alpha * self.n2v_emb + (1 - alpha) * h
        h_fused = F.relu(h_fused)
        h_fused = self.lin(h_fused)
        return h_fused

model = PNAModelWithAttention(
    in_channels=dataset.num_node_features,
    hidden_channels=hidden_dim,
    out_channels=dataset.num_classes,
    aggregators=aggregators,
    scalers=scalers,
    deg=deg,
    node_embeddings=node_embeddings
).to(device)

In [11]:
# Setup training components
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
criterion = nn.CrossEntropyLoss()

x = data.x

In [12]:
def train():
    model.train()
    optimizer.zero_grad()
    out = model(x, data.edge_index)
    loss = criterion(out[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()
    return loss.item()

@torch.no_grad()
def test():
    model.eval()
    out = model(x, data.edge_index)
    pred = out.argmax(dim=1)
    accs = []
    for mask in [data.train_mask, data.val_mask, data.test_mask]:
        correct = pred[mask].eq(data.y[mask]).sum().item()
        accs.append(correct / mask.sum().item())
    return accs  # [train_acc, val_acc, test_acc]

In [13]:
# Train and evaluate
best_val_acc = 0
test_acc_at_best_val = 0

print("Training PNA model with Node2Vec embeddings...")
for epoch in range(1, 201):
    loss = train()
    train_acc, val_acc, test_acc = test()
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        test_acc_at_best_val = test_acc

    if epoch % 20 == 0:
        print(f"Epoch: {epoch:03d}, Loss: {loss:.4f}, "
              f"Train: {train_acc:.4f}, Val: {val_acc:.4f}, Test: {test_acc:.4f}")

print(f"\nBest Validation Accuracy: {best_val_acc:.4f}")
print(f"Test Accuracy at Best Val: {test_acc_at_best_val:.4f}")

Training PNA model with Node2Vec embeddings...




Epoch: 020, Loss: 0.1074, Train: 1.0000, Val: 0.5360, Test: 0.5560
Epoch: 040, Loss: 0.0074, Train: 1.0000, Val: 0.5900, Test: 0.6180
Epoch: 060, Loss: 0.0058, Train: 1.0000, Val: 0.6280, Test: 0.6420
Epoch: 080, Loss: 0.0046, Train: 1.0000, Val: 0.6440, Test: 0.6740
Epoch: 100, Loss: 4.7131, Train: 0.3286, Val: 0.0620, Test: 0.0850
Epoch: 120, Loss: 0.0045, Train: 1.0000, Val: 0.5420, Test: 0.5310
Epoch: 140, Loss: 0.0003, Train: 1.0000, Val: 0.5420, Test: 0.5370
Epoch: 160, Loss: 0.0004, Train: 1.0000, Val: 0.5400, Test: 0.5430
Epoch: 180, Loss: 0.0007, Train: 1.0000, Val: 0.5500, Test: 0.5520
Epoch: 200, Loss: 0.0010, Train: 1.0000, Val: 0.5700, Test: 0.5710

Best Validation Accuracy: 0.6500
Test Accuracy at Best Val: 0.6720


In [15]:
from itertools import product

# Define the search space for hyperparameters
param_grid = {
    'lr': [0.01, 0.001, 0.0001],
    'hidden_dim': [64],
    'weight_decay': [1e-5, 1e-4, 1e-3],
    'dropout_prob': [0.2, 0.3, 0.5]
}

# Expand the grid into combinations
param_combinations = list(product(*param_grid.values()))
best_val_acc = 0
best_params = None
best_test_acc = 0

# Function to train and evaluate the model
def train_and_evaluate(lr, hidden_dim, weight_decay, dropout_prob):
    model = PNAModelWithAttention(
        in_channels=dataset.num_node_features,
        hidden_channels=hidden_dim,
        out_channels=dataset.num_classes,
        aggregators=aggregators,
        scalers=scalers,
        deg=deg,
        node_embeddings=node_embeddings
    ).to(device)

    # Add dropout to the model
    model.dropout = nn.Dropout(p=dropout_prob)

    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    criterion = nn.CrossEntropyLoss()

    # Training and validation loop
    best_val = 0
    test_acc_at_best_val = 0
    for epoch in range(1, 201):
        # Training
        model.train()
        optimizer.zero_grad()
        out = model(data.x, data.edge_index)
        loss = criterion(out[data.train_mask], data.y[data.train_mask])
        loss.backward()
        optimizer.step()

        # Validation and test
        model.eval()
        with torch.no_grad():
            out = model(data.x, data.edge_index)
            pred = out.argmax(dim=1)
            train_acc = pred[data.train_mask].eq(data.y[data.train_mask]).sum().item() / data.train_mask.sum().item()
            val_acc = pred[data.val_mask].eq(data.y[data.val_mask]).sum().item() / data.val_mask.sum().item()
            test_acc = pred[data.test_mask].eq(data.y[data.test_mask]).sum().item() / data.test_mask.sum().item()

        if val_acc > best_val:
            best_val = val_acc
            test_acc_at_best_val = test_acc

    return best_val, test_acc_at_best_val

# Run the grid search
for params in param_combinations:
    lr, hidden_dim, weight_decay, dropout_prob = params
    print(f"Testing params: lr={lr}, hidden_dim={hidden_dim}, weight_decay={weight_decay}, dropout_prob={dropout_prob}")

    val_acc, test_acc = train_and_evaluate(lr, hidden_dim, weight_decay, dropout_prob)

    if val_acc > best_val_acc:
        best_val_acc = val_acc
        best_test_acc = test_acc
        best_params = params

    print(f"Validation Accuracy: {val_acc:.4f}, Test Accuracy: {test_acc:.4f}")

# Output the best parameters and performance
print("\nBest Hyperparameters:")
print(f"Learning Rate: {best_params[0]}, Hidden Dimension: {best_params[1]}, Weight Decay: {best_params[2]}, Dropout Probability: {best_params[3]}")
print(f"Best Validation Accuracy: {best_val_acc:.4f}")
print(f"Test Accuracy at Best Validation: {best_test_acc:.4f}")


Testing params: lr=0.01, hidden_dim=64, weight_decay=1e-05, dropout_prob=0.2
Validation Accuracy: 0.6800, Test Accuracy: 0.6610
Testing params: lr=0.01, hidden_dim=64, weight_decay=1e-05, dropout_prob=0.3
Validation Accuracy: 0.5680, Test Accuracy: 0.5860
Testing params: lr=0.01, hidden_dim=64, weight_decay=1e-05, dropout_prob=0.5
Validation Accuracy: 0.6180, Test Accuracy: 0.6200
Testing params: lr=0.01, hidden_dim=64, weight_decay=0.0001, dropout_prob=0.2
Validation Accuracy: 0.6620, Test Accuracy: 0.6910
Testing params: lr=0.01, hidden_dim=64, weight_decay=0.0001, dropout_prob=0.3
Validation Accuracy: 0.6940, Test Accuracy: 0.7070
Testing params: lr=0.01, hidden_dim=64, weight_decay=0.0001, dropout_prob=0.5
Validation Accuracy: 0.7100, Test Accuracy: 0.7230
Testing params: lr=0.01, hidden_dim=64, weight_decay=0.001, dropout_prob=0.2
Validation Accuracy: 0.6700, Test Accuracy: 0.6530
Testing params: lr=0.01, hidden_dim=64, weight_decay=0.001, dropout_prob=0.3
Validation Accuracy: 0.7