# Assignment 3 - COMP 511

## Q1 — Node Classification [50%]

Three algorithms compared on Cora, CiteSeer, PubMed, and ogbn-arxiv (all measured by accuracy):

| Algorithm | Features used | Notes |
|-----------|--------------|-------|
| **Label Propagation** | None (structure only) | Deterministic — std = 0 across runs |
| **Node2Vec** | None (structure only) | Embeddings → Logistic Regression |
| **GCN** | Node features (required) | Kipf & Welling (2017) |

**Splits**: Planetoid protocol for Cora/CiteSeer/PubMed (`torch_geometric.datasets.Planetoid`), official OGB split for ogbn-arxiv.

**Results reported**: mean ± std over 10 independent runs.

In [1]:
# ── Imports ──────────────────────────────────────────────────────────────────
import torch
import torch.nn.functional as F
import functools

# PyTorch ≥2.6 defaults weights_only=True in torch.load, which breaks OGB/PyG
# dataset caching (they pickle custom Data classes). Restore the old default.
# Store the true original on the module so re-running this cell is always safe:
# the wrapper always delegates to the real torch.load, never to a previous wrapper.
#
# SECURITY NOTE: weights_only=False re-enables full pickle deserialisation, which
# can execute arbitrary code embedded in a file. Only load files from trusted
# sources while this patch is active.
if not hasattr(torch, '_original_load'):
    torch._original_load = torch.load


# functools.wraps keeps torch.load's name/docstring on the wrapper and exposes the
# original via __wrapped__, so help() and introspection keep working.
@functools.wraps(torch._original_load)
def _patched_load(*args, **kwargs):
    # Only fill in the default; an explicit weights_only=... from the caller wins.
    kwargs.setdefault('weights_only', False)
    return torch._original_load(*args, **kwargs)


torch.load = _patched_load
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

from scipy.sparse import csr_matrix, diags
from sklearn.linear_model import LogisticRegression

from torch_geometric.datasets import Planetoid
import torch_geometric.transforms as T
from torch_geometric.nn import GCNConv, Node2Vec
from ogb.nodeproppred import PygNodePropPredDataset, Evaluator

# Device for the GCN experiments, in priority order: MPS (Apple Silicon),
# then CUDA, then plain CPU. Node2Vec and Label Propagation always use the CPU.
CPU = torch.device('cpu')
GCN_DEVICE = (
    torch.device('mps') if torch.backends.mps.is_available()
    else torch.device('cuda') if torch.cuda.is_available()
    else torch.device('cpu')
)
print(f"GCN device: {GCN_DEVICE}  |  Node2Vec/LP device: cpu")

# Number of independent repetitions reported for every algorithm.
N_RUNS = 10

def set_seed(seed: int) -> None:
    """Seed the Python, NumPy and PyTorch random number generators.

    Args:
        seed: Integer seed applied to every generator.

    Python's built-in ``random`` module is seeded as well, so that any library
    code drawing from it is reproducible too.
    """
    # Local import: the notebook's import cell does not bring in `random`.
    import random

    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        # Explicitly seed every visible CUDA device as well.
        torch.cuda.manual_seed_all(seed)

GCN device: mps  |  Node2Vec/LP device: cpu


In [2]:
# ── Dataset Loading ───────────────────────────────────────────────────────────
#
# Planetoid (Cora / CiteSeer / PubMed): auto-downloaded by torch_geometric.
# Uses the Planetoid split protocol as required by the assignment.
#
# ogbn-arxiv: auto-downloaded by OGB. Uses official year-based split.

# Planetoid benchmarks, stored under their lower-case names. Each dataset is
# created with the NormalizeFeatures transform attached.
planetoid_datasets = {}
for name in ('Cora', 'CiteSeer', 'PubMed'):
    ds = Planetoid(root=f'data/{name}', name=name, transform=T.NormalizeFeatures())
    d = ds[0]
    planetoid_datasets[name.lower()] = ds
    # One summary line per dataset: graph size, feature/class counts, split sizes.
    summary = (
        f"{name:10s}: {d.num_nodes:6d} nodes  {d.num_edges:6d} edges  "
        f"{ds.num_features:4d} features  {ds.num_classes} classes  "
        f"train={d.train_mask.sum().item()} val={d.val_mask.sum().item()} test={d.test_mask.sum().item()}"
    )
    print(summary)

# ogbn-arxiv — multiclass (40 CS subject areas), 128-dim node features
dataset_arxiv = PygNodePropPredDataset(root='data/ogbn-arxiv', name='ogbn-arxiv')
data_arxiv    = dataset_arxiv[0]
split_idx     = dataset_arxiv.get_idx_split()
# Class count is derived from the largest label id present in the data.
n_arxiv_classes = 1 + int(data_arxiv.y.max())

arxiv_summary = (
    f"\nogbn-arxiv: {data_arxiv.num_nodes:,} nodes  {data_arxiv.num_edges:,} edges  "
    f"x={tuple(data_arxiv.x.shape)}  y classes={n_arxiv_classes}"
)
print(arxiv_summary)
arxiv_splits = (
    f"  train={len(split_idx['train']):,}  valid={len(split_idx['valid']):,}  "
    f"test={len(split_idx['test']):,}"
)
print(arxiv_splits)

Cora      :   2708 nodes   10556 edges  1433 features  7 classes  train=140 val=500 test=1000
CiteSeer  :   3327 nodes    9104 edges  3703 features  6 classes  train=120 val=500 test=1000
PubMed    :  19717 nodes   88648 edges   500 features  3 classes  train=60 val=500 test=1000

ogbn-arxiv: 169,343 nodes  1,166,243 edges  x=(169343, 128)  y classes=40
  train=90,941  valid=29,799  test=48,603


### Algorithm 1 — Label Propagation

Classic semi-supervised LP (Zhu et al., 2003).  
No features are used — purely structure-based.  
Update rule: **Y ← α D⁻¹A Y + (1−α) Y₀**, iterated until convergence.  
Because the algorithm is deterministic given the graph and fixed splits, it is executed once and its result is replicated across all 10 runs, so the reported std is exactly 0.

In [3]:
def build_norm_adj(edge_index: torch.Tensor, n_nodes: int):
    """Build the row-normalised adjacency D⁻¹A of the symmetrised graph.

    Args:
        edge_index: Tensor whose two rows hold the source and target node ids of
            every edge. It may live on any device; it is copied to the CPU here.
        n_nodes: Number of nodes, i.e. the side length of the returned matrix.

    Returns:
        A scipy sparse matrix of shape (n_nodes, n_nodes). Every node with at
        least one neighbour has a row summing to 1; isolated nodes keep an
        all-zero row. The underlying adjacency is symmetric and binary, but the
        normalised product D⁻¹A is in general not symmetric.
    """
    # .cpu() makes this safe even if the caller's data was left on an accelerator.
    src, dst = edge_index.detach().cpu().numpy()
    # Add every edge in both directions so the adjacency is symmetric.
    rows = np.concatenate([src, dst])
    cols = np.concatenate([dst, src])
    adj  = csr_matrix((np.ones(rows.shape[0]), (rows, cols)), shape=(n_nodes, n_nodes))
    adj.data[:] = 1.0                          # binarise duplicate entries
    deg  = np.asarray(adj.sum(axis=1)).ravel()
    deg[deg == 0] = 1                          # avoid division by zero for isolated nodes
    return diags(1.0 / deg) @ adj              # row-stochastic


def label_propagate(A_norm, Y0: np.ndarray, alpha=0.85, max_iter=200, tol=1e-6):
    """Run the propagation Y ← α·A_norm·Y + (1-α)·Y0 and return (Y, trace).

    Iteration stops after ``max_iter`` steps, or earlier once the largest
    absolute entry-wise change between two consecutive iterates drops below
    ``tol``. ``trace`` lists that largest change for every step performed.
    """
    current = Y0.copy()
    deltas = []
    restart = (1.0 - alpha) * Y0   # constant restart term, identical every step
    step = 0
    while step < max_iter:
        step += 1
        updated = alpha * (A_norm @ current) + restart
        change = float(np.abs(updated - current).max())
        deltas.append(change)
        current = updated
        if change < tol:
            break
    return current, deltas


def run_lp_planetoid(data, n_classes, alpha=0.85, max_iter=200, n_runs=N_RUNS):
    """Label Propagation on a Planetoid graph.

    Returns (test_accs, val_accs, lp_trace). Both accuracy lists hold the same
    single measurement repeated ``n_runs`` times, because LP is computed once.
    """
    y_true = data.y.numpy().flatten()
    train_nodes = np.flatnonzero(data.train_mask.numpy())
    val_nodes   = np.flatnonzero(data.val_mask.numpy())
    test_nodes  = np.flatnonzero(data.test_mask.numpy())

    # One-hot seed matrix: only training nodes carry label information.
    seeds = np.zeros((data.num_nodes, n_classes))
    seeds[train_nodes, y_true[train_nodes]] = 1.0

    transition = build_norm_adj(data.edge_index, data.num_nodes)
    scores, trace = label_propagate(transition, seeds, alpha=alpha, max_iter=max_iter)

    correct  = scores.argmax(axis=1) == y_true
    test_acc = float(correct[test_nodes].mean())
    val_acc  = float(correct[val_nodes].mean())
    # Deterministic algorithm: the one result stands in for every run.
    return [test_acc] * n_runs, [val_acc] * n_runs, trace


def run_lp_ogb(data, split_idx, n_classes, alpha=0.85, max_iter=200, n_runs=N_RUNS):
    """Label Propagation on an OGB graph (multiclass), scored by accuracy.

    ``split_idx`` must provide 'train', 'valid' and 'test' index tensors.
    Returns (test_accs, val_accs, lp_trace); the accuracy lists repeat the one
    computed value ``n_runs`` times.
    """
    y_true = data.y.numpy().flatten()
    train_nodes, valid_nodes, test_nodes = (
        split_idx[part].numpy() for part in ('train', 'valid', 'test')
    )

    # One-hot seed matrix: only training nodes carry label information.
    seeds = np.zeros((data.num_nodes, n_classes))
    seeds[train_nodes, y_true[train_nodes]] = 1.0

    transition = build_norm_adj(data.edge_index, data.num_nodes)
    scores, trace = label_propagate(transition, seeds, alpha=alpha, max_iter=max_iter)

    correct  = scores.argmax(axis=1) == y_true
    test_acc = float(correct[test_nodes].mean())
    val_acc  = float(correct[valid_nodes].mean())
    return [test_acc] * n_runs, [val_acc] * n_runs, trace

### Algorithm 2 — Node2Vec

Random-walk-based node embedding (Grover & Leskovec, 2016).  
No features are used — purely structure-based.  
Embeddings are trained via the skip-gram objective with negative sampling; a Logistic Regression classifier is then fitted on the embeddings of the training nodes and evaluated on the validation and test nodes.  
Stochastic across runs (different seeds → different walk samples and optimisation trajectories) → non-zero variance.

In [4]:
def train_node2vec_model(
    edge_index, n_nodes,
    embedding_dim=128, walk_length=20, context_size=10,
    walks_per_node=10, p=1.0, q=1.0,
    n_epochs=50, lr=0.01, batch_size=256, seed=0
):
    """Fit a Node2Vec model on the CPU and return (embeddings, epoch_losses).

    ``embeddings`` is the result of calling the trained model with no
    arguments, converted to a NumPy array. ``epoch_losses`` holds the mean
    batch loss of every epoch. The RNGs are seeded with ``seed`` before the
    model is built.
    """
    set_seed(seed)

    n2v_config = dict(
        embedding_dim=embedding_dim,
        walk_length=walk_length,
        context_size=context_size,
        walks_per_node=walks_per_node,
        p=p, q=q,
        num_negative_samples=1,
        sparse=True,          # sparse gradients, paired with SparseAdam below
        num_nodes=n_nodes,
    )
    model = Node2Vec(edge_index, **n2v_config).to(CPU)

    loader    = model.loader(batch_size=batch_size, shuffle=True, num_workers=0)
    optimizer = torch.optim.SparseAdam(model.parameters(), lr=lr)
    n_batches = len(loader)

    epoch_losses = []
    for _ in range(n_epochs):
        model.train()
        running = 0.0
        for pos_rw, neg_rw in loader:
            optimizer.zero_grad()
            batch_loss = model.loss(pos_rw, neg_rw)
            batch_loss.backward()
            optimizer.step()
            running += batch_loss.item()
        epoch_losses.append(running / n_batches)

    model.eval()
    with torch.no_grad():
        embeddings = model().cpu().numpy()
    return embeddings, epoch_losses


def run_node2vec_planetoid(data, n_classes, n_epochs=50, n_runs=N_RUNS):
    """Node2Vec + Logistic Regression on a Planetoid graph.

    Each run trains fresh embeddings (seed ``run * 7``), fits the classifier on
    the training nodes and scores it on the test and validation nodes.
    ``n_classes`` is accepted for signature parity but not used.
    Returns (test_accs, val_accs, all_losses).
    """
    y = data.y.numpy().flatten()
    masks = {
        'train': data.train_mask.numpy(),
        'val':   data.val_mask.numpy(),
        'test':  data.test_mask.numpy(),
    }

    test_accs, val_accs, all_losses = [], [], []
    for run in range(n_runs):
        emb, run_losses = train_node2vec_model(
            data.edge_index, data.num_nodes,
            embedding_dim=128, walk_length=20, context_size=10,
            walks_per_node=10, n_epochs=n_epochs, lr=0.01, seed=run * 7
        )
        classifier = LogisticRegression(max_iter=1000, random_state=run, C=1.0)
        classifier.fit(emb[masks['train']], y[masks['train']])

        test_score = classifier.score(emb[masks['test']], y[masks['test']])
        val_score  = classifier.score(emb[masks['val']],  y[masks['val']])
        test_accs.append(test_score)
        val_accs.append(val_score)
        all_losses.append(run_losses)
        print(f"  Node2Vec run {run+1:02d}/{n_runs}: test={test_accs[-1]:.4f}")
    return test_accs, val_accs, all_losses


def run_node2vec_ogb(data, split_idx, n_epochs=10, n_runs=N_RUNS):
    """Node2Vec + Logistic Regression on an OGB graph (multiclass, accuracy).

    ``split_idx`` must provide 'train', 'valid' and 'test' index tensors. Each
    run trains fresh embeddings (seed ``run * 7``) and a fresh classifier.
    Returns (test_accs, val_accs, all_losses).
    """
    y = data.y.numpy().flatten()
    nodes = {part: split_idx[part].numpy() for part in ('train', 'valid', 'test')}

    test_accs, val_accs, all_losses = [], [], []
    for run in range(n_runs):
        emb, run_losses = train_node2vec_model(
            data.edge_index, data.num_nodes,
            embedding_dim=128, walk_length=20, context_size=10,
            walks_per_node=5, n_epochs=n_epochs, lr=0.01,
            batch_size=512, seed=run * 7
        )
        classifier = LogisticRegression(max_iter=500, random_state=run, C=1.0)
        classifier.fit(emb[nodes['train']], y[nodes['train']])

        test_score = classifier.score(emb[nodes['test']], y[nodes['test']])
        val_score  = classifier.score(emb[nodes['valid']], y[nodes['valid']])
        test_accs.append(test_score)
        val_accs.append(val_score)
        all_losses.append(run_losses)
        print(f"  Node2Vec ogb run {run+1:02d}/{n_runs}: test={test_accs[-1]:.4f}")
    return test_accs, val_accs, all_losses

### Algorithm 3 — Graph Convolutional Network (GCN)

Standard 2-layer GCN (Kipf & Welling, 2017) for Planetoid datasets.  
3-layer GCN with BatchNorm for ogbn-arxiv (larger graph benefits from deeper model).  
**Node features are used** (required for GCN per the assignment).  
Stochastic across runs (random weight initialisation + stochastic optimisation) → non-zero variance.

In [5]:
class GCN(torch.nn.Module):
    """Two-layer graph convolutional network producing per-node class logits."""

    def __init__(self, in_ch, hidden_ch, out_ch, dropout=0.5):
        super().__init__()
        self.conv1   = GCNConv(in_ch, hidden_ch)
        self.conv2   = GCNConv(hidden_ch, out_ch)
        # Dropout probability applied to the hidden representation.
        self.dropout = dropout

    def forward(self, x, edge_index):
        """Return the raw (un-normalised) class scores for every node."""
        hidden = F.relu(self.conv1(x, edge_index))
        # Dropout is only active while the module is in training mode.
        hidden = F.dropout(hidden, p=self.dropout, training=self.training)
        logits = self.conv2(hidden, edge_index)
        return logits


class GCN3(torch.nn.Module):
    """Three-layer GCN with BatchNorm after the first two convolutions.

    Intended for larger graphs such as ogbn-arxiv.
    """

    def __init__(self, in_ch, hidden_ch, out_ch, dropout=0.5):
        super().__init__()
        self.conv1 = GCNConv(in_ch, hidden_ch)
        self.bn1   = torch.nn.BatchNorm1d(hidden_ch)
        self.conv2 = GCNConv(hidden_ch, hidden_ch)
        self.bn2   = torch.nn.BatchNorm1d(hidden_ch)
        self.conv3 = GCNConv(hidden_ch, out_ch)
        # Dropout probability applied after each hidden block.
        self.dropout = dropout

    def forward(self, x, edge_index):
        """Return the raw (un-normalised) class scores for every node."""
        # Two identical hidden blocks: conv → batch-norm → ReLU → dropout.
        hidden_blocks = ((self.conv1, self.bn1), (self.conv2, self.bn2))
        for conv, norm in hidden_blocks:
            x = F.relu(norm(conv(x, edge_index)))
            x = F.dropout(x, p=self.dropout, training=self.training)
        return self.conv3(x, edge_index)


def run_gcn_planetoid(data, n_classes, hidden=256, n_epochs=200,
                      lr=1e-2, wd=5e-4, n_runs=N_RUNS):
    """Train the 2-layer GCN on a Planetoid dataset, once per run.

    Args:
        data: Graph object providing x, edge_index, y and the train/val/test masks.
        n_classes: Number of output classes.
        hidden: Hidden layer width.
        n_epochs: Full-batch training epochs per run.
        lr: Adam learning rate.
        wd: Adam weight decay.
        n_runs: Number of independent runs; run ``r`` is seeded with ``r * 13``.

    Returns:
        (test_accs, histories). ``test_accs`` has one test accuracy per run,
        measured with the weights after the final epoch (validation metrics are
        only logged, not used for model selection). ``histories`` has one dict
        per run with per-epoch 'train_loss', 'val_loss' and 'val_acc' lists.
    """
    # NOTE: the original code moved the data back with `data_dev.to(CPU)` without
    # rebinding, i.e. it relies on .to() moving the caller's object in place.
    # The try/finally guarantees the move back even if training fails or is
    # interrupted, so later CPU-only code (.numpy() calls) keeps working.
    data_dev = data.to(GCN_DEVICE)
    all_test, all_hist = [], []

    try:
        # Loop-invariant tensors, extracted once instead of every epoch.
        x, edge_index = data_dev.x, data_dev.edge_index
        y = data_dev.y.reshape(-1)
        train_mask, val_mask, test_mask = (
            data_dev.train_mask, data_dev.val_mask, data_dev.test_mask
        )
        y_train, y_val, y_test = y[train_mask], y[val_mask], y[test_mask]

        for run in range(n_runs):
            set_seed(run * 13)
            model = GCN(data.num_node_features, hidden, n_classes).to(GCN_DEVICE)
            opt   = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=wd)
            crit  = torch.nn.CrossEntropyLoss()
            hist  = {'train_loss': [], 'val_loss': [], 'val_acc': []}

            for epoch in range(n_epochs):
                # One full-batch optimisation step on the training nodes.
                model.train()
                opt.zero_grad()
                out  = model(x, edge_index)
                loss = crit(out[train_mask], y_train)
                loss.backward()
                opt.step()

                # Validation metrics with dropout disabled.
                model.eval()
                with torch.no_grad():
                    out      = model(x, edge_index)
                    val_out  = out[val_mask]
                    val_loss = crit(val_out, y_val).item()
                    val_acc  = (val_out.argmax(1) == y_val).float().mean().item()
                hist['train_loss'].append(loss.item())
                hist['val_loss'].append(val_loss)
                hist['val_acc'].append(val_acc)

            # Final test evaluation (also covers the n_epochs == 0 case).
            model.eval()
            with torch.no_grad():
                out  = model(x, edge_index)
                pred = out[test_mask].argmax(1)
                test_acc = (pred == y_test).float().mean().item()
            all_test.append(test_acc)
            all_hist.append(hist)
            print(f"  GCN run {run+1:02d}/{n_runs}: test={test_acc:.4f}")
    finally:
        data_dev.to(CPU)

    return all_test, all_hist


def run_gcn_ogb(data, split_idx, n_classes, hidden=256, n_epochs=200,
                lr=1e-2, wd=5e-4, n_runs=N_RUNS):
    """Train the 3-layer GCN on an OGB dataset (multiclass), scored by accuracy.

    Args:
        data: Graph object providing x, edge_index and y.
        split_idx: Mapping with 'train', 'valid' and 'test' index tensors.
        n_classes: Number of output classes.
        hidden: Hidden layer width.
        n_epochs: Full-batch training epochs per run.
        lr: Adam learning rate.
        wd: Adam weight decay.
        n_runs: Number of independent runs; run ``r`` is seeded with ``r * 13``.

    Returns:
        (test_accs, histories). ``test_accs`` has one test accuracy per run,
        measured with the weights after the final epoch (validation metrics are
        only logged, not used for model selection). ``histories`` has one dict
        per run with per-epoch 'train_loss', 'val_loss' and 'val_acc' lists.
    """
    # NOTE: the original code moved the data back with `data_dev.to(CPU)` without
    # rebinding, i.e. it relies on .to() moving the caller's object in place.
    # The try/finally guarantees the move back even if training fails or is
    # interrupted, so later CPU-only code (.numpy() calls) keeps working.
    data_dev = data.to(GCN_DEVICE)
    all_test, all_hist = [], []

    try:
        train_idx = split_idx['train'].to(GCN_DEVICE)
        valid_idx = split_idx['valid'].to(GCN_DEVICE)
        test_idx  = split_idx['test'].to(GCN_DEVICE)
        labels    = data_dev.y.reshape(-1)
        x, edge_index = data_dev.x, data_dev.edge_index
        # Loop-invariant label slices, extracted once instead of every epoch.
        y_train, y_valid, y_test = labels[train_idx], labels[valid_idx], labels[test_idx]

        for run in range(n_runs):
            set_seed(run * 13)
            model = GCN3(data.x.size(1), hidden, n_classes).to(GCN_DEVICE)
            opt   = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=wd)
            crit  = torch.nn.CrossEntropyLoss()
            hist  = {'train_loss': [], 'val_loss': [], 'val_acc': []}

            for epoch in range(n_epochs):
                # One full-batch optimisation step on the training nodes.
                model.train()
                opt.zero_grad()
                out  = model(x, edge_index)
                loss = crit(out[train_idx], y_train)
                loss.backward()
                opt.step()

                # Validation metrics in eval mode (dropout off, BatchNorm frozen).
                model.eval()
                with torch.no_grad():
                    out      = model(x, edge_index)
                    val_out  = out[valid_idx]
                    val_loss = crit(val_out, y_valid).item()
                    val_acc  = (val_out.argmax(1) == y_valid).float().mean().item()
                hist['train_loss'].append(loss.item())
                hist['val_loss'].append(val_loss)
                hist['val_acc'].append(val_acc)

            # Final test evaluation (also covers the n_epochs == 0 case).
            model.eval()
            with torch.no_grad():
                out  = model(x, edge_index)
                pred = out[test_idx].argmax(1)
                test_acc = (pred == y_test).float().mean().item()
            all_test.append(test_acc)
            all_hist.append(hist)
            print(f"  GCN ogb run {run+1:02d}/{n_runs}: test={test_acc:.4f}")
    finally:
        data_dev.to(CPU)

    return all_test, all_hist

In [None]:
# ── Run All Experiments ───────────────────────────────────────────────────────
#
# results[dataset][algorithm] = {
#   'test'    : list of N_RUNS test scores,
#   'val'     : list of N_RUNS val  scores,   (LP / Node2Vec)
#   'history' : list of N_RUNS history dicts, (Node2Vec, GCN)
#   'lp_trace': convergence trace,            (LP only)
# }

results = {}

# ── Planetoid ─────────────────────────────────────────────────────────────────
for ds_name in ('cora', 'citeseer', 'pubmed'):
    ds = planetoid_datasets[ds_name]
    data, n_classes = ds[0], ds.num_classes
    print(f"\n{'='*60}\n{ds_name.upper()}  ({n_classes} classes)\n{'='*60}")
    results[ds_name] = {}

    # Label Propagation (structure only, computed once)
    print("\n── Label Propagation ──")
    test, val, trace = run_lp_planetoid(data, n_classes)
    results[ds_name]['LP'] = dict(test=test, val=val, lp_trace=trace)
    print(f"  LP  test acc : {np.mean(test):.4f} ± {np.std(test):.4f}")

    # Node2Vec embeddings + Logistic Regression
    print("\n── Node2Vec ──")
    test, val, hists = run_node2vec_planetoid(data, n_classes, n_epochs=50)
    results[ds_name]['Node2Vec'] = dict(test=test, val=val, history=hists)
    print(f"  N2V test acc : {np.mean(test):.4f} ± {np.std(test):.4f}")

    # GCN on node features
    print("\n── GCN ──")
    test, hists = run_gcn_planetoid(data, n_classes, n_epochs=200)
    results[ds_name]['GCN'] = dict(test=test, history=hists)
    print(f"  GCN test acc : {np.mean(test):.4f} ± {np.std(test):.4f}")

# ── ogbn-arxiv ────────────────────────────────────────────────────────────────
print(f"\n{'='*60}\nOGBN-ARXIV  ({n_arxiv_classes} classes, metric: Accuracy)\n{'='*60}")
results['ogbn-arxiv'] = {}

print("\n── Label Propagation ──")
test, val, trace = run_lp_ogb(data_arxiv, split_idx, n_arxiv_classes)
results['ogbn-arxiv']['LP'] = dict(test=test, val=val, lp_trace=trace)
print(f"  LP  test acc : {np.mean(test):.4f} ± {np.std(test):.4f}")

print("\n── Node2Vec ──")
test, val, hists = run_node2vec_ogb(data_arxiv, split_idx, n_epochs=10)
results['ogbn-arxiv']['Node2Vec'] = dict(test=test, val=val, history=hists)
print(f"  N2V test acc : {np.mean(test):.4f} ± {np.std(test):.4f}")

print("\n── GCN ──")
test, hists = run_gcn_ogb(data_arxiv, split_idx, n_arxiv_classes, n_epochs=200)
results['ogbn-arxiv']['GCN'] = dict(test=test, history=hists)
print(f"  GCN test acc : {np.mean(test):.4f} ± {np.std(test):.4f}")


CORA  (7 classes)

── Label Propagation ──
  LP  test acc : 0.6940 ± 0.0000

── Node2Vec ──


In [None]:
# ── Training Curves ───────────────────────────────────────────────────────────
# One figure per dataset, three panels:
#   Left  : LP convergence (max label change per iteration, log-scale)
#   Centre: Node2Vec training loss (skip-gram, mean ± std across runs)
#   Right : GCN train loss and validation loss (mean ± std across runs)

def plot_training_curves(ds_name, res, y_label_gcn='Cross-Entropy Loss'):
    """Draw, save and show the three-panel training figure for one dataset.

    ``res`` is the per-dataset results dict with 'LP', 'Node2Vec' and 'GCN'
    entries. The figure is written to ``training_curves_<ds_name>.pdf``.
    """
    fig, (ax_lp, ax_n2v, ax_gcn) = plt.subplots(1, 3, figsize=(15, 4))
    fig.suptitle(f'{ds_name.upper()} — Training Curves', fontsize=13, fontweight='bold')

    # ── Left: LP convergence trace on a log-scaled y axis ─────────────────────
    lp_trace = res['LP']['lp_trace']
    ax_lp.semilogy(range(1, len(lp_trace) + 1), lp_trace, color='steelblue', lw=2)
    ax_lp.set_xlabel('Iteration')
    ax_lp.set_ylabel('Max Label Change (log scale)')
    ax_lp.set_title('Label Propagation — Convergence')
    ax_lp.grid(True, alpha=0.3)

    # ── Centre: Node2Vec loss, mean ± std over runs ───────────────────────────
    n2v_losses = np.array(res['Node2Vec']['history'])   # (n_runs, n_epochs)
    n2v_mean, n2v_std = n2v_losses.mean(0), n2v_losses.std(0)
    n2v_epochs = range(1, len(n2v_mean) + 1)
    ax_n2v.plot(n2v_epochs, n2v_mean, color='darkorange', lw=2, label='train loss (mean)')
    ax_n2v.fill_between(n2v_epochs, n2v_mean - n2v_std, n2v_mean + n2v_std,
                        alpha=0.25, color='darkorange', label='±1 std')
    ax_n2v.set_xlabel('Epoch')
    ax_n2v.set_ylabel('Skip-gram Loss')
    ax_n2v.set_title('Node2Vec — Training Loss')
    ax_n2v.legend(fontsize=9)
    ax_n2v.grid(True, alpha=0.3)

    # ── Right: GCN train and validation loss, mean ± std over runs ────────────
    gcn_runs   = res['GCN']['history']
    train_loss = np.array([run_hist['train_loss'] for run_hist in gcn_runs])
    val_loss   = np.array([run_hist['val_loss']   for run_hist in gcn_runs])
    gcn_epochs = range(1, train_loss.shape[1] + 1)
    for curve, colour, tag in ((train_loss, 'forestgreen', 'train loss'),
                               (val_loss,   'crimson',     'val loss')):
        centre, spread = curve.mean(0), curve.std(0)
        ax_gcn.plot(gcn_epochs, centre, color=colour, lw=2, label=tag)
        ax_gcn.fill_between(gcn_epochs, centre - spread, centre + spread,
                            alpha=0.2, color=colour)
    ax_gcn.set_xlabel('Epoch')
    ax_gcn.set_ylabel(y_label_gcn)
    ax_gcn.set_title('GCN — Train / Val Loss')
    ax_gcn.legend(fontsize=9)
    ax_gcn.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.savefig(f'training_curves_{ds_name}.pdf', bbox_inches='tight')
    plt.show()


# One figure per Planetoid dataset, then one for ogbn-arxiv.
for ds_name in ('cora', 'citeseer', 'pubmed'):
    plot_training_curves(ds_name=ds_name, res=results[ds_name])

plot_training_curves(ds_name='ogbn-arxiv', res=results['ogbn-arxiv'])

In [None]:
# ── Per-Dataset Result Tables ─────────────────────────────────────────────────

for ds_name in ('cora', 'citeseer', 'pubmed', 'ogbn-arxiv'):
    metric = 'Accuracy'
    # Test scores per algorithm, in the column order of the printed table.
    test_scores = {algo: results[ds_name][algo]['test']
                   for algo in ('LP', 'Node2Vec', 'GCN')}

    # One row per run, every score pre-formatted to four decimals.
    rows = [
        {'Run': run + 1,
         **{algo: f"{vals[run]:.4f}" for algo, vals in test_scores.items()}}
        for run in range(N_RUNS)
    ]
    df_per = pd.DataFrame(rows).set_index('Run')

    # Append mean and std rows
    mean_row = {algo: f"{np.mean(vals):.4f}" for algo, vals in test_scores.items()}
    std_row  = {algo: f"{np.std(vals):.4f}"  for algo, vals in test_scores.items()}
    df_per.loc['mean'] = mean_row
    df_per.loc['std']  = std_row

    print(f"\n{'─'*55}")
    print(f" {ds_name.upper()}  |  Test {metric} per run")
    print(f"{'─'*55}")
    print(df_per.to_string())

In [None]:
# ── Summary Table (all datasets × all algorithms) ─────────────────────────────

# Row/column order of the summary table. pivot_table sorts its index
# alphabetically, so the dataset order is restored explicitly below to match
# the order used everywhere else in the notebook.
dataset_order = ['cora', 'citeseer', 'pubmed', 'ogbn-arxiv']
algo_order    = ['LP', 'Node2Vec', 'GCN']

summary_rows = []
for ds_name in dataset_order:
    for algo in algo_order:
        vals = results[ds_name][algo]['test']
        summary_rows.append({
            'Dataset': ds_name, 'Algorithm': algo, 'Metric': 'Accuracy',
            'Mean': np.mean(vals), 'Std': np.std(vals),
            'Result': f"{np.mean(vals):.4f} ± {np.std(vals):.4f}"
        })

df_summary = pd.DataFrame(summary_rows)
# 'Result' holds strings, so 'first' just picks the single cell per (dataset, algo).
pivot = df_summary.pivot_table(values='Result', index='Dataset',
                                columns='Algorithm', aggfunc='first')
pivot = pivot.reindex(index=dataset_order, columns=algo_order)
pivot.index.name = 'Dataset'

print("="*70)
# Use N_RUNS so the header cannot drift from the actual number of runs.
print(f"Q1 — Node Classification: Test Accuracy (mean ± std, {N_RUNS} runs)")
print("="*70)
print(pivot.to_string())

# ── Grouped Bar Plot ──────────────────────────────────────────────────────────
# One panel per dataset; each panel shows mean test accuracy per algorithm with
# ±1 std error bars and the mean printed above every bar.
ds_list = ['cora', 'citeseer', 'pubmed', 'ogbn-arxiv']
titles  = ['Cora\n(Accuracy)', 'CiteSeer\n(Accuracy)',
           'PubMed\n(Accuracy)', 'ogbn-arxiv\n(Accuracy)']
algos   = ['LP', 'Node2Vec', 'GCN']
colors  = ['steelblue', 'darkorange', 'forestgreen']

fig, axes = plt.subplots(1, 4, figsize=(17, 5), sharey=False)
for idx, ds_name in enumerate(ds_list):
    ax, title = axes[idx], titles[idx]
    per_algo_scores = [results[ds_name][a]['test'] for a in algos]
    means = [np.mean(scores) for scores in per_algo_scores]
    stds  = [np.std(scores)  for scores in per_algo_scores]

    bars = ax.bar(algos, means, yerr=stds, capsize=7,
                  color=colors, alpha=0.82, edgecolor='black', lw=0.7)
    ax.set_title(title, fontsize=11)
    # Leave headroom for the value labels, capped slightly above 1.
    y_top = max(means) + max(stds) + 0.15
    ax.set_ylim(0, min(1.12, y_top))
    ax.set_ylabel('Score')
    ax.grid(axis='y', alpha=0.3)

    for bar, m, s in zip(bars, means, stds):
        x_mid = bar.get_x() + bar.get_width() / 2
        ax.text(x_mid, m + s + 0.01, f'{m:.3f}',
                ha='center', va='bottom', fontsize=8.5)

fig.suptitle('Q1 — Node Classification: Algorithm Comparison across Datasets',
             fontsize=13, fontweight='bold')
plt.tight_layout()
plt.savefig('q1_comparison_barplot.pdf', bbox_inches='tight')
plt.show()