# GCN Analysis on Leeds Road Network
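This notebook computes PageRank on the walkable street network around Leeds city centre, trains a two-layer GCN to identify the top-100 PageRank nodes, and visualizes the GCN's output-layer values using PCA.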

In [1]:
import osmnx as ox
import networkx as nx
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
from torch_geometric.nn import GCNConv
from sklearn.decomposition import PCA
import numpy as np

In [2]:
# Load road network
# Fetch the 'walk' network around the given (latitude, longitude) point;
# `dist` is in metres according to the OSMnx documentation.
center = (53.7996, -1.5491)
G = nx.convert_node_labels_to_integers(
    ox.graph_from_point(center, dist=3000, network_type='walk'),
    label_attribute="old_id",  # original node label is kept under this attribute
)

In [3]:
# Compute PageRank
# nx.DiGraph(G) drops any parallel edges G may carry, so each ordered node
# pair contributes at most one edge to the ranking.
pagerank_scores = nx.pagerank(nx.DiGraph(G), alpha=0.85)

# One row per node, highest score first.
pagerank_df = pd.DataFrame(
    pagerank_scores.items(),
    columns=["Node", "PageRank"],
)
pagerank_df = pagerank_df.sort_values("PageRank", ascending=False)

# The 100 best-ranked nodes become the positive class for the GCN.
top_pagerank_nodes = set(pagerank_df["Node"].head(100))

In [4]:
# Prepare PyTorch Geometric input
# Seed the global torch RNG so the random node features (and any weights
# initialized afterwards from the same RNG) are identical on every
# Restart & Run All; without it each run trains on different inputs.
torch.manual_seed(42)

# Map every node label to its position in G.nodes(); row i of `x` and `y`
# refers to the i-th node in that iteration order.
node_mapping = {node: idx for idx, node in enumerate(G.nodes())}
edge_list = [(node_mapping[u], node_mapping[v]) for u, v in G.edges()]
# List of (source, target) pairs -> tensor of shape (2, num_edges).
edge_index = torch.tensor(edge_list, dtype=torch.long).t().contiguous()
num_nodes = G.number_of_nodes()
# The graph carries no node attributes here; features are uniform noise.
x = torch.rand((num_nodes, 16))  # Random features
# Binary target of shape (num_nodes, 1): 1.0 for nodes in top_pagerank_nodes.
y = torch.tensor(
    [1 if node in top_pagerank_nodes else 0 for node in G.nodes()],
    dtype=torch.float32,
).unsqueeze(1)

In [5]:
# Define GCN
class GCN(nn.Module):
    """Two-layer graph convolutional network built from ``GCNConv`` layers.

    Args:
        in_channels: Number of input features per node (default 16).
        hidden_channels: Width of the hidden layer (default 32).
        out_channels: Number of output channels per node (default 2).

    The defaults reproduce the original fixed 16 -> 32 -> 2 architecture, so
    ``GCN()`` behaves exactly as before.
    """

    def __init__(self, in_channels=16, hidden_channels=32, out_channels=2):
        super().__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, out_channels)

    def forward(self, x, edge_index):
        """Return the raw (un-activated) output of the second layer.

        Args:
            x: Node feature matrix passed to the first convolution.
            edge_index: Graph connectivity passed to both convolutions.

        Returns:
            The output of ``conv2`` applied to the ReLU-activated output of
            ``conv1``; no activation is applied to the result.
        """
        hidden = self.conv1(x, edge_index).relu()
        return self.conv2(hidden, edge_index)

# Network, loss, and optimizer used by the training cell.
model = GCN()
# Binary cross-entropy applied directly to logits (sigmoid is folded in).
# NOTE(review): positives are a small fraction of all nodes; consider
# `pos_weight` if the model collapses to predicting the negative class.
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(params=model.parameters(), lr=0.01)

In [6]:
# Train model
# Full-batch training: every step feeds all nodes through the network.
# Only output channel 0 is compared against `y`; the second output channel
# does not enter the loss.
model.train()  # nothing inside the loop leaves training mode, so set it once
for epoch in range(200):
    optimizer.zero_grad()
    out = model(x, edge_index)
    logits = out[:, :1]  # keep the column dimension so it matches y's shape
    loss = criterion(logits, y)
    loss.backward()
    optimizer.step()
    # Report the loss every 20th epoch.
    if not epoch % 20:
        print(f"Epoch {epoch}, Loss: {loss.item():.4f}")

Epoch 0, Loss: 0.6050
Epoch 20, Loss: 0.0470
Epoch 40, Loss: 0.0426
Epoch 60, Loss: 0.0385
Epoch 80, Loss: 0.0385
Epoch 100, Loss: 0.0385
Epoch 120, Loss: 0.0385
Epoch 140, Loss: 0.0384
Epoch 160, Loss: 0.0384
Epoch 180, Loss: 0.0384


In [None]:
# Visualize with PCA
# Inference only: switch to eval mode and disable autograd so no graph is
# built for the forward pass.
model.eval()
with torch.no_grad():
    out = model(x, edge_index).numpy()

# NOTE: with the 2-channel output of the GCN defined in this notebook, a
# 2-component PCA only centres and rotates the points; it does not reduce
# dimensionality.
pca = PCA(n_components=2)
emb_2d = pca.fit_transform(out)

# Explicit figure/axes interface so the colorbar is tied to this scatter.
fig, ax = plt.subplots(figsize=(10, 6))
points = ax.scatter(
    emb_2d[:, 0],
    emb_2d[:, 1],
    c=y.squeeze(1).numpy(),  # colour by the binary top-100 label
    cmap='coolwarm',
    alpha=0.6,
)
ax.set_title("GCN Node Embedding Visualization (PCA)")
ax.set_xlabel("Component 1")
ax.set_ylabel("Component 2")
fig.colorbar(points, ax=ax, label="PageRank Top100 (1=Yes, 0=No)")
fig.tight_layout()
plt.show()