# Dry Run: HGNN on Cora

In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F

def hypergraph_construction(H):
    """Build the normalized hypergraph propagation matrix from an incidence matrix.

    Computes ``G = Dv^{-1/2} @ H @ De^{-1} @ H^T @ Dv^{-1/2}``, where ``Dv`` holds
    the row sums of ``H`` and ``De`` holds the column sums of ``H``.

    Args:
        H: 2-D incidence tensor. It is cast to float before use.

    Returns:
        A square float tensor with one row and one column per row of ``H``.
    """
    H = H.float()

    Dv = torch.sum(H, dim=1)
    De = torch.sum(H, dim=0)

    # Avoid division by zero: rows/columns that sum to zero get degree 1, so
    # their (all-zero) entries stay zero instead of becoming inf/NaN.
    Dv = torch.where(Dv == 0, torch.ones_like(Dv), Dv)
    De = torch.where(De == 0, torch.ones_like(De), De)

    dv_inv_sqrt = torch.pow(Dv, -0.5)
    de_inv = torch.pow(De, -1.0)

    # Multiplying by a diagonal matrix is just a row/column scaling, so use
    # broadcasting rather than materialising dense diag matrices (the one for
    # De would be num_hyperedges x num_hyperedges). The multiplication order
    # matches the matrix-product chain in the formula above.
    scaled = dv_inv_sqrt.unsqueeze(1) * H * de_inv.unsqueeze(0)
    G = (scaled @ H.t()) * dv_inv_sqrt.unsqueeze(0)
    return G

class HGNNLayer(nn.Module):
    """One hypergraph convolution: propagate features with ``G``, then project."""

    def __init__(self, in_features, out_features):
        super().__init__()
        # Bias-free linear projection applied to the propagated features.
        self.linear = nn.Linear(
            in_features=in_features,
            out_features=out_features,
            bias=False,
        )

    def forward(self, x, G):
        """Return ``linear(G @ x)`` for features ``x`` and propagation matrix ``G``."""
        propagated = torch.matmul(G, x)
        return self.linear(propagated)

class HGNN(nn.Module):
    """Two stacked ``HGNNLayer`` modules with a ReLU between them."""

    def __init__(self, in_dim, hidden_dim, out_dim):
        super().__init__()
        self.layer1 = HGNNLayer(in_dim, hidden_dim)
        self.layer2 = HGNNLayer(hidden_dim, out_dim)

    def forward(self, x, G):
        """Return the second layer's raw outputs (no activation is applied to them)."""
        hidden = torch.relu(self.layer1(x, G))
        return self.layer2(hidden, G)


In [4]:
import torch
import numpy as np
from sklearn.model_selection import StratifiedShuffleSplit

def generate_planetoid_split(labels, num_classes=7, train_per_class=20, val_size=500, test_size=1000, seed=42):
    """Draw a random train/val/test index split with a fixed number of training nodes per class.

    For each class ``0 .. num_classes - 1``, ``train_per_class`` indices are
    sampled without replacement. The remaining indices are shuffled; the first
    ``val_size`` become the validation set and the next ``test_size`` the test
    set. If fewer indices remain than requested, the validation/test sets are
    correspondingly shorter (plain slicing, no error).

    A private ``RandomState(seed)`` is used, so the split is identical to the
    one produced by seeding the global legacy numpy RNG with ``seed``, but the
    process-wide ``np.random`` state is left untouched.

    Args:
        labels: 1-D integer class labels, as a torch tensor or any array-like.
        num_classes: number of classes to sample training nodes for.
        train_per_class: training indices drawn per class.
        val_size: maximum number of validation indices.
        test_size: maximum number of test indices.
        seed: seed for the private random generator.

    Returns:
        Tuple ``(train_idx, val_idx, test_idx)`` of int64 tensors.

    Raises:
        ValueError: if some class has fewer than ``train_per_class`` samples.
    """
    rng = np.random.RandomState(seed)

    if torch.is_tensor(labels):
        labels = labels.detach().cpu().numpy()
    else:
        labels = np.asarray(labels)
    idx = np.arange(len(labels))

    per_class = []
    for cls in range(num_classes):
        cls_idx = idx[labels == cls]
        if len(cls_idx) < train_per_class:
            raise ValueError(
                f"class {cls} has only {len(cls_idx)} samples; "
                f"cannot draw {train_per_class} training indices"
            )
        per_class.append(rng.choice(cls_idx, train_per_class, replace=False))
    train_idx = np.concatenate(per_class) if per_class else np.empty(0, dtype=idx.dtype)

    # setdiff1d returns the sorted leftover indices; shuffle before slicing.
    remaining = np.setdiff1d(idx, train_idx)
    rng.shuffle(remaining)

    val_idx = remaining[:val_size]
    test_idx = remaining[val_size:val_size + test_size]

    def _as_long(arr):
        # astype copies, so the returned tensors do not alias `remaining`.
        return torch.from_numpy(np.asarray(arr).astype(np.int64))

    return _as_long(train_idx), _as_long(val_idx), _as_long(test_idx)


In [5]:
import torch
import torch.nn.functional as F
from torch_geometric.datasets import Planetoid
# from hgnn_model import HGNN, hypergraph_construction
# from utils import generate_planetoid_split

# Seed torch so weight initialisation (and therefore the logged curves) is repeatable.
SEED = 42
torch.manual_seed(SEED)

# Load Cora (citation graph)
data = Planetoid(root='/tmp/Cora', name='Cora')[0]
features, labels = data.x, data.y
edge_index = data.edge_index
num_classes = labels.max().item() + 1

# Construct hyperedges: treat each undirected edge as a hyperedge.
# Sorting each (u, v) pair makes both directions of an edge identical, so
# unique() keeps exactly one hyperedge per undirected edge.
edges = torch.sort(edge_index.t(), dim=1).values
unique_edges = torch.unique(edges, dim=0)

num_nodes = features.shape[0]
num_edges = unique_edges.shape[0]
H = torch.zeros((num_nodes, num_edges))
# Vectorised fill: column j gets a 1 in the rows of both endpoints of edge j.
edge_cols = torch.arange(num_edges)
H[unique_edges[:, 0], edge_cols] = 1.0
H[unique_edges[:, 1], edge_cols] = 1.0

# Normalize features: zero out NaN/inf (without mutating the dataset tensor),
# then L2-normalize each row.
features = torch.nan_to_num(features, nan=0.0, posinf=0.0, neginf=0.0)
features = F.normalize(features, p=2, dim=1)

# Build G
G = hypergraph_construction(H)

# Split (class count taken from the labels instead of relying on the helper's default)
train_idx, val_idx, test_idx = generate_planetoid_split(labels, num_classes=num_classes, seed=SEED)

# Model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
features, labels, G = features.to(device), labels.to(device), G.to(device)
train_idx, val_idx, test_idx = train_idx.to(device), val_idx.to(device), test_idx.to(device)

model = HGNN(in_dim=features.shape[1], hidden_dim=64, out_dim=num_classes).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

# Training loop
def evaluate(model, x, G, labels, idx):
    """Return the accuracy of ``model(x, G)`` on the rows selected by ``idx``.

    Switches the model to eval mode (it is not switched back) and runs the
    forward pass with gradient tracking disabled. The result is a Python float.
    """
    model.eval()
    with torch.no_grad():
        logits = model(x, G)
        predicted = torch.argmax(logits[idx], dim=1)
        hits = predicted.eq(labels[idx]).to(torch.float32)
        accuracy = hits.mean().item()
    return accuracy

num_epochs = 200

for epoch in range(1, num_epochs + 1):
    # One full-batch optimisation step on the training nodes.
    model.train()
    out = model(features, G)
    loss = F.cross_entropy(out[train_idx], labels[train_idx])
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Evaluate only on epochs that are actually reported (first, every 10th,
    # and last) instead of running three extra forward passes every epoch.
    # Note the printed loss is from before this epoch's update, while the
    # accuracies are measured after it.
    if epoch % 10 == 0 or epoch == 1 or epoch == num_epochs:
        train_acc = evaluate(model, features, G, labels, train_idx)
        val_acc = evaluate(model, features, G, labels, val_idx)
        test_acc = evaluate(model, features, G, labels, test_idx)
        print(f"Epoch {epoch:03d} | Loss: {loss.item():.4f} | Train Acc: {train_acc:.4f} | Val Acc: {val_acc:.4f} | Test Acc: {test_acc:.4f}")


Epoch 001 | Loss: 1.9461 | Train Acc: 0.9857 | Val Acc: 0.6140 | Test Acc: 0.5980
Epoch 010 | Loss: 1.3577 | Train Acc: 1.0000 | Val Acc: 0.7240 | Test Acc: 0.7080
Epoch 020 | Loss: 0.3747 | Train Acc: 1.0000 | Val Acc: 0.8020 | Test Acc: 0.7650
Epoch 030 | Loss: 0.0960 | Train Acc: 1.0000 | Val Acc: 0.7960 | Test Acc: 0.7730
Epoch 040 | Loss: 0.0726 | Train Acc: 1.0000 | Val Acc: 0.8000 | Test Acc: 0.7870
Epoch 050 | Loss: 0.0776 | Train Acc: 1.0000 | Val Acc: 0.8020 | Test Acc: 0.7940
Epoch 060 | Loss: 0.0680 | Train Acc: 1.0000 | Val Acc: 0.8060 | Test Acc: 0.8010
Epoch 070 | Loss: 0.0581 | Train Acc: 1.0000 | Val Acc: 0.8100 | Test Acc: 0.8030
Epoch 080 | Loss: 0.0535 | Train Acc: 1.0000 | Val Acc: 0.8100 | Test Acc: 0.8050
Epoch 090 | Loss: 0.0499 | Train Acc: 1.0000 | Val Acc: 0.8120 | Test Acc: 0.8040
Epoch 100 | Loss: 0.0466 | Train Acc: 1.0000 | Val Acc: 0.8120 | Test Acc: 0.8030
Epoch 110 | Loss: 0.0440 | Train Acc: 1.0000 | Val Acc: 0.8120 | Test Acc: 0.8030
Epoch 120 | Loss

# Multiple Datasets

In [6]:
import torch
import torch.nn.functional as F
from torch_geometric.datasets import Planetoid
# from hgnn_model import HGNN, hypergraph_construction
# from utils import generate_planetoid_split
import time

def graph_to_hypergraph(edge_index, num_nodes):
    """Turn a graph's edge list into a node-by-hyperedge incidence matrix.

    Each undirected edge becomes one hyperedge: the two directions ``(u, v)``
    and ``(v, u)`` are merged by sorting every pair before de-duplicating.

    Args:
        edge_index: integer tensor of shape ``(2, num_edges)``; column ``k``
            holds the two endpoints of edge ``k``.
        num_nodes: number of rows of the returned matrix.

    Returns:
        Float tensor ``H`` of shape ``(num_nodes, num_unique_edges)`` with
        ``H[u, j] = H[v, j] = 1`` for the ``j``-th unique edge ``(u, v)``.
        A self-loop yields a column with a single 1.
    """
    edges = [tuple(sorted(e)) for e in edge_index.t().tolist()]
    unique_edges = list(set(edges))

    H = torch.zeros((num_nodes, len(unique_edges)))
    if unique_edges:
        # Fill both endpoints of every hyperedge in two indexed writes
        # instead of two Python-level tensor writes per edge.
        endpoints = torch.tensor(unique_edges, dtype=torch.long)
        cols = torch.arange(len(unique_edges))
        H[endpoints[:, 0], cols] = 1.0
        H[endpoints[:, 1], cols] = 1.0
    return H

def run_hgnn_on(dataset_name, hidden_dim=128, epochs=200, lr=0.003, weight_decay=5e-4, seed=42):
    """Train a two-layer HGNN on one Planetoid dataset and record test accuracy per epoch.

    The citation graph is converted to a hypergraph (one hyperedge per
    undirected edge), features are L2-normalized per row, and the model is
    trained full-batch with Adam and cross-entropy on a random split with a
    fixed number of training nodes per class.

    Args:
        dataset_name: Planetoid dataset name; also used for the download
            directory ``/tmp/<dataset_name>``.
        hidden_dim: width of the hidden layer.
        epochs: number of training epochs.
        lr: Adam learning rate.
        weight_decay: Adam weight decay.
        seed: seeds both torch (weight init) and the train/val/test split.

    Returns:
        ``(acc_list, total_time)``: test accuracy after each epoch, and the
        wall-clock seconds spent in the training/evaluation loop (dataset
        loading and hypergraph construction are not included).
    """
    torch.manual_seed(seed)

    # Load dataset
    dataset = Planetoid(root=f'/tmp/{dataset_name}', name=dataset_name)
    data = dataset[0]
    features, labels = data.x, data.y
    edge_index = data.edge_index
    num_classes = labels.max().item() + 1

    # Build hypergraph from citation edges
    H = graph_to_hypergraph(edge_index, features.shape[0])
    G = hypergraph_construction(H)

    # Normalize features: zero out NaN/inf without mutating the dataset tensor,
    # then L2-normalize each row.
    features = torch.nan_to_num(features, nan=0.0, posinf=0.0, neginf=0.0)
    features = F.normalize(features, p=2, dim=1)

    # Splits. The seed is forwarded so that `seed` controls the split as well
    # as the weight initialisation. val_idx is produced but not used below.
    train_idx, val_idx, test_idx = generate_planetoid_split(labels, num_classes=num_classes, seed=seed)

    # To device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    features, labels, G = features.to(device), labels.to(device), G.to(device)
    train_idx, val_idx, test_idx = train_idx.to(device), val_idx.to(device), test_idx.to(device)

    # Model
    model = HGNN(in_dim=features.shape[1], hidden_dim=hidden_dim, out_dim=num_classes).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

    acc_list = []
    start_time = time.time()

    for epoch in range(1, epochs + 1):
        model.train()
        out = model(features, G)
        loss = F.cross_entropy(out[train_idx], labels[train_idx])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Evaluation on the test nodes after this epoch's update.
        model.eval()
        with torch.no_grad():
            logits = model(features, G)
            pred = logits[test_idx].argmax(dim=1)
            acc = (pred == labels[test_idx]).float().mean().item()
            acc_list.append(acc)

        if epoch % 10 == 0 or epoch == 1 or epoch == epochs:
            print(f"[{dataset_name}] Epoch {epoch:03d} | Loss: {loss.item():.4f} | Test Acc: {acc:.4f}")

    total_time = time.time() - start_time
    return acc_list, total_time


In [7]:
# Benchmarks to run, in reporting order.
datasets = ["Cora", "Citeseer", "PubMed"]

# Maps dataset name -> per-epoch test accuracies ("acc") and loop time in seconds ("time").
results = {}
for name in datasets:
    acc, time_taken = run_hgnn_on(name)
    results.update({name: dict(acc=acc, time=time_taken)})


[Cora] Epoch 001 | Loss: 1.9457 | Test Acc: 0.5200
[Cora] Epoch 010 | Loss: 1.7500 | Test Acc: 0.7130
[Cora] Epoch 020 | Loss: 1.3265 | Test Acc: 0.7290
[Cora] Epoch 030 | Loss: 0.7966 | Test Acc: 0.7470
[Cora] Epoch 040 | Loss: 0.3995 | Test Acc: 0.7620
[Cora] Epoch 050 | Loss: 0.2139 | Test Acc: 0.7720
[Cora] Epoch 060 | Loss: 0.1462 | Test Acc: 0.7730
[Cora] Epoch 070 | Loss: 0.1225 | Test Acc: 0.7800
[Cora] Epoch 080 | Loss: 0.1111 | Test Acc: 0.7850
[Cora] Epoch 090 | Loss: 0.1017 | Test Acc: 0.7900
[Cora] Epoch 100 | Loss: 0.0929 | Test Acc: 0.7970
[Cora] Epoch 110 | Loss: 0.0853 | Test Acc: 0.7990
[Cora] Epoch 120 | Loss: 0.0792 | Test Acc: 0.8030
[Cora] Epoch 130 | Loss: 0.0742 | Test Acc: 0.8050
[Cora] Epoch 140 | Loss: 0.0700 | Test Acc: 0.8050
[Cora] Epoch 150 | Loss: 0.0664 | Test Acc: 0.8060
[Cora] Epoch 160 | Loss: 0.0633 | Test Acc: 0.8050
[Cora] Epoch 170 | Loss: 0.0605 | Test Acc: 0.8050
[Cora] Epoch 180 | Loss: 0.0580 | Test Acc: 0.8080
[Cora] Epoch 190 | Loss: 0.0558

In [9]:
# Summary table: best per-epoch test accuracy and training-loop time per dataset.
# Note: the maximum is taken over test accuracies recorded every epoch, so it is
# an optimistic figure rather than a validation-selected one.
header = f"{'Dataset':<10} | {'Max Accuracy':<12} | {'Time Taken (s)':<15}"
print(header)
print("-" * len(header))
# Iterate over what was actually run instead of repeating the dataset list.
for name in results:
    # default= keeps the table printable if a run recorded no epochs.
    acc = max(results[name]['acc'], default=float('nan'))
    time_taken = results[name]['time']
    # Column widths match the header (12 and 15 characters).
    print(f"{name:<10} | {acc * 100:>11.2f}% | {time_taken:>15.2f}")


Dataset    | Max Accuracy | Time Taken (s) 
---------------------------------------------
Cora       |      80.90%     |         0.97 sec
Citeseer   |      69.50%     |         2.91 sec
PubMed     |      77.70%     |        16.51 sec


In [10]:
# Shell escape: print the kernel's current working directory.
!pwd

/home/ridham.patel/hypergraph-baselines-2/HGNN
