In [1]:
import numpy as np


def Eu_dis(x):
    """
    Calculate the distance among each raw of x
    :param x: N X D
                N: the object number
                D: Dimension of the feature
    :return: N X N distance matrix
    """
    x = np.mat(x)
    aa = np.sum(np.multiply(x, x), 1)
    ab = x * x.T
    dist_mat = aa + aa.T - 2 * ab
    dist_mat[dist_mat < 0] = 0
    dist_mat = np.sqrt(dist_mat)
    dist_mat = np.maximum(dist_mat, dist_mat.T)
    return dist_mat


def feature_concat(*F_list, normal_col=False):
    """
    Concatenate multiple modality feature. If the dimension of a feature matrix is more than two,
    the function will reduce it into two dimension(using the last dimension as the feature dimension,
    the other dimension will be fused as the object dimension)
    :param F_list: Feature matrix list
    :param normal_col: normalize each column of the feature
    :return: Fused feature matrix
    """
    features = None
    for f in F_list:
        if f is not None and f != []:
            # deal with the dimension that more than two
            if len(f.shape) > 2:
                f = f.reshape(-1, f.shape[-1])
            # normal each column
            if normal_col:
                f_max = np.max(np.abs(f), axis=0)
                f = f / f_max
            # facing the first feature matrix appended to fused feature matrix
            if features is None:
                features = f
            else:
                features = np.hstack((features, f))
    if normal_col:
        features_max = np.max(np.abs(features), axis=0)
        features = features / features_max
    return features


def hyperedge_concat(*H_list):
    """
    Concatenate hyperedge group in H_list
    :param H_list: Hyperedge groups which contain two or more hypergraph incidence matrix
    :return: Fused hypergraph incidence matrix
    """
    H = None
    for h in H_list:
        if h is not None and h != []:
            # for the first H appended to fused hypergraph incidence matrix
            if H is None:
                H = h
            else:
                if type(h) != list:
                    H = np.hstack((H, h))
                else:
                    tmp = []
                    for a, b in zip(H, h):
                        tmp.append(np.hstack((a, b)))
                    H = tmp
    return H


def generate_G_from_H(H, variable_weight=False):
    """
    calculate G from hypgraph incidence matrix H
    :param H: hypergraph incidence matrix H
    :param variable_weight: whether the weight of hyperedge is variable
    :return: G
    """
    if type(H) != list:
        return _generate_G_from_H(H, variable_weight)
    else:
        G = []
        for sub_H in H:
            G.append(generate_G_from_H(sub_H, variable_weight))
        return G


def _generate_G_from_H(H, variable_weight=False):
    """
    calculate G from hypgraph incidence matrix H
    :param H: hypergraph incidence matrix H
    :param variable_weight: whether the weight of hyperedge is variable
    :return: G
    """
    H = np.array(H)
    n_edge = H.shape[1]
    # the weight of the hyperedge
    W = np.ones(n_edge)
    # the degree of the node
    DV = np.sum(H * W, axis=1)
    # the degree of the hyperedge
    DE = np.sum(H, axis=0)
    
    invDE = np.asmatrix(np.diag(np.power(DE, -1)))
    DV2 = np.asmatrix(np.diag(np.power(DV, -0.5)))
    W = np.asmatrix(np.diag(W))
    H = np.asmatrix(H)
    HT = H.T

    if variable_weight:
        DV2_H = DV2 * H
        invDE_HT_DV2 = invDE * HT * DV2
        return DV2_H, W, invDE_HT_DV2
    else:
        G = DV2 * H * W * invDE * HT * DV2
        return G


def construct_H_with_KNN_from_distance(dis_mat, k_neig, is_probH=True, m_prob=1):
    """
    construct hypregraph incidence matrix from hypergraph node distance matrix
    :param dis_mat: node distance matrix
    :param k_neig: K nearest neighbor
    :param is_probH: prob Vertex-Edge matrix or binary
    :param m_prob: prob
    :return: N_object X N_hyperedge
    """
    n_obj = dis_mat.shape[0]
    # construct hyperedge from the central feature space of each node
    n_edge = n_obj
    H = np.zeros((n_obj, n_edge))
    for center_idx in range(n_obj):
        dis_mat[center_idx, center_idx] = 0
        dis_vec = dis_mat[center_idx]
        nearest_idx = np.array(np.argsort(dis_vec)).squeeze()
        avg_dis = np.average(dis_vec)
        if not np.any(nearest_idx[:k_neig] == center_idx):
            nearest_idx[k_neig - 1] = center_idx

        for node_idx in nearest_idx[:k_neig]:
            if is_probH:
                H[node_idx, center_idx] = np.exp(-dis_vec[0, node_idx] ** 2 / (m_prob * avg_dis) ** 2)
            else:
                H[node_idx, center_idx] = 1.0
    return H


def construct_H_with_KNN(X, K_neigs=[10], split_diff_scale=False, is_probH=True, m_prob=1):
    """
    init multi-scale hypergraph Vertex-Edge matrix from original node feature matrix
    :param X: N_object x feature_number
    :param K_neigs: the number of neighbor expansion
    :param split_diff_scale: whether split hyperedge group at different neighbor scale
    :param is_probH: prob Vertex-Edge matrix or binary
    :param m_prob: prob
    :return: N_object x N_hyperedge
    """
    if len(X.shape) != 2:
        X = X.reshape(-1, X.shape[-1])

    if type(K_neigs) == int:
        K_neigs = [K_neigs]

    dis_mat = Eu_dis(X)
    H = []
    for k_neig in K_neigs:
        H_tmp = construct_H_with_KNN_from_distance(dis_mat, k_neig, is_probH, m_prob)
        if not split_diff_scale:
            H = hyperedge_concat(H, H_tmp)
        else:
            H.append(H_tmp)
    return H


In [2]:
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.parameter import Parameter

class HGNN_conv(nn.Module):
    def __init__(self, in_ft, out_ft, bias=True, device='cuda'):
        super(HGNN_conv, self).__init__()

        self.weight = Parameter(torch.Tensor(in_ft, out_ft).to(device))
        if bias:
            self.bias = Parameter(torch.Tensor(out_ft).to(device=device))
        else:
            self.register_parameter('bias', None)
        self.reset_parameters()

    def reset_parameters(self):
        stdv = 1. / math.sqrt(self.weight.size(1))
        self.weight.data.uniform_(-stdv, stdv)
        if self.bias is not None:
            self.bias.data.uniform_(-stdv, stdv)

    def forward(self, x: torch.Tensor, G: torch.Tensor):
        # print(type(x))
        x = x.matmul(self.weight)
        if self.bias is not None:
            x = x + self.bias
        # G = np.array(G)
        x = G.matmul(x)
        return x


class HGNN_fc(nn.Module):
    def __init__(self, in_ch, out_ch):
        super(HGNN_fc, self).__init__()
        self.fc = nn.Linear(in_ch, out_ch)

    def forward(self, x):
        return self.fc(x)


class HGNN_embedding(nn.Module):
    def __init__(self, in_ch, n_hid, dropout=0.5):
        super(HGNN_embedding, self).__init__()
        self.dropout = dropout
        self.hgc1 = HGNN_conv(in_ch, n_hid)
        self.hgc2 = HGNN_conv(n_hid, n_hid)

    def forward(self, x, G):
        x = F.relu(self.hgc1(x, G))
        x = F.dropout(x, self.dropout)
        x = F.relu(self.hgc2(x, G))
        return x


class HGNN_classifier(nn.Module):
    def __init__(self, n_hid, n_class):
        super(HGNN_classifier, self).__init__()
        self.fc1 = nn.Linear(n_hid, n_class)

    def forward(self, x):
        x = self.fc1(x)
        return x
    
from torch import nn
# from models import HGNN_conv
import torch.nn.functional as F


class HGNN(nn.Module):
    def __init__(self, in_ch, n_class, n_hid, dropout=0.5):
        super(HGNN, self).__init__()
        self.dropout = dropout
        self.hgc1 = HGNN_conv(in_ch, n_hid)
        self.hgc2 = HGNN_conv(n_hid, n_class)

    def forward(self, x, G):
        x = F.relu(self.hgc1(x, G))
        x = F.dropout(x, self.dropout)
        x = self.hgc2(x, G)
        return x

In [3]:
import pickle
import os
import numpy as np
import torch
from sklearn.preprocessing import LabelEncoder

# Adjust these paths if needed
data_dir = '/home/ridham.patel/hypergraph-baselines-2/HGNN/data/cora/'

with open(data_dir + 'features.pickle', 'rb') as f:
    features = pickle.load(f)  # shape [N, F]

with open(data_dir + 'labels.pickle', 'rb') as f:
    labels = pickle.load(f)  # shape [N]

with open(data_dir + 'hypergraph.pickle', 'rb') as f:
    hypergraph = pickle.load(f)  # dict: author -> [papers]

num_nodes = features.shape[0]
hyperedges = list(hypergraph.values())

# Build incidence matrix H (N x M)
rows, cols = [], []
for j, edge in enumerate(hyperedges):
    for i in edge:
        rows.append(i)
        cols.append(j)

H = torch.zeros((num_nodes, len(hyperedges)))
H[rows, cols] = 1

# return torch.tensor(features.toarray(), dtype=torch.float), torch.tensor(labels), H


  features = pickle.load(f)  # shape [N, F]


In [4]:
from sklearn.model_selection import train_test_split

with open(os.path.join(data_dir, 'splits/3.pickle'), 'rb') as f:
    split = pickle.load(f)

train_idx = torch.LongTensor(split['train'])
# test_idx = torch.LongTensor(split['test'])
val_idx_np, test_idx_np = train_test_split(split['test'], test_size=0.5, random_state=42, shuffle=True)
val_idx = torch.LongTensor(val_idx_np)
test_idx = torch.LongTensor(test_idx_np)

In [5]:
num_nodes, feat_dim = features.shape
labels = np.array(labels)
num_classes = labels.max().item() + 1

In [11]:
H.shape

torch.Size([2708, 1072])

In [7]:
model = HGNN(in_ch=feat_dim, n_class=num_classes, n_hid=64, dropout=0.5)


In [8]:
labels

array([3, 2, 6, ..., 0, 4, 3])

In [9]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
features = torch.tensor(features.toarray(), dtype=torch.float)
labels = torch.LongTensor(labels)
X = features.to(device)
# H = H.T
y = labels.to(device)
train_idx = train_idx.to(device)
val_idx = val_idx.to(device)
test_idx = test_idx.to(device)
G = generate_G_from_H(H)
H = H.to(device)
G = torch.Tensor(G).to(device)
G = torch.nan_to_num(G,nan=0.0)

  H = np.array(H)
  DV2 = np.asmatrix(np.diag(np.power(DV, -0.5)))


In [10]:
import torch.optim as optim

optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=5e-4)
criterion = nn.CrossEntropyLoss()


# ---------------------
# Training Loop
# ---------------------
def evaluate(model, X, G, y, indices):
    model.eval()
    with torch.no_grad():
        logits = model(X, G)
        preds = logits[indices].argmax(dim=1)
        acc = (preds == y[indices]).float().mean().item()
        return acc

best_val_acc = 0
patience = 2005
pat_counter = 0

for epoch in range(1, 201):
    model.train()
    optimizer.zero_grad()
    logits = model(X, G)
    # print(logits)
    loss = criterion(logits[train_idx], y[train_idx])
    loss.backward()
    optimizer.step()

    val_acc = evaluate(model, X, G, y, val_idx)
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        best_model = model.state_dict()
        pat_counter = 0
    else:
        pat_counter += 1

    if epoch % 10 == 0 or epoch == 1:
        train_acc = evaluate(model, X, G, y, train_idx)
        print(f"Epoch {epoch:03d} | Loss: {loss.item():.4f} | Train Acc: {train_acc:.4f} | Val Acc: {val_acc:.4f}")

    if pat_counter >= patience:
        print("Early stopping.")
        break

# ---------------------
# Test Evaluation
# ---------------------
model.load_state_dict(best_model)
test_acc = evaluate(model, X, G, y, test_idx)
print(f"Test Accuracy: {test_acc:.4f}")

Epoch 001 | Loss: 1.9845 | Train Acc: 0.1643 | Val Acc: 0.1410
Epoch 010 | Loss: 1.7004 | Train Acc: 0.5786 | Val Acc: 0.3879
Epoch 020 | Loss: 1.4130 | Train Acc: 0.7643 | Val Acc: 0.5179
Epoch 030 | Loss: 1.1649 | Train Acc: 0.8214 | Val Acc: 0.5545
Epoch 040 | Loss: 0.9180 | Train Acc: 0.8143 | Val Acc: 0.5693
Epoch 050 | Loss: 0.7450 | Train Acc: 0.8357 | Val Acc: 0.5942
Epoch 060 | Loss: 0.6210 | Train Acc: 0.8286 | Val Acc: 0.6347
Epoch 070 | Loss: 0.5454 | Train Acc: 0.8357 | Val Acc: 0.6464
Epoch 080 | Loss: 0.5037 | Train Acc: 0.8357 | Val Acc: 0.6308
Epoch 090 | Loss: 0.4685 | Train Acc: 0.8429 | Val Acc: 0.6417
Epoch 100 | Loss: 0.4403 | Train Acc: 0.8357 | Val Acc: 0.6371
Epoch 110 | Loss: 0.4272 | Train Acc: 0.8429 | Val Acc: 0.6495
Epoch 120 | Loss: 0.4109 | Train Acc: 0.8500 | Val Acc: 0.6363
Epoch 130 | Loss: 0.4045 | Train Acc: 0.8429 | Val Acc: 0.6449
Epoch 140 | Loss: 0.3989 | Train Acc: 0.8429 | Val Acc: 0.6425
Epoch 150 | Loss: 0.3931 | Train Acc: 0.8429 | Val Acc: