In [2]:
from torch_geometric.datasets import TUDataset

# Specify a root directory for storing data (adjust path as needed)
MUTAG = TUDataset(root='data/TUDataset', name='MUTAG')

Downloading https://www.chrsmrrs.com/graphkerneldatasets/MUTAG.zip
Processing...
Done!


In [3]:
# Print summary statistics
print(f"Number of graphs: {len(MUTAG)}")
print(f"Number of node features: {MUTAG.num_node_features}")
print(f"Number of edge features: {MUTAG.num_edge_features}")
print(f"Number of classes: {MUTAG.num_classes}")

Number of graphs: 188
Number of node features: 7
Number of edge features: 4
Number of classes: 2


In [10]:
import numpy as np
import torch

def build_graph_features(dataset):
    # detect if continuous features exist
    has_continuous = any(getattr(d, 'x', None) is not None for d in dataset)
    # detect discrete node labels
    has_node_label = any(getattr(d, 'node_label', None) is not None for d in dataset)
    global_label_count = None

    if not has_continuous and has_node_label:
        # find maximum label to create consistent histogram length
        max_lbl = 0
        for d in dataset:
            nl = getattr(d, 'node_label', None)
            if nl is not None and nl.numel() > 0:
                max_lbl = max(max_lbl, int(nl.max().item()))
        global_label_count = max_lbl + 1

    X_list = []
    y_list = []
    for d in dataset:
        feats = []
        n_nodes = d.num_nodes
        n_edges = d.num_edges
        avg_deg = (2.0 * n_edges / n_nodes) if n_nodes > 0 else 0.0
        feats.extend([n_nodes, n_edges, avg_deg])

        if getattr(d, 'x', None) is not None:
            x = d.x.cpu().numpy()  # shape (n_nodes, num_node_features)
            # summary statistics for each node feature
            means = x.mean(axis=0)
            stds  = x.std(axis=0)
            sums  = x.sum(axis=0)
            mins  = x.min(axis=0)
            maxs  = x.max(axis=0)
            feats.extend(means.tolist())
            feats.extend(stds.tolist())
            feats.extend(sums.tolist())
            feats.extend(mins.tolist())
            feats.extend(maxs.tolist())
        elif getattr(d, 'node_label', None) is not None:
            nl = d.node_label.cpu().numpy().astype(int)
            L = global_label_count or (int(nl.max()) + 1 if nl.size>0 else 1)
            hist = np.zeros(L, dtype=float)
            for lbl in nl:
                if 0 <= int(lbl) < L:
                    hist[int(lbl)] += 1.0
            # normalize histogram
            hist = hist / (hist.sum() + 1e-12)
            feats.extend(hist.tolist())
        else:
            # no node-level info: nothing extra
            pass

        X_list.append(np.array(feats, dtype=float))
        # graph-level label y
        label = d.y
        if isinstance(label, torch.Tensor):
            label = int(label.item()) if label.numel() == 1 else label.cpu().numpy()
        y_list.append(int(label))

    # ensure consistent feature length by padding if needed
    lengths = [len(x) for x in X_list]
    if len(set(lengths)) != 1:
        max_len = max(lengths)
        X_padded = np.zeros((len(X_list), max_len), dtype=float)
        for i, x in enumerate(X_list):
            X_padded[i, :len(x)] = x
        X = X_padded
    else:
        X = np.vstack(X_list)
    y = np.array(y_list, dtype=int)
    return X, y


In [11]:
X, y = build_graph_features(MUTAG)

In [5]:
from grakel.kernels import WeisfeilerLehman, VertexHistogram

wl_kernel = WeisfeilerLehman(n_iter=5, normalize=True, base_graph_kernel=VertexHistogram)

In [12]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [13]:
K_train = wl_kernel.fit_transform(X_train)
K_test = wl_kernel.transform(X_test)

ValueError: Unsupported input type. For more information check the documentation, concerning valid input types for graph type object.