In [4]:
from pathlib import Path

import numpy as np
import torch
from ogb.nodeproppred import NodePropPredDataset
from torch_geometric.data import Data

In [17]:
# Load ogbn-proteins (fetched into ../data when it is not already there) and
# check whether the raw graph dictionary carries any node features.
dataset = NodePropPredDataset(root="../data", name="ogbn-proteins")
graph, labels = dataset[0]
print(graph["node_feat"])  # prints None: this dataset has no node features

Downloading http://snap.stanford.edu/ogb/data/nodeproppred/proteins.zip


Downloaded 0.21 GB: 100%|██████████| 216/216 [01:00<00:00,  3.56it/s]


Extracting ../data/proteins.zip
Loading necessary files...
This might take a while.
Processing graphs...


100%|██████████| 1/1 [00:01<00:00,  1.65s/it]


Saving...
None


In [None]:
# Build a PyTorch Geometric Data object with synthetic node features and
# persist it for the cells below.
#
# graph["node_feat"] is None for this dataset, so random features of width D
# are generated as a stand-in. They are drawn from a seeded generator so that
# a fresh kernel reproduces exactly the same tensor.
SEED = 0
D = 64
OUT_PATH = Path('../data/ogbn_proteins/ogbn_proteins_synthetic.pt')

edge_index = torch.from_numpy(graph['edge_index']).long()           # [2, E]
edge_attr  = torch.from_numpy(graph['edge_feat']).float()          # [E, 8]
y          = torch.from_numpy(labels).float().squeeze()            # [N, 112]
num_nodes  = graph['num_nodes']

# A dedicated generator keeps the seed local and leaves torch's global RNG
# state untouched.
feature_rng = torch.Generator().manual_seed(SEED)
x = torch.rand((num_nodes, D), dtype=torch.float, generator=feature_rng)

# Turn the index-based train/valid/test split into boolean node masks.
splits = dataset.get_idx_split()
train_mask = torch.zeros(num_nodes, dtype=torch.bool)
train_mask[splits['train']] = True
val_mask   = torch.zeros(num_nodes, dtype=torch.bool)
val_mask[splits['valid']] = True
test_mask  = torch.zeros(num_nodes, dtype=torch.bool)
test_mask[splits['test']] = True

data = Data(
    x=x,
    edge_index=edge_index,
    edge_attr=edge_attr,
    y=y,
    train_mask=train_mask,
    val_mask=val_mask,
    test_mask=test_mask,
)

# One discrete type per edge: the index of its largest edge-feature channel.
# NOTE(review): this discards the magnitudes of the other channels; confirm
# that a single dominant-channel label is what downstream models expect.
data.edge_type = data.edge_attr.argmax(dim=1).long()

# Only write when no artifact exists yet, so a re-run never silently replaces
# features that earlier experiments were computed on. Delete the file to
# regenerate it.
if OUT_PATH.exists():
    print(f"Found existing {OUT_PATH.name}; not overwriting")
else:
    OUT_PATH.parent.mkdir(parents=True, exist_ok=True)
    torch.save(data, OUT_PATH)
    print(f"Saved PyG Data object with synthetic features to {OUT_PATH.name}")

In [6]:
# Reload the saved PyG Data object. weights_only=False makes torch.load
# unpickle arbitrary Python objects, so only point this at a file generated
# locally by this notebook.
synthetic_path = '../data/ogbn_proteins/ogbn_proteins_synthetic.pt'
graph = torch.load(synthetic_path, weights_only=False)

In [23]:
# Report, in order: the width of the label matrix, the shape of the per-edge
# type vector, and the number of edge-feature channels.
graph_properties = (
    graph.y.size(1),
    graph.edge_type.shape,
    graph.edge_attr.shape[1],
)
print("Graph properties:\n " + " \n ".join(str(value) for value in graph_properties))

Graph properties:
 112 
 torch.Size([79122504]) 
 8
