In [33]:
import numpy as np
from scipy.sparse import csr_matrix

In [84]:
from ogb.nodeproppred import NodePropPredDataset

dataset = NodePropPredDataset(name = 'ogbn-products')

split_idx = dataset.get_idx_split()
train_idx, valid_idx, test_idx = split_idx["train"], split_idx["valid"], split_idx["test"]
graph, label = dataset[0] # graph: library-agnostic graph object

This will download 1.38GB. Will you proceed? (y/N)
 y


Downloading https://snap.stanford.edu/ogb/data/nodeproppred/products.zip


Downloaded 1.38 GB: 100%|██████████| 1414/1414 [05:18<00:00,  4.44it/s]


Extracting dataset/products.zip
Loading necessary files...
This might take a while.


  0%|          | 0/1 [00:00<?, ?it/s]

Processing graphs...


100%|██████████| 1/1 [00:03<00:00,  3.82s/it]


Saving...


In [85]:
graph.keys()

dict_keys(['edge_index', 'edge_feat', 'node_feat', 'num_nodes'])

In [86]:
graph['num_nodes']

2449029

In [87]:
graph['edge_index'].shape

(2, 123718280)

In [24]:
graph['node_feat'].shape

(169343, 128)

In [27]:
edge_index = graph['edge_index']

In [32]:
np.ones([edge_index.shape[1]]).shape

(1166243,)

In [38]:
edge_index

array([[104447,  15858, 107156, ...,  45118,  45118,  45118],
       [ 13091,  47283,  69161, ..., 162473, 162537,  72717]])

In [41]:
reversed_edges = np.array([edge_index[1],edge_index[0]])

In [42]:
reversed_edges

array([[ 13091,  47283,  69161, ..., 162473, 162537,  72717],
       [104447,  15858, 107156, ...,  45118,  45118,  45118]])

In [36]:
adj = csr_matrix((np.ones([edge_index.shape[1]]), edge_index))

In [43]:
adj.shape

(169343, 169342)

In [49]:
complete = np.concatenate((edge_index,reversed_edges),axis=1)

In [57]:
adj = csr_matrix((np.ones([complete.shape[1]]), complete))

In [61]:
adj = min(adj,1)

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all().

In [64]:
adj.data = np.clip(adj.data,0,1)

In [65]:
adj.max()

1.0

In [67]:
label.flatten()

array([ 4,  5, 28, ..., 10,  4,  1])

In [72]:
from deeprobust.graph.defense import GCN
import torch

In [76]:
def get_n_params(model):
    pp=0
    for p in list(model.parameters()):
        nn=1
        for s in list(p.size()):
            nn = nn*s
        pp += nn
    return pp


In [81]:
gcn = GCN(nfeat=graph['node_feat'].shape[1],
              nhid=128,
              nclass=label.flatten().max().item() + 1,
              dropout=0.5, device=torch.device("cpu"))


In [82]:
get_n_params(gcn)

21672