In [1]:
import torch

In [2]:
import dgl

In [3]:
from dgl._ffi.function import _init_api
import dgl.backend as F

_init_api("dgl.groot", __name__)


['_CAPI_GetOutputNodeFeats',
 '_CAPI_GetInputNodeLabels',
 '_CAPI_GetBlocks',
 '_CAPI_GetBlock',
 '_CAPI_Next',
 '_CAPI_InitLocDataloader',
 '_CAPI_GetInputNodes',
 '_CAPI_GetOutputNodes',
 '_CAPI_CreateLocDataloader',
 '_CAPI_ScatterObjectCreate',
 '_CAPI_testffi']

In [4]:
import dgl
import torch
import numpy as np
from ogb.nodeproppred import DglNodePropPredDataset
from dgl.utils import pin_memory_inplace
dataset = DglNodePropPredDataset('ogbn-products', root="/home/juelin/dataset")
graph = dataset.graph[0]
feats = graph.srcdata["feat"]
train_idx = dataset.get_idx_split()["train"]
indptr, indices, edge_id = graph.adj_tensors('csc')
labels = dataset.labels

feats_handle = pin_memory_inplace(feats)
indptr_handle = pin_memory_inplace(indptr)
indices_handle = pin_memory_inplace(indices)
edge_id_handle = pin_memory_inplace(edge_id)

In [5]:
from dgl.heterograph import DGLBlock

def init_dataloader(rank: int, 
                          indptr: torch.Tensor, 
                          indices: torch.Tensor, 
                          feats: torch.Tensor,
                          labels: torch.Tensor,
                          seeds: torch.Tensor,
                          fanouts: list[int],
                          batch_size: int,
                          max_pool_size: int = 2):
    
    return _CAPI_InitLocDataloader(rank, 
                                    F.zerocopy_to_dgl_ndarray(indptr),
                                    F.zerocopy_to_dgl_ndarray(indices),
                                    F.zerocopy_to_dgl_ndarray(feats),
                                    F.zerocopy_to_dgl_ndarray(labels.to(rank)),
                                    F.zerocopy_to_dgl_ndarray(seeds.to(rank)),
                                    fanouts,
                                    batch_size,
                                    max_pool_size)

def get_batch(key: int, layers:int = 3):
    blocks = []
    for i in range(layers):
        gidx = _CAPI_GetBlock(key, i)
        block = DGLBlock(gidx, (['_N'], ['_N']), ['_E'])
        blocks.append(block)
        
    feat = _CAPI_GetOutputNodeFeats(key)
    labels = _CAPI_GetInputNodeLabels(key)
    return blocks, feat, labels


In [6]:
dataloader = init_dataloader(0, indptr, indices, feats, labels, train_idx, [20, 20, 20], 1024, 2)

[21:41:26] /home/juelin/project/dgl_groot/src/groot_dataloader/loc_dataloader.h:53: Calling LocDataloaderObject default constructor
[21:41:26] /home/juelin/project/dgl_groot/src/groot_dataloader/loc_dataloader.h:73: Creating LocDataloaderObject with init function


In [26]:
import time
batch_size = 1024
step = train_idx.shape[0] / batch_size
step = int(step)
feat_size_in_bytes = 0
print(f"start sampling for {step} mini-batches") 
start = time.time()
for i in range(step):
    key = _CAPI_Next()
    blocks, batch_feat, batch_labels = get_batch(key)
    
    num_feat, feat_width = batch_feat.shape
    feat_size_in_bytes += num_feat * feat_width * 4
    print(f"{key=}")

end = time.time()

feat_size_in_mb = feat_size_in_bytes / 1024 / 1024
duration_in_s = end - start
print(f"finished sampling one epoch in {round(duration_in_s, 1)} sec")
print(f"fetching feature data {round(feat_size_in_mb)} MB; bandwidth {round(feat_size_in_mb / duration_in_s)} MB/s")

start sampling for 192 mini-batches
key=384
key=385
key=386
key=387
key=388
key=389
key=390
key=391
key=392
key=393
key=394
key=395
key=396
key=397
key=398
key=399
key=400
key=401
key=402
key=403
key=404
key=405
key=406
key=407
key=408
key=409
key=410
key=411
key=412
key=413
key=414
key=415
key=416
key=417
key=418
key=419
key=420
key=421
key=422
key=423
key=424
key=425
key=426
key=427
key=428
key=429
key=430
key=431
key=432
key=433
key=434
key=435
key=436
key=437
key=438
key=439
key=440
key=441
key=442
key=443
key=444
key=445
key=446
key=447
key=448
key=449
key=450
key=451
key=452
key=453
key=454
key=455
key=456
key=457
key=458
key=459
key=460
key=461
key=462
key=463
key=464
key=465
key=466
key=467
key=468
key=469
key=470
key=471
key=472
key=473
key=474
key=475
key=476
key=477
key=478
key=479
key=480
key=481
key=482
key=483
key=484
key=485
key=486
key=487
key=488
key=489
key=490
key=491
key=492
key=493
key=494
key=495
key=496
key=497
key=498
key=499
key=500
key=501
key=502
key=503
key=

In [19]:
blocks, batch_feat, batch_label = get_batch(key - 1)

In [20]:
blocks

[Block(num_src_nodes=1024, num_dst_nodes=19953, num_edges=19953),
 Block(num_src_nodes=20387, num_dst_nodes=397301, num_edges=397301),
 Block(num_src_nodes=258697, num_dst_nodes=4973874, num_edges=4973874)]

In [21]:
block = blocks[0]

In [22]:
block.edges()[1].shape

torch.Size([19953])

In [23]:
batch_feat.shape

(1161223, 100)

In [24]:
1161223 * 100 * 4 / 1000000

464.4892