In [1]:
import pandas as pd
import os
import torch
import dgl

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def get_edgelist(scale, edgefactor, dataset_dir = 'datasets'):
    """Read an edge-list parquet file selected by scale and edge factor.

    The file is expected at
    ``<dataset_dir>/rmat_scale_<scale>_edgefactor_<edgefactor>.parquet``.

    Parameters
    ----------
    scale, edgefactor
        Values interpolated into the file name.
    dataset_dir : str
        Directory containing the parquet file (default ``'datasets'``).

    Returns
    -------
    The object returned by ``pd.read_parquet`` for that path.
    """
    file_name = f'rmat_scale_{scale}_edgefactor_{edgefactor}.parquet'
    parquet_path = os.path.join(dataset_dir, file_name)
    edgelist = pd.read_parquet(parquet_path)
    return edgelist
                      
def create_dgl_graph_from_df(df):
    """Build a DGL graph from an edge-list frame, flipping every edge.

    Parameters
    ----------
    df
        Frame with ``'src'`` and ``'dst'`` columns; each column's ``.values``
        is converted to a tensor with ``torch.as_tensor``.

    Returns
    -------
    The graph produced by ``dgl.graph`` when given ``(dst, src)`` as its
    edge data, i.e. with source and destination swapped relative to ``df``.
    """
    sources, destinations = (
        torch.as_tensor(df[column].values) for column in ('src', 'dst')
    )
    # Edges are passed as (dst, src) on purpose: the direction is reversed
    # to match cuGraph behavior.
    return dgl.graph(data = (destinations, sources))

def create_dataloader(g, seed_nodes, batch_size, sampler=None, device='cuda:0'):
    """Create a DGL ``DataLoader`` that samples ``g`` with UVA enabled.

    Parameters
    ----------
    g
        Graph to sample from.
    seed_nodes
        Node IDs the loader iterates over in batches.
    batch_size : int
        Number of seed nodes per batch.
    sampler : optional
        Sampler handed to the ``DataLoader``. When omitted, the module-level
        variable named ``sampler`` is used, which keeps existing
        three-argument calls working. Prefer passing it explicitly so the
        function does not depend on notebook cell execution order.
    device : str or torch.device, optional
        Device passed to the ``DataLoader`` (default ``'cuda:0'``).

    Returns
    -------
    The constructed ``dgl.dataloading.DataLoader``. The loader is created
    with ``drop_last=False`` and ``shuffle=False``.

    Raises
    ------
    NameError
        If ``sampler`` is not given and no module-level ``sampler`` exists.
    """
    if sampler is None:
        # Legacy path: earlier versions read `sampler` from the notebook's
        # global namespace. Keep that working, but fail with a clear message.
        try:
            sampler = globals()['sampler']
        except KeyError:
            raise NameError(
                "name 'sampler' is not defined; pass sampler=... to create_dataloader"
            ) from None

    # NOTE(review): the previous inline comments said the graph and seed nodes
    # "must be on GPU". That does not match use_uva=True, where DGL is
    # documented to sample a CPU-resident (pinned) graph from the GPU --
    # confirm against the installed DGL version.
    dataloader = dgl.dataloading.DataLoader(
        g,
        seed_nodes,
        sampler,
        device=torch.device(device),      # Device the loader is asked to use.
        num_workers=0,                    # Number of workers must be 0.
        use_uva=True,
        batch_size=batch_size,
        drop_last=False,
        shuffle=False)
    return dataloader

In [3]:
# Load the edge list for scale 26 / edge factor 16 from ../datasets/.
df = get_edgelist(scale=26, edgefactor=16, dataset_dir='../datasets/')
# Build the DGL graph from the loaded edge list.
g = create_dgl_graph_from_df(df)
# Seed nodes: the first 30,000,000 entries of the 'dst' column, as a tensor.
seed_nodes = torch.as_tensor(df['dst'][:30_000_000])

### Benchmark UVA

In [4]:
for batch_size in [100, 500, 1_000, 2_500, 5_000, 10_000, 20_000, 30_000, 40_000, 50_000, 60_000, 70_000, 80_000, 90_000, 100_000]:
    fanout = [10, 25]
    ### Reverse because dgl sampler samples from destination to source
    fanout.reverse()
    sampler = dgl.dataloading.MultiLayerNeighborSampler(fanout)
    dataloader = create_dataloader(g, seed_nodes, batch_size)
    print(f"Batch Size = {batch_size}", flush=True)
    %timeit input_nodes, output_nodes, blocks = next(iter(dataloader))
    input_nodes, output_nodes, blocks = next(iter(dataloader))
    assert len(output_nodes)==batch_size
    print("---"*20)

Batch Size = 100
23.9 ms ± 712 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)
------------------------------------------------------------
Batch Size = 500
25.4 ms ± 300 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
------------------------------------------------------------
Batch Size = 1000
26.4 ms ± 93.1 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
------------------------------------------------------------
Batch Size = 2500
31.7 ms ± 275 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
------------------------------------------------------------
Batch Size = 5000
39.3 ms ± 466 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
------------------------------------------------------------
Batch Size = 10000
52 ms ± 485 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
------------------------------------------------------------
Batch Size = 20000
81.4 ms ± 899 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
---------------------