In [1]:
import os

# Configure RAPIDS through os.environ so the settings live in this kernel
# process and are inherited by any child processes it starts afterwards.
# A `!export VAR=...` line only changes a short-lived subshell, which exits
# immediately, so the variables would never be visible to Python.
# Keep this ahead of the cugraph_bulk_sampling import below so the values are
# already in place when the RAPIDS libraries are loaded.
os.environ["RAPIDS_NO_INITIALIZE"] = "1"
os.environ["CUDF_SPILL"] = "1"
os.environ["LIBCUDF_CUFILE_POLICY"] = "OFF"

from cugraph_bulk_sampling import start_dask_client, benchmark_cugraph_bulk_sampling, load_disk_dataset, construct_graph
from cugraph_bulk_sampling import sample_graph
import os

# Set Up Cluster

In [2]:
# Comma-separated GPU indices 0 through 15, handed to start_dask_client below.
dask_worker_devices = ','.join(str(gpu_index) for gpu_index in range(16))

In [3]:
# Gather the cluster options in one place, then start the Dask client/cluster
# that the remaining cells rely on.
cluster_options = {
    "dask_worker_devices": dask_worker_devices,
    "jit_unspill": False,
    "rmm_pool_size": 28e9,
    "rmm_async": True,
}
client, cluster = start_dask_client(**cluster_options)


Dask client/cluster created using LocalCUDACluster


# Set Up Benchmark

In [4]:
# --- Dataset selection and filesystem locations ---
dataset = 'ogbn_papers100M'
dataset_root = "."
output_root = "."
# The loading and output cells use these names for the two roots.
dataset_dir, output_path = dataset_root, output_root

# --- Options forwarded to load_disk_dataset ---
reverse_edges, add_edge_types = True, False
replication_factor = 4

# --- Options forwarded to sample_graph ---
batch_size = 512
seeds_per_call = 524_288
fanout = [25, 25]
seed = 123
persist = False


In [5]:
# Load the dataset from disk: edge list, labels, offsets and the node count.
(
    dask_edgelist_df,
    dask_label_df,
    node_offsets,
    edge_offsets,
    total_num_nodes,
) = load_disk_dataset(
    dataset,
    dataset_dir=dataset_dir,
    reverse_edges=reverse_edges,
    replication_factor=replication_factor,
    # Follow the notebook-level `persist` setting (also passed to
    # sample_graph) rather than a separate hard-coded literal.
    persist=persist,
    add_edge_types=add_edge_types,
)

num_input_edges = len(dask_edgelist_df)
print(f"Number of input edges = {num_input_edges:,}")

G = construct_graph(dask_edgelist_df)
# Drop the notebook's own reference to the edge list once the graph is built.
del dask_edgelist_df
print('constructed graph')

Loading edge index for edge type paper__cites__paper
Loading node labels for node type paper (offset=0)
Number of input edges = 6,462,743,488
constructed graph


In [6]:
# Report the summed memory_usage() of the graph's edge list dataframe.
edgelist_memory_usage = G.edgelist.edgelist_df.memory_usage()
input_memory = edgelist_memory_usage.sum().compute()
print(f'input memory: {input_memory}')

# Output layout:
#   <output_path>/<dataset>[<replication_factor>]_b<batch_size>_f<fanout>/samples
run_dir_name = f'{dataset}[{replication_factor}]_b{batch_size}_f{fanout}'
output_subdir = os.path.join(output_path, run_dir_name)
output_sample_path = os.path.join(output_subdir, 'samples')
for directory in (output_subdir, output_sample_path):
    os.makedirs(directory, exist_ok=True)

# Floor-divide a fixed 200,000 budget by the batch size (390 when
# batch_size is 512); the result is passed to sample_graph.
batches_per_partition = 200_000 // batch_size



input memory: 103403895808


# Benchmarking Sample Graph

In [7]:
%%timeit -n30 -r1
# Timing cell: -n30 -r1 asks %%timeit for 30 loops in a single repeat, so
# sample_graph is invoked 30 times against the same output_sample_path.
# NOTE(review): names assigned inside a %%timeit cell (execution_time,
# allocation_counts) are presumably not kept in the notebook namespace once
# the cell finishes - confirm before relying on them in a later cell.

execution_time, allocation_counts = sample_graph(
    G,
    dask_label_df,
    output_sample_path,
    seed=seed,
    batch_size=batch_size,
    seeds_per_call=seeds_per_call,
    batches_per_partition=batches_per_partition,
    fanout=fanout,
    persist=persist,
)


created batches
flushed all batches
function:  sample_graph
function args: (<cugraph.structure.graph_classes.MultiGraph object at 0x7ff27879a440>, <dask_cudf.DataFrame | 32 tasks | 16 npartitions>, '/tmp/ramdisk/ogbn_papers100M[4]_b512_f[25, 25]/samples') kwargs: {'seed': 123, 'batch_size': 512, 'seeds_per_call': 524288, 'batches_per_partition': 390, 'fanout': [25, 25], 'persist': False}
execution_time: 9.981931209564209
allocation_counts:
{   'tcp://127.0.0.1:33343': {   'current_bytes': '51.1MB',
                                 'peak_bytes': '2.4GB',
                                 'total_bytes': '60.4GB'},
    'tcp://127.0.0.1:33565': {   'current_bytes': '58.6MB',
                                 'peak_bytes': '2.4GB',
                                 'total_bytes': '61.8GB'},
    'tcp://127.0.0.1:33977': {   'current_bytes': '59.0MB',
                                 'peak_bytes': '2.4GB',
                                 'total_bytes': '61.4GB'},
    'tcp://127.0.0.1:34603': { 