In [None]:
import gc
import os
from time import perf_counter
import numpy as np
import random
import math

# rapids
import cugraph
import cudf

# NetworkX libraries
import networkx as nx

# RMAT data generator
from cugraph.generators import rmat
from datetime import datetime

In [None]:
def generate_data(scale, edgefactor=16):
    """Build a synthetic edge list with the cugraph R-MAT generator.

    Parameters
    ----------
    scale : int
        Passed to ``rmat`` as its first argument; the requested number of
        edges is ``(2 ** scale) * edgefactor``.
    edgefactor : int, default 16
        Multiplier applied to ``2 ** scale`` to obtain the edge count.

    Returns
    -------
    The object returned by ``rmat`` (an edge list rather than a Graph,
    because ``create_using=None``).
    """
    num_edges = (2 ** scale) * edgefactor
    # NOTE(review): 0.57 / 0.19 / 0.19 / 42 are presumably the R-MAT a, b, c
    # probabilities and the RNG seed -- confirm against the rmat signature.
    edgelist = rmat(
        scale,
        num_edges,
        0.57,
        0.19,
        0.19,
        42,
        clip_and_flip=False,
        scramble_vertex_ids=True,
        create_using=None,  # return edgelist instead of Graph instance
        mg=False
    )
    print(f'Generating a dataframe of {len(edgelist)}...')
    return edgelist

In [None]:
def gen_times(count, start_date, end_date):
    """Draw ``count`` random timestamps between two datetimes.

    Parameters
    ----------
    count : int
        Number of timestamps to generate.
    start_date, end_date : datetime.datetime
        Inclusive bounds of the sampling window.

    Returns
    -------
    cudf.Series
        Series named ``'Date'`` with integer dtype holding POSIX timestamps
        in whole seconds (use ``datetime.fromtimestamp`` to convert a value
        back to a ``datetime``).
    """
    # datetime.timestamp() returns a float, but random.randint needs integer
    # bounds (float arguments are deprecated since Python 3.10 and rejected
    # by 3.12), so truncate both ends to whole seconds.
    range_start = int(start_date.timestamp())
    range_end = int(end_date.timestamp())
    random_list = [random.randint(range_start, range_end) for _ in range(count)]
    return cudf.Series(random_list, name='Date', dtype=int)

In [None]:
def gen_amounts(count,value_range):
    """Draw ``count`` random amounts between 0 and ``value_range``.

    Values are sampled as integers in ``[0, value_range * 100]`` and then
    divided by 100, giving amounts with a resolution of one hundredth.

    Returns a cudf Series named ``'amount'`` with float dtype.
    """
    hundredths = [random.randint(0, value_range * 100) for _ in range(count)]
    as_series = cudf.Series(hundredths, name='amount', dtype=float)
    return as_series.divide(100)

In [None]:
# Sampling window for the synthetic dates and upper bound for the amounts.
start_time = '1/1/2022 01:00:00 AM'
end_time =   '7/1/2022 01:00:00 AM'
amount_range = 25000
d1 = datetime.strptime(start_time, '%m/%d/%Y %I:%M:%S %p')
d2 = datetime.strptime(end_time, '%m/%d/%Y %I:%M:%S %p')

# Seed Python's RNG so the generated dates/amounts are the same on every
# Restart & Run All (gen_times / gen_amounts draw from the `random` module).
random.seed(42)

df = generate_data(15)

# Attach one random amount and one random timestamp to every generated edge.
dates = gen_times(len(df),d1, d2)
amounts = gen_amounts(len(df),amount_range)
df['amounts'] = amounts
df['date'] = dates

# Make sure the target directory exists so the write cannot fail after the
# data has already been generated.
os.makedirs('../data', exist_ok=True)
# to_csv is called with default arguments: no append mode is requested, the
# file is written from scratch on each run.
df.to_csv('../data/data_500m.csv')

print(str(len(df)) + ' rows written')
# Last expression of the cell so the notebook actually displays the preview.
df.head(4)