In [1]:
from torch_geometric.datasets import EllipticBitcoinDataset

# Load the dataset: construct it with `root` as its root path argument and keep
# the first element (index 0) as `data`. Later cells read `data.x`, `data.y`,
# `data.edge_index` and `data.num_nodes` from this object.
root = 'EllipticBitcoin'
dataset = EllipticBitcoinDataset(root)
data = dataset[0]

In [2]:
# Show the shape of `data.x`; each row is later uploaded as one node's `features`.
data.x.shape

torch.Size([203769, 165])

In [3]:
# Show the edge tensor; the upload cells treat row 0 as source node ids and
# row 1 as destination node ids.
data.edge_index

tensor([[     0,      2,      4,  ..., 201921, 201480, 201954],
        [     1,      3,      5,  ..., 202042, 201368, 201756]])

In [4]:
from neo4j import GraphDatabase

# Connection settings are read from the environment so that no secret is stored
# in the notebook; when NEO4J_PASSWORD is unset the password is prompted for.
# NOTE: a password that was ever committed with this notebook should be rotated.
import getpass
import os

uri = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
neo4j_user = os.environ.get("NEO4J_USER", "neo4j")
neo4j_password = os.environ.get("NEO4J_PASSWORD") or getpass.getpass("Neo4j password: ")
driver = GraphDatabase.driver(uri, auth=(neo4j_user, neo4j_password))

# Parameterised Cypher statement, run once per batch with the batch bound to `$items`.
# For each item it MERGEs a :Transaction node keyed on `id` and overwrites its
# `features` and `label` properties; then, for every entry of `item.edges`, it
# MERGEs the destination :Transaction node and a :PAYMENT relationship pointing
# from the item's node to that destination.
# Only `edge.dst_id` is read here; the `src_id` key sent with each edge is unused.
cypher_query = '''
UNWIND $items AS item
MERGE (a:Transaction {id: item.id})
SET a.features = item.features, a.label = item.label
WITH a, item.edges AS edges
UNWIND edges AS edge
MERGE (b:Transaction {id: edge.dst_id})
MERGE (a)-[:PAYMENT]->(b)
'''

def create_graph(tx, batch):
    """Run the batch upsert statement inside the given transaction.

    Args:
        tx: Transaction object supplied by ``session.execute_write``.
        batch: List of node dicts, bound to the ``$items`` query parameter.
    """
    query_params = {"items": batch}
    tx.run(cypher_query, **query_params)

# Define batch size
batch_size = 1000

# Index outgoing edges by source node in a single pass over edge_index.
# Masking edge_index once per node rescans every edge for every node
# (num_nodes * num_edges comparisons); this index yields the same edges in the
# same per-node order.
src_ids, dst_ids = data.edge_index.tolist()
edges_by_src = {}
for src_id, dst_id in zip(src_ids, dst_ids):
    edges_by_src.setdefault(src_id, []).append({"src_id": src_id, "dst_id": dst_id})

# Upload nodes and edges in batches
try:
    with driver.session() as session:
        for i in range(0, data.num_nodes, batch_size):
            stop = min(i + batch_size, data.num_nodes)
            # Convert one slice per batch instead of one tensor row per node.
            features = data.x[i:stop].tolist()
            labels = data.y[i:stop].tolist()
            batch = [
                {
                    "id": node_id,
                    "features": node_features,
                    "label": label_value,
                    "edges": edges_by_src.get(node_id, []),
                }
                for node_id, node_features, label_value in zip(range(i, stop), features, labels)
            ]
            # One managed write transaction per batch.
            session.execute_write(create_graph, batch)
finally:
    # Release the driver's connections even if an upload fails part-way.
    driver.close()

In [None]:
import requests

# Define batch size
batch_size = 1000

# Upload nodes and edges in batches
url = "http://localhost:5004/api/nodes/batch"
headers = {"Content-Type": "application/json"}

node_label = "Transaction"
edge_label = "PAYMENT"

# Seconds to wait for the server per request; without a timeout a stalled
# server would block the notebook indefinitely. Tune for slow back ends.
request_timeout = 300

# Index outgoing edges by source node in a single pass over edge_index.
# Masking edge_index once per node rescans every edge for every node; this
# index yields the same edges in the same per-node order.
src_ids, dst_ids = data.edge_index.tolist()
edges_by_src = {}
for src_id, dst_id in zip(src_ids, dst_ids):
    edges_by_src.setdefault(src_id, []).append({"src_id": src_id, "dst_id": dst_id})

for i in range(0, data.num_nodes, batch_size):
    stop = min(i + batch_size, data.num_nodes)
    # Convert one slice per batch instead of one tensor row per node.
    features = data.x[i:stop].tolist()
    labels = data.y[i:stop].tolist()
    batch = [
        {
            "id": node_id,
            "features": node_features,
            "label": label_value,
            "edges": edges_by_src.get(node_id, []),
        }
        for node_id, node_features, label_value in zip(range(i, stop), features, labels)
    ]

    response = requests.post(
        url,
        json={'batch': batch, 'node_label': node_label, 'edge_label': edge_label},
        headers=headers,
        timeout=request_timeout,
    )
    if response.status_code != 200:
        # Report the raw body: error responses are not guaranteed to be JSON,
        # and response.json() would raise while reporting the failure.
        print(f"Failed to upload batch starting at {i}: {response.text}")

# No driver.close() here: this cell talks to the HTTP API only and does not
# own the Neo4j driver.


In [8]:
import time
import uuid
import base64
import os
import hashlib
import shortuuid
import random
import string

# Function to test UUID
def test_uuid(n):
    ids = set()
    start_time = time.time()
    for _ in range(n):
        unique_id = uuid.uuid4().hex
        ids.add(unique_id)
    end_time = time.time()
    return end_time - start_time, len(ids)

# Function to test Base64
def test_base64(n):
    ids = set()
    start_time = time.time()
    for _ in range(n):
        random_bytes = os.urandom(16)
        unique_id = base64.urlsafe_b64encode(random_bytes).rstrip(b'=').decode('utf-8')
        ids.add(unique_id)
    end_time = time.time()
    return end_time - start_time, len(ids)

# Function to test Hashlib
def test_hashlib(n):
    ids = set()
    start_time = time.time()
    for _ in range(n):
        random_seed = os.urandom(16)
        unique_id = hashlib.sha256(random_seed).hexdigest()
        ids.add(unique_id)
    end_time = time.time()
    return end_time - start_time, len(ids)

# Function to test ShortUUID
def test_shortuuid(n):
    ids = set()
    start_time = time.time()
    for _ in range(n):
        unique_id = shortuuid.uuid()
        ids.add(unique_id)
    end_time = time.time()
    return end_time - start_time, len(ids)

# Function to test Time-based IDs
def test_time_based(n):
    ids = set()
    start_time = time.time()
    for _ in range(n):
        timestamp = str(int(time.time() * 1000))
        random_string = ''.join(random.choices(string.ascii_letters + string.digits, k=6))
        unique_id = timestamp + random_string
        ids.add(unique_id)
    end_time = time.time()
    return end_time - start_time, len(ids)

# How many IDs every generator is asked to produce.
n = 10_000_000

# Execute all benchmarks first; nothing is printed until every one has finished.
uuid_time, uuid_count = test_uuid(n)
base64_time, base64_count = test_base64(n)
hashlib_time, hashlib_count = test_hashlib(n)
shortuuid_time, shortuuid_count = test_shortuuid(n)
time_based_time, time_based_count = test_time_based(n)

# Report one line per generator: elapsed seconds and the number of duplicates
# (requested count minus distinct IDs).
print(
    f"UUID: Time taken = {uuid_time:.2f}s, Collisions = {n - uuid_count}",
    f"Base64: Time taken = {base64_time:.2f}s, Collisions = {n - base64_count}",
    f"Hashlib: Time taken = {hashlib_time:.2f}s, Collisions = {n - hashlib_count}",
    f"ShortUUID: Time taken = {shortuuid_time:.2f}s, Collisions = {n - shortuuid_count}",
    f"Time-based: Time taken = {time_based_time:.2f}s, Collisions = {n - time_based_count}",
    sep="\n",
)


UUID: Time taken = 22.42s, Collisions = 0
Base64: Time taken = 8.01s, Collisions = 0
Hashlib: Time taken = 13.44s, Collisions = 0
ShortUUID: Time taken = 60.21s, Collisions = 0
Time-based: Time taken = 22.18s, Collisions = 0


In [20]:
# Scratch check: ask a default-constructed ShortUUID for a random string of length 16.
shortuuid.ShortUUID().random(length=16)

'bqjXN8RjXYemoSG4'

In [23]:
"1".lower()

'1'