In [None]:
import os
import json
import requests
from torch_geometric.datasets import EllipticBitcoinDataset

In [None]:
# Root address of the HTTP service that receives the batched uploads.
base_url = 'http://localhost:5004'

# Batch endpoints, one for nodes and one for edges, both under the same root.
node_url = base_url + "/api/nodes/batch"
edge_url = base_url + "/api/edges/batch"

# Every request body is sent as JSON.
headers = dict([("Content-Type", "application/json")])

In [1]:
# Directory handed to the dataset class as its root folder.
root = 'EllipticBitcoin'

# Build the dataset and keep its first (index 0) graph object; the cells
# below read `data.x`, `data.y` and `data.edge_index` from it.
dataset = EllipticBitcoinDataset(root=root)
data = dataset[0]

In [2]:
def read_n_to_last_line(filename, n=1):
    """Return the n-th line counted from the end of a file (n=1 is the last line).

    The file is scanned backwards one byte at a time from its end, so only the
    tail of the file is read no matter how large it is.

    Args:
        filename: Path of the file to read.
        n: 1-based position from the end; expected to be >= 1.

    Returns:
        The requested line decoded to ``str``, including its trailing newline
        when the file has one. If the file contains fewer than ``n`` line
        breaks before its final line, the first line is returned. An empty
        file yields ``''``.
    """
    num_newlines = 0
    with open(filename, 'rb') as f:
        try:
            # Start on the byte just before the final one so that a trailing
            # newline terminating the last line is not counted as a separator.
            f.seek(-2, os.SEEK_END)
            while True:
                # Inspect the current byte *before* stepping back. Stepping
                # back first would skip the two bytes nearest the end and
                # miss the separator of a one- or two-character last line.
                if f.read(1) == b'\n':
                    num_newlines += 1
                    if num_newlines >= n:
                        # Positioned right after the n-th separator from the
                        # end, i.e. at the start of the wanted line.
                        break
                # read(1) advanced by one byte; -2 nets one byte backwards.
                f.seek(-2, os.SEEK_CUR)
        except OSError:
            # Seeking before the start of the file: fewer separators than
            # requested (or a file shorter than two bytes). Use the first line.
            f.seek(0)
        last_line = f.readline().decode()
    return last_line

def load_node_id_map(file_path):
    """Build an ``orig_id -> new_id`` dictionary from a JSON-lines file.

    Each line of ``file_path`` is parsed as a JSON object and must carry the
    keys ``'orig_id'`` and ``'new_id'``. When an ``orig_id`` occurs on more
    than one line, the value from the last such line is kept.
    """
    with open(file_path, 'r') as handle:
        records = map(json.loads, handle)
        return {record['orig_id']: record['new_id'] for record in records}

def get_node_id(node_id_map, orig_id):
    """Look up ``orig_id`` in ``node_id_map``; return ``None`` when it is absent."""
    new_id = node_id_map.get(orig_id, None)
    return new_id

In [5]:
# Upload every transaction node to the batch endpoint and append the id the
# server returns for each node to `node_file` (one JSON object per line), so
# an interrupted run can be resumed by re-running this cell.
node_batch_size = 10_000
# (connect, read) timeouts in seconds, so a stalled server cannot hang the cell.
node_request_timeout = (10, 600)

node_file = 'transaction_nodes.jsonl'
start_index = 0

# Resume: the last line written holds the highest node index already uploaded.
# An existing but empty file has no last line to parse, so it counts as a
# fresh start.
if os.path.exists(node_file) and os.path.getsize(node_file) > 0:
    last_line = read_n_to_last_line(node_file, 1)
    start_index = json.loads(last_line)['orig_id'] + 1

total_nodes = len(data.x)

with requests.Session() as session:
    for start in range(start_index, total_nodes, node_batch_size):
        end = min(start + node_batch_size, total_nodes)

        # Convert the whole feature slice once instead of once per row.
        batch_features = data.x[start:end].tolist()
        nodes = [
            {
                'labels': ['Transaction'],
                'x': batch_features[i - start],
                'y': data.y[i].item(),
                'orig_id': i,
            }
            for i in range(start, end)
        ]

        try:
            response = session.post(
                node_url, json=nodes, headers=headers, timeout=node_request_timeout
            )
            response.raise_for_status()
            new_node_ids = response.json()
        except requests.exceptions.RequestException as e:
            print(f"Failed to create nodes {start} to {end - 1}: {e}")
            # Resuming only looks at the last line of `node_file`. Carrying on
            # with later batches would leave a gap that is never retried, so
            # stop here and let the next run restart from this batch.
            print("Stopping; re-run this cell to resume from this batch.")
            break

        # Ids are paired with nodes by position, so the counts must agree.
        if len(new_node_ids) != len(nodes):
            print(
                f"Expected {len(nodes)} ids for nodes {start} to {end - 1}, "
                f"got {len(new_node_ids)}; stopping without recording this batch."
            )
            break

        with open(node_file, 'a') as file:
            for sent, created in zip(nodes, new_node_ids):
                json_line = json.dumps({'orig_id': sent['orig_id'], 'new_id': created['id']})
                file.write(json_line + '\n')
        print(f"Nodes {start} to {end - 1} created successfully")

Nodes 0 to 9999 created successfully
Nodes 10000 to 19999 created successfully
Nodes 20000 to 29999 created successfully
Nodes 30000 to 39999 created successfully
Nodes 40000 to 49999 created successfully
Nodes 50000 to 59999 created successfully
Nodes 60000 to 69999 created successfully
Nodes 70000 to 79999 created successfully
Nodes 80000 to 89999 created successfully
Nodes 90000 to 99999 created successfully
Nodes 100000 to 109999 created successfully
Nodes 110000 to 119999 created successfully
Nodes 120000 to 129999 created successfully
Nodes 130000 to 139999 created successfully
Nodes 140000 to 149999 created successfully
Nodes 150000 to 159999 created successfully
Nodes 160000 to 169999 created successfully
Nodes 170000 to 179999 created successfully
Nodes 180000 to 189999 created successfully
Nodes 190000 to 199999 created successfully
Nodes 200000 to 203768 created successfully


In [8]:
# Upload every edge to the batch endpoint, translating dataset node indices to
# the ids recorded during the node upload, and append the id the server
# returns for each edge to `edge_file` so the cell can be resumed.
node_id_map = load_node_id_map('transaction_nodes.jsonl')

edge_index = data.edge_index
edge_batch_size = 10_000
# (connect, read) timeouts in seconds, so a stalled server cannot hang the cell.
edge_request_timeout = (10, 600)

edge_file = 'transaction_edges.jsonl'
total_edges = edge_index.size(1)
start_index = 0
# Resume: the last line written holds the index of the last edge created.
# An existing but empty file has no last line to parse, so it counts as a
# fresh start.
if os.path.exists(edge_file) and os.path.getsize(edge_file) > 0:
    last_line = read_n_to_last_line(edge_file, 1)
    start_index = json.loads(last_line)['orig_id'] + 1

with requests.Session() as session:
    for start in range(start_index, total_edges, edge_batch_size):
        end = min(start + edge_batch_size, total_edges)

        # Convert both endpoint rows for the batch once instead of per edge.
        src_batch = edge_index[0, start:end].tolist()
        dst_batch = edge_index[1, start:end].tolist()

        edges = []
        for i, (src_index, dst_index) in enumerate(zip(src_batch, dst_batch), start=start):
            src_id = get_node_id(node_id_map, src_index)
            dst_id = get_node_id(node_id_map, dst_index)

            # Edges touching a node with no recorded id are left out.
            if src_id is not None and dst_id is not None:
                edges.append({
                    'src': src_id,
                    'dst': dst_id,
                    'orig_id': i,
                })

        if not edges:
            print(f"Edges {start} to {end - 1} skipped: no edge has both endpoints in the node map")
            continue

        try:
            response = session.post(
                edge_url, json=edges, headers=headers, timeout=edge_request_timeout
            )
            response.raise_for_status()
            edge_ids = response.json()
        except requests.exceptions.RequestException as e:
            print(f"Failed to create edges {start} to {end - 1}: {e}")
            # Resuming only looks at the last line of `edge_file`. Carrying on
            # with later batches would leave a gap that is never retried, so
            # stop here and let the next run restart from this batch.
            print("Stopping; re-run this cell to resume from this batch.")
            break

        # Ids are paired with edges by position, so the counts must agree.
        if len(edge_ids) != len(edges):
            print(
                f"Expected {len(edges)} ids for edges {start} to {end - 1}, "
                f"got {len(edge_ids)}; stopping without recording this batch."
            )
            break

        with open(edge_file, 'a') as file:
            for sent, created in zip(edges, edge_ids):
                # Record the edge's real index. Because some edges may have
                # been left out above, `start + position` would mislabel
                # every edge after the first omission and make the resume
                # index too low.
                json_line = json.dumps({'orig_id': sent['orig_id'], 'new_id': created['id']})
                file.write(json_line + '\n')
        print(f"Edges {start} to {end - 1} created successfully")


Edges 0 to 9999 created successfully
Edges 10000 to 19999 created successfully
Edges 20000 to 29999 created successfully
Edges 30000 to 39999 created successfully
Edges 40000 to 49999 created successfully
Edges 50000 to 59999 created successfully
Edges 60000 to 69999 created successfully
Edges 70000 to 79999 created successfully
Edges 80000 to 89999 created successfully
Edges 90000 to 99999 created successfully
Edges 100000 to 109999 created successfully
Edges 110000 to 119999 created successfully
Edges 120000 to 129999 created successfully
Edges 130000 to 139999 created successfully
Edges 140000 to 149999 created successfully
Edges 150000 to 159999 created successfully
Edges 160000 to 169999 created successfully
Edges 170000 to 179999 created successfully
Edges 180000 to 189999 created successfully
Edges 190000 to 199999 created successfully
Edges 200000 to 209999 created successfully
Edges 210000 to 219999 created successfully
Edges 220000 to 229999 created successfully
Edges 230000 