In [None]:
"""Script to generate trades for testing purposes."""
import sys, os
# Make the project root importable so the `api.*` imports below resolve.
# The root is computed as the parent of the current working directory
# (tools -> regtechdemo), so this relies on the kernel's cwd being the
# tools folder -- NOTE(review): confirm when launching from elsewhere.
_parent_of_cwd = os.path.join(os.getcwd(), "..")
PROJECT_ROOT = os.path.abspath(_parent_of_cwd)
_already_on_path = PROJECT_ROOT in sys.path
if not _already_on_path:
    # Prepend so the entry is searched before the other sys.path entries.
    sys.path.insert(0, PROJECT_ROOT)

from api.data.tradebuilder import TradeBuilder
from api.data.config import Config
import uuid
from pathlib import Path
from datetime import datetime

# --- Run parameters (edit these to change the generated dataset) ---
# Close-of-business date for the data snapshot.
cob_dt = datetime(year=2025, month=10, day=10)
# Total number of trades requested, and the chunk size handed to the builder.
rows = 5_000_000
chunk_size = 200_000

# --- Run identity and output location ---
# Each execution gets a fresh random UUID; the parquet file is placed in a
# per-run directory under Config.RUNS_DIR.
run_id = str(uuid.uuid4())
out_path = Path(Config.RUNS_DIR).joinpath(run_id, "trades.parquet")

print(f"Building {rows} trades in chunks of {chunk_size} at COB {cob_dt.strftime('%Y-%m-%d')}")
print(f"to {out_path} with run_id {run_id} ...")

# --- Build ---
# `manifest` is kept as a notebook-level name so it can be inspected afterwards.
manifest = TradeBuilder().build(cob_dt, run_id, rows, chunk_size, out_path)

# --- Report ---
# The outcome is read from the manifest's "status" entry; anything other than
# "succeeded" (including a missing status) is reported as an error.
if manifest.get("status") == "succeeded":
    print(f"Completed in {manifest['duration_s']}s. Wrote {manifest['rows']} rows (mb: {manifest['file_size_mb']}).")
else:
    print(f"Error: {manifest.get('error')}")

Building 5000000 trades in chunks of 200000 at COB 2025-10-10
to .data/runs/98a43010-9e79-4c8f-a751-4be698042ff2/trades.parquet with run_id 98a43010-9e79-4c8f-a751-4be698042ff2 ...
Completed in 7.36s. Wrote 5000000 rows (mb: 221.98)


In [4]:
# Display the full manifest returned by the build call for inspection.
manifest

{'dataset': 'trades',
 'run_id': '98a43010-9e79-4c8f-a751-4be698042ff2',
 'status': 'succeeded',
 'rows': 5000000,
 'written': 5000000,
 'out_path': '.data/runs/98a43010-9e79-4c8f-a751-4be698042ff2/trades.parquet',
 'duration_s': 7.36,
 'file_size_bytes': 232765438,
 'file_size_mb': 221.98}