# Step 4 / Point 2 — Extract + Snapshot

This notebook snapshots ClickHouse datasets into versioned Parquet files + a `manifest.json` for reproducible research.

- Source view: `polybot.user_trade_research`
- Optional (toggled by `include_market_trades` / `include_clob_tob` below): `polybot.market_trades`, `polybot.clob_tob`

Output: `research/data/snapshots/<snapshot_id>/...`


In [None]:
# --- Snapshot parameters (edit here, then re-run the cells below) ---

# User passed to `snapshot_user` as `username`.
username = "gabagool22"

# Time-window bounds as ISO strings; leave both as None to snapshot
# everything we have.
start_ts = None  # e.g. "2025-12-14T00:00:00Z"
end_ts = None  # e.g. "2025-12-15T00:00:00Z"

# Forwarded as `margin_minutes`; presumably padding around the window for the
# optional datasets — confirm against snapshot.py.
margin_minutes = 15

# Switches for the optional datasets.
include_market_trades = True
include_clob_tob = True


In [None]:
import sys
from pathlib import Path

# Locate the directory that holds `snapshot.py` so it can be imported from
# either supported working directory: `research/` itself, or the repo root.
cwd = Path.cwd()
research_dir = next(
    (
        candidate
        for candidate in (cwd, cwd / "research")  # same probe order as before
        if (candidate / "snapshot.py").exists()
    ),
    None,
)
if research_dir is None:
    raise RuntimeError("Run Jupyter from the repo root or the `research/` directory")

# Put the directory at the front of sys.path exactly once. Dropping any earlier
# copy first keeps this cell idempotent: re-running it no longer stacks up
# duplicate entries, while the directory still takes import priority.
research_path = str(research_dir)
sys.path[:] = [research_path, *(entry for entry in sys.path if entry != research_path)]

from snapshot import snapshot_user


In [None]:
# Collect the notebook parameters into one mapping and forward them to
# `snapshot_user` as keyword arguments.
snapshot_kwargs = {
    "username": username,
    "start_ts": start_ts,
    "end_ts": end_ts,
    "margin_minutes": margin_minutes,
    "include_market_trades": include_market_trades,
    "include_clob_tob": include_clob_tob,
}
result = snapshot_user(**snapshot_kwargs)

# Show the returned value as the cell output.
result

In [None]:
import json

# Read back the manifest referenced by the snapshot result.
# The encoding is pinned to UTF-8 (the standard JSON encoding) so the decode
# does not depend on the platform's locale default.
manifest_path = result["manifest_path"]
manifest = json.loads(Path(manifest_path).read_text(encoding="utf-8"))

# Display the snapshot id together with the manifest's outputs entry.
manifest["snapshot_id"], manifest["outputs"]

In [None]:
import pandas as pd

# Sanity check: load the trades file from the snapshot directory and preview
# its first rows.
trades_path = Path(result["snapshot_dir"]) / "trades.parquet"
trades = pd.read_parquet(trades_path)
trades.head()