# Visualize Saved Results

Read-only notebook that visualizes artifacts produced by the pipeline.
It does not run any of the pipeline steps that create these artifacts; it only loads existing outputs.

Artifacts used:
- 1m bars: `data/processed/{SYMBOL}/minute.parquet`
- 5m features: `data/features/{SYMBOL}/features_5m.parquet`
- Alerts: `artifacts/alerts/{SYMBOL}.csv`
- Metrics: `artifacts/metrics/metrics.json`


In [None]:
# Resolve repo root and import helpers
from pathlib import Path
import sys

def find_root(start: Path | None = None) -> Path:
    p = Path.cwd() if start is None else start
    for _ in range(6):
        if (p / 'splf').exists() and (p / 'config').exists():
            return p
        p = p.parent
    return Path.cwd()

# Put the repo root at the front of sys.path so the project package is importable.
# Any earlier copy of the same entry is dropped first, so re-running this cell
# does not pile up duplicate entries.
root = find_root()
_root_str = str(root)
sys.path[:] = [_root_str] + [entry for entry in sys.path if entry != _root_str]
print('Repo root:', root)


In [None]:
# Load config, choose symbol, and define paths
import json
import pandas as pd
import matplotlib.pyplot as plt
from splf.utils.io import load_yaml

# Load the project config and pick the symbol to visualize.
CFG_PATH = root / 'config' / 'config.yaml'
cfg = load_yaml(str(CFG_PATH))
paths = cfg['paths']
# `or` (rather than a .get default) also covers keys that are present but
# blank, which presumably load as None from YAML — verify against load_yaml.
uni = cfg.get('universe') or {}
# Preference order: explicit `symbols`, then the tiers concatenated in order,
# then a single hard-coded fallback symbol.
SYMBOLS = (
    uni.get('symbols')
    or [s for tier in ('tier_a', 'tier_b', 'tier_c') for s in (uni.get(tier) or [])]
    or ['BTCUSDT']
)
SYMBOL = SYMBOLS[0]
# Cell output: the chosen symbol and the first few available symbols.
SYMBOL, SYMBOLS[:5]


## Load Artifacts
This cell loads saved outputs. If a file is missing, the corresponding object will be empty (an empty DataFrame, or an empty dict for the metrics).


In [None]:
def read_parquet_any(p: Path) -> pd.DataFrame:
    """Read a parquet file with whichever supported engine works.

    Tries ``pyarrow`` first and then ``fastparquet``. A failure with one
    engine (not installed, or unable to read this file) falls through to the
    next one.

    Args:
        p: Path of the parquet file to read.

    Returns:
        The file contents as a DataFrame.

    Raises:
        RuntimeError: If no engine could read the file. When every failure is
            an ImportError the message asks for an engine to be installed;
            otherwise it names the file and each engine's error. The last
            underlying exception is attached as ``__cause__``.
    """
    failures = []
    # Only the two explicit engine names are tried: pandas accepts 'auto',
    # 'pyarrow' or 'fastparquet' for `engine`, and 'auto' would just retry these.
    for engine in ('pyarrow', 'fastparquet'):
        try:
            return pd.read_parquet(p, engine=engine)
        except Exception as exc:  # deliberate best-effort: try the next engine
            failures.append((engine, exc))
    if all(isinstance(exc, ImportError) for _, exc in failures):
        message = 'Install pyarrow or fastparquet in the active kernel'
    else:
        detail = '; '.join(f'{engine}: {exc!r}' for engine, exc in failures)
        message = f'Could not read {p} ({detail})'
    raise RuntimeError(message) from failures[-1][1]

# Expected artifact locations for the chosen symbol; the directories come from the config.
p_min = root / Path(paths['processed_dir']) / SYMBOL / 'minute.parquet'
p_feat = root / Path(paths['features_dir']) / SYMBOL / 'features_5m.parquet'
p_alerts = root / Path(paths['artifacts_dir']) / 'alerts' / f'{SYMBOL}.csv'
p_metrics = root / Path(paths['artifacts_dir']) / 'metrics' / 'metrics.json'

# Load whatever exists; a missing file yields an empty placeholder instead of an error.
df1m = read_parquet_any(p_min) if p_min.exists() else pd.DataFrame()
df5 = read_parquet_any(p_feat) if p_feat.exists() else pd.DataFrame()
alerts = pd.read_csv(p_alerts, parse_dates=['ts']) if p_alerts.exists() else pd.DataFrame()
# Read the metrics inside a `with` block so the file handle is closed right away.
metrics = {}
if p_metrics.exists():
    with open(p_metrics) as fh:
        metrics = json.load(fh)
# Cell output: which files were found, followed by the size of what was loaded.
(p_min.exists(), p_feat.exists(), p_alerts.exists(), p_metrics.exists()), (df1m.shape, df5.shape, len(alerts), bool(metrics))


## Plot 1m Price, Basis, Spread, Flow (perp)
Shaded regions indicate `data_ok = False`. Alerts are red markers on the price panel.


In [None]:
def false_ranges(mask: pd.Series):
    """Return (start, end) index-label pairs for each run of False values in `mask`.

    ``start`` is the label of the first False entry of a run. ``end`` is the
    label of the first True entry after the run, or the last label of the
    index when the run extends to the end of the series.

    Args:
        mask: Boolean-like series (True = ok). Missing values are treated as
            False. Object-dtype and 0/1 numeric masks are accepted as well as
            plain bool.

    Returns:
        A list of ``(start, end)`` tuples; empty when `mask` is empty or
        contains no False values.
    """
    if mask.empty:
        return []
    # Normalise to a plain bool series first. On object or integer dtypes `~`
    # is a bitwise invert (giving -1/-2 rather than True/False), which breaks
    # run detection for a run that starts on the first row.
    ok = mask.astype('boolean').fillna(False).astype(bool)
    bad = ~ok
    # Previous row's state; the row before the first one counts as "not bad",
    # so a run that begins on the first row is still reported as a start.
    prev_bad = bad.shift(1, fill_value=False)
    starts = list(mask.index[(bad & ~prev_bad).to_numpy()])
    ends = list(mask.index[(~bad & prev_bad).to_numpy()])
    if len(ends) < len(starts):
        # The final run never recovers: close it at the last label.
        ends.append(mask.index[-1])
    return list(zip(starts, ends))

# Decide which panels can be drawn from the columns actually present in df1m.
price_cols = [c for c in ('perp_mark','index_px') if c in df1m.columns]
has_basis = {'perp_mark', 'index_px'}.issubset(df1m.columns)
has_spread = 'spread_bps' in df1m.columns
has_flow = {'taker_buy_qty', 'taker_sell_qty'}.issubset(df1m.columns)
# The price panel is always drawn; every optional panel adds one row.
rows = 1 + int(has_basis) + int(has_spread) + int(has_flow)
fig, axes = plt.subplots(rows, 1, figsize=(12, 2.6*rows), sharex=True)
if rows == 1:
    # plt.subplots returns a bare Axes for a single panel; wrap it so the
    # indexing and looping below work in both cases.
    axes = [axes]
i = 0  # index of the next free panel
# Price
ax = axes[i]; i += 1
for c in price_cols:
    ax.plot(df1m.index, df1m[c], label=c)
ax.set_ylabel('Price')
ax.legend(loc='upper left')
ax.grid(True, alpha=0.3)
# Basis: perp mark relative to the index price, in basis points
if has_basis:
    ax = axes[i]; i += 1
    basis_bps = (df1m['perp_mark'] - df1m['index_px']) / df1m['index_px'] * 10000.0
    ax.plot(df1m.index, basis_bps, color='tab:purple', label='basis_bps')
    ax.axhline(0, color='#666', lw=0.8)
    ax.set_ylabel('Basis (bps)')
    ax.legend(loc='upper left')
    ax.grid(True, alpha=0.3)
# Spread
if has_spread:
    ax = axes[i]; i += 1
    ax.plot(df1m.index, df1m['spread_bps'], color='tab:orange', label='spread_bps')
    ax.set_ylabel('Spread (bps)')
    ax.legend(loc='upper left')
    ax.grid(True, alpha=0.3)
# Flow: rolling 15-minute sum of (taker buy - taker sell) quantity
if has_flow:
    ax = axes[i]; i += 1
    # '15min' replaces the deprecated 'T' offset alias (deprecated since pandas 2.2).
    # A time-based window assumes df1m has a datetime-like index — TODO confirm.
    cvd15 = (df1m['taker_buy_qty'] - df1m['taker_sell_qty']).rolling('15min', min_periods=1).sum()
    ax.plot(df1m.index, cvd15, color='tab:green', label='CVD 15m (perp)')
    ax.set_ylabel('Flow (qty)')
    ax.legend(loc='upper left')
    ax.grid(True, alpha=0.3)
# Shade data holes on every panel (missing data_ok values count as not ok)
if 'data_ok' in df1m.columns:
    for s, e in false_ranges(df1m['data_ok'].fillna(False)):
        for a in axes:
            a.axvspan(s, e, color='red', alpha=0.1)
# Alerts on price: look up the mark price at each alert timestamp
if not alerts.empty and 'perp_mark' in df1m.columns:
    axes[0].scatter(alerts['ts'], df1m.reindex(alerts['ts'])['perp_mark'], s=12, color='red', label='alerts')
    # Redraw the legend so it includes the alerts entry.
    axes[0].legend(loc='upper left')
fig.suptitle(f'{SYMBOL} — 1m price, basis, spread, flow with alerts')
fig.autofmt_xdate()
# Leave a strip at the top of the figure for the suptitle.
fig.tight_layout(rect=[0,0,1,0.97])
fig


## Plot Selected 5m Features


In [None]:
# Keep only the features of interest that are present in the loaded 5m frame.
sel = [
    c
    for c in ['basis_now','basis_TWAP_60m','premium_TWAP_120m','cvd_perp_15m','perp_share_60m','rv_15m']
    if c in df5.columns
]
# Always request at least one panel so a figure is produced even when nothing matches.
n = max(1, len(sel))
fig, axes = plt.subplots(n, 1, figsize=(12, 2.4*n), sharex=True)
# A single panel comes back as a bare Axes; wrap it so axes[i] works below.
axes = [axes] if n == 1 else axes
# One panel per selected feature, labelled with the feature name.
for i, c in enumerate(sel):
    axes[i].plot(df5.index, df5[c], label=c)
    axes[i].set_ylabel(c)
    axes[i].grid(True, alpha=0.3)
    axes[i].legend(loc='upper left')
fig.suptitle(f'{SYMBOL} — selected 5m features')
fig.autofmt_xdate()
fig.tight_layout(rect=[0,0,1,0.97])
fig


## Alerts and Metrics


In [None]:
# Preview the first ten alerts; on an empty frame head() just shows the empty frame.
alerts.head(10)


In [None]:
import json
from pprint import pprint
# Pretty-print the metrics when there are any: pprint() writes to stdout and the
# expression then evaluates to None. When metrics is empty, the empty object
# itself is the value of this last expression.
pprint(metrics) if metrics else metrics
