# CLV Intelligence Pack

Notebook to explore closing-line value, steam signals, and portfolio metrics for paper trades.


In [None]:
import json
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Use seaborn's dark-grid theme for the figures drawn by later cells.
sns.set_theme(style="darkgrid")

# Input files, resolved against the current working directory.
CLV_PATH = Path("../state/sports_paper_clv.json").resolve()
EINSTEIN_PATH = Path("../state/sports_einstein_queue.json").resolve()

# Fail early with the resolved path so a wrong working directory is obvious.
if not CLV_PATH.exists():
    raise FileNotFoundError(f"CLV dataset not found: {CLV_PATH}")

# JSON text is UTF-8; decode it explicitly rather than with the platform's
# locale encoding, which can mis-decode non-ASCII characters.
with CLV_PATH.open(encoding="utf-8") as f:
    clv_payload = json.load(f)

# One row per entry of the payload's "trades" list; "summary" is kept as a
# plain dict for the cells that read it.
trades = pd.DataFrame(clv_payload.get("trades", []))
summary = clv_payload.get("summary", {})

print(f"Loaded {len(trades)} trades")
trades.head()

In [None]:
# Parse timestamps. "settled_at" may be absent from the payload; in that case
# create an all-NaT datetime column instead of an object column of None
# (pd.to_datetime(None) returns None).
trades["placed_at"] = pd.to_datetime(trades["placed_at"])
if "settled_at" in trades.columns:
    trades["settled_at"] = pd.to_datetime(trades["settled_at"])
else:
    trades["settled_at"] = pd.NaT

# Coerce numeric fields; values that cannot be parsed become NaN.
numeric_cols = ["stake", "decimal_odds", "bet_implied_prob", "closing_implied_prob", "pnl", "clv"]
trades[numeric_cols] = trades[numeric_cols].apply(pd.to_numeric, errors="coerce")
trades["steam_direction"] = trades["steam_direction"].fillna("unknown")

# Derived metrics
# Mask zero stakes to NaN first so the ROI is NaN rather than +/-inf, which
# would otherwise dominate any mean computed from this column.
trades["roi_pct"] = trades["pnl"] / trades["stake"].where(trades["stake"] != 0) * 100
trades["is_positive_clv"] = trades["clv"] > 0
trades.head()

In [None]:
# Average CLV and ROI per (sport, league, regime) group.
group_cols = ["sport", "league", "regime"]
trades_with_roi = trades.copy()

# ROI = pnl / stake, computed column-wise. Non-numeric values are coerced to
# NaN and zero stakes are masked to NaN, so those rows yield a NaN ROI
# instead of raising or producing +/-inf.
pnl = pd.to_numeric(trades_with_roi["pnl"], errors="coerce")
stake = pd.to_numeric(trades_with_roi["stake"], errors="coerce")
trades_with_roi["roi"] = pnl / stake.where(stake != 0)

regime_summary = (
    # dropna=False keeps trades whose grouping keys are missing.
    trades_with_roi.groupby(group_cols, dropna=False)
    .agg(
        count=("id", "count"),
        avg_clv=("clv", "mean"),
        avg_roi=("roi", "mean"),
    )
    .reset_index()
)
# Express the averages as percentages.
regime_summary["avg_clv_pct"] = regime_summary["avg_clv"] * 100
regime_summary["avg_roi_pct"] = regime_summary["avg_roi"] * 100
regime_summary.sort_values("avg_clv_pct", ascending=False).head(10)

In [None]:
# Show the portfolio scenarios from the payload summary, indexed by label.
portfolio = clv_payload.get("summary", {}).get("portfolio", {})
scenarios = pd.DataFrame(portfolio.get("scenarios", []) if portfolio else [])

# A portfolio entry without scenarios yields an empty frame with no "label"
# column; calling set_index("label") on it would raise KeyError, so fall
# back to the placeholder (or to the unindexed table when only the label
# column is missing).
if scenarios.empty:
    portfolio_view = "No portfolio data"
elif "label" in scenarios.columns:
    portfolio_view = scenarios.set_index("label")
else:
    portfolio_view = scenarios
portfolio_view

In [None]:
# Histogram (with KDE overlay) of per-trade CLV, shown in percent.
fig, ax = plt.subplots(figsize=(10, 6))
non_null_clv = trades["clv"].dropna()
if non_null_clv.empty:
    # Nothing to plot yet: show a centred placeholder instead of empty axes.
    ax.text(0.5, 0.5, "No CLV data yet", ha="center", va="center", fontsize=14)
    ax.set_axis_off()
else:
    # The values are scaled by 100, matching the "avg_clv_pct" convention
    # used in the regime summary, so the axis is in percent. The title
    # previously said "bps", contradicting both the data and the x label.
    sns.histplot(non_null_clv * 100, bins=20, kde=True, ax=ax)
    ax.set_title("CLV Distribution (%)")
    ax.set_xlabel("CLV (%)")
    ax.set_ylabel("Frequency")
fig.tight_layout()
fig

In [None]:
# Build the leaderboard tables and the steam-direction counts from the
# payload summary, then render each one under its heading.
weekly = pd.DataFrame(summary.get("weekly_leaders", []))
monthly = pd.DataFrame(summary.get("monthly_leaders", []))

# "steam_summary" is read as a mapping; each (key, value) pair becomes one row.
steam_pairs = list(summary.get("steam_summary", {}).items())
steam_summary = pd.DataFrame(steam_pairs, columns=["steam_direction", "count"])

for heading, table in (
    ("Weekly Leaders", weekly),
    ("\nMonthly Leaders", monthly),
    ("\nSteam Summary", steam_summary),
):
    print(heading)
    display(table)

In [None]:
from __future__ import annotations

import json
from pathlib import Path

import pandas as pd

# Locate the CLV dataset. The loader near the top of this notebook reads
# "../state/sports_paper_clv.json" while this cell originally read only
# "state/sports_paper_clv.json"; for a given working directory at most one
# of those normally exists. Try this cell's own location first, then the
# sibling one, so the notebook runs end to end from either directory.
_clv_candidates = (
    Path("state/sports_paper_clv.json"),
    Path("../state/sports_paper_clv.json"),
)
CLV_PATH = next((p for p in _clv_candidates if p.exists()), _clv_candidates[0])
if not CLV_PATH.exists():
    raise FileNotFoundError("state/sports_paper_clv.json not found. Run the paper trader first.")

# JSON text is UTF-8; decode explicitly instead of using the locale default.
clv_payload = json.loads(CLV_PATH.read_text(encoding="utf-8"))
trades_df = pd.DataFrame(clv_payload.get("trades", []))
summary = clv_payload.get("summary", {})

trades_df.head()

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Switch to the white-grid theme for the figures in this part of the notebook.
sns.set_theme(style="whitegrid")

# Histogram (with KDE overlay) of the raw, unscaled "clv" column.
clv_series = trades_df["clv"].dropna()
fig, ax = plt.subplots(figsize=(8, 4))
if not clv_series.empty:
    sns.histplot(clv_series, bins=20, kde=True, ax=ax)
    ax.set_title("CLV Distribution")
    ax.set_xlabel("Closing Line Value")
else:
    # No observations: replace the axes with a centred placeholder message.
    placeholder_style = {"ha": "center", "va": "center", "fontsize": 14}
    ax.text(0.5, 0.5, "No CLV data yet", **placeholder_style)
    ax.set_axis_off()
plt.tight_layout()
fig

In [None]:
# Table of the summary's "weekly_leaders" entries (empty when the key is absent).
weekly_rows = summary.get("weekly_leaders", [])
pd.DataFrame(weekly_rows)

In [None]:
# Table of the summary's "monthly_leaders" entries (empty when the key is absent).
monthly_rows = summary.get("monthly_leaders", [])
pd.DataFrame(monthly_rows)

In [None]:
# Scatter plot of per-trade CLV against ROI, coloured by steam direction.
analysis_df = trades_df.copy()

# trades_df comes straight from JSON, so "pnl" and "stake" may hold None or
# other non-numeric values; dividing those row by row raises TypeError.
# Coerce both to numbers (invalid -> NaN) and mask zero stakes so the ROI is
# NaN wherever it cannot be computed.
pnl = pd.to_numeric(analysis_df["pnl"], errors="coerce")
stake = pd.to_numeric(analysis_df["stake"], errors="coerce")
analysis_df["roi"] = pnl / stake.where(stake != 0)

# Only trades with both a CLV and an ROI can be plotted.
scatter_df = analysis_df.dropna(subset=["clv", "roi"])
fig, ax = plt.subplots(figsize=(6, 4))
if scatter_df.empty:
    ax.text(0.5, 0.5, "Insufficient data for CLV vs ROI", ha="center", va="center", fontsize=14)
    ax.set_axis_off()
else:
    sns.scatterplot(data=scatter_df, x="clv", y="roi", hue="steam_direction", ax=ax)
    ax.set_title("CLV vs ROI")
    ax.set_xlabel("CLV")
    ax.set_ylabel("ROI (pnl / stake)")
plt.tight_layout()
fig

In [None]:
# One row per key of the summary's "league_breakdown" mapping.
league_breakdown = summary.get("league_breakdown", {})
league_df = pd.DataFrame.from_dict(league_breakdown, orient="index")
league_df