In [1]:
#!/usr/bin/env python

import os
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime

# ============================================================
# CONFIG
# ============================================================

# Input locations. The SPY regime file is an optional benchmark input.
SWEEP_ROOT = "./26-simple_parameter_sweep"
SPY_FILE = "./8-SPY_200DMA_market_regime/8-SPY_200DMA_regime.parquet"

# Annualisation factor used by the ratio helpers.
TRADING_DAYS_PER_YEAR = 252

# Output locations, all placed under the sweep root.
HEATMAP_DIR = os.path.join(SWEEP_ROOT, "heatmaps")
OUTPUT_SUMMARY_CSV = os.path.join(SWEEP_ROOT, "26_sweep_summary.csv")
OUTPUT_BEST_CSV = os.path.join(SWEEP_ROOT, "26_best_runs.csv")

# Create the heatmap folder up front so later savefig calls cannot fail on it.
os.makedirs(HEATMAP_DIR, exist_ok=True)

print(
    "=== SIMPLE PARAMETER SWEEP ANALYZER ===\n",
    f"Scanning sweep root: {SWEEP_ROOT}\n",
    sep="\n",
)

# ============================================================
# METRIC HELPERS
# ============================================================

def max_drawdown(eq: pd.Series):
    """Return the deepest peak-to-trough decline of an equity curve.

    NaN entries are discarded first. The result is a fraction that is at
    most zero (e.g. -0.25 for a 25% drawdown), or NaN when no values remain.
    """
    curve = eq.dropna()
    if curve.empty:
        return np.nan
    # Each point is compared with the highest value seen up to that point.
    running_peak = curve.cummax()
    return (curve / running_peak - 1.0).min()

def cagr_from_curve(eq: pd.Series):
    """Return the compound annual growth rate implied by an equity curve.

    The elapsed time is taken from the first and last index labels, which
    must support subtraction yielding an object with a ``.days`` attribute
    (e.g. a DatetimeIndex). A year is counted as 365.25 days.

    Returns NaN when fewer than two non-NaN points remain, when either
    endpoint is not strictly positive, or when the span is not positive.
    """
    curve = eq.dropna()
    if len(curve) < 2:
        return np.nan

    start_value = curve.iloc[0]
    end_value = curve.iloc[-1]
    if start_value <= 0 or end_value <= 0:
        return np.nan

    elapsed_days = (curve.index[-1] - curve.index[0]).days
    years = elapsed_days / 365.25
    if years <= 0:
        return np.nan

    growth = end_value / start_value
    return growth ** (1.0 / years) - 1.0

def sharpe_ratio(ret: pd.Series, rf: float = 0.0, periods_per_year=None):
    """Return the annualised Sharpe ratio of a series of periodic returns.

    Args:
        ret: Periodic simple returns (e.g. daily). NaN entries are dropped.
        rf: Annual risk-free rate; it is divided evenly across
            ``periods_per_year`` before being subtracted from each return.
        periods_per_year: Number of return periods in one year. When None
            (the default) the module-level ``TRADING_DAYS_PER_YEAR`` is used.

    Returns:
        ``mean(excess) / std(excess) * sqrt(periods_per_year)``, or NaN when
        fewer than two returns remain or the volatility is numerically zero.

    Raises:
        ValueError: If ``periods_per_year`` is not positive.
    """
    if periods_per_year is None:
        periods_per_year = TRADING_DAYS_PER_YEAR
    if periods_per_year <= 0:
        raise ValueError("periods_per_year must be positive")

    ret = ret.dropna()
    if len(ret) < 2:
        return np.nan

    excess = ret - rf / periods_per_year
    vol = excess.std()
    # An exact `== 0` test misses constant series whose std is pure rounding
    # noise (~1e-17), which would otherwise produce a huge meaningless ratio.
    if not np.isfinite(vol) or np.isclose(vol, 0.0, rtol=0.0, atol=1e-12):
        return np.nan
    return excess.mean() / vol * np.sqrt(periods_per_year)

def sortino_ratio(ret: pd.Series, rf: float = 0.0, periods_per_year=None):
    """Return an annualised Sortino-style ratio of periodic returns.

    The denominator is the sample standard deviation of the strictly
    negative excess returns only (not a root-mean-square over every
    observation), matching how this file has always computed it.

    Args:
        ret: Periodic simple returns (e.g. daily). NaN entries are dropped.
        rf: Annual risk-free rate; it is divided evenly across
            ``periods_per_year`` before being subtracted from each return.
        periods_per_year: Number of return periods in one year. When None
            (the default) the module-level ``TRADING_DAYS_PER_YEAR`` is used.

    Returns:
        ``mean(excess) / std(negative excess) * sqrt(periods_per_year)``, or
        NaN when fewer than two returns remain, there are no negative excess
        returns, or the downside deviation is undefined or numerically zero.

    Raises:
        ValueError: If ``periods_per_year`` is not positive.
    """
    if periods_per_year is None:
        periods_per_year = TRADING_DAYS_PER_YEAR
    if periods_per_year <= 0:
        raise ValueError("periods_per_year must be positive")

    ret = ret.dropna()
    if len(ret) < 2:
        return np.nan

    excess = ret - rf / periods_per_year
    downside = excess[excess < 0]
    if downside.empty:
        return np.nan

    downside_dev = downside.std()
    # A single losing period gives a NaN sample std; identical losses give a
    # std that is zero up to rounding noise. Neither is a usable denominator.
    if not np.isfinite(downside_dev) or np.isclose(
        downside_dev, 0.0, rtol=0.0, atol=1e-12
    ):
        return np.nan
    return excess.mean() / downside_dev * np.sqrt(periods_per_year)


# ============================================================
# OPTIONAL: LOAD SPY FOR BENCHMARK (not required for core task)
# ============================================================

# Benchmark close series; stays None when the file is absent or unreadable.
spy_price = None
if os.path.exists(SPY_FILE):
    try:
        # Normalise to a frame with a datetime "date" column in ascending order.
        spy = (
            pd.read_parquet(SPY_FILE)
            .reset_index()
            .rename(columns={"Date": "date"})
            .assign(date=lambda frame: pd.to_datetime(frame["date"]))
            .sort_values("date")
        )
        spy_price = spy.set_index("date")["spy_close"]
        print("Loaded SPY benchmark for potential extra analysis.\n")
    except Exception as e:
        # The benchmark is optional, so any load problem is only a warning.
        print(f"Warning: could not load SPY file: {e}\n")
        spy_price = None

# ============================================================
# SCAN RUN DIRECTORIES
# ============================================================

# One dict of parameters and metrics per usable run directory.
runs = []

# Sorted so the row order of the summary CSV is reproducible; the order
# returned by os.listdir is arbitrary.
for name in sorted(os.listdir(SWEEP_ROOT)):
    run_dir = os.path.join(SWEEP_ROOT, name)
    if not os.path.isdir(run_dir):
        continue
    summary_path = os.path.join(run_dir, "summary.json")
    equity_path  = os.path.join(run_dir, "equity.parquet")

    # A run is only usable when both artifacts are present.
    if not (os.path.exists(summary_path) and os.path.exists(equity_path)):
        continue

    # Load summary.json
    with open(summary_path, "r", encoding="utf-8") as f:
        summary = json.load(f)

    RP = summary.get("risk_per_trade")
    TP = summary.get("top_percentile")
    RD = summary.get("rebalance_day", "UNKNOWN")

    # Load equity curve
    eq_df = pd.read_parquet(equity_path)
    if "date" not in eq_df.columns or "portfolio_value" not in eq_df.columns:
        print(f"Skipping {name}: equity file missing 'date' or 'portfolio_value'")
        continue
    if eq_df.empty:
        # Without rows there is no final value to read and nothing to measure.
        print(f"Skipping {name}: equity file has no rows")
        continue

    eq_df["date"] = pd.to_datetime(eq_df["date"])
    eq_df = eq_df.sort_values("date").set_index("date")
    pv = eq_df["portfolio_value"].astype(float)

    # compute metrics
    daily_ret = pv.pct_change()

    cagr   = cagr_from_curve(pv)
    mdd    = max_drawdown(pv)
    sharpe = sharpe_ratio(daily_ret)
    sort   = sortino_ratio(daily_ret)
    # max_drawdown yields NaN (never None) when undefined, and NaN < 0 is
    # False, so this single comparison covers the undefined case too.
    calmar = cagr / abs(mdd) if mdd < 0 else np.nan

    final_val = float(pv.iloc[-1])

    # int(np.nan) raises, so a missing or null trade count is kept as NaN
    # instead of aborting the whole sweep analysis.
    raw_trades = summary.get("num_trades")
    num_trades = int(raw_trades) if pd.notna(raw_trades) else np.nan

    runs.append({
        "run_dir": name,
        "risk_per_trade": RP,
        "top_percentile": TP,
        "rebalance_day": RD,
        "final_value": final_val,
        "cagr": cagr,
        "sharpe": sharpe,
        "sortino": sort,
        "maxdd": mdd,
        "calmar": calmar,
        "num_trades": num_trades,
    })

# Put into DataFrame
if not runs:
    # Nothing was collected, so there is nothing to tabulate or plot.
    print("No valid runs found in sweep root. Check folder structure & rerun sweep.")
    raise SystemExit()

df_runs = pd.DataFrame(runs)

# Show a short preview followed by a blank line.
preview = df_runs.head()
print("=== SWEEP SUMMARY (first few rows) ===", preview, "", sep="\n")

# Persist the full per-run table.
df_runs.to_csv(OUTPUT_SUMMARY_CSV, index=False)
print(f"Full sweep summary saved → {OUTPUT_SUMMARY_CSV}\n")

# ============================================================
# BEST-RUN PICKER
# ============================================================

def report_best(df, metric, larger_is_better=True, top_n=5):
    """Print and return the ``top_n`` rows of ``df`` ranked by ``metric``.

    Rows are ordered descending when ``larger_is_better`` is true and
    ascending otherwise. Only a fixed set of report columns that are
    actually present is printed; the returned frame keeps every column.
    The input frame is not modified.
    """
    ranked = df.copy().sort_values(metric, ascending=not larger_is_better)

    direction = "max" if larger_is_better else "min"
    print(f"\n=== Top {top_n} runs by {metric} ({direction}) ===")

    report_columns = (
        "run_dir", "risk_per_trade", "top_percentile", "rebalance_day",
        "cagr", "calmar", "sharpe", "maxdd", "final_value", "num_trades",
    )
    present = [col for col in report_columns if col in ranked.columns]

    leaders = ranked.head(top_n)
    print(leaders[present])
    return leaders

# Rank by each headline metric in turn (higher is better for all three).
best_by_cagr, best_by_calmar, best_by_sharpe = (
    report_best(df_runs, ranking_metric, larger_is_better=True)
    for ranking_metric in ("cagr", "calmar", "sharpe")
)

# Combine unique best runs: a run that tops several rankings is kept once.
stacked = pd.concat([best_by_cagr, best_by_calmar, best_by_sharpe])
best_combined = stacked.drop_duplicates(subset=["run_dir"])
best_combined.to_csv(OUTPUT_BEST_CSV, index=False)
print(f"\nBest runs summary saved → {OUTPUT_BEST_CSV}\n")

# ============================================================
# HEATMAPS (CAGR, Calmar, Sharpe) PER REBALANCE DAY
# ============================================================

metrics_for_heatmap = ["cagr", "calmar", "sharpe"]

# One set of heatmaps per rebalance day: rows are risk_per_trade, columns are
# top_percentile, and each cell is the mean of the metric for that pair.
for RD in sorted(df_runs["rebalance_day"].unique()):
    sub = df_runs[df_runs["rebalance_day"] == RD]
    if sub.empty:
        continue

    print(f"Generating heatmaps for rebalance_day = {RD} ...")

    for metric in metrics_for_heatmap:
        pivot = sub.pivot_table(
            index="risk_per_trade",
            columns="top_percentile",
            values=metric,
            aggfunc="mean"
        )

        # A metric that is NaN for every run in this slice leaves nothing to
        # plot; skip it rather than let the heatmap call abort the script.
        if pivot.empty or pivot.isna().to_numpy().all():
            print(f"  Skipping {metric} heatmap: no finite values to plot")
            continue

        fig, ax = plt.subplots(figsize=(6, 4))
        try:
            sns.heatmap(
                pivot,
                annot=True,
                fmt=".3f",
                cmap="viridis",
                cbar_kws={"label": metric},
                ax=ax,
            )
            ax.set_title(f"{metric.upper()} heatmap – Rebalance: {RD}")
            ax.set_xlabel("top_percentile")
            ax.set_ylabel("risk_per_trade")
            fig.tight_layout()

            out_path = os.path.join(HEATMAP_DIR, f"heatmap_{metric}_RD={RD}.png")
            fig.savefig(out_path, dpi=150)
        finally:
            # Always release the figure, even when plotting or saving fails.
            plt.close(fig)

        print(f"  Saved {metric} heatmap → {out_path}")

print("\n=== ANALYSIS COMPLETE ===")


=== SIMPLE PARAMETER SWEEP ANALYZER ===

Scanning sweep root: ./26-simple_parameter_sweep

Loaded SPY benchmark for potential extra analysis.

=== SWEEP SUMMARY (first few rows) ===
                       run_dir  risk_per_trade  top_percentile rebalance_day  \
0     RP0p0005_TP0p75_RDFriday          0.0005            0.75        Friday   
1     RP0p0005_TP0p75_RDMonday          0.0005            0.75        Monday   
2  RP0p0005_TP0p75_RDWednesday          0.0005            0.75     Wednesday   
3     RP0p0005_TP0p85_RDFriday          0.0005            0.85        Friday   
4     RP0p0005_TP0p85_RDMonday          0.0005            0.85        Monday   

   final_value      cagr    sharpe   sortino     maxdd    calmar  num_trades  
0  3809083.251  0.090974  0.619367  0.777680 -0.325386  0.279587       83113  
1  4217277.485  0.095095  0.637665  0.795230 -0.302671  0.314186       78529  
2  4355417.611  0.096406  0.647742  0.795648 -0.321282  0.300066       85433  
3  3167413.965  0.083