# In [1]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
Convexity / Positive Skew / Correlation Test Harness

What this does:
1) Loads one or more strategy equity curves -> daily returns
2) Loads SPY regime file -> daily SPY returns + regime (0/1)
3) Tests "convexity" via conditional returns vs |SPY move| buckets (and downside-only buckets)
4) Computes skewness / tail metrics
5) Computes correlation between systems (overall + by regime + on high-|SPY| days)
6) Writes CSV outputs to OUT_DIR

Run:
  python 17-convexity_correlation_tests.py
"""

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# ============================================================
# CONFIG
# ============================================================

# Directory that receives every CSV/PNG written by this script.
# NOTE: it is created at import time, as a side effect of loading the module.
OUT_DIR = "./25c-convexity_tests_output"
os.makedirs(OUT_DIR, exist_ok=True)

# --- Your files (add more systems by adding more entries) ---
# Maps a system name to its input files. Only "equity_file" is read by the
# code visible in this file; the system name is also used in output file
# names and as the column name in the correlation matrices.
SYSTEMS = {
    "regression_spyfilter": {
        "equity_file": "./13-trading_output_regression_insp500_spyfilter_cap15/13-equity_curve_regression_insp500_spyfilter_cap15.parquet",
        # optional: for trade-level skew (not needed for convexity tests)
        "trades_file": "./13-trading_output_regression_insp500_spyfilter_cap15/13-trades_regression_insp500_spyfilter_cap15.parquet",
    },
    # "system2": {"equity_file": "..."},
    # "system3": {"equity_file": "..."},
}

# SPY regime parquet (your example looks indexed by "Date").
# Must provide a 'spy_close' column; 'market_regime' is optional.
# NOTE(review): absolute, machine-specific Windows path -- adjust per machine.
SPY_FILE = r"C:\TWS API\source\pythonclient\TradingIdeas\MomentumSystem\8-SPY_200DMA_market_regime\8-SPY_200DMA_regime.parquet"

# Convexity buckets
N_BUCKETS = 10                # deciles of |SPY return|
# NOTE(review): HIGH_MOVE_BUCKET is not referenced by any code visible in this
# file; the "high move" split below uses HIGH_MOVE_Q instead -- confirm usage.
HIGH_MOVE_BUCKET = 10         # "top decile" is bucket 10 if using 1..10 labels
HIGH_MOVE_Q = 0.90            # threshold quantile for "high move days"

# Tail buckets
# Each q yields one "mean_ret_on_worst_spy_<pct>pct_days" summary column.
WORST_SPY_QS = [0.01, 0.05]   # worst 1% and 5% SPY days

# ============================================================
# LOADERS
# ============================================================

def _pick_first(cols, available):
    for c in cols:
        if c in available:
            return c
    return None

def load_equity_curve(path: str) -> pd.DataFrame:
    """Load an equity-curve parquet file and derive simple daily returns.

    The date is taken from the first matching column among "date", "Date",
    "datetime", "timestamp". If no such column exists, the index is used when
    it carries one of those names or is a ``DatetimeIndex`` (named or not).
    The portfolio value is taken from "portfolio_value" or, failing that, the
    first of "portfolio", "equity", "total", "account_value".

    Args:
        path: Location of the parquet file.

    Returns:
        DataFrame with columns ``date`` (normalized to midnight),
        ``portfolio_value`` and ``ret`` (percentage change of the portfolio
        value versus the previous retained row), sorted by date with one row
        per date. The first row is dropped because it has no return.

    Raises:
        KeyError: If no date or no portfolio-value column can be identified.
    """
    eq = pd.read_parquet(path)

    date_candidates = ["date", "Date", "datetime", "timestamp"]
    date_col = _pick_first(date_candidates, list(eq.columns))
    if date_col is None:
        # The dates may live in the index instead of a column. Accept the same
        # names that are accepted for columns, plus any DatetimeIndex.
        if eq.index.name in date_candidates or isinstance(eq.index, pd.DatetimeIndex):
            # No candidate column exists at this point, so "date" cannot clash.
            eq = eq.rename_axis("date").reset_index()
            date_col = "date"
        else:
            raise KeyError(f"Cannot find date column in equity curve: {path} columns={list(eq.columns)}")

    # Unparseable dates become NaT and are discarded.
    eq[date_col] = pd.to_datetime(eq[date_col], errors="coerce")
    eq = eq.dropna(subset=[date_col]).copy()
    eq.rename(columns={date_col: "date"}, inplace=True)
    eq["date"] = eq["date"].dt.normalize()

    if "portfolio_value" not in eq.columns:
        pv_col = _pick_first(["portfolio", "equity", "total", "account_value"], list(eq.columns))
        if pv_col is None:
            raise KeyError(f"Cannot find portfolio value column in equity curve: {path}")
        eq.rename(columns={pv_col: "portfolio_value"}, inplace=True)

    # Non-numeric values become NaN and are discarded; when several rows share
    # a date, the first one after sorting is kept.
    eq["portfolio_value"] = pd.to_numeric(eq["portfolio_value"], errors="coerce")
    eq = eq.dropna(subset=["portfolio_value"]).sort_values("date").drop_duplicates("date")

    eq["ret"] = eq["portfolio_value"].pct_change()
    eq = eq.dropna(subset=["ret"]).reset_index(drop=True)
    return eq[["date", "portfolio_value", "ret"]]

def load_spy_regime(path: str) -> pd.DataFrame:
    """Load the SPY regime parquet file and derive daily SPY returns.

    Dates are read from a "Date" column, else a "date" column, else from the
    index (whatever its name, including an unnamed index).

    Args:
        path: Location of the parquet file. It must contain ``spy_close``;
            ``market_regime`` is optional.

    Returns:
        DataFrame with columns ``date`` (normalized to midnight),
        ``spy_close``, ``spy_ret`` (percentage change of the close versus the
        previous retained row) and ``market_regime`` (int). Sorted by date,
        one row per date; the first row is dropped because it has no return.

    Raises:
        KeyError: If the file has no ``spy_close`` column.
    """
    spy = pd.read_parquet(path)

    if "Date" in spy.columns:
        raw_dates = spy["Date"]
    elif "date" in spy.columns:
        raw_dates = spy["date"]
    else:
        # Dates are assumed to live in the index. Renaming the axis first
        # yields a "date" column regardless of the index's original name;
        # there is no existing "date" column in this branch to clash with.
        spy = spy.rename_axis("date").reset_index()
        raw_dates = spy["date"]

    # Unparseable dates become NaT and are discarded.
    spy["date"] = pd.to_datetime(raw_dates, errors="coerce")
    spy = spy.dropna(subset=["date"]).copy()
    spy["date"] = spy["date"].dt.normalize()

    if "spy_close" not in spy.columns:
        raise KeyError("SPY file missing 'spy_close' column")

    spy["spy_close"] = pd.to_numeric(spy["spy_close"], errors="coerce")
    spy = spy.dropna(subset=["spy_close"]).sort_values("date").drop_duplicates("date")

    spy["spy_ret"] = spy["spy_close"].pct_change()
    spy = spy.dropna(subset=["spy_ret"]).reset_index(drop=True)

    if "market_regime" in spy.columns:
        # Missing or non-numeric regime values are treated as regime 0.
        spy["market_regime"] = pd.to_numeric(spy["market_regime"], errors="coerce").fillna(0).astype(int)
    else:
        spy["market_regime"] = 1  # if absent, treat all as "bull"

    return spy[["date", "spy_close", "spy_ret", "market_regime"]]

# ============================================================
# METRICS
# ============================================================

def max_drawdown_from_rets(rets: np.ndarray) -> float:
    """Return the maximum drawdown of the compounded return series.

    The returns are compounded from a starting value of 1.0, and the drawdown
    at each step is measured against the highest value seen so far *including
    that starting value*, so a loss from inception counts as a drawdown.

    Args:
        rets: Periodic simple returns (array-like).

    Returns:
        The most negative drawdown as a fraction (e.g. -0.2 for -20%), 0.0 if
        the curve never falls below its running peak, or NaN for empty input.
    """
    rets = np.asarray(rets, dtype=float)
    if rets.size == 0:
        return np.nan
    eq = np.cumprod(1.0 + rets)
    # Floor the running peak at the initial capital (1.0); otherwise a series
    # that starts with losses would report no drawdown for them.
    peak = np.maximum(np.maximum.accumulate(eq), 1.0)
    dd = eq / peak - 1.0
    return float(dd.min())

def sharpe(rets: np.ndarray, ann=252) -> float:
    """Annualised Sharpe ratio: sqrt(ann) * mean / sample standard deviation.

    Returns NaN when there are fewer than two observations or when the
    sample standard deviation (ddof=1) is exactly zero.
    """
    sample = np.asarray(rets, dtype=float)
    if sample.size >= 2:
        sigma = sample.std(ddof=1)
        if sigma != 0:
            return float(np.sqrt(ann) * sample.mean() / sigma)
    return np.nan

def tail_ratio(rets: np.ndarray, q=0.05) -> float:
    """Ratio of the mean of the best q-share of returns to |mean of the worst q-share|.

    Each tail holds max(1, int(q * len(rets))) observations. Returns NaN when
    fewer than 50 observations are supplied or when the lower-tail mean is
    exactly zero.
    """
    values = np.asarray(rets, dtype=float)
    if values.size < 50:
        return np.nan

    ordered = np.sort(values)
    tail_len = max(1, int(q * len(values)))
    upper_mean = np.mean(ordered[-tail_len:])
    lower_mean = np.mean(ordered[:tail_len])

    if lower_mean == 0:
        return np.nan
    return float(upper_mean / abs(lower_mean))

def ols_beta_gamma(y: np.ndarray, x: np.ndarray):
    """
    Least-squares fit of the quadratic model y = a + b*x + g*x^2.

    Returns the tuple (a, b, g, R^2). R^2 is NaN when y has no variation.
    No standard errors or p-values are produced; the sign of g serves as a
    directional convexity check.
    """
    target = np.asarray(y, dtype=float)
    regressor = np.asarray(x, dtype=float)

    # Design matrix columns: intercept, linear term, squared term.
    design = np.column_stack([np.ones_like(regressor), regressor, regressor**2])
    coef = np.linalg.lstsq(design, target, rcond=None)[0]

    fitted = design @ coef
    resid_ss = np.sum((target - fitted) ** 2)
    total_ss = np.sum((target - target.mean()) ** 2)
    if total_ss > 0:
        r2 = 1.0 - (resid_ss / total_ss)
    else:
        r2 = np.nan

    intercept, beta, gamma = coef.tolist()
    return intercept, beta, gamma, float(r2)

# ============================================================
# ANALYSIS
# ============================================================

def _bucket_table(df: pd.DataFrame, sys_name: str, subset: str, n_buckets: int) -> pd.DataFrame:
    """Aggregate strategy/SPY statistics per quantile bucket of ``abs_spy``."""
    columns = [
        "abs_spy_bucket", "n", "mean_strat_ret", "mean_spy_ret",
        "mean_abs_spy", "hit_rate", "system", "subset",
    ]
    # Never ask for more quantile bins than there are distinct values.
    q = min(n_buckets, df["abs_spy"].nunique())
    if q < 1:
        return pd.DataFrame(columns=columns)

    # labels=False yields 0-based integer bucket codes; duplicates="drop"
    # merges tied quantile edges instead of raising, so fewer than q buckets
    # may result. Plain integers (not categoricals) are grouped on below.
    codes = pd.qcut(df["abs_spy"], q=q, labels=False, duplicates="drop")
    valid = codes.notna().to_numpy()
    work = df.loc[valid].assign(
        abs_spy_bucket=(codes.to_numpy()[valid] + 1).astype(int),  # relabel to 1..k
        _win=(df["ret"].to_numpy()[valid] > 0).astype(float),
    )

    tbl = (
        work.groupby("abs_spy_bucket", as_index=False)
            .agg(
                n=("ret", "size"),
                mean_strat_ret=("ret", "mean"),
                mean_spy_ret=("spy_ret", "mean"),
                mean_abs_spy=("abs_spy", "mean"),
                hit_rate=("_win", "mean"),  # share of days with ret > 0
            )
    )
    tbl["system"] = sys_name
    tbl["subset"] = subset
    return tbl


def convexity_tables(merged: pd.DataFrame, sys_name: str, n_buckets=None) -> pd.DataFrame:
    """
    Build per-bucket statistics of strategy returns versus the size of the SPY move.

    merged columns required: ret (strategy), spy_ret.
    Rows are bucketed by quantiles of |spy_ret| (bucket 1 = smallest moves),
    once over all rows (subset "ALL") and once over rows with spy_ret < 0 only
    (subset "SPY_DOWN", re-bucketed within that subset).

    Args:
        merged: Daily strategy and SPY returns, one row per day.
        sys_name: Label written to the ``system`` column.
        n_buckets: Number of quantile buckets; defaults to the module-level
            N_BUCKETS. Fewer buckets are produced when there are fewer
            distinct |spy_ret| values or when quantile edges tie.

    Returns:
        DataFrame with columns abs_spy_bucket, n, mean_strat_ret,
        mean_spy_ret, mean_abs_spy, hit_rate, system, subset. The SPY_DOWN
        rows are omitted when no row has spy_ret < 0.
    """
    if n_buckets is None:
        n_buckets = N_BUCKETS

    df = merged.copy()
    df["abs_spy"] = df["spy_ret"].abs()

    tbl_all = _bucket_table(df, sys_name, "ALL", n_buckets)

    df_down = df[df["spy_ret"] < 0]
    if len(df_down) == 0:
        return tbl_all

    tbl_down = _bucket_table(df_down, sys_name, "SPY_DOWN", n_buckets)
    return pd.concat([tbl_all, tbl_down], ignore_index=True)

def summarize_system(merged: pd.DataFrame, sys_name: str) -> dict:
    """Compute one summary row of performance, tail and convexity statistics.

    Args:
        merged: Daily rows with columns ``ret`` (strategy return),
            ``spy_ret`` and ``market_regime``.
        sys_name: Label stored under the ``system`` key.

    Returns:
        Dict with overall statistics (mean, volatility, Sharpe, max drawdown,
        skew, excess kurtosis, 5% tail ratio), the quadratic-fit coefficients
        of strategy return on SPY return, mean returns on high-|SPY| days
        versus the rest (split at the HIGH_MOVE_Q quantile of |spy_ret|), the
        mean return on the worst SPY days for each quantile in WORST_SPY_QS,
        and per-regime statistics for market_regime 1 ("bull") and 0 ("bear").
        Per-regime statistics are NaN when the regime has fewer than 50 rows.
    """
    r = merged["ret"].to_numpy()
    spy = merged["spy_ret"].to_numpy()

    _, b, g, r2 = ols_beta_gamma(r, spy)

    # High-move day behavior: days at or above the HIGH_MOVE_Q quantile of |SPY|.
    abs_spy = np.abs(spy)
    is_high = abs_spy >= np.quantile(abs_spy, HIGH_MOVE_Q)
    hi = merged.loc[is_high, "ret"]
    mid = merged.loc[~is_high, "ret"]
    mean_hi = float(hi.mean()) if len(hi) else np.nan
    mean_mid = float(mid.mean()) if len(mid) else np.nan

    # Worst SPY day buckets: days at or below the q-quantile of SPY returns.
    worst_stats = {}
    for q in WORST_SPY_QS:
        bucket = merged.loc[merged["spy_ret"] <= np.quantile(spy, q), "ret"]
        # ":g" formatting avoids float truncation in the label (int(0.29 * 100)
        # is 28) and keeps fractional percents such as 2.5 distinct.
        key = f"mean_ret_on_worst_spy_{q * 100:g}pct_days"
        worst_stats[key] = float(bucket.mean()) if len(bucket) else np.nan

    ret_series = pd.Series(r)
    out = {
        "system": sys_name,
        "n_days": int(len(merged)),
        "mean_daily_ret": float(np.mean(r)),
        "vol_daily": float(np.std(r, ddof=1)),
        "sharpe": sharpe(r),
        "maxdd": max_drawdown_from_rets(r),
        "skew": float(ret_series.skew()),
        "excess_kurt": float(ret_series.kurt()),  # pandas kurt() is excess kurtosis by default
        "tail_ratio_5pct": tail_ratio(r, q=0.05),

        # Convexity proxies
        "beta_to_spy": float(b),
        "gamma_to_spy2": float(g),    # >0 suggests convexity vs SPY
        "ols_r2": float(r2),

        "mean_ret_high_abs_spy": mean_hi,
        "mean_ret_other_days": mean_mid,
        # NaN propagates automatically when either side is empty.
        "delta_high_move": mean_hi - mean_mid,

        "corr_with_spy": float(np.corrcoef(r, spy)[0, 1]),
    }
    out.update(worst_stats)

    # Regime splits. Statistics use only the days belonging to the regime, so
    # the per-regime drawdown compounds those (possibly non-contiguous) days.
    regime_keys = ("sharpe", "maxdd", "corr_with_spy", "gamma", "beta")
    for regime_val, regime_name in [(1, "bull"), (0, "bear")]:
        sub = merged[merged["market_regime"] == regime_val]
        stats = dict.fromkeys(regime_keys, np.nan)
        if len(sub) >= 50:
            rr = sub["ret"].to_numpy()
            ss = sub["spy_ret"].to_numpy()
            _, bb, gg, _ = ols_beta_gamma(rr, ss)
            stats = {
                "sharpe": sharpe(rr),
                "maxdd": max_drawdown_from_rets(rr),
                "corr_with_spy": float(np.corrcoef(rr, ss)[0, 1]),
                "gamma": float(gg),
                "beta": float(bb),
            }
        for key in regime_keys:
            out[f"{key}_{regime_name}"] = stats[key]
        out[f"n_days_{regime_name}"] = int(len(sub))

    return out

# ============================================================
# MAIN
# ============================================================

def _plot_bucket_means(table: pd.DataFrame, name: str) -> None:
    """Save a line plot of mean strategy return per |SPY| bucket (subset ALL)."""
    tbl_plot = table[table["subset"] == "ALL"]
    if not len(tbl_plot):
        return
    plt.figure(figsize=(10, 5))
    plt.plot(tbl_plot["abs_spy_bucket"], tbl_plot["mean_strat_ret"], marker="o")
    plt.title(f"{name}: Mean strategy return by |SPY return| decile (ALL)")
    plt.xlabel("|SPY return| decile (1=low, 10=high)")
    plt.ylabel("Mean strategy daily return")
    plt.tight_layout()
    plt.savefig(os.path.join(OUT_DIR, f"{name}_mean_ret_by_abs_spy_decile.png"))
    plt.close()


def main():
    """Run the full analysis for every entry in SYSTEMS and write the outputs.

    For each system: load its equity curve, inner-join it with the SPY data on
    date, compute the summary row and the bucket tables, and save a plot.
    Then write the summary and bucket CSVs to OUT_DIR. When at least two
    systems produced returns, also write their return-correlation matrices:
    overall, on high-|SPY| days, and per market regime (the latter two only
    when at least 50 common days are available).
    """
    spy = load_spy_regime(SPY_FILE)

    # Load systems and merge to SPY
    rets_by_system = {}
    summaries = []
    convex_tables = []

    for name, cfg in SYSTEMS.items():
        eq = load_equity_curve(cfg["equity_file"])

        merged = eq.merge(spy, on="date", how="inner")
        if len(merged) < 200:
            print(f"[WARN] {name}: only {len(merged)} merged days with SPY. Check dates/files.")
        if merged.empty:
            # Nothing to analyse; the statistics below cannot handle empty input.
            print(f"[WARN] {name}: no overlapping dates with SPY; skipping.")
            continue
        merged = merged.sort_values("date").reset_index(drop=True)

        # store returns series (used for the cross-system correlations)
        rets_by_system[name] = merged.set_index("date")["ret"].rename(name)

        summaries.append(summarize_system(merged, name))
        table = convexity_tables(merged, name)
        convex_tables.append(table)

        # quick plot: strat ret vs abs spy bucket (ALL)
        _plot_bucket_means(table, name)

    if not summaries:
        print("[WARN] No systems produced results; nothing to write.")
        return

    # Save per-system summary
    summary_df = pd.DataFrame(summaries).sort_values(["gamma_to_spy2", "skew"], ascending=[False, False])
    summary_path = os.path.join(OUT_DIR, "25c-system_summary_convexity_skew.csv")
    summary_df.to_csv(summary_path, index=False)

    # Save convexity bucket tables
    convex_df = pd.concat(convex_tables, ignore_index=True)
    convex_path = os.path.join(OUT_DIR, "25c-convexity_tables_by_abs_spy_decile.csv")
    convex_df.to_csv(convex_path, index=False)

    print("\n=== SYSTEM SUMMARY (top rows) ===")
    print(summary_df.head(20).to_string(index=False))
    print(f"\nWrote: {summary_path}")
    print(f"Wrote: {convex_path}")

    # If multiple systems, compute correlation matrices
    if len(rets_by_system) >= 2:
        # Keep only the dates on which every system has a return.
        R = pd.concat(list(rets_by_system.values()), axis=1).dropna()
        corr_all = R.corr()

        corr_path = os.path.join(OUT_DIR, "25c-system_return_correlation_ALL.csv")
        corr_all.to_csv(corr_path)
        print(f"\nWrote: {corr_path}")

        # Also correlation on high-|SPY| days
        spy2 = spy.set_index("date").loc[R.index]
        abs_spy = spy2["spy_ret"].abs()
        thr = abs_spy.quantile(HIGH_MOVE_Q)
        R_hi = R.loc[abs_spy >= thr]
        if len(R_hi) >= 50:
            corr_hi = R_hi.corr()
            corr_hi_path = os.path.join(OUT_DIR, "25c-system_return_correlation_high_abs_spy.csv")
            corr_hi.to_csv(corr_hi_path)
            print(f"Wrote: {corr_hi_path}")

        # By regime
        for regime_val, regime_name in [(1, "bull"), (0, "bear")]:
            mask = (spy2["market_regime"] == regime_val)
            R_sub = R.loc[mask]
            if len(R_sub) >= 50:
                corr_sub = R_sub.corr()
                # Same "25c-" prefix as every other file written by this script.
                p = os.path.join(OUT_DIR, f"25c-system_return_correlation_{regime_name}.csv")
                corr_sub.to_csv(p)
                print(f"Wrote: {p}")

    print("\n=== DONE ===")

if __name__ == "__main__":
    main()



# === SYSTEM SUMMARY (top rows) ===
#               system  n_days  mean_daily_ret  vol_daily   sharpe     maxdd      skew  excess_kurt  tail_ratio_5pct  beta_to_spy  gamma_to_spy2   ols_r2  mean_ret_high_abs_spy  mean_ret_other_days  delta_high_move  corr_with_spy  mean_ret_on_worst_spy_1pct_days  mean_ret_on_worst_spy_5pct_days  sharpe_bull  maxdd_bull  corr_with_spy_bull  gamma_bull  beta_bull  n_days_bull  sharpe_bear  maxdd_bear  corr_with_spy_bear  gamma_bear  beta_bear  n_days_bear
# regression_spyfilter    6786        0.000636   0.008253 1.223018 -0.207327 -0.217674     6.250288         1.047984     0.341807      -1.243601 0.259336              -0.001492             0.000872        -0.002364       0.502466                        -0.012307                        -0.008831     1.863452   -0.151862            0.650149   -1.416947   0.669641         5053    -1.252573   -0.609674            0.439103   -0.963583   0.150477         1733
#
# Wrote: ./25c-convexity_tests_output\25c-system_summa
#
#   df.groupby("abs_spy_bucket", as_index=False)
