In [1]:
#!/usr/bin/env python

import os
import numpy as np
import pandas as pd
from datetime import datetime

# ============================================================
# CONFIG
# ============================================================

# Input artifacts (parquet): the execution log, which must provide 'exec_date'
# and 'value', and the daily equity curve, which must provide 'date' and
# 'portfolio_value'.
TRADES_FILE = "./13-trading_output_regression_insp500_spyfilter_cap15/13-trades_regression_insp500_spyfilter_cap15.parquet"
EQUITY_FILE = "./13-trading_output_regression_insp500_spyfilter_cap15/13-equity_curve_regression_insp500_spyfilter_cap15.parquet"

# Destination for the cost-adjusted curves and the summary CSV.
# NOTE: the directory is created as a side effect of loading this module.
OUTPUT_DIR = "./17-robustness_output"
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Annualization factor for the Sharpe/Sortino ratios; also used as the
# fallback year count when the equity curve spans no calendar time.
TRADING_DAYS_PER_YEAR = 252

# Slippage stress levels, in basis points of traded notional per execution.
# One full simulation is run for each level.
SLIPPAGE_BPS = [1, 3, 5, 10, 15, 20, 30, 50]

# Flat commission charged once per execution row (currency units, not bps).
# Whether one row corresponds to one order or one fill depends on your broker.
FIXED_COMMISSION = 9.99

# If True: subtract the cost drag only on days that actually carry costs
# (recommended).
# If False: subtract the drag on every day. With the current cost model this
# produces the same numbers, because days without executions have zero drag;
# the flag exists so that a future change to the drag calculation cannot leak
# costs onto non-trade days.
APPLY_COSTS_ONLY_ON_TRADE_DAYS = True

# ============================================================
# METRICS
# ============================================================

def cagr_from_curve(eq: pd.Series, dates: pd.Series) -> float:
    """
    Compound annual growth rate of an equity curve, measured in calendar time.

    The holding period is the number of whole days between the first and the
    last entry of ``dates`` divided by 365.25, rather than a bar count
    divided by 252.

    Parameters
    ----------
    eq : pd.Series
        Equity values; only the first and last entries are used.
    dates : pd.Series
        Dates matching ``eq``; only the first and last entries are used.

    Returns
    -------
    float
        CAGR as a fraction (0.10 == 10% per year), or NaN when ``eq`` is
        None, has fewer than two points, starts at a non-positive value, or
        when the date span is not positive.
    """
    if eq is None or len(eq) < 2:
        return np.nan

    start_value = eq.iloc[0]
    if start_value <= 0:
        return np.nan

    first_day, last_day = (pd.Timestamp(dates.iloc[pos]) for pos in (0, -1))
    span_years = (last_day - first_day).days / 365.25
    if span_years <= 0:
        return np.nan

    growth = eq.iloc[-1] / start_value
    return growth ** (1 / span_years) - 1

def sharpe_ratio(returns: pd.Series) -> float:
    """
    Annualized Sharpe ratio of a periodic return series.

    Missing values are dropped first. The ratio is the mean return divided by
    the sample standard deviation (ddof=1), scaled by
    sqrt(TRADING_DAYS_PER_YEAR). No risk-free rate is subtracted.

    Returns NaN when fewer than two observations remain or when the standard
    deviation is exactly zero.
    """
    clean = returns.dropna()
    volatility = clean.std(ddof=1)
    if volatility == 0 or len(clean) < 2:
        return np.nan
    return clean.mean() / volatility * np.sqrt(TRADING_DAYS_PER_YEAR)

def sortino_ratio(returns: pd.Series) -> float:
    """
    Annualized Sortino-style ratio of a periodic return series.

    Missing values are dropped first. The numerator is the mean of all
    returns; the denominator is the sample standard deviation (ddof=1) of the
    strictly negative returns only. The result is scaled by
    sqrt(TRADING_DAYS_PER_YEAR).

    Returns NaN when there are fewer than two negative returns or when their
    standard deviation is exactly zero.
    """
    clean = returns.dropna()
    losses = clean[clean < 0]
    if len(losses) < 2:
        return np.nan

    downside_vol = losses.std(ddof=1)
    if downside_vol == 0:
        return np.nan

    return clean.mean() / downside_vol * np.sqrt(TRADING_DAYS_PER_YEAR)

def max_drawdown(eq: pd.Series) -> float:
    """
    Largest peak-to-trough decline of an equity curve.

    Missing values are dropped, then every point is compared with the running
    maximum up to and including that point.

    Returns the most negative value of ``equity / running_peak - 1`` as a
    plain float (0.0 for a curve that never declines, e.g. -0.25 for a 25%
    drawdown), or NaN when no non-missing values exist.
    """
    valid = eq.dropna()
    if len(valid) == 0:
        return np.nan

    running_high = valid.cummax()
    drawdowns = valid.div(running_high).sub(1)
    return float(drawdowns.min())

# ============================================================
# VALIDATION
# ============================================================

def _require_cols(df: pd.DataFrame, cols: list, name: str):
    """
    Fail fast when a loaded frame lacks columns the analysis depends on.

    Parameters
    ----------
    df : pd.DataFrame
        Frame to inspect.
    cols : list
        Column labels that must be present.
    name : str
        Label for the frame, used as the prefix of the error message.

    Raises
    ------
    ValueError
        Listing every absent column, in the order given in ``cols``.
    """
    known = df.columns
    absent = [label for label in cols if label not in known]
    if not absent:
        return
    raise ValueError(f"{name} missing required columns: {absent}")

# ============================================================
# COST APPLICATION
# ============================================================

def apply_costs(equity_df: pd.DataFrame,
                trades_df: pd.DataFrame,
                slippage_bp: float) -> pd.DataFrame:
    """
    Overlay slippage and commission on a daily close-to-close equity curve.

    Cost model (deliberately simple):
    - Slippage per execution = abs(value) * slippage_bp / 10,000.
    - Commission per execution = FIXED_COMMISSION, once per trade row.
    - Costs are summed per execution date and turned into a return drag by
      dividing by the previous day's (cost-free) portfolio value.
    - Positions are not re-sized for the costs paid, so this is a
      stress-test approximation rather than a re-run of the strategy.

    Consequences of the implementation worth knowing:
    - Costs are matched to the curve with a left join on 'date', so
      executions dated on a day absent from the equity curve are not applied.
    - The first row has no previous equity, so its drag is zero; the same
      holds for any row whose previous portfolio value is exactly zero.

    Parameters
    ----------
    equity_df : pd.DataFrame
        Needs 'date' and 'portfolio_value'. Not modified.
    trades_df : pd.DataFrame
        Needs 'exec_date' and 'value'. Not modified.
    slippage_bp : float
        Slippage in basis points of traded notional.

    Returns
    -------
    pd.DataFrame
        The date-sorted equity frame with these columns added: 'ret',
        'slippage_cost', 'commission_cost', 'slippage_drag',
        'commission_drag', 'ret_after_costs', 'equity_after_costs'.
    """
    bps_fraction = slippage_bp / 10_000.0
    cost_cols = ["slippage_cost", "commission_cost"]

    # ---- equity curve: chronological order plus gross daily returns ----
    curve = equity_df.copy().sort_values("date", kind="mergesort").reset_index(drop=True)
    curve["ret"] = curve["portfolio_value"].pct_change().fillna(0.0)

    # ---- executions: costs are booked on the execution date ----
    fills = trades_df.copy()
    fills["date"] = pd.to_datetime(fills["exec_date"])
    fills = fills.sort_values("date", kind="mergesort").reset_index(drop=True)

    # 'value' may be signed (sells negative); slippage applies to magnitude.
    fills["notional"] = fills["value"].astype(float).abs()
    fills["slippage_cost"] = fills["notional"] * bps_fraction
    fills["commission_cost"] = float(FIXED_COMMISSION)

    per_day = fills.groupby("date", as_index=False)[cost_cols].sum()
    per_day = per_day.sort_values("date")

    # ---- attach daily cost totals; days without executions cost nothing ----
    curve = curve.merge(per_day, on="date", how="left")
    curve[cost_cols] = curve[cost_cols].fillna(0.0)

    # Previous close is the denominator; a missing or zero value gives no drag.
    denominator = curve["portfolio_value"].shift(1).replace(0, np.nan)
    curve["slippage_drag"] = curve["slippage_cost"].div(denominator).fillna(0.0)
    curve["commission_drag"] = curve["commission_cost"].div(denominator).fillna(0.0)

    net_ret = curve["ret"] - curve["slippage_drag"] - curve["commission_drag"]
    if APPLY_COSTS_ONLY_ON_TRADE_DAYS:
        # Use the net return only where a cost exists, gross return elsewhere.
        has_cost = (curve["slippage_cost"] > 0) | (curve["commission_cost"] > 0)
        curve["ret_after_costs"] = net_ret.where(has_cost, curve["ret"])
    else:
        curve["ret_after_costs"] = net_ret

    # Rebuild the curve by compounding net returns from the original start.
    start_equity = float(curve["portfolio_value"].iloc[0])
    curve["equity_after_costs"] = (1.0 + curve["ret_after_costs"]).cumprod() * start_equity

    return curve

# ============================================================
# MAIN
# ============================================================

def main():
    """
    Run the transaction-cost robustness sweep end to end.

    Reads the equity curve and the execution log from parquet, prints the
    cost-free baseline metrics, then for every level in SLIPPAGE_BPS applies
    costs, prints the resulting metrics and writes the adjusted curve to
    OUTPUT_DIR. A CSV with one row per slippage level is written at the end.
    """
    stamp_format = "%Y%m%d-%H%M%S"

    print("\n=== TRANSACTION-COST ROBUSTNESS TEST ===")

    # ---- load and validate inputs ----
    equity = pd.read_parquet(EQUITY_FILE)
    _require_cols(equity, ["date", "portfolio_value"], "EQUITY_FILE")
    equity["date"] = pd.to_datetime(equity["date"])
    equity = equity.sort_values("date", kind="mergesort").reset_index(drop=True)

    fills = pd.read_parquet(TRADES_FILE)
    _require_cols(fills, ["exec_date", "value"], "TRADES_FILE")
    fills["exec_date"] = pd.to_datetime(fills["exec_date"])
    fills = fills.sort_values("exec_date", kind="mergesort").reset_index(drop=True)

    print(f"Loaded equity: {len(equity):,} days")
    print(f"Loaded trades: {len(fills):,} executions\n")

    # ---- baseline metrics on the cost-free curve ----
    equity["ret"] = equity["portfolio_value"].pct_change().fillna(0.0)

    cagr_gross = cagr_from_curve(equity["portfolio_value"], equity["date"])
    sharpe_gross = sharpe_ratio(equity["ret"])
    sortino_gross = sortino_ratio(equity["ret"])
    maxdd_gross = max_drawdown(equity["portfolio_value"])

    print("=== BASELINE (NO COSTS) ===")
    print(f"CAGR:         {cagr_gross:.4f}")
    print(f"Sharpe:       {sharpe_gross:.4f}")
    print(f"Sortino:      {sortino_gross:.4f}")
    print(f"Max Drawdown: {maxdd_gross:.4f}\n")

    # Per-year cost figures use the calendar span, falling back to a bar
    # count when the curve covers no calendar time.
    span_years = (equity["date"].iloc[-1] - equity["date"].iloc[0]).days / 365.25
    span_years = float(span_years) if span_years > 0 else (len(equity) / TRADING_DAYS_PER_YEAR)

    n_executions = int(len(fills))
    gross_notional = float(fills["value"].astype(float).abs().sum())

    # Commission does not depend on the slippage level.
    commission_total = n_executions * float(FIXED_COMMISSION)
    commission_annual = commission_total / span_years

    # Fields that are identical on every summary row.
    shared_fields = {
        "baseline_cagr": cagr_gross,
        "baseline_sharpe": sharpe_gross,
        "baseline_sortino": sortino_gross,
        "baseline_maxdd": maxdd_gross,
        "apply_costs_only_on_trade_days": APPLY_COSTS_ONLY_ON_TRADE_DAYS,
        "fixed_commission": FIXED_COMMISSION,
    }

    rows = []
    for bps in SLIPPAGE_BPS:
        print(f"--- Testing slippage = {bps} bps ---")

        stressed = apply_costs(equity, fills, bps)
        net_curve = stressed["equity_after_costs"]
        net_returns = stressed["ret_after_costs"]

        cagr_net = cagr_from_curve(net_curve, stressed["date"])
        sharpe_net = sharpe_ratio(net_returns)
        sortino_net = sortino_ratio(net_returns)
        maxdd_net = max_drawdown(net_curve)

        slippage_total = gross_notional * (bps / 10_000.0)
        slippage_annual = slippage_total / span_years

        print(f"Trades:                     {n_executions:,}")
        print(f"Total notional traded:      ${gross_notional:,.2f}")
        print(f"Total slippage:             ${slippage_total:,.2f}")
        print(f"Slippage per year:          ${slippage_annual:,.2f}")
        print(f"Total commission:           ${commission_total:,.2f}")
        print(f"Commission per year:        ${commission_annual:,.2f}")
        print(f"CAGR (after costs):         {cagr_net:.4f}")
        print(f"Sharpe (after costs):       {sharpe_net:.4f}")
        print(f"Sortino (after costs):      {sortino_net:.4f}")
        print(f"MaxDD (after costs):        {maxdd_net:.4f}\n")

        rows.append({
            "slippage_bp": bps,
            "total_trades": n_executions,
            "total_notional": gross_notional,
            "total_slippage": slippage_total,
            "slippage_per_year": slippage_annual,
            "total_commission": commission_total,
            "commission_per_year": commission_annual,
            "cagr_after_costs": cagr_net,
            "sharpe_after_costs": sharpe_net,
            "sortino_after_costs": sortino_net,
            "maxdd_after_costs": maxdd_net,
            **shared_fields,
        })

        # Each curve file carries the timestamp of the moment it is written.
        stamp = datetime.now().strftime(stamp_format)
        curve_file = os.path.join(OUTPUT_DIR, f"equity_after_costs_{bps}bp_{stamp}.parquet")
        stressed.to_parquet(curve_file, index=False)

    # ---- persist and display the sweep summary ----
    summary = pd.DataFrame(rows)
    stamp = datetime.now().strftime(stamp_format)
    summary_path = os.path.join(OUTPUT_DIR, f"transaction_cost_summary_{stamp}.csv")
    summary.to_csv(summary_path, index=False)

    print("\n=== SUMMARY ===")
    print(summary.to_string(index=False))
    print(f"\nSummary saved → {summary_path}")
    print("\n=== TRANSACTION-COST ROBUSTNESS COMPLETE ===\n")


# Run the sweep only when executed as a script, not when imported.
if __name__ == "__main__":
    main()



=== TRANSACTION-COST ROBUSTNESS TEST ===
Loaded equity: 6,790 days
Loaded trades: 3,020 executions

=== BASELINE (NO COSTS) ===
CAGR:         0.1636
Sharpe:       1.2259
Sortino:      1.5259
Max Drawdown: -0.2013

--- Testing slippage = 1 bps ---
Trades:                     3,020
Total notional traded:      $1,292,292,054.29
Total slippage:             $129,229.21
Slippage per year:          $4,788.57
Total commission:           $30,169.80
Commission per year:        $1,117.94
CAGR (after costs):         0.1620
Sharpe (after costs):       1.2158
Sortino (after costs):      1.5134
MaxDD (after costs):        -0.2014

--- Testing slippage = 3 bps ---
Trades:                     3,020
Total notional traded:      $1,292,292,054.29
Total slippage:             $387,687.62
Slippage per year:          $14,365.72
Total commission:           $30,169.80
Commission per year:        $1,117.94
CAGR (after costs):         0.1603
Sharpe (after costs):       1.2040
Sortino (after costs):      1.4989
M