In [1]:
#!/usr/bin/env python
import numpy as np
import pandas as pd
from pathlib import Path

# =========================
# CONFIG
# =========================
TRADES_FILE = Path("./13-trading_output_regression_insp500_spyfilter_cap15/13-trades_regression_insp500_spyfilter_cap15.parquet")
EQUITY_FILE = Path("./13-trading_output_regression_insp500_spyfilter_cap15/13-equity_curve_regression_insp500_spyfilter_cap15.parquet")

STARTING_CAPITAL = 365_000.0  # <-- your assumption

OUT_CSV = Path("./16-monte_carlo_output/roi_roc_yearly.csv")
OUT_CSV.parent.mkdir(parents=True, exist_ok=True)

# =========================
# HELPERS
# =========================
def cagr_from_values(values: np.ndarray, start_date: pd.Timestamp, end_date: pd.Timestamp) -> float:
    years = (end_date - start_date).days / 365.25
    if years <= 0 or len(values) < 2:
        return np.nan
    return (values[-1] / values[0]) ** (1 / years) - 1

def ann_sharpe(daily_returns: np.ndarray, rf_daily: float = 0.0) -> float:
    r = daily_returns - rf_daily
    sd = np.std(r, ddof=1)
    if sd == 0:
        return np.nan
    return np.sqrt(252) * np.mean(r) / sd

def max_drawdown(values: np.ndarray) -> float:
    peaks = np.maximum.accumulate(values)
    dd = values / peaks - 1.0
    return float(np.min(dd))

# =========================
# MAIN
# =========================
def main():
    eq = pd.read_parquet(EQUITY_FILE).copy()

    if "date" not in eq.columns:
        raise ValueError(f"Equity file missing 'date'. Columns: {list(eq.columns)}")

    if "portfolio_value" not in eq.columns:
        raise ValueError(f"Equity file missing 'portfolio_value'. Columns: {list(eq.columns)}")

    eq["date"] = pd.to_datetime(eq["date"])
    eq = eq.sort_values("date").dropna(subset=["portfolio_value"]).reset_index(drop=True)

    # daily returns from the original equity curve
    eq["ret"] = eq["portfolio_value"].pct_change().fillna(0.0)

    # Rescale the equity path so it starts at STARTING_CAPITAL
    eq["scaled_portfolio"] = STARTING_CAPITAL * (1.0 + eq["ret"]).cumprod()

    # If cash exists, scale it proportionally to the new starting capital
    has_cash = "cash" in eq.columns
    if has_cash:
        first_pv = float(eq["portfolio_value"].iloc[0])
        scale_factor = STARTING_CAPITAL / first_pv if first_pv > 0 else np.nan
        eq["scaled_cash"] = pd.to_numeric(eq["cash"], errors="coerce") * scale_factor
        eq["scaled_invested"] = eq["scaled_portfolio"] - eq["scaled_cash"]
    else:
        eq["scaled_cash"] = np.nan
        eq["scaled_invested"] = eq["scaled_portfolio"]  # assume fully invested if cash not available

    # --- Total ROI ---
    v0 = float(eq["scaled_portfolio"].iloc[0])
    v1 = float(eq["scaled_portfolio"].iloc[-1])
    total_roi = v1 / v0 - 1.0

    # --- CAGR / Sharpe / MaxDD on scaled equity ---
    start_date = pd.Timestamp(eq["date"].iloc[0]).normalize()
    end_date   = pd.Timestamp(eq["date"].iloc[-1]).normalize()

    cagr = cagr_from_values(eq["scaled_portfolio"].to_numpy(), start_date, end_date)
    sharpe = ann_sharpe(eq["ret"].to_numpy())  # returns unchanged by scaling
    mdd = max_drawdown(eq["scaled_portfolio"].to_numpy())

    # --- ROC (Return on Capital Employed) ---
    profit = v1 - v0

    invested = eq["scaled_invested"].copy()
    invested = invested.replace([np.inf, -np.inf], np.nan)
    invested = invested.where(invested > 0)  # only days with positive invested capital
    avg_invested = float(invested.mean(skipna=True))

    roc = profit / avg_invested if (avg_invested and avg_invested > 0) else np.nan

    print("=== SYSTEM ROI / ROC (scaled to start at $365,000) ===")
    print(f"Start date: {start_date.date()}   End date: {end_date.date()}   N days: {len(eq):,}")
    print(f"Start value: ${v0:,.2f}")
    print(f"End value:   ${v1:,.2f}")
    print(f"Total ROI:   {total_roi*100:.2f}%")
    print(f"CAGR:        {cagr*100:.2f}%")
    print(f"Sharpe:      {sharpe:.3f}")
    print(f"MaxDD:       {mdd*100:.2f}%")

    if has_cash:
        print(f"Avg invested capital (PV - cash): ${avg_invested:,.2f}")
    else:
        print("Note: 'cash' column not found in equity curve; ROC assumed fully-invested (ROC ~= ROI).")

    print(f"ROC (profit / avg invested capital): {roc*100:.2f}%")

    # =========================
    # YEAR-BY-YEAR ROI + ROC
    # =========================
    eq["year"] = eq["date"].dt.year

    def year_stats(g: pd.DataFrame) -> pd.Series:
        g = g.sort_values("date")
        pv0 = float(g["scaled_portfolio"].iloc[0])
        pv1 = float(g["scaled_portfolio"].iloc[-1])
        roi_y = pv1 / pv0 - 1.0
        profit_y = pv1 - pv0

        inv = g["scaled_invested"].copy()
        inv = inv.replace([np.inf, -np.inf], np.nan)
        inv = inv.where(inv > 0)
        avg_inv = float(inv.mean(skipna=True))
        roc_y = profit_y / avg_inv if (avg_inv and avg_inv > 0) else np.nan

        return pd.Series({
            "start": g["date"].iloc[0].date(),
            "end": g["date"].iloc[-1].date(),
            "start_value": pv0,
            "end_value": pv1,
            "profit": profit_y,
            "roi": roi_y,
            "avg_invested": avg_inv,
            "roc": roc_y,
            "n_days": len(g),
        })

    yearly = eq.groupby("year", group_keys=False).apply(year_stats).reset_index()
    yearly.to_csv(OUT_CSV, index=False)

    print("\n=== YEARLY (first 10 rows) ===")
    with pd.option_context("display.max_rows", 20, "display.width", 140):
        print(yearly.head(10).to_string(index=False))

    print(f"\n✔ Saved yearly ROI/ROC to: {OUT_CSV}")

if __name__ == "__main__":
    main()


=== SYSTEM ROI / ROC (scaled to start at $365,000) ===
Start date: 1999-01-04   End date: 2025-12-24   N days: 6,787
Start value: $365,000.00
End value:   $48,601,351.08
Total ROI:   13215.44%
CAGR:        19.89%
Sharpe:      1.088
MaxDD:       -30.40%
Avg invested capital (PV - cash): $10,265,200.23
ROC (profit / avg invested capital): 469.90%

=== YEARLY (first 10 rows) ===
 year      start        end  start_value    end_value         profit       roi  avg_invested       roc  n_days
 1999 1999-01-04 1999-12-31 3.650000e+05 6.631385e+05  298138.469474  0.816818  4.103298e+05  0.726583     252
 2000 2000-01-03 2000-12-29 6.664939e+05 9.077353e+05  241241.377478  0.361956  6.261261e+05  0.385292     252
 2001 2001-01-02 2001-12-31 9.071206e+05 9.027760e+05   -4344.530032 -0.004789  1.698047e+03 -2.558545     248
 2002 2002-01-02 2002-12-31 9.027760e+05 8.897894e+05  -12986.612019 -0.014385  1.956987e+05 -0.066360     252
 2003 2003-01-02 2003-12-31 8.897894e+05 1.215301e+06  325511.4242

  yearly = eq.groupby("year", group_keys=False).apply(year_stats).reset_index()
