In [3]:
#!/usr/bin/env python
"""
Monthly MINIMUM cash-balance table for a trading system.

Goal:
  Produce a Year x Month table where each cell is the *minimum cash balance
  observed during that month* (i.e., the lowest point cash fell to).

This helps answer: "What's the worst-case cash draw during each month?"

Data source priority:
  1) Equity curve file (if it contains a recognized cash column)
  2) Trade log (uses cash_after)

Outputs (written to OUTPUT_DIR):
  - 25d_monthly_min_cash_balance_long.csv   (one row per year/month, with
    min/max/end cash, date of the minimum, intra-month drawdown)
  - 25d_monthly_min_cash_balance_pivot.csv  (Year x Month, minimum cash only)
  - (optional) 25d_monthly_min_cash_balance.xlsx
"""

import os
import numpy as np
import pandas as pd
import warnings

warnings.filterwarnings("ignore")

# ============================================================
# CONFIG
# ============================================================

# Input artefacts of the backtest run, and the folder this script writes to.
TRADES_FILE = "./13-trading_output_regression_insp500_spyfilter_cap15/13-trades_regression_insp500_spyfilter_cap15.parquet"
EQUITY_FILE = "./13-trading_output_regression_insp500_spyfilter_cap15/13-equity_curve_regression_insp500_spyfilter_cap15.parquet"
OUTPUT_DIR = "./25d-monthly_min_cash_balance"
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Column names that are accepted as "cash" in the equity curve file.
# They are tried in this order; the first one present wins.
EQUITY_CASH_COL_CANDIDATES = [
    "cash",
    "cash_balance",
    "cash_value",
    "cash_usd",
    "net_cash",
    "cash_after",
]

# Calendar month number (1-12) -> three-letter label used as pivot columns.
MONTH_LABELS = dict(
    enumerate(
        (
            "Jan", "Feb", "Mar", "Apr", "May", "Jun",
            "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
        ),
        start=1,
    )
)

print("=== MONTHLY MINIMUM CASH BALANCE TABLE ===")

# ============================================================
# LOAD + BUILD A CASH TIME SERIES
# ============================================================


def _clean_cash_series(frame, cash_col):
    """Return a tidy ``date`` / ``cash_balance`` frame in chronological order.

    Args:
        frame: DataFrame that already carries a datetime ``date`` column.
        cash_col: Name of the column in ``frame`` holding the cash balance.

    Returns:
        A new DataFrame with exactly the columns ``date`` and
        ``cash_balance``. Rows whose date is missing (NaT) or whose balance
        cannot be parsed as a number are dropped; the result is sorted by
        date and re-indexed 0..n-1.
    """
    series = frame[["date", cash_col]].rename(columns={cash_col: "cash_balance"})
    series["cash_balance"] = pd.to_numeric(series["cash_balance"], errors="coerce")
    series = series.dropna(subset=["date", "cash_balance"])
    # A stable sort keeps rows that share a date in their original file order.
    # The default (unstable) sort may shuffle same-day rows, which would make
    # a later "last value of the month" pick an arbitrary same-day row.
    # NOTE(review): assumes file order within a day is chronological - confirm.
    return series.sort_values("date", kind="stable").reset_index(drop=True)


cash_ts = None

# --- Try equity curve first (best if it has daily cash) ---
if os.path.exists(EQUITY_FILE):
    eq = pd.read_parquet(EQUITY_FILE)

    # Normalize the date column: prefer "date", then "exec_date", and as a
    # last resort try to parse the first column (bad values become NaT).
    if "date" in eq.columns:
        eq["date"] = pd.to_datetime(eq["date"])
    elif "exec_date" in eq.columns:
        eq["date"] = pd.to_datetime(eq["exec_date"])
    else:
        eq["date"] = pd.to_datetime(eq.iloc[:, 0], errors="coerce")

    # First candidate name that actually exists in the file, else None.
    cash_col = next((c for c in EQUITY_CASH_COL_CANDIDATES if c in eq.columns), None)

    if cash_col is not None:
        eq = eq.sort_values("date", kind="stable")
        cash_ts = _clean_cash_series(eq, cash_col)
        print(f"Using EQUITY_FILE for cash series (column='{cash_col}'): {len(cash_ts):,} rows")
    else:
        print("EQUITY_FILE found but no recognized cash column. Falling back to TRADES_FILE.")
else:
    print("EQUITY_FILE not found. Using TRADES_FILE.")

# --- Fallback: use trades cash_after ---
if cash_ts is None:
    trades = pd.read_parquet(TRADES_FILE)

    if "exec_date" in trades.columns:
        trades["date"] = pd.to_datetime(trades["exec_date"])
    elif "date" in trades.columns:
        trades["date"] = pd.to_datetime(trades["date"])
    else:
        raise ValueError("Trades file must contain 'exec_date' or 'date'.")

    if "cash_after" not in trades.columns:
        raise ValueError("Trades file must contain 'cash_after' to compute cash balance.")

    # Several trades can share one date; keep their relative order intact.
    trades = trades.sort_values("date", kind="stable").reset_index(drop=True)

    cash_ts = _clean_cash_series(trades, "cash_after")

    print(f"Using TRADES_FILE for cash series (cash_after): {len(cash_ts):,} rows")

if cash_ts.empty:
    raise RuntimeError("Cash time series is empty after loading/cleaning.")

# ============================================================
# COMPUTE MONTHLY MINIMUM CASH BALANCE
# ============================================================

cash_ts["year"] = cash_ts["date"].dt.year
cash_ts["month"] = cash_ts["date"].dt.month

# One group per calendar month; all statistics come from the cash column.
cash_by_month = cash_ts.groupby(["year", "month"])["cash_balance"]

monthly_min = cash_by_month.agg(
    min_cash="min",
    max_cash="max",
    end_cash="last",        # last row of the month in cash_ts order
    observations="count",
)

# idxmin() gives, per month, the row label of the first occurrence of the
# minimum; mapping those labels through cash_ts["date"] yields the date on
# which the low was hit. Both series share the (year, month) index, so the
# assignment aligns by month rather than by position.
monthly_min["min_cash_date"] = cash_by_month.idxmin().map(cash_ts["date"])

monthly_min = monthly_min.reset_index()[
    ["year", "month", "min_cash", "min_cash_date", "max_cash", "end_cash", "observations"]
]

monthly_min["month_name"] = monthly_min["month"].map(MONTH_LABELS)

# Calculate how much cash dropped from max to min within month
monthly_min["intra_month_drawdown"] = monthly_min["max_cash"] - monthly_min["min_cash"]

# Percentage drop relative to the month's peak. A peak of exactly 0 would
# divide by zero (producing inf), so such months get NaN instead.
peak_cash = monthly_min["max_cash"].where(monthly_min["max_cash"] != 0)
monthly_min["drawdown_pct"] = (monthly_min["intra_month_drawdown"] / peak_cash * 100).round(2)

# ============================================================
# SUMMARY STATISTICS
# ============================================================

# Shared pieces for this section: the per-month lows, the month count and
# the thousands-separated, two-decimal formatter used for table output.
monthly_lows = monthly_min["min_cash"]
n_months = len(monthly_min)
money_fmt = "{:,.2f}".format

print("\n=== SUMMARY STATISTICS ===")
print(f"Total months analyzed: {n_months}")
print(f"Overall minimum cash ever: ${monthly_lows.min():,.2f}")
print(f"Overall maximum cash ever: ${monthly_min['max_cash'].max():,.2f}")
print(f"Average monthly minimum: ${monthly_lows.mean():,.2f}")
print(f"Median monthly minimum: ${monthly_lows.median():,.2f}")

# How many months saw cash dip strictly below each threshold
thresholds = [50000, 100000, 200000, 500000]
print("\n=== MONTHS WITH MINIMUM CASH BELOW THRESHOLDS ===")
for floor in thresholds:
    n_below = (monthly_lows < floor).sum()
    share = n_below / n_months * 100
    print(f"  Below ${floor:,}: {n_below} months ({share:.1f}%)")

# The ten months with the lowest minimum cash
print("\n=== WORST 10 MONTHS (Lowest Minimum Cash) ===")
worst_cols = ["year", "month_name", "min_cash", "min_cash_date", "end_cash", "intra_month_drawdown"]
worst_months = monthly_min.nsmallest(10, "min_cash").loc[:, worst_cols].copy()
# Render the date of the low as plain YYYY-MM-DD text for display/export.
worst_months["min_cash_date"] = pd.to_datetime(worst_months["min_cash_date"]).dt.strftime("%Y-%m-%d")
print(worst_months.to_string(index=False, float_format=money_fmt))

# The ten months with the widest max-to-min cash swing
print("\n=== LARGEST INTRA-MONTH CASH DRAWDOWNS ===")
dd_cols = ["year", "month_name", "max_cash", "min_cash", "intra_month_drawdown", "drawdown_pct"]
largest_dd = monthly_min.nlargest(10, "intra_month_drawdown").loc[:, dd_cols].copy()
print(largest_dd.to_string(index=False, float_format=money_fmt))

# ============================================================
# PIVOT TABLE: rows = year, cols = months (MINIMUM cash)
# ============================================================

# Calendar order for the columns (Jan..Dec), taken from MONTH_LABELS.
month_order = list(MONTH_LABELS.values())

# One row per year, one column per month label, cell = that month's low.
pivot_min = monthly_min.pivot_table(
    index="year",
    columns="month_name",
    values="min_cash",
    aggfunc="first",
)
# Reindexing puts the columns in calendar order and adds any month that has
# no data at all as an empty (NaN) column.
pivot_min = pivot_min.reindex(columns=month_order)
pivot_min = pivot_min.sort_index()

print("\n=== MONTHLY MINIMUM CASH BALANCE (Year x Month) ===")
print(pivot_min.to_string(float_format="{:,.2f}".format))

# ============================================================
# SAVE OUTPUTS
# ============================================================

long_csv_path = os.path.join(OUTPUT_DIR, "25d_monthly_min_cash_balance_long.csv")
pivot_csv_path = os.path.join(OUTPUT_DIR, "25d_monthly_min_cash_balance_pivot.csv")
xlsx_path = os.path.join(OUTPUT_DIR, "25d_monthly_min_cash_balance.xlsx")

# Long format with all details
monthly_out = monthly_min[[
    "year", "month", "month_name", "min_cash", "min_cash_date",
    "max_cash", "end_cash", "intra_month_drawdown", "drawdown_pct", "observations"
]].copy()
monthly_out.to_csv(long_csv_path, index=False)

# Pivot format (just min cash)
pivot_min.to_csv(pivot_csv_path)

# Optional Excel workbook. This stays best-effort (e.g. no Excel engine
# installed), but the failure is remembered and reported instead of being
# swallowed, so the summary below never claims a file that was not written.
xlsx_error = None
try:
    with pd.ExcelWriter(xlsx_path) as writer:
        pivot_min.to_excel(writer, sheet_name="Min Cash Pivot")
        monthly_out.to_excel(writer, sheet_name="Details", index=False)
        worst_months.to_excel(writer, sheet_name="Worst Months", index=False)
except Exception as exc:  # deliberate: Excel export must never abort the run
    xlsx_error = exc

print("\nSaved:")
print(f"  → {long_csv_path}")
print(f"  → {pivot_csv_path}")
if xlsx_error is None:
    print(f"  → {xlsx_path}")
else:
    print(f"  ✗ Excel export skipped ({type(xlsx_error).__name__}: {xlsx_error})")
print("\n=== DONE ===")

=== MONTHLY MINIMUM CASH BALANCE TABLE ===
Using EQUITY_FILE for cash series (column='cash'): 6,790 rows

=== SUMMARY STATISTICS ===
Total months analyzed: 324
Overall minimum cash ever: $71,439.78
Overall maximum cash ever: $14,123,347.35
Average monthly minimum: $1,554,066.01
Median monthly minimum: $887,562.50

=== MONTHS WITH MINIMUM CASH BELOW THRESHOLDS ===
  Below $50,000: 0 months (0.0%)
  Below $100,000: 5 months (1.5%)
  Below $200,000: 19 months (5.9%)
  Below $500,000: 91 months (28.1%)

=== WORST 10 MONTHS (Lowest Minimum Cash) ===
 year month_name   min_cash min_cash_date     end_cash  intra_month_drawdown
 2000        Jan  71,439.78    2000-01-27    71,439.78            127,528.96
 2000        Feb  71,439.78    2000-02-01   214,022.31            142,582.53
 2021        Nov  81,307.23    2021-11-04   150,052.71          1,077,586.52
 2012        Jul  81,936.65    2012-07-12   393,586.51            311,649.86
 2000        May  94,163.91    2000-05-18   104,164.61          