In [3]:
#!/usr/bin/env python
"""
Monthly portfolio components table.

For each month-end (calendar month-end, carried back to the last available trading day),
print and save:

1) number of positions
2) value of positions (portfolio_value - cash)
3) value of cash

Source:
  Uses the equity curve produced by your trading engine:
    - date
    - portfolio_value
    - cash
    - num_positions

Output:
  - 25d_monthly_portfolio_components.csv
"""

import os
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings("ignore")

# ------------------------------------------------------------
# CONFIG: input/output locations and display labels
# ------------------------------------------------------------

# Equity curve (parquet) that this script reads.
EQUITY_FILE = "./13-trading_output_regression_insp500_spyfilter_cap15/13-equity_curve_regression_insp500_spyfilter_cap15.parquet"

# Folder that receives the CSV; created up front so the final save cannot
# fail on a missing directory.
OUTPUT_DIR = "./25d-monthly_portfolio_components_output"
os.makedirs(OUTPUT_DIR, exist_ok=True)
OUT_CSV = os.path.join(OUTPUT_DIR, "25d_monthly_portfolio_components.csv")

# Month number (1-12) -> three-letter English abbreviation.
MONTH_LABELS = dict(enumerate(
    (
        "Jan", "Feb", "Mar", "Apr", "May", "Jun",
        "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
    ),
    start=1,
))

print("=== MONTHLY PORTFOLIO COMPONENTS ===")

# ------------------------------------------------------------
# LOAD EQUITY CURVE
# ------------------------------------------------------------

# Fail early with a clear message if the input file is absent.
if not os.path.exists(EQUITY_FILE):
    raise FileNotFoundError(f"Equity curve file not found: {EQUITY_FILE}")

eq = pd.read_parquet(EQUITY_FILE).copy()

# The date column is mandatory; unparseable dates become NaT and are dropped,
# then rows are put in chronological order.
if "date" not in eq.columns:
    raise ValueError("Equity file must contain a 'date' column.")
eq["date"] = pd.to_datetime(eq["date"], errors="coerce")
eq = eq.dropna(subset=["date"])
eq = eq.sort_values("date")
eq = eq.reset_index(drop=True)

# Every value column used downstream must be present.
required = {"portfolio_value", "cash", "num_positions"}
missing = required - set(eq.columns)
if missing:
    raise ValueError(f"Equity file missing required columns: {missing}")

# Force the value columns to numeric; anything non-numeric turns into NaN
# and the affected rows are discarded.
for col in ("portfolio_value", "cash", "num_positions"):
    eq[col] = pd.to_numeric(eq[col], errors="coerce")
eq = eq.dropna(subset=["portfolio_value", "cash", "num_positions"]).copy()

# Collapse to one row per calendar day, keeping the last row of each day.
# The helper "day" column exists only for this step.
eq["day"] = eq["date"].dt.normalize()
eq = eq.drop_duplicates(subset="day", keep="last").drop(columns=["day"])
eq = eq.sort_values("date").reset_index(drop=True)

if eq.empty:
    raise RuntimeError("Equity curve is empty after cleaning.")

print(f"Loaded equity curve rows: {len(eq):,}")
print(f"Date range: {eq['date'].min().date()} -> {eq['date'].max().date()}")

# ============================================================
# BUILD MONTH-END SNAPSHOTS (asof last trading day in month)
# ============================================================

# Calendar month-ends covered by the equity curve. A month whose calendar
# end falls after the last equity date is not emitted.
start_date = eq["date"].min().normalize()
end_date   = eq["date"].max().normalize()

# Pass the offset object instead of the "M" string alias: newer pandas
# deprecates that alias in favour of "ME", while the offset object is
# accepted by old and new versions alike.
month_ends = pd.date_range(start=start_date, end=end_date, freq=pd.offsets.MonthEnd())
month_ends_df = pd.DataFrame({"month_end": month_ends})

eq_asof = eq[["date", "portfolio_value", "cash", "num_positions"]].rename(columns={"date": "asof_date"}).copy()

# Month-end keys sit at midnight. Strip any time-of-day from the equity
# timestamps so a row stamped on the month-end day itself (e.g. 16:00) still
# qualifies for the backward match; otherwise the snapshot would silently
# fall back to the previous trading day. Relies on `eq` holding one row per
# calendar day, which the load step enforces.
eq_asof["asof_date"] = eq_asof["asof_date"].dt.normalize()
eq_asof = eq_asof.sort_values("asof_date")

# For each month-end take the most recent equity row on or before it.
monthly = pd.merge_asof(
    month_ends_df.sort_values("month_end"),
    eq_asof,
    left_on="month_end",
    right_on="asof_date",
    direction="backward",
)

# Drop month-ends that found no equity row at or before them
monthly = monthly.dropna(subset=["portfolio_value", "cash", "num_positions"]).copy()

monthly["year"] = monthly["month_end"].dt.year
monthly["month"] = monthly["month_end"].dt.month
monthly["month_name"] = monthly["month"].map(MONTH_LABELS)

# Positions value is whatever part of the portfolio is not cash
monthly["positions_value"] = monthly["portfolio_value"] - monthly["cash"]

# Clean + reorder
monthly["num_positions"] = monthly["num_positions"].round(0).astype(int)
monthly["cash"] = monthly["cash"].astype(float)
monthly["positions_value"] = monthly["positions_value"].astype(float)

out = monthly[[
    "month_end", "year", "month", "month_name",
    "num_positions", "positions_value", "cash"
]].copy()

# ------------------------------------------------------------
# PRINT + SAVE
# ------------------------------------------------------------

# Shared renderer for the money columns: thousands separators, two decimals.
_fmt_money = "{:,.2f}".format

print("\n=== MONTH-END SNAPSHOT (one row per month) ===")
table_text = out.to_string(
    index=False,
    formatters={"positions_value": _fmt_money, "cash": _fmt_money},
)
print(table_text)

# The CSV keeps the raw (unformatted) numeric values.
out.to_csv(OUT_CSV, index=False)

print(f"\nSaved:\n  → {OUT_CSV}")
print("=== DONE ===")


=== MONTHLY PORTFOLIO COMPONENTS ===
Loaded equity curve rows: 6,790
Date range: 1999-01-04 -> 2025-12-30

=== MONTH-END SNAPSHOT (one row per month) ===
 month_end  year  month month_name  num_positions positions_value          cash
1999-01-31  1999      1        Jan              8      210,935.29    148,046.50
1999-02-28  1999      2        Feb              5      129,311.36    228,991.83
1999-03-31  1999      3        Mar              8      190,818.58    159,592.35
1999-04-30  1999      4        Apr              6      196,831.27    161,888.71
1999-05-31  1999      5        May              6      150,809.07    212,834.10
1999-06-30  1999      6        Jun              8      245,616.40    134,277.10
1999-07-31  1999      7        Jul              7      225,193.72    159,467.66
1999-08-31  1999      8        Aug              7      196,206.86    201,489.47
1999-09-30  1999      9        Sep              7      200,793.42    201,914.14
1999-10-31  1999     10        Oct            