In [3]:
#!/usr/bin/env python

import os
import pandas as pd
import numpy as np
from datetime import datetime
import warnings
warnings.filterwarnings("ignore")


"""_summary_

    What's New
1. Weekly Rankings Storage
Added a new list weekly_rankings = [] that captures all top-ranked stocks before any turnover filters are applied.
2. Pre-Filter Data Captured (see the "SAVE PRE-FILTER RANKINGS" section of the trading engine)
For each stock in the top group, the system now records:

Signal and execution dates
Ticker and rank
Slope and ATR20 values
Raw and capped weights
Target vs current positions (shares, values, weights)
SPY regime status
Portfolio value

This happens before the drift threshold, minimum trade value, and minimum position weight filters are applied.
3. New Output File
The script now saves three files instead of two:

13-trades_regression_insp500_spyfilter_cap15.parquet - Actual executed trades
13-equity_curve_regression_insp500_spyfilter_cap15.parquet - Daily portfolio values
13-weekly_rankings_pre_filter_cap15.parquet - NEW: All top-ranked stocks each week

4. Enhanced Console Output
The script now reports:
Total trades: X
Total weekly rankings: Y
Rankings saved to: ...

5. SPY REGIME CONFIRMATION PERIOD (NEW)
Added SPY_REGIME_CONFIRM_DAYS parameter to reduce whipsaw signals around the 200 DMA.
Set to 1 for original behavior (immediate regime flip on crossover).
Set to 5+ to require N consecutive days above/below before confirming regime change.
    """

# ============================================================
# CONFIG
# ============================================================

# --- Input locations ---
UNIVERSE_FILE = "./12-tradable_sp500_universe/12-tradable_sp500_universe.parquet"
ATR20_DIR = "./4-ATR20_adjusted_All_Prices"
SPY_FILE = "./8-SPY_200DMA_market_regime/8-SPY_200DMA_regime.parquet"

# --- Output locations (created up front so the final writes find their folders) ---
OUTPUT_DIR_TRADES = "./13-trading_output_regression_insp500_spyfilter_cap15"
OUTPUT_DIR_PERF = "./14-trading_output_regression_insp500_spyfilter_performance_output_cap15"
os.makedirs(OUTPUT_DIR_TRADES, exist_ok=True)
os.makedirs(OUTPUT_DIR_PERF, exist_ok=True)

# --- Core strategy parameters ---
START_TRADING = pd.Timestamp("1999-1-1")
INITIAL_CAPITAL = 400_000
TOP_PERCENTILE = 0.95          # quantile cutoff on slope_adj (keeps the top 5%)
REBALANCE_DAY = "Wednesday"    # weekday name on which weekly signals are taken
TRADING_DAYS_PER_YEAR = 252

# --- Position cap: maximum weight of one name, measured on proxy portfolio value ---
# NOTE(review): the output file/folder names say "cap15" while the cap here is 12%.
MAX_POSITION_WEIGHT = 0.12

# --- Cash floor (Option A planned alignment) ---
MIN_CASH_RESERVE = 20_000.0

# --- SPY regime confirmation period ---
# Consecutive days SPY must stay on the other side of its 200 DMA before the
# regime flips. 1 reproduces the original behaviour (immediate flip on
# crossover); 5, 10, ... filter out whipsaws around the 200 DMA.
SPY_REGIME_CONFIRM_DAYS = 1

# --- Debug controls ---
DEBUG_TICKER = "TWX"
DEBUG_SIGNAL_DATE = pd.Timestamp("1999-01-06")
DEBUG_REBALANCE_DATE = pd.Timestamp("1999-01-07")

# --- Turnover / trade filters (MODEL A) ---
DRIFT_THRESHOLD = 0.05            # minimum |target - current| weight gap to trade
MIN_TRADE_VALUE = 10_000          # minimum estimated trade value
MIN_NEW_POSITION_WEIGHT = 0.005   # minimum target weight for opening a position

print(
    "=== REGRESSION-ONLY WEEKLY TREND STRATEGY "
    "(VOL-BASED SIZING, WITH RANKS + SPY REGIME + TURNOVER FILTERS + CASH FLOOR) ==="
)
print(f"SPY Regime Confirmation Period: {SPY_REGIME_CONFIRM_DAYS} day(s)")

# ============================================================
# EXECUTION DIAGNOSTICS (kept, but not fully wired)
# ============================================================

# Counters for the planning and execution stages; every counter starts at zero.
exec_diag = {
    # planning / generic counters
    "orders_seen": 0,
    "orders_executed": 0,
    "dropped_missing_open": 0,
    "dropped_cash_floor_buy": 0,
    "dropped_zero_shares": 0,
    "dropped_missing_ticker": 0,
    "warn_exec_cash_breach": 0,  # open moved; execution may breach reserve
    # execution-time cash-floor outcomes for BUY orders
    "dropped_cash_floor_buy_exec": 0,
    "clipped_cash_floor_buy_exec": 0,
}

# ============================================================
# SPY REGIME CONFIRMATION FUNCTION
# ============================================================

def create_confirmed_regime(raw_regime: np.ndarray, confirm_days: int) -> np.ndarray:
    """
    Smooth a raw 0/1 regime series so it only flips after N consecutive
    contrary observations.

    Parameters:
    -----------
    raw_regime : array-like of 0/1 (0 = below 200 DMA, 1 = above 200 DMA),
        expected in chronological order
    confirm_days : number of consecutive observations on the other side
        required before the confirmed regime flips; values <= 1 disable the
        filter and return a copy of the input

    Returns:
    --------
    confirmed_regime : array of 0/1 with the smoothed regime signal. The
        confirmed regime starts out equal to the first raw value. An empty
        input yields an empty array.
    """
    raw_regime = np.asarray(raw_regime)

    if confirm_days <= 1:
        return raw_regime.copy()

    confirmed = np.zeros(len(raw_regime), dtype=int)
    if raw_regime.size == 0:
        # Nothing to seed the regime from; avoid indexing raw_regime[0].
        return confirmed

    current_regime = raw_regime[0]
    contrary_streak = 0  # consecutive observations disagreeing with current_regime

    for i, observed in enumerate(raw_regime):
        if observed == current_regime:
            # Any agreeing observation breaks the contrary streak.
            contrary_streak = 0
        else:
            contrary_streak += 1
            if contrary_streak >= confirm_days:
                # Enough consecutive contrary days: flip, effective this day.
                current_regime = observed
                contrary_streak = 0
        confirmed[i] = current_regime

    return confirmed

# ============================================================
# FAST PRICE LOOKUP
# ============================================================

def fast_price_lookup(px_array, date_val):
    """
    Look up the most recent record at or before `date_val`.

    `px_array` is a structured array with fields 'date' and 'px'; the binary
    search assumes 'date' is sorted ascending. Returns the 'px' value of the
    last record whose date is <= `date_val` (whatever is stored there), or
    NaN when every record is dated after `date_val`.
    """
    target = np.datetime64(date_val, "ns")
    # side="right" gives the number of records dated on or before the target.
    n_at_or_before = np.searchsorted(px_array["date"], target, side="right")
    if n_at_or_before == 0:
        return np.nan
    return px_array["px"][n_at_or_before - 1]

# ============================================================
# SNAPSHOT PORTFOLIO
# ============================================================

def snapshot_portfolio_close(date, cash, positions, px_by_ticker):
    """
    Value the portfolio from the per-ticker price history in `px_by_ticker`
    (the caller passes close_adj histories); used for the end-of-day equity curve.

    Each position is priced with the last record at or before `date`;
    positions whose lookup yields NaN contribute nothing.

    Returns (equity, cash + equity, number of positions).
    """
    held_value = 0.0
    for ticker, holding in positions.items():
        close_px = fast_price_lookup(px_by_ticker[ticker], date)
        if np.isnan(close_px):
            continue
        held_value += holding["shares"] * close_px
    return held_value, cash + held_value, len(positions)

def snapshot_portfolio_open(exec_date, cash, positions, open_px_map, px_by_ticker_fallback):
    """
    Value the portfolio at execution time (reporting snapshots).

    A position is priced from `open_px_map` when the ticker has a non-missing,
    strictly positive entry there; otherwise from the last record at or before
    `exec_date` in `px_by_ticker_fallback`. Positions that still have no price
    (NaN) contribute nothing.

    Returns (equity, cash + equity, number of positions).
    """
    held_value = 0.0
    for ticker, holding in positions.items():
        has_open = (
            ticker in open_px_map
            and pd.notna(open_px_map[ticker])
            and open_px_map[ticker] > 0
        )
        if has_open:
            mark = float(open_px_map[ticker])
        else:
            mark = fast_price_lookup(px_by_ticker_fallback[ticker], exec_date)
        if np.isnan(mark):
            continue
        held_value += holding["shares"] * mark
    return held_value, cash + held_value, len(positions)

def snapshot_portfolio_exec_proxy(asof_date, cash, positions, exec_px_map, px_by_ticker_fallback):
    """
    Value the portfolio at "execution proxy" prices (the caller passes the
    signal-day close_adj map); used for sizing / weights with no lookahead.

    A missing or non-positive proxy price falls back to the last record at or
    before `asof_date` in `px_by_ticker_fallback`. Positions that still lack a
    strictly positive price contribute nothing.

    Returns (equity, cash + equity, number of positions).
    """
    held_value = 0.0
    for ticker, holding in positions.items():
        proxy_px = exec_px_map.get(ticker, np.nan)
        if pd.isna(proxy_px) or proxy_px <= 0:
            proxy_px = fast_price_lookup(px_by_ticker_fallback[ticker], asof_date)
        if pd.isna(proxy_px) or proxy_px <= 0:
            continue
        held_value += int(holding["shares"]) * float(proxy_px)
    return held_value, cash + held_value, len(positions)

# ============================================================
# LOAD UNIVERSE
# ============================================================

# Read the tradable-universe table and normalise the column types used below.
df = pd.read_parquet(UNIVERSE_FILE)
df["date"] = pd.to_datetime(df["date"])

# Force the signal and price columns to numeric; unparseable entries become NaN.
df = df.assign(
    **{
        col: pd.to_numeric(df[col], errors="coerce")
        for col in ("slope_adj", "close_adj", "open_adj")
    }
)

print(f"Loaded universe: {len(df):,} rows")

# ============================================================
# MERGE ATR20 PER-TICKER (IN-MEMORY ONLY)
# ============================================================

# Collect one (date, atr20) frame per ticker; the ticker is the parquet file
# name with ".parquet" removed. Files without an "atr20" column are ignored.
atr20_map = {}
for f in os.listdir(ATR20_DIR):
    if f.endswith(".parquet"):
        tmp = pd.read_parquet(os.path.join(ATR20_DIR, f))
        if "atr20" in tmp:
            tmp["date"] = pd.to_datetime(tmp["date"])
            atr20_map[f.replace(".parquet", "")] = tmp[["date", "atr20"]]

# Left-join ATR20 onto each ticker's rows by date; tickers with no ATR file
# get an all-NaN atr20 column so the concatenated frame has a uniform schema.
rows = [
    sub.merge(atr20_map[t], on="date", how="left")
    if t in atr20_map
    else sub.assign(atr20=np.nan)
    for t, sub in df.groupby("ticker", sort=False)
]

df = pd.concat(rows, ignore_index=True)
print(f"Universe with ATR20 merged: {len(df):,} rows")

# ============================================================
# LOAD SPY 200DMA REGIME FILE
# ============================================================

spy = pd.read_parquet(SPY_FILE)

# Ensure a "date" column exists without ever creating a duplicate one:
#   - an existing "date" column is used as-is;
#   - a "Date" column is renamed;
#   - otherwise the index (named Date/date, or unnamed) is promoted to a column.
# Resetting an unnamed index unconditionally would add a second "date" column
# whenever the file already stores dates as a regular column.
if "date" not in spy.columns:
    if "Date" in spy.columns:
        spy = spy.rename(columns={"Date": "date"})
    elif spy.index.name in ["Date", "date", None]:
        spy = spy.reset_index().rename(columns={"index": "date", "Date": "date"})

if "date" not in spy.columns:
    raise ValueError("SPY file missing a 'date' column or date index")

spy["date"] = pd.to_datetime(spy["date"])

if "spy_close" not in spy.columns:
    raise ValueError("SPY file missing 'spy_close' column")

if "market_regime" not in spy.columns:
    raise ValueError("SPY file missing 'market_regime' column")

# The confirmation filter counts *consecutive* observations, so the rows must
# be in chronological order before it runs (stable sort keeps tie order).
spy = spy.sort_values("date", kind="mergesort").reset_index(drop=True)

# Get raw regime signal (1 = above 200 DMA, 0 = below)
raw_regime = spy["market_regime"].astype(int).values

# Apply confirmation period filter
confirmed_regime = create_confirmed_regime(raw_regime, SPY_REGIME_CONFIRM_DAYS)
spy["spy_above_200dma"] = confirmed_regime == 1

# Report regime statistics
n_bull_raw = (raw_regime == 1).sum()
n_bear_raw = (raw_regime == 0).sum()
n_bull_confirmed = (confirmed_regime == 1).sum()
n_bear_confirmed = (confirmed_regime == 0).sum()

print(f"\nSPY Regime Statistics:")
print(f"  Raw regime:       {n_bull_raw:,} bull days, {n_bear_raw:,} bear days")
print(f"  Confirmed regime: {n_bull_confirmed:,} bull days, {n_bear_confirmed:,} bear days")
if SPY_REGIME_CONFIRM_DAYS > 1:
    regime_changes_raw = np.sum(np.diff(raw_regime) != 0)
    regime_changes_confirmed = np.sum(np.diff(confirmed_regime) != 0)
    print(f"  Regime changes:   {regime_changes_raw} raw → {regime_changes_confirmed} confirmed")

# date -> bool lookup consulted on each signal day
spy_regime_map = spy.set_index("date")["spy_above_200dma"].to_dict()

# ============================================================
# PREP GROUPED DATA
# ============================================================

# One sub-frame per calendar date for the day-by-day loop.
df_by_date = {d: sub for d, sub in df.groupby("date")}

# Per-ticker close history as a date-sorted structured array (fields: date, px),
# the layout fast_price_lookup() searches.
px_by_ticker = {}
for t, sub in df.groupby("ticker", sort=False):
    sub = sub.sort_values("date")
    arr = np.empty(len(sub), dtype=[("date", "datetime64[ns]"), ("px", "float64")])
    arr["date"] = sub["date"].values.astype("datetime64[ns]")
    arr["px"] = sub["close_adj"].to_numpy(dtype=float)
    px_by_ticker[t] = arr

dates = sorted(df_by_date)

# ============================================================
# BUILD NEXT-TRADING-DAY (T+1) MAP
# ============================================================

# Each date maps to the following date present in the universe; the final
# date maps to None.
next_date_map = dict(zip(dates, dates[1:] + [None]))

# ============================================================
# UTILITIES
# ============================================================

def is_rebalance_day(date: pd.Timestamp) -> bool:
    """Return True when `date` falls on the weekday named by REBALANCE_DAY."""
    weekday_name = date.day_name()
    return weekday_name == REBALANCE_DAY

def get_signal_close(day_df, ticker):
    """
    Return the 'close_adj' of the first row of `day_df` whose 'ticker' equals
    `ticker`, as a float; NaN when the ticker has no row.
    """
    matches = day_df.loc[day_df["ticker"] == ticker, "close_adj"]
    return np.nan if matches.empty else float(matches.iloc[0])

def cap_and_redistribute_weights(w: np.ndarray, cap: float) -> np.ndarray:
    """
    Cap weights at `cap` and redistribute the excess proportionally to the
    names that have not been capped.

    The input is first normalised to sum to 1 (when its sum is positive).

    If full investment under the cap is not feasible (N * cap < 1), weights
    are simply clipped at `cap` and the leftover stays unallocated (cash drag).

    Otherwise excess is redistributed only to names that are still uncapped.
    A name pinned at the cap never receives excess again, so the loop finishes
    in at most N passes and no returned weight exceeds `cap`. If every
    remaining uncapped name has zero weight there is nowhere to put the
    excess, and it is left unallocated.

    Parameters
    ----------
    w : array-like of non-negative raw weights
    cap : maximum weight allowed for a single name

    Returns
    -------
    np.ndarray of capped weights (a new array; the input is not modified).
    """
    w = np.asarray(w, dtype=float).copy()
    if w.size == 0:
        return w

    total = w.sum()
    if total > 0:
        w /= total

    # Not feasible to be fully invested under cap
    if w.size * cap < 1.0:
        return np.minimum(w, cap)

    capped = np.zeros(w.size, dtype=bool)  # names already pinned at the cap

    # Every pass pins at least one new name, so N + 1 passes always suffice.
    for _ in range(w.size + 1):
        over = (w > cap) & ~capped
        if not over.any():
            break

        excess = (w[over] - cap).sum()
        w[over] = cap
        capped |= over

        free = ~capped
        free_sum = w[free].sum()
        if free_sum <= 0:
            # Nobody left to absorb the excess; leave it unallocated.
            break
        w[free] += excess * (w[free] / free_sum)

    return w

# ============================================================
# TRADING ENGINE (UPDATED FOR "PLANNED ALIGNMENT")
#
# RULES:
# - PLAN on Wednesday using ONLY Wednesday close (no Thursday info for sizing/affordability)
# - EXECUTE on Thursday morning at Thursday open (fills), but shares are fixed from Wednesday
# - NO buy clipping using Thursday open (skip in planning if insufficient cash proxy)
# ============================================================

# Live portfolio state, mutated only by the execution step of the loop below.
cash = INITIAL_CAPITAL
positions = {}       # ticker -> {"shares": int, "entry": float}
history = []         # trade log (one dict per executed fill)
equity_curve = []    # one dict per processed date
last_equity_close = INITIAL_CAPITAL

# NEW: Storage for weekly rankings before filters
weekly_rankings = []

# Pending planned orders keyed by exec_date (Thursday)
# payload contains: signal_date, spy flag, open_px_map for exec_date, planned trades list
pending_orders = {}

print("\nRunning trading engine...")

# Per date, in order: (0) execute orders planned on the previous signal day,
# (1) on a rebalance day plan orders for the next trading day, (2) mark to
# market at the close.
for date in dates:

    if date < START_TRADING:
        continue

    day = df_by_date.get(date)
    if day is None or day.empty:
        continue

    # =======================================================
    # 0) EXECUTE any pending orders scheduled for TODAY (Thursday morning)
    # =======================================================
    if date in pending_orders:
        payload = pending_orders.pop(date)
        signal_date = payload["signal_date"]
        spy_above_200 = payload["spy_above_200"]
        open_px_map = payload["open_px_map"]
        planned_trades = payload["planned_trades"]

        if date == pd.Timestamp("2010-11-26"):
            print("\n=== DEBUG 2010-11-26 EXEC ===")
            print("planned_trades:", len(planned_trades))
            if planned_trades:
                miss_open = sum(pd.isna(open_px_map.get(tr["ticker"], np.nan)) for tr in planned_trades)
                print("missing_open_count:", miss_open)

        # Execute SELLS first, then BUYS; within each, rank ascending
        planned_trades = sorted(planned_trades, key=lambda x: (0 if x["side"] == "SELL" else 1, x["rank"]))

        # The signal-day frame is the same for every fill in this batch.
        signal_day_df = df_by_date.get(signal_date)

        for tr in planned_trades:
            t = tr["ticker"]
            side = tr["side"]
            sh_plan = int(tr["shares"])

            if not t or sh_plan <= 0:
                continue

            # execution price is Thursday open; fallback to last close if missing
            px = open_px_map.get(t, np.nan)
            if pd.isna(px) or px <= 0:
                if t in px_by_ticker:
                    px = fast_price_lookup(px_by_ticker[t], date)
            if pd.isna(px) or px <= 0:
                exec_diag["dropped_missing_open"] += 1
                continue

            px = float(px)
            cash_before = float(cash)

            if side == "SELL":
                # Never sell more than is actually held at execution time.
                cur = int(positions.get(t, {}).get("shares", 0))
                sh_exec = min(sh_plan, cur)
                if sh_exec <= 0:
                    continue

                trade_value = sh_exec * px
                cash += trade_value

                new_sh = cur - sh_exec
                if new_sh <= 0:
                    positions.pop(t, None)
                else:
                    positions[t]["shares"] = new_sh

                typ = "SELL"
                sh_record = sh_exec

            else:  # BUY
                # enforce cash reserve at execution
                available = cash - MIN_CASH_RESERVE
                if available <= 0:
                    exec_diag["dropped_cash_floor_buy_exec"] += 1
                    continue

                # Clip the order to what the reserve allows at the fill price.
                max_affordable_shares = int(np.floor(available / px))
                sh_exec = min(sh_plan, max_affordable_shares)

                if sh_exec <= 0:
                    exec_diag["dropped_cash_floor_buy_exec"] += 1
                    continue

                if sh_exec < sh_plan:
                    exec_diag["clipped_cash_floor_buy_exec"] += 1

                trade_value = sh_exec * px
                cash -= trade_value

                if t in positions:
                    positions[t]["shares"] = int(positions[t]["shares"]) + sh_exec
                else:
                    positions[t] = {"shares": sh_exec, "entry": px}

                typ = "BUY"
                sh_record = sh_exec

            equity_after, portfolio_after, npos_after = snapshot_portfolio_open(
                date, cash, positions, open_px_map, px_by_ticker
            )

            # signal close comes from the original Wednesday signal date
            signal_px = get_signal_close(signal_day_df, t) if signal_day_df is not None else np.nan

            history.append({
                "signal_date": signal_date,
                "exec_date": date,
                "signal_close_adj": signal_px,
                "exec_open_adj": px,

                "ticker": t,
                "type": typ,
                "shares": int(sh_record),
                "price": px,
                "value": float(sh_record * px),
                "reason": tr["reason"],

                "slope_rank_within_top": tr["rank"],
                "spy_above_200dma": spy_above_200,

                "cash_before": cash_before,
                "cash_after": float(cash),
                "equity_after": float(equity_after),
                "portfolio_after": float(portfolio_after),
                "num_positions_after": int(npos_after),
            })

            # Count actual fills only: orders dropped above (missing price,
            # nothing to sell, cash floor) never reach this point.
            exec_diag["orders_executed"] += 1

    # =======================================================
    # 1) PLAN trades on Wednesday close for next trading day open
    # =======================================================
    if is_rebalance_day(date):
        trade_date = next_date_map.get(date)

        if trade_date is not None and trade_date in df_by_date:
            # SPY regime as-of Wednesday close (with confirmation filter applied);
            # a date missing from the SPY map is treated as "above" (buys allowed).
            spy_above_200 = bool(spy_regime_map.get(date, True))
            can_buy_next_open = spy_above_200

            # Rank / top group (signal day)
            rankable = day[
                (day["slope_adj"].notna()) &
                (day.get("in_sp500", True) == True)
            ].copy()

            rank_map = {}
            top_tickers = set()
            top_group = pd.DataFrame()

            if not rankable.empty:
                rankable = rankable.sort_values("slope_adj", ascending=False)
                cutoff = rankable["slope_adj"].quantile(TOP_PERCENTILE)
                top_group = rankable[rankable["slope_adj"] >= cutoff].copy()

                if not top_group.empty:
                    top_group = top_group.sort_values("slope_adj", ascending=False)
                    top_group["slope_rank_within_top"] = np.arange(1, len(top_group) + 1)
                    rank_map = dict(zip(top_group["ticker"], top_group["slope_rank_within_top"]))
                    top_tickers = set(top_group["ticker"].values)

            if not top_group.empty:
                # Execution proxy prices known at signal time (Wednesday close)
                exec_px_map = day.set_index("ticker")["close_adj"].to_dict()

                # Thursday open map stored for execution (fills)
                trade_day = df_by_date[trade_date]
                open_px_map = trade_day.set_index("ticker")["open_adj"].to_dict()

                # ---------- PLAN STATE (DO NOT TOUCH REAL cash/positions) ----------
                cash_plan = float(cash)
                pos_plan = {t: int(p["shares"]) for t, p in positions.items()}

                planned = []  # list of dicts {ticker, side, shares, reason, rank}

                def px_est(ticker: str) -> float:
                    """Wednesday close estimate (proxy), NaN when no positive price is known."""
                    p = exec_px_map.get(ticker, np.nan)
                    if pd.isna(p) or p <= 0:
                        if ticker in px_by_ticker:
                            p = fast_price_lookup(px_by_ticker[ticker], date)
                    return float(p) if (pd.notna(p) and p > 0) else np.nan

                # -------------------------
                # (A) EXIT SELLS FIRST
                # -------------------------
                exit_tickers = [t for t in list(pos_plan.keys()) if t not in top_tickers]
                for t in exit_tickers:
                    sh0 = int(pos_plan.get(t, 0))
                    if sh0 <= 0:
                        continue

                    p = px_est(t)
                    if pd.isna(p):
                        # No usable proxy price: the exit is not planned this week.
                        continue

                    cash_plan += sh0 * p
                    pos_plan.pop(t, None)

                    planned.append({
                        "ticker": t,
                        "side": "SELL",
                        "shares": sh0,
                        "reason": "not_in_top_quintile",
                        "rank": int(rank_map.get(t, 9999)),
                    })

                # -------------------------
                # (B) REVALUE PORTFOLIO AT "EXEC PROXY" AFTER EXITS (Wednesday close)
                # -------------------------
                pos_plan_struct = {t: {"shares": sh} for t, sh in pos_plan.items()}
                equity_exec, portfolio_exec, _ = snapshot_portfolio_exec_proxy(
                    date, cash_plan, pos_plan_struct, exec_px_map, px_by_ticker
                )
                # Capital available for sizing excludes the cash reserve.
                effective_equity = max(portfolio_exec - MIN_CASH_RESERVE, 0.0)

                # -------------------------
                # (C) BUILD TARGETS USING WED CLOSE (NO LOOKAHEAD)
                # -------------------------
                tg = top_group.copy()
                tg = tg[
                    tg["atr20"].notna() &
                    (tg["atr20"] > 0) &
                    tg["close_adj"].notna() &
                    (tg["close_adj"] > 0)
                ].copy()

                if not tg.empty:
                    inv_vol = 1.0 / tg["atr20"].astype(float)
                    total_inv_vol = inv_vol.sum()

                    if total_inv_vol > 0:
                        tg["inv_vol"] = inv_vol

                        # Raw (uncapped) inverse-vol weights
                        tg["raw_weight"] = tg["inv_vol"] / total_inv_vol

                        # Apply the MAX_POSITION_WEIGHT cap and redistribute (if feasible)
                        w_cap = cap_and_redistribute_weights(tg["raw_weight"].to_numpy(), MAX_POSITION_WEIGHT)
                        tg["weight"] = w_cap

                        tg["target_value"] = effective_equity * tg["weight"]
                        tg["exec_px_est"] = tg["ticker"].map(exec_px_map)
                        tg = tg[tg["exec_px_est"].notna() & (tg["exec_px_est"] > 0)].copy()

                        # Hard safety: no target may exceed MAX_POSITION_WEIGHT of proxy portfolio value
                        tg["target_value"] = np.minimum(tg["target_value"], MAX_POSITION_WEIGHT * portfolio_exec)

                        # target shares based on Wednesday close estimate
                        tg["target_shares"] = np.floor(tg["target_value"] / tg["exec_px_est"]).astype(int)
                        tg = tg[tg["target_shares"] > 0].copy()

                        # deterministic ordering (match generator)
                        tg = tg.sort_values("slope_adj", ascending=False)

                        total_portfolio_value_exec = portfolio_exec  # proxy portfolio value

                        # -------------------------
                        # NEW: SAVE PRE-FILTER RANKINGS
                        # -------------------------
                        for _, r in tg.iterrows():
                            t = str(r["ticker"])
                            rank = int(rank_map.get(t, 9999))
                            p = float(r["exec_px_est"])
                            target_sh = int(r["target_shares"])
                            cur_sh = int(pos_plan.get(t, 0))

                            target_value = target_sh * p
                            target_weight = (target_value / total_portfolio_value_exec) if total_portfolio_value_exec > 0 else 0.0

                            cur_value = cur_sh * p
                            cur_weight = (cur_value / total_portfolio_value_exec) if total_portfolio_value_exec > 0 else 0.0

                            weekly_rankings.append({
                                "signal_date": date,
                                "exec_date": trade_date,
                                "ticker": t,
                                "slope_rank": rank,
                                "slope_adj": float(r["slope_adj"]),
                                "atr20": float(r["atr20"]),
                                "close_adj": float(r["close_adj"]),
                                "raw_weight": float(r["raw_weight"]),
                                "capped_weight": float(r["weight"]),
                                "target_value": target_value,
                                "target_weight": target_weight,
                                "target_shares": target_sh,
                                "current_shares": cur_sh,
                                "current_value": cur_value,
                                "current_weight": cur_weight,
                                "spy_above_200dma": spy_above_200,
                                "portfolio_value": total_portfolio_value_exec,
                            })

                        # -------------------------
                        # (D) REBALANCE LOOP (SEQUENTIAL cash_plan updates like generator)
                        # -------------------------
                        for _, r in tg.iterrows():
                            t = str(r["ticker"])
                            rank = int(rank_map.get(t, 9999))

                            p = float(r["exec_px_est"])
                            if not (p > 0):
                                continue

                            target_sh = int(r["target_shares"])
                            cur_sh = int(pos_plan.get(t, 0))

                            # Enforce MAX_POSITION_WEIGHT at share level (proxy portfolio value)
                            max_shares_allowed = (
                                int(np.floor((MAX_POSITION_WEIGHT * total_portfolio_value_exec) / p))
                                if total_portfolio_value_exec > 0 else 0
                            )
                            target_sh = min(target_sh, max_shares_allowed)

                            # weights computed on proxy prices (Wednesday close)
                            target_value = target_sh * p
                            target_weight = (target_value / total_portfolio_value_exec) if total_portfolio_value_exec > 0 else 0.0

                            cur_value = cur_sh * p
                            cur_weight = (cur_value / total_portfolio_value_exec) if total_portfolio_value_exec > 0 else 0.0

                            weight_diff = abs(target_weight - cur_weight)
                            is_new_position = (cur_sh == 0)

                            # If currently breaching the cap, force a trim even if within drift
                            cap_breach = (cur_weight > MAX_POSITION_WEIGHT + 1e-9)

                            if (weight_diff < DRIFT_THRESHOLD) and (not cap_breach):
                                continue

                            # -------- SELL (rebalance down) --------
                            if target_sh < cur_sh:
                                trade_sh = cur_sh - target_sh
                                est_value = trade_sh * p

                                if est_value < MIN_TRADE_VALUE:
                                    continue

                                cash_plan += est_value

                                new_sh = cur_sh - trade_sh
                                if new_sh <= 0:
                                    pos_plan.pop(t, None)
                                else:
                                    pos_plan[t] = new_sh

                                planned.append({
                                    "ticker": t,
                                    "side": "SELL",
                                    "shares": int(trade_sh),
                                    "reason": "rebalance_down",
                                    "rank": rank,
                                })

                            # -------- BUY (rebalance up / new entry) --------
                            elif target_sh > cur_sh:
                                if not can_buy_next_open:
                                    continue

                                trade_sh = target_sh - cur_sh
                                est_value = trade_sh * p

                                if is_new_position and target_weight < MIN_NEW_POSITION_WEIGHT:
                                    continue

                                if est_value < MIN_TRADE_VALUE:
                                    continue

                                # IMPORTANT: skip (NO CLIP) to match generator
                                if est_value > (cash_plan - MIN_CASH_RESERVE):
                                    exec_diag["dropped_cash_floor_buy"] += 1
                                    continue

                                cash_plan -= est_value
                                pos_plan[t] = cur_sh + trade_sh

                                planned.append({
                                    "ticker": t,
                                    "side": "BUY",
                                    "shares": int(trade_sh),
                                    "reason": ("rebalance_up" if cur_sh > 0 else "new_entry"),
                                    "rank": rank,
                                })
                if date == pd.Timestamp("2010-11-24"):
                    print("\n=== DEBUG 2010-11-24 PLAN ===")
                    print("trade_date:", trade_date)
                    print("spy_above_200:", spy_above_200)
                    print("positions_before:", len(positions))
                    print("top_group_size:", len(top_group))
                    print("exit_tickers:", len([t for t in positions.keys() if t not in top_tickers]))
                    print("planned_trades:", len(planned))
                    if planned:
                        print("planned sample:", planned[:10])

                # Store for execution on trade_date open
                pending_orders[trade_date] = {
                    "signal_date": date,
                    "spy_above_200": spy_above_200,
                    "open_px_map": open_px_map,
                    "planned_trades": planned,
                }
                exec_diag["orders_seen"] += len(planned)

    # =======================================================
    # 2) DAILY MARK-TO-MARKET (end of day close)
    # =======================================================
    equity_close, portfolio_value_close, num_positions = snapshot_portfolio_close(
        date, cash, positions, px_by_ticker
    )
    last_equity_close = portfolio_value_close

    equity_curve.append({
        "date": date,
        "portfolio_value": portfolio_value_close,
        "cash": cash,
        "num_positions": num_positions,
    })

# ============================================================
# SAVE TRADES, EQUITY, AND WEEKLY RANKINGS
# ============================================================

# Output paths (all three files go to the trades output folder).
trades_file = os.path.join(OUTPUT_DIR_TRADES, "13-trades_regression_insp500_spyfilter_cap15.parquet")
equity_file = os.path.join(OUTPUT_DIR_TRADES, "13-equity_curve_regression_insp500_spyfilter_cap15.parquet")
rankings_file = os.path.join(OUTPUT_DIR_TRADES, "13-weekly_rankings_pre_filter_cap15.parquet")

# Executed fills.
trades = pd.DataFrame(history)
trades.to_parquet(trades_file, index=False)

# Daily mark-to-market records.
equity_df = pd.DataFrame(equity_curve)
equity_df.to_parquet(equity_file, index=False)

# Pre-filter weekly rankings.
rankings_df = pd.DataFrame(weekly_rankings)
rankings_df.to_parquet(rankings_file, index=False)

print("=== TRADING COMPLETE ===")
# portfolio_value_close is the value left over from the last processed date.
print("Final portfolio value:", portfolio_value_close)
print("Final cash balance:", cash)
print("Total trades:", len(trades))
print("Total weekly rankings:", len(rankings_df))
print(f"Trades saved to:    {trades_file}")
print(f"Equity saved to:    {equity_file}")
print(f"Rankings saved to:  {rankings_file}\n")

print("Diagnostics:")
print(f"  orders_seen (planned):        {exec_diag['orders_seen']}")
print(f"  orders_executed (fills):      {exec_diag['orders_executed']}")
print(f"  dropped_missing_open:         {exec_diag['dropped_missing_open']}")
print(f"  dropped_cash_floor_buy(plan): {exec_diag['dropped_cash_floor_buy']}")
print(f"  warn_exec_cash_breach:        {exec_diag['warn_exec_cash_breach']}\n")

# ============================================================
# PERFORMANCE ANALYSIS
# ============================================================

print("=== PERFORMANCE ANALYSIS (Regression-Only System) ===")

# Make sure a 'spy_close' column is available, falling back to 'Close'.
if "spy_close" not in spy.columns:
    if "Close" not in spy.columns:
        raise ValueError("Cannot find a SPY close column for performance merge.")
    spy["spy_close"] = spy["Close"]

eq = equity_df.copy()
eq["date"] = pd.to_datetime(eq["date"])

# Inner join: keep only dates present in both the equity curve and the SPY file.
df_perf = eq.merge(spy[["date", "spy_close"]], on="date", how="inner").sort_values("date")

print(f"Loaded equity curve: {len(eq):,} rows")
print(f"Loaded SPY file:     {len(spy):,} rows")
print(f"Merged dataset:      {len(df_perf):,} rows\n")

# Simple daily returns; the first row has no predecessor and is set to 0.
df_perf["strat_ret"] = df_perf["portfolio_value"].pct_change().fillna(0)
df_perf["spy_ret"] = df_perf["spy_close"].pct_change().fillna(0)
df_perf["year"] = df_perf["date"].dt.year

def cagr(total_return, n_years):
    """Return the compound annual growth rate implied by a total return.

    Args:
        total_return: Cumulative return over the whole period as a fraction
            (e.g. 0.21 for +21%).
        n_years: Length of the period in years.

    Returns:
        ``(1 + total_return) ** (1 / n_years) - 1`` when ``n_years`` is
        positive, otherwise ``np.nan``.
    """
    # A non-positive horizon has no meaningful annualisation.
    if not n_years > 0:
        return np.nan
    growth_factor = 1 + total_return
    return growth_factor ** (1 / n_years) - 1

def max_drawdown(series):
    """Return the deepest peak-to-trough decline of a value series.

    Args:
        series: pandas Series of levels (e.g. portfolio value or price).

    Returns:
        The minimum of ``series / running_peak - 1``; 0 when the series never
        falls below its running maximum, negative otherwise.
    """
    # Each point is compared with the highest level seen up to that point.
    return series.div(series.cummax()).sub(1).min()

def sharpe(returns, rf=0.0):
    """Return the annualised Sharpe ratio of a series of periodic returns.

    Args:
        returns: pandas Series of per-period returns.
        rf: Per-period risk-free rate subtracted from the mean return.

    Returns:
        ``(mean - rf) / std * sqrt(TRADING_DAYS_PER_YEAR)``, or ``0`` when the
        standard deviation of ``returns`` is exactly zero.
    """
    sigma = returns.std()
    # Flat return series: avoid dividing by zero and report a neutral ratio.
    if sigma == 0:
        return 0
    excess_mean = returns.mean() - rf
    return excess_mean / sigma * np.sqrt(TRADING_DAYS_PER_YEAR)

def sortino(returns, rf=0.0):
    """Return an annualised Sortino-style ratio of periodic returns.

    The denominator is the standard deviation of the strictly negative
    returns only.

    Args:
        returns: pandas Series of per-period returns.
        rf: Per-period risk-free rate subtracted from the mean return.

    Returns:
        ``(mean - rf) / downside_std * sqrt(TRADING_DAYS_PER_YEAR)``, or ``0``
        when the standard deviation of the negative returns is exactly zero.
    """
    losses = returns[returns < 0]
    downside_sigma = losses.std()
    # Identical losses give zero dispersion: report a neutral ratio instead of
    # dividing by zero.
    if downside_sigma == 0:
        return 0
    excess_mean = returns.mean() - rf
    return excess_mean / downside_sigma * np.sqrt(TRADING_DAYS_PER_YEAR)

# ---- Whole-period statistics: strategy vs. SPY over the merged date range ----
_pv_curve = df_perf["portfolio_value"]
_spy_curve = df_perf["spy_close"]
_perf_dates = df_perf["date"]

# Strategy total return between the first and last merged rows.
start_val, end_val = _pv_curve.iloc[0], _pv_curve.iloc[-1]
total_ret = end_val / start_val - 1

# Calendar span of the merged data, expressed in 365.25-day years.
n_years = (_perf_dates.iloc[-1] - _perf_dates.iloc[0]).days / 365.25

# Square-root-of-time factor used to annualise per-row volatility.
_annualiser = np.sqrt(TRADING_DAYS_PER_YEAR)

# Strategy metrics.
strat_cagr = cagr(total_ret, n_years)
strat_vol = df_perf["strat_ret"].std() * _annualiser
strat_sharpe = sharpe(df_perf["strat_ret"])
strat_sortino = sortino(df_perf["strat_ret"])
strat_maxdd = max_drawdown(_pv_curve)
if strat_maxdd != 0:
    strat_calmar = strat_cagr / abs(strat_maxdd)
else:
    # No drawdown at all: the Calmar ratio is undefined.
    strat_calmar = np.nan

# Benchmark metrics over the same rows and the same horizon.
spy_total_ret = _spy_curve.iloc[-1] / _spy_curve.iloc[0] - 1
spy_cagr = cagr(spy_total_ret, n_years)
spy_vol = df_perf["spy_ret"].std() * _annualiser
spy_sharpe = sharpe(df_perf["spy_ret"])
spy_sortino = sortino(df_perf["spy_ret"])
spy_maxdd = max_drawdown(_spy_curve)
if spy_maxdd != 0:
    spy_calmar = spy_cagr / abs(spy_maxdd)
else:
    spy_calmar = np.nan

def year_stats(g):
    """Summarise one group of performance rows for strategy and SPY.

    Args:
        g: DataFrame slice with the columns ``portfolio_value``,
            ``spy_close``, ``strat_ret`` and ``spy_ret``.

    Returns:
        pandas Series with first/last portfolio value and, for both the
        strategy and SPY, the first-to-last-row return, the maximum drawdown
        within the group, the Sharpe and Sortino ratios of the group's
        returns, and return divided by absolute drawdown (NaN when the
        drawdown is zero).
    """
    equity = g["portfolio_value"]
    bench = g["spy_close"]

    # Returns are measured from the first to the last row of the group.
    eq_first, eq_last = equity.iloc[0], equity.iloc[-1]
    bench_first, bench_last = bench.iloc[0], bench.iloc[-1]
    strat_return = eq_last / eq_first - 1
    bench_return = bench_last / bench_first - 1

    # Drawdowns restart from the first row of the group.
    strat_dd = max_drawdown(equity)
    bench_dd = max_drawdown(bench)

    if strat_dd != 0:
        strat_calmar = strat_return / abs(strat_dd)
    else:
        strat_calmar = np.nan

    if bench_dd != 0:
        bench_calmar = bench_return / abs(bench_dd)
    else:
        bench_calmar = np.nan

    stats = {
        "start_value": eq_first,
        "end_value": eq_last,
        "strat_return": strat_return,
        "strat_maxdd": strat_dd,
        "strat_sharpe": sharpe(g["strat_ret"]),
        "strat_sortino": sortino(g["strat_ret"]),
        "strat_calmar": strat_calmar,
        "spy_return": bench_return,
        "spy_maxdd": bench_dd,
        "spy_sharpe": sharpe(g["spy_ret"]),
        "spy_sortino": sortino(g["spy_ret"]),
        "spy_calmar": bench_calmar,
    }
    return pd.Series(stats)

# One row of statistics per calendar year.
yearly = df_perf.groupby("year", group_keys=False).apply(year_stats)

# Both summary sections share the same six metrics and layout: labels are
# left-aligned in a 15-character column, values use an 8.4f format, and the
# last line of each section carries an extra trailing newline.
_metric_labels = ("CAGR:", "Volatility:", "Sharpe:", "Sortino:", "MaxDD:", "Calmar:")
_report_sections = (
    (
        "=== Strategy Performance ===",
        (strat_cagr, strat_vol, strat_sharpe, strat_sortino, strat_maxdd, strat_calmar),
    ),
    (
        "=== SPY Benchmark ===",
        (spy_cagr, spy_vol, spy_sharpe, spy_sortino, spy_maxdd, spy_calmar),
    ),
)
for _heading, _metric_values in _report_sections:
    print(_heading)
    for _pos, (_label, _value) in enumerate(zip(_metric_labels, _metric_values)):
        _tail = "\n" if _pos == len(_metric_labels) - 1 else ""
        print(f"{_label:<15}{_value:8.4f}{_tail}")

print("=== Year-by-Year Comparison ===")
print(yearly)

# Destination files inside the performance output directory.
yearly_path = os.path.join(
    OUTPUT_DIR_PERF,
    "14-regression_insp500_spyfilter-yearly_comparison_cap15.csv",
)
summary_path = os.path.join(
    OUTPUT_DIR_PERF,
    "14-regression_insp500_spyfilter-performance_summary_cap15.csv",
)

# Single-row summary: the regime confirmation setting plus the whole-period
# metrics for the strategy and for SPY.
_summary_row = {
    "spy_regime_confirm_days": SPY_REGIME_CONFIRM_DAYS,
    "strat_cagr": strat_cagr,
    "strat_vol": strat_vol,
    "strat_sharpe": strat_sharpe,
    "strat_sortino": strat_sortino,
    "strat_maxdd": strat_maxdd,
    "strat_calmar": strat_calmar,
    "spy_cagr": spy_cagr,
    "spy_vol": spy_vol,
    "spy_sharpe": spy_sharpe,
    "spy_sortino": spy_sortino,
    "spy_maxdd": spy_maxdd,
    "spy_calmar": spy_calmar,
}
df_summary = pd.DataFrame([_summary_row])

# The yearly table keeps its index (the year); the summary is written without one.
yearly.to_csv(yearly_path)
df_summary.to_csv(summary_path, index=False)

print("\nSaved:")
for _saved_path in (summary_path, yearly_path):
    print(f"  → {_saved_path}")
print("\n=== COMPLETE ===")

=== REGRESSION-ONLY WEEKLY TREND STRATEGY (VOL-BASED SIZING, WITH RANKS + SPY REGIME + TURNOVER FILTERS + CASH FLOOR) ===
SPY Regime Confirmation Period: 1 day(s)
Loaded universe: 3,591,967 rows
Universe with ATR20 merged: 3,591,967 rows

SPY Regime Statistics:
  Raw regime:       5,107 bull days, 1,935 bear days
  Confirmed regime: 5,107 bull days, 1,935 bear days

Running trading engine...

=== DEBUG 2010-11-24 PLAN ===
trade_date: 2010-11-26 00:00:00
spy_above_200: True
positions_before: 6
top_group_size: 26
exit_tickers: 1
planned_trades: 2
planned sample: [{'ticker': 'MFE', 'side': 'SELL', 'shares': 6802, 'reason': 'not_in_top_quintile', 'rank': 9999}, {'ticker': 'NVDA', 'side': 'BUY', 'shares': 1054695, 'reason': 'new_entry', 'rank': 18}]

=== DEBUG 2010-11-26 EXEC ===
planned_trades: 2
missing_open_count: 0
=== TRADING COMPLETE ===
Final portfolio value: 24028422.013000008
Final cash balance: 10044156.483000007
Total trades: 3010
Total weekly rankings: 36116
Trades saved to:    