In [11]:
# ============================
# CELL 1/5 — CONFIG (UPDATED: new sweep grids)
# ============================
import os
import glob
import time
import warnings
from datetime import datetime

import numpy as np
import pandas as pd

# Silence every warning category for the rest of the session.
warnings.filterwarnings("ignore")

# ----------------------------
# PATHS (inputs, then the output folder which is created on the spot)
# ----------------------------
UNIVERSE_DIR = "./12a-multiple-tradable_sp500_universe"
ATR_ROOT     = "./4-ATR20_adjusted_All_Prices"
SPY_FILE     = "./8-SPY_200DMA_market_regime/8-SPY_200DMA_regime.parquet"

OUT_DIR      = "./13a-trading_output_sweep_performance_daily_returns"
os.makedirs(OUT_DIR, exist_ok=True)

# ----------------------------
# SWEEP GRID (existing)
# ----------------------------
REG_LOOKBACKS    = [90]
ATR_WINDOWS      = [20]
TOP_PERCENTILES  = [0.95]
MIN_TRADE_VALUES = [5000.0, 10000.0]

# ----------------------------
# NEW SWEEP GRID (previously constants, now swept)
# ----------------------------
MAX_POSITION_WEIGHTS     = [0.12]
MIN_CASH_RESERVES        = [5000.0, 10000.0]
DRIFT_THRESHOLDS         = [0.05, 0.1, 0.02]
MIN_NEW_POSITION_WEIGHTS = [0.005, 0.01, 0.02, 0.03]

# ----------------------------
# CONSTANTS (not swept)
# ----------------------------
START_TRADING         = pd.Timestamp("1999-01-01")
INITIAL_CAPITAL       = 360_000
REBALANCE_DAY         = "Wednesday"
TRADING_DAYS_PER_YEAR = 252

# ----------------------------
# OUTPUT: per-config daily returns
# ----------------------------
WRITE_DAILY_RETURNS   = True             # set to False to produce only the summary
DAILY_RETURNS_FMT     = "parquet"        # "csv" is the only other supported value
DAILY_RETURNS_DIRNAME = "daily_returns"  # files go to OUT_DIR/daily_returns/<RUN_ID>/

# ----------------------------
# PARALLELISM
# Lower this (2 or 3) if memory usage spikes.
# ----------------------------
MAX_WORKERS = 4

# The number of runs is the product of the sizes of all eight grids.
total_runs_expected = 1
for _grid in (
    REG_LOOKBACKS,
    ATR_WINDOWS,
    TOP_PERCENTILES,
    MIN_TRADE_VALUES,
    MAX_POSITION_WEIGHTS,
    MIN_CASH_RESERVES,
    DRIFT_THRESHOLDS,
    MIN_NEW_POSITION_WEIGHTS,
):
    total_runs_expected *= len(_grid)

# Echo the effective configuration so the notebook output records it.
print("=== NOTEBOOK SWEEP CONFIG ===")
for _label, _value in (
    ("Lookbacks:", REG_LOOKBACKS),
    ("ATR windows:", ATR_WINDOWS),
    ("TOP_PERCENTILES:", TOP_PERCENTILES),
    ("MIN_TRADE_VALUES:", MIN_TRADE_VALUES),
    ("MAX_POSITION_WEIGHTS:", MAX_POSITION_WEIGHTS),
    ("MIN_CASH_RESERVES:", MIN_CASH_RESERVES),
    ("DRIFT_THRESHOLDS:", DRIFT_THRESHOLDS),
    ("MIN_NEW_POSITION_WEIGHTS:", MIN_NEW_POSITION_WEIGHTS),
    ("MAX_WORKERS:", MAX_WORKERS),
    ("Total runs expected:", total_runs_expected),
):
    print(_label, _value)


=== NOTEBOOK SWEEP CONFIG ===
Lookbacks: [90]
ATR windows: [20]
TOP_PERCENTILES: [0.95]
MIN_TRADE_VALUES: [5000.0, 10000.0]
MAX_POSITION_WEIGHTS: [0.12]
MIN_CASH_RESERVES: [5000.0, 10000.0]
DRIFT_THRESHOLDS: [0.05, 0.1, 0.02]
MIN_NEW_POSITION_WEIGHTS: [0.005, 0.01, 0.02, 0.03]
MAX_WORKERS: 4
Total runs expected: 48


In [12]:
# ============================
# CELL 2/5 — HELPERS + JOBLIB (UPDATED: config id includes new params)
# ============================
from dataclasses import dataclass

def resolve_universe_file(lookback: int) -> str:
    """
    Locate the universe parquet for a given regression lookback.

    Two known file names are checked first, in order. If neither exists, the
    first match (in sorted order) of '*{lookback}D*.parquet' inside
    UNIVERSE_DIR is used.

    Raises FileNotFoundError when nothing matches.
    """
    known_names = (
        f"12-tradable_sp500_universe_{lookback}D.parquet",
        f"tradable_sp500_universe_{lookback}D.parquet",
    )
    for name in known_names:
        path = os.path.join(UNIVERSE_DIR, name)
        if os.path.exists(path):
            return path

    # Fallback: any parquet whose name contains '<lookback>D'.
    matches = glob.glob(os.path.join(UNIVERSE_DIR, f"*{lookback}D*.parquet"))
    matches.sort()
    if not matches:
        raise FileNotFoundError(f"Could not find universe parquet for lookback={lookback} in {UNIVERSE_DIR}")
    return matches[0]

def resolve_atr_subdir(w: int) -> str:
    """
    Locate the ATR folder for window `w` under ATR_ROOT.

    A handful of fixed folder names are tried first. Otherwise the first
    directory returned by glob whose base name contains 'atr'
    (case-insensitive) and the window number is used.

    Raises FileNotFoundError when no folder qualifies.
    """
    for folder in (f"atr_{w}D", f"atr_{w}d", f"ATR_{w}D", f"atr{w}D"):
        path = os.path.join(ATR_ROOT, folder)
        if os.path.isdir(path):
            return path

    # Fallback: scan every entry directly under ATR_ROOT.
    window_text = str(w)
    for path in glob.glob(os.path.join(ATR_ROOT, "*")):
        if not os.path.isdir(path):
            continue
        base = os.path.basename(path)
        if "atr" in base.lower() and window_text in base:
            return path

    raise FileNotFoundError(f"Could not find ATR folder for w={w} under {ATR_ROOT}")

def infer_atr_col(columns, w: int):
    """
    Pick the ATR column for window `w` out of `columns`.

    Matching is case-insensitive. Exact spellings such as atr20, atr_20,
    atr20d, atr_20d, atr20_d, atr_20_d, atr20day, atr_20day and atr_20_day are
    tried in that priority order; failing that, the first column containing
    both 'atr' and the window number wins.

    Returns the column name as originally spelled, or None if nothing matches.
    """
    names = list(columns)
    lowered = [name.lower() for name in names]
    n = str(w)

    # Remember the first position of each lowercase name (mirrors list.index).
    first_pos = {}
    for pos, low in enumerate(lowered):
        first_pos.setdefault(low, pos)

    wanted = [
        f"{stem}{n}{tail}"
        for tail in ("", "d", "_d", "day")
        for stem in ("atr", "atr_")
    ]
    wanted.append(f"atr_{n}_day")

    for spelling in wanted:
        if spelling in first_pos:
            return names[first_pos[spelling]]

    # Loose fallback: any column mentioning both 'atr' and the window number.
    return next(
        (names[pos] for pos, low in enumerate(lowered) if "atr" in low and n in low),
        None,
    )

def load_spy_regime_map(spy_file: str):
    """
    Load the SPY regime parquet and build a date -> "risk on" lookup.

    Parameters
    ----------
    spy_file : path of a parquet file that provides 'spy_close' and
        'market_regime' columns, with dates either in a 'date'/'Date' column
        or in the index.

    Returns
    -------
    (spy, spy_regime_map)
        spy: the loaded frame with a datetime 'date' column and a boolean
            'spy_above_200dma' column (True where market_regime == 1).
        spy_regime_map: dict mapping each date to that boolean.

    Raises
    ------
    ValueError if the file has no 'spy_close' column.
    """
    spy = pd.read_parquet(spy_file)

    # Find the dates. The index is only promoted to a column when no date
    # column exists yet: an unnamed index is also what a default RangeIndex
    # looks like, and resetting it next to an existing 'date' column would
    # create a duplicate 'date' column and break the conversion below.
    if "date" not in spy.columns:
        if "Date" in spy.columns:
            spy = spy.rename(columns={"Date": "date"})
        elif spy.index.name in ["Date", "date", None]:
            spy = spy.reset_index().rename(columns={"index": "date", "Date": "date"})

    spy["date"] = pd.to_datetime(spy["date"])
    if "spy_close" not in spy.columns:
        raise ValueError("SPY file missing 'spy_close' column")

    # market_regime == 1 is treated as "SPY above its 200DMA".
    spy["spy_above_200dma"] = spy["market_regime"].astype(int) == 1
    spy_regime_map = spy.set_index("date")["spy_above_200dma"].to_dict()
    return spy, spy_regime_map

def make_config_id(
    lookback: int,
    atr_w: int,
    top_p: float,
    mtv: float,
    max_position_weight: float,
    min_cash_reserve: float,
    drift_threshold: float,
    min_new_position_weight: float,
) -> str:
    """
    Build the per-config identifier used in output file names.

    Fractions are turned into zero-padded integers so the name contains no
    decimal points: top_p is scaled by 1000 (0.95 -> 0950) and the three
    weight/threshold parameters by 10000 (0.12 -> 1200, 0.005 -> 0050).
    Dollar amounts are rounded to whole numbers.
    """
    def scaled(value: float, factor: int) -> int:
        # Round after scaling so 0.12 * 10000 = 1200.0000000000002 becomes 1200.
        return int(round(float(value) * factor))

    parts = [
        f"lb{int(lookback)}",
        f"atr{int(atr_w)}",
        f"tp{scaled(top_p, 1000):04d}",
        f"mtv{int(round(float(mtv)))}",
        f"cap{scaled(max_position_weight, 10000):04d}",
        f"cash{int(round(float(min_cash_reserve)))}",
        f"dr{scaled(drift_threshold, 10000):04d}",
        f"mnw{scaled(min_new_position_weight, 10000):04d}",
    ]
    return "_".join(parts)

# ---------- joblib progress in notebooks ----------
from contextlib import contextmanager
import joblib
from joblib import Parallel, delayed
from tqdm.auto import tqdm

@contextmanager
def tqdm_joblib(tqdm_object):
    """
    Context manager that advances `tqdm_object` as joblib batches complete.

    While active, joblib.parallel.BatchCompletionCallBack is replaced by a
    subclass that bumps the bar by the batch size before delegating to the
    original callback. On exit the original class is restored and the bar is
    closed, even if the body raised.
    """
    original_callback = joblib.parallel.BatchCompletionCallBack

    class _ProgressCallback(original_callback):
        def __call__(self, *args, **kwargs):
            tqdm_object.update(n=self.batch_size)
            return super().__call__(*args, **kwargs)

    joblib.parallel.BatchCompletionCallBack = _ProgressCallback
    try:
        yield tqdm_object
    finally:
        joblib.parallel.BatchCompletionCallBack = original_callback
        tqdm_object.close()


In [13]:
# ============================
# CELL 3/5 — ENGINE + METRICS (UPDATED: engine uses passed params)
# ============================
def fast_price_lookup(px_array, date_val):
    """
    Return the most recent price at or before `date_val`.

    `px_array` is a structured array with a 'date' field (searched with
    np.searchsorted, so it must be ascending) and a 'px' field.
    Returns NaN when `date_val` precedes the first stored date.
    """
    target = np.datetime64(date_val, "ns")
    # Count of stored dates <= target; zero means there is no earlier price.
    n_not_after = np.searchsorted(px_array["date"], target, side="right")
    if n_not_after == 0:
        return np.nan
    return px_array["px"][n_not_after - 1]

def snapshot_portfolio_close(date, cash, positions, px_by_ticker):
    """
    Value the portfolio using the latest stored price at or before `date`.

    Positions whose ticker has no price array, or no price yet on `date`,
    contribute nothing to the equity figure but are still counted.

    Returns (equity, cash + equity, number of positions).
    """
    invested = 0.0
    for ticker in positions:
        history = px_by_ticker.get(ticker)
        if history is not None:
            last_px = fast_price_lookup(history, date)
            if not np.isnan(last_px):
                invested += positions[ticker]["shares"] * last_px
    return invested, cash + invested, len(positions)

def snapshot_portfolio_exec_proxy(asof_date, cash, positions, exec_px_map, px_by_ticker_fallback):
    """
    Value the portfolio with the supplied execution-price estimates.

    For each position the price comes from `exec_px_map`; if that is missing,
    NaN or non-positive, the latest stored price at or before `asof_date` from
    `px_by_ticker_fallback` is tried instead. Positions that still have no
    positive price add nothing to equity but are still counted.

    Returns (equity, cash + equity, number of positions).
    """
    total = 0.0
    for ticker, holding in positions.items():
        price = exec_px_map.get(ticker, np.nan)

        unusable = pd.isna(price) or price <= 0
        if unusable:
            history = px_by_ticker_fallback.get(ticker)
            if history is not None:
                price = fast_price_lookup(history, asof_date)

        if pd.notna(price) and price > 0:
            total += int(holding["shares"]) * float(price)

    return total, cash + total, len(positions)

def is_rebalance_day(date: pd.Timestamp) -> bool:
    """Return True when `date` falls on the weekday named by REBALANCE_DAY."""
    weekday_name = date.day_name()
    return weekday_name == REBALANCE_DAY

def cap_and_redistribute_weights(w: np.ndarray, cap: float) -> np.ndarray:
    """
    Normalise weights to sum to 1 and limit each one to `cap`.

    The input is never modified. If the cap is too tight for the weights to
    sum to 1 (len(w) * cap < 1), the normalised weights are simply clipped.
    Otherwise the excess above the cap is repeatedly handed to the entries
    that are not above it, pro rata to their current weight, until nothing
    exceeds the cap (bounded at 10,000 passes).
    """
    weights = np.array(w, dtype=float)  # always a fresh float copy
    if weights.size == 0:
        return weights

    total = weights.sum()
    if total > 0:
        weights /= total

    # Infeasible cap: the result cannot sum to 1, so just clip.
    if weights.size * cap < 1.0:
        return np.minimum(weights, cap)

    passes_left = 10_000
    while passes_left > 0:
        passes_left -= 1

        too_big = weights > cap
        if not too_big.any():
            break
        has_room = ~too_big

        surplus = (weights[too_big] - cap).sum()
        weights[too_big] = cap

        room_total = weights[has_room].sum()
        if room_total <= 0:
            break
        weights[has_room] += surplus * (weights[has_room] / room_total)

    return weights

@dataclass
class PreparedContext:
    """Pre-computed lookups shared by every backtest run over one merged frame (built by prepare_context)."""
    # date -> sub-DataFrame holding that date's rows
    df_by_date: dict
    # ticker -> structured array with 'date' and 'px' (close_adj) fields, sorted by date
    px_by_ticker: dict
    # every date present in df_by_date, ascending
    dates: list
    # date -> the following date in `dates`, or None for the last one
    next_date_map: dict

def prepare_context(df: pd.DataFrame) -> PreparedContext:
    """
    Pre-compute the lookups the backtest engine needs from the merged frame.

    Builds a per-date slice of `df`, a per-ticker date-sorted price array
    (dates as datetime64[ns], prices from 'close_adj'), the ascending list of
    dates, and a map from each date to the one that follows it.
    """
    frames_by_date = {}
    for day, rows in df.groupby("date", sort=False):
        frames_by_date[day] = rows

    record_dtype = [("date", "datetime64[ns]"), ("px", "float64")]
    histories = {}
    for ticker, rows in df.groupby("ticker", sort=False):
        ordered = rows.sort_values("date")
        history = np.zeros(len(ordered), dtype=record_dtype)
        history["date"] = ordered["date"].values.astype("datetime64[ns]")
        history["px"] = ordered["close_adj"].astype(float).values
        histories[ticker] = history

    ordered_dates = sorted(frames_by_date.keys())
    # Pair each date with its successor; the final date maps to None.
    following = dict(zip(ordered_dates, ordered_dates[1:] + [None]))

    return PreparedContext(
        df_by_date=frames_by_date,
        px_by_ticker=histories,
        dates=ordered_dates,
        next_date_map=following,
    )

def compute_perf_and_daily(equity_df: pd.DataFrame, spy_df: pd.DataFrame):
    """
    Compare the strategy's equity curve with SPY over their common dates.

    Returns:
      metrics: dict with the sample description (rows, start/end date and
               value) followed by cagr / vol / sharpe / sortino / maxdd /
               calmar for the strategy ('strat_*') and for SPY ('spy_*')
      daily_df: DataFrame with date, portfolio_value, strat_ret, spy_close,
                spy_ret, cash, num_positions

    Both are None when the two inputs share no dates.
    """
    strat = equity_df.copy()
    strat["date"] = pd.to_datetime(strat["date"])

    bench = spy_df[["date", "spy_close"]].copy()
    bench["date"] = pd.to_datetime(bench["date"])

    merged = strat.merge(bench, on="date", how="inner").sort_values("date")
    if merged.empty:
        return None, None

    # Simple daily returns; the first row has no predecessor and is set to 0.
    for level_col, ret_col in (("portfolio_value", "strat_ret"), ("spy_close", "spy_ret")):
        merged[ret_col] = merged[level_col].pct_change().fillna(0.0)

    rf = 0.0
    annualizer = np.sqrt(TRADING_DAYS_PER_YEAR)
    first_day = merged["date"].iloc[0]
    last_day = merged["date"].iloc[-1]
    n_years = (last_day - first_day).days / 365.25

    def risk_adjusted(returns, dispersion_sample):
        # Annualised mean return over the std of `dispersion_sample`;
        # 0.0 when that std is zero or undefined.
        sd = dispersion_sample.std()
        if sd == 0 or np.isnan(sd):
            return 0.0
        return (returns.mean() - rf) / sd * annualizer

    def describe(prefix, total_return, levels, returns):
        growth = float((1 + total_return) ** (1 / n_years) - 1 if n_years > 0 else np.nan)
        drawdown = float((levels / levels.cummax() - 1).min())
        return {
            f"{prefix}_cagr": growth,
            f"{prefix}_vol": float(returns.std() * annualizer),
            f"{prefix}_sharpe": float(risk_adjusted(returns, returns)),
            # Sortino uses the std of the negative returns only.
            f"{prefix}_sortino": float(risk_adjusted(returns, returns[returns < 0])),
            f"{prefix}_maxdd": drawdown,
            f"{prefix}_calmar": float(growth / abs(drawdown)) if drawdown != 0 else np.nan,
        }

    start_val = float(merged["portfolio_value"].iloc[0])
    end_val = float(merged["portfolio_value"].iloc[-1])
    strat_total = end_val / start_val - 1.0
    spy_total = float(merged["spy_close"].iloc[-1] / merged["spy_close"].iloc[0] - 1.0)

    metrics = {
        "rows_perf": int(len(merged)),
        "start_date": str(first_day.date()),
        "end_date": str(last_day.date()),
        "start_value": start_val,
        "end_value": end_val,
    }
    metrics.update(describe("strat", strat_total, merged["portfolio_value"], merged["strat_ret"]))
    metrics.update(describe("spy", spy_total, merged["spy_close"], merged["spy_ret"]))

    daily_cols = [
        "date",
        "portfolio_value",
        "strat_ret",
        "spy_close",
        "spy_ret",
        "cash",
        "num_positions",
    ]
    daily_df = merged[daily_cols].copy()

    return metrics, daily_df

def run_backtest_with_daily(
    ctx: PreparedContext,
    spy_df: pd.DataFrame,
    spy_regime_map: dict,
    top_percentile: float,
    min_trade_value: float,
    max_position_weight: float,
    min_cash_reserve: float,
    drift_threshold: float,
    min_new_position_weight: float,
):
    """
    Run one backtest over `ctx` for a single parameter combination.

    Each date from START_TRADING onwards goes through three steps:
      0) orders planned on the previous rebalance day are executed at this
         date's 'open_adj' price (falling back to the latest stored close),
         sells before buys;
      1) on a rebalance day, orders for the next available date are planned
         from this date's closes: positions outside the top slope group are
         exited, and the top group is targeted with inverse-ATR weights
         capped at `max_position_weight`;
      2) the portfolio is marked to market at the close.

    Parameters
    ----------
    ctx : pre-computed lookups from prepare_context.
    spy_df : SPY frame handed to compute_perf_and_daily as the benchmark.
    spy_regime_map : date -> bool; buys are only planned when True
        (dates missing from the map count as True).
    top_percentile : quantile of 'slope_adj' a ticker must reach to be held.
    min_trade_value : planned trades with a smaller estimated value are skipped.
    max_position_weight : cap on any single position's portfolio weight.
    min_cash_reserve : cash that buys may not dip into.
    drift_threshold : minimum |target weight - current weight| before a
        position is adjusted (unless it breaches the cap).
    min_new_position_weight : new positions with a smaller target weight
        are not opened.

    Returns
    -------
    (metrics, daily_df)
        metrics: the dict from compute_perf_and_daily (or {"rows_perf": 0}
            when it has no result) plus 'total_trades' and the 'diag_*'
            execution counters.
        daily_df: the daily frame from compute_perf_and_daily, or None.
    """
    cash = float(INITIAL_CAPITAL)
    positions = {}  # ticker -> {"shares": int, "entry": float}
    equity_curve = []
    pending_orders = {}  # execution date -> {"open_px_map", "planned_trades"}

    exec_diag = {
        "orders_seen": 0,
        "orders_executed": 0,
        "dropped_missing_open": 0,
        "dropped_cash_floor_buy_plan": 0,
        "dropped_cash_floor_buy_exec": 0,
        "clipped_cash_floor_buy_exec": 0,
    }

    total_trades = 0

    for date in ctx.dates:
        if date < START_TRADING:
            continue

        day = ctx.df_by_date.get(date)
        if day is None or day.empty:
            continue

        # -------------------------
        # 0) EXECUTE pending orders
        # -------------------------
        if date in pending_orders:
            payload = pending_orders.pop(date)
            open_px_map = payload["open_px_map"]
            planned_trades = payload["planned_trades"]

            # Sells first so their proceeds can fund the buys; then by rank.
            planned_trades = sorted(planned_trades, key=lambda x: (0 if x["side"] == "SELL" else 1, x["rank"]))

            for tr in planned_trades:
                t = tr["ticker"]
                side = tr["side"]
                sh_plan = int(tr["shares"])
                if not t or sh_plan <= 0:
                    continue

                # Prefer today's open; otherwise the latest stored close.
                px = open_px_map.get(t, np.nan)
                if pd.isna(px) or px <= 0:
                    arr = ctx.px_by_ticker.get(t)
                    if arr is not None:
                        px = fast_price_lookup(arr, date)
                if pd.isna(px) or px <= 0:
                    exec_diag["dropped_missing_open"] += 1
                    continue

                px = float(px)

                if side == "SELL":
                    cur = int(positions.get(t, {}).get("shares", 0))
                    sh_exec = min(sh_plan, cur)  # never sell more than is held
                    if sh_exec <= 0:
                        continue
                    cash += sh_exec * px
                    new_sh = cur - sh_exec
                    if new_sh <= 0:
                        positions.pop(t, None)
                    else:
                        positions[t]["shares"] = new_sh
                    total_trades += 1
                    exec_diag["orders_executed"] += 1

                else:  # BUY
                    available = cash - min_cash_reserve
                    if available <= 0:
                        exec_diag["dropped_cash_floor_buy_exec"] += 1
                        continue

                    # Clip the order to what the cash above the reserve can pay for.
                    max_affordable = int(np.floor(available / px))
                    sh_exec = min(sh_plan, max_affordable)
                    sh_exec = int(sh_exec)
                    if sh_exec <= 0:
                        exec_diag["dropped_cash_floor_buy_exec"] += 1
                        continue

                    if sh_exec < sh_plan:
                        exec_diag["clipped_cash_floor_buy_exec"] += 1

                    cash -= sh_exec * px
                    if t in positions:
                        positions[t]["shares"] = int(positions[t]["shares"]) + sh_exec
                    else:
                        positions[t] = {"shares": sh_exec, "entry": px}
                    total_trades += 1
                    # Count only orders that actually filled; dropped orders
                    # are tracked by their own counters above.
                    exec_diag["orders_executed"] += 1

        # -------------------------
        # 1) PLAN on Wednesday close
        # -------------------------
        if is_rebalance_day(date):
            trade_date = ctx.next_date_map.get(date)
            if trade_date is not None and trade_date in ctx.df_by_date:
                spy_above_200 = bool(spy_regime_map.get(date, True))
                can_buy_next_open = spy_above_200

                # Without an 'in_sp500' column every row is treated as a member.
                in_sp = day["in_sp500"] if "in_sp500" in day.columns else True
                rankable = day[(day["slope_adj"].notna()) & (in_sp == True)].copy()

                if not rankable.empty:
                    rankable = rankable.sort_values("slope_adj", ascending=False)
                    cutoff = rankable["slope_adj"].quantile(top_percentile)
                    top_group = rankable[rankable["slope_adj"] >= cutoff].copy()

                    if not top_group.empty:
                        top_group = top_group.sort_values("slope_adj", ascending=False)
                        top_group["slope_rank_within_top"] = np.arange(1, len(top_group) + 1)
                        rank_map = dict(zip(top_group["ticker"], top_group["slope_rank_within_top"]))
                        top_tickers = set(top_group["ticker"].values)

                        # Today's closes stand in for tomorrow's execution prices.
                        exec_px_map = day.set_index("ticker")["close_adj"].to_dict()
                        trade_day = ctx.df_by_date[trade_date]
                        open_px_map = trade_day.set_index("ticker")["open_adj"].to_dict()

                        # Planning works on copies; the real cash and positions
                        # only change when the orders execute.
                        cash_plan = float(cash)
                        pos_plan = {t: int(p["shares"]) for t, p in positions.items()}
                        planned = []

                        def px_est(ticker: str) -> float:
                            p = exec_px_map.get(ticker, np.nan)
                            if pd.isna(p) or p <= 0:
                                arr = ctx.px_by_ticker.get(ticker)
                                if arr is not None:
                                    p = fast_price_lookup(arr, date)
                            return float(p) if (pd.notna(p) and p > 0) else np.nan

                        # (A) exit sells
                        exit_tickers = [t for t in list(pos_plan.keys()) if t not in top_tickers]
                        for t in exit_tickers:
                            sh0 = int(pos_plan.get(t, 0))
                            if sh0 <= 0:
                                continue
                            p = px_est(t)
                            if pd.isna(p):
                                continue
                            cash_plan += sh0 * p
                            pos_plan.pop(t, None)
                            planned.append({"ticker": t, "side": "SELL", "shares": sh0, "rank": int(rank_map.get(t, 9999))})

                        # (B) revalue (proxy)
                        pos_plan_struct = {t: {"shares": sh} for t, sh in pos_plan.items()}
                        _, portfolio_exec, _ = snapshot_portfolio_exec_proxy(
                            date, cash_plan, pos_plan_struct, exec_px_map, ctx.px_by_ticker
                        )
                        effective_equity = max(portfolio_exec - min_cash_reserve, 0.0)

                        # (C) targets
                        tg = top_group.copy()
                        tg = tg[
                            tg["atr"].notna() & (tg["atr"] > 0) &
                            tg["close_adj"].notna() & (tg["close_adj"] > 0)
                        ].copy()

                        if not tg.empty:
                            inv_vol = 1.0 / tg["atr"].astype(float)
                            total_inv_vol = inv_vol.sum()

                            if total_inv_vol > 0:
                                tg["raw_weight"] = inv_vol / total_inv_vol
                                tg["weight"] = cap_and_redistribute_weights(tg["raw_weight"].to_numpy(), max_position_weight)

                                tg["target_value"] = effective_equity * tg["weight"]
                                tg["exec_px_est"] = tg["ticker"].map(exec_px_map)
                                tg = tg[tg["exec_px_est"].notna() & (tg["exec_px_est"] > 0)].copy()

                                tg["target_value"] = np.minimum(tg["target_value"], max_position_weight * portfolio_exec)
                                tg["target_shares"] = np.floor(tg["target_value"] / tg["exec_px_est"]).astype(int)
                                tg = tg[tg["target_shares"] > 0].copy()
                                tg = tg.sort_values("slope_adj", ascending=False)

                                total_portfolio_value_exec = portfolio_exec

                                for _, r in tg.iterrows():
                                    t = str(r["ticker"])
                                    rank = int(rank_map.get(t, 9999))
                                    p = float(r["exec_px_est"])
                                    if not (p > 0):
                                        continue

                                    target_sh = int(r["target_shares"])
                                    cur_sh = int(pos_plan.get(t, 0))

                                    max_sh_allowed = (
                                        int(np.floor((max_position_weight * total_portfolio_value_exec) / p))
                                        if total_portfolio_value_exec > 0 else 0
                                    )
                                    target_sh = min(target_sh, max_sh_allowed)

                                    target_val = target_sh * p
                                    target_w = (target_val / total_portfolio_value_exec) if total_portfolio_value_exec > 0 else 0.0
                                    cur_val = cur_sh * p
                                    cur_w = (cur_val / total_portfolio_value_exec) if total_portfolio_value_exec > 0 else 0.0

                                    weight_diff = abs(target_w - cur_w)
                                    is_new = (cur_sh == 0)
                                    cap_breach = (cur_w > max_position_weight + 1e-9)

                                    # Leave small drifts alone unless the position is over the cap.
                                    if (weight_diff < drift_threshold) and (not cap_breach):
                                        continue

                                    # SELL
                                    if target_sh < cur_sh:
                                        trade_sh = cur_sh - target_sh
                                        est_value = trade_sh * p
                                        if est_value < min_trade_value:
                                            continue

                                        cash_plan += est_value
                                        new_sh = cur_sh - trade_sh
                                        if new_sh <= 0:
                                            pos_plan.pop(t, None)
                                        else:
                                            pos_plan[t] = new_sh

                                        planned.append({"ticker": t, "side": "SELL", "shares": int(trade_sh), "rank": rank})

                                    # BUY
                                    elif target_sh > cur_sh:
                                        if not can_buy_next_open:
                                            continue

                                        trade_sh = target_sh - cur_sh
                                        est_value = trade_sh * p

                                        if is_new and target_w < min_new_position_weight:
                                            continue
                                        if est_value < min_trade_value:
                                            continue
                                        if est_value > (cash_plan - min_cash_reserve):
                                            exec_diag["dropped_cash_floor_buy_plan"] += 1
                                            continue

                                        cash_plan -= est_value
                                        pos_plan[t] = cur_sh + trade_sh
                                        planned.append({"ticker": t, "side": "BUY", "shares": int(trade_sh), "rank": rank})

                        pending_orders[trade_date] = {"open_px_map": open_px_map, "planned_trades": planned}
                        exec_diag["orders_seen"] += len(planned)

        # -------------------------
        # 2) daily mark-to-market
        # -------------------------
        _, pv, npos = snapshot_portfolio_close(date, cash, positions, ctx.px_by_ticker)
        equity_curve.append({
            "date": date,
            "portfolio_value": float(pv),
            "cash": float(cash),
            "num_positions": int(npos),
        })

    equity_df = pd.DataFrame(equity_curve)
    metrics, daily_df = compute_perf_and_daily(equity_df, spy_df)

    if metrics is None:
        metrics = {"rows_perf": 0}
    metrics["total_trades"] = int(total_trades)
    for k, v in exec_diag.items():
        metrics[f"diag_{k}"] = int(v)

    return metrics, daily_df


In [14]:
# ============================
# CELL 4/5 — LOADERS + WORKER (UPDATED: loops across new grids + writes config columns)
# ============================
def load_universe_df(lookback: int) -> pd.DataFrame:
    """
    Load the universe parquet for `lookback` and trim it to the engine's columns.

    Keeps whichever of date, ticker, open_adj, close_adj, slope_adj and
    in_sp500 are present, parses 'date' as datetime and coerces the three
    numeric columns (unparseable values become NaN).
    """
    raw = pd.read_parquet(resolve_universe_file(lookback))
    raw["date"] = pd.to_datetime(raw["date"])

    wanted = ("date", "ticker", "open_adj", "close_adj", "slope_adj", "in_sp500")
    trimmed = raw[[col for col in wanted if col in raw.columns]].copy()

    for col in ("slope_adj", "close_adj", "open_adj"):
        trimmed[col] = pd.to_numeric(trimmed[col], errors="coerce")
    return trimmed

def load_atr_all(atr_w: int) -> pd.DataFrame:
    """
    Stack every per-ticker ATR parquet for window `atr_w` into one long frame.

    The ATR column name is inferred by probing up to the first 50 files; the
    ticker is the file name with '.parquet' removed. The result has the
    columns date, atr and ticker.

    Raises FileNotFoundError when the folder holds no parquet files and
    ValueError when no ATR column can be inferred.
    """
    atr_dir = resolve_atr_subdir(atr_w)
    parquet_names = [name for name in os.listdir(atr_dir) if name.endswith(".parquet")]
    if not parquet_names:
        raise FileNotFoundError(f"No ATR parquet files found in {atr_dir}")

    # Probe files until one yields an ATR column and also has a 'date' column.
    atr_col = None
    for name in parquet_names[:50]:
        probe_cols = pd.read_parquet(os.path.join(atr_dir, name)).columns
        atr_col = infer_atr_col(probe_cols, atr_w)
        if atr_col is not None and "date" in probe_cols:
            break
    if atr_col is None:
        raise ValueError(f"Could not infer ATR column for window={atr_w} in {atr_dir}")

    def read_one(name: str) -> pd.DataFrame:
        frame = pd.read_parquet(os.path.join(atr_dir, name), columns=["date", atr_col])
        frame["date"] = pd.to_datetime(frame["date"])
        frame["ticker"] = name.replace(".parquet", "")
        return frame.rename(columns={atr_col: "atr"})

    atr_all = pd.concat([read_one(name) for name in parquet_names], ignore_index=True)
    atr_all["atr"] = pd.to_numeric(atr_all["atr"], errors="coerce")
    return atr_all

def _write_daily_returns(daily_df: pd.DataFrame, daily_dir: str, cfg: dict):
    """
    Save one configuration's daily returns under `daily_dir`.

    The file is named after make_config_id(...) and every row carries the
    eight configuration values as extra columns. The format is CSV when
    DAILY_RETURNS_FMT is "csv" (case-insensitive), parquet otherwise.

    Returns the written path, or None when there is nothing to write.
    """
    if daily_df is None or daily_df.empty:
        return None

    cfg_id = make_config_id(
        lookback=cfg["lookback_days"],
        atr_w=cfg["atr_days"],
        top_p=cfg["top_percentile"],
        mtv=cfg["min_trade_value"],
        max_position_weight=cfg["max_position_weight"],
        min_cash_reserve=cfg["min_cash_reserve"],
        drift_threshold=cfg["drift_threshold"],
        min_new_position_weight=cfg["min_new_position_weight"],
    )

    out = daily_df.copy()

    # Stamp the configuration on every row so files can be concatenated later.
    config_columns = (
        ("lookback_days", int),
        ("atr_days", int),
        ("top_percentile", float),
        ("min_trade_value", float),
        ("max_position_weight", float),
        ("min_cash_reserve", float),
        ("drift_threshold", float),
        ("min_new_position_weight", float),
    )
    for key, caster in config_columns:
        out[key] = caster(cfg[key])

    os.makedirs(daily_dir, exist_ok=True)

    as_csv = DAILY_RETURNS_FMT.lower() == "csv"
    path = os.path.join(daily_dir, f"{cfg_id}.csv" if as_csv else f"{cfg_id}.parquet")
    if as_csv:
        out.to_csv(path, index=False)
    else:
        out.to_parquet(path, index=False)
    return path

def run_job(lookback: int, atr_w: int, daily_dir: str | None = None) -> list[dict]:
    """
    Run every parameter combination for one (lookback, ATR window) pair.

    The SPY regime, universe and ATR data are loaded, merged and prepared
    once, then the backtest is run for each combination of the six swept
    grids (TOP_PERCENTILES, MIN_TRADE_VALUES, MAX_POSITION_WEIGHTS,
    MIN_CASH_RESERVES, DRIFT_THRESHOLDS, MIN_NEW_POSITION_WEIGHTS).

    Parameters
    ----------
    lookback : regression lookback selecting the universe file.
    atr_w : ATR window selecting the ATR folder.
    daily_dir : folder for per-config daily returns; nothing is written when
        None or when WRITE_DAILY_RETURNS is off.

    Returns
    -------
    One summary dict per combination: the configuration values followed by
    the metrics returned by run_backtest_with_daily.
    """
    from itertools import product  # local import keeps this cell self-contained

    # load spy inside worker (avoids pickling + notebook weirdness)
    spy_df, spy_regime_map = load_spy_regime_map(SPY_FILE)

    df_univ = load_universe_df(lookback)
    atr_all = load_atr_all(atr_w)

    # merge ATR once
    df = df_univ.merge(atr_all, on=["date", "ticker"], how="left")

    # prepare context once
    ctx = prepare_context(df)

    # product() varies the last grid fastest, i.e. the same order as nesting
    # the six loops from TOP_PERCENTILES (outermost) inwards.
    grid = product(
        TOP_PERCENTILES,
        MIN_TRADE_VALUES,
        MAX_POSITION_WEIGHTS,
        MIN_CASH_RESERVES,
        DRIFT_THRESHOLDS,
        MIN_NEW_POSITION_WEIGHTS,
    )

    out_rows = []
    for top_p, mtv, mpw, mcr, drift, mnw in grid:
        # The keys double as engine keyword arguments and summary columns.
        params = {
            "top_percentile": float(top_p),
            "min_trade_value": float(mtv),
            "max_position_weight": float(mpw),
            "min_cash_reserve": float(mcr),
            "drift_threshold": float(drift),
            "min_new_position_weight": float(mnw),
        }

        metrics, daily_df = run_backtest_with_daily(
            ctx=ctx,
            spy_df=spy_df,
            spy_regime_map=spy_regime_map,
            **params,
        )

        row = {
            "lookback_days": int(lookback),
            "atr_days": int(atr_w),
            **params,
            **metrics,
        }

        if WRITE_DAILY_RETURNS and daily_dir is not None:
            _write_daily_returns(daily_df=daily_df, daily_dir=daily_dir, cfg=row)

        out_rows.append(row)

    return out_rows


In [15]:
# ============================
# CELL 5/5 — RUN SWEEP + SAVE SUMMARY (+ per-config daily returns)
# ============================
# One job per (lookback, ATR window) pair; each job runs the whole inner grid.
jobs = []
for lb in REG_LOOKBACKS:
    for aw in ATR_WINDOWS:
        jobs.append((lb, aw))

runs_per_job = 1
for _grid in (
    TOP_PERCENTILES,
    MIN_TRADE_VALUES,
    MAX_POSITION_WEIGHTS,
    MIN_CASH_RESERVES,
    DRIFT_THRESHOLDS,
    MIN_NEW_POSITION_WEIGHTS,
):
    runs_per_job *= len(_grid)
total_runs = len(jobs) * runs_per_job

print("Jobs (lookback x ATR):", len(jobs))
print("Runs per job:", runs_per_job)
print("Total runs:", total_runs)

# The timestamp keeps the outputs of separate sweeps apart.
RUN_ID = datetime.now().strftime("%Y%m%d-%H%M%S")
DAILY_DIR = os.path.join(OUT_DIR, DAILY_RETURNS_DIRNAME, RUN_ID) if WRITE_DAILY_RETURNS else None
if DAILY_DIR is not None:
    os.makedirs(DAILY_DIR, exist_ok=True)
    print("Daily returns dir:", DAILY_DIR)

t0 = time.time()

progress_bar = tqdm(total=len(jobs), desc="Jobs completed")
with tqdm_joblib(progress_bar):
    worker_pool = Parallel(n_jobs=MAX_WORKERS, backend="loky")
    results = worker_pool(
        delayed(run_job)(lb, aw, daily_dir=DAILY_DIR) for (lb, aw) in jobs
    )

# Flatten the per-job row lists into one table.
summary_rows = []
for job_rows in results:
    summary_rows.extend(job_rows)
summary_df = pd.DataFrame(summary_rows)

elapsed = time.time() - t0
print(f"\nDone. Rows: {len(summary_df):,} (expected {total_runs})")
print(f"Elapsed: {elapsed:.1f}s")

# Save summary in both formats
summary_stem = os.path.join(OUT_DIR, f"sweep_summary_{RUN_ID}")
csv_path = summary_stem + ".csv"
pq_path  = summary_stem + ".parquet"

summary_df.to_csv(csv_path, index=False)
summary_df.to_parquet(pq_path, index=False)

print("Saved:", csv_path)
print("Saved:", pq_path)

# quick peek: best configurations first
summary_df.sort_values(["strat_cagr", "strat_sharpe"], ascending=False).head(10)


Jobs (lookback x ATR): 1
Runs per job: 48
Total runs: 48
Daily returns dir: ./13a-trading_output_sweep_performance_daily_returns\daily_returns\20251231-100856


Jobs completed:   0%|          | 0/1 [00:00<?, ?it/s]


Done. Rows: 48 (expected 48)
Elapsed: 387.0s
Saved: ./13a-trading_output_sweep_performance_daily_returns\sweep_summary_20251231-100856.csv
Saved: ./13a-trading_output_sweep_performance_daily_returns\sweep_summary_20251231-100856.parquet


Unnamed: 0,lookback_days,atr_days,top_percentile,min_trade_value,max_position_weight,min_cash_reserve,drift_threshold,min_new_position_weight,rows_perf,start_date,...,spy_sortino,spy_maxdd,spy_calmar,total_trades,diag_orders_seen,diag_orders_executed,diag_dropped_missing_open,diag_dropped_cash_floor_buy_plan,diag_dropped_cash_floor_buy_exec,diag_clipped_cash_floor_buy_exec
32,90,20,0.95,10000.0,0.12,5000.0,0.02,0.005,6790,1999-01-04,...,0.663035,-0.551894,0.15337,7439,7439,7439,0,287,0,1
33,90,20,0.95,10000.0,0.12,5000.0,0.02,0.01,6790,1999-01-04,...,0.663035,-0.551894,0.15337,7439,7439,7439,0,287,0,1
34,90,20,0.95,10000.0,0.12,5000.0,0.02,0.02,6790,1999-01-04,...,0.663035,-0.551894,0.15337,7439,7439,7439,0,287,0,1
44,90,20,0.95,10000.0,0.12,10000.0,0.02,0.005,6790,1999-01-04,...,0.663035,-0.551894,0.15337,7430,7430,7430,0,288,0,1
45,90,20,0.95,10000.0,0.12,10000.0,0.02,0.01,6790,1999-01-04,...,0.663035,-0.551894,0.15337,7430,7430,7430,0,288,0,1
46,90,20,0.95,10000.0,0.12,10000.0,0.02,0.02,6790,1999-01-04,...,0.663035,-0.551894,0.15337,7430,7430,7430,0,288,0,1
8,90,20,0.95,5000.0,0.12,5000.0,0.02,0.005,6790,1999-01-04,...,0.663035,-0.551894,0.15337,7510,7510,7510,0,297,0,2
9,90,20,0.95,5000.0,0.12,5000.0,0.02,0.01,6790,1999-01-04,...,0.663035,-0.551894,0.15337,7510,7510,7510,0,297,0,2
10,90,20,0.95,5000.0,0.12,5000.0,0.02,0.02,6790,1999-01-04,...,0.663035,-0.551894,0.15337,7510,7510,7510,0,297,0,2
20,90,20,0.95,5000.0,0.12,10000.0,0.02,0.005,6790,1999-01-04,...,0.663035,-0.551894,0.15337,7495,7495,7495,0,291,0,1
