In [1]:
import os
import numpy as np
import pandas as pd

pd.set_option("display.max_columns", 200)
pd.set_option("display.width", 200)

"""a notebook-first audit harness whose primary job is math verification:

Recompute what your main backtest uses (weekly ranks/targets/trades) and compare vs:

values stored in your 90-day regression parquet files (slope_daily, slope_annual, r2)

the values used by the main backtest (slope_adj inside 12-tradable_sp500_universe.parquet)

the executed trades saved by the backtest (13-trades_...parquet)

Below is a drop-in notebook template (cells) that:

audits any 4-week window

produces tables per week (ranking, exits, targets, decision math, planned vs actual)

recomputes rolling 90-day log regression exactly like your Numba math and compares it to:

the regression parquet file values (slope_daily, slope_annual, r2)

(optionally) your universe’s slope_adj
    """

# ============================================================
# CONFIG (adjust paths if needed)
# ============================================================

UNIVERSE_FILE   = "./12-tradable_sp500_universe/12-tradable_sp500_universe.parquet"
ATR20_DIR       = "./4-ATR20_adjusted_All_Prices"
SPY_FILE        = "./8-SPY_200DMA_market_regime/8-SPY_200DMA_regime.parquet"

TRADES_FILE     = "./13-trading_output_regression_insp500_spyfilter/13-trades_regression_insp500_spyfilter.parquet"
EQUITY_FILE     = "./13-trading_output_regression_insp500_spyfilter/13-equity_curve_regression_insp500_spyfilter.parquet"

# Your 90-day regression outputs:
REG90_DIR       = r"./7-90Day_exp_regression_adjusted_all_prices"  # change to MomentumSystem\7-... if needed

# Strategy params (mirror main script)
TOP_PERCENTILE        = 0.90
REBALANCE_DAY         = "Wednesday"
MIN_CASH_RESERVE      = 2000.0

DRIFT_THRESHOLD          = 0.01
MIN_TRADE_VALUE          = 3000.0
MIN_NEW_POSITION_WEIGHT  = 0.005

WINDOW = 90
TRADING_DAYS_PER_YEAR = 252


In [2]:
# -----------------------
# Load universe
# -----------------------
df = pd.read_parquet(UNIVERSE_FILE).copy()
df["date"] = pd.to_datetime(df["date"])

for c in ["slope_adj", "close_adj", "open_adj"]:
    if c in df.columns:
        df[c] = pd.to_numeric(df[c], errors="coerce")

df_idx = df.set_index(["date", "ticker"]).sort_index()

# trading calendar based on universe dates
calendar_dates = sorted(df["date"].unique())
calendar_set = set(pd.Timestamp(d) for d in calendar_dates)

next_date_map = {pd.Timestamp(d): (pd.Timestamp(calendar_dates[i + 1]) if i + 1 < len(calendar_dates) else None)
                 for i, d in enumerate(calendar_dates)}

def is_rebalance_day(date: pd.Timestamp) -> bool:
    return date.day_name() == REBALANCE_DAY

# -----------------------
# Load trades/equity
# -----------------------
trades = pd.read_parquet(TRADES_FILE).copy()
trades["signal_date"] = pd.to_datetime(trades["signal_date"])
trades["exec_date"]   = pd.to_datetime(trades["exec_date"])
trades["price"]       = pd.to_numeric(trades["price"], errors="coerce")
trades["shares"]      = pd.to_numeric(trades["shares"], errors="coerce").astype("Int64")

equity = pd.read_parquet(EQUITY_FILE).copy()
equity["date"] = pd.to_datetime(equity["date"])

# -----------------------
# Load SPY regime
# -----------------------
spy = pd.read_parquet(SPY_FILE).copy()
if spy.index.name in ["Date", "date", None]:
    spy = spy.reset_index().rename(columns={"index": "date", "Date": "date"})
spy["date"] = pd.to_datetime(spy["date"])

if "market_regime" not in spy.columns:
    raise ValueError("SPY file missing 'market_regime' column")

spy["spy_above_200dma"] = spy["market_regime"].astype(int) == 1
spy_regime_map = spy.set_index("date")["spy_above_200dma"].to_dict()

print("Universe rows:", len(df))
print("Trades rows:", len(trades))
print("Equity rows:", len(equity))
print("SPY rows:", len(spy))


Universe rows: 3587417
Trades rows: 12851
Equity rows: 6783
SPY rows: 7036


In [3]:
# ============================================================
# Regression recompute EXACTLY like your Numba script
# ============================================================
"""_summary_
This recomputes rolling 90-day OLS slope on log(close_adj) with:

x = 0..89

cov_xy = mean((x-x̄)(y-ȳ))

var_x = mean((x-x̄)²)

r2 = 1 - SSR/SST (or 1.0 when SST==0)
    """

x = np.arange(WINDOW, dtype=np.float64)
x_mean = x.mean()
var_x = np.mean((x - x_mean)**2)

def recompute_reg90_from_close_adj(close_adj: np.ndarray) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
    """
    close_adj: 1D array of adjusted closes aligned to dates.
    Returns: slope_daily, slope_annual, r2 arrays (len = n), NaN until index WINDOW.
    Mirrors your Numba logic (population means/vars, window = [t-WINDOW, t)).
    """
    px = close_adj.astype(float)
    log_px = np.log(px)

    n = len(log_px)
    slope_daily = np.full(n, np.nan, dtype=float)
    r2_out      = np.full(n, np.nan, dtype=float)

    for t in range(WINDOW, n):
        y = log_px[t - WINDOW:t]

        if np.any(~np.isfinite(y)):
            continue

        y_mean = np.mean(y)
        cov_xy = np.mean((x - x_mean) * (y - y_mean))
        beta1 = cov_xy / var_x
        beta0 = y_mean - beta1 * x_mean

        y_hat = beta0 + beta1 * x
        ss_res = np.sum((y - y_hat)**2)
        ss_tot = np.sum((y - y_mean)**2)
        r2 = 1.0 - ss_res / ss_tot if ss_tot > 0 else 1.0

        slope_daily[t] = beta1
        r2_out[t] = r2

    slope_annual = np.exp(slope_daily * TRADING_DAYS_PER_YEAR) - 1.0  # same as (exp(slope_daily))**252 - 1
    return slope_daily, slope_annual, r2_out


def load_reg90_ticker(ticker: str) -> pd.DataFrame:
    path = os.path.join(REG90_DIR, f"{ticker}.parquet")
    if not os.path.exists(path):
        raise FileNotFoundError(path)
    r = pd.read_parquet(path)
    r["date"] = pd.to_datetime(r["date"])
    return r


def compare_reg90_for_ticker(ticker: str, date_start: pd.Timestamp, date_end: pd.Timestamp) -> tuple[pd.DataFrame, pd.DataFrame]:
    """
    Returns:
      - per-date comparison frame
      - summary stats frame (1-row)
    """
    reg = load_reg90_ticker(ticker).sort_values("date").reset_index(drop=True)

    # Keep enough history for rolling windows to be defined in-range
    # We'll compute recompute on the full ticker history and then filter.
    if "close_adj" not in reg.columns:
        raise ValueError(f"{ticker} regression file missing close_adj")

    slope_d, slope_a, r2 = recompute_reg90_from_close_adj(reg["close_adj"].values)

    reg["re_slope_daily"]  = slope_d
    reg["re_slope_annual"] = slope_a
    reg["re_r2"]           = r2

    # Compare against stored
    # (stored columns: slope_daily, slope_annual, r2)
    out = reg[["date", "close_adj", "slope_daily", "slope_annual", "r2",
               "re_slope_daily", "re_slope_annual", "re_r2"]].copy()

    out["diff_slope_daily"]  = out["slope_daily"]  - out["re_slope_daily"]
    out["diff_slope_annual"] = out["slope_annual"] - out["re_slope_annual"]
    out["diff_r2"]           = out["r2"]           - out["re_r2"]

    mask = (out["date"] >= date_start) & (out["date"] <= date_end)
    out_win = out.loc[mask].copy()

    # Summary: max abs diffs (ignore NaNs)
    def max_abs(s: pd.Series) -> float:
        s = s.dropna()
        return float(np.max(np.abs(s))) if len(s) else np.nan

    summary = pd.DataFrame([{
        "ticker": ticker,
        "rows_in_window": int(len(out_win)),
        "max_abs_diff_slope_daily":  max_abs(out_win["diff_slope_daily"]),
        "max_abs_diff_slope_annual": max_abs(out_win["diff_slope_annual"]),
        "max_abs_diff_r2":           max_abs(out_win["diff_r2"]),
        "mean_abs_diff_slope_daily": float(np.nanmean(np.abs(out_win["diff_slope_daily"].values))) if len(out_win) else np.nan,
        "mean_abs_diff_r2":          float(np.nanmean(np.abs(out_win["diff_r2"].values))) if len(out_win) else np.nan,
    }])

    return out_win, summary


def compare_reg90_for_tickers(tickers: list[str], date_start: pd.Timestamp, date_end: pd.Timestamp) -> tuple[pd.DataFrame, pd.DataFrame]:
    all_rows = []
    summaries = []
    for t in tickers:
        try:
            out_win, summ = compare_reg90_for_ticker(t, date_start, date_end)
            out_win.insert(0, "ticker", t)
            all_rows.append(out_win)
            summaries.append(summ)
        except Exception as e:
            summaries.append(pd.DataFrame([{
                "ticker": t,
                "rows_in_window": 0,
                "error": str(e)
            }]))
    comp = pd.concat(all_rows, ignore_index=True) if all_rows else pd.DataFrame()
    summ = pd.concat(summaries, ignore_index=True) if summaries else pd.DataFrame()
    return comp, summ


In [4]:
"""_summary_
    Compare Universe slope_adj vs Regression slope_annual (critical sanity check)

This directly checks whether your main backtest’s slope_adj matches the stored regression slope_annual (or not).

If this shows big diffs, it’s your first “red flag” that either:

slope_adj isn’t slope_annual, or

there’s a merge/date alignment issue, or

the universe file is sourced from a different regression run / different annualization
    """
    
    
def compare_universe_slope_to_reg90(tickers: list[str], date_start: pd.Timestamp, date_end: pd.Timestamp) -> pd.DataFrame:
    rows = []
    for t in tickers:
        try:
            reg = load_reg90_ticker(t)[["date", "slope_daily", "slope_annual", "r2"]].copy()
            uni = df[df["ticker"] == t][["date", "slope_adj"]].copy()

            m = uni.merge(reg, on="date", how="inner")
            m = m[(m["date"] >= date_start) & (m["date"] <= date_end)].copy()
            if m.empty:
                continue

            # compare slope_adj against slope_annual (most likely intended mapping)
            m["diff_adj_minus_reg_annual"] = m["slope_adj"] - m["slope_annual"]
            corr = m[["slope_adj", "slope_annual"]].corr().iloc[0, 1]

            rows.append({
                "ticker": t,
                "rows": len(m),
                "corr(slope_adj, slope_annual)": float(corr) if pd.notna(corr) else np.nan,
                "max_abs_diff": float(np.nanmax(np.abs(m["diff_adj_minus_reg_annual"].values))),
                "mean_abs_diff": float(np.nanmean(np.abs(m["diff_adj_minus_reg_annual"].values))),
            })
        except Exception as e:
            rows.append({"ticker": t, "rows": 0, "error": str(e)})

    return pd.DataFrame(rows).sort_values(["rows", "ticker"], ascending=[False, True])


In [5]:
"""_summary_
Cell 5 — Weekly audit tables (ranking → exits → targets → decisions → reconcile actual trades)

This mirrors your main script’s core math and produces “explainable” tables.
    """

# ============================================================
# ATR loader (lazy)
# ============================================================
_atr_cache = {}

def get_atr20_series(ticker: str) -> pd.Series | None:
    if ticker in _atr_cache:
        return _atr_cache[ticker]
    path = os.path.join(ATR20_DIR, f"{ticker}.parquet")
    if not os.path.exists(path):
        _atr_cache[ticker] = None
        return None
    tmp = pd.read_parquet(path)
    if "atr20" not in tmp.columns:
        _atr_cache[ticker] = None
        return None
    tmp["date"] = pd.to_datetime(tmp["date"])
    s = tmp.set_index("date")["atr20"].astype(float).sort_index()
    _atr_cache[ticker] = s
    return s


# ============================================================
# Position reconstruction from executed trade log
# ============================================================
def apply_trade_avg_cost(state, ticker: str, side: str, shares: int, price: float):
    if shares <= 0 or not (price > 0):
        return
    if side == "BUY":
        tv = shares * price
        state["cash"] -= tv
        if ticker in state["pos"]:
            old_sh = state["pos"][ticker]["shares"]
            old_cost = state["pos"][ticker]["avg_cost"]
            new_sh = old_sh + shares
            new_cost = (old_cost * old_sh + price * shares) / new_sh
            state["pos"][ticker]["shares"] = new_sh
            state["pos"][ticker]["avg_cost"] = new_cost
        else:
            state["pos"][ticker] = {"shares": shares, "avg_cost": price}
    elif side == "SELL":
        tv = shares * price
        state["cash"] += tv
        if ticker not in state["pos"]:
            return
        old_sh = state["pos"][ticker]["shares"]
        new_sh = old_sh - shares
        if new_sh <= 0:
            state["pos"].pop(ticker, None)
        else:
            state["pos"][ticker]["shares"] = new_sh
    else:
        raise ValueError(side)


def snapshot_portfolio_open(exec_date: pd.Timestamp, state, open_px_map: dict) -> float:
    # portfolio value at open using open_adj when available; fallback to last close_adj from universe
    cash = float(state["cash"])
    equity_val = 0.0
    for t, p in state["pos"].items():
        px = open_px_map.get(t, np.nan)
        if pd.isna(px) or px <= 0:
            # fallback close_adj from universe <= exec_date
            sub = df[(df["ticker"] == t) & (df["date"] <= exec_date)][["date","close_adj"]].dropna()
            px = float(sub.sort_values("date").iloc[-1]["close_adj"]) if len(sub) else np.nan
        if pd.notna(px) and px > 0:
            equity_val += int(p["shares"]) * float(px)
    return cash + equity_val


def get_weeks(start_signal_date: str, n_weeks: int = 4) -> list[tuple[pd.Timestamp, pd.Timestamp]]:
    start = pd.Timestamp(start_signal_date)

    # find first Wednesday >= start that exists in universe calendar
    d = start
    while True:
        if d in calendar_set and is_rebalance_day(d):
            break
        d += pd.Timedelta(days=1)

    weeks = []
    while len(weeks) < n_weeks:
        if d in calendar_set and is_rebalance_day(d):
            td = next_date_map.get(d)
            if td is not None and td in calendar_set:
                weeks.append((d, td))
        d += pd.Timedelta(days=1)
    return weeks


def compute_top_group(signal_day_df: pd.DataFrame) -> tuple[pd.DataFrame, float]:
    rankable = signal_day_df[
        signal_day_df["slope_adj"].notna() &
        (signal_day_df.get("in_sp500", True) == True)
    ].copy()

    if rankable.empty:
        return rankable, np.nan

    rankable = rankable.sort_values("slope_adj", ascending=False)
    cutoff = float(rankable["slope_adj"].quantile(TOP_PERCENTILE))
    top = rankable[rankable["slope_adj"] >= cutoff].copy()
    top = top.sort_values("slope_adj", ascending=False)
    top["slope_rank_within_top"] = np.arange(1, len(top)+1)
    return top, cutoff


def build_targets(top_group: pd.DataFrame, trade_date: pd.Timestamp, open_px_map: dict, portfolio_exec: float) -> pd.DataFrame:
    effective_equity = max(portfolio_exec - MIN_CASH_RESERVE, 0.0)

    tg = top_group.copy()

    # atr20 as-of trade_date
    atr_vals = []
    for t in tg["ticker"].astype(str).tolist():
        s = get_atr20_series(t)
        atr_vals.append(float(s.get(trade_date, np.nan)) if s is not None else np.nan)
    tg["atr20"] = atr_vals

    tg = tg[
        tg["atr20"].notna() & (tg["atr20"] > 0) &
        tg["close_adj"].notna() & (tg["close_adj"] > 0)
    ].copy()
    if tg.empty:
        return tg

    inv_vol = 1.0 / tg["atr20"].astype(float)
    total_inv_vol = float(inv_vol.sum())
    if total_inv_vol <= 0:
        return tg.iloc[0:0].copy()

    tg["inv_vol"] = inv_vol
    tg["target_value"] = effective_equity * tg["inv_vol"] / total_inv_vol

    tg["exec_open"] = tg["ticker"].map(open_px_map).astype(float)
    tg = tg[tg["exec_open"].notna() & (tg["exec_open"] > 0)].copy()

    tg["target_shares"] = np.floor(tg["target_value"] / tg["exec_open"]).astype(int)
    tg = tg[tg["target_shares"] > 0].copy()

    return tg


def audit_week(signal_date: pd.Timestamp, trade_date: pd.Timestamp, state_before: dict) -> dict:
    """
    Returns tables:
      - top_group, targets
      - planned_exits
      - decisions (per ticker in targets)
      - actual_trades (from trade log)
      - reconciliation planned vs actual
    """
    # Data slices
    day = df[df["date"] == signal_date].copy()
    trade_day = df[df["date"] == trade_date].copy()
    open_px_map = trade_day.set_index("ticker")["open_adj"].astype(float).to_dict()

    spy_ok = bool(spy_regime_map.get(signal_date, True))
    can_buy = spy_ok

    # Ranking
    top_group, cutoff = compute_top_group(day)

    # ----- Planned exits (mirror your logic: exit anything not in top_tickers) -----
    top_tickers = set(top_group["ticker"].astype(str).tolist())
    planned_exits = []
    state_after_exits = {
        "cash": float(state_before["cash"]),
        "pos": {k: {"shares": int(v["shares"]), "avg_cost": float(v["avg_cost"])}
                for k, v in state_before["pos"].items()}
    }

    for t in list(state_after_exits["pos"].keys()):
        if t in top_tickers:
            continue

        sh = int(state_after_exits["pos"][t]["shares"])
        px = float(open_px_map.get(t, np.nan))
        executed = (pd.notna(px) and px > 0)

        if not executed:
            sub = df[(df["ticker"] == t) & (df["date"] <= trade_date)][["date", "close_adj"]].dropna()
            if len(sub):
                px = float(sub.sort_values("date").iloc[-1]["close_adj"])
                executed = (px > 0)

        avg_cost = float(state_after_exits["pos"][t]["avg_cost"])
        realized_pnl = (px - avg_cost) * sh if executed else np.nan
        realized_ret = (px / avg_cost - 1.0) if executed and avg_cost > 0 else np.nan

        if executed:
            apply_trade_avg_cost(state_after_exits, t, "SELL", sh, px)

        planned_exits.append({
            "signal_date": signal_date,
            "exec_date": trade_date,
            "ticker": t,
            "side": "SELL",
            "shares": sh,
            "exec_px_used": px if executed else np.nan,
            "avg_cost_before": avg_cost,
            "realized_pnl": realized_pnl,
            "realized_ret": realized_ret,
            "executed_in_plan": executed,
            "reason": "not_in_top_group"
        })

    # ✅ robust conversion + sort
    planned_exits_cols = [
        "signal_date", "exec_date", "ticker", "side", "shares",
        "exec_px_used", "avg_cost_before", "realized_pnl", "realized_ret",
        "executed_in_plan", "reason"
    ]
    planned_exits = pd.DataFrame(planned_exits, columns=planned_exits_cols)
    if not planned_exits.empty:
        planned_exits = planned_exits.sort_values(["executed_in_plan", "ticker"],
                                                  ascending=[False, True])


    # Execution-time portfolio value after exits (for sizing)
    portfolio_exec = snapshot_portfolio_open(trade_date, state_after_exits, open_px_map)

    # Targets
    targets = build_targets(top_group, trade_date, open_px_map, portfolio_exec)

    # Decisions table (mirror your drift/min trade/cash floor/regime checks)
    decisions = []
    total_port_exec = float(portfolio_exec)

    for _, r in targets.iterrows():
        t = str(r["ticker"])
        px = float(r["exec_open"])
        target_sh = int(r["target_shares"])

        cur_sh = int(state_after_exits["pos"].get(t, {}).get("shares", 0))
        cur_val = cur_sh * px
        tgt_val = target_sh * px

        cur_w = cur_val / total_port_exec if total_port_exec > 0 else 0.0
        tgt_w = tgt_val / total_port_exec if total_port_exec > 0 else 0.0
        w_diff = abs(tgt_w - cur_w)

        planned_side = "HOLD"
        planned_sh = 0
        planned_value = 0.0
        skip_reason = ""

        if w_diff < DRIFT_THRESHOLD:
            skip_reason = "skip_drift"
        else:
            if target_sh > cur_sh:
                if not can_buy:
                    skip_reason = "skip_spy_regime"
                else:
                    planned_side = "BUY"
                    planned_sh = target_sh - cur_sh
                    planned_value = planned_sh * px

                    is_new = (cur_sh == 0)
                    if is_new and tgt_w < MIN_NEW_POSITION_WEIGHT:
                        planned_side, planned_sh, planned_value = "HOLD", 0, 0.0
                        skip_reason = "skip_min_new_weight"
                    elif planned_value < MIN_TRADE_VALUE:
                        planned_side, planned_sh, planned_value = "HOLD", 0, 0.0
                        skip_reason = "skip_min_trade_value"
                    elif planned_value > state_after_exits["cash"] - MIN_CASH_RESERVE:
                        planned_side, planned_sh, planned_value = "HOLD", 0, 0.0
                        skip_reason = "skip_cash_floor"

            elif target_sh < cur_sh:
                planned_side = "SELL"
                planned_sh = cur_sh - target_sh
                planned_value = planned_sh * px

                if planned_value < MIN_TRADE_VALUE:
                    planned_side, planned_sh, planned_value = "HOLD", 0, 0.0
                    skip_reason = "skip_min_trade_value"

        decisions.append({
            "signal_date": signal_date,
            "exec_date": trade_date,
            "ticker": t,
            "exec_open": px,
            "slope_adj_signal": float(day.loc[day["ticker"] == t, "slope_adj"].iloc[0]) if (day["ticker"] == t).any() else np.nan,
            "rank_within_top": int(top_group.loc[top_group["ticker"] == t, "slope_rank_within_top"].iloc[0]) if (top_group["ticker"] == t).any() else np.nan,
            "atr20": float(r["atr20"]),
            "inv_vol": float(r["inv_vol"]),
            "target_value": float(r["target_value"]),
            "target_shares": target_sh,
            "current_shares": cur_sh,
            "delta_shares": target_sh - cur_sh,
            "current_weight": cur_w,
            "target_weight": tgt_w,
            "weight_diff": w_diff,
            "planned_side": planned_side,
            "planned_shares": planned_sh,
            "planned_trade_value": planned_value,
            "skip_reason": skip_reason,
        })

    decisions = pd.DataFrame(decisions).sort_values(["planned_side","planned_trade_value"], ascending=[True, False])

    # Actual trades this week
    actual = trades[(trades["signal_date"] == signal_date) & (trades["exec_date"] == trade_date)].copy()
    actual["trade_value"] = actual["shares"].astype(int) * actual["price"].astype(float)
    actual = actual.sort_values(["type","ticker"])

    # Reconciliation planned vs actual by ticker+side
    planned = decisions[decisions["planned_side"].isin(["BUY","SELL"])].copy()
    planned = planned.rename(columns={"planned_side":"type", "planned_shares":"shares_planned", "planned_trade_value":"value_planned"})
    planned = planned[["signal_date","exec_date","ticker","type","shares_planned","value_planned","skip_reason"]]

    actual_cmp = actual.rename(columns={"shares":"shares_actual","trade_value":"value_actual"}) \
                       [["signal_date","exec_date","ticker","type","shares_actual","value_actual","reason"]]

    recon = planned.merge(actual_cmp, on=["signal_date","exec_date","ticker","type"], how="outer", indicator=True)

    return {
        "signal_date": signal_date,
        "trade_date": trade_date,
        "spy_above_200dma": spy_ok,
        "cutoff": cutoff,
        "top_group": top_group,
        "planned_exits": planned_exits,
        "targets": targets,
        "decisions": decisions,
        "actual_trades": actual,
        "reconciliation": recon,
        "portfolio_exec_after_exits": portfolio_exec,
    }


In [6]:
"""
    Cell 6 — Run a 4-week audit window + regression validation for traded tickers
    """



# Choose window
start_signal = "1999-01-06"   # example
weeks = get_weeks(start_signal, n_weeks=4)
weeks

# Reconstruct portfolio state using executed trades up to each trade_date
# (so we’re comparing your intended math vs the backtest's actual execution history)

# Sort trades so SELL happens before BUY on same exec_date (matches your engine)
type_order = {"SELL": 0, "BUY": 1}
tr_sorted = trades.copy()
tr_sorted["_type_order"] = tr_sorted["type"].map(type_order).fillna(9).astype(int)
tr_sorted = tr_sorted.sort_values(["exec_date","_type_order"]).drop(columns=["_type_order"])

records = tr_sorted.to_dict("records")
ptr = 0

state = {"cash": 360000.0, "pos": {}}  # INITIAL_CAPITAL

def advance_state_until(exec_date: pd.Timestamp):
    global ptr, state
    while ptr < len(records) and pd.Timestamp(records[ptr]["exec_date"]) < exec_date:
        r = records[ptr]
        apply_trade_avg_cost(state, str(r["ticker"]), str(r["type"]), int(r["shares"]), float(r["price"]))
        ptr += 1

audits = []

for (sd, td) in weeks:
    advance_state_until(td)
    state_before = {"cash": float(state["cash"]),
                    "pos": {k: {"shares": int(v["shares"]), "avg_cost": float(v["avg_cost"])} for k,v in state["pos"].items()}}
    wk = audit_week(sd, td, state_before)
    audits.append(wk)

# Quick summary
pd.DataFrame([{
    "signal_date": a["signal_date"].date(),
    "trade_date": a["trade_date"].date(),
    "spy_ok": a["spy_above_200dma"],
    "top_group_size": len(a["top_group"]),
    "targets_size": len(a["targets"]),
    "planned_exits": len(a["planned_exits"]),
    "actual_trades": len(a["actual_trades"]),
    "portfolio_exec_after_exits": a["portfolio_exec_after_exits"],
} for a in audits])


Unnamed: 0,signal_date,trade_date,spy_ok,top_group_size,targets_size,planned_exits,actual_trades,portfolio_exec_after_exits
0,1999-01-06,1999-01-07,True,50,50,0,36,360000.0
1,1999-01-13,1999-01-14,True,50,50,3,7,356196.273998
2,1999-01-20,1999-01-21,True,50,50,4,8,364119.095654
3,1999-01-27,1999-01-28,True,50,50,6,15,364652.428214


In [7]:
wk1 = audits[0]
wk1["top_group"].head(30)


Unnamed: 0,date,open_adj,high_adj,low_adj,close_adj,volume,ma100,above_ma100,pct_change,abs_pct,abs_rollmax_90,no_big_jump_90,slope_annual,r2,slope_adj,ticker,in_sp500,slope_rank_within_top
126829,1999-01-06,75.870745,78.348157,75.870745,76.903,21261300.0,37.61231,True,0.014297,0.014297,0.179492,False,25.665515,0.869853,22.325236,TWX,True,1
126830,1999-01-06,13.487376,14.300939,13.487376,14.181,7462000.0,7.95667,True,0.085336,0.085336,0.164655,False,12.673693,0.898356,11.385487,SCHW,True,2
126831,1999-01-06,18.125,19.688,18.125,18.75,762900.0,13.97064,True,0.041667,0.041667,0.235538,False,11.766742,0.784356,9.229315,DGN,True,3
126832,1999-01-06,27.87806,29.219994,26.960666,29.098,16109000.0,18.55088,True,0.104247,0.104247,0.127994,True,10.0056,0.910476,9.109862,MU,True,4
126833,1999-01-06,46.906,47.312,45.375,45.469,15670800.0,29.89187,True,-0.017545,0.017545,0.085321,True,8.500223,0.895999,7.616191,JAVA1,True,5
126834,1999-01-06,14.190431,14.86391,13.958922,14.222,9289000.0,8.53911,True,0.02723,0.02723,0.194454,False,8.128575,0.869058,7.064203,KLAC,True,6
126835,1999-01-06,14.065,14.19,13.565,13.625,12031200.0,11.0327,True,-0.01589,0.01589,0.207746,False,8.759793,0.801552,7.021433,AMD,True,7
126836,1999-01-06,9.235418,10.119121,9.054332,9.699,89909000.0,6.0764,True,0.07659,0.07659,0.134611,True,7.667024,0.884278,6.779778,AMAT,True,8
126837,1999-01-06,21.5,21.656,20.75,21.516,10565600.0,14.81405,True,-0.010759,0.010759,0.139402,True,6.667202,0.916185,6.108389,SLR,True,9
126838,1999-01-06,65.625,66.188,65.562,66.062,238700.0,47.99156,True,0.005694,0.005694,0.326941,False,6.631171,0.901887,5.980569,UCC1,True,10


In [8]:
wk1["targets"].head(30)


Unnamed: 0,date,open_adj,high_adj,low_adj,close_adj,volume,ma100,above_ma100,pct_change,abs_pct,abs_rollmax_90,no_big_jump_90,slope_annual,r2,slope_adj,ticker,in_sp500,slope_rank_within_top,atr20,inv_vol,target_value,exec_open,target_shares
126829,1999-01-06,75.870745,78.348157,75.870745,76.903,21261300.0,37.61231,True,0.014297,0.014297,0.179492,False,25.665515,0.869853,22.325236,TWX,True,1,6.166012,0.162179,808.548982,74.838235,10
126830,1999-01-06,13.487376,14.300939,13.487376,14.181,7462000.0,7.95667,True,0.085336,0.085336,0.164655,False,12.673693,0.898356,11.385487,SCHW,True,2,0.978507,1.021965,5095.028936,13.788363,369
126831,1999-01-06,18.125,19.688,18.125,18.75,762900.0,13.97064,True,0.041667,0.041667,0.235538,False,11.766742,0.784356,9.229315,DGN,True,3,1.069,0.935454,4663.726006,18.5,252
126832,1999-01-06,27.87806,29.219994,26.960666,29.098,16109000.0,18.55088,True,0.104247,0.104247,0.127994,True,10.0056,0.910476,9.109862,MU,True,4,1.451963,0.688723,3433.643823,29.035056,118
126833,1999-01-06,46.906,47.312,45.375,45.469,15670800.0,29.89187,True,-0.017545,0.017545,0.085321,True,8.500223,0.895999,7.616191,JAVA1,True,5,1.6358,0.611322,3047.758345,44.781,68
126834,1999-01-06,14.190431,14.86391,13.958922,14.222,9289000.0,8.53911,True,0.02723,0.02723,0.194454,False,8.128575,0.869058,7.064203,KLAC,True,6,0.797266,1.254287,6253.277366,14.008,446
126835,1999-01-06,14.065,14.19,13.565,13.625,12031200.0,11.0327,True,-0.01589,0.01589,0.207746,False,8.759793,0.801552,7.021433,AMD,True,7,0.786,1.272265,6342.904708,13.5,469
126836,1999-01-06,9.235418,10.119121,9.054332,9.699,89909000.0,6.0764,True,0.07659,0.07659,0.134611,True,7.667024,0.884278,6.779778,AMAT,True,8,0.521147,1.918846,9566.451684,9.608335,995
126837,1999-01-06,21.5,21.656,20.75,21.516,10565600.0,14.81405,True,-0.010759,0.010759,0.139402,True,6.667202,0.916185,6.108389,SLR,True,9,1.0741,0.931012,4641.581883,21.875,212
126838,1999-01-06,65.625,66.188,65.562,66.062,238700.0,47.99156,True,0.005694,0.005694,0.326941,False,6.631171,0.901887,5.980569,UCC1,True,10,1.513646,0.660657,3293.718706,65.625,50


In [9]:
wk1["decisions"].head(50)


Unnamed: 0,signal_date,exec_date,ticker,exec_open,slope_adj_signal,rank_within_top,atr20,inv_vol,target_value,target_shares,current_shares,delta_shares,current_weight,target_weight,weight_diff,planned_side,planned_shares,planned_trade_value,skip_reason
36,1999-01-06,1999-01-07,PGR,7.718245,2.582871,37,0.17303,5.779346,28813.062317,3733,0,3733,0.0,0.080034,0.080034,BUY,3733,28812.207914,
12,1999-01-06,1999-01-07,ORCL,6.047484,5.138252,13,0.2476,4.038779,20135.427858,3329,0,3329,0.0,0.055922,0.055922,BUY,3329,20132.074315,
42,1999-01-06,1999-01-07,RTX,9.431263,2.14801,43,0.258278,3.871792,19302.907657,2046,0,2046,0.0,0.053601,0.053601,BUY,2046,19296.363259,
10,1999-01-06,1999-01-07,GLW,10.286719,5.744725,11,0.274856,3.638262,18138.640454,1763,0,1763,0.0,0.050376,0.050376,BUY,1763,18135.485194,
20,1999-01-06,1999-01-07,ADBE,5.759831,3.930499,21,0.363109,2.753997,13730.117091,2383,0,2383,0.0,0.038127,0.038127,BUY,2383,13725.677552,
14,1999-01-06,1999-01-07,ADSK,10.878234,4.904064,15,0.428485,2.333801,11635.22104,1069,0,1069,0.0,0.032302,0.032302,BUY,1069,11628.832451,
15,1999-01-06,1999-01-07,COMS1,9.421,4.787861,16,0.448777,2.228276,11109.123195,1179,0,1179,0.0,0.030854,0.030854,BUY,1179,11107.359,
19,1999-01-06,1999-01-07,NSM1,6.81888,3.951502,20,0.455255,2.196573,10951.067544,1605,0,1605,0.0,0.030401,0.030401,BUY,1605,10944.302882,
13,1999-01-06,1999-01-07,TXN,13.808879,4.90981,14,0.487129,2.052844,10234.502518,741,0,741,0.0,0.028423,0.028423,BUY,741,10232.379108,
28,1999-01-06,1999-01-07,AES,13.792674,2.955245,29,0.499371,2.002521,9983.612462,723,0,723,0.0,0.0277,0.0277,BUY,723,9972.103074,


In [10]:
wk1["actual_trades"]


Unnamed: 0,signal_date,exec_date,signal_close_adj,exec_open_adj,ticker,type,shares,price,value,reason,slope_rank_within_top,spy_above_200dma,cash_before,cash_after,equity_after,portfolio_after,num_positions_after,trade_value
18,1999-01-06,1999-01-07,23.122,23.049,ABI1,BUY,323,23.049,7444.827,new_entry,25,True,196239.821954,188794.994954,171205.005046,360000.0,19,7444.827
15,1999-01-06,1999-01-07,5.838,5.759831,ADBE,BUY,2352,5.759831,13547.122787,new_entry,21,True,219809.276,206262.153213,153737.846787,360000.0,16,13547.122787
10,1999-01-06,1999-01-07,10.921,10.878234,ADSK,BUY,1050,10.878234,11422.146,new_entry,15,True,268595.995256,257173.849256,102826.150744,360000.0,11,11422.146
22,1999-01-06,1999-01-07,14.008,13.792674,AES,BUY,712,13.792674,9820.383663,new_entry,29,True,171744.236793,161923.85313,198076.14687,360000.0,23,9820.383663
4,1999-01-06,1999-01-07,9.699,9.608335,AMAT,BUY,993,9.608335,9541.07706,new_entry,8,True,337977.629225,328436.552165,31563.447835,360000.0,5,9541.07706
3,1999-01-06,1999-01-07,13.625,13.5,AMD,BUY,462,13.5,6237.0,new_entry,7,True,344214.629225,337977.629225,22022.370775,360000.0,4,6237.0
33,1999-01-06,1999-01-07,14.243,14.326116,AVP,BUY,545,14.326116,7807.73312,new_entry,48,True,64397.286594,56589.553474,303410.446526,360000.0,34,7807.73312
12,1999-01-06,1999-01-07,15.764,15.512846,BC,BUY,527,15.512846,8175.269853,new_entry,18,True,246556.382256,238381.112404,121618.887596,360000.0,13,8175.269853
31,1999-01-06,1999-01-07,20.489,20.221697,BK,BUY,355,20.221697,7178.702292,new_entry,46,True,81693.488886,74514.786594,285485.213406,360000.0,32,7178.702292
30,1999-01-06,1999-01-07,51.942,51.721597,BOL,BUY,74,51.721597,3827.398206,new_entry,45,True,85520.887091,81693.488886,278306.511114,360000.0,31,3827.398206


In [11]:
wk1["reconciliation"].sort_values(["_merge","ticker","type"]).head(80)


Unnamed: 0,signal_date,exec_date,ticker,type,shares_planned,value_planned,skip_reason,shares_actual,value_actual,reason,_merge
0,1999-01-06,1999-01-07,ABI1,BUY,328,7560.072,,323,7444.827,new_entry,both
1,1999-01-06,1999-01-07,ADBE,BUY,2383,13725.677552,,2352,13547.122787,new_entry,both
2,1999-01-06,1999-01-07,ADSK,BUY,1069,11628.832451,,1050,11422.146,new_entry,both
3,1999-01-06,1999-01-07,AES,BUY,723,9972.103074,,712,9820.383663,new_entry,both
4,1999-01-06,1999-01-07,AMAT,BUY,995,9560.293731,,993,9541.07706,new_entry,both
5,1999-01-06,1999-01-07,AMD,BUY,469,6331.5,,462,6237.0,new_entry,both
6,1999-01-06,1999-01-07,AVP,BUY,556,7965.320394,,545,7807.73312,new_entry,both
7,1999-01-06,1999-01-07,BC,BUY,519,8051.167085,,527,8175.269853,new_entry,both
8,1999-01-06,1999-01-07,BK,BUY,364,7360.697561,,355,7178.702292,new_entry,both
9,1999-01-06,1999-01-07,BOL,BUY,74,3827.398206,,74,3827.398206,new_entry,both


In [12]:
wk1 = audits[0]
wk1["reconciliation"]["_merge"].value_counts()


_merge
both          36
left_only      0
right_only     0
Name: count, dtype: int64

In [13]:
wk1 = audits[0]
dec = wk1["decisions"][["ticker","exec_open","target_shares"]].copy()
act = wk1["actual_trades"][["ticker","price","shares"]].copy().rename(columns={"price":"exec_open_backtest","shares":"shares_actual"})
m = dec.merge(act, on="ticker", how="inner")
m["open_diff"] = m["exec_open"] - m["exec_open_backtest"]
m["share_diff"] = m["target_shares"] - m["shares_actual"]
m.sort_values("share_diff").head(20)


Unnamed: 0,ticker,exec_open,target_shares,exec_open_backtest,shares_actual,open_diff,share_diff
0,PGR,7.718245,3733,7.718245,3781,0.0,-48
10,LSI1,9.5,1036,9.5,1065,0.0,-29
13,CSCO,16.023364,529,16.023364,546,0.0,-17
31,SLR,21.875,212,21.875,223,0.0,-11
21,JPM,22.366476,287,22.366476,298,0.0,-11
28,Q1,19.650975,245,19.650975,254,0.0,-9
15,BC,15.512846,519,15.512846,527,0.0,-8
7,NSM1,6.81888,1605,6.81888,1609,0.0,-4
20,SPLS,14.329777,471,14.329777,472,0.0,-1
33,CAR,26.192684,151,26.192684,151,0.0,0


Regression math verification for tickers involved in those 4 weeks

In [14]:
# Collect tickers traded in the 4-week window
tickers_traded = sorted(set(pd.concat([a["actual_trades"]["ticker"] for a in audits if len(a["actual_trades"])], axis=0).astype(str)))
tickers_traded[:20], len(tickers_traded)


(['ABI1',
  'ADBE',
  'ADSK',
  'AES',
  'AMAT',
  'AMD',
  'AVP',
  'BC',
  'BK',
  'BOL',
  'BT1',
  'CAR',
  'CCL',
  'CCTYQ',
  'COMS1',
  'CSCO',
  'DGN',
  'DHR',
  'EMC1',
  'FDX'],
 48)

In [15]:
# Regression compare window (cover the audit period)
date_start = audits[0]["signal_date"] - pd.Timedelta(days=10)  # small buffer
date_end   = audits[-1]["trade_date"] + pd.Timedelta(days=10)

comp, summ = compare_reg90_for_tickers(tickers_traded, date_start, date_end)

summ.sort_values(["max_abs_diff_slope_daily"], ascending=False).head(20)


Unnamed: 0,ticker,rows_in_window,max_abs_diff_slope_daily,max_abs_diff_slope_annual,max_abs_diff_r2,mean_abs_diff_slope_daily,mean_abs_diff_r2
45,TEK1,28,8.673617e-18,2.469136e-13,2.220446e-16,1.858632e-18,7.930164e-17
4,AMAT,28,8.673617e-18,2.930989e-13,1.110223e-16,2.5091540000000002e-18,4.3615900000000005e-17
47,TXN,28,6.938894e-18,1.971756e-13,1.110223e-16,1.796678e-18,4.3615900000000005e-17
16,DGN,28,6.938894e-18,2.771117e-13,3.330669e-16,2.199382e-18,1.387779e-16
40,SCHW,28,6.938894e-18,4.369838e-13,1.110223e-16,2.4781760000000002e-18,2.3790490000000002e-17
17,DHR,28,6.071532e-18,1.225686e-13,4.440892e-16,1.0222480000000001e-18,1.229175e-16
36,ORCL,28,5.2041700000000004e-18,2.593481e-13,1.110223e-16,1.641792e-18,3.5685740000000003e-17
21,GLW,28,5.2041700000000004e-18,1.882938e-13,1.110223e-16,1.796678e-18,3.965082e-17
18,EMC1,28,5.2041700000000004e-18,1.554312e-13,3.330669e-16,1.6108150000000001e-18,4.758099e-17
15,CSCO,28,5.2041700000000004e-18,2.717826e-13,6.661338e-16,1.208111e-18,1.586033e-16


In [16]:
# If you want to drill into the worst offender
worst = summ.sort_values("max_abs_diff_slope_daily", ascending=False).iloc[0]["ticker"]
worst


'TEK1'

In [17]:
comp[comp["ticker"] == worst].sort_values("date").tail(30)


Unnamed: 0,ticker,date,close_adj,slope_daily,slope_annual,r2,re_slope_daily,re_slope_annual,re_r2,diff_slope_daily,diff_slope_annual,diff_r2
1260,TEK1,1998-12-28,14.141,0.006104,3.655932,0.572385,0.006104,3.655932,0.572385,8.673616999999999e-19,-2.309264e-14,-2.220446e-16
1261,TEK1,1998-12-29,14.111,0.006538,4.194921,0.632208,0.006538,4.194921,0.632208,0.0,-1.358913e-13,1.110223e-16
1262,TEK1,1998-12-30,14.141,0.006949,4.760852,0.687643,0.006949,4.760852,0.687643,-1.734723e-18,4.707346e-14,-1.110223e-16
1263,TEK1,1998-12-31,14.259,0.007326,5.335869,0.735696,0.007326,5.335869,0.735696,1.734723e-18,-7.105427e-14,1.110223e-16
1264,TEK1,1999-01-04,14.022,0.007637,5.851584,0.768475,0.007637,5.851584,0.768475,8.673616999999999e-19,-2.664535e-15,2.220446e-16
1265,TEK1,1999-01-05,14.407,0.007916,6.351272,0.798022,0.007916,6.351272,0.798022,1.734723e-18,4.174439e-14,0.0
1266,TEK1,1999-01-06,15.238,0.008151,6.799477,0.817392,0.008151,6.799477,0.817392,0.0,-1.270095e-13,0.0
1267,TEK1,1999-01-07,15.0,0.008362,7.224952,0.828719,0.008362,7.224952,0.828719,1.734723e-18,8.171241e-14,0.0
1268,TEK1,1999-01-08,15.03,0.008564,7.654982,0.841272,0.008564,7.654982,0.841272,0.0,2.078338e-13,0.0
1269,TEK1,1999-01-11,14.734,0.008685,7.924007,0.846353,0.008685,7.924007,0.846353,-1.734723e-18,1.669775e-13,1.110223e-16


What this gives you (in practice)
A) Regression verification (math correctness)

For every traded ticker/date in your window:

stored slope_daily vs recomputed slope_daily

stored r2 vs recomputed r2

and slope_annual too

If your recompute matches within ~1e-12 to 1e-9, your regression pipeline is basically identical.

If it doesn’t, you’ll see exact dates where it diverges.

B) Backtest weekly decision verification

For each week it produces tables you can eyeball:

top_group: includes slope_adj, cutoff, ranks

planned_exits: every forced sell because not in top group

targets: ATR inverse-vol math → target_value → target_shares (using Thursday open)

decisions: current vs target shares/weights, drift checks, cash-floor checks, SPY regime gating, etc.

reconciliation: planned vs actual trades from your log

In [18]:
compare_universe_slope_to_reg90(tickers_traded, date_start, date_end).head(30)


Unnamed: 0,ticker,rows,"corr(slope_adj, slope_annual)",max_abs_diff,mean_abs_diff
0,ABI1,28,0.999813,0.407793,0.320585
1,ADBE,28,0.999524,0.716254,0.597916
2,ADSK,28,0.998871,1.288171,1.013548
3,AES,28,0.995008,0.819845,0.617479
4,AMAT,28,0.999292,1.418603,0.900863
5,AMD,28,0.999333,1.896543,1.756217
6,AVP,28,0.992709,1.029894,0.894826
7,BC,28,0.989397,1.216824,0.905972
8,BK,28,0.999426,0.46562,0.318342
9,BOL,28,0.999824,0.714636,0.458635


In [19]:
wk1["actual_trades"].columns


Index(['signal_date', 'exec_date', 'signal_close_adj', 'exec_open_adj', 'ticker', 'type', 'shares', 'price', 'value', 'reason', 'slope_rank_within_top', 'spy_above_200dma', 'cash_before',
       'cash_after', 'equity_after', 'portfolio_after', 'num_positions_after', 'trade_value'],
      dtype='object')

In [20]:
wk1 = audits[0]

dec = wk1["decisions"][["ticker","exec_open","target_shares"]].copy()

act = wk1["actual_trades"].copy()

# pick the right column names defensively
side_col  = "type"  if "type"  in act.columns else "side"
price_col = "price" if "price" in act.columns else "exec_open_adj"
sh_col    = "shares"

act2 = (
    act[["ticker", price_col, sh_col]]
    .groupby("ticker", as_index=False)
    .agg({price_col:"first", sh_col:"sum"})
    .rename(columns={price_col:"exec_open_backtest", sh_col:"shares_actual"})
)

m = dec.merge(act2, on="ticker", how="inner")
m["open_diff"]  = m["exec_open"] - m["exec_open_backtest"]
m["share_diff"] = m["target_shares"] - m["shares_actual"]

m.sort_values("share_diff").head(20)


Unnamed: 0,ticker,exec_open,target_shares,exec_open_backtest,shares_actual,open_diff,share_diff
0,PGR,7.718245,3733,7.718245,3781,0.0,-48
10,LSI1,9.5,1036,9.5,1065,0.0,-29
13,CSCO,16.023364,529,16.023364,546,0.0,-17
31,SLR,21.875,212,21.875,223,0.0,-11
21,JPM,22.366476,287,22.366476,298,0.0,-11
28,Q1,19.650975,245,19.650975,254,0.0,-9
15,BC,15.512846,519,15.512846,527,0.0,-8
7,NSM1,6.81888,1605,6.81888,1609,0.0,-4
20,SPLS,14.329777,471,14.329777,472,0.0,-1
33,CAR,26.192684,151,26.192684,151,0.0,0


In [21]:
wk1 = audits[0]

dec = wk1["decisions"][["ticker","exec_open","target_shares"]].copy()

act = wk1["actual_trades"].copy()
side_col = "type" if "type" in act.columns else "side"

# starting shares before this rebalance (from your audit state)
pos_before = {
    t: int(v["shares"])
    for t, v in wk1.get("state_before", {}).get("pos", {}).items()
}

# signed share changes from executed trades
act["_side"] = act[side_col].astype(str).str.upper().str.strip()
act["_signed_shares"] = np.where(act["_side"] == "BUY", act["shares"], -act["shares"])

delta = act.groupby("ticker")["_signed_shares"].sum()

# compute pos_after
pos_after = pd.Series(pos_before, dtype=float).add(delta, fill_value=0).astype(int)

act_pos = pos_after.reset_index()
act_pos.columns = ["ticker", "pos_after"]

m = dec.merge(act_pos, on="ticker", how="left").fillna({"pos_after": 0})
m["pos_diff"] = m["target_shares"] - m["pos_after"]

m.sort_values("pos_diff").head(20)


Unnamed: 0,ticker,exec_open,target_shares,pos_after,pos_diff
0,PGR,7.718245,3733,3781.0,-48.0
10,LSI1,9.5,1036,1065.0,-29.0
13,CSCO,16.023364,529,546.0,-17.0
31,SLR,21.875,212,223.0,-11.0
21,JPM,22.366476,287,298.0,-11.0
28,Q1,19.650975,245,254.0,-9.0
15,BC,15.512846,519,527.0,-8.0
7,NSM1,6.81888,1605,1609.0,-4.0
20,SPLS,14.329777,471,472.0,-1.0
26,MKG,29.495066,190,190.0,0.0


In [22]:
wk1["decisions"].columns


Index(['signal_date', 'exec_date', 'ticker', 'exec_open', 'slope_adj_signal', 'rank_within_top', 'atr20', 'inv_vol', 'target_value', 'target_shares', 'current_shares', 'delta_shares',
       'current_weight', 'target_weight', 'weight_diff', 'planned_side', 'planned_shares', 'planned_trade_value', 'skip_reason'],
      dtype='object')

In [23]:
wk1["actual_trades"].columns


Index(['signal_date', 'exec_date', 'signal_close_adj', 'exec_open_adj', 'ticker', 'type', 'shares', 'price', 'value', 'reason', 'slope_rank_within_top', 'spy_above_200dma', 'cash_before',
       'cash_after', 'equity_after', 'portfolio_after', 'num_positions_after', 'trade_value'],
      dtype='object')

In [24]:
wk1 = audits[0]

dec = wk1["decisions"][["ticker","planned_side","planned_shares","exec_open","planned_trade_value","skip_reason"]].copy()

act = wk1["actual_trades"][["ticker","type","shares","price","value"]].copy()

# aggregate actual trades just in case there are multiple rows per ticker
act_agg = (
    act.groupby(["ticker","type"], as_index=False)
       .agg(shares_actual=("shares","sum"),
            exec_open_backtest=("price","first"),
            value_actual=("value","sum"))
)

m = dec.merge(
    act_agg,
    left_on=["ticker","planned_side"],
    right_on=["ticker","type"],
    how="left"
)

m["shares_actual"] = m["shares_actual"].fillna(0).astype(int)
m["exec_open_backtest"] = m["exec_open_backtest"].astype(float)

m["open_diff"]  = m["exec_open"] - m["exec_open_backtest"]
m["share_diff"] = m["planned_shares"].astype(int) - m["shares_actual"]



m.sort_values("share_diff").head(30)


Unnamed: 0,ticker,planned_side,planned_shares,exec_open,planned_trade_value,skip_reason,type,shares_actual,exec_open_backtest,value_actual,open_diff,share_diff
0,PGR,BUY,3733,7.718245,28812.207914,,BUY,3781,7.718245,29182.683665,0.0,-48
10,LSI1,BUY,1036,9.5,9842.0,,BUY,1065,9.5,10117.5,0.0,-29
13,CSCO,BUY,529,16.023364,8476.359503,,BUY,546,16.023364,8748.756689,0.0,-17
31,SLR,BUY,212,21.875,4637.5,,BUY,223,21.875,4878.125,0.0,-11
21,JPM,BUY,287,22.366476,6419.178547,,BUY,298,22.366476,6665.209781,0.0,-11
28,Q1,BUY,245,19.650975,4814.488942,,BUY,254,19.650975,4991.347719,0.0,-9
15,BC,BUY,519,15.512846,8051.167085,,BUY,527,15.512846,8175.269853,0.0,-8
7,NSM1,BUY,1605,6.81888,10944.302882,,BUY,1609,6.81888,10971.578403,0.0,-4
20,SPLS,BUY,471,14.329777,6749.324929,,BUY,472,14.329777,6763.654706,0.0,-1
32,IBM,BUY,93,46.212745,4297.785318,,BUY,93,46.212745,4297.785318,0.0,0


In [25]:
m.loc[m["share_diff"] != 0, ["ticker","planned_side","planned_shares","shares_actual","planned_trade_value","value_actual","skip_reason"]].head(50)

Unnamed: 0,ticker,planned_side,planned_shares,shares_actual,planned_trade_value,value_actual,skip_reason
0,PGR,BUY,3733,3781,28812.207914,29182.683665,
1,ORCL,BUY,3329,3204,20132.074315,19376.138812,
2,RTX,BUY,2046,2045,19296.363259,19286.931996,
3,GLW,BUY,1763,1644,18135.485194,16911.36566,
4,ADBE,BUY,2383,2352,13725.677552,13547.122787,
5,ADSK,BUY,1069,1050,11628.832451,11422.146,
6,COMS1,BUY,1179,1127,11107.359,10617.467,
7,NSM1,BUY,1605,1609,10944.302882,10971.578403,
8,TXN,BUY,741,725,10232.379108,10011.437049,
9,AES,BUY,723,712,9972.103074,9820.383663,


In [26]:
wk1 = audits[0]

dec = wk1["decisions"][[
    "ticker","planned_side","planned_shares","planned_trade_value","exec_open","skip_reason"
]].copy()

act = wk1["actual_trades"][["ticker","type","shares","price","value"]].copy().rename(columns={
    "type":  "planned_side",
    "shares":"shares_actual",
    "price": "exec_open_backtest",
    "value": "value_actual",
})

# normalize side strings
dec["planned_side"] = dec["planned_side"].astype(str).str.upper().str.strip()
act["planned_side"] = act["planned_side"].astype(str).str.upper().str.strip()

m = dec.merge(act, on=["ticker","planned_side"], how="left")

m["open_diff"]  = m["exec_open"] - m["exec_open_backtest"]
m["share_diff"] = m["planned_shares"] - m["shares_actual"]
m["value_diff"] = m["planned_trade_value"] - m["value_actual"]

m.sort_values("share_diff").head(50)



Unnamed: 0,ticker,planned_side,planned_shares,planned_trade_value,exec_open,skip_reason,shares_actual,exec_open_backtest,value_actual,open_diff,share_diff,value_diff
0,PGR,BUY,3733,28812.207914,7.718245,,3781.0,7.718245,29182.683665,0.0,-48.0,-370.475751
10,LSI1,BUY,1036,9842.0,9.5,,1065.0,9.5,10117.5,0.0,-29.0,-275.5
13,CSCO,BUY,529,8476.359503,16.023364,,546.0,16.023364,8748.756689,0.0,-17.0,-272.397186
31,SLR,BUY,212,4637.5,21.875,,223.0,21.875,4878.125,0.0,-11.0,-240.625
21,JPM,BUY,287,6419.178547,22.366476,,298.0,22.366476,6665.209781,0.0,-11.0,-246.031234
28,Q1,BUY,245,4814.488942,19.650975,,254.0,19.650975,4991.347719,0.0,-9.0,-176.858777
15,BC,BUY,519,8051.167085,15.512846,,527.0,15.512846,8175.269853,0.0,-8.0,-124.102768
7,NSM1,BUY,1605,10944.302882,6.81888,,1609.0,6.81888,10971.578403,0.0,-4.0,-27.275521
20,SPLS,BUY,471,6749.324929,14.329777,,472.0,14.329777,6763.654706,0.0,-1.0,-14.329777
33,CAR,BUY,151,3955.095292,26.192684,,151.0,26.192684,3955.095292,0.0,0.0,0.0


In [27]:
# show only BUYs with diffs
x = m[(m["planned_side"]=="BUY") & (m["share_diff"].fillna(0)!=0)].copy()

x = x.sort_values("share_diff")  # most negative first
x[["ticker","planned_shares","shares_actual","share_diff","planned_trade_value","value_actual"]].head(30)


Unnamed: 0,ticker,planned_shares,shares_actual,share_diff,planned_trade_value,value_actual
0,PGR,3733,3781,-48,28812.207914,29182.683665
10,LSI1,1036,1065,-29,9842.0,10117.5
13,CSCO,529,546,-17,8476.359503,8748.756689
21,JPM,287,298,-11,6419.178547,6665.209781
31,SLR,212,223,-11,4637.5,4878.125
28,Q1,245,254,-9,4814.488942,4991.347719
15,BC,519,527,-8,8051.167085,8175.269853
7,NSM1,1605,1609,-4,10944.302882,10971.578403
20,SPLS,471,472,-1,6749.324929,6763.654706
25,KLAC,446,445,1,6247.568,6233.56


In [28]:
wk1["decisions"].loc[wk1["decisions"]["planned_side"]=="BUY", ["ticker","rank_within_top","planned_shares"]]\
    .sort_values("rank_within_top").head(20)



Unnamed: 0,ticker,rank_within_top,planned_shares
1,SCHW,2,369
2,DGN,3,252
5,KLAC,6,446
6,AMD,7,469
7,AMAT,8,995
8,SLR,9,212
10,GLW,11,1763
11,TEK1,12,582
12,ORCL,13,3329
13,TXN,14,741


In [29]:
wk1["actual_trades"].loc[wk1["actual_trades"]["type"]=="BUY", ["ticker","slope_rank_within_top","shares","value"]]\
    .sort_values("slope_rank_within_top").head(20)

Unnamed: 0,ticker,slope_rank_within_top,shares,value
0,SCHW,2,360,4963.810775
1,DGN,3,248,4588.0
2,KLAC,6,445,6233.56
3,AMD,7,462,6237.0
4,AMAT,8,993,9541.07706
5,SLR,9,223,4878.125
6,GLW,11,1644,16911.36566
7,TEK1,12,573,8663.490387
8,ORCL,13,3204,19376.138812
9,TXN,14,725,10011.437049


In [30]:
# Compare planned vs actual cumulative capital use
m = m.assign(
    rank = wk1["decisions"].set_index("ticker").loc[m["ticker"], "rank_within_top"].values
).sort_values("rank")

m["planned_capital"] = m["planned_trade_value"].cumsum()
m["actual_capital"]  = m["value_actual"].fillna(0).cumsum()
m[["ticker","rank","planned_trade_value","value_actual","planned_capital","actual_capital","share_diff"]].head(25)


Unnamed: 0,ticker,rank,planned_trade_value,value_actual,planned_capital,actual_capital,share_diff
36,TWX,1,0.0,,0.0,0.0,
27,SCHW,2,5087.906045,4963.810775,5087.906045,4963.810775,9.0
30,DGN,3,4662.0,4588.0,9749.906045,9551.810775,4.0
37,MU,4,0.0,,9749.906045,9551.810775,
38,JAVA1,5,0.0,,9749.906045,9551.810775,
25,KLAC,6,6247.568,6233.56,15997.474045,15785.370775,1.0
24,AMD,7,6331.5,6237.0,22328.974045,22022.370775,7.0
11,AMAT,8,9560.293731,9541.07706,31889.267776,31563.447835,2.0
31,SLR,9,4637.5,4878.125,36526.767776,36441.572835,-11.0
39,UCC1,10,0.0,,36526.767776,36441.572835,


In [31]:
planner_cash = wk1["decisions"]["planned_trade_value"].sum()
executor_cash = wk1["actual_trades"]["value"].sum()

print(f"Planned total spend: {planner_cash:,.2f}")
print(f"Executed total spend: {executor_cash:,.2f}")


Planned total spend: 321,472.55
Executed total spend: 318,246.69
