In [4]:
#!/usr/bin/env python
"""
Weekly trading log PDF generator WITH full reporting (AUDIT-BASED) -
used to make sure the trade generator generates the same trades as the backtest engine
— UPDATED to handle NEW TRADES SCHEMA with signal + execution fields.
— UPDATED to include ranked stocks table showing which were traded and why others weren't

NEW trades schema expected (example):
    signal_date, exec_date, signal_close_adj, exec_open_adj,
    ticker, type, shares, price, value, reason, slope_rank_within_top,
    spy_above_200dma, cash_before, cash_after, equity_after, portfolio_after, num_positions_after
(plus any extra columns your engine writes)

Core reporting logic:
- Weekly report date is the EXECUTION day (e.g. Thursday), grouped by exec_date.
- Signal-day analytics (rank map, slope_adj, filters, SPY regime message) use signal_date (e.g. Wednesday).
- Audited cash BEFORE/AFTER comes from per-trade cash_before/cash_after on exec_date.
- Post-trade holdings are reconstructed by applying trades on exec_date and then valuing
  holdings at exec_date close_adj (Q3=A backfill with last-known close).
- Trade tables show BOTH:
    * signal price (signal_close_adj on Wednesday)
    * execution price (exec_open_adj on Thursday)
  and still keep the original 'price'/'trade_value' columns if present.

NEW SECTION:
- All Ranked Stocks table showing every stock that ranked in the top percentile,
  with indicators for whether it was traded and reasons if not traded.

Outputs:
- PDF in ./15a-match_weekly_trading_logs/ (the OUTPUT_DIR configured below)
- Overview equity/drawdown chart on front page

"""

import os
from datetime import datetime
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from reportlab.lib.pagesizes import letter, landscape
from reportlab.lib import colors
from reportlab.lib.units import inch
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.platypus import (
    SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle,
    PageBreak, Image
)

# ============================================================
# CONFIG
# ============================================================
 
# --- Input parquet files -------------------------------------------------
# Per-trade log. Validated below for signal_date/exec_date/ticker/type/shares
# and for the audit fields cash_before/cash_after.
TRADES_FILE = "./13-match_trade_generator_output_regression_insp500_spyfilter_cap15/13-match_trades_regression_insp500_spyfilter_cap15.parquet"
# Equity curve; this script reads its "date" and "portfolio_value" columns.
EQUITY_FILE = "./13-match_trade_generator_output_regression_insp500_spyfilter_cap15/13-match_equity_curve_regression_insp500_spyfilter_cap15.parquet"
# Universe rows; this script reads "date", "ticker", "close_adj" and, when
# present, "slope_adj".
UNIVERSE_FILE = "./12-tradable_sp500_universe/12-tradable_sp500_universe.parquet"
# SPY series used for the 200DMA regime message and as the benchmark.
SPY_FILE      = "./8-SPY_200DMA_market_regime/8-SPY_200DMA_regime.parquet"
# Weekly rankings, joined to the trades on (signal_date, ticker).
RANKINGS_FILE = "./13-match_trade_generator_output_regression_insp500_spyfilter_cap15/13-match_weekly_rankings_pre_filter_cap15.parquet"

# Directory receiving the overview chart PNG and the generated PDF.
# It is created as a side effect of running this module.
OUTPUT_DIR = "./15a-match_weekly_trading_logs"
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Report cadence:
# - trades execute on Thursday in your system (exec_date = Thursday)
# - signals are generated Wednesday (signal_date = Wednesday)
# NOTE(review): the snapshot loop further down hard-codes
# ("Thursday", "Friday") instead of reading TRADING_DAY_NAME.
TRADING_DAY_NAME      = "Thursday"     # execution day (weekly report day)
SIGNAL_DAY_NAME       = "Wednesday"    # signal day (rank/spy regime day)

# Weekly report pages are skipped for execution dates earlier than this.
START_TRADING         = pd.Timestamp("2025-12-17")
# Quantile of slope_adj used as the cutoff when building the rank map.
TOP_PERCENTILE        = 0.90
# Inclusive year window applied to snapshots and to weekly report pages.
MIN_YEAR_FOR_REPORT   = 1999
MAX_YEAR_FOR_REPORT   = 2026
# Annualization factor used by sharpe_ratio / sortino_ratio.
TRADING_DAYS_PER_YEAR = 252

# cash risk flags (for the weekly summary)
LOW_CASH_FLOOR = 2000.0

# Turnover filters (for categorizing why stocks didn't trade)
# These are compared in categorize_no_trade against |weight_change|,
# |shares_change * close_adj| and target_weight respectively.
DRIFT_THRESHOLD          = 0.05
MIN_TRADE_VALUE          = 10000
MIN_NEW_POSITION_WEIGHT  = 0.003


# ============================================================
# METRICS
# ============================================================

def sharpe_ratio(r: pd.Series) -> float:
    """Return the annualized Sharpe ratio of a return series.

    NaNs are dropped first. No risk-free rate is subtracted. The result is
    scaled by ``sqrt(TRADING_DAYS_PER_YEAR)``.

    Returns NaN when fewer than two observations remain or when the sample
    standard deviation is exactly zero.
    """
    clean = r.dropna()
    if len(clean) < 2:
        return np.nan

    vol = clean.std()
    if vol == 0:
        return np.nan

    return np.sqrt(TRADING_DAYS_PER_YEAR) * clean.mean() / vol

def sortino_ratio(r: pd.Series) -> float:
    """Return the annualized Sortino ratio of a return series.

    NaNs are dropped first. The numerator is the mean of all remaining
    returns; the denominator is the sample standard deviation of the
    strictly negative returns only. The result is scaled by
    ``sqrt(TRADING_DAYS_PER_YEAR)``.

    Returns NaN when there are no negative returns or when their standard
    deviation is exactly zero.
    """
    clean = r.dropna()
    losses = clean[clean < 0]
    if len(losses) == 0:
        return np.nan

    downside_vol = losses.std()
    if downside_vol == 0:
        return np.nan

    return np.sqrt(TRADING_DAYS_PER_YEAR) * clean.mean() / downside_vol

def max_drawdown(series: pd.Series) -> float:
    """Return the deepest peak-to-trough decline of a level series.

    NaNs are dropped first. Each value is compared with the running maximum
    up to that point and the smallest ``value / peak - 1`` is returned, so
    the result is zero or negative for positive-valued series. Returns NaN
    for an empty series.
    """
    levels = series.dropna()
    if levels.empty:
        return np.nan

    running_peak = levels.cummax()
    drawdowns = levels / running_peak - 1
    return drawdowns.min()

def calmar_ratio(total_return: float, maxdd: float) -> float:
    """Return ``total_return / |maxdd|``.

    NOTE: For the YTD block "total_return" is the return over the period,
    not a CAGR.

    Returns NaN when either input is NaN or when ``maxdd`` is not strictly
    negative (no drawdown to divide by).
    """
    if np.isnan(total_return):
        return np.nan
    if np.isnan(maxdd):
        return np.nan
    if maxdd >= 0:
        return np.nan
    return total_return / abs(maxdd)

def ytd_stats_for_date(dt: pd.Timestamp, daily: pd.DataFrame) -> dict:
    """Compute year-to-date performance statistics for strategy and SPY.

    The window runs from the later of January 1st of ``dt``'s year and
    ``FIRST_EXEC_DATE`` through ``dt`` inclusive, selected on ``daily``'s
    index.

    Args:
        dt: Last date of the window.
        daily: Frame indexed by date with ``portfolio_value``, ``strat_ret``,
            ``spy_close`` and ``spy_ret`` columns.

    Returns:
        Dict with ``strat_*`` and ``spy_*`` entries for ``ret``, ``maxdd``,
        ``sharpe``, ``sortino`` and ``calmar``. Every value is NaN when the
        window contains no rows.
    """
    strat_keys = ("strat_ret", "strat_maxdd", "strat_sharpe", "strat_sortino", "strat_calmar")
    spy_keys = ("spy_ret", "spy_maxdd", "spy_sharpe", "spy_sortino", "spy_calmar")

    window_start = max(pd.Timestamp(year=dt.year, month=1, day=1), FIRST_EXEC_DATE)
    in_window = (daily.index >= window_start) & (daily.index <= dt)
    ytd = daily[in_window]

    if ytd.empty:
        return dict.fromkeys(strat_keys + spy_keys, np.nan)

    def _leg(levels, rets):
        # Period return is measured first-row to last-row of the window.
        period_ret = levels.iloc[-1] / levels.iloc[0] - 1
        worst_dd = max_drawdown(levels)
        return (
            period_ret,
            worst_dd,
            sharpe_ratio(rets),
            sortino_ratio(rets),
            calmar_ratio(period_ret, worst_dd),
        )

    result = dict(zip(strat_keys, _leg(ytd["portfolio_value"], ytd["strat_ret"])))
    result.update(zip(spy_keys, _leg(ytd["spy_close"], ytd["spy_ret"])))
    return result


# ============================================================
# PRICE LOOKUP (Q3 = A: last-known close)
# ============================================================

def fast_price_lookup(px_array, date_val):
    """Return the last known price at or before ``date_val``.

    Args:
        px_array: Structured array with fields ``'date'`` and ``'px'``.
            The ``'date'`` field must be sorted ascending for the binary
            search to be meaningful.
        date_val: Lookup date; converted to ``datetime64[ns]``.

    Returns:
        The ``'px'`` value of the latest row dated on or before
        ``date_val``, or NaN when every row is later (or there are no rows).
    """
    as_of = np.datetime64(date_val, "ns")
    # side="right" counts the rows dated <= as_of.
    rows_on_or_before = np.searchsorted(px_array["date"], as_of, side="right")
    if rows_on_or_before == 0:
        return np.nan
    return px_array["px"][rows_on_or_before - 1]


# ============================================================
# CATEGORIZE WHY STOCK DIDN'T TRADE
# ============================================================

def categorize_no_trade(row):
    """Return a label explaining why a ranked stock produced no trade.

    ``row`` is read for ``weight_change``, ``current_shares``,
    ``target_weight``, ``shares_change``, ``close_adj`` and
    ``spy_above_200dma``. Checks run in a fixed order and the first match
    wins, so a row that satisfies several conditions gets the earliest label.
    """
    # 1. Weight is already close enough to target.
    if abs(row['weight_change']) < DRIFT_THRESHOLD:
        return 'Within drift threshold'

    # 2. Would open a brand-new position below the minimum weight.
    opens_new_position = row['current_shares'] == 0
    if opens_new_position and row['target_weight'] < MIN_NEW_POSITION_WEIGHT:
        return 'New position too small'

    # 3. Dollar size of the adjustment is below the minimum trade value.
    delta_shares = row['shares_change']
    if abs(delta_shares * row['close_adj']) < MIN_TRADE_VALUE:
        return 'Trade value too small'

    # 4/5. A buy that did not happen: blamed on the SPY regime when SPY is
    # not above its 200DMA, otherwise attributed to cash.
    if delta_shares > 0:
        if not row['spy_above_200dma']:
            return 'SPY regime prevented buy'
        return 'Insufficient cash'

    return 'Other'


# ============================================================
# LOAD DATA
# ============================================================

# Load the per-trade log and normalise it for execution-date reporting.
print("Loading input data...")

trades = pd.read_parquet(TRADES_FILE)

# --- normalize / validate new schema ---
# Fail fast with the list of absent columns instead of a later KeyError.
required = ["signal_date", "exec_date", "ticker", "type", "shares"]
missing = [c for c in required if c not in trades.columns]
if missing:
    raise ValueError(f"Trades file missing required columns: {missing}")

trades["signal_date"] = pd.to_datetime(trades["signal_date"])
trades["exec_date"]   = pd.to_datetime(trades["exec_date"])

# Back-compat helper: treat exec_date as the reporting "date"
# (a lot of older report logic groups by trades['date'])
trades["date"] = trades["exec_date"]

# IMPORTANT: preserve intra-day execution order; only stable-sort by exec_date.
# mergesort is a stable sort, so rows sharing an exec_date keep file order.
trades = trades.sort_values("date", kind="mergesort").reset_index(drop=True)

# ============================================================
# LOAD RANKINGS DATA
# ============================================================

print("Loading rankings data...")
rankings = pd.read_parquet(RANKINGS_FILE)

# Fail fast with a readable message (same style as the trades schema check)
# for the columns this section reads unconditionally.
required_rank_cols = [
    "signal_date", "exec_date", "ticker",
    "target_weight", "current_weight", "target_shares", "current_shares",
]
missing_rank_cols = [c for c in required_rank_cols if c not in rankings.columns]
if missing_rank_cols:
    raise ValueError(f"Rankings file missing required columns: {missing_rank_cols}")

rankings["signal_date"] = pd.to_datetime(rankings["signal_date"])
rankings["exec_date"] = pd.to_datetime(rankings["exec_date"])

# Collapse trades to one row per (signal_date, ticker) for merging.
# Named aggregation is used so the optional 'value' column is only referenced
# when it exists; the previous dict spec always keyed on 'value' and raised
# KeyError when the column was absent. In that case trade_value falls back to
# the number of trade rows (a count, NOT a dollar amount).
trade_agg_spec = {
    "trade_type": ("type", lambda x: ", ".join(x.unique())),
    "trade_shares": ("shares", "sum"),
    "trade_value": ("value", "sum") if "value" in trades.columns else ("shares", "count"),
}
trades_agg = (
    trades.groupby(["signal_date", "ticker"])
    .agg(**trade_agg_spec)
    .reset_index()
)

# Merge rankings with trades; the indicator column records which ranking rows
# found a matching trade.
rankings = rankings.merge(
    trades_agg,
    on=["signal_date", "ticker"],
    how="left",
    indicator=True,
)

# Add trade status
rankings["traded"] = rankings["_merge"] == "both"
rankings["traded_flag"] = rankings["traded"].map({True: "TRADED", False: "NOT_TRADED"})
rankings = rankings.drop(columns="_merge")

# Add analysis columns
rankings["weight_change"] = rankings["target_weight"] - rankings["current_weight"]
rankings["shares_change"] = rankings["target_shares"] - rankings["current_shares"]

# Categorize why stocks didn't trade. Traded rows stay NaN.
# The apply is skipped when no row is untraded: a row-wise apply on an empty
# frame returns an empty DataFrame, which cannot be assigned to one column.
not_traded = ~rankings["traded"]
no_trade_reason = pd.Series(np.nan, index=rankings.index, dtype="object")
if not_traded.any():
    no_trade_reason[not_traded] = rankings[not_traded].apply(categorize_no_trade, axis=1)
rankings["no_trade_reason"] = no_trade_reason

print(f"Loaded {len(rankings):,} ranking records")

# Group rankings by signal_date for quick lookup
rankings_by_signal_date = {d: sub for d, sub in rankings.groupby("signal_date")}

# ============================================================
# SLIPPAGE (signal close -> execution open)
# Positive slippage_dollars = worse (cost) for both BUY and SELL
# ============================================================

def add_slippage_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` with per-trade slippage columns added.

    Slippage is measured from the signal price (``signal_close_adj``) to the
    execution price (``exec_open_adj``, else ``price``). A positive value is
    a cost for both sides: BUY uses ``exec - signal``, SELL uses
    ``signal - exec``, and any other ``type`` yields NaN.

    Added columns:
        slippage_per_share: cost per share.
        slippage_dollars: cost per share times shares (non-numeric shares
            count as 0).
        slippage_bps: slippage dollars over absolute executed notional, in
            basis points; NaN when the notional is not positive.

    When the signal or execution price column is missing, all three columns
    are still added, filled with NaN. The input frame is not modified.
    """
    out = df.copy()

    # Choose the price columns robustly.
    has_signal_px = "signal_close_adj" in out.columns
    if "exec_open_adj" in out.columns:
        exec_col = "exec_open_adj"
    elif "price" in out.columns:
        exec_col = "price"
    else:
        exec_col = None

    if not has_signal_px or exec_col is None:
        # Still add the columns so downstream code doesn't break.
        for name in ("slippage_per_share", "slippage_dollars", "slippage_bps"):
            out[name] = np.nan
        return out

    direction = out["type"].astype(str).str.upper()
    qty = pd.to_numeric(out["shares"], errors="coerce").fillna(0).astype(float)
    px_signal = pd.to_numeric(out["signal_close_adj"], errors="coerce")
    px_exec = pd.to_numeric(out[exec_col], errors="coerce")

    # Cost per share by side; rows that are neither BUY nor SELL get NaN.
    per_share = np.select(
        [(direction == "BUY").to_numpy(), (direction == "SELL").to_numpy()],
        [(px_exec - px_signal).to_numpy(), (px_signal - px_exec).to_numpy()],
        default=np.nan,
    )
    dollars = per_share * qty

    # Bps versus absolute executed notional.
    notional = (px_exec * qty).abs()
    bps = ((dollars / notional) * 1e4).where(notional > 0).to_numpy()

    out["slippage_per_share"] = per_share
    out["slippage_dollars"] = dollars
    out["slippage_bps"] = bps
    return out


# Attach slippage_per_share / slippage_dollars / slippage_bps to every trade.
trades = add_slippage_columns(trades)


# Trade value normalization
# Guarantees a "trade_value" column when it can be derived: prefer an existing
# "value" column, otherwise compute shares * price. If neither source exists
# the column is simply not created.
if "trade_value" not in trades.columns:
    if "value" in trades.columns:
        trades["trade_value"] = trades["value"]
    else:
        # fallback if you only have shares*price
        if "price" in trades.columns:
            trades["trade_value"] = trades["shares"].astype(float) * trades["price"].astype(float)

# Audit fields are still required for cash before/after reporting
audit_cols = ["cash_before", "cash_after"]
missing_audit = [c for c in audit_cols if c not in trades.columns]
if missing_audit:
    raise ValueError(
        f"ERROR: Trades file is missing required audit fields: {missing_audit}. "
        f"Trading engine must write these per trade."
    )

# Earliest execution date in the trades file; used as a lower bound for the
# YTD windows (ytd_stats_for_date and the YTD slippage block).
FIRST_EXEC_DATE = trades["exec_date"].min()

# Equity curve, sorted chronologically so iloc[0] is the earliest row.
equity_df = pd.read_parquet(EQUITY_FILE)
equity_df["date"] = pd.to_datetime(equity_df["date"])
equity_df = equity_df.sort_values("date").reset_index(drop=True)

# INITIAL_CAPITAL from earliest portfolio_value (same as before)
INITIAL_CAPITAL = float(equity_df["portfolio_value"].iloc[0])

# Universe rows, sorted by date then ticker.
universe = pd.read_parquet(UNIVERSE_FILE)
universe["date"] = pd.to_datetime(universe["date"])
universe = universe.sort_values(["date", "ticker"]).reset_index(drop=True)

spy = pd.read_parquet(SPY_FILE)

# Normalize the date column.
# The date may arrive as a "date" column, a "Date" column, or in the index
# (named "Date"/"date" or unnamed). The index is only promoted when no date
# column exists yet: resetting it unconditionally turned a default RangeIndex
# into a second "date" column, after which spy["date"] was a DataFrame.
if "date" not in spy.columns:
    if "Date" in spy.columns:
        spy = spy.rename(columns={"Date": "date"})
    elif spy.index.name in ["Date", "date", None]:
        spy = spy.reset_index().rename(columns={"index": "date", "Date": "date"})
if "date" not in spy.columns:
    raise ValueError("SPY file missing date/Date column or date index.")
spy["date"] = pd.to_datetime(spy["date"])

# Ensure spy_close exists
if "spy_close" not in spy.columns:
    if "Close" in spy.columns:
        spy["spy_close"] = spy["Close"]
    else:
        raise ValueError("SPY file missing spy_close/Close column.")

# Row counts of the loaded inputs, for a quick sanity check in the console.
print(f"Trades:       {len(trades):,}")
print(f"Equity curve: {len(equity_df):,}")
print(f"Universe:     {len(universe):,}")
print(f"SPY rows:     {len(spy):,}")

# Per-date lookups so the loops below avoid re-filtering the full frames.
trades_by_exec_date = {d: sub for d, sub in trades.groupby("exec_date")}
universe_by_date    = {d: sub for d, sub in universe.groupby("date")}

# price history by ticker for Q3=A
# One structured array (date, px) per ticker, sorted by date, in the shape
# fast_price_lookup expects for its binary search.
px_by_ticker = {}
for t, sub in universe.groupby("ticker", sort=False):
    sub = sub.sort_values("date")
    arr = np.zeros(len(sub), dtype=[("date","datetime64[ns]"), ("px","float64")])
    arr["date"] = sub["date"].values.astype("datetime64[ns]")
    arr["px"]   = sub["close_adj"].astype(float).values
    px_by_ticker[t] = arr

# daily = equity + spy_close
# Left merge keeps every equity row; dates absent from the SPY file get a NaN
# spy_close. The first row of each return column has no predecessor and is
# set to 0 by fillna(0).
daily = equity_df.merge(spy[["date", "spy_close"]], on="date", how="left")
daily = daily.set_index("date").sort_index()
daily["strat_ret"] = daily["portfolio_value"].pct_change().fillna(0)
daily["spy_ret"]   = daily["spy_close"].pct_change().fillna(0)


# ============================================================
# RECONSTRUCT POSITIONS (EXECUTION-DAY snapshots)
# ============================================================

positions = {}          # live positions (ticker -> shares)
weekly_portfolios = {}  # snapshot per execution date (exec_dt -> holdings frame)

# Build report calendar from BOTH trades AND rankings
# This ensures we generate reports even for no-trade weeks.
# pd.to_datetime wraps the unique values so both sets hold pd.Timestamp
# objects: on pandas versions where Series.unique() returns a raw NumPy
# datetime64 array, the elements would otherwise lack .day_name()/.year.
# NaT is dropped because it can never be a report date.
trade_exec_dates = set(pd.to_datetime(trades["exec_date"].dropna().unique()))
ranking_exec_dates = set(pd.to_datetime(rankings["exec_date"].dropna().unique()))
exec_dates = sorted(trade_exec_dates | ranking_exec_dates)

# Zero-row frame with the universe's columns. It is used when an execution
# date has no universe rows, so the merge below still finds "ticker" and
# "close_adj" and the last-known-close backfill can take over. (An empty
# pd.DataFrame() made the merge raise KeyError: 'ticker'.)
_no_universe_rows = universe.iloc[0:0]

for exec_dt in exec_dates:
    todays = trades_by_exec_date.get(exec_dt, pd.DataFrame())

    # Apply trades in recorded order. Anything that is not a BUY reduces the
    # position; a position that reaches zero or below is removed.
    for _, tr in todays.iterrows():
        ticker = tr["ticker"]
        sh = int(tr["shares"])
        side = str(tr["type"]).upper()

        if side == "BUY":
            positions[ticker] = positions.get(ticker, 0) + sh
        else:
            positions[ticker] = positions.get(ticker, 0) - sh
            if positions[ticker] <= 0:
                positions.pop(ticker, None)

    # Snapshot on Thursday OR Friday (to handle holiday-shifted executions)
    # within the configured year range; other dates only update positions.
    is_report_day = (
        exec_dt.day_name() in ("Thursday", "Friday")
        and MIN_YEAR_FOR_REPORT <= exec_dt.year <= MAX_YEAR_FOR_REPORT
    )
    if not is_report_day:
        continue

    if not positions:
        weekly_portfolios[exec_dt] = pd.DataFrame()
        continue

    day_univ = universe_by_date.get(exec_dt, _no_universe_rows)
    pos_df = pd.DataFrame(list(positions.items()), columns=["ticker", "shares"])
    merged = pos_df.merge(day_univ, on="ticker", how="left")

    # Q3=A: backfill missing close_adj with last-known price
    def _fill_price(row):
        v = row.get("close_adj")
        if not pd.isna(v):
            return float(v)
        t = row["ticker"]
        if t in px_by_ticker:
            return float(fast_price_lookup(px_by_ticker[t], exec_dt))
        return np.nan

    merged["close_adj"] = merged.apply(_fill_price, axis=1)
    merged["market_value"] = merged["shares"] * merged["close_adj"]
    total_mv = merged["market_value"].sum()
    # Weights are relative to invested market value; 0.0 when nothing is priced.
    merged["weight"] = merged["market_value"] / total_mv if total_mv > 0 else 0.0

    weekly_portfolios[exec_dt] = merged


# ============================================================
# FRONT PAGE CHART
# ============================================================

# Render the front-page overview (normalized equity vs SPY on top, strategy
# drawdown below) and save it as a PNG inside OUTPUT_DIR.
chart_path = os.path.join(OUTPUT_DIR, "equity_drawdown_overview.png")

fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 7), sharex=True)

# Both series are rebased to 1.0 at the first row of `daily`.
# NOTE(review): if the first spy_close is NaN (date missing from the SPY
# file), the whole normalized SPY line becomes NaN.
norm_strat = daily["portfolio_value"] / daily["portfolio_value"].iloc[0]
norm_spy   = daily["spy_close"] / daily["spy_close"].iloc[0]

ax1.plot(daily.index, norm_strat, label="Strategy", color="blue")
ax1.plot(daily.index, norm_spy,   label="SPY",      color="orange")
ax1.set_title("Strategy vs SPY – Full Period")
ax1.legend()
ax1.grid(alpha=0.3)

# Drawdown of the strategy relative to its running peak (<= 0).
dd = daily["portfolio_value"] / daily["portfolio_value"].cummax() - 1
ax2.plot(daily.index, dd, color="red")
ax2.set_ylabel("Drawdown")
ax2.grid(alpha=0.3)

fig.tight_layout()
fig.savefig(chart_path, dpi=150)
# Close the figure explicitly so it is released once written to disk.
plt.close(fig)


# ============================================================
# PDF STYLES
# ============================================================

# Start from reportlab's sample stylesheet and register the extra paragraph
# styles this report refers to by name ("Small", "Tiny", "Header", "TitleBig").
styles = getSampleStyleSheet()
styles.add(ParagraphStyle(name="Small",  fontSize=8, leading=9))
styles.add(ParagraphStyle(name="Tiny",   fontSize=7, leading=8))
styles.add(ParagraphStyle(name="Header", fontSize=12, leading=14, spaceAfter=6, spaceBefore=6))
styles.add(ParagraphStyle(name="TitleBig", fontSize=16, leading=20, spaceAfter=10))


# ============================================================
# PDF SETUP
# ============================================================

# Timestamped file name so successive runs do not overwrite each other.
pdf_name = f"weekly_trading_logs_{datetime.now():%Y%m%d-%H%M%S}.pdf"
pdf_path = os.path.join(OUTPUT_DIR, pdf_name)

# Landscape letter pages leave more horizontal room for the wide trade tables.
doc = SimpleDocTemplate(
    pdf_path,
    pagesize=landscape(letter),
    rightMargin=36, leftMargin=36,
    topMargin=36,  bottomMargin=36,
)

# Flowables are accumulated here; the table helpers below append to this
# module-level list.
story = []

# Front page
story.append(Paragraph("Weekly Trading Logs", styles["TitleBig"]))
story.append(Paragraph(f"Generated: {datetime.now().isoformat(timespec='seconds')}", styles["Small"]))
story.append(Spacer(1, 0.2 * inch))
story.append(Image(chart_path, width=9.0 * inch, height=4.5 * inch))
story.append(PageBreak())


# ============================================================
# TABLE HELPERS
# ============================================================

def _fmt(v, col=None):
    if v is None:
        return "N/A"

    # Handle NaN / NaT cleanly
    if isinstance(v, (float, np.floating)) and np.isnan(v):
        return "N/A"
    if isinstance(v, (pd.Timestamp, np.datetime64)):
        return str(pd.Timestamp(v).date())

    # Numbers
    if isinstance(v, (int, np.integer)):
        return str(int(v))

    if isinstance(v, (float, np.floating)):
        # bps
        if col in ("slippage_bps",):
            return f"{float(v):,.1f}"

        # money-ish
        if col in (
            "trade_value", "value",
            "cash_before", "cash_after",
            "portfolio_after", "equity_after",
            "market_value", "slippage_dollars",
            "target_value",
        ):
            return f"{float(v):,.2f}"

        # prices
        if col in (
            "price", "signal_close_adj", "exec_open_adj",
            "close_adj", "spy_close", "spy_ma200", "ma100",
        ):
            return f"{float(v):,.2f}"

        # per-share slippage or other small-ish metrics
        if col in ("slippage_per_share",):
            return f"{float(v):0.4f}"

        # weights (as percentages)
        if col in ("target_weight", "current_weight", "weight", "raw_weight", "capped_weight"):
            return f"{float(v)*100:0.2f}%"

        # default float formatting
        return f"{float(v):0.4f}"

    return str(v)



def make_trade_table(df: pd.DataFrame, title: str):
    """Append a titled trade table for ``df`` to the module-level ``story``.

    Only the preferred columns that actually exist in ``df`` are shown, in a
    fixed order that puts signal and execution prices side by side. Rows are
    ordered by ``slope_rank_within_top`` when that column is present. An empty
    frame renders as the title followed by "None".
    """
    story.append(Paragraph(title, styles["Small"]))

    if df.empty:
        story.append(Paragraph("None", styles["Tiny"]))
        story.append(Spacer(1, 0.1 * inch))
        return

    # Display order; both signal and execution prices are shown when present.
    preferred_order = [
        "ticker",
        "type",
        "shares",
        "signal_date",
        "exec_date",
        "signal_close_adj",
        "exec_open_adj",
        "slippage_per_share",
        "slippage_dollars",
        "slippage_bps",
        "price",         # (your engine may set this = exec_open_adj)
        "trade_value",
        "reason",
        "slope_rank_within_top",
        "spy_above_200dma",
        "cash_before",
        "cash_after",
    ]
    visible = [name for name in preferred_order if name in df.columns]

    # Best-ranked trades first; unranked rows go to the bottom.
    if "slope_rank_within_top" in df.columns:
        df = df.sort_values("slope_rank_within_top", na_position="last")

    body_rows = [
        [_fmt(record[name], name) for name in visible]
        for _, record in df[visible].iterrows()
    ]
    table_data = [visible, *body_rows]

    style_commands = [
        ("BACKGROUND",(0,0),(-1,0),colors.lightgrey),
        ("GRID",(0,0),(-1,-1),0.25,colors.grey),
        ("FONTNAME",(0,0),(-1,-1),"Helvetica"),
        ("FONTSIZE",(0,0),(-1,-1),6.5),
        ("ALIGN",(1,1),(-1,-1),"RIGHT"),
    ]
    table = Table(table_data, hAlign="LEFT", repeatRows=1)
    table.setStyle(TableStyle(style_commands))

    story.append(table)
    story.append(Spacer(1, 0.12 * inch))


def make_ranked_stocks_table(df: pd.DataFrame):
    """Append the "All Ranked Stocks" table for ``df`` to the module-level ``story``.

    Shows every ranked stock with its trade status and, for stocks that were
    not traded, the reason. Only the preferred columns that exist in ``df``
    are displayed. Rows are ordered by ``slope_rank`` when that column is
    present. An empty frame renders as a short "No stocks ranked" note.
    """
    story.append(Paragraph("All Ranked Stocks (Top Percentile)", styles["Small"]))

    if df.empty:
        story.append(Paragraph("No stocks ranked this week.", styles["Tiny"]))
        story.append(Spacer(1, 0.1 * inch))
        return

    # Select columns to display
    cols_pref = [
        "slope_rank",
        "ticker",
        "traded_flag",
        "no_trade_reason",
        "slope_adj",
        "target_weight",
        "current_weight",
        "target_shares",
        "current_shares",
        "close_adj",
    ]

    cols = [c for c in cols_pref if c in df.columns]

    # Sort by rank. Guarded like make_trade_table: every display column is
    # optional, so a rankings frame without slope_rank must not raise here.
    if "slope_rank" in df.columns:
        df = df.sort_values("slope_rank", na_position="last")

    has_flag = "traded_flag" in cols
    data = [cols]
    for _, row in df[cols].iterrows():
        is_traded = has_flag and row["traded_flag"] == "TRADED"
        data.append([
            # A traded stock has no "no trade" reason: show a blank cell
            # instead of the N/A that _fmt would print for the missing value.
            "" if (c == "no_trade_reason" and is_traded) else _fmt(row[c], c)
            for c in cols
        ])

    tbl = Table(data, hAlign="LEFT", repeatRows=1)
    tbl.setStyle(TableStyle([
        ("BACKGROUND",(0,0),(-1,0),colors.lightgrey),
        ("GRID",(0,0),(-1,-1),0.25,colors.grey),
        ("FONTNAME",(0,0),(-1,-1),"Helvetica"),
        ("FONTSIZE",(0,0),(-1,-1),6),
        ("ALIGN",(1,1),(-1,-1),"RIGHT"),
        # Uniform black body text; traded rows are distinguished only by the
        # traded_flag column, not by colour.
        ("TEXTCOLOR",(0,1),(-1,-1),colors.black),
    ]))

    story.append(tbl)
    story.append(Spacer(1, 0.12 * inch))


# ============================================================
# SLIPPAGE BREAKDOWN HELPERS
# ============================================================

def slippage_breakdown(df: pd.DataFrame) -> dict:
    """Summarise slippage over a set of trades.

    Args:
        df: Trades with a ``slippage_dollars`` column, plus ``shares`` and an
            execution price (``exec_open_adj``, else ``price``) for the bps
            figure. May be None or empty.

    Returns:
        Dict with:
          net_dollars: sum(slippage_dollars) (positive = cost, negative = improvement)
          gross_cost_dollars: sum of positive slippage_dollars (cost only)
          gross_improve_dollars: magnitude of the summed negative slippage_dollars
          net_bps: net_dollars over executed notional, in basis points

        All four values are NaN when there is no trade with a usable slippage
        figure (None/empty input, missing column, or every value NaN), so the
        report prints N/A rather than a misleading 0.00. ``net_bps`` is also
        NaN when the notional is not positive.
    """
    def _unavailable():
        return {
            "net_dollars": np.nan,
            "gross_cost_dollars": np.nan,
            "gross_improve_dollars": np.nan,
            "net_bps": np.nan,
        }

    if df is None or df.empty or "slippage_dollars" not in df.columns:
        return _unavailable()

    slip = pd.to_numeric(df["slippage_dollars"], errors="coerce")
    measured = slip.notna()
    if not measured.any():
        return _unavailable()

    # Notional proxy for bps
    if "exec_open_adj" in df.columns:
        px = pd.to_numeric(df["exec_open_adj"], errors="coerce")
    elif "price" in df.columns:
        px = pd.to_numeric(df["price"], errors="coerce")
    else:
        px = pd.Series(np.nan, index=df.index)

    if "shares" in df.columns:
        sh = pd.to_numeric(df["shares"], errors="coerce")
    else:
        sh = pd.Series(np.nan, index=df.index)

    # Only trades that contributed a slippage figure count towards the
    # denominator; otherwise unmeasured trades would dilute the bps.
    notional = (px * sh).abs()[measured]

    net = float(slip.sum(skipna=True))
    gross_cost = float(slip.clip(lower=0.0).sum(skipna=True))
    # abs() after clipping keeps the result at +0.0 (not -0.0) when no trade
    # improved on the signal price.
    gross_improve = float(slip.clip(upper=0.0).abs().sum(skipna=True))

    tot_notional = float(notional.sum(skipna=True))
    net_bps = (net / tot_notional) * 1e4 if tot_notional > 0 else np.nan

    return {
        "net_dollars": net,
        "gross_cost_dollars": gross_cost,
        "gross_improve_dollars": gross_improve,
        "net_bps": net_bps,
    }


# ============================================================
# WEEKLY REPORT LOOP (EXECUTION DAY)
# ============================================================

weekly_dates = sorted(weekly_portfolios.keys())
prev_week_port_value = None

for exec_dt in weekly_dates:
    if exec_dt < START_TRADING or not (MIN_YEAR_FOR_REPORT <= exec_dt.year <= MAX_YEAR_FOR_REPORT):
        continue

    # trades executed this day
    day_trades = trades_by_exec_date.get(exec_dt, pd.DataFrame()).copy()

    # infer the signal date for this execution day (normally one unique)
    # infer the signal date for this execution day
    signal_dt = None
    
    # First try from trades
    if not day_trades.empty and "signal_date" in day_trades.columns:
        uniq = sorted(pd.to_datetime(day_trades["signal_date"]).unique())
        if len(uniq) >= 1:
            signal_dt = pd.Timestamp(uniq[0])
    
    # Fallback: look up signal_date from rankings if no trades
    if signal_dt is None:
        ranking_match = rankings[rankings["exec_date"] == exec_dt]
        if not ranking_match.empty and "signal_date" in ranking_match.columns:
            signal_dt = pd.Timestamp(ranking_match["signal_date"].iloc[0])

    # Get rankings for this signal date
    day_rankings = pd.DataFrame()
    if signal_dt is not None and signal_dt in rankings_by_signal_date:
        day_rankings = rankings_by_signal_date[signal_dt].copy()

    # Header
    # Header - indicate if no trades this week
    has_trades = not day_trades.empty
    header_suffix = "" if has_trades else " (NO TRADES)"
    story.append(Paragraph(f"Week of {exec_dt.date()} (EXECUTION DAY){header_suffix} — Year {exec_dt.year}", styles["Header"]))
    if signal_dt is not None:
        story.append(Paragraph(f"Signal day: {signal_dt.date()} ({signal_dt.day_name()})", styles["Small"]))
    story.append(Spacer(1, 0.08 * inch))

    # ======================================================
    # SPY regime message (use SIGNAL DAY if available, else EXEC DAY)
    # ======================================================
    spy_day = signal_dt if signal_dt is not None else exec_dt

    spy_row = spy[spy["date"] == spy_day]
    if not spy_row.empty:
        spy_close = float(spy_row["spy_close"].iloc[0])
        # many regime files store 200DMA under a specific name; try common options
        spy_ma200 = None
        for c in ("spy_ma200", "ma200", "MA200", "spy_200dma", "spy_ma_200"):
            if c in spy_row.columns:
                v = spy_row[c].iloc[0]
                if not pd.isna(v):
                    spy_ma200 = float(v)
                    break
    else:
        spy_close, spy_ma200 = np.nan, None

    if spy_ma200 is not None and not np.isnan(spy_ma200) and not np.isnan(spy_close):
        if spy_close >= spy_ma200:
            spy_status_msg = "<b>SPY is ABOVE 200-Day Moving Average — Buying Allowed (signal day)</b>"
        else:
            spy_status_msg = "<b>SPY is BELOW 200-Day Moving Average — No Buying Stock (signal day)</b>"
    else:
        spy_status_msg = "<b>SPY 200DMA unavailable for this date</b>"

    story.append(Paragraph(spy_status_msg, styles["Small"]))
    story.append(Spacer(1, 0.05 * inch))
    story.append(Paragraph(
        f"SPY Date: {spy_day.date()} | SPY Close: {_fmt(spy_close, 'spy_close')} | SPY 200DMA: "
        f"{'N/A' if spy_ma200 is None else _fmt(spy_ma200, 'spy_ma200')}",
        styles["Small"]
    ))
    story.append(Spacer(1, 0.12 * inch))

    # ======================================================
    # YTD PERFORMANCE (use execution date vs daily equity curve)
    # ======================================================
    stats = ytd_stats_for_date(exec_dt, daily)
    
    # ======================================================
    # YTD SLIPPAGE (net + gross cost + gross improvement), through exec_dt
    # ======================================================
    ystart = max(pd.Timestamp(exec_dt.year, 1, 1), FIRST_EXEC_DATE)
    ytd_tr = trades[(trades["exec_date"] >= ystart) & (trades["exec_date"] <= exec_dt)]

    yb = slippage_breakdown(ytd_tr)
    ytd_slip_net_dollars = yb["net_dollars"]
    ytd_slip_gross_cost_dollars = yb["gross_cost_dollars"]
    ytd_slip_gross_improve_dollars = yb["gross_improve_dollars"]
    ytd_slip_net_bps = yb["net_bps"]

    perf_data = [
        ["Metric", "Strategy", "SPY"],
        ["YTD Return",
         f"{stats['strat_ret']*100:5.2f}%"  if not np.isnan(stats['strat_ret']) else "N/A",
         f"{stats['spy_ret']*100:5.2f}%"    if not np.isnan(stats['spy_ret'])   else "N/A"],
        ["YTD Max Drawdown",
         f"{stats['strat_maxdd']*100:5.2f}%" if not np.isnan(stats['strat_maxdd']) else "N/A",
         f"{stats['spy_maxdd']*100:5.2f}%"   if not np.isnan(stats['spy_maxdd'])   else "N/A"],
        ["YTD Sharpe",
         f"{stats['strat_sharpe']:5.2f}" if not np.isnan(stats['strat_sharpe']) else "N/A",
         f"{stats['spy_sharpe']:5.2f}"   if not np.isnan(stats['spy_sharpe'])   else "N/A"],
        ["YTD Sortino",
         f"{stats['strat_sortino']:5.2f}" if not np.isnan(stats['strat_sortino']) else "N/A",
         f"{stats['spy_sortino']:5.2f}"   if not np.isnan(stats['spy_sortino'])   else "N/A"],
        ["YTD Calmar",
         f"{stats['strat_calmar']:5.2f}" if not np.isnan(stats['strat_calmar']) else "N/A",
         f"{stats['spy_calmar']:5.2f}"   if not np.isnan(stats['spy_calmar'])   else "N/A"],
        ["YTD Slippage (net $)",
         "N/A" if np.isnan(ytd_slip_net_dollars) else f"{ytd_slip_net_dollars:,.2f}",
         "N/A"],
        ["YTD Slippage (gross cost $)",
         "N/A" if np.isnan(ytd_slip_gross_cost_dollars) else f"{ytd_slip_gross_cost_dollars:,.2f}",
         "N/A"],
        ["YTD Slippage (gross improve $)",
         "N/A" if np.isnan(ytd_slip_gross_improve_dollars) else f"{ytd_slip_gross_improve_dollars:,.2f}",
         "N/A"],
        ["YTD Slippage (net bps)",
         "N/A" if np.isnan(ytd_slip_net_bps) else f"{ytd_slip_net_bps:,.1f}",
         "N/A"],
    ]

    t = Table(perf_data, hAlign="LEFT")
    t.setStyle(TableStyle([
        ("BACKGROUND",(0,0),(-1,0),colors.lightgrey),
        ("GRID",(0,0),(-1,-1),0.25,colors.grey),
        ("FONTNAME",(0,0),(-1,-1),"Helvetica"),
        ("FONTSIZE",(0,0),(-1,-1),8),
        ("ALIGN",(1,1),(-1,-1),"RIGHT"),
    ]))
    story.append(t)
    story.append(Spacer(1, 0.15 * inch))

    # ======================================================
    # BUILD RANK MAP (use SIGNAL DAY universe if available)
    # ======================================================
    # rank_map: ticker -> 1-based position (1 = highest slope_adj) among the
    # signal-day rows whose slope_adj is at or above the TOP_PERCENTILE quantile.
    # It stays empty when there is no signal date, no universe rows for that
    # date, or no non-null slope_adj values.
    rank_map = {}
    if signal_dt is None:
        day_univ_signal = pd.DataFrame()
    else:
        day_univ_signal = universe_by_date.get(signal_dt, pd.DataFrame())

    has_slope_data = (
        not day_univ_signal.empty
        and "slope_adj" in day_univ_signal.columns
    )
    if has_slope_data:
        slope_known = day_univ_signal["slope_adj"].notna()
        rankable = day_univ_signal[slope_known].copy()
        if not rankable.empty:
            rankable = rankable.sort_values("slope_adj", ascending=False)
            cutoff = rankable["slope_adj"].quantile(TOP_PERCENTILE)
            top_slice = rankable[rankable["slope_adj"] >= cutoff].copy()
            if not top_slice.empty:
                top_slice = top_slice.sort_values("slope_adj", ascending=False)
                top_slice["slope_rank_within_top"] = np.arange(1, len(top_slice) + 1)
                # Positions follow the sorted row order, so enumerating the
                # tickers reproduces the rank column just assigned.
                rank_map = {
                    tkr: pos
                    for pos, tkr in enumerate(top_slice["ticker"], start=1)
                }

    # ======================================================
    # ENRICH DAY TRADES WITH SIGNAL-DAY UNIVERSE FIELDS (optional)
    # ======================================================
    if not day_trades.empty and not day_univ_signal.empty:
        # Merge on ticker only (dates differ: signal_date vs exec_date), so the
        # universe's own "date" column is left out. Universe columns that clash
        # with trade columns get the "_u" suffix; trade columns keep their names.
        univ_cols = [c for c in day_univ_signal.columns if c != "date"]
        # Keep one universe row per ticker. A left merge against repeated
        # tickers would emit one output row per match, silently duplicating
        # trades and corrupting the buy/sell totals, slippage and the
        # first/last-row cash audit computed from day_trades below.
        univ_fields = day_univ_signal[univ_cols].drop_duplicates(subset="ticker", keep="first")
        day_trades = day_trades.merge(
            univ_fields,
            on="ticker",
            how="left",
            suffixes=("", "_u"),
        )

    # Rank column: keep whatever the trades file already carries and fill the
    # gaps from rank_map; if the file has no rank column, map it entirely.
    if not day_trades.empty:
        mapped_rank = day_trades["ticker"].map(rank_map)
        if "slope_rank_within_top" in day_trades.columns:
            day_trades["slope_rank_within_top"] = (
                day_trades["slope_rank_within_top"].fillna(mapped_rank)
            )
        else:
            day_trades["slope_rank_within_top"] = mapped_rank

    # Split by side (case-insensitive). Values are coerced to str first so a
    # non-string "type" column (e.g. all NaN) yields no matches instead of
    # raising from the .str accessor.
    if not day_trades.empty and "type" in day_trades:
        trade_side = day_trades["type"].astype(str).str.upper()
        buys = day_trades[trade_side == "BUY"]
        sells = day_trades[trade_side == "SELL"]
    else:
        buys = pd.DataFrame()
        sells = pd.DataFrame()
    
    # ======================================================
    # WEEKLY SLIPPAGE (net + gross cost + gross improvement)
    # ======================================================
    # slippage_breakdown is called on this execution day's trades; the four
    # entries read from its result are shown in the weekly summary table.
    wb = slippage_breakdown(day_trades)
    (
        weekly_slip_net_dollars,
        weekly_slip_gross_cost_dollars,
        weekly_slip_gross_improve_dollars,
        weekly_slip_net_bps,
    ) = (
        wb["net_dollars"],
        wb["gross_cost_dollars"],
        wb["gross_improve_dollars"],
        wb["net_bps"],
    )

    # ======================================================
    # AUDITED CASH (from this EXECUTION DAY trades)
    # ======================================================
    # Source priority for the cash figures:
    #   1. the day's trades (cash_before of the first row, cash_after of the last),
    #   2. the "cash" column of `daily` at exec_dt,
    #   3. the "cash" column of the equity_df row dated exec_dt,
    #   4. NaN when none of the above is available.
    if not day_trades.empty:
        first_trade_cash = float(day_trades["cash_before"].iloc[0])
        cash_after = float(day_trades["cash_after"].iloc[-1])

        # On the first execution day, initial capital is reported as the
        # starting cash instead of the value recorded on the first trade.
        cash_before = INITIAL_CAPITAL if exec_dt == FIRST_EXEC_DATE else first_trade_cash
        cash_delta = cash_after - cash_before
    elif exec_dt in daily.index and "cash" in daily.columns:
        # No trades executed this day: cash is unchanged, so before == after.
        cash_after = float(daily.loc[exec_dt, "cash"])
        cash_before = cash_after
        cash_delta = 0.0
    else:
        # Last resort: look the date up directly in the equity curve.
        equity_row = equity_df[equity_df["date"] == exec_dt]
        if equity_row.empty or "cash" not in equity_row.columns:
            cash_before = cash_after = cash_delta = np.nan
        else:
            cash_after = float(equity_row["cash"].iloc[0])
            cash_before = cash_after
            cash_delta = 0.0

    # ======================================================
    # WEEKLY SUMMARY (execution-day end-of-day values)
    # ======================================================
    if exec_dt in daily.index:
        week_port_value = float(daily.loc[exec_dt, "portfolio_value"])
    else:
        week_port_value = np.nan

    # Securities value AFTER trades, summed from the reconstructed holdings.
    week_port = weekly_portfolios.get(exec_dt, pd.DataFrame())
    if week_port.empty or "market_value" not in week_port.columns:
        sec_after = np.nan
    else:
        sec_after = float(week_port["market_value"].sum())

    # Cash health flag; an unknown (NaN) cash figure is reported as "OK".
    if np.isnan(cash_after):
        cash_status = "OK"
    elif cash_after < 0:
        cash_status = "NEGATIVE (AUDIT)"
    elif cash_after < LOW_CASH_FLOOR:
        cash_status = f"LOW CASH ( < {LOW_CASH_FLOOR:,.0f} )"
    else:
        cash_status = "OK"

    # Week-over-week change needs a usable, non-zero previous value and a
    # usable current value; otherwise both figures are NaN.
    if (prev_week_port_value is None
            or np.isnan(prev_week_port_value)
            or np.isnan(week_port_value)
            or prev_week_port_value == 0):
        weekly_ret = weekly_change = np.nan
    else:
        weekly_ret = week_port_value / prev_week_port_value - 1.0
        weekly_change = week_port_value - prev_week_port_value

    # Dollar totals per side; 0.0 when there are no rows or no trade_value column.
    if "trade_value" in buys.columns and not buys.empty:
        week_buys_value = float(buys["trade_value"].sum())
    else:
        week_buys_value = 0.0

    if "trade_value" in sells.columns and not sells.empty:
        week_sells_value = float(sells["trade_value"].sum())
    else:
        week_sells_value = 0.0

    def _num_or_na(value, spec=",.2f", suffix=""):
        """Format *value* with *spec* (plus *suffix*), or return "N/A" for NaN."""
        return "N/A" if np.isnan(value) else f"{value:{spec}}{suffix}"

    # Two-column (label, amount) rows; blank rows act as visual separators.
    summary_data = [
        ["Item", "Amount"],
        ["--- AUDITED CASH (EXECUTION DAY) ---", ""],
        ["Cash BEFORE first trade", _num_or_na(cash_before)],
        ["Cash AFTER last trade", _num_or_na(cash_after)],
        ["Cash Δ (After - Before)", _num_or_na(cash_delta)],

        ["", ""],
        ["--- PORTFOLIO (AFTER TRADES, END OF DAY) ---", ""],
        ["Total Portfolio Value", _num_or_na(week_port_value)],
        ["Total Securities Value", _num_or_na(sec_after)],
        ["Total Cash Value (Audited)", _num_or_na(cash_after)],
        ["Cash Status", cash_status],

        ["", ""],
        # Return is stored as a fraction; it is shown as a percentage.
        ["Weekly Portfolio Return", _num_or_na(weekly_ret * 100, suffix="%")],
        ["Weekly P&L (Δ value)", _num_or_na(weekly_change)],

        ["Weekly Slippage (net $)", _num_or_na(weekly_slip_net_dollars)],
        ["Weekly Slippage (gross cost $)", _num_or_na(weekly_slip_gross_cost_dollars)],
        ["Weekly Slippage (gross improve $)", _num_or_na(weekly_slip_gross_improve_dollars)],
        ["Weekly Slippage (net bps)", _num_or_na(weekly_slip_net_bps, spec=",.1f")],

        ["Total Dollar Buys", f"{week_buys_value:,.2f}"],
        ["Total Dollar Sells", f"{week_sells_value:,.2f}"],
        ["Net Buy/Sell Flow", f"{(week_buys_value - week_sells_value):,.2f}"],
    ]

    summary_style = TableStyle([
        ("BACKGROUND", (0, 0), (-1, 0), colors.lightgrey),
        ("GRID", (0, 0), (-1, -1), 0.25, colors.grey),
        ("FONTNAME", (0, 0), (-1, -1), "Helvetica"),
        ("FONTSIZE", (0, 0), (-1, -1), 8),
        ("ALIGN", (1, 1), (-1, -1), "RIGHT"),
    ])
    summary_tbl = Table(summary_data, hAlign="LEFT")
    summary_tbl.setStyle(summary_style)
    story.append(summary_tbl)
    story.append(Spacer(1, 0.15 * inch))

    # Carry this week's portfolio value forward so the next iteration can
    # compute its week-over-week return. A NaN here makes next week's return
    # and P&L show as N/A, because the return calculation rejects a NaN baseline.
    prev_week_port_value = week_port_value

    # ======================================================
    # NEW: ALL RANKED STOCKS TABLE (BEFORE FILTERS)
    # ======================================================
    # NOTE(review): make_ranked_stocks_table is defined outside this section;
    # presumably it appends the ranked-stocks table for day_rankings to `story`
    # — confirm against its definition.
    make_ranked_stocks_table(day_rankings)

    # ======================================================
    # TRADES TABLES (NOW INCLUDE signal/execution prices)
    # ======================================================
    # Sells are emitted before buys; the second argument is the section title.
    # NOTE(review): make_trade_table is defined outside this section and is
    # presumably responsible for handling an empty frame — confirm.
    make_trade_table(sells, "Sells (shows signal_close_adj + exec_open_adj)")
    make_trade_table(buys,  "Buys (shows signal_close_adj + exec_open_adj)")

    # ======================================================
    # PORTFOLIO TABLE (post-trade holdings valued at EXEC CLOSE)
    # ======================================================
    story.append(Paragraph("Current Portfolio (post-trade, valued at execution-day close)", styles["Small"]))

    port = weekly_portfolios.get(exec_dt, pd.DataFrame()).copy()
    if port.empty:
        story.append(Paragraph("No open positions.", styles["Tiny"]))
        story.append(PageBreak())
        continue

    # Add slope ranks using SIGNAL rank_map (more meaningful for that week's selection)
    if rank_map:
        port["slope_rank_within_top"] = port["ticker"].map(rank_map)

    port_cols = [
        "ticker","shares","close_adj","market_value","weight",
        "slope_rank_within_top","slope_adj","above_ma100",
        "no_big_jump_90","ma100"
    ]
    port_cols = [c for c in port_cols if c in port.columns]

    if "slope_rank_within_top" in port.columns:
        port = port.sort_values("slope_rank_within_top", na_position="last")

    data = [port_cols]
    for _, row in port[port_cols].iterrows():
        row_out = []
        for c in port_cols:
            v = row[c]
            if c == "weight" and not pd.isna(v):
                row_out.append(f"{float(v)*100:0.2f}%")
            else:
                row_out.append(_fmt(v, c))
        data.append(row_out)

    tbl = Table(data, hAlign="LEFT", repeatRows=1)
    tbl.setStyle(TableStyle([
        ("BACKGROUND",(0,0),(-1,0),colors.lightgrey),
        ("GRID",(0,0),(-1,-1),0.25,colors.grey),
        ("FONTNAME",(0,0),(-1,-1),"Helvetica"),
        ("FONTSIZE",(0,0),(-1,-1),6.5),
        ("ALIGN",(1,1),(-1,-1),"RIGHT"),
    ]))

    story.append(tbl)
    story.append(PageBreak())


# ============================================================
# BUILD & SAVE PDF
# ============================================================

# Lay out every flowable accumulated in `story` and write the document.
# NOTE(review): `doc` and `pdf_path` are created earlier in the file;
# presumably `doc` is the SimpleDocTemplate targeting pdf_path — confirm.
doc.build(story)

print(
    "=== COMPLETE ===",
    f"Weekly trading log PDF saved → {pdf_path}",
    sep="\n",
)

Loading input data...
Loading rankings data...
Loaded 78 ranking records
Trades:       9
Equity curve: 11
Universe:     3,591,967
SPY rows:     7,042


  daily["spy_ret"]   = daily["spy_close"].pct_change().fillna(0)


=== COMPLETE ===
Weekly trading log PDF saved → ./15a-match_weekly_trading_logs\weekly_trading_logs_20260105-115659.pdf
