# In [4]:  (Jupyter cell marker left over from a notebook export)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
SMC Swing Backtester (Long-only) — Weekly Bias + Daily Execution
-----------------------------------------------------------------

Replaces prior EMA/RSI/ADX logic with Smart Money Concepts:

DETECTION (on each timeframe):
  • Swing highs/lows via left/right windows
  • Market Structure Events: BOS_up / BOS_down / CHoCH_up / CHoCH_down
  • Liquidity Sweeps: price takes prior swing low/high and closes back inside
  • Fair Value Gaps (FVG): simple 3-candle imbalance
  • Order Blocks (OB): last opposing candle body before BOS

STRATEGY (long-only):
  • Bias (Weekly): trend == 'up' (from structure) OR recent BOS_up/CHoCH_up on WEEKLY
  • Trigger (Daily): (recent sweep_low in last N bars) OR (recent CHoCH_up/BOS_up on DAILY)
  • Optional momentum confirm: Daily close > prior Daily high
  • Entry: at next day's open by default (configurable)
  • Stop: OB-low if present else ATR(14)*k below entry (intrabar checks)
  • Targets: TP1=1R (move SL→BE), TP2=2R; Structure flip (CHoCH_down) or Time stop exits

PORTFOLIO:
  • Your fee model preserved
  • Optional 52-week proximity filter before allocating
  • VOLAR ranking + long-only Markowitz sizing
  • Max concurrent positions + capped deployable cash per day

NOTES:
  • No EMA/RSI/ADX anywhere; ATR is used *only* for risk.
  • Uses yfinance EOD data for both Weekly (via resample) and Daily.

"""

import os, json, math, warnings, logging
from dataclasses import dataclass
from typing import Dict, List, Tuple, Optional

import numpy as np
import pandas as pd

try:
    import yfinance as yf
    import matplotlib.pyplot as plt
except Exception:
    pass

# Suppress every FutureWarning process-wide so deprecation notices do not flood the run log.
warnings.filterwarnings("ignore", category=FutureWarning)

# =========================
# LOGGING
# =========================
# Root logger: INFO level, "timestamp | LEVEL | message" lines with second resolution.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s | %(levelname)s | %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)
# Module-wide logger used by the functions in this file.
log = logging.getLogger("smc_swing_long_only_WD")

# =========================
# CONFIG
# =========================
@dataclass
class Config:
    """All tunable settings for the backtest: data range, SMC detection, entry/exit rules and portfolio limits."""

    # Data
    start_date: str = "2015-01-01"                   # passed to fetch_prices as the download start
    end_date: str   = "2025-01-01"                   # passed to fetch_prices as the download end
    static_symbols: Optional[List[str]] = None       # e.g., ["RELIANCE.NS", ...]
    static_symbols_path: Optional[str] = None        # or path to file with one symbol per line
    cache_dir: str = "cache"                         # directory for per-ticker parquet caches
    out_dir: str   = "outputs"                       # created by backtest(); presumably holds reports/plots — confirm in the driver
    plot: bool     = True                            # presumably toggles the equity plot — usage not visible in this section

    # --- SMC params (Daily) ---
    swing_left_D: int = 2             # bars to the left of a pivot candidate
    swing_right_D: int = 2            # bars to the right of a pivot candidate
    min_fvg_gap_pct_D: float = 0.02   # 0.02% gap (detect_fvg divides this by 100)
    lookback_D: int = 5000            # only the last N daily bars are analysed

    # --- SMC params (Weekly bias) ---
    swing_left_W: int = 2
    swing_right_W: int = 2
    min_fvg_gap_pct_W: float = 0.02   # percent, same convention as the daily value
    lookback_W: int = 2000            # only the last N weekly bars are analysed

    # Entry recipe (Daily execution)
    recent_sweep_lookback: int = 10   # a sweep_low within this many daily bars counts as a trigger
    require_fvg_or_ob: bool = False   # loosened: sweep OR structure is enough; FVG/OB optional
    momentum_confirm: bool = False    # loosened: allow entries without D close > prior high

    # Exits
    atr_len: int = 14                 # ATR window (bars)
    atr_mult_sl: float = 1.5          # stop distance = atr_mult_sl * ATR below the entry price
    rr_tp1: float = 1.0               # TP1 as a multiple of initial risk
    rr_tp2: float = 2.0               # TP2 as a multiple of initial risk
    time_stop_bars: int = 15          # bars allowed in a trade before the time stop fires
    exit_on_next_open: bool = True    # discretionary exits fill at the next bar's open (else this bar's close)
    entry_on_next_open: bool = True   # entries fill at the next bar's open (else the signal bar's close)

    # Portfolio / universe
    apply_fees: bool    = True        # copied into the module-level APPLY_FEES switch by aggregate_and_apply
    initial_capital: float = 200_000.0
    max_concurrent_positions: int = 6
    deploy_cash_frac: float = 0.35    # cap daily deployable cash

    # Ranking / filters
    # Benchmark tickers, tried in order by pick_benchmark until one returns data.
    benchmark_try: Tuple[str,...] = ("^CNX500","^CRSLDX","^NSE500","^NIFTY500","^BSE500","^NSEI")
    volar_lookback: int = 252             # return observations used for VOLAR scores and the covariance estimate
    filter_52w_window: int = 252          # bars used for the rolling "52-week" high
    within_pct_of_52w_high: float = 0.60  # keep a candidate only if close >= this fraction of that high
    enable_52w_filter: bool = True
    top_k_daily: int = 4                  # at most this many new positions per trade date

# Default configuration instance.
CFG = Config()

# =========================
# FEES (unchanged)
# =========================
APPLY_FEES = True
def calc_fees(turnover_buy: float, turnover_sell: float) -> float:
    """Return the total transaction cost (INR) for the given buy and sell turnover.

    Components: capped percentage brokerage per side, STT on both sides,
    stamp duty on the buy side, exchange/SEBI/IPFT charges on both sides,
    a flat DP charge on sells of at least 100, and GST on brokerage, DP and
    the exchange/SEBI/IPFT charges. Returns 0.0 when the module-level
    APPLY_FEES switch is off.
    """
    if not APPLY_FEES:
        return 0.0

    broker_rate, broker_floor, broker_ceiling = 0.001, 5.0, 20.0
    stt_rate = 0.001
    stamp_rate = 0.00015
    exchange_rate = 0.0000297
    sebi_rate = 0.000001
    ipft_rate = 0.000001
    gst_rate = 0.18

    def _brokerage(amount):
        # No brokerage on an empty side; otherwise clamp the percentage fee to [floor, ceiling].
        if amount <= 0:
            return 0.0
        return max(broker_floor, min(amount * broker_rate, broker_ceiling))

    brokerage_buy = _brokerage(turnover_buy)
    brokerage_sell = _brokerage(turnover_sell)
    stt_charge = stt_rate * (turnover_buy + turnover_sell)
    stamp_charge = stamp_rate * turnover_buy
    exchange_charge = exchange_rate * (turnover_buy + turnover_sell)
    sebi_charge = sebi_rate * (turnover_buy + turnover_sell)
    ipft_charge = ipft_rate * (turnover_buy + turnover_sell)
    dp_charge = 20.0 if turnover_sell >= 100 else 0.0

    # GST applies to the service-type charges only (not STT or stamp duty).
    taxable = brokerage_buy + brokerage_sell + dp_charge + exchange_charge + sebi_charge + ipft_charge
    gst_charge = gst_rate * taxable

    return float(
        (brokerage_buy + brokerage_sell) + stt_charge + stamp_charge
        + exchange_charge + sebi_charge + ipft_charge + dp_charge + gst_charge
    )

# =========================
# Helpers
# =========================
def ensure_dirs(*paths):
    """Create each directory in *paths* (with parents); existing directories are left untouched."""
    for directory in paths:
        os.makedirs(directory, exist_ok=True)

def today_str():
    """Return the current date in the Asia/Kolkata timezone as 'YYYY-MM-DD'."""
    now_kolkata = pd.Timestamp.today(tz="Asia/Kolkata")
    return now_kolkata.strftime("%Y-%m-%d")

def load_static_symbols(static_symbols: Optional[List[str]], static_symbols_path: Optional[str]) -> List[str]:
    """Return the de-duplicated, upper-cased symbol universe with '.NS' suffixes.

    An explicit non-empty list wins; otherwise symbols are read from the file
    (one per line, blank lines ignored). First-seen order is preserved.

    Raises:
        ValueError: if neither a non-empty list nor an existing file path is given.
    """
    if static_symbols and len(static_symbols) > 0:
        raw = list(static_symbols)
    elif static_symbols_path and os.path.exists(static_symbols_path):
        with open(static_symbols_path, "r") as handle:
            raw = [entry.strip() for entry in handle if entry.strip()]
    else:
        raise ValueError(
            "Provide CFG.static_symbols=[...] ('.NS' suffixes) or set CFG.static_symbols_path "
            "to a file containing one symbol per line."
        )

    cleaned = (item.strip().upper() for item in raw)
    suffixed = [sym if sym.endswith(".NS") else f"{sym}.NS" for sym in cleaned]
    # dict keys keep insertion order, which gives an order-preserving de-dup.
    return list(dict.fromkeys(suffixed))

def fetch_prices(tickers: List[str], start: str, end: Optional[str], cache_dir: str) -> Dict[str, pd.DataFrame]:
    """Load adjusted daily OHLCV for each ticker, using a per-ticker parquet cache.

    Args:
        tickers: symbols to load.
        start: download start date (YYYY-MM-DD).
        end: download end date; defaults to today (Asia/Kolkata) when falsy.
        cache_dir: directory holding ``<ticker>.parquet`` files ('^' replaced by '_').

    Returns:
        Mapping ticker -> DataFrame with Open/High/Low/Close/Volume columns.
        Tickers that yield no data or fail to download are omitted (a warning
        is logged instead of failing the whole run).
    """
    ensure_dirs(cache_dir)
    data: Dict[str, pd.DataFrame] = {}
    end = end or today_str()

    # yfinance treats `end` as exclusive, so the newest bar a complete download
    # can contain is the last business day strictly before `end`. Comparing the
    # cache against `end` itself (as before) could never succeed, so the cache
    # was effectively never reused. If that business day was an exchange holiday
    # the cache is simply refreshed, which is safe.
    newest_expected = (pd.Timestamp(end) - pd.offsets.BDay(1)).strftime("%Y-%m-%d")

    for ticker in tickers:
        cache_path = os.path.join(cache_dir, f"{ticker.replace('^', '_')}.parquet")

        if os.path.exists(cache_path):
            try:
                cached = pd.read_parquet(cache_path)
            except Exception as exc:
                # Corrupt/unreadable cache: fall through to a fresh download.
                log.warning("Cache read failed for %s (%s); re-downloading.", ticker, exc)
            else:
                if len(cached) and pd.to_datetime(cached.index[-1]).strftime("%Y-%m-%d") >= newest_expected:
                    data[ticker] = cached
                    continue

        try:
            df = yf.download(ticker, start=start, end=end, auto_adjust=True, progress=False, multi_level_index=False)
            if df is None or df.empty:
                log.warning("No data returned for %s; skipping.", ticker)
                continue
            df = df.rename(columns=str.title)  # Open, High, Low, Close, Volume
            df = df[['Open', 'High', 'Low', 'Close', 'Volume']].dropna()
            df.index.name = "date"
        except Exception as exc:
            # Also covers a missing yfinance import (NameError) and schema surprises.
            log.warning("Download failed for %s (%s); skipping.", ticker, exc)
            continue

        data[ticker] = df
        try:
            df.to_parquet(cache_path)
        except Exception as exc:
            # Caching is best-effort; the in-memory data is still returned.
            log.warning("Could not write cache for %s (%s).", ticker, exc)
    return data

# =========================
# Risk (ATR)
# =========================
def ATR(df: pd.DataFrame, n=14) -> pd.Series:
    """Average True Range: simple n-bar mean of the true range (NaN until n bars exist)."""
    prior_close = df['Close'].shift()
    components = pd.DataFrame({
        "bar_range": df['High'] - df['Low'],
        "high_vs_prev": (df['High'] - prior_close).abs(),
        "low_vs_prev": (df['Low'] - prior_close).abs(),
    })
    # Row-wise max skips NaN, so the first bar falls back to its plain High-Low range.
    true_range = components.max(axis=1)
    return true_range.rolling(n, min_periods=n).mean()

# =========================
# -------- SMC CORE -------
# =========================
@dataclass
class SMCParams:
    """Per-timeframe parameters for the SMC detectors (swings, FVG, sweeps, pipeline lookback)."""
    swing_left: int = 2        # bars to the left of a pivot candidate (see _swing_points)
    swing_right: int = 2       # bars to the right of a pivot candidate
    min_gap_pct: float = 0.02  # e.g., 0.02% -> 0.0002
    lookback: int = 5000       # run_smc_pipeline keeps only the last `lookback` rows

def _swing_points(df: pd.DataFrame, L: int, R: int) -> Tuple[pd.Series, pd.Series]:
    h, l = df['High'].values, df['Low'].values
    n = len(df)
    swing_high = np.zeros(n, dtype=bool)
    swing_low  = np.zeros(n, dtype=bool)
    for i in range(L, n - R):
        if h[i] == max(h[i-L:i+R+1]): swing_high[i] = True
        if l[i] == min(l[i-L:i+R+1]): swing_low[i]  = True
    return pd.Series(swing_high, index=df.index), pd.Series(swing_low, index=df.index)

def detect_market_structure(df: pd.DataFrame, prm: SMCParams) -> pd.DataFrame:
    """Annotate bars with swing pivots, running trend and structure events.

    Adds columns:
      * swing_high / swing_low: pivot flags from _swing_points (marked on the pivot bar).
      * trend: 'up' / 'down' / None, the direction of the latest structure break.
      * ms_event: 'BOS_up'/'BOS_down' when a close breaks the last confirmed swing
        in the direction of the current trend, 'CHoCH_up'/'CHoCH_down' when the
        break flips (or starts) the trend, else None.

    A pivot at bar j needs `prm.swing_right` later bars to be identified, so its
    level only becomes usable at bar j + swing_right. Using it on the pivot bar
    itself (as before) leaked future information into the events.
    """
    out = df.copy()
    sh, sl = _swing_points(df, prm.swing_left, prm.swing_right)
    out['swing_high'] = sh
    out['swing_low']  = sl

    highs = df['High'].to_numpy()
    lows = df['Low'].to_numpy()
    closes = df['Close'].to_numpy()
    is_sh = sh.to_numpy()
    is_sl = sl.to_numpy()
    confirm_lag = max(int(prm.swing_right), 0)

    trend = []; event = []
    last_trend = None
    last_sh = np.nan; last_sl = np.nan
    for i in range(len(df)):
        # Activate the pivot that has just become confirmable at this bar's close.
        j = i - confirm_lag
        if j >= 0:
            if is_sh[j]: last_sh = highs[j]
            if is_sl[j]: last_sl = lows[j]
        e = None
        if np.isfinite(last_sh) and closes[i] > last_sh:
            e = "BOS_up" if last_trend == 'up' else "CHoCH_up"; last_trend = 'up'
        elif np.isfinite(last_sl) and closes[i] < last_sl:
            e = "BOS_down" if last_trend == 'down' else "CHoCH_down"; last_trend = 'down'
        trend.append(last_trend); event.append(e)
    out['trend'] = trend
    out['ms_event'] = event
    return out

def detect_fvg(df: pd.DataFrame, prm: SMCParams) -> pd.DataFrame:
    """Flag 3-candle fair value gaps on the third candle.

    fvg_up:   Low[i] > High[i-2] and the gap is at least min_gap_pct percent of High[i-2].
    fvg_down: High[i] < Low[i-2] and the gap is at least min_gap_pct percent of Low[i-2].
    """
    result = df.copy()
    total = len(df)
    bullish = np.zeros(total, dtype=bool)
    bearish = np.zeros(total, dtype=bool)
    threshold = prm.min_gap_pct / 100.0  # percent -> fraction
    highs = df['High']
    lows = df['Low']
    for k in range(2, total):
        low_now, high_now = lows.iat[k], highs.iat[k]
        low_back, high_back = lows.iat[k-2], highs.iat[k-2]
        if low_now > high_back and (low_now - high_back) / high_back >= threshold:
            bullish[k] = True
        if high_now < low_back and (low_back - high_now) / low_back >= threshold:
            bearish[k] = True
    result['fvg_up'] = bullish
    result['fvg_down'] = bearish
    return result

def detect_liquidity_sweeps(df: pd.DataFrame, prm: SMCParams) -> pd.DataFrame:
    """Flag bars that pierce the extreme confirmed swing level and close back inside.

    Adds columns:
      * sweep_high: High trades above the highest confirmed swing high, Close ends below it.
      * sweep_low:  Low trades below the lowest confirmed swing low, Close ends above it.

    A pivot at bar j is only known after `prm.swing_right` further bars, so its
    level joins the reference set at bar j + swing_right. Previously the pivot
    was added on its own bar, which used future data and also prevented a sweep
    bar that later turned out to be the new pivot from ever being flagged.
    The extremes are tracked incrementally instead of re-scanning a growing list.
    """
    out = df.copy()
    sh, sl = _swing_points(df, prm.swing_left, prm.swing_right)
    n = len(df)
    sweep_hi = np.zeros(n, dtype=bool)
    sweep_lo = np.zeros(n, dtype=bool)

    highs = df['High'].to_numpy()
    lows = df['Low'].to_numpy()
    closes = df['Close'].to_numpy()
    is_sh = sh.to_numpy()
    is_sl = sl.to_numpy()
    confirm_lag = max(int(prm.swing_right), 0)

    top_level = np.nan     # highest confirmed swing high so far
    bottom_level = np.nan  # lowest confirmed swing low so far
    for i in range(n):
        j = i - confirm_lag
        if j >= 0:
            if is_sh[j] and not (highs[j] <= top_level):
                top_level = highs[j]
            if is_sl[j] and not (lows[j] >= bottom_level):
                bottom_level = lows[j]
        if np.isfinite(top_level) and highs[i] > top_level and closes[i] < top_level:
            sweep_hi[i] = True
        if np.isfinite(bottom_level) and lows[i] < bottom_level and closes[i] > bottom_level:
            sweep_lo[i] = True
    out['sweep_high'] = sweep_hi; out['sweep_low'] = sweep_lo
    return out

def detect_order_blocks(df: pd.DataFrame, ms: pd.DataFrame) -> pd.DataFrame:
    """Record order-block bodies on break-of-structure bars.

    On a 'BOS_up' bar the body (low, high) of the most recent down-close candle
    is stored in bull_ob_low/bull_ob_high; on a 'BOS_down' bar the body of the
    most recent up-close candle is stored in bear_ob_low/bear_ob_high. All
    other rows stay NaN. If `ms` has no 'ms_event' column nothing is recorded.
    """
    out = df.copy()
    count = len(df)
    bull_lo = np.full(count, np.nan); bull_hi = np.full(count, np.nan)
    bear_lo = np.full(count, np.nan); bear_hi = np.full(count, np.nan)

    has_events = 'ms_event' in ms.columns
    opens = df['Open']
    closes = df['Close']
    recent_down_body = None
    recent_up_body = None
    for pos in range(count):
        open_px, close_px = opens.iat[pos], closes.iat[pos]
        body = (min(open_px, close_px), max(open_px, close_px))
        if close_px < open_px:
            recent_down_body = body
        elif close_px > open_px:
            recent_up_body = body

        tag = ms['ms_event'].iat[pos] if has_events else None
        if tag == 'BOS_up' and recent_down_body:
            bull_lo[pos], bull_hi[pos] = recent_down_body
        if tag == 'BOS_down' and recent_up_body:
            bear_lo[pos], bear_hi[pos] = recent_up_body

    out['bull_ob_low'] = bull_lo; out['bull_ob_high'] = bull_hi
    out['bear_ob_low'] = bear_lo; out['bear_ob_high'] = bear_hi
    return out

def run_smc_pipeline(df: pd.DataFrame, prm: SMCParams) -> pd.DataFrame:
    """Run all SMC detectors on the last `prm.lookback` rows and merge their columns.

    Rows with any NaN are dropped first. The result is the trimmed OHLCV frame
    plus swing/trend/event, FVG, sweep and order-block columns.
    """
    window = df.copy().iloc[-prm.lookback:].dropna()

    structure = detect_market_structure(window, prm)
    gaps = detect_fvg(window, prm)
    sweeps = detect_liquidity_sweeps(window, prm)
    blocks = detect_order_blocks(window, structure)

    feature_sets = (
        structure[['swing_high','swing_low','trend','ms_event']],
        gaps[['fvg_up','fvg_down']],
        sweeps[['sweep_high','sweep_low']],
        blocks[['bull_ob_low','bull_ob_high','bear_ob_low','bear_ob_high']],
    )
    merged = window
    for features in feature_sets:
        merged = merged.join(features, how='left')
    return merged

# =========================
# VOLAR + MVO (unchanged)
# =========================
def pick_benchmark(benchmarks: Tuple[str,...], start: str, end: Optional[str], cache_dir: str) -> Tuple[str, pd.DataFrame]:
    """Return (ticker, prices) for the first benchmark that yields data.

    If none does, fall back to a synthetic flat series (Close == 1.0 on every
    business day between start and end) labelled "SYNTH_BENCH".
    """
    for candidate in benchmarks:
        frame = fetch_prices([candidate], start, end, cache_dir).get(candidate)
        if frame is None or frame.empty:
            continue
        log.info("Using benchmark: %s", candidate)
        return candidate, frame

    calendar = pd.date_range(start=start, end=end or today_str(), freq="B")
    flat = pd.DataFrame({"Close": np.ones(len(calendar))}, index=calendar)
    log.warning("No benchmark found; using synthetic flat series.")
    return "SYNTH_BENCH", flat

def compute_volar_scores(end_dt: pd.Timestamp, tickers: List[str], data_map: Dict[str,pd.DataFrame], bench_df: pd.DataFrame, lookback: int) -> Dict[str, float]:
    """Score each ticker by annualised mean excess return over the benchmark divided by its own volatility.

    Uses up to `lookback` daily returns ending at `end_dt`. A ticker scores 0.0
    when it has no data, fewer than max(20, 40% of lookback) overlapping
    observations with the benchmark, or (near-)zero volatility.
    """
    bench_returns = bench_df["Close"].loc[:end_dt].pct_change().dropna().iloc[-lookback:]
    min_obs = max(20, int(0.4*lookback))
    annualiser = math.sqrt(252.0)

    def _score(frame):
        if frame is None or frame.empty:
            return 0.0
        if end_dt not in frame.index:
            frame = frame[frame.index <= end_dt]
            if frame.empty:
                return 0.0
        stock_returns = frame["Close"].loc[:end_dt].pct_change().dropna().iloc[-lookback:]
        paired = pd.concat([stock_returns, bench_returns], axis=1, keys=["s","b"]).dropna()
        if paired.shape[0] < min_obs:
            return 0.0
        spread = paired["s"] - paired["b"]
        sigma = paired["s"].std(ddof=0)
        if sigma <= 1e-8:
            return 0.0
        return float((spread.mean() / sigma) * annualiser)

    results = {}
    for symbol in tickers:
        results[symbol] = _score(data_map.get(symbol))
    return results

def markowitz_long_only(mu: np.ndarray, Sigma: np.ndarray) -> np.ndarray:
    """Long-only, fully-invested mean-variance weights chosen for the best return/volatility ratio.

    For each risk-aversion value on a log grid (1e-3 .. 1e3) the equality-constrained
    problem is solved via its KKT system; assets with negative weights are dropped
    one at a time (most negative first) and the system re-solved. The candidate
    with the highest mu'w / sqrt(w'Sigma w) wins. Falls back to equal weights
    if no candidate is usable. Sigma gets a small ridge (1e-6) for stability.
    """
    n = len(mu)
    ridge = 1e-6
    Sigma = Sigma + ridge*np.eye(n)
    best_weights = np.full(n, 1.0/n)
    best_ratio = -1e9

    def _solve_subset(risk_aversion, keep_mask):
        members = np.where(keep_mask)[0]
        k = len(members)
        cov_sub = Sigma[np.ix_(members, members)]
        ones = np.ones(k)
        # KKT system: [2*lambda*S  1; 1' 0] [w; gamma] = [mu; 1]
        kkt = np.block([[2*risk_aversion*cov_sub, ones[:,None]],[ones[None,:], np.zeros((1,1))]])
        rhs = np.concatenate([mu[members], np.array([1.0])])
        try:
            sub_weights = np.linalg.solve(kkt, rhs)[:k]
        except np.linalg.LinAlgError:
            sub_weights = np.full(k, 1.0/k)
        full = np.zeros(n)
        full[members] = sub_weights
        return full

    for risk_aversion in np.logspace(-3, 3, 31):
        keep = np.ones(n, dtype=bool)
        for _ in range(n):
            w = _solve_subset(risk_aversion, keep)
            if (w >= 0).all():
                break
            keep[np.argmin(w)] = False
        w = np.clip(w, 0, None)
        total = w.sum()
        if total <= 0:
            continue
        w = w / total
        port_mu = float(mu @ w)
        port_vol = float(np.sqrt(w @ Sigma @ w))
        if port_vol <= 1e-8:
            continue
        ratio = port_mu / port_vol
        if ratio > best_ratio:
            best_ratio = ratio
            best_weights = w.copy()
    return best_weights

# =========================
# Utilities for signals
# =========================
def _recent_true(series_bool: pd.Series, window: int) -> pd.Series:
    return series_bool.astype(int).rolling(window, min_periods=1).max().gt(0)

def _weekly_from_daily(df: pd.DataFrame) -> pd.DataFrame:
    """OHLCV weekly (Fri close) using resample('W-FRI')."""
    W = pd.DataFrame(index=df.index)
    out = pd.DataFrame({
        "Open": df["Open"].resample("W-FRI").first(),
        "High": df["High"].resample("W-FRI").max(),
        "Low":  df["Low"].resample("W-FRI").min(),
        "Close":df["Close"].resample("W-FRI").last(),
        "Volume": df["Volume"].resample("W-FRI").sum()
    }).dropna()
    out.index.name = "date"
    return out

# =========================
# SMC LONG-ONLY (Weekly bias + Daily exec)
# =========================
def simulate_ticker_smc_long_WD(ticker: str, dfD: pd.DataFrame, cfg: Config):
    """Generate long-only candidate trades for one ticker: weekly SMC bias, daily SMC execution.

    Args:
        ticker: symbol written into every trade row.
        dfD: daily OHLCV frame (Open/High/Low/Close/Volume, datetime index).
        cfg: strategy configuration.

    Returns:
        (trades, empty_series): `trades` has columns
        ticker, side, date, price, shares, reason, signal_reason, score, with
        alternating BUY ("candidate") and SELL rows (SL / TP2 / StructureFlip /
        TimeStop / final_close). Shares are always 0 here; sizing happens in the
        portfolio engine. The second element is an empty float Series kept for
        interface compatibility.

    Trade management notes:
      * Entries need a usable stop below the fill price; bars where ATR is still
        warming up (NaN) and no order-block low exists are skipped instead of
        opening a trade with NaN stop and ~zero risk.
      * When a bar touches both the stop and TP2 the stop is assumed to be hit
        first (intrabar order is unknown; this is the conservative choice).
      * A bar that opens below the stop fills at the open, not at the stop level.
      * NOTE(review): with exit_on_next_open=False exits are stamped with the
        current bar's date although the next bar's range is inspected.
    """
    # --- WEEKLY (bias) ---
    dfW = _weekly_from_daily(dfD)
    smcW = run_smc_pipeline(
        dfW,
        SMCParams(
            swing_left=cfg.swing_left_W,
            swing_right=cfg.swing_right_W,
            min_gap_pct=cfg.min_fvg_gap_pct_W,
            lookback=cfg.lookback_W
        )
    )

    # Bias: weekly trend up OR recent BOS_up / CHoCH_up (weekly)
    biasW = (
        smcW['trend'].eq('up') |
        _recent_true(smcW['ms_event'].eq('BOS_up'), 5) |
        _recent_true(smcW['ms_event'].eq('CHoCH_up'), 5)
    ).astype(bool)

    # Forward-fill weekly bias onto daily dates
    biasW_D = biasW.reindex(dfD.index, method="ffill").fillna(False)

    # --- DAILY (entries/exits) ---
    smcD = run_smc_pipeline(
        dfD,
        SMCParams(
            swing_left=cfg.swing_left_D,
            swing_right=cfg.swing_right_D,
            min_gap_pct=cfg.min_fvg_gap_pct_D,
            lookback=cfg.lookback_D
        )
    )
    smcD = smcD.copy()
    smcD['atr'] = ATR(smcD, n=cfg.atr_len)

    # Triggers: (recent sweep_low) OR (recent CHoCH_up/BOS_up on Daily)
    trig_sweep = _recent_true(smcD['sweep_low'].fillna(False), cfg.recent_sweep_lookback)
    trig_struct = (
        _recent_true(smcD['ms_event'].eq('CHoCH_up'), 5) |
        _recent_true(smcD['ms_event'].eq('BOS_up'), 5)
    )
    triggerD = (trig_sweep | trig_struct)

    # Optional extra filter: FVG_up or bull_OB present (relaxed by default)
    if cfg.require_fvg_or_ob:
        triggerD = triggerD & (smcD['fvg_up'] | (~smcD['bull_ob_low'].isna()))

    # Momentum confirm (optional): close > prior high
    momentum = (smcD['Close'] > smcD['High'].shift(1)) if cfg.momentum_confirm else pd.Series(True, index=smcD.index)

    entry_bar = (biasW_D & triggerD & momentum).fillna(False)

    # Walk forward and create trades
    trades = []
    in_pos = False
    entry_px = sl = tp1 = tp2 = 0.0
    hit_tp1 = False
    bars_left = 0

    idx = list(smcD.index)
    for i in range(len(idx) - 1):
        dt, nxt = idx[i], idx[i+1]
        row, nxt_row = smcD.loc[dt], smcD.loc[nxt]

        if not in_pos:
            if not entry_bar.loc[dt]:
                continue

            px = float(nxt_row["Open"] if cfg.entry_on_next_open else row["Close"])

            # Candidate stops: ATR-based and order-block low; only levels strictly
            # below the fill price are meaningful for a long.
            stop_levels = []
            atr_val = float(row['atr'])
            if np.isfinite(atr_val):
                stop_levels.append(px - cfg.atr_mult_sl * atr_val)
            if pd.notna(row['bull_ob_low']):
                stop_levels.append(float(row['bull_ob_low']))
            stop_levels = [lvl for lvl in stop_levels if lvl < px]
            if not stop_levels:
                # No definable risk (e.g. ATR warm-up): skip rather than trade blind.
                continue

            sl = float(min(stop_levels))
            risk = max(1e-6, px - sl)
            tp1 = float(px + cfg.rr_tp1 * risk)
            tp2 = float(px + cfg.rr_tp2 * risk)
            entry_px = px
            in_pos = True
            hit_tp1 = False
            bars_left = cfg.time_stop_bars

            sig_reason = (
                f"BiasW_up={bool(biasW_D.loc[dt])}; "
                f"Trig=(sweep≤{cfg.recent_sweep_lookback} or recent BOS/CHoCH up D); "
                f"{'FVG/OB req' if cfg.require_fvg_or_ob else 'No FVG/OB req'}; "
                f"Momentum={'on' if cfg.momentum_confirm else 'off'}"
            )
            trades.append({
                "ticker": ticker, "side": "BUY", "date": (nxt if cfg.entry_on_next_open else dt),
                "price": px, "shares": 0, "reason": "candidate",
                "signal_reason": sig_reason, "score": np.nan
            })
            continue

        bars_left -= 1
        open_i = float(nxt_row["Open"])
        high_i = float(nxt_row["High"])
        low_i  = float(nxt_row["Low"])
        exec_date = nxt if cfg.exit_on_next_open else dt

        # Intrabar stop: checked first so an ambiguous SL+TP2 bar resolves to the stop.
        if low_i <= sl:
            fill = min(sl, open_i)  # gap below the stop fills at the open
            trades.append({"ticker": ticker, "side": "SELL", "date": exec_date, "price": float(fill), "shares": 0, "reason": "SL"})
            in_pos = False; continue

        if not hit_tp1 and high_i >= tp1:
            hit_tp1 = True
            sl = entry_px  # move to BE

        if high_i >= tp2:
            trades.append({"ticker": ticker, "side": "SELL", "date": exec_date, "price": tp2, "shares": 0, "reason": "TP2"})
            in_pos = False; continue

        # Structure invalidate on Daily: CHoCH_down
        if smcD['ms_event'].iat[i] == 'CHoCH_down':
            price_exit = float(nxt_row["Open"] if cfg.exit_on_next_open else row["Close"])
            trades.append({"ticker": ticker, "side": "SELL", "date": exec_date, "price": price_exit, "shares": 0, "reason": "StructureFlip"})
            in_pos = False; continue

        # Time stop
        if bars_left <= 0:
            price_exit = float(nxt_row["Open"] if cfg.exit_on_next_open else row["Close"])
            trades.append({"ticker": ticker, "side": "SELL", "date": exec_date, "price": price_exit, "shares": 0, "reason": "TimeStop"})
            in_pos = False; continue

    # Final close if still open
    if in_pos:
        last_dt = smcD.index[-1]
        trades.append({"ticker": ticker, "side": "SELL", "date": last_dt, "price": float(smcD.loc[last_dt, "Close"]), "shares": 0, "reason": "final_close"})

    cols = ["ticker","side","date","price","shares","reason","signal_reason","score"]
    return pd.DataFrame(trades, columns=cols), pd.Series(dtype=float)

# =========================
# Portfolio engine (same pattern as yours)
# =========================
def aggregate_and_apply(all_trades: pd.DataFrame, data_map: Dict[str, pd.DataFrame], bench_df: pd.DataFrame, cfg: "Config"):
    """Turn per-ticker candidate trades into a cash-constrained portfolio simulation.

    Per trade date: process SELLs for held tickers, then filter BUY candidates
    (optional 52-week proximity filter, not already held), rank them by VOLAR,
    take the top-K within the free position slots, size them with long-only
    Markowitz weights over a capped fraction of cash, and mark equity to market.

    Args:
        all_trades: candidate BUY/SELL rows (ticker, side, date, price, reason, ...).
        data_map: ticker -> daily OHLCV frame.
        bench_df: benchmark prices (needs a Close column) for VOLAR.
        cfg: configuration (capital, limits, filters, fees).

    Returns:
        (legs_df, trips_df, equity_series, metrics). Always a 4-tuple; for empty
        input the first element is the (empty) input frame and the rest are empty.

    Notes:
      * BUYs fill at the candidate's own price (the price the simulator derived
        SL/TP from); the day's Close is only a fallback when that price is unusable.
      * NOTE(review): the 52-week filter and VOLAR/covariance inputs include the
        entry date's close, which is not known at a next-open fill.
    """
    global APPLY_FEES

    leg_cols = ["ticker", "side", "date", "price", "shares", "reason", "turnover", "fees_inr", "pnl_inr"]
    trip_cols = [
        "ticker", "entry_date", "entry_price", "exit_date", "exit_price", "days_held", "shares",
        "entry_reason", "exit_reason", "gross_pnl_inr", "fees_total_inr", "net_pnl_inr", "return_pct",
    ]

    if all_trades.empty:
        # Same arity as the normal return so callers can always unpack four values.
        return all_trades, pd.DataFrame(columns=trip_cols), pd.Series(dtype=float), {}

    side_order = {"BUY": 0, "SELL": 1}
    all_trades = (all_trades
        .assign(_sorder=all_trades["side"].map(side_order))
        .sort_values(by=["date", "_sorder"], kind="stable")
        .drop(columns=["_sorder"])
        .reset_index(drop=True)
    )
    all_trades["date"] = pd.to_datetime(all_trades["date"])

    equity_curve = []
    # Go through DatetimeIndex so the dates are Timestamps on every pandas version
    # (ndarray[datetime64[ns]].tolist() yields raw integers).
    dates = list(pd.DatetimeIndex(all_trades["date"].unique()).sort_values())
    cash = cfg.initial_capital
    open_positions = {}
    completed_legs = []

    APPLY_FEES = cfg.apply_fees

    def _get_close_on(tkr, dt):
        # Close on dt, or the latest close before it; NaN when unavailable.
        df = data_map.get(tkr)
        if df is None or df.empty:
            return np.nan
        if dt in df.index:
            return float(df.loc[dt, "Close"])
        prev = df[df.index <= dt]
        if prev.empty:
            return np.nan
        return float(prev["Close"].iloc[-1])

    if dates:
        seed_date = pd.to_datetime(dates[0]) - pd.Timedelta(days=1)
        equity_curve.append((seed_date, float(cash)))

    for dt in dates:
        day_trades = all_trades[all_trades["date"] == dt].copy()

        # ---- SELL first ----
        for _, tr in day_trades[day_trades["side"] == "SELL"].iterrows():
            tkr = tr["ticker"]; price = float(tr["price"])
            pos = open_positions.get(tkr)
            if pos is None: continue  # candidate was never bought
            shares = int(pos["shares"])
            turnover_sell = shares * price
            fee = calc_fees(0.0, turnover_sell)
            pnl = (price - pos["entry_px"]) * shares
            cash += (turnover_sell - fee)
            realized = pnl - fee - pos.get("buy_fee", 0.0)
            completed_legs.append({
                "ticker": tkr, "side": "SELL", "date": dt, "price": price,
                "shares": shares, "reason": tr.get("reason",""),
                "turnover": turnover_sell, "fees_inr": fee, "pnl_inr": realized
            })
            log.info("Exit %-12s px=%8.2f shares=%6d reason=%s net=%.2f cash=%.2f",
                     tkr, price, shares, tr.get("reason",""), realized, cash)
            del open_positions[tkr]

        # ---- BUY candidates today ----
        buys_today = day_trades[day_trades["side"] == "BUY"].copy()

        # 52w filter
        if cfg.enable_52w_filter and not buys_today.empty:
            keep = []
            for _, rr in buys_today.iterrows():
                df = data_map.get(rr["ticker"])
                if df is None or df.empty or dt not in df.index:
                    continue
                close = float(df.loc[dt, "Close"])
                hist = df["Close"].loc[:dt]
                window = hist.iloc[-cfg.filter_52w_window:] if len(hist)>=cfg.filter_52w_window else hist
                high_52w = float(window.max())
                if high_52w > 0 and close >= cfg.within_pct_of_52w_high * high_52w:
                    keep.append(rr)
            buys_today = pd.DataFrame(keep) if keep else pd.DataFrame(columns=buys_today.columns)

        # Exclude already-held tickers
        if not buys_today.empty:
            buys_today = buys_today[~buys_today["ticker"].isin(open_positions.keys())]

        # VOLAR rank
        if not buys_today.empty:
            tickers = buys_today["ticker"].tolist()
            volar_scores = compute_volar_scores(dt, tickers, data_map, bench_df, cfg.volar_lookback)
            buys_today["volar"] = buys_today["ticker"].map(volar_scores)
            buys_today = buys_today.sort_values("volar", ascending=False).reset_index(drop=True)

        # Pick top-K respecting max concurrent
        slots = cfg.max_concurrent_positions - len(open_positions)
        selected = pd.DataFrame(columns=buys_today.columns)
        if slots > 0 and not buys_today.empty:
            selected = buys_today.head(min(cfg.top_k_daily, slots)).copy()

        # Allocate
        if not selected.empty:
            log.info("Selected %d BUY candidates on %s:", selected.shape[0], dt.date())
            for i, rr in selected.reset_index(drop=True).iterrows():
                log.info("  %-12s volar=%6.2f rank=%d px=%8.2f", rr["ticker"], rr.get("volar",0.0), i+1, rr["price"])

            names = selected["ticker"].tolist()
            rets = []
            for t in names:
                df = data_map.get(t)
                ser = df["Close"].loc[:dt].pct_change().dropna().iloc[-cfg.volar_lookback:]
                rets.append(ser)
            R = pd.concat(rets, axis=1)
            R.columns = names
            R = R.dropna()
            if R.empty or R.shape[0] < max(20, int(0.4*cfg.volar_lookback)) or R.shape[1] == 0:
                weights = np.full(len(names), 1.0/len(names))
            else:
                mu = R.mean().values
                Sigma = R.cov().values
                weights = markowitz_long_only(mu, Sigma)

            deploy_cash = max(0.0, float(cash)) * float(cfg.deploy_cash_frac)
            if deploy_cash <= 0:
                log.info("No deployable cash (cap=%.0f%%) on %s", 100*cfg.deploy_cash_frac, dt.date())
            else:
                alloc = (weights / weights.sum()) * deploy_cash if weights.sum()>0 else np.full(len(names), deploy_cash/len(names))
                rank_map = {row["ticker"]: (idx+1) for idx, (_, row) in enumerate(selected.iterrows())}
                for w_amt, t in zip(alloc, names):
                    row_sel = selected[selected["ticker"]==t].iloc[0]

                    # Fill at the candidate's price so entry is consistent with the
                    # SL/TP levels and exit prices produced by the simulator.
                    price = float(row_sel["price"])
                    if not np.isfinite(price) or price <= 0:
                        price = _get_close_on(t, dt)
                    if not np.isfinite(price) or price <= 0:
                        log.info("Skip BUY %-12s (no usable price)", t); continue

                    shares = int(math.floor(w_amt / price))
                    if shares <= 0:
                        log.info("Skip BUY %-12s (alloc %.2f too small)", t, w_amt); continue
                    turn = shares * price
                    fee = calc_fees(turn, 0.0)
                    total_cost = turn + fee
                    if total_cost > cash:
                        shares = int(math.floor((cash - fee) / price))
                        if shares <= 0:
                            log.info("Skip BUY %-12s due to cash/fees", t); continue
                        turn = shares * price
                        fee = calc_fees(turn, 0.0)  # fee must match the reduced order size
                        total_cost = turn + fee
                    cash -= total_cost
                    open_positions[t] = {"entry_date": dt, "entry_px": price, "shares": shares, "buy_fee": fee, "entry_reason": "entry"}

                    volar_val = float(row_sel.get("volar", np.nan))
                    rank_pos = rank_map.get(t, np.nan)
                    reason_text = (
                        f"SMC W→D long; VOLAR rank {int(rank_pos)}/{len(names)} (VOLAR={volar_val:.2f}); "
                        f"MVO weight={w_amt/deploy_cash:.1%} of capped cash ({100*cfg.deploy_cash_frac:.0f}% of available)"
                    )
                    completed_legs.append({
                        "ticker": t, "side": "BUY", "date": dt, "price": price,
                        "shares": shares, "reason": reason_text,
                        "turnover": turn, "fees_inr": fee, "pnl_inr": 0.0
                    })
                    log.info("BUY %-12s px=%8.2f sh=%6d fee=%.2f cash=%.2f :: %s",
                             t, price, shares, fee, cash, reason_text)

        # MTM valuation
        mtm = 0.0
        for _tkr, pos in open_positions.items():
            px = _get_close_on(_tkr, dt)
            if not np.isnan(px):
                mtm += pos["shares"] * px
        total_equity = cash + mtm
        equity_curve.append((dt, float(total_equity)))

    eq_ser = pd.Series([e for _, e in equity_curve], index=[d for d, _ in equity_curve])

    # Build roundtrips. Explicit columns keep sort_values valid even when no
    # leg was executed (e.g. every candidate was filtered out).
    legs_df = pd.DataFrame(completed_legs, columns=leg_cols).sort_values(["date","ticker","side"]).reset_index(drop=True)
    roundtrips = []
    by_tkr_open = {}
    for _, leg in legs_df.iterrows():
        tkr = leg["ticker"]
        if leg["side"] == "BUY":
            by_tkr_open[tkr] = leg
        else:
            buy = by_tkr_open.pop(tkr, None)
            if buy is None: continue
            fees_total = float(buy.get("fees_inr", 0.0) + leg.get("fees_inr", 0.0))
            gross_pnl = (leg["price"] - buy["price"]) * buy["shares"]
            net_pnl   = gross_pnl - fees_total
            ret_pct   = (leg["price"] / buy["price"] - 1.0) * 100.0
            days_held = (pd.to_datetime(leg["date"]) - pd.to_datetime(buy["date"])).days
            roundtrips.append({
                "ticker": tkr,
                "entry_date": pd.to_datetime(buy["date"]),
                "entry_price": float(buy["price"]),
                "exit_date": pd.to_datetime(leg["date"]),
                "exit_price": float(leg["price"]),
                "days_held": int(days_held),
                "shares": int(buy["shares"]),
                "entry_reason": buy.get("reason",""),
                "exit_reason": leg.get("reason",""),
                "gross_pnl_inr": float(gross_pnl),
                "fees_total_inr": float(fees_total),
                "net_pnl_inr": float(net_pnl),
                "return_pct": float(ret_pct),
            })
    trips_df = pd.DataFrame(roundtrips, columns=trip_cols).sort_values(["entry_date","ticker"]).reset_index(drop=True)

    metrics = compute_metrics(eq_ser, legs_df)
    return legs_df, trips_df, eq_ser, metrics

def compute_metrics(equity: pd.Series, legs_df: pd.DataFrame):
    """Summarise an equity curve and its SELL legs.

    Returns a dict with start/final equity, CAGR (%), Sharpe (per-observation
    returns scaled by sqrt(252)), max drawdown (%), win rate (%) over SELL legs
    with positive pnl_inr, and the SELL count. Returns {} for a missing or
    empty equity series.
    """
    if equity is None or equity.empty:
        return {}

    eq = equity.dropna()
    first_value, last_value = eq.iloc[0], eq.iloc[-1]

    period_returns = eq.pct_change().fillna(0.0)
    span_days = (eq.index[-1] - eq.index[0]).days or 1
    years = span_days / 365.25
    if years > 0:
        cagr = (last_value / first_value) ** (1/years) - 1
    else:
        cagr = 0.0

    volatility = period_returns.std(ddof=0) or 1e-9  # guard against division by zero
    sharpe = (period_returns.mean() / volatility) * np.sqrt(252)

    running_peak = eq.cummax()
    max_dd = ((eq - running_peak) / running_peak).min()

    has_legs = legs_df is not None and not legs_df.empty
    n_sells = legs_df[legs_df["side"] == "SELL"].shape[0] if has_legs else 0
    if n_sells:
        wins = int((legs_df[legs_df["side"] == "SELL"]["pnl_inr"] > 0).sum())
    else:
        wins = 0
    win_rate = (wins / n_sells) * 100.0 if n_sells > 0 else 0.0

    summary = {}
    summary.update({
        "start_equity_inr": float(first_value),
        "final_equity_inr": float(last_value),
        "cagr_pct": float(cagr * 100),
        "sharpe": float(sharpe),
        "max_drawdown_pct": float(max_dd * 100),
        "win_rate_pct": float(win_rate),
        "n_trades": int(n_sells),
    })
    return summary

def plot_equity(equity: pd.Series, out_path: str):
    """Save a line chart of the equity curve to ``out_path`` (best effort).

    Args:
        equity: Equity values indexed by date. ``None`` or an empty series
            makes this a no-op.
        out_path: Destination file path handed to ``plt.savefig``.

    Returns:
        None. Plotting problems (including matplotlib being unavailable) never
        propagate; they are reported as a warning on the file's logger.
    """
    if equity is None or equity.empty:
        return
    try:
        import matplotlib.pyplot as plt
        fig = plt.figure(figsize=(10, 5))
        try:
            plt.plot(equity.index, equity.values)
            plt.title("Equity Curve")
            plt.xlabel("Date")
            plt.ylabel("Equity (INR)")
            plt.tight_layout()
            plt.savefig(out_path)
        finally:
            # Release the figure even when drawing or saving fails, so a
            # failed plot does not leave an open figure behind.
            plt.close(fig)
    except Exception as exc:
        # Plotting stays optional, but the failure is surfaced instead of
        # being silently discarded. Same logger name as the module-level one.
        logging.getLogger("smc_swing_long_only_WD").warning(
            "Equity plot not written to %s: %s", out_path, exc
        )

# =========================
# Backtest driver
# =========================
def backtest(cfg: Config):
    """Run the end-to-end backtest and write its artefacts to ``cfg.out_dir``.

    Steps, in order: ensure cache/output directories exist, load the symbol
    universe, fetch price data, pick a benchmark, simulate every ticker with
    ``simulate_ticker_smc_long_WD``, hand the combined trades to
    ``aggregate_and_apply``, then write CSV/JSON outputs (and the equity plot
    when ``cfg.plot`` is set).

    Args:
        cfg: Run configuration. Fields read here: ``cache_dir``, ``out_dir``,
            ``static_symbols``, ``static_symbols_path``, ``start_date``,
            ``end_date``, ``benchmark_try``, ``deploy_cash_frac``,
            ``enable_52w_filter``, ``within_pct_of_52w_high``, ``top_k_daily``,
            ``max_concurrent_positions`` and ``plot``.

    Returns:
        ``(legs_df, trips_df, equity, metrics)`` as produced by
        ``aggregate_and_apply``. When no ticker yields any trades the result is
        ``(None, None, None, {})`` and no files are written.
    """
    ensure_dirs(cfg.cache_dir, cfg.out_dir)
    log.info("Universe: loading static symbols...")
    symbols = load_static_symbols(cfg.static_symbols, cfg.static_symbols_path)
    log.info("Loaded %d symbols.", len(symbols))

    log.info("Data: fetching OHLCV from yfinance (adjusted)...")
    data_map = fetch_prices(symbols, cfg.start_date, cfg.end_date, cfg.cache_dir)
    log.info("Downloaded %d symbols with data.", len(data_map))

    bench_tkr, bench_df = pick_benchmark(cfg.benchmark_try, cfg.start_date, cfg.end_date, cfg.cache_dir)
    log.info("Benchmark selected: %s", bench_tkr)

    log.info("Signals: generating SMC W→D long-only entries...")
    all_trades = []
    for i, tkr in enumerate(symbols, 1):
        dfD = data_map.get(tkr)
        if dfD is None or dfD.empty:
            # Symbol had no usable price data; nothing to simulate.
            continue
        tr, _ = simulate_ticker_smc_long_WD(tkr, dfD, cfg)
        if not tr.empty:
            all_trades.append(tr)
        if i % 50 == 0:
            # Progress heartbeat for large universes.
            log.info("  processed %d/%d tickers...", i, len(symbols))

    if not all_trades:
        log.warning("No SMC signals generated; consider loosening triggers/lookbacks.")
        return None, None, None, {}
    all_trades = pd.concat(all_trades, ignore_index=True)

    log.info("Portfolio: cap daily deploy to %.0f%% of cash; 52w %s %.0f%% high; top-%d by VOLAR; MVO; max %d positions.",
             cfg.deploy_cash_frac*100, ">=" if cfg.enable_52w_filter else "(OFF)", cfg.within_pct_of_52w_high*100,
             cfg.top_k_daily, cfg.max_concurrent_positions)
    legs_df, trips_df, equity, metrics = aggregate_and_apply(all_trades, data_map, bench_df, cfg)

    # One timestamp shared by every artefact of this run so they sort together.
    stamp = pd.Timestamp.today(tz="Asia/Kolkata").strftime("%Y%m%d_%H%M%S")
    legs_path = os.path.join(cfg.out_dir, f"trades_legs_{stamp}.csv")
    trips_path = os.path.join(cfg.out_dir, f"trades_roundtrips_{stamp}.csv")
    equity_path = os.path.join(cfg.out_dir, f"equity_{stamp}.csv")
    metrics_path = os.path.join(cfg.out_dir, f"metrics_{stamp}.json")
    eq_plot_path = os.path.join(cfg.out_dir, f"equity_{stamp}.png")

    if legs_df is not None:
        legs_df.to_csv(legs_path, index=False)
    if trips_df is not None:
        trips_df.to_csv(trips_path, index=False)
    if equity is not None:
        pd.DataFrame({"date": equity.index, "equity": equity.values}).to_csv(equity_path, index=False)
    with open(metrics_path, "w") as f:
        json.dump(metrics, f, indent=2)

    if cfg.plot and equity is not None:
        plot_equity(equity, eq_plot_path)

    log.info("=== METRICS ===\n%s", json.dumps(metrics, indent=2))
    log.info("Files written:\n  %s\n  %s\n  %s\n  %s", legs_path, trips_path, equity_path, metrics_path)
    if cfg.plot:
        log.info("  %s", eq_plot_path)

    # Mirror the 4-tuple shape of the early "no signals" return so callers can
    # always unpack the result the same way.
    return legs_df, trips_df, equity, metrics

def main():
    """Script entry point: choose the symbol source, sync the fee flag, run.

    Rebinds the module-level ``APPLY_FEES`` from ``CFG.apply_fees``, points
    ``CFG.static_symbols_path`` at the ticker file, and calls ``backtest(CFG)``.
    """
    global APPLY_FEES

    # Symbol universe: use exactly one of the two options below.
    #
    # Option A - list the tickers inline:
    # CFG.static_symbols = [
    #     'RELIANCE.NS','HDFCBANK.NS','ICICIBANK.NS','INFY.NS','TCS.NS','SBIN.NS',
    #     'ITC.NS','LT.NS','KOTAKBANK.NS','BHARTIARTL.NS','MARUTI.NS','AXISBANK.NS',
    #     'SUNPHARMA.NS','ONGC.NS','ASIANPAINT.NS','HCLTECH.NS','POWERGRID.NS','NTPC.NS',
    #     'ULTRACEMCO.NS','BAJFINANCE.NS','TATAMOTORS.NS','ADANIPORTS.NS','TATASTEEL.NS',
    #     'M&M.NS','HINDUNILVR.NS','WIPRO.NS','JSWSTEEL.NS','DIVISLAB.NS','NESTLEIND.NS',
    # ]
    #
    # Option B - read the tickers from a text file, one per line:
    CFG.static_symbols_path = "nifty500.txt"

    # Keep the global fee switch in step with the configuration before running.
    APPLY_FEES = bool(CFG.apply_fees)

    backtest(CFG)

# Run the backtest only when this file is executed directly, not when it is
# imported as a module.
if __name__ == "__main__":
    main()


2025-10-17 00:02:25 | INFO | Universe: loading static symbols...
2025-10-17 00:02:25 | INFO | Loaded 500 symbols.
2025-10-17 00:02:25 | INFO | Data: fetching OHLCV from yfinance (adjusted)...
2025-10-17 00:02:29 | ERROR | 
1 Failed download:
2025-10-17 00:02:29 | ERROR | ['ABLBL.NS']: YFPricesMissingError('possibly delisted; no price data found  (1d 2015-01-01 -> 2025-01-01) (Yahoo error = "Data doesn\'t exist for startDate = 1420050600, endDate = 1735669800")')
2025-10-17 00:02:33 | ERROR | 
1 Failed download:
2025-10-17 00:02:33 | ERROR | ['AEGISVOPAK.NS']: YFPricesMissingError('possibly delisted; no price data found  (1d 2015-01-01 -> 2025-01-01) (Yahoo error = "Data doesn\'t exist for startDate = 1420050600, endDate = 1735669800")')
2025-10-17 00:02:34 | ERROR | 
1 Failed download:
2025-10-17 00:02:34 | ERROR | ['AGARWALEYE.NS']: YFPricesMissingError('possibly delisted; no price data found  (1d 2015-01-01 -> 2025-01-01) (Yahoo error = "Data doesn\'t exist for startDate = 1420050600