In [3]:
#!/usr/bin/env python3

import os
import time
import math
import logging
from dataclasses import dataclass, field
from typing import List, Optional, Dict, Tuple
import io

import numpy as np
import pandas as pd

try:
    import yfinance as yf
except Exception:
    raise SystemExit("Please: pip install yfinance pandas numpy")

import requests  # for NSE index CSVs

# =========================
# LOGGING
# =========================
# Configure the root logger once at import time: INFO level, with
# "timestamp | level | message" records.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s | %(levelname)s | %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)
# Module-wide logger used by every function in this script.
log = logging.getLogger("india_sector_scanner_ta")

# Metric columns that are computed as fractions and later multiplied by 100
# (see to_percent_inplace) before being written to CSV or printed.
PCT_COLS = [
    "ret_1w", "ret_1m", "ret_3m", "ret_6m", "ret_1y",
    "ret_ytd", "cagr", "vol_ann", "rs_vs_bench"
]

# Request headers sent with the NSE archive CSV download in
# fetch_top_constituent_by_mcap(). The User-Agent imitates a desktop browser.
# NOTE(review): presumably NSE rejects default client user agents - confirm.
NSE_HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/120.0.0.0 Safari/537.36"
    ),
    "Accept": "text/csv,application/json,application/xhtml+xml,text/html;q=0.9,*/*;q=0.8",
}

# =========================
# CONFIG
# =========================
@dataclass
class Config:
    """Settings for one scanner run; every field has a usable default.

    ``lookbacks_days`` and ``composite_weights`` default to ``None`` and are
    replaced with built-in dictionaries in ``__post_init__``. An explicitly
    supplied dictionary (even an empty one) is kept untouched.
    """

    # --- History window handed to the downloader (end_date None = no end bound) ---
    start_date: str = "2012-01-01"
    end_date: Optional[str] = None

    # --- Universe: a fresh list is built per instance so instances never share it ---
    symbols: List[str] = field(default_factory=lambda: [
        # NSE sector indices (Yahoo caret tickers)
        "^CNXAUTO",      # Nifty Auto
        "^NSEBANK",      # Nifty Bank
        "^CNXIT",        # Nifty IT
        "^CNXMEDIA",     # Nifty Media
        "^CNXMETAL",     # Nifty Metal
        "^CNXPHARMA",    # Nifty Pharma
        "^CNXFMCG",      # Nifty FMCG
        "^CNXREALTY",    # Nifty Realty
        "^CNXENERGY",    # Nifty Energy
        "^CNXINFRA",     # Nifty Infrastructure
        "^CNXPSUBANK",   # Nifty PSU Bank

        # Additional NSE index symbols exposed with .NS on Yahoo
        "NIFTY_PVT_BANK.NS",       # Nifty Private Bank
        "NIFTY_FIN_SERVICE.NS",    # Nifty Financial Services
        "NIFTYFINSRV25_50.NS",     # Nifty Financial Services 25/50
        "NIFTY_HEALTHCARE.NS",     # Nifty Healthcare
        "NIFTY_OIL_AND_GAS.NS",    # Nifty Oil & Gas
        "NIFTY_CONSR_DURBL.NS",    # Nifty Consumer Durables
    ])

    # When set, main() reads the universe from this file instead of `symbols`.
    symbols_path: Optional[str] = None
    # Benchmark ticker; main() appends it to the universe if it is missing.
    benchmark: str = "^NSEI"

    # Return-column name -> lookback in trading days (filled in __post_init__).
    lookbacks_days: Optional[Dict[str, int]] = None
    # Return-column name -> weight in the composite rank (filled in __post_init__).
    composite_weights: Optional[Dict[str, float]] = None

    # --- Risk / annualisation inputs ---
    risk_free_rate_annual: float = 0.06
    trading_days_per_year: int = 252

    # --- Output options ---
    out_root: str = "outputs/india_sector_rotation"
    top_n: int = 5
    save_latest_prices: bool = True

    # --- Download behaviour ---
    max_retries: int = 3
    retry_sleep_sec: float = 2.5
    threads: bool = True

    # Time zone used to pick today's dated output folder.
    tz_display: str = "Asia/Kolkata"

    # --- Technical-analysis parameters ---
    sma_fast: int = 50
    sma_slow: int = 200
    ema_fast: int = 12
    ema_slow: int = 26
    bb_len: int = 20
    bb_k: float = 2.0
    adx_len: int = 14
    rrg_window: int = 63      # days for RS momentum slope
    breakout_short: int = 20
    breakout_long: int = 55
    dist_52w_len: int = 252

    def __post_init__(self):
        """Swap the ``None`` placeholders for the built-in default dictionaries."""
        if self.lookbacks_days is None:
            self.lookbacks_days = dict(
                ret_1w=5, ret_1m=21, ret_3m=63, ret_6m=126, ret_1y=252,
            )
        if self.composite_weights is None:
            self.composite_weights = dict(
                ret_1m=0.25, ret_3m=0.35, ret_6m=0.25, ret_1y=0.15,
            )


# Module-wide configuration instance read by main().
CFG = Config()

# =========================
# INDEX NAME MAP (Yahoo -> Pretty name)
# =========================
# Yahoo ticker -> human-readable index name. main() uses it only for the
# "index_name" column of the top-constituent table and falls back to the raw
# ticker when a symbol is not listed here.
INDEX_NAME_MAP: Dict[str, str] = {
    # Caret-style Yahoo tickers
    "^CNXAUTO": "NIFTY AUTO",
    "^NSEBANK": "NIFTY BANK",
    "^CNXIT": "NIFTY IT",
    "^CNXMEDIA": "NIFTY MEDIA",
    "^CNXMETAL": "NIFTY METAL",
    "^CNXPHARMA": "NIFTY PHARMA",
    "^CNXFMCG": "NIFTY FMCG",
    "^CNXREALTY": "NIFTY REALTY",
    "^CNXENERGY": "NIFTY ENERGY",
    "^CNXINFRA": "NIFTY INFRASTRUCTURE",
    "^CNXPSUBANK": "NIFTY PSU BANK",

    # ".NS"-style Yahoo tickers
    "NIFTY_PVT_BANK.NS": "NIFTY PRIVATE BANK",
    "NIFTY_FIN_SERVICE.NS": "NIFTY FINANCIAL SERVICES",
    "NIFTYFINSRV25_50.NS": "NIFTY FINANCIAL SERVICES 25/50",
    "NIFTY_HEALTHCARE.NS": "NIFTY HEALTHCARE INDEX",
    "NIFTY_OIL_AND_GAS.NS": "NIFTY OIL & GAS",
    "NIFTY_CONSR_DURBL.NS": "NIFTY CONSUMER DURABLES",

    # Default benchmark (Config.benchmark)
    "^NSEI": "NIFTY 50",
}

# =========================
# INDEX SLUG MAP (Yahoo -> archives slug for CSV)
# =========================
# Yahoo ticker -> slug inserted into the NSE archive URL
# ".../content/indices/ind_<slug>list.csv" by fetch_top_constituent_by_mcap().
# A ticker missing from this map is skipped by that function.
# NOTE(review): in the run captured with this script, NIFTY_FIN_SERVICE.NS
# appears in the momentum table but has no row in the top-constituent table,
# which suggests its slug may not resolve - verify the ".NS" slugs against
# the file names NSE actually publishes.
INDEX_SLUG_MAP: Dict[str, str] = {
    "^NSEI": "nifty50",
    "^NSEBANK": "niftybank",
    "^CNXAUTO": "niftyauto",
    "^CNXFMCG": "niftyfmcg",
    "^CNXIT": "niftyit",
    "^CNXMEDIA": "niftymedia",
    "^CNXMETAL": "niftymetal",
    "^CNXPHARMA": "niftypharma",
    "^CNXREALTY": "niftyrealty",
    "^CNXENERGY": "niftyenergy",
    "^CNXINFRA": "niftyinfra",
    "^CNXPSUBANK": "niftypsubank",
    "NIFTY_PVT_BANK.NS": "niftypvtbank",
    "NIFTY_FIN_SERVICE.NS": "niftyfinservice",
    "NIFTY_HEALTHCARE.NS": "niftyhealthcare",
    "NIFTY_OIL_AND_GAS.NS": "niftyoilgas",
    "NIFTY_CONSR_DURBL.NS": "niftyconsrdurbl",
    # NIFTYFINSRV25_50.NS is deliberately left unmapped: a constituents CSV
    # may not exist for it, so the top-constituent lookup skips that index.
}

# =========================
# UTILITIES
# =========================
def ensure_dir(path: str) -> None:
    """Create directory *path* (and any missing parents); a no-op if it exists."""
    os.makedirs(name=path, exist_ok=True)

def today_folder(root: str, tz: str) -> str:
    """Return the dated output folder ``<root>/YYYY-MM-DD``, creating it if needed.

    The date is the current date in time zone *tz*.
    """
    date_stamp = pd.Timestamp.now(tz).strftime("%Y-%m-%d")
    folder = os.path.join(root, date_stamp)
    ensure_dir(folder)
    return folder

def read_symbols_from_file(path: str) -> List[str]:
    """Read one symbol per line from a UTF-8 text file.

    Surrounding whitespace is stripped and blank lines are skipped; the
    order of the file is preserved.
    """
    symbols: List[str] = []
    with open(path, "r", encoding="utf-8") as handle:
        for raw_line in handle:
            cleaned = raw_line.strip()
            if cleaned:
                symbols.append(cleaned)
    return symbols

def yahoo_download(tickers: List[str], start: str, end: Optional[str],
                   max_retries: int, sleep_s: float, threads: bool) -> pd.DataFrame:
    """Download daily price history from Yahoo via ``yf.download``, with retries.

    Args:
        tickers: Yahoo symbols to request in one call.
        start: Start date string; an empty value is passed on as ``None``.
        end: End date string, or ``None`` for no end bound.
        max_retries: Maximum number of download attempts.
        sleep_s: Seconds to wait between failed attempts.
        threads: Forwarded to ``yf.download``.

    Returns:
        Whatever ``yf.download`` returns for the first attempt that does not
        raise (columns grouped by ticker).

    Raises:
        RuntimeError: If every attempt raised; the last underlying exception
            is attached as the cause.
    """
    last_err: Optional[Exception] = None
    for attempt in range(1, max_retries + 1):
        try:
            return yf.download(
                tickers=tickers,
                start=start if start else None,
                end=end,
                auto_adjust=False,
                actions=False,
                group_by="ticker",
                threads=threads,
                progress=False,
                multi_level_index=False,
            )
        except Exception as e:
            last_err = e
            log.warning(f"Download attempt {attempt}/{max_retries} failed: {e}")
            # Only back off when another attempt follows; sleeping after the
            # final failure would just delay the error.
            if attempt < max_retries:
                time.sleep(sleep_s)
    # Chain the last failure so its traceback is not lost.
    raise RuntimeError(
        f"Yahoo download failed after {max_retries} retries: {last_err}"
    ) from last_err

def extract_wide(download_df: pd.DataFrame, tickers: List[str]) -> Dict[str, pd.DataFrame]:
    """Split a download result into one wide (Date x Ticker) frame per price field.

    The result always has the keys "Adj Close", "Close", "High" and "Low".
    With (ticker, field) MultiIndex columns, each frame holds the requested
    tickers that are present. With flat columns the input is treated as a
    single-ticker download, and only when exactly one ticker was requested;
    otherwise the frames are empty. All-NaN columns are dropped and the
    remaining gaps are forward-filled.
    """
    price_fields = ("Adj Close", "Close", "High", "Low")
    columns = download_df.columns
    wide: Dict[str, pd.DataFrame] = {}

    if isinstance(columns, pd.MultiIndex):
        for name in price_fields:
            per_ticker = [
                download_df[(tkr, name)].rename(tkr)
                for tkr in tickers
                if (tkr, name) in columns
            ]
            if per_ticker:
                wide[name] = pd.concat(per_ticker, axis=1).sort_index()
            else:
                wide[name] = pd.DataFrame()
    else:
        # Flat columns: only meaningful when a single ticker was requested.
        is_single = len(tickers) == 1
        for name in price_fields:
            if is_single and name in columns:
                wide[name] = download_df[name].to_frame(tickers[0])
            else:
                wide[name] = pd.DataFrame()

    # Drop tickers with no data at all, then carry the last value forward.
    return {
        name: frame.dropna(how="all", axis=1).ffill()
        for name, frame in wide.items()
    }

def ytd_anchor_idx(prices: pd.Series) -> Optional[pd.Timestamp]:
    """Return the index label just before the first row of the latest year.

    "Latest year" is the calendar year of the last index entry. ``None`` is
    returned for an empty series or when the series has no row preceding
    that year's first row.
    """
    if prices.empty:
        return None

    index = prices.index
    latest_year = index[-1].year
    rows_in_latest_year = index[index.year == latest_year]
    if len(rows_in_latest_year) == 0:
        return None

    first_pos = index.get_loc(rows_in_latest_year[0])
    if first_pos > 0:
        return index[first_pos - 1]
    return None

# =========================
# TOP CONSTITUENT (approx) VIA MARKET CAP
# =========================
def fetch_top_constituent_by_mcap(index_yahoo: str, timeout: float = 10.0) -> Optional[Tuple[str, float]]:
    """
    Estimate the heaviest stock of an index from market capitalisation.

    Steps:
      1) Look up the archive slug for ``index_yahoo`` and download the
         constituents CSV from the NSE archives.
      2) For every entry in its 'Symbol' column, ask Yahoo (SYMBOL.NS) for
         the market cap (``fast_info`` first, then ``info``).
      3) Return the symbol with the largest cap and its share of the summed
         caps, in percent.

    Returns None when the index has no slug, the CSV cannot be fetched or
    has no 'Symbol' column, or no positive market cap was obtained.
    """
    slug = INDEX_SLUG_MAP.get(index_yahoo)
    if not slug:
        log.debug(f"No slug mapping for index {index_yahoo}, skipping.")
        return None

    def lookup_market_cap(ticker):
        """Best-effort market cap for one Yahoo ticker object, or None."""
        cap = None
        try:
            fast = getattr(ticker, "fast_info", None)
            if fast is not None:
                cap = fast.get("market_cap") or fast.get("marketCap")
        except Exception:
            cap = None
        if cap is not None:
            return cap
        # Fall back to the `info` mapping when fast_info gave nothing.
        try:
            return ticker.info.get("marketCap")
        except Exception:
            return None

    csv_url = f"https://nsearchives.nseindia.com/content/indices/ind_{slug}list.csv"
    try:
        response = requests.get(csv_url, headers=NSE_HEADERS, timeout=timeout)
        response.raise_for_status()
        constituents = pd.read_csv(io.StringIO(response.text))
    except Exception as e:
        log.warning(f"Failed to fetch constituents CSV for {index_yahoo} ({slug}): {e}")
        return None

    if "Symbol" not in constituents.columns:
        log.warning(f"Constituents CSV for {index_yahoo} has no 'Symbol' column; columns={list(constituents.columns)}")
        return None

    members = constituents["Symbol"].dropna().astype(str).str.strip().tolist()
    if not members:
        return None

    caps: Dict[str, float] = {}
    for member in members:
        cap = lookup_market_cap(yf.Ticker(member + ".NS"))
        if cap is not None and cap > 0:
            caps[member] = float(cap)

    if not caps:
        log.warning(f"No market caps obtained for index {index_yahoo} ({slug})")
        return None

    cap_sum = sum(caps.values())
    leader = max(caps, key=caps.get)
    if cap_sum > 0:
        share_pct = (caps[leader] / cap_sum) * 100.0
    else:
        share_pct = float("nan")
    return leader, share_pct

# =========================
# CORE METRICS
# =========================
def compute_returns_table(adj: pd.DataFrame, benchmark: str,
                          lookbacks: Dict[str, int], rfr: float, tdpy: int) -> pd.DataFrame:
    """Build the per-symbol performance table from adjusted closes.

    Args:
        adj: Wide (Date x Ticker) frame of adjusted close prices.
        benchmark: Column of ``adj`` used as the relative-strength baseline.
        lookbacks: Output column name -> lookback in rows for trailing returns.
        rfr: Annual risk-free rate, divided by ``tdpy`` for the daily rate.
        tdpy: Trading days per year, used to annualise volatility and Sharpe.

    Returns:
        A frame indexed by ticker with columns ``price_latest``, one column
        per lookback, ``ret_ytd``, ``vol_ann``, ``cagr``, ``sharpe`` and
        ``rs_vs_bench``, all as fractions (not percent). An empty frame is
        returned when ``adj`` is empty.
    """
    if adj.empty:
        return pd.DataFrame()

    prices = adj.sort_index().dropna(how="all")
    annualiser = math.sqrt(tdpy)

    def trailing_return(data, rows_back: int):
        """Last value divided by the value ``rows_back`` rows earlier, minus 1."""
        return data.iloc[-1] / data.shift(rows_back).iloc[-1] - 1

    def blended_3m_6m(series: pd.Series):
        """Equal-weight blend of the 63-row and 126-row trailing returns."""
        return 0.5 * trailing_return(series, 63) + 0.5 * trailing_return(series, 126)

    def annual_growth(col: pd.Series) -> float:
        """Compound annual growth between the first and last non-NaN price."""
        valid = col.dropna()
        if len(valid) < 2:
            return np.nan
        span_years = (valid.index[-1] - valid.index[0]).days / 365.25
        if span_years > 0:
            return (valid.iloc[-1] / valid.iloc[0]) ** (1 / span_years) - 1
        return np.nan

    def year_to_date(series: pd.Series) -> float:
        """Return since the last row before the latest calendar year."""
        anchor = ytd_anchor_idx(series)
        if anchor is None:
            return np.nan
        return series.iloc[-1] / series.loc[anchor] - 1

    # Daily log returns drive both volatility and Sharpe.
    log_returns = np.log(prices / prices.shift(1))
    vol_ann = log_returns.std(skipna=True) * annualiser

    excess = log_returns - rfr / tdpy
    sharpe = (excess.mean(skipna=True) / excess.std(skipna=True)) * annualiser

    cagr = prices.apply(annual_growth, axis=0)
    ret_ytd = pd.Series({t: year_to_date(prices[t]) for t in prices.columns}, name="ret_ytd")

    # RS vs benchmark (3M/6M blend); NaN baseline when the benchmark is absent.
    base = np.nan
    if benchmark in prices.columns:
        try:
            base = blended_3m_6m(prices[benchmark])
        except Exception:
            base = np.nan

    relative: Dict[str, float] = {}
    for t in prices.columns:
        try:
            relative[t] = blended_3m_6m(prices[t]) - base
        except Exception:
            relative[t] = np.nan
    rs_vs_bench = pd.Series(relative, name="rs_vs_bench")

    # Assemble in the fixed column order the rest of the script expects.
    out = pd.DataFrame(index=prices.columns)
    out["price_latest"] = prices.iloc[-1]
    for name, rows_back in lookbacks.items():
        out[name] = trailing_return(prices, rows_back)
    out["ret_ytd"] = ret_ytd
    out["vol_ann"] = vol_ann
    out["cagr"] = cagr
    out["sharpe"] = sharpe
    out["rs_vs_bench"] = rs_vs_bench
    return out

def rank_and_score(df: pd.DataFrame, weights: Dict[str, float]) -> pd.DataFrame:
    """Rank rows on the weighted columns and add composite score columns.

    For every key of ``weights`` that is a column of ``df``, a ``rank_<col>``
    column is added (1 = highest value, ties share the lowest rank).
    ``score_composite`` is the weighted sum of those ranks, with weights
    renormalised over the columns actually present, so lower is better.
    ``score_percentile`` is the ascending rank of the composite as a
    percentage of the row count. The input frame is not modified.

    Returns:
        A copy sorted by ``score_composite`` ascending, then ``sharpe``
        descending.

    Raises:
        ValueError: If none of the weighted columns exist in ``df``.
    """
    scored = df.copy()
    active = [name for name in weights if name in scored.columns]
    if not active:
        raise ValueError("No ranking columns found; check composite_weights vs computed columns.")

    weight_total = sum(weights[name] for name in active)

    composite = 0
    for name in active:
        rank_col = f"rank_{name}"
        scored[rank_col] = scored[name].rank(ascending=False, method="min")
        composite = composite + (weights[name] / weight_total) * scored[rank_col]

    scored["score_composite"] = composite
    composite_rank = scored["score_composite"].rank(ascending=True)
    scored["score_percentile"] = (composite_rank / len(scored)) * 100.0
    return scored.sort_values(["score_composite", "sharpe"], ascending=[True, False])

def to_percent_inplace(df: pd.DataFrame, cols: List[str]) -> None:
    """Multiply the listed columns of ``df`` by 100, modifying ``df`` itself.

    Names in ``cols`` that are not columns of ``df`` are ignored.
    """
    present = [name for name in cols if name in df.columns]
    for name in present:
        df[name] = df[name] * 100.0

# =========================
# TA HELPERS
# =========================
def ema(series: pd.Series, span: int) -> pd.Series:
    """Exponential moving average (recursive form); NaN until ``span`` observations exist."""
    weighted = series.ewm(span=span, adjust=False, min_periods=span)
    return weighted.mean()

def sma(series: pd.Series, length: int) -> pd.Series:
    """Simple moving average over ``length`` rows; NaN until the window is full."""
    window = series.rolling(length, min_periods=length)
    return window.mean()

def bb_width(close: pd.Series, length: int, k: float) -> pd.Series:
    """Bollinger band width relative to the middle band.

    The bands sit ``k`` rolling standard deviations above and below the
    ``length``-row simple moving average; the result is
    (upper - lower) / middle.
    """
    middle = sma(close, length)
    offset = k * close.rolling(length, min_periods=length).std()
    return ((middle + offset) - (middle - offset)) / middle

def pct_rank(series: pd.Series, window: int) -> pd.Series:
    """Rolling percentile rank (0-1) of each value within its trailing window."""
    def last_value_percentile(chunk: pd.Series) -> float:
        # A single observation has no meaningful percentile.
        if len(chunk) >= 2:
            return chunk.rank(pct=True).iloc[-1]
        return np.nan

    return series.rolling(window).apply(last_value_percentile, raw=False)

def linear_slope(y: pd.Series) -> float:
    """Least-squares slope of ``y`` against positions 0..n-1, ignoring NaNs.

    Returns NaN when fewer than three non-NaN values remain.
    """
    observed = y.dropna()
    count = len(observed)
    if count < 3:
        return np.nan

    positions = np.arange(count, dtype=float)
    x_dev = positions - positions.mean()
    y_dev = observed.values - observed.mean()
    spread = (x_dev ** 2).sum()
    if spread == 0:
        return np.nan
    return float((x_dev @ y_dev) / spread)

def true_range(h: pd.Series, l: pd.Series, c: pd.Series) -> pd.Series:
    """Row-wise true range: the largest of high-low, |high-prev close|, |low-prev close|."""
    previous_close = c.shift(1)
    candidates = [
        h - l,
        (h - previous_close).abs(),
        (l - previous_close).abs(),
    ]
    return pd.concat(candidates, axis=1).max(axis=1)

def adx_series(h: pd.Series, l: pd.Series, c: pd.Series, n: int = 14) -> Tuple[pd.Series, pd.Series, pd.Series]:
    """ADX with DI+ and DI-, in the style of Welles Wilder's indicator.

    Directional movement, true range and DX are each smoothed with a simple
    ``n``-row rolling mean (not Wilder's recursive smoothing), so values are
    NaN until the windows are full.

    Returns:
        ``(adx, plus_di, minus_di)`` as series on the index of ``h``.
    """
    rise = h.diff()
    fall = -l.diff()

    # Only the dominant, positive move of the bar counts; everything else is 0.
    plus_dm = rise.where((rise > fall) & (rise > 0), 0.0)
    minus_dm = fall.where((fall > rise) & (fall > 0), 0.0)

    def smooth(values: pd.Series) -> pd.Series:
        return values.rolling(n, min_periods=n).mean()

    atr = smooth(true_range(h, l, c))

    plus_di = (smooth(pd.Series(plus_dm.values, index=h.index)) / atr) * 100.0
    minus_di = (smooth(pd.Series(minus_dm.values, index=h.index)) / atr) * 100.0

    di_gap = (plus_di - minus_di).abs()
    di_sum = plus_di + minus_di
    dx = (di_gap / di_sum) * 100.0
    return smooth(dx), plus_di, minus_di

# =========================
# TA PANEL
# =========================
def compute_ta_panel(close: pd.DataFrame, high: pd.DataFrame, low: pd.DataFrame,
                     bench_close: pd.Series, cfg: Config) -> pd.DataFrame:
    """
    Returns a per-symbol table of TA metrics at the last date.

    Args:
        close, high, low: Wide (Date x Ticker) price frames; ``high`` and
            ``low`` must contain every column of ``close``.
        bench_close: Benchmark close series, aligned to ``close``'s index
            and forward-filled here.
        cfg: Supplies the indicator lengths (SMA/EMA/BB/ADX/breakout/52W).

    Returns:
        A frame indexed by ``symbol`` (sorted) with one row per ticker that
        has more than ``max(cfg.sma_slow, cfg.dist_52w_len) + 4`` closes.
        ``dist_52w`` and ``rs_level_3m`` are fractions, not percent. When no
        ticker qualifies, an empty frame with the same columns is returned.
    """
    columns = [
        "ppo", "ppo_slope", "rs_level_3m", "rs_momentum",
        "regime_bull", "above_50", "above_200",
        "breakout_20", "breakout_55", "dist_52w", "bbw_pct",
        "adx", "adx_rising", "di_plus_gt", "pct_above50_last21",
    ]

    def is_breakout(cl: pd.Series, length: int) -> int:
        """1 when the last close exceeds the highest close of the PRIOR ``length`` bars."""
        # shift(1) excludes the current bar from the window; a max that
        # includes the last close can never be exceeded by that same close.
        prior_high = cl.rolling(length, min_periods=length).max().shift(1).iloc[-1]
        if np.isnan(prior_high):
            return 0
        return int(cl.iloc[-1] > prior_high)

    # Align all
    idx = close.index
    bench_close = bench_close.reindex(idx).ffill()

    rows = []
    for t in close.columns:
        cl = close[t].dropna()
        # Skip symbols without enough history for the slowest indicators.
        if len(cl) < max(cfg.sma_slow, cfg.dist_52w_len) + 5:
            continue

        h = high[t].reindex(cl.index).ffill()
        l = low[t].reindex(cl.index).ffill()

        # MAs & regime
        sma_f = sma(cl, cfg.sma_fast)
        sma_s = sma(cl, cfg.sma_slow)
        regime_bull = int((cl.iloc[-1] > sma_f.iloc[-1] > sma_s.iloc[-1]) if not np.isnan(sma_f.iloc[-1]) and not np.isnan(sma_s.iloc[-1]) else 0)
        above_50 = int(cl.iloc[-1] > sma_f.iloc[-1]) if not np.isnan(sma_f.iloc[-1]) else 0
        above_200 = int(cl.iloc[-1] > sma_s.iloc[-1]) if not np.isnan(sma_s.iloc[-1]) else 0

        # PPO & slope
        ema_f = ema(cl, cfg.ema_fast)
        ema_s = ema(cl, cfg.ema_slow)
        ppo_line = (ema_f - ema_s) / ema_s
        ppo = ppo_line.iloc[-1] if not np.isnan(ema_s.iloc[-1]) and ema_s.iloc[-1] != 0 else np.nan
        ppo_slope = linear_slope(ppo_line.dropna().tail(cfg.rrg_window))

        # RS vs benchmark + RS momentum (slope)
        rs_series = (cl / bench_close.reindex(cl.index)).dropna()
        rs_level = (rs_series.iloc[-1] / rs_series.shift(63).iloc[-1] - 1) if len(rs_series) > 63 else np.nan
        rs_momentum = linear_slope(rs_series.tail(cfg.rrg_window))  # slope of RS line

        # Breakouts: last close above the prior N-bar closing high
        brk20 = is_breakout(cl, cfg.breakout_short)
        brk55 = is_breakout(cl, cfg.breakout_long)

        # Distance to 52W high (window includes the last bar, so this is <= 0)
        hh52 = cl.rolling(cfg.dist_52w_len, min_periods=cfg.dist_52w_len).max()
        dist_52w = (cl.iloc[-1] / hh52.iloc[-1] - 1) if not np.isnan(hh52.iloc[-1]) and hh52.iloc[-1] != 0 else np.nan

        # BB width (contraction percentile over ~6 months)
        bbw = bb_width(cl, cfg.bb_len, cfg.bb_k)
        bbw_pct = pct_rank(bbw, 126).iloc[-1]  # 0..1; low => contraction

        # ADX & ADX rising
        adx, di_p, di_m = adx_series(h, l, cl, n=cfg.adx_len)
        adx_val = adx.iloc[-1] if len(adx.dropna()) else np.nan
        adx_rising = int(adx.diff().iloc[-1] > 0) if not np.isnan(adx_val) else 0
        di_plus_gt = int(di_p.iloc[-1] > di_m.iloc[-1]) if di_p.notna().iloc[-1] and di_m.notna().iloc[-1] else 0

        # % bars above 50DMA in last month (stability)
        last21 = cl.tail(21)
        sma_f_last = sma_f.reindex(last21.index)
        pct_above50_last21 = float((last21 > sma_f_last).mean()) if sma_f_last.notna().all() else np.nan

        rows.append({
            "symbol": t,
            "ppo": ppo,
            "ppo_slope": ppo_slope,
            "rs_level_3m": rs_level,       # ~3M RS change
            "rs_momentum": rs_momentum,    # slope of RS line
            "regime_bull": regime_bull,
            "above_50": above_50,
            "above_200": above_200,
            "breakout_20": brk20,
            "breakout_55": brk55,
            "dist_52w": dist_52w,          # <= 0 close to high; negative means below high
            "bbw_pct": bbw_pct,            # 0..1; lower => tighter
            "adx": adx_val,
            "adx_rising": adx_rising,
            "di_plus_gt": di_plus_gt,
            "pct_above50_last21": pct_above50_last21,
        })

    if not rows:
        # No symbol had enough history: return a well-formed empty table
        # instead of failing on the missing "symbol" column.
        return pd.DataFrame(columns=columns, index=pd.Index([], name="symbol"), dtype=float)

    ta = pd.DataFrame(rows).set_index("symbol").sort_index()
    return ta

# =========================
# SIGNALS / SCORING
# =========================
def rank_series(s: pd.Series, ascending: bool) -> pd.Series:
    """Rank ``s`` with ties sharing the lowest rank.

    With ``ascending=False`` the largest value gets rank 1; with
    ``ascending=True`` the smallest value does.
    """
    return s.rank(ascending=ascending, method="min")

def build_signals(ta: pd.DataFrame, table: pd.DataFrame, benchmark: str) -> pd.DataFrame:
    """
    Build 'leaders' and 'next-up' composite scores.

    Uses momentum rank (score_composite) so Leaders / Next-Up
    are consistent with the main momentum table.

    Args:
        ta: Per-symbol TA table (as produced by compute_ta_panel), with
            ``dist_52w`` expressed as a fraction (e.g. -0.05 = 5% below the
            52-week high).
        table: Per-symbol metrics; ``rs_vs_bench``, ``sharpe`` and
            ``score_composite`` are joined in when present.
        benchmark: Accepted for interface compatibility; not used here.

    Returns:
        ``ta`` joined with the metrics, plus ``leaders_score``,
        ``nextup_score`` and the individual rank columns, sorted by
        ``leaders_score`` then ``nextup_score``. For both scores lower is
        better. Any input column that is missing is treated as all-NaN.
    """
    # Join TA with RS/Sharpe and momentum score. reindex() yields NaN columns
    # for anything `table` lacks instead of raising KeyError.
    extra = table.reindex(columns=["rs_vs_bench", "sharpe", "score_composite"])
    df = ta.join(extra, how="left")

    # Guard: ensure all used columns exist
    for col in [
        "ppo", "ppo_slope", "rs_level_3m", "rs_momentum",
        "adx", "rs_vs_bench", "dist_52w", "bbw_pct", "score_composite",
        "adx_rising", "di_plus_gt", "regime_bull", "breakout_20", "breakout_55",
    ]:
        if col not in df:
            df[col] = np.nan

    # ---------- Common ranks ----------
    # Momentum rank (1 = best momentum): score_composite is "lower is better"
    mom_rank = rank_series(df["score_composite"], ascending=True)

    # RS & ADX ranks (higher better => lower rank)
    rs_rank = rank_series(df["rs_vs_bench"], ascending=False)
    adx_rank = rank_series(df["adx"], ascending=False)

    # Distance to 52W high: closer to the high is better. dist_52w is <= 0,
    # so the value nearest 0 is the largest and must receive rank 1, which
    # means ranking the raw value descending (no negation). Anything more
    # than 20% below the high is treated as equally far away; the bounds
    # are fractions because dist_52w is a fraction here.
    dist_rank = rank_series(
        df["dist_52w"].clip(lower=-0.20, upper=0.0),
        ascending=False,
    )

    # RS momentum & PPO slope for Next-Up
    rs_mom_rank = rank_series(df["rs_momentum"], ascending=False)
    ppo_slope_rank = rank_series(df["ppo_slope"], ascending=False)

    # Volatility contraction: lower bbw_pct better (negate, then rank descending)
    squeeze_rank = rank_series(-df["bbw_pct"], ascending=False)

    # ---------- Leaders: momentum-heavy, TA-filtered ----------
    leaders_score = (
        mom_rank * 0.50 +      # main driver: same momentum rank as TOP table
        rs_rank  * 0.20 +      # strong RS vs NIFTY
        adx_rank * 0.20 +      # strong trend
        dist_rank * 0.10       # closer to 52W high
    )

    # Small bonus: bullish regime & breakouts reduce score (better)
    leaders_score = leaders_score - (
        df["regime_bull"].fillna(0) * 0.30 +
        df["breakout_20"].fillna(0) * 0.20 +
        df["breakout_55"].fillna(0) * 0.20
    )

    # ---------- Next-Up: improvement + some momentum ----------
    nextup_score = (
        mom_rank       * 0.30 +   # still respect overall momentum
        rs_mom_rank    * 0.30 +   # RS improving
        ppo_slope_rank * 0.20 +   # PPO (MACD) slope improving
        squeeze_rank   * 0.20     # volatility contraction (squeeze)
    )

    # Bonus: ADX rising, DI+ > DI-, near breakouts
    nextup_score = nextup_score - (
        df["adx_rising"].fillna(0) * 0.30 +
        df["di_plus_gt"].fillna(0) * 0.20 +
        df["breakout_20"].fillna(0) * 0.25 +
        df["breakout_55"].fillna(0) * 0.25
    )

    out = df.copy()
    out["leaders_score"] = leaders_score
    out["nextup_score"] = nextup_score

    # Optional: keep ranks for debugging
    out["mom_rank"]       = mom_rank
    out["rs_rank"]        = rs_rank
    out["adx_rank"]       = adx_rank
    out["dist_rank"]      = dist_rank
    out["rs_mom_rank"]    = rs_mom_rank
    out["ppo_slope_rank"] = ppo_slope_rank
    out["squeeze_rank"]   = squeeze_rank

    # Lower score = better => sort ascending
    out = out.sort_values(["leaders_score", "nextup_score"], ascending=[True, True])
    return out

# =========================
# MAIN
# =========================
def main():
    """Run the full scan: download, score, write CSVs and print summaries.

    Steps, all driven by the module-level ``CFG``:
      1) Build the universe (from ``CFG.symbols`` or ``CFG.symbols_path``)
         and make sure the benchmark is part of it.
      2) Download prices and drop symbols with more than 15% missing rows.
      3) Compute the returns table, the momentum ranking (benchmark
         excluded), the TA panel and the Leaders / Next-Up signals.
      4) Write the CSV outputs into today's dated folder and print the
         momentum, Leaders, Next-Up, top-constituent and benchmark tables.

    Raises:
        SystemExit: If nothing was downloaded or fewer than two symbols
            survive the filtering.
    """
    # ===== Universe =====
    symbols = CFG.symbols
    if CFG.symbols_path:
        symbols = read_symbols_from_file(CFG.symbols_path)
    # Normalise, then de-duplicate while keeping the original order.
    symbols = list(dict.fromkeys([s.strip().upper() for s in symbols if s.strip()]))
    if CFG.benchmark.upper() not in symbols:
        symbols.append(CFG.benchmark.upper())

    log.info(f"Universe ({len(symbols)}): {', '.join(symbols)} | Benchmark={CFG.benchmark}")

    # ===== Download =====
    dl = yahoo_download(
        tickers=symbols,
        start=CFG.start_date,
        end=CFG.end_date,
        max_retries=CFG.max_retries,
        sleep_s=CFG.retry_sleep_sec,
        threads=CFG.threads,
    )
    wide = extract_wide(dl, tickers=symbols)
    adj, close, high, low = wide["Adj Close"], wide["Close"], wide["High"], wide["Low"]
    if adj.empty or close.empty:
        raise SystemExit("No data downloaded. Check tickers/date range.")

    # Drop short-history columns (>15% NaNs)
    valid_frac = adj.notna().mean()
    keep_cols = valid_frac[valid_frac > 0.85].index.tolist()
    dropped = [c for c in adj.columns if c not in keep_cols]
    if dropped:
        log.warning(f"Dropping short-history/illiquid symbols (NaNs>15%): {', '.join(dropped)}")
    adj = adj[keep_cols]
    close = close[keep_cols]
    high = high[keep_cols]
    low = low[keep_cols]

    # ===== CORE METRICS =====
    table = compute_returns_table(
        adj=adj,
        benchmark=CFG.benchmark.upper(),
        lookbacks=CFG.lookbacks_days,
        rfr=CFG.risk_free_rate_annual,
        tdpy=CFG.trading_days_per_year,
    )
    if table.empty or len(table) < 2:
        raise SystemExit("Insufficient data after filtering; adjust universe or dates.")

    # ===== MOMENTUM RANKING (exclude benchmark) =====
    rank_df = table.copy()
    if CFG.benchmark.upper() in rank_df.index:
        rank_df_no_bm = rank_df.drop(index=[CFG.benchmark.upper()])
    else:
        rank_df_no_bm = rank_df

    ranked = rank_and_score(rank_df_no_bm, CFG.composite_weights)

    # Propagate momentum score back into table so signals can use it
    # (index-aligned; the benchmark row stays NaN).
    table["score_composite"] = ranked["score_composite"]

    # ===== TA PANEL =====
    bench_close = close[CFG.benchmark.upper()] if CFG.benchmark.upper() in close.columns else pd.Series(index=close.index, dtype=float)
    ta = compute_ta_panel(
        close.drop(columns=[CFG.benchmark.upper()], errors="ignore"),
        high.drop(columns=[CFG.benchmark.upper()], errors="ignore"),
        low.drop(columns=[CFG.benchmark.upper()], errors="ignore"),
        bench_close,
        CFG,
    )

    # ===== SIGNALS =====
    # IMPORTANT: pass full `table` (has score_composite), not rank_df_no_bm.
    # Both inputs are still in fractional units at this point, so `signals`
    # (and signals.csv) hold fractions, not percentages.
    signals = build_signals(ta, table, CFG.benchmark.upper())
    leaders = signals.sort_values("leaders_score", ascending=True).head(CFG.top_n)
    nextup  = signals.sort_values("nextup_score",  ascending=True).head(CFG.top_n)

    # ===== Convert to % where applicable =====
    to_percent_inplace(table, PCT_COLS)
    to_percent_inplace(ranked, [c for c in PCT_COLS if c in ranked.columns])
    if "dist_52w" in ta.columns:
        ta["dist_52w"] = ta["dist_52w"] * 100.0

    # ===== Outputs =====
    out_dir = today_folder(CFG.out_root, CFG.tz_display)
    returns_csv = os.path.join(out_dir, "returns_table.csv")
    ranked_csv = os.path.join(out_dir, "sector_scores.csv")
    latest_csv = os.path.join(out_dir, "latest_prices.csv")
    ta_csv = os.path.join(out_dir, "ta_scores.csv")
    sig_csv = os.path.join(out_dir, "signals.csv")

    table.sort_index().to_csv(returns_csv, float_format="%.2f")
    ranked.to_csv(ranked_csv, float_format="%.2f")
    if CFG.save_latest_prices:
        pd.DataFrame({"price_latest": adj.iloc[-1]}).sort_index().to_csv(latest_csv, float_format="%.2f")
    ta.to_csv(ta_csv, float_format="%.6f")
    signals.to_csv(sig_csv, float_format="%.6f")

    log.info(f"Saved: {ranked_csv}")
    log.info(f"Saved: {returns_csv}")
    if CFG.save_latest_prices:
        log.info(f"Saved: {latest_csv}")
    log.info(f"Saved: {ta_csv}")
    log.info(f"Saved: {sig_csv}")

    # ===== Console summary =====
    print("\n================ INDIA: TOP SECTOR OUTPERFORMERS (Momentum) ================")
    disp_cols = ["score_composite", "score_percentile", "ret_1m", "ret_3m", "ret_6m", "ret_1y", "sharpe", "rs_vs_bench"]
    disp = ranked[disp_cols].copy().astype(object)
    for c in ["score_percentile", "ret_1m", "ret_3m", "ret_6m", "ret_1y", "rs_vs_bench"]:
        if c in disp.columns:
            disp[c] = disp[c].map(lambda x: f"{float(x):.2f}%")
    if "sharpe" in disp.columns:
        disp["sharpe"] = disp["sharpe"].map(lambda x: f"{float(x):.2f}")
    if "score_composite" in disp.columns:
        disp["score_composite"] = disp["score_composite"].map(lambda x: f"{float(x):.2f}")
    print(disp.to_string())
    print("==========================================================================\n")

    print("Leaders (TA-weighted strong-now, momentum-aware):")
    ld = leaders[[
        "leaders_score","score_composite","mom_rank",
        "rs_vs_bench","rs_rank","rs_momentum",
        "adx","adx_rank","regime_bull","breakout_20","breakout_55",
        "dist_52w","dist_rank","bbw_pct"
    ]].copy()
    # `signals` carries fractions (it was built before the % conversion), so
    # scale the percent-style columns on this display copy; otherwise a
    # 17% reading would be printed as "0.17%".
    for c in ["rs_vs_bench", "dist_52w"]:
        ld[c] = ld[c] * 100.0
    ld = ld.astype(object)
    for c in ["rs_vs_bench", "dist_52w"]:
        if c in ld.columns:
            ld[c] = ld[c].map(lambda x: f"{float(x):.2f}%" if pd.notna(x) else "nan")
    for c in ["leaders_score","score_composite","mom_rank","rs_rank","adx_rank","dist_rank","bbw_pct","adx","rs_momentum"]:
        if c in ld.columns:
            ld[c] = ld[c].map(lambda x: f"{float(x):.2f}" if pd.notna(x) else "nan")
    print(ld.to_string())
    print()

    print("Next-Up (building strength / likely to boom, momentum-aware):")
    nu = nextup[[
        "nextup_score","score_composite","mom_rank",
        "rs_momentum","rs_mom_rank","ppo_slope","ppo_slope_rank",
        "adx_rising","di_plus_gt","breakout_20","breakout_55",
        "dist_52w","bbw_pct","squeeze_rank","rs_vs_bench"
    ]].copy()
    # Same fraction -> percent scaling as for the Leaders table.
    for c in ["dist_52w", "rs_vs_bench"]:
        nu[c] = nu[c] * 100.0
    nu = nu.astype(object)
    for c in ["dist_52w","rs_vs_bench"]:
        if c in nu.columns:
            nu[c] = nu[c].map(lambda x: f"{float(x):.2f}%" if pd.notna(x) else "nan")
    for c in ["nextup_score","score_composite","mom_rank","rs_mom_rank","ppo_slope_rank","squeeze_rank","bbw_pct","rs_momentum","ppo_slope"]:
        if c in nu.columns:
            nu[c] = nu[c].map(lambda x: f"{float(x):.2f}" if pd.notna(x) else "nan")
    print(nu.to_string())

    # ===== Top-weight stock for each index (approx via market cap) =====
    index_rows = []
    for sym in sorted(adj.columns):
        pretty_name = INDEX_NAME_MAP.get(sym, sym)
        top_const = fetch_top_constituent_by_mcap(sym)
        # Indices without a slug, CSV or market caps are simply left out.
        if not top_const:
            continue
        top_symbol, top_weight = top_const
        index_rows.append(
            {
                "index_yahoo": sym,
                "index_name": pretty_name,
                "top_stock": top_symbol,
                "top_weight_pct_est": top_weight,
            }
        )

    if index_rows:
        idx_df = pd.DataFrame(index_rows)
        top_constituents_csv = os.path.join(out_dir, "index_top_constituents.csv")
        idx_df.to_csv(top_constituents_csv, index=False, float_format="%.2f")
        log.info(f"Saved: {top_constituents_csv}")

        print("\nTop-weight stock by index (approx weight, from market cap):")
        disp_idx = idx_df.copy()
        disp_idx["top_weight_pct_est"] = disp_idx["top_weight_pct_est"].map(
            lambda x: f"{float(x):.2f}%" if pd.notna(x) else "nan"
        )
        print(disp_idx.to_string(index=False))

    # Benchmark snapshot (values in `table` are already in percent here)
    if CFG.benchmark.upper() in table.index:
        bm = table.loc[CFG.benchmark.upper(), ["ret_1m","ret_3m","ret_6m","ret_1y","ret_ytd","sharpe"]].copy()
        bm_disp = bm.astype(object)
        for c in ["ret_1m","ret_3m","ret_6m","ret_1y","ret_ytd"]:
            if c in bm_disp.index:
                bm_disp[c] = f"{float(bm[c]):.2f}%"
        if "sharpe" in bm_disp.index:
            bm_disp["sharpe"] = f"{float(bm['sharpe']):.2f}"
        print("\nBenchmark snapshot (NIFTY 50):")
        print(pd.DataFrame(bm_disp).T.to_string(index=False))

if __name__ == "__main__":
    main()


2025-11-23 08:00:11 | INFO | Universe (18): ^CNXAUTO, ^NSEBANK, ^CNXIT, ^CNXMEDIA, ^CNXMETAL, ^CNXPHARMA, ^CNXFMCG, ^CNXREALTY, ^CNXENERGY, ^CNXINFRA, ^CNXPSUBANK, NIFTY_PVT_BANK.NS, NIFTY_FIN_SERVICE.NS, NIFTYFINSRV25_50.NS, NIFTY_HEALTHCARE.NS, NIFTY_OIL_AND_GAS.NS, NIFTY_CONSR_DURBL.NS, ^NSEI | Benchmark=^NSEI
2025-11-23 08:00:13 | INFO | Saved: outputs/india_sector_rotation/2025-11-23/sector_scores.csv
2025-11-23 08:00:13 | INFO | Saved: outputs/india_sector_rotation/2025-11-23/returns_table.csv
2025-11-23 08:00:13 | INFO | Saved: outputs/india_sector_rotation/2025-11-23/latest_prices.csv
2025-11-23 08:00:13 | INFO | Saved: outputs/india_sector_rotation/2025-11-23/ta_scores.csv
2025-11-23 08:00:13 | INFO | Saved: outputs/india_sector_rotation/2025-11-23/signals.csv



                     score_composite score_percentile  ret_1m   ret_3m   ret_6m   ret_1y sharpe rs_vs_bench
^CNXPSUBANK                     1.00            8.33%   6.69%   17.95%   25.66%   29.05%   0.08      16.81%
^CNXAUTO                        2.75           16.67%   1.13%    8.12%   17.38%   20.18%   0.45       7.76%
^NSEBANK                        4.25           25.00%   1.48%    5.69%    7.15%   17.31%   0.36       1.42%
^CNXINFRA                       4.40           33.33%   2.07%    5.48%    8.17%   14.57%   0.26       1.83%
^CNXMETAL                       4.80           41.67%  -0.88%    6.79%   10.49%   14.88%   0.16       3.64%
NIFTY_FIN_SERVICE.NS            6.05           50.00%   0.11%    4.07%    5.11%   18.82%   0.44      -0.40%
^CNXIT                          7.10           58.33%   4.49%    3.35%   -0.44%  -12.99%   0.07      -3.54%
^CNXENERGY                      7.65           66.67%   0.58%    2.88%    1.40%   -3.23%   0.29      -2.85%
^CNXPHARMA                 

2025-11-23 08:01:48 | INFO | Saved: outputs/india_sector_rotation/2025-11-23/index_top_constituents.csv



Top-weight stock by index (approx weight, from market cap):
index_yahoo           index_name  top_stock top_weight_pct_est
   ^CNXAUTO           NIFTY AUTO     MARUTI             19.97%
 ^CNXENERGY         NIFTY ENERGY   RELIANCE             34.70%
   ^CNXFMCG           NIFTY FMCG HINDUNILVR             24.03%
  ^CNXINFRA NIFTY INFRASTRUCTURE   RELIANCE             25.17%
     ^CNXIT             NIFTY IT        TCS             37.27%
  ^CNXMEDIA          NIFTY MEDIA      SUNTV             23.13%
  ^CNXMETAL          NIFTY METAL   ADANIENT             15.69%
 ^CNXPHARMA         NIFTY PHARMA  SUNPHARMA             24.33%
^CNXPSUBANK       NIFTY PSU BANK       SBIN             49.11%
 ^CNXREALTY         NIFTY REALTY        DLF             28.52%
   ^NSEBANK           NIFTY BANK   HDFCBANK             31.30%
      ^NSEI             NIFTY 50   RELIANCE             10.00%

Benchmark snapshot (NIFTY 50):
ret_1m ret_3m ret_6m ret_1y ret_ytd sharpe
 0.77%  4.06%  5.93% 10.77%  10.25%   0.40
