In [9]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
India Sector Outperformance + TA Strength Scanner (Yahoo Finance)

Adds TA diagnostics to identify:
- Current Leaders (already strong)
- Next-Up (strength building, likely to boom next)

Outputs
-------
outputs/india_sector_rotation/YYYY-MM-DD/sector_scores.csv      # momentum ranks (as before)
outputs/india_sector_rotation/YYYY-MM-DD/returns_table.csv       # returns & stats (as before)
outputs/india_sector_rotation/YYYY-MM-DD/latest_prices.csv       # last prices (as before)
outputs/india_sector_rotation/YYYY-MM-DD/ta_scores.csv           # TA metrics per sector
outputs/india_sector_rotation/YYYY-MM-DD/signals.csv             # Leaders / Next-Up ranks

Deps
----
pip install yfinance pandas numpy
"""

import os
import time
import math
import logging
from dataclasses import dataclass, field
from typing import List, Optional, Dict, Tuple

import numpy as np
import pandas as pd

try:
    import yfinance as yf
except Exception:
    raise SystemExit("Please: pip install yfinance pandas numpy")

# =========================
# LOGGING
# =========================
# Root logging goes to the console with a timestamped, pipe-separated layout.
logging.basicConfig(
    format="%(asctime)s | %(levelname)s | %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
    level=logging.INFO,
)
# Logger shared by every function in this script.
log = logging.getLogger("india_sector_scanner_ta")

# Fractional columns that get multiplied by 100 before being written/printed.
PCT_COLS = [
    "ret_1w",
    "ret_1m",
    "ret_3m",
    "ret_6m",
    "ret_1y",
    "ret_ytd",
    "cagr",
    "vol_ann",
    "rs_vs_bench",
]

# =========================
# CONFIG
# =========================
@dataclass
class Config:
    # Settings for one scanner run; every field has a default so Config() works as-is.

    # --- Date range handed to the downloader (end_date=None leaves the end open) ---
    start_date: str = "2012-01-01"
    end_date: Optional[str] = None

    # --- Universe: sector index tickers, optionally replaced by a symbols file ---
    symbols: List[str] = field(
        default_factory=lambda: [
            "^NSEBANK",
            "^CNXIT",
            "^CNXAUTO",
            "^CNXPHARMA",
            "^CNXFMCG",
            "^CNXMETAL",
            "^CNXREALTY",
            "^CNXINFRA",
            "^CNXENERGY",
            "^CNXMEDIA",
        ]
    )
    symbols_path: Optional[str] = None
    benchmark: str = "^NSEI"

    # --- Momentum scoring; None means "use the defaults set in __post_init__" ---
    lookbacks_days: Optional[Dict[str, int]] = None
    composite_weights: Optional[Dict[str, float]] = None

    # --- Risk statistics ---
    risk_free_rate_annual: float = 0.06
    trading_days_per_year: int = 252

    # --- Output ---
    out_root: str = "outputs/india_sector_rotation"
    top_n: int = 5
    save_latest_prices: bool = True

    # --- Download behaviour ---
    max_retries: int = 3
    retry_sleep_sec: float = 2.5
    threads: bool = True

    # Time zone used to name the dated output folder
    tz_display: str = "Asia/Kolkata"

    # --- TA parameters ---
    sma_fast: int = 50
    sma_slow: int = 200
    ema_fast: int = 12
    ema_slow: int = 26
    bb_len: int = 20
    bb_k: float = 2.0
    adx_len: int = 14
    rrg_window: int = 63      # days for RS momentum slope
    breakout_short: int = 20
    breakout_long: int = 55
    dist_52w_len: int = 252

    def __post_init__(self):
        # Dict defaults are created per instance here rather than shared at class level.
        if self.lookbacks_days is None:
            self.lookbacks_days = dict(
                ret_1w=5,
                ret_1m=21,
                ret_3m=63,
                ret_6m=126,
                ret_1y=252,
            )
        if self.composite_weights is None:
            self.composite_weights = dict(
                ret_1m=0.25,
                ret_3m=0.35,
                ret_6m=0.25,
                ret_1y=0.15,
            )


# Module-wide configuration instance used by main().
CFG = Config()

# =========================
# UTILITIES
# =========================
def ensure_dir(path: str) -> None:
    """Create ``path`` (including parents); do nothing if it already exists."""
    os.makedirs(name=path, exist_ok=True)

def today_folder(root: str, tz: str) -> str:
    """Return ``root/YYYY-MM-DD`` for the current date in time zone ``tz``, creating it if needed."""
    stamp = pd.Timestamp.now(tz).strftime("%Y-%m-%d")
    folder = os.path.join(root, stamp)
    os.makedirs(folder, exist_ok=True)
    return folder

def read_symbols_from_file(path: str) -> List[str]:
    """Read one symbol per line from a UTF-8 text file, trimming whitespace and skipping blank lines."""
    symbols: List[str] = []
    with open(path, "r", encoding="utf-8") as handle:
        for raw_line in handle:
            cleaned = raw_line.strip()
            if cleaned:
                symbols.append(cleaned)
    return symbols

def yahoo_download(tickers: List[str], start: str, end: Optional[str],
                   max_retries: int, sleep_s: float, threads: bool) -> pd.DataFrame:
    """
    Download daily price history for ``tickers`` via ``yf.download``, retrying on errors.

    Parameters
    ----------
    tickers : symbols to request in a single call.
    start, end : date bounds passed through (an empty ``start`` becomes None).
    max_retries : maximum number of attempts.
    sleep_s : pause in seconds between failed attempts.
    threads : forwarded to ``yf.download``.

    Returns
    -------
    The DataFrame returned by ``yf.download`` (requested with group_by="ticker").

    Raises
    ------
    RuntimeError
        When every attempt raised; the last underlying error is chained as the cause.
    """
    last_err: Optional[Exception] = None
    for attempt in range(1, max_retries + 1):
        try:
            return yf.download(
                tickers=tickers,
                start=start if start else None,
                end=end,
                auto_adjust=False,
                actions=False,
                group_by="ticker",
                threads=threads,
                progress=False,
                multi_level_index=False,
            )
        except Exception as e:  # retry boundary: any download error is retried
            last_err = e
            log.warning(f"Download attempt {attempt}/{max_retries} failed: {e}")
            # No point in waiting once there are no attempts left.
            if attempt < max_retries:
                time.sleep(sleep_s)
    raise RuntimeError(
        f"Yahoo download failed after {max_retries} retries: {last_err}"
    ) from last_err

def extract_wide(download_df: pd.DataFrame, tickers: List[str]) -> Dict[str, pd.DataFrame]:
    """
    Split a download frame into one wide (Date x Ticker) frame per price field.

    The result always has the keys "Adj Close", "Close", "High" and "Low".
    With (ticker, field) MultiIndex columns each available ticker becomes a
    column; with flat columns the data is used only when exactly one ticker
    was requested. All-NaN columns are dropped and gaps are forward-filled.
    """
    price_fields = ("Adj Close", "Close", "High", "Low")
    is_multi = isinstance(download_df.columns, pd.MultiIndex)

    panels: Dict[str, pd.DataFrame] = {}
    for name in price_fields:
        if is_multi:
            series_list = [
                download_df[(ticker, name)].rename(ticker)
                for ticker in tickers
                if (ticker, name) in download_df.columns
            ]
            if series_list:
                panel = pd.concat(series_list, axis=1).sort_index()
            else:
                panel = pd.DataFrame()
        elif name in download_df.columns and len(tickers) == 1:
            # Flat columns: single-ticker download
            panel = download_df[name].to_frame(tickers[0])
        else:
            panel = pd.DataFrame()
        panels[name] = panel

    # Drop empty columns, then carry the last known value forward
    for key in panels:
        panels[key] = panels[key].dropna(how="all", axis=1).ffill()
    return panels

def ytd_anchor_idx(prices: pd.Series) -> Optional[pd.Timestamp]:
    """
    Return the index label just before the first entry of the latest year.

    The latest year is the year of the last index label. Returns None when the
    series is empty or when the latest year starts at the very first entry.
    """
    if prices.empty:
        return None

    dates = prices.index
    latest_year = dates[-1].year
    in_latest_year = dates[dates.year == latest_year]
    if len(in_latest_year) == 0:
        return None

    first_pos = dates.get_loc(in_latest_year[0])
    if first_pos > 0:
        return dates[first_pos - 1]
    return None

# =========================
# CORE METRICS (same as before)
# =========================
def compute_returns_table(adj: pd.DataFrame, benchmark: str,
                          lookbacks: Dict[str, int], rfr: float, tdpy: int) -> pd.DataFrame:
    """
    Build the per-symbol returns/statistics table from adjusted closes.

    Parameters
    ----------
    adj : wide frame of adjusted closes (dates x symbols).
    benchmark : column name used as the relative-strength base.
    lookbacks : mapping of output column name -> number of rows to look back.
    rfr : annual risk-free rate, spread evenly over ``tdpy`` days for Sharpe.
    tdpy : trading days per year, used to annualise volatility and Sharpe.

    Returns
    -------
    DataFrame indexed by symbol with ``price_latest``, one column per
    lookback, ``ret_ytd``, ``vol_ann``, ``cagr``, ``sharpe`` and
    ``rs_vs_bench``; all values are fractions, not percentages. An empty frame
    is returned when there is no usable price row.
    """
    if adj.empty:
        return pd.DataFrame()

    adj = adj.sort_index().dropna(how="all")
    if adj.empty:
        # Every row was NaN, so there is no "latest" row to measure from.
        return pd.DataFrame()
    latest_prices = adj.iloc[-1]

    # Daily log returns drive both volatility and Sharpe.
    lr = np.log(adj / adj.shift(1))
    vol_ann = lr.std(skipna=True) * math.sqrt(tdpy)

    def col_cagr(col: pd.Series) -> float:
        # CAGR between the first and last non-NaN observation of one column.
        col = col.dropna()
        if len(col) < 2:
            return np.nan
        start_val, end_val = col.iloc[0], col.iloc[-1]
        years = (col.index[-1] - col.index[0]).days / 365.25
        return (end_val / start_val) ** (1 / years) - 1 if years > 0 else np.nan

    cagr = adj.apply(col_cagr, axis=0)

    daily_excess = lr.sub(rfr / tdpy, axis=0)
    sharpe = (daily_excess.mean(skipna=True) / daily_excess.std(skipna=True)) * math.sqrt(tdpy)

    def trailing_return(n: int) -> pd.Series:
        # Return over the last n rows; NaN when history is shorter than n.
        return latest_prices / adj.shift(n).iloc[-1] - 1

    rets = {name: trailing_return(n) for name, n in lookbacks.items()}

    ytd_vals: Dict[str, float] = {}
    for t in adj.columns:
        anchor = ytd_anchor_idx(adj[t])
        ytd_vals[t] = adj[t].iloc[-1] / adj[t].loc[anchor] - 1 if anchor is not None else np.nan
    ret_ytd = pd.Series(ytd_vals, name="ret_ytd")

    # RS vs benchmark: equal blend of 63-row and 126-row returns, minus the
    # same blend for the benchmark (NaN for everyone if the benchmark is absent).
    blend = 0.5 * trailing_return(63) + 0.5 * trailing_return(126)
    base = blend[benchmark] if benchmark in adj.columns else np.nan
    rs_vs_bench = (blend - base).rename("rs_vs_bench")

    out = pd.DataFrame(index=adj.columns)
    out["price_latest"] = latest_prices
    for k, v in rets.items():
        out[k] = v
    out["ret_ytd"] = ret_ytd
    out["vol_ann"] = vol_ann
    out["cagr"] = cagr
    out["sharpe"] = sharpe
    out["rs_vs_bench"] = rs_vs_bench
    return out

def rank_and_score(df: pd.DataFrame, weights: Dict[str, float]) -> pd.DataFrame:
    """
    Rank symbols on each weighted column and blend the ranks into one score.

    For every weight key present in ``df`` a ``rank_<col>`` column is added
    (rank 1 = highest value). ``score_composite`` is the weight-normalised sum
    of those ranks, so lower is better; ``score_percentile`` is its ascending
    rank as a percentage of the row count. The returned copy is sorted by
    ``score_composite`` ascending, then ``sharpe`` descending.

    Raises ValueError when none of the weight keys is a column of ``df``.
    """
    scored = df.copy()
    active = [name for name in weights if name in scored.columns]
    if not active:
        raise ValueError("No ranking columns found; check composite_weights vs computed columns.")

    for name in active:
        scored[f"rank_{name}"] = scored[name].rank(ascending=False, method="min")

    # Re-normalise so the weights in use sum to one.
    total_weight = sum(weights[name] for name in active)
    shares = {name: weights[name] / total_weight for name in active}

    composite = 0
    for name in active:
        composite = composite + shares[name] * scored[f"rank_{name}"]
    scored["score_composite"] = composite

    position = scored["score_composite"].rank(ascending=True)
    scored["score_percentile"] = (position / len(scored)) * 100.0

    return scored.sort_values(["score_composite", "sharpe"], ascending=[True, False])

def to_percent_inplace(df: pd.DataFrame, cols: List[str]) -> None:
    """Multiply each listed column that exists in ``df`` by 100, modifying ``df`` itself."""
    present = [name for name in cols if name in df.columns]
    for name in present:
        df[name] = df[name] * 100.0

# =========================
# TA HELPERS
# =========================
def ema(series: pd.Series, span: int) -> pd.Series:
    """Recursive (adjust=False) exponential moving average; NaN until ``span`` observations exist."""
    smoother = series.ewm(span=span, min_periods=span, adjust=False)
    return smoother.mean()

def sma(series: pd.Series, length: int) -> pd.Series:
    """Simple moving average over ``length`` rows; NaN until the window is full."""
    window = series.rolling(window=length, min_periods=length)
    return window.mean()

def bb_width(close: pd.Series, length: int, k: float) -> pd.Series:
    """
    Bollinger band width relative to the middle band.

    Bands sit ``k`` rolling standard deviations either side of the
    ``length``-row simple moving average; the result is
    (upper - lower) / middle and is NaN until the window is full.
    """
    window = close.rolling(length, min_periods=length)
    middle = window.mean()
    spread = window.std()
    band_top = middle + k * spread
    band_bottom = middle - k * spread
    return (band_top - band_bottom) / middle

def pct_rank(series: pd.Series, window: int) -> pd.Series:
    """
    Rolling percentile rank of the newest value within its window.

    Each output is the newest value's fractional rank (``rank(pct=True)``)
    among the ``window`` most recent values; windows shorter than two rows
    yield NaN.
    """
    def _rank_of_latest(chunk: pd.Series) -> float:
        if len(chunk) >= 2:
            return chunk.rank(pct=True).iloc[-1]
        return np.nan

    roller = series.rolling(window)
    return roller.apply(_rank_of_latest, raw=False)

def linear_slope(y: pd.Series) -> float:
    """
    Least-squares slope of ``y`` against the positions 0..n-1.

    NaNs are dropped before fitting (positions are assigned after dropping);
    fewer than three remaining points gives NaN.
    """
    clean = y.dropna()
    count = len(clean)
    if count < 3:
        return np.nan

    steps = np.arange(count, dtype=float)
    # Covariance-style numerator over the sum of squared x deviations
    numerator = (steps - steps.mean()) @ (clean.values - clean.mean())
    denominator = ((steps - steps.mean()) ** 2).sum()
    if denominator == 0:
        return np.nan
    return float(numerator / denominator)

def true_range(h: pd.Series, l: pd.Series, c: pd.Series) -> pd.Series:
    """Per-row maximum of high-low, |high - previous close| and |low - previous close|."""
    prior_close = c.shift(1)
    candidates = [
        h - l,
        (h - prior_close).abs(),
        (l - prior_close).abs(),
    ]
    return pd.concat(candidates, axis=1).max(axis=1)

def adx_series(h: pd.Series, l: pd.Series, c: pd.Series, n: int = 14) -> Tuple[pd.Series, pd.Series, pd.Series]:
    """
    ADX with DI+ and DI-, smoothed with simple ``n``-row rolling means.

    Note: this is the moving-average variant, not Wilder's recursive
    smoothing, so values differ somewhat from charting packages that use the
    latter.

    Parameters
    ----------
    h, l, c : high, low and close series sharing one index.
    n : window length for the ATR, the DI lines and the final ADX average.

    Returns
    -------
    (adx, plus_di, minus_di), each on a 0-100 scale and NaN until enough rows
    exist. When both DI lines are zero (no directional movement in the window)
    DX is taken as 0 rather than 0/0, so ADX stays defined through flat spells.
    """
    up = h.diff()
    dn = -l.diff()
    # Only the larger of the two moves counts, and only when it is positive.
    # NaN comparisons are False, so the first row contributes 0.
    plus_dm = pd.Series(np.where((up > dn) & (up > 0), up, 0.0), index=h.index)
    minus_dm = pd.Series(np.where((dn > up) & (dn > 0), dn, 0.0), index=h.index)

    # True range: widest of high-low and the two gaps from the previous close.
    prev_c = c.shift(1)
    tr = pd.concat([(h - l), (h - prev_c).abs(), (l - prev_c).abs()], axis=1).max(axis=1)
    atr = tr.rolling(n, min_periods=n).mean()

    plus_di = (plus_dm.rolling(n, min_periods=n).mean() / atr) * 100.0
    minus_di = (minus_dm.rolling(n, min_periods=n).mean() / atr) * 100.0

    di_sum = plus_di + minus_di
    dx = ((plus_di - minus_di).abs() / di_sum) * 100.0
    # 0/0 would be NaN and blank out ADX for the next n rows; no directional
    # movement means no trend strength, i.e. DX = 0. NaN sums stay NaN.
    dx = dx.mask(di_sum == 0, 0.0)
    adx = dx.rolling(n, min_periods=n).mean()
    return adx, plus_di, minus_di

# =========================
# TA PANEL
# =========================
def compute_ta_panel(close: pd.DataFrame, high: pd.DataFrame, low: pd.DataFrame,
                     bench_close: pd.Series, cfg: Config) -> pd.DataFrame:
    """
    Return a per-symbol table of TA metrics evaluated at each symbol's last bar.

    Parameters
    ----------
    close, high, low : wide price frames (dates x symbols) with matching columns.
    bench_close : benchmark closes; reindexed to ``close`` and forward-filled.
    cfg : supplies the window lengths (SMA/EMA, Bollinger, ADX, RS slope,
        breakout and 52-week lookbacks).

    Returns
    -------
    DataFrame indexed by ``symbol`` (sorted). Symbols with fewer than
    ``max(cfg.sma_slow, cfg.dist_52w_len) + 5`` closes are skipped; if none
    qualifies, an empty frame with the usual columns is returned.
    """
    columns = [
        "symbol", "ppo", "ppo_slope", "rs_level_3m", "rs_momentum",
        "regime_bull", "above_50", "above_200", "breakout_20", "breakout_55",
        "dist_52w", "bbw_pct", "adx", "adx_rising", "di_plus_gt",
        "pct_above50_last21",
    ]

    bench_close = bench_close.reindex(close.index).ffill()
    min_history = max(cfg.sma_slow, cfg.dist_52w_len) + 5

    rows = []
    for t in close.columns:
        cl = close[t].dropna()
        if len(cl) < min_history:
            continue
        last = cl.iloc[-1]

        h = high[t].reindex(cl.index).ffill()
        l = low[t].reindex(cl.index).ffill()

        # MAs & regime. Comparisons against NaN are False, so a missing
        # average simply yields a 0 flag.
        sma_f = sma(cl, cfg.sma_fast)
        sma_s = sma(cl, cfg.sma_slow)
        regime_bull = int(last > sma_f.iloc[-1] > sma_s.iloc[-1])
        above_50 = int(last > sma_f.iloc[-1])
        above_200 = int(last > sma_s.iloc[-1])

        # PPO & slope (computed once, reused for level and slope)
        ema_f = ema(cl, cfg.ema_fast)
        ema_s = ema(cl, cfg.ema_slow)
        ppo_line = (ema_f - ema_s) / ema_s
        last_slow = ema_s.iloc[-1]
        ppo = ppo_line.iloc[-1] if pd.notna(last_slow) and last_slow != 0 else np.nan
        ppo_slope = linear_slope(ppo_line.dropna().tail(cfg.rrg_window))

        # RS vs benchmark + RS momentum (slope)
        rs_series = (cl / bench_close.reindex(cl.index)).dropna()
        rs_level = (rs_series.iloc[-1] / rs_series.shift(63).iloc[-1] - 1) if len(rs_series) > 63 else np.nan
        rs_momentum = linear_slope(rs_series.tail(cfg.rrg_window))  # slope of RS line

        # Breakouts: compare the latest close with the highest close of the
        # PRIOR window. A window that includes the latest bar can never be
        # exceeded by it, which would pin both flags at 0.
        prior_hh20 = cl.rolling(cfg.breakout_short, min_periods=cfg.breakout_short).max().shift(1)
        prior_hh55 = cl.rolling(cfg.breakout_long, min_periods=cfg.breakout_long).max().shift(1)
        brk20 = int(last > prior_hh20.iloc[-1])
        brk55 = int(last > prior_hh55.iloc[-1])

        # Distance to 52W high (window includes the latest bar, so <= 0)
        hh52 = cl.rolling(cfg.dist_52w_len, min_periods=cfg.dist_52w_len).max()
        top_52w = hh52.iloc[-1]
        dist_52w = (last / top_52w - 1) if pd.notna(top_52w) and top_52w != 0 else np.nan

        # BB width (contraction percentile over ~6 months). Only the final
        # value is needed, so rank just the last 126 rows.
        bbw = bb_width(cl, cfg.bb_len, cfg.bb_k)
        bbw_pct = pct_rank(bbw.tail(126), 126).iloc[-1]  # 0..1; low => contraction

        # ADX & ADX rising
        adx, di_p, di_m = adx_series(h, l, cl, n=cfg.adx_len)
        adx_val = adx.iloc[-1] if len(adx.dropna()) else np.nan
        adx_rising = int(adx.diff().iloc[-1] > 0)
        di_plus_gt = int(di_p.iloc[-1] > di_m.iloc[-1])

        # % bars above 50DMA in last month (stability)
        last21 = cl.tail(21)
        sma_f_last = sma_f.reindex(last21.index)
        pct_above50_last21 = float((last21 > sma_f_last).mean()) if sma_f_last.notna().all() else np.nan

        rows.append({
            "symbol": t,
            "ppo": ppo,
            "ppo_slope": ppo_slope,
            "rs_level_3m": rs_level,       # ~3M RS change
            "rs_momentum": rs_momentum,    # slope of RS line
            "regime_bull": regime_bull,
            "above_50": above_50,
            "above_200": above_200,
            "breakout_20": brk20,
            "breakout_55": brk55,
            "dist_52w": dist_52w,          # <= 0 close to high; negative means below high
            "bbw_pct": bbw_pct,            # 0..1; lower => tighter
            "adx": adx_val,
            "adx_rising": adx_rising,
            "di_plus_gt": di_plus_gt,
            "pct_above50_last21": pct_above50_last21,
        })

    # Passing the column list keeps set_index working when no symbol qualified.
    ta = pd.DataFrame(rows, columns=columns).set_index("symbol").sort_index()
    return ta

# =========================
# SIGNALS / SCORING
# =========================
def rank_series(s: pd.Series, ascending: bool) -> pd.Series:
    """Rank the values of ``s`` in the given direction; tied values share the lowest rank of their group."""
    return s.rank(method="min", ascending=ascending)

def build_signals(ta: pd.DataFrame, table: pd.DataFrame, benchmark: str) -> pd.DataFrame:
    """
    Build the 'leaders' and 'next-up' composite scores.

    Every ranked metric is converted to points where the best symbol gets the
    most points (1 = worst ... N = best), so rank points and the 0/1 flag
    bonuses pull in the same direction and a HIGHER score is always better.

    Parameters
    ----------
    ta : per-symbol TA metrics (index = symbol).
    table : returns table providing ``rs_vs_bench`` and ``sharpe``.
    benchmark : accepted for interface compatibility; not used here.

    Returns
    -------
    ``ta`` joined with ``rs_vs_bench``/``sharpe`` plus ``leaders_score`` and
    ``nextup_score``, sorted best-first by leaders then next-up score. A NaN
    in any ranked metric makes that symbol's score NaN.
    """
    df = ta.join(table[["rs_vs_bench", "sharpe"]], how="left")

    # Normalize/guard: missing metrics become NaN, missing flags become 0
    for col in ["ppo", "ppo_slope", "rs_level_3m", "rs_momentum", "adx", "rs_vs_bench", "dist_52w", "bbw_pct"]:
        if col not in df:
            df[col] = np.nan
    for flag in ["regime_bull", "breakout_20", "breakout_55", "adx_rising", "di_plus_gt"]:
        if flag not in df:
            df[flag] = 0

    def points(s: pd.Series, higher_is_better: bool = True) -> pd.Series:
        # Ascending rank gives the largest value the largest number; flip the
        # direction for metrics where a smaller value is the better one.
        return s.rank(ascending=higher_is_better, method="min")

    # Leaders composite: strong now
    # Higher better: rs_vs_bench, rs_momentum, ppo, adx
    # Bonuses: regime_bull, breakout flags, near 52w high
    leaders_score = (
        points(df["rs_vs_bench"]) * 0.30 +
        points(df["rs_momentum"]) * 0.20 +
        points(df["ppo"])         * 0.20 +
        points(df["adx"])         * 0.15 +
        (df["regime_bull"].fillna(0) * 0.5 +
         df["breakout_20"].fillna(0) * 0.25 +
         df["breakout_55"].fillna(0) * 0.25) * 10 +
        # closeness to 52w high: anything more than 20% below counts the same
        points(df["dist_52w"].clip(lower=-0.20, upper=0.0)) * 0.15
    )

    # Next-Up composite: improvement & setup > absolute strength
    nextup_score = (
        points(df["rs_momentum"]) * 0.30 +
        points(df["ppo_slope"])   * 0.25 +
        # ADX rising signal & DI+ leadership
        (df["adx_rising"].fillna(0) * 0.7 + df["di_plus_gt"].fillna(0) * 0.3) * 10 +
        # Volatility contraction (lower bbw_pct better) + near breakout
        points(df["bbw_pct"], higher_is_better=False) * 0.20 +
        (df["breakout_20"].fillna(0) * 0.2 + df["breakout_55"].fillna(0) * 0.3) * 10 +
        # Not too far from 52w high
        points(df["dist_52w"].clip(lower=-0.25, upper=0.0)) * 0.15 +
        # Some absolute RS helps
        points(df["rs_vs_bench"]) * 0.10
    )

    out = df.copy()
    out["leaders_score"] = leaders_score
    out["nextup_score"] = nextup_score

    # Order best-first (highest scores to top)
    out = out.sort_values(["leaders_score", "nextup_score"], ascending=[False, False])
    return out

# =========================
# MAIN
# =========================
def main():
    """
    Run the scan end to end using the module-level ``CFG``.

    Steps: resolve the universe (adding the benchmark), download prices,
    drop symbols with more than 15% missing adjusted closes, compute the
    returns table, momentum ranks, TA panel and leader/next-up signals, write
    the CSVs into today's output folder and print a console summary.

    Percent-style columns are multiplied by 100 before they are written or
    printed; this includes ``rs_vs_bench`` and ``dist_52w`` in the signals
    table so that the ``%`` suffix in the console output is truthful.

    Raises SystemExit when nothing was downloaded or too little data remains.
    """
    # ===== Universe =====
    symbols = CFG.symbols
    if CFG.symbols_path:
        symbols = read_symbols_from_file(CFG.symbols_path)
    symbols = list(dict.fromkeys([s.strip().upper() for s in symbols if s.strip()]))
    benchmark = CFG.benchmark.upper()
    if benchmark not in symbols:
        symbols.append(benchmark)

    log.info(f"Universe ({len(symbols)}): {', '.join(symbols)} | Benchmark={CFG.benchmark}")

    # ===== Download =====
    dl = yahoo_download(
        tickers=symbols,
        start=CFG.start_date,
        end=CFG.end_date,
        max_retries=CFG.max_retries,
        sleep_s=CFG.retry_sleep_sec,
        threads=CFG.threads,
    )
    wide = extract_wide(dl, tickers=symbols)
    adj, close, high, low = wide["Adj Close"], wide["Close"], wide["High"], wide["Low"]
    if adj.empty or close.empty:
        raise SystemExit("No data downloaded. Check tickers/date range.")

    # Drop short-history columns (>15% NaNs) consistently across panels
    valid_frac = adj.notna().mean()
    keep_cols = valid_frac[valid_frac > 0.85].index.tolist()
    dropped = [c for c in adj.columns if c not in keep_cols]
    if dropped:
        log.warning(f"Dropping short-history/illiquid symbols (NaNs>15%): {', '.join(dropped)}")
    adj = adj[keep_cols]
    close = close[keep_cols]
    high = high[keep_cols]
    low = low[keep_cols]

    # ===== CORE METRICS =====
    table = compute_returns_table(
        adj=adj,
        benchmark=benchmark,
        lookbacks=CFG.lookbacks_days,
        rfr=CFG.risk_free_rate_annual,
        tdpy=CFG.trading_days_per_year,
    )
    if table.empty or len(table) < 2:
        raise SystemExit("Insufficient data after filtering; adjust universe or dates.")

    # ===== MOMENTUM RANKING (exclude benchmark) =====
    rank_df = table.copy()
    if benchmark in rank_df.index:
        rank_df_no_bm = rank_df.drop(index=[benchmark])
    else:
        rank_df_no_bm = rank_df
    ranked = rank_and_score(rank_df_no_bm, CFG.composite_weights)

    # ===== TA PANEL =====
    bench_close = close[benchmark] if benchmark in close.columns else pd.Series(index=close.index, dtype=float)
    ta = compute_ta_panel(close.drop(columns=[benchmark], errors="ignore"),
                          high.drop(columns=[benchmark], errors="ignore"),
                          low.drop(columns=[benchmark], errors="ignore"),
                          bench_close, CFG)

    # ===== SIGNALS =====
    # Scores are built from raw fractions; the two percent-style columns are
    # scaled afterwards so the CSV and the "%"-formatted console tables agree
    # with the other outputs (scaling does not affect the score ordering).
    signals = build_signals(ta, rank_df_no_bm, benchmark)
    to_percent_inplace(signals, ["rs_vs_bench", "dist_52w"])
    leaders = signals.sort_values("leaders_score", ascending=False).head(CFG.top_n)
    nextup = signals.sort_values("nextup_score", ascending=False).head(CFG.top_n)

    # ===== Convert to % where applicable (numeric) =====
    to_percent_inplace(table, PCT_COLS)
    to_percent_inplace(ranked, [c for c in PCT_COLS if c in ranked.columns])
    # TA has some percentage-like fields; keep them raw except dist_52w which is a return
    if "dist_52w" in ta.columns:
        ta["dist_52w"] = ta["dist_52w"] * 100.0

    # ===== Outputs =====
    out_dir = today_folder(CFG.out_root, CFG.tz_display)
    returns_csv = os.path.join(out_dir, "returns_table.csv")
    ranked_csv = os.path.join(out_dir, "sector_scores.csv")
    latest_csv = os.path.join(out_dir, "latest_prices.csv")
    ta_csv = os.path.join(out_dir, "ta_scores.csv")
    sig_csv = os.path.join(out_dir, "signals.csv")

    table.sort_index().to_csv(returns_csv, float_format="%.2f")
    ranked.to_csv(ranked_csv, float_format="%.2f")
    if CFG.save_latest_prices:
        pd.DataFrame({"price_latest": adj.iloc[-1]}).sort_index().to_csv(latest_csv, float_format="%.2f")
    ta.to_csv(ta_csv, float_format="%.6f")
    signals.to_csv(sig_csv, float_format="%.6f")

    log.info(f"Saved: {ranked_csv}")
    log.info(f"Saved: {returns_csv}")
    if CFG.save_latest_prices:
        log.info(f"Saved: {latest_csv}")
    log.info(f"Saved: {ta_csv}")
    log.info(f"Saved: {sig_csv}")

    # ===== Console summary =====
    print("\n================ INDIA: TOP SECTOR OUTPERFORMERS (Momentum) ================")
    disp_cols = ["score_composite", "score_percentile", "ret_1m", "ret_3m", "ret_6m", "ret_1y", "sharpe", "rs_vs_bench"]
    disp = ranked[disp_cols].copy().astype(object)
    for c in ["score_percentile", "ret_1m", "ret_3m", "ret_6m", "ret_1y", "rs_vs_bench"]:
        if c in disp.columns:
            disp[c] = disp[c].map(lambda x: f"{float(x):.2f}%")
    if "sharpe" in disp.columns:
        disp["sharpe"] = disp["sharpe"].map(lambda x: f"{float(x):.2f}")
    if "score_composite" in disp.columns:
        disp["score_composite"] = disp["score_composite"].map(lambda x: f"{float(x):.2f}")
    print(disp.to_string())
    print("==========================================================================\n")

    print("Leaders (TA-weighted strong-now):")
    ld = leaders[["leaders_score","rs_vs_bench","rs_momentum","ppo","adx","regime_bull","breakout_20","breakout_55","dist_52w","bbw_pct"]].copy().astype(object)
    # pretty format % lookers
    for c in ["rs_vs_bench", "dist_52w"]:
        if c in ld.columns:
            ld[c] = ld[c].map(lambda x: f"{float(x):.2f}%" if pd.notna(x) else "nan")
    for c in ["ppo","rs_momentum","adx","leaders_score","bbw_pct"]:
        if c in ld.columns:
            ld[c] = ld[c].map(lambda x: f"{float(x):.2f}" if pd.notna(x) else "nan")
    print(ld.to_string())
    print()

    print("Next-Up (building strength / likely to boom):")
    nu = nextup[["nextup_score","rs_momentum","ppo_slope","adx_rising","di_plus_gt","breakout_20","breakout_55","dist_52w","bbw_pct","rs_vs_bench"]].copy().astype(object)
    for c in ["dist_52w","rs_vs_bench"]:
        if c in nu.columns:
            nu[c] = nu[c].map(lambda x: f"{float(x):.2f}%" if pd.notna(x) else "nan")
    for c in ["ppo_slope","rs_momentum","nextup_score","bbw_pct"]:
        if c in nu.columns:
            nu[c] = nu[c].map(lambda x: f"{float(x):.2f}" if pd.notna(x) else "nan")
    print(nu.to_string())

    # Benchmark snapshot
    if benchmark in table.index:
        bm = table.loc[benchmark, ["ret_1m","ret_3m","ret_6m","ret_1y","ret_ytd","sharpe"]].copy()
        bm_disp = bm.astype(object)
        for c in ["ret_1m","ret_3m","ret_6m","ret_1y","ret_ytd"]:
            if c in bm_disp.index:
                bm_disp[c] = f"{float(bm[c]):.2f}%"
        if "sharpe" in bm_disp.index:
            bm_disp["sharpe"] = f"{float(bm['sharpe']):.2f}"
        print("\nBenchmark snapshot (NIFTY 50):")
        print(pd.DataFrame(bm_disp).T.to_string(index=False))

# Run the scan only when executed as a script, not when imported.
if __name__ == "__main__":
    main()


2025-11-09 13:51:52 | INFO | Universe (11): ^NSEBANK, ^CNXIT, ^CNXAUTO, ^CNXPHARMA, ^CNXFMCG, ^CNXMETAL, ^CNXREALTY, ^CNXINFRA, ^CNXENERGY, ^CNXMEDIA, ^NSEI | Benchmark=^NSEI
2025-11-09 13:51:54 | INFO | Saved: outputs/india_sector_rotation/2025-11-09/sector_scores.csv
2025-11-09 13:51:54 | INFO | Saved: outputs/india_sector_rotation/2025-11-09/returns_table.csv
2025-11-09 13:51:54 | INFO | Saved: outputs/india_sector_rotation/2025-11-09/latest_prices.csv
2025-11-09 13:51:54 | INFO | Saved: outputs/india_sector_rotation/2025-11-09/ta_scores.csv
2025-11-09 13:51:54 | INFO | Saved: outputs/india_sector_rotation/2025-11-09/signals.csv



           score_composite score_percentile  ret_1m  ret_3m  ret_6m   ret_1y sharpe rs_vs_bench
^CNXMETAL             2.40           10.00%   2.66%  11.68%  23.71%   11.79%   0.16      13.48%
^CNXAUTO              3.25           20.00%  -0.57%  12.16%  18.11%   13.88%   0.44      10.91%
^CNXREALTY            3.30           30.00%   6.13%   4.25%  12.16%   -5.32%   0.19       3.98%
^NSEBANK              3.35           40.00%   2.91%   4.55%   6.46%   12.44%   0.36       1.28%
^CNXINFRA             4.10           50.00%   2.67%   3.88%   7.53%    6.46%   0.25       1.49%
^CNXPHARMA            6.20           60.00%   1.35%   0.91%   5.00%   -2.49%   0.30      -1.27%
^CNXENERGY            6.55           70.00%   0.90%   2.51%   6.73%   -8.94%   0.29       0.40%
^CNXFMCG              7.70           80.00%   1.23%  -0.53%  -1.04%   -6.36%   0.40      -5.01%
^CNXIT                8.40           90.00%   1.18%   0.24%  -2.46%  -13.09%   0.07      -5.33%
^CNXMEDIA             9.75          100