In [None]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
Simple grid search for candle_confirm_mvo (no plotting).
- ThreadPool only (pickle-free, notebook-safe).
- Cache-only data loading (reads parquet; no network).
- Train/Test split with per-combo logging & BEST-SO-FAR updates.
- Saves a sorted CSV and artifacts (legs/roundtrips/equity/metrics) for top-N configs.

Place next to `candle_confirm_mvo_v1.py` and run:
    python grid_search_candle_confirm_mvo.py
"""

import os, json, itertools, importlib, logging
from copy import deepcopy
from typing import Dict, List, Any, Tuple
import pandas as pd
import numpy as np
from multiprocessing.dummy import Pool as ThreadPool  # threads only

# =========================
# CONFIG — EDIT HERE
# =========================

STRAT_MODULE = "candle_confirm_mvo_v1"   # strategy module name (filename without .py); imported via importlib in main()

# Data windows (ISO date strings).
# main() simulates TRAIN on [START_DATE, TRAIN_END] and TEST on [TEST_START, TEST_END];
# simulate_universe_period() treats both ends of a window as inclusive.
START_DATE   = "2015-01-01"              # train-window start; also assigned to cfg.start_date
TRAIN_END    = "2021-12-31"              # inclusive
TEST_START   = "2022-01-01"              # inclusive
TEST_END     = "2025-01-01"              # inclusive; also assigned to cfg.end_date

# Universe: when SYMBOLS_FILE is truthy, main() hands it to the strategy as
# cfg.static_symbols_path and the inline UNIVERSE list below is NOT used.
# Set SYMBOLS_FILE to "" or None to fall back to UNIVERSE.
SYMBOLS_FILE = "nifty500.txt"  # e.g., "nifty500.txt" (one ticker per line)
UNIVERSE = [
    '360ONE.NS','3MINDIA.NS','AARTIIND.NS','ABB.NS','ACC.NS','ADANIENT.NS','ADANIPORTS.NS',
    'APOLLOHOSP.NS','ASIANPAINT.NS','AXISBANK.NS','BAJAJ-AUTO.NS','BAJFINANCE.NS','BHARTIARTL.NS',
    'BPCL.NS','BRITANNIA.NS','CIPLA.NS','COALINDIA.NS','COFORGE.NS','DRREDDY.NS','EICHERMOT.NS',
    'GRASIM.NS','HCLTECH.NS','HDFCBANK.NS','HINDALCO.NS','HINDUNILVR.NS','ICICIBANK.NS',
    'INFY.NS','ITC.NS','JSWSTEEL.NS','KOTAKBANK.NS','LT.NS','MARUTI.NS','NESTLEIND.NS','NTPC.NS',
    'ONGC.NS','POWERGRID.NS','RELIANCE.NS','SBIN.NS','SUNPHARMA.NS','TATAMOTORS.NS',
    'TATASTEEL.NS','TCS.NS','TECHM.NS','TITAN.NS','ULTRACEMCO.NS','WIPRO.NS'
]

# Output folder: the results CSV and the top-N artifact files are written here
# (also assigned to cfg.out_dir in main()).
OUT_DIR = "outputs"

# Execution (threads only; avoids pickling issues).
# N_JOBS <= 1 runs the combos in a plain serial loop; otherwise a thread pool of this size is used.
N_JOBS = 4       # set 1 for serial

# Save artifacts (legs/roundtrips/equity/metrics) for top-N configs after sweep.
# A value <= 0 disables the artifact step entirely.
SAVE_TOP_N = 3
# Grid to sweep.
# Each key is set as an attribute on a deep copy of the strategy Config, and the
# Cartesian product of all value lists is evaluated (TRAIN and TEST simulation per combo).
# main() drops combos where ema_fast >= ema_slow; with the values below none are
# dropped, so the sweep is 3*3*2*2*3*3*3*2*3*3*1 = 17,496 combos.
# NOTE(review): parameter meanings are defined by the strategy module, not here;
# the group comments below are descriptive labels only.
PARAM_GRID = {
    # Trend filters
    "ema_fast": [8, 10, 12],
    "ema_slow": [20, 26, 30],
    "use_htf_trend": [True, False],
    "ema_htf": [150, 200],

    # Exits
    "stop_loss_pct": [0.03, 0.05, 0.07],
    "target_pct":    [0.08, 0.10, 0.15],

    # Portfolio
    "max_concurrent_positions": [3, 5, 8],
    "deploy_cash_frac": [0.25, 0.5],
    "top_k_daily": [50, 150, 300],

    # Ranking filter
    "within_pct_of_52w_high": [0.4, 0.5, 0.6],

    # Liquidity guards (single value: always swept as False)
    "enable_basic_liquidity": [False],
}

# Logging: level, logger name and line/date formats used by this script.
LOG_LEVEL = logging.INFO
LOG_NAME  = "grid_candle_confirm_mvo"
LOG_FMT   = "%(asctime)s | %(levelname)s | %(message)s"
DATE_FMT  = "%Y-%m-%d %H:%M:%S"

# Runs at import time and configures the ROOT logger.
# NOTE(review): per the stdlib docs, basicConfig() does nothing if the root logger
# already has handlers (e.g. inside some notebook environments) — confirm if output looks off.
logging.basicConfig(level=LOG_LEVEL, format=LOG_FMT, datefmt=DATE_FMT)
# Module-wide logger used by every helper below.
log = logging.getLogger(LOG_NAME)

# =========================
# Helpers (no plotting)
# =========================
def composite_score(m: dict) -> float:
    """Collapse a metrics dict into one ranking score (higher is better).

    The score rewards Sharpe and CAGR and penalises drawdown depth:

        score = 1.00 * sharpe + 7.5 * (cagr_pct / 100) - 5.0 * (|max_drawdown_pct| / 100)

    Args:
        m: Metrics mapping. Reads ``n_trades``, ``sharpe``, ``cagr_pct`` and
            ``max_drawdown_pct``; a missing key, ``None`` or ``0`` counts as 0.

    Returns:
        The composite score, or the sentinel ``-1e9`` when the config must be
        discarded: fewer than 50 trades, or any metric that is NaN/inf.
    """
    discarded = -1e9

    def _metric(key: str) -> float:
        # Missing keys, None and 0 all collapse to 0.0.
        return float(m.get(key) or 0.0)

    n_trades = _metric("n_trades")
    # NaN is truthy and compares False against 50, so it would slip through
    # a bare `< 50` check; reject non-finite trade counts explicitly.
    if not np.isfinite(n_trades) or n_trades < 50:
        return discarded

    sharpe = _metric("sharpe")
    cagr = _metric("cagr_pct") / 100.0
    mdd = abs(_metric("max_drawdown_pct")) / 100.0

    # A NaN/inf score poisons downstream ranking: NaN never compares greater,
    # so a NaN best-so-far would never be replaced. Treat it as discarded.
    if not all(np.isfinite(v) for v in (sharpe, cagr, mdd)):
        return discarded

    return (1.00 * sharpe) + (0.75 * cagr * 10.0) - (0.50 * mdd * 10.0)

def read_cached_prices(cache_dir: str, tickers: List[str]) -> Dict[str, pd.DataFrame]:
    """Load cached price frames from disk; never touches the network.

    For each ticker, looks for ``<cache_dir>/<ticker>.parquet`` (with ``^``
    replaced by ``_`` in the file name) and reads it with ``pd.read_parquet``.

    Args:
        cache_dir: Directory holding the parquet cache files.
        tickers: Ticker symbols to load.

    Returns:
        Mapping of ticker -> DataFrame for every ticker whose file exists,
        could be read, and is non-empty. Tickers with a missing, unreadable
        or empty file are left out.
    """
    data_map: Dict[str, pd.DataFrame] = {}
    for ticker in tickers:
        path = os.path.join(cache_dir, f"{ticker.replace('^', '_')}.parquet")
        if not os.path.exists(path):
            continue
        try:
            frame = pd.read_parquet(path)
        except Exception as exc:
            # Stay best-effort (one bad file must not abort the sweep), but
            # say so: a silently shrinking universe skews every result.
            log.warning("Skipping %s: cannot read cache file '%s' (%r)", ticker, path, exc)
            continue
        if frame is not None and not frame.empty:
            data_map[ticker] = frame
    return data_map

def simulate_universe_period(
    strat, symbols: List[str], data_map: Dict[str, pd.DataFrame], bench_df: pd.DataFrame,
    cfg, period_start: str, period_end: str,
):
    """Run the strategy over one date window for the whole universe.

    Every ticker's frame is restricted to rows whose index lies in
    [period_start, period_end] (both inclusive), passed through
    ``strat.simulate_ticker``, and the non-empty trade frames are concatenated
    and handed to ``strat.aggregate_and_apply``.

    Returns:
        ``(legs_df, trips_df, equity, metrics)`` as produced by
        ``strat.aggregate_and_apply``. When no ticker produced any trade, two
        empty DataFrames, an empty float Series and a flat metrics dict
        (equity equal to ``cfg.initial_capital``, everything else zero).
    """
    window_lo = pd.to_datetime(period_start)
    window_hi = pd.to_datetime(period_end)

    per_ticker_trades = []
    for ticker in symbols:
        frame = data_map.get(ticker)
        if frame is None or frame.empty:
            continue
        in_window = (frame.index >= window_lo) & (frame.index <= window_hi)
        sliced = frame[in_window]
        if sliced.empty:
            continue
        ticker_trades, _ = strat.simulate_ticker(ticker, sliced, cfg)
        if ticker_trades.empty:
            continue
        per_ticker_trades.append(ticker_trades)

    if per_ticker_trades:
        combined = pd.concat(per_ticker_trades, ignore_index=True)
        # The full (unsliced) data_map and benchmark go to the aggregator, as before.
        legs_df, trips_df, equity, metrics = strat.aggregate_and_apply(combined, data_map, bench_df, cfg)
        return legs_df, trips_df, equity, metrics

    # Nothing traded: report a flat account so callers can still score the combo.
    flat_metrics = {
        "start_equity_inr": cfg.initial_capital,
        "final_equity_inr": cfg.initial_capital,
        "cagr_pct": 0.0,
        "sharpe": 0.0,
        "max_drawdown_pct": 0.0,
        "win_rate_pct": 0.0,
        "n_trades": 0,
    }
    return pd.DataFrame(), pd.DataFrame(), pd.Series(dtype=float), flat_metrics

def log_combo(idx: int, total: int, keys: List[str], row: dict):
    """Emit one log line summarising a finished grid combo.

    A row carrying an ``"error"`` key is logged at ERROR level together with
    its parameter values; any other row is logged at INFO level with the
    train and test metrics followed by ``key=value`` parameter pairs.

    Args:
        idx: 1-based position of this combo in completion order.
        total: Total number of combos in the sweep.
        keys: Parameter names, in the order they should be printed.
        row: Result dict returned by the worker.
    """
    if "error" in row:
        failed_params = dict((k, row.get(k, None)) for k in keys)
        log.error("Combo %4d/%d ERROR %s | params=%s",
                  idx, total, row["error"], failed_params)
        return

    rendered_params = ", ".join([f"{k}={row[k]}" for k in keys])
    train_fields = ("train_n_trades", "train_sharpe", "train_cagr_pct",
                    "train_max_drawdown_pct", "train_score")
    test_fields = ("test_n_trades", "test_sharpe", "test_cagr_pct",
                   "test_max_drawdown_pct", "test_score")
    train_values = [row[name] for name in train_fields]
    test_values = [row[name] for name in test_fields]
    log.info(
        "Combo %4d/%d  [train: trades=%d, Sharpe=%.3f, CAGR=%.2f%%, MDD=%.2f%%, score=%.3f]  "
        "[test: trades=%d, Sharpe=%.3f, CAGR=%.2f%%, MDD=%.2f%%, score=%.3f]  | %s",
        idx, total, *train_values, *test_values, rendered_params
    )

def maybe_update_best(row: dict, best_row: dict, best_score: float, keys: List[str]):
    """Track the running best combo, ranked by ``test_score``.

    Args:
        row: Result dict for the combo that just finished.
        best_row: Best result seen so far (``None`` before the first success).
        best_score: ``test_score`` of ``best_row`` (``None`` before the first success).
        keys: Parameter names, used only to render the log line.

    Returns:
        ``(best_row, best_score)`` — replaced by ``(row, row["test_score"])``
        when ``row`` is not an error row and strictly beats the current best
        (or no best exists yet); otherwise returned unchanged. A replacement
        is announced with a WARNING-level "BEST-SO-FAR" log line.
    """
    is_failure = "error" in row
    beats_current = (not is_failure) and (best_score is None or row["test_score"] > best_score)
    if not beats_current:
        return best_row, best_score

    rendered_params = ", ".join([f"{k}={row[k]}" for k in keys])
    log.warning(
        "BEST-SO-FAR ⇧ test_score=%.3f | test_sharpe=%.3f, test_cagr=%.2f%%, test_mdd=%.2f%%, test_trades=%d | %s",
        row["test_score"], row["test_sharpe"], row["test_cagr_pct"],
        row["test_max_drawdown_pct"], row["test_n_trades"], rendered_params
    )
    new_best_score = row["test_score"]
    return row, new_best_score

def save_top_artifacts(strat, keys: List[str], top_rows: pd.DataFrame,
                       symbols: List[str], data_map: Dict[str, pd.DataFrame],
                       bench_df: pd.DataFrame, base_cfg, stamp: str):
    """Re-run the top configs on the TEST window and dump their artifacts (no plotting).

    For the i-th row of ``top_rows`` (1-based) the parameters named in ``keys``
    are applied to a deep copy of ``base_cfg``, the TEST window is simulated
    again, and up to four files are written under ``OUT_DIR`` with the prefix
    ``grid_top{i}_{stamp}``: ``_legs.csv``, ``_roundtrips.csv``, ``_equity.csv``
    (each only when non-empty) and ``_metrics.json``.

    Args:
        strat: Imported strategy module.
        keys: Grid parameter names to copy from each row onto the config.
        top_rows: Result rows to materialise, best first.
        symbols: Universe tickers.
        data_map: Ticker -> price frame.
        bench_df: Benchmark frame forwarded to the strategy.
        base_cfg: Base strategy config; never mutated.
        stamp: Timestamp string embedded in the file names.

    Rows that carry a non-null ``error`` value (combos that failed during the
    sweep) are skipped with a warning. A failure while writing files is logged
    and does not stop the remaining rows.
    """
    def _native(value):
        # Config attributes should be plain Python values, not NumPy scalars.
        return value.item() if isinstance(value, np.generic) else value

    def _json_default(obj):
        # json cannot encode NumPy scalars/arrays or timestamps on its own.
        if isinstance(obj, np.generic):
            return obj.item()
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        if hasattr(obj, "isoformat"):
            return obj.isoformat()
        raise TypeError(f"Object of type {type(obj).__name__} is not JSON serializable")

    # to_dict("records") keeps each column's own dtype; iterrows() would coerce
    # every row to one common dtype (e.g. turning int parameters into floats
    # whenever the frame happens to hold only numeric columns).
    for i, rec in enumerate(top_rows.to_dict("records"), 1):
        if "error" in rec and pd.notna(rec["error"]):
            log.warning("Skipping artifacts for top-%d: combo failed during the sweep (%s)",
                        i, rec["error"])
            continue

        cfg_i = deepcopy(base_cfg)
        for k in keys:
            if k in rec:
                setattr(cfg_i, k, _native(rec[k]))

        legs, trips, equity, metrics = simulate_universe_period(
            strat, symbols, data_map, bench_df, cfg_i, TEST_START, TEST_END
        )
        prefix = os.path.join(OUT_DIR, f"grid_top{i}_{stamp}")
        try:
            if legs is not None and not legs.empty:
                legs.to_csv(prefix + "_legs.csv", index=False)
            if trips is not None and not trips.empty:
                trips.to_csv(prefix + "_roundtrips.csv", index=False)
            if equity is not None and not equity.empty:
                pd.DataFrame({"date": equity.index, "equity": equity.values}).to_csv(prefix + "_equity.csv", index=False)
            # Serialise fully before opening the file so a failure cannot
            # leave a truncated metrics.json behind.
            metrics_json = json.dumps(dict(metrics), indent=2, default=_json_default)
            with open(prefix + "_metrics.json", "w", encoding="utf-8") as f:
                f.write(metrics_json)
            log.info("Artifacts saved: %s_*", prefix)
        except Exception as e:
            log.error("Failed to save artifacts for top-%d: %s", i, repr(e))

# =========================
# Main
# =========================
def main():
    """Run the full grid search: load data, sweep the grid, save results.

    Steps:
      1. Import the strategy module and build a base config from the
         module-level settings.
      2. Load cached prices only (no network); abort with an error log if the
         cache is empty.
      3. Evaluate every parameter combination on the TRAIN and TEST windows,
         logging each combo and the running best.
      4. Write all results to a CSV sorted by test score, then re-run and dump
         artifacts for the top ``SAVE_TOP_N`` successful configs.

    NOTE(review): configs are ranked and the "best" is chosen by TEST score,
    so the reported test metrics are an optimistic estimate — confirm this is
    the intended selection protocol.
    """
    # Import strategy and build a base config
    strat = importlib.import_module(STRAT_MODULE)
    cfg = strat.Config()
    cfg.start_date = START_DATE
    cfg.end_date   = TEST_END
    cfg.out_dir    = OUT_DIR
    cfg.plot       = False  # no images
    # Universe: a symbols file takes precedence over the inline list
    if SYMBOLS_FILE:
        cfg.static_symbols = None
        cfg.static_symbols_path = SYMBOLS_FILE
    else:
        cfg.static_symbols = UNIVERSE

    # Ensure dirs & resolve symbols
    strat.ensure_dirs(cfg.cache_dir, cfg.out_dir)
    symbols = strat.load_static_symbols(cfg.static_symbols, cfg.static_symbols_path)
    log.info("Universe size: %d", len(symbols))

    # Cache-only prices (no network here)
    data_map = read_cached_prices(cfg.cache_dir, symbols)
    if not data_map:
        log.error("No cached parquet files found in '%s'. Run your backtest once to build cache, then re-run grid.", cfg.cache_dir)
        return

    # Synthetic flat benchmark (keeps VOLAR math stable without fetching)
    idx = pd.date_range(start=cfg.start_date, end=cfg.end_date, freq="B")
    bench_df = pd.DataFrame({"Close": np.ones(len(idx))}, index=idx)

    # Build grid
    keys = list(PARAM_GRID)
    combos_all = list(itertools.product(*(PARAM_GRID[k] for k in keys)))

    def _is_sane(vals_tuple: Tuple[Any, ...]) -> bool:
        """Reject combos whose fast EMA is not shorter than the slow EMA."""
        params = dict(zip(keys, vals_tuple))
        # Only applies when both EMA lengths are part of the grid; a grid
        # without them must not fail with KeyError.
        if "ema_fast" in params and "ema_slow" in params:
            return params["ema_fast"] < params["ema_slow"]
        return True

    combos = [c for c in combos_all if _is_sane(c)]
    total = len(combos)
    log.info("Grid size (after sanity): %d (of %d)", total, len(combos_all))
    log.info("Train: %s → %s | Test: %s → %s", START_DATE, TRAIN_END, TEST_START, TEST_END)
    log.info("Threads: %d", N_JOBS)

    # Worker using shared in-memory objects (threads)
    def worker(vals_tuple: Tuple[Any, ...]):
        """Evaluate one combo; returns a result row, or an error row on failure."""
        overrides = dict(zip(keys, vals_tuple))
        try:
            # Each combo gets its own config so threads never share mutable state.
            cfg_i = deepcopy(cfg)
            for k, v in overrides.items():
                setattr(cfg_i, k, v)

            # TRAIN
            _, _, _, met_tr = simulate_universe_period(
                strat, symbols, data_map, bench_df, cfg_i, START_DATE, TRAIN_END
            )
            sc_tr = composite_score(met_tr)

            # TEST
            _, _, _, met_te = simulate_universe_period(
                strat, symbols, data_map, bench_df, cfg_i, TEST_START, TEST_END
            )
            sc_te = composite_score(met_te)

            return {
                **overrides,
                "train_cagr_pct": float(met_tr.get("cagr_pct", 0.0) or 0.0),
                "train_sharpe": float(met_tr.get("sharpe", 0.0) or 0.0),
                "train_max_drawdown_pct": float(met_tr.get("max_drawdown_pct", 0.0) or 0.0),
                "train_win_rate_pct": float(met_tr.get("win_rate_pct", 0.0) or 0.0),
                "train_n_trades": int(met_tr.get("n_trades", 0) or 0),
                "train_score": float(sc_tr),

                "test_cagr_pct": float(met_te.get("cagr_pct", 0.0) or 0.0),
                "test_sharpe": float(met_te.get("sharpe", 0.0) or 0.0),
                "test_max_drawdown_pct": float(met_te.get("max_drawdown_pct", 0.0) or 0.0),
                "test_win_rate_pct": float(met_te.get("win_rate_pct", 0.0) or 0.0),
                "test_n_trades": int(met_te.get("n_trades", 0) or 0),
                "test_score": float(sc_te),
            }
        except Exception as e:
            # A failing combo becomes an error row instead of killing the sweep.
            err = {"error": repr(e)}
            err.update(overrides)
            return err

    results: List[dict] = []

    def drain(rows):
        """Consume worker rows in arrival order: store, log, track the best."""
        best_row, best_score = None, None
        for i, row in enumerate(rows, 1):
            results.append(row)
            log_combo(i, total, keys, row)
            best_row, best_score = maybe_update_best(row, best_row, best_score, keys)
        return best_row

    # Run: serial loop or thread pool, sharing the same consumption logic
    if N_JOBS <= 1:
        best_row = drain(map(worker, combos))
    else:
        with ThreadPool(N_JOBS) as pool:
            # Unordered: rows are handled as soon as any thread finishes.
            best_row = drain(pool.imap_unordered(worker, combos))

    if not results:
        log.warning("No results generated.")
        return

    # Save results CSV (error rows have NaN scores and sort to the bottom)
    df = pd.DataFrame(results)
    if "test_score" in df.columns:
        df = df.sort_values(["test_score", "test_sharpe", "test_cagr_pct"], ascending=[False, False, False]).reset_index(drop=True)

    stamp = pd.Timestamp.today(tz="Asia/Kolkata").strftime("%Y%m%d_%H%M%S")
    out_csv = os.path.join(OUT_DIR, f"grid_search_results_{stamp}.csv")
    df.to_csv(out_csv, index=False)
    log.info("Grid results saved → %s", out_csv)

    # Save artifacts for top N (no plots), considering successful combos only:
    # re-simulating a combo that already failed would just fail again.
    if SAVE_TOP_N > 0 and not df.empty:
        succeeded = df[df["error"].isna()] if "error" in df.columns else df
        top_rows = succeeded.head(SAVE_TOP_N)
        if top_rows.empty:
            log.warning("No successful configs; skipping artifacts for top %d.", SAVE_TOP_N)
        else:
            log.info("Saving artifacts for top %d configs…", len(top_rows))
            save_top_artifacts(strat, keys, top_rows, symbols, data_map, bench_df, cfg, stamp)

    # Final best summary
    if best_row is not None:
        params_str = ", ".join(f"{k}={best_row[k]}" for k in keys)
        log.warning(
            "FINAL BEST (by test_score): test_score=%.3f | test_sharpe=%.3f, test_cagr=%.2f%%, test_mdd=%.2f%%, test_trades=%d | %s",
            best_row["test_score"], best_row["test_sharpe"], best_row["test_cagr_pct"],
            best_row["test_max_drawdown_pct"], best_row["test_n_trades"], params_str
        )


# Script entry point: run the sweep only when this file is executed directly.
# (Importing the module still runs the module-level logging.basicConfig call.)
if __name__ == "__main__":
    main()
