In [25]:
# ===================== Celda 00 ‚Äî Run Context + Freeze (NB3 TREND, M5) =====================
# OBJETIVO:
#   1) Definir el contexto reproducible del run (RUN_ID, paths, versiones).
#   2) Crear carpeta de run y snapshots (configs/docs).
#   3) Inicializar GLOBAL_STATE (contrato base de todo el notebook).
#
# NOTA OPERATIVA:
#   - Esta celda NO debe depender de paths hardcodeados.
#   - Si no detecta tu estructura, define ER_STRATEGY_LAB_ROOT o MT5_PROJECT_ROOT como variable de entorno.
#
# GATES (debe cumplir para continuar):
#   - Se resuelve ER_STRATEGY_LAB_ROOT existente
#   - Se crea RUN_DIR
#   - Se escribe run_meta.json
# =============================================================================

from __future__ import annotations

from pathlib import Path
from datetime import datetime, timezone
import json
import os
import shutil
import platform
import sys

import polars as pl

print(">>> Celda 00 :: Run Context + Freeze (NB3 TREND, M5)")

# ========================= Helpers =========================
def _resolve_er_strategy_lab_root() -> Path:
    """
    Resuelve el root de ER_STRATEGY_LAB de forma robusta:
      1) Env var ER_STRATEGY_LAB_ROOT / ER_STRATEGY_LAB
      2) Env var MT5_PROJECT_ROOT / MT5_DE_PROJECT_ROOT (se asume <root>/ER_STRATEGY_LAB)
      3) B√∫squeda hacia arriba desde cwd
      4) Fallback t√≠pico Windows
    """
    # 1) Directo a ER_STRATEGY_LAB
    env_direct = os.getenv("ER_STRATEGY_LAB_ROOT") or os.getenv("ER_STRATEGY_LAB")
    if env_direct:
        p = Path(env_direct).expanduser().resolve()
        return p

    # 2) Project root -> <root>/ER_STRATEGY_LAB
    env_proj = os.getenv("MT5_PROJECT_ROOT") or os.getenv("MT5_DE_PROJECT_ROOT")
    if env_proj:
        pr = Path(env_proj).expanduser().resolve()
        cand = pr / "ER_STRATEGY_LAB"
        return cand if cand.exists() else pr

    # 3) Buscar en cwd y padres
    cwd = Path.cwd().resolve()
    for p in [cwd, *cwd.parents]:
        if p.name.upper() == "ER_STRATEGY_LAB" and p.exists():
            return p
        cand = p / "ER_STRATEGY_LAB"
        if cand.exists():
            return cand.resolve()

    # 4) Fallback t√≠pico (si existe)
    fallback = Path(r"C:\Quant\MT5_Data_Extraction\ER_STRATEGY_LAB")
    if fallback.exists():
        return fallback.resolve()

    raise RuntimeError(
        "[Celda 00] No se pudo resolver ER_STRATEGY_LAB_ROOT.\n"
        "Soluci√≥n recomendada: define variable de entorno ER_STRATEGY_LAB_ROOT apuntando al folder ER_STRATEGY_LAB.\n"
        "Alternativa: define MT5_PROJECT_ROOT apuntando al root del proyecto (que contiene ER_STRATEGY_LAB/).\n"
        f"cwd actual: {Path.cwd()}"
    )

def _safe_mkdir(p: Path) -> None:
    p.mkdir(parents=True, exist_ok=True)

def _polars_version() -> str:
    try:
        return pl.__version__
    except Exception:
        return "unknown"

# ========================= Resolver ra√≠z del laboratorio =========================
ER_STRATEGY_LAB_ROOT = _resolve_er_strategy_lab_root().resolve()

# Estructura esperada (si falta algo, lo creamos; lo importante es que el root sea correcto)
PATH_NOTEBOOKS = ER_STRATEGY_LAB_ROOT / "notebooks"
PATH_CONFIG    = ER_STRATEGY_LAB_ROOT / "config"
PATH_INPUTS    = ER_STRATEGY_LAB_ROOT / "inputs"
PATH_ARTIFACTS = ER_STRATEGY_LAB_ROOT / "artifacts"
PATH_LOGS      = ER_STRATEGY_LAB_ROOT / "research_logs" / "runs"

for p in (PATH_NOTEBOOKS, PATH_CONFIG, PATH_INPUTS, PATH_ARTIFACTS, PATH_LOGS):
    _safe_mkdir(p)

# RUN_ID UTC (formato consistente)
NOW_UTC = datetime.now(timezone.utc)
RUN_ID = NOW_UTC.strftime("%Y%m%d_%H%M%S")

RUN_DIR      = PATH_LOGS / RUN_ID
RUN_SNAP_DIR = RUN_DIR / "snapshots"
RUN_OUT_DIR  = RUN_DIR / "outputs"
RUN_TMP_DIR  = RUN_DIR / "tmp"

for p in (RUN_DIR, RUN_SNAP_DIR, RUN_OUT_DIR, RUN_TMP_DIR):
    _safe_mkdir(p)

# ========================= GLOBAL_STATE (contrato) =========================
GLOBAL_STATE = {
    "run_id": RUN_ID,
    "paths": {
        "lab_root": str(ER_STRATEGY_LAB_ROOT),
        "notebooks": str(PATH_NOTEBOOKS),
        "config": str(PATH_CONFIG),
        "inputs": str(PATH_INPUTS),
        "artifacts": str(PATH_ARTIFACTS),
        "logs_root": str(PATH_LOGS),
        "run_dir": str(RUN_DIR),
        "run_snapshots": str(RUN_SNAP_DIR),
        "run_outputs": str(RUN_OUT_DIR),
        "run_tmp": str(RUN_TMP_DIR),
    },
    "nb2": {},      # se completa en Celda 01
    "universe": {}, # se completa en Celda 01
    "data": {},     # se completa en Celda 02+
    "config": {},   # reservado
    "metrics": {},  # reservado
}

# ========================= Snapshot meta del run =========================
meta = {
    "run_id": RUN_ID,
    "ts_utc": NOW_UTC.isoformat(),
    "python": sys.version,
    "platform": {
        "system": platform.system(),
        "release": platform.release(),
        "version": platform.version(),
        "machine": platform.machine(),
    },
    "libs": {
        "polars": _polars_version(),
    },
    "paths": GLOBAL_STATE["paths"],
}

meta_path = RUN_DIR / "run_meta.json"
with open(meta_path, "w", encoding="utf-8") as f:
    json.dump(meta, f, ensure_ascii=False, indent=2)

# (Opcional) snapshot de config si existe algo
try:
    cfg_snap = RUN_SNAP_DIR / "config_snapshot"
    if PATH_CONFIG.exists() and any(PATH_CONFIG.iterdir()):
        if cfg_snap.exists():
            shutil.rmtree(cfg_snap)
        shutil.copytree(PATH_CONFIG, cfg_snap)
except Exception as e:
    print(f"[Celda 00] WARN: no se pudo snapshotear config/: {e}")

print(f"[Celda 00] ER_STRATEGY_LAB_ROOT = {ER_STRATEGY_LAB_ROOT}")
print(f"[Celda 00] RUN_DIR             = {RUN_DIR}")
print(f"üíæ SNAPSHOT ‚Üí {meta_path} (OK)")
print(">>> Celda 00 :: OK")


>>> Celda 00 :: Run Context + Freeze (NB3 TREND, M5)
[Celda 00] ER_STRATEGY_LAB_ROOT = C:\Quant\MT5_Data_Extraction\ER_STRATEGY_LAB
[Celda 00] RUN_DIR             = C:\Quant\MT5_Data_Extraction\ER_STRATEGY_LAB\research_logs\runs\20251222_103043
üíæ SNAPSHOT ‚Üí C:\Quant\MT5_Data_Extraction\ER_STRATEGY_LAB\research_logs\runs\20251222_103043\run_meta.json (OK)
>>> Celda 00 :: OK


In [26]:
# ===================== Celda 00C v1.0 ‚Äî State Recovery (GLOBAL_STATE) [WFO-safe] =====================
from __future__ import annotations

from pathlib import Path
from datetime import datetime, timezone
import json

print(">>> Celda 00C v1.0 :: State Recovery (GLOBAL_STATE) [WFO-safe]")

def _utc_now_iso() -> str:
    return datetime.utcnow().replace(tzinfo=timezone.utc).isoformat()

def _pick_latest(files: list[Path]) -> Path | None:
    files2 = [p for p in files if p.exists()]
    if not files2:
        return None
    return sorted(files2, key=lambda p: p.stat().st_mtime, reverse=True)[0]

def _find_latest_by_glob(root: Path, pattern: str) -> Path | None:
    if not root.exists():
        return None
    hits = list(root.rglob(pattern))
    return _pick_latest(hits)

def ensure_global_state() -> None:
    global GLOBAL_STATE

    if "GLOBAL_STATE" not in globals() or not isinstance(GLOBAL_STATE, dict):
        GLOBAL_STATE = {}

    # Paths m√≠nimos
    if "paths" not in GLOBAL_STATE or not isinstance(GLOBAL_STATE["paths"], dict):
        # Fallback: intenta inferir un artifacts local relativo al cwd
        cwd = Path.cwd().resolve()
        GLOBAL_STATE["paths"] = {
            "artifacts": str((cwd / "artifacts").resolve()),
            "run_snapshots": str((cwd / "snapshots").resolve()),
        }

    paths = GLOBAL_STATE["paths"]
    artifacts = Path(paths.get("artifacts", "")).resolve()
    snap_dir = Path(paths.get("run_snapshots", "")).resolve()

    # Si ya existe, no pisar
    if "backtest_engine" in GLOBAL_STATE and isinstance(GLOBAL_STATE["backtest_engine"], dict):
        bt = GLOBAL_STATE["backtest_engine"]
        tp = bt.get("trades_path"); sp = bt.get("summary_path")
        if tp and Path(tp).exists() and sp and Path(sp).exists():
            print("[Celda 00C] backtest_engine ya est√° OK en GLOBAL_STATE.")
            return

    # Intentar reconstruir desde snapshots conocidos (prioriza overlay)
    cand_snapshots = []
    if snap_dir.exists():
        cand_snapshots += [
            snap_dir / "overlay_engine_v16_snapshot.json",
            snap_dir / "backtest_engine_v10_snapshot.json",
        ]
    # Tambi√©n buscar dentro de artifacts (por si guardas snapshots ah√≠)
    if artifacts.exists():
        cand_snapshots += [
            artifacts / "snapshots" / "overlay_engine_v16_snapshot.json",
            artifacts / "snapshots" / "backtest_engine_v10_snapshot.json",
        ]

    snap_file = _pick_latest(cand_snapshots)
    bt_dict = {}

    if snap_file and snap_file.exists():
        try:
            j = json.loads(snap_file.read_text(encoding="utf-8"))
            # No asumimos schema exacto; buscamos paths t√≠picos
            # 1) outputs directos
            outs = j.get("outputs", {}) if isinstance(j.get("outputs", {}), dict) else {}
            tp = outs.get("trades_path") or outs.get("trades") or outs.get("trades_parquet")
            sp = outs.get("summary_path") or outs.get("summary") or outs.get("summary_parquet")

            # 2) inputs puede traerlo en algunos snapshots
            ins = j.get("inputs", {}) if isinstance(j.get("inputs", {}), dict) else {}
            tp = tp or ins.get("trades_path")
            sp = sp or ins.get("summary_path")

            # 3) si snapshot no trae, inferir por glob
            if not tp or not Path(str(tp)).exists():
                tp2 = _find_latest_by_glob(artifacts, "trades_engine_v10*_overlay_*.parquet") \
                      or _find_latest_by_glob(artifacts, "trades_engine_v10*.parquet")
                tp = str(tp2) if tp2 else None

            if not sp or not Path(str(sp)).exists():
                sp2 = _find_latest_by_glob(artifacts, "summary_engine_v10*_overlay_*.parquet") \
                      or _find_latest_by_glob(artifacts, "summary_engine_v10*.parquet")
                sp = str(sp2) if sp2 else None

            bt_dict = {
                "trades_path": str(tp) if tp else None,
                "summary_path": str(sp) if sp else None,
                "params": j.get("params", {}) if isinstance(j.get("params", {}), dict) else {},
                "recovered_from": str(snap_file),
                "recovered_utc": _utc_now_iso(),
            }
        except Exception as e:
            print(f"[Celda 00C] WARN: no pude parsear snapshot {snap_file}: {e}")

    # Si no hubo snapshot o fall√≥, inferir desde artifacts directo
    if not bt_dict.get("trades_path") or not Path(bt_dict["trades_path"]).exists():
        tp2 = _find_latest_by_glob(artifacts, "trades_engine_v10*_overlay_*.parquet") \
              or _find_latest_by_glob(artifacts, "trades_engine_v10*.parquet")
        bt_dict["trades_path"] = str(tp2) if tp2 else None

    if not bt_dict.get("summary_path") or not Path(bt_dict["summary_path"]).exists():
        sp2 = _find_latest_by_glob(artifacts, "summary_engine_v10*_overlay_*.parquet") \
              or _find_latest_by_glob(artifacts, "summary_engine_v10*.parquet")
        bt_dict["summary_path"] = str(sp2) if sp2 else None

    # Gate final
    if not bt_dict.get("trades_path") or not Path(bt_dict["trades_path"]).exists():
        raise RuntimeError(f"[Celda 00C] ERROR: no pude reconstruir trades_path. artifacts={artifacts}")

    if not bt_dict.get("summary_path") or not Path(bt_dict["summary_path"]).exists():
        raise RuntimeError(f"[Celda 00C] ERROR: no pude reconstruir summary_path. artifacts={artifacts}")

    GLOBAL_STATE["backtest_engine"] = bt_dict
    print("[Celda 00C] OK: GLOBAL_STATE['backtest_engine'] reconstruido.")
    print(f"  trades_path = {bt_dict['trades_path']}")
    print(f"  summary_path = {bt_dict['summary_path']}")
    if bt_dict.get("recovered_from"):
        print(f"  recovered_from = {bt_dict['recovered_from']}")

ensure_global_state()
print(">>> Celda 00C v1.0 :: OK")


>>> Celda 00C v1.0 :: State Recovery (GLOBAL_STATE) [WFO-safe]
[Celda 00C] OK: GLOBAL_STATE['backtest_engine'] reconstruido.
  trades_path = C:\Quant\MT5_Data_Extraction\ER_STRATEGY_LAB\artifacts\backtests\backtest_engine_v10\overlay_engine_v16\trades_engine_v10_overlay_v16.parquet
  summary_path = C:\Quant\MT5_Data_Extraction\ER_STRATEGY_LAB\artifacts\backtests\backtest_engine_v10\overlay_engine_v16\summary_engine_v10_overlay_v16.parquet
>>> Celda 00C v1.0 :: OK


In [27]:
# ===================== Celda 00B v1.0 ‚Äî Recover BACKTEST_ENGINE pointer (disk) [WFO-safe] =====================
from __future__ import annotations

from pathlib import Path
from datetime import datetime, timezone
import json

print(">>> Celda 00B v1.0 :: Recover BACKTEST_ENGINE pointer (disk) [WFO-safe]")

# ========================= Validaciones GLOBAL_STATE =========================
if "GLOBAL_STATE" not in globals() or not isinstance(GLOBAL_STATE, dict):
    raise RuntimeError("[Celda 00B] ERROR: GLOBAL_STATE no existe o no es dict. Ejecuta tu Celda 00 (init).")

if "paths" not in GLOBAL_STATE or not isinstance(GLOBAL_STATE["paths"], dict):
    raise RuntimeError("[Celda 00B] ERROR: GLOBAL_STATE['paths'] no existe o no es dict. Ejecuta tu Celda 00 (init).")

paths = GLOBAL_STATE["paths"]

if "artifacts" not in paths:
    raise RuntimeError("[Celda 00B] ERROR: paths['artifacts'] no existe. Revisa tu Celda 00 (init paths).")

ART_DIR = Path(paths["artifacts"]).resolve()
ENGINE_ROOT = ART_DIR / "backtests" / "backtest_engine_v10"
OVERLAY_DIR = ENGINE_ROOT / "overlay_engine_v16"

SNAP_DIR = Path(paths.get("run_snapshots") or (ART_DIR / "snapshots")).resolve()
SNAP_DIR.mkdir(parents=True, exist_ok=True)

SNAP_PTR = SNAP_DIR / "backtest_engine_pointer_snapshot.json"

def _pick_existing_pair(cands: list[tuple[str, Path, Path]]) -> tuple[str, Path, Path]:
    for tag, t, s in cands:
        if t.exists() and s.exists():
            return tag, t, s
    # diagn√≥stico
    dbg = [{"tag": tag, "trades": str(t), "trades_exists": t.exists(), "summary": str(s), "summary_exists": s.exists()}
           for tag, t, s in cands]
    raise RuntimeError(f"[Celda 00B] ERROR: no encuentro pares trades/summary v√°lidos. debug={dbg}")

def _load_params_from_snapshots() -> dict:
    # buscamos snapshots t√≠picos (preferir run_snapshots)
    cand_files = []

    if "run_snapshots" in paths and paths["run_snapshots"]:
        sd = Path(paths["run_snapshots"]).resolve()
        cand_files += [
            sd / "overlay_engine_v16_snapshot.json",
            sd / "backtest_engine_v10_snapshot.json",
        ]

    # fallback: artifacts/snapshots
    cand_files += [
        (ART_DIR / "snapshots" / "overlay_engine_v16_snapshot.json"),
        (ART_DIR / "snapshots" / "backtest_engine_v10_snapshot.json"),
    ]

    for fp in cand_files:
        if fp.exists():
            try:
                js = json.loads(fp.read_text(encoding="utf-8"))
                # soportar estructuras comunes
                if isinstance(js, dict):
                    if "params" in js and isinstance(js["params"], dict):
                        return js["params"]
                    if "backtest_engine" in js and isinstance(js["backtest_engine"], dict):
                        p = js["backtest_engine"].get("params")
                        if isinstance(p, dict):
                            return p
            except Exception:
                pass
    return {}

# ========================= Selecci√≥n preferente: OVERLAY si existe =========================
candidates = []
candidates.append(("OVERLAY_V16", OVERLAY_DIR / "trades_engine_v10_overlay_v16.parquet",
                   OVERLAY_DIR / "summary_engine_v10_overlay_v16.parquet"))
candidates.append(("ENGINE_V10", ENGINE_ROOT / "trades_engine_v10.parquet",
                   ENGINE_ROOT / "summary_engine_v10.parquet"))

mode, trades_fp, summary_fp = _pick_existing_pair(candidates)
params = _load_params_from_snapshots()

bt = {
    "mode": mode,
    "trades_path": str(trades_fp),
    "summary_path": str(summary_fp),
    "params": params,
    "created_utc": datetime.utcnow().replace(tzinfo=timezone.utc).isoformat(),
}

GLOBAL_STATE["backtest_engine"] = bt

SNAP_PTR.write_text(json.dumps({
    "created_utc": bt["created_utc"],
    "mode": mode,
    "trades_path": bt["trades_path"],
    "summary_path": bt["summary_path"],
    "params_keys": sorted(list((params or {}).keys())),
}, indent=2, ensure_ascii=False), encoding="utf-8")

print(f"[Celda 00B] OK :: backtest_engine.mode = {mode}")
print(f"[Celda 00B] OK :: trades_path = {bt['trades_path']}")
print(f"[Celda 00B] OK :: summary_path = {bt['summary_path']}")
print(f"[Celda 00B] SNAPSHOT ‚Üí {SNAP_PTR}")
print(">>> Celda 00B v1.0 :: OK")


>>> Celda 00B v1.0 :: Recover BACKTEST_ENGINE pointer (disk) [WFO-safe]
[Celda 00B] OK :: backtest_engine.mode = OVERLAY_V16
[Celda 00B] OK :: trades_path = C:\Quant\MT5_Data_Extraction\ER_STRATEGY_LAB\artifacts\backtests\backtest_engine_v10\overlay_engine_v16\trades_engine_v10_overlay_v16.parquet
[Celda 00B] OK :: summary_path = C:\Quant\MT5_Data_Extraction\ER_STRATEGY_LAB\artifacts\backtests\backtest_engine_v10\overlay_engine_v16\summary_engine_v10_overlay_v16.parquet
[Celda 00B] SNAPSHOT ‚Üí C:\Quant\MT5_Data_Extraction\ER_STRATEGY_LAB\research_logs\runs\20251222_103043\snapshots\backtest_engine_pointer_snapshot.json
>>> Celda 00B v1.0 :: OK


In [28]:
# ==========================================================================================
# Celda 01 v1.3 :: Universe Resolver (NOTEBOOK 2 basket) [HARD-SOURCE-OF-TRUTH + NO MISMATCH]
# ------------------------------------------------------------------------------------------
# GARANT√çA:
# - El universo de ESTE notebook (TREND M5) sale SOLO de la cesta del Notebook 2.
# - Si NB2 no est√° accesible por ruta, usa fallback expl√≠cito (auditado) extra√≠do del HTML report.
# - BLOQUEA silencios: si aparece un s√≠mbolo fuera del basket => error.
#
# EFECTO (soluci√≥n de fondo):
# - Escribe SIEMPRE:
#   GLOBAL_STATE["universe"]["selected_symbols_TREND"]
#   GLOBAL_STATE["universe"]["basket_TREND"]
# - Y PARA EVITAR QUE CELDAS MAL ESCRITAS METAN S√çMBOLOS:
#   Fuerza tambi√©n GLOBAL_STATE["data_quality"]["final_symbols"] = selected
#   (quedando trazabilidad del override)
# ==========================================================================================

from __future__ import annotations

import os
import re
import json
from pathlib import Path
from datetime import datetime
import polars as pl

print(">>> Celda 01 v1.3 :: Universe Resolver (NOTEBOOK 2 basket) [HARD-SOURCE-OF-TRUTH + NO MISMATCH]")

# =========================
# Config m√≠nimo (editable)
# =========================
TARGET_FAMILY = "TREND"   # ESTE notebook es TREND
TARGET_PRESET = "CORE"    # seg√∫n tu report: CORE
SELECT_TOP_N  = 0         # 0 => usar TODA la cesta; >0 => top N por score
SELECT_SYMBOLS_MANUAL: list[str] = []  # opcional (se valida contra basket)

# Overrides directos (si quieres control absoluto)
NB2_REPORT_PATH_OVERRIDE = ""  # ej: r"C:\Quant\MT5_Data_Extraction\ER_FILTER_5M_V1\research_logs\runs\YYYYMMDD_HHMMSS\regimen_selector_report.html"
NB2_RUN_DIR_OVERRIDE     = ""  # ej: r"C:\Quant\MT5_Data_Extraction\ER_FILTER_5M_V1\research_logs\runs\YYYYMMDD_HHMMSS"

# Fallback auditado (extra√≠do del HTML que compartiste)
FALLBACK_BASKETS = {
    ("TREND","CORE"): ["BNBUSD", "XAUAUD", "BTCUSD", "LVMH"],
    ("RANGE","CORE"): ["ETHUSD", "XAUUSD"],
}

# =========================
# Validaciones GLOBAL_STATE
# =========================
if "GLOBAL_STATE" not in globals() or not isinstance(GLOBAL_STATE, dict):
    raise RuntimeError("[Celda 01] ERROR: GLOBAL_STATE no existe o no es dict.")

paths = GLOBAL_STATE.get("paths", {}) or {}
if "artifacts" not in paths:
    raise RuntimeError("[Celda 01] ERROR: falta GLOBAL_STATE['paths']['artifacts'].")

lab_root = Path(paths.get("lab_root") or Path(paths["artifacts"]).resolve().parent).resolve()
base_guess = lab_root.parent  # t√≠pico: C:\Quant\MT5_Data_Extraction

# =========================
# Helpers
# =========================
def _is_run_dirname(name: str) -> bool:
    return bool(re.match(r"^\d{8}_\d{6}$", name.strip()))

def _latest_run_dir(runs_root: Path) -> Path | None:
    if not runs_root.exists():
        return None
    runs = [p for p in runs_root.iterdir() if p.is_dir() and _is_run_dirname(p.name)]
    if not runs:
        return None
    runs.sort(key=lambda p: p.stat().st_mtime, reverse=True)
    return runs[0]

def _find_report_in_run(run_dir: Path) -> Path | None:
    if not run_dir.exists():
        return None
    c1 = run_dir / "regimen_selector_report.html"
    c2 = run_dir / "regime_selector_report.html"
    if c1.exists(): return c1.resolve()
    if c2.exists(): return c2.resolve()
    # fallback: buscar por patr√≥n en nivel 1
    for p in run_dir.glob("*selector*report*.html"):
        if p.is_file():
            return p.resolve()
    return None

def _find_nb2_runs_roots(base_dir: Path) -> list[Path]:
    roots: list[Path] = []
    if not base_dir.exists():
        return roots
    for d in base_dir.iterdir():
        if not d.is_dir():
            continue
        name_u = d.name.upper()
        # m√°s laxo que "ER_FILTER*" para evitar misses
        if ("ER_FILTER" in name_u) or ("FILTER" in name_u and "ER_" in name_u):
            rr = d / "research_logs" / "runs"
            if rr.exists():
                roots.append(rr.resolve())
    return roots

def _read_basket_from_html(html_path: Path) -> pl.DataFrame:
    try:
        import pandas as pd  # type: ignore
    except Exception as e:
        raise RuntimeError(f"[Celda 01] ERROR: falta pandas para leer HTML: {e}")

    tables = pd.read_html(str(html_path))
    best = None
    for t in tables:
        cols = [str(c).strip().lower() for c in list(t.columns)]
        if ("symbol" in cols) and ("family" in cols) and ("preset" in cols):
            best = t.copy()
            break
    if best is None:
        raise RuntimeError("[Celda 01] ERROR: HTML no contiene tabla con columnas (symbol,family,preset).")

    best.columns = [str(c).strip() for c in best.columns]
    low = {c: c.lower() for c in best.columns}
    best.rename(columns=low, inplace=True)

    score_col = None
    for c in ["score_final", "score_v2", "score"]:
        if c in best.columns:
            score_col = c
            break

    df = pl.from_pandas(best)
    df = df.with_columns([
        pl.col("symbol").cast(pl.Utf8, strict=False).str.to_uppercase().alias("symbol"),
        pl.col("family").cast(pl.Utf8, strict=False).str.to_uppercase().alias("family"),
        pl.col("preset").cast(pl.Utf8, strict=False).str.to_uppercase().alias("preset"),
    ])
    if score_col:
        df = df.with_columns(pl.col(score_col).cast(pl.Float64, strict=False).alias("score_key"))
    else:
        df = df.with_columns(pl.lit(None).cast(pl.Float64).alias("score_key"))

    return df.select(["symbol","family","preset","score_key"]).unique(subset=["symbol"], keep="first")

# =========================
# Resolver report HTML (prioridad absoluta)
# =========================
picked_report: Path | None = None
picked_run_dir: Path | None = None
picked_source = None

# 1) override directo por report path
if NB2_REPORT_PATH_OVERRIDE.strip():
    p = Path(NB2_REPORT_PATH_OVERRIDE.strip()).expanduser().resolve()
    if not p.exists():
        raise RuntimeError(f"[Celda 01] ERROR: NB2_REPORT_PATH_OVERRIDE no existe: {p}")
    picked_report = p
    picked_run_dir = p.parent
    picked_source = "OVERRIDE_REPORT_PATH"

# 2) override por run dir
if picked_report is None and NB2_RUN_DIR_OVERRIDE.strip():
    rd = Path(NB2_RUN_DIR_OVERRIDE.strip()).expanduser().resolve()
    rp = _find_report_in_run(rd)
    if rp is None:
        raise RuntimeError(f"[Celda 01] ERROR: no encontr√© *selector*report*.html dentro de NB2_RUN_DIR_OVERRIDE: {rd}")
    picked_report = rp
    picked_run_dir = rd
    picked_source = "OVERRIDE_RUN_DIR"

# 3) auto-discovery: buscar NB2 roots en base_guess
if picked_report is None:
    roots = _find_nb2_runs_roots(base_guess)
    best = None
    best_mtime = -1.0
    best_run = None
    for rr in roots:
        lr = _latest_run_dir(rr)
        if lr is None:
            continue
        rp = _find_report_in_run(lr)
        if rp is None:
            continue
        mt = rp.stat().st_mtime
        if mt > best_mtime:
            best_mtime = mt
            best = rp
            best_run = lr
    if best is not None:
        picked_report = best
        picked_run_dir = best_run
        picked_source = "AUTO_NB2_LATEST_REPORT"

# 4) auto-discovery local: buscar report dentro de ER_STRATEGY_LAB (r√°pido)
if picked_report is None:
    # buscar en rutas t√≠picas del lab
    candidates = [
        lab_root / "research_logs" / "runs",
        lab_root / "research_logs",
        lab_root,
    ]
    found = []
    for c in candidates:
        if c.exists():
            for p in c.rglob("regimen_selector_report.html"):
                if p.is_file():
                    found.append(p.resolve())
            for p in c.rglob("regime_selector_report.html"):
                if p.is_file():
                    found.append(p.resolve())
    if found:
        found.sort(key=lambda p: p.stat().st_mtime, reverse=True)
        picked_report = found[0]
        picked_run_dir = picked_report.parent
        picked_source = "AUTO_LOCAL_LATEST_REPORT"

# =========================
# Leer basket (HTML -> fallback hardcode)
# =========================
df_basket = None
basket_source = None

if picked_report is not None:
    try:
        df_basket = _read_basket_from_html(picked_report)
        basket_source = f"HTML::{picked_source}"
    except Exception as e:
        print(f"[Celda 01] WARNING: fall√≥ lectura HTML ({picked_report.name}). Usar√© fallback hardcode. err={e}")

if df_basket is None:
    # fallback hardcode
    rows = []
    for (fam, pre), syms in FALLBACK_BASKETS.items():
        for s in syms:
            rows.append({"symbol": s, "family": fam, "preset": pre, "score_key": None})
    df_basket = pl.DataFrame(rows)
    basket_source = "FALLBACK_HARDCODED_FROM_SHARED_REPORT"
    picked_report = None
    picked_run_dir = None

# =========================
# Filtrar target family/preset
# =========================
df_target = df_basket.filter((pl.col("family") == TARGET_FAMILY) & (pl.col("preset") == TARGET_PRESET))
if df_target.height == 0:
    # relajar preset si no existe
    df_target = df_basket.filter(pl.col("family") == TARGET_FAMILY)

symbols_basket = df_target.select(pl.col("symbol")).to_series().to_list()
if not symbols_basket:
    raise RuntimeError(f"[Celda 01] ERROR: basket vac√≠o para family={TARGET_FAMILY} preset={TARGET_PRESET} (source={basket_source}).")

# =========================
# Selecci√≥n final (sin silencios)
# =========================
basket_set = set(symbols_basket)

if SELECT_SYMBOLS_MANUAL:
    selected = [str(s).upper().strip() for s in SELECT_SYMBOLS_MANUAL if str(s).strip()]
    bad = [s for s in selected if s not in basket_set]
    if bad:
        raise RuntimeError(f"[Celda 01] ERROR: SELECT_SYMBOLS_MANUAL fuera del basket NB2: bad={bad} basket_sample={symbols_basket[:20]}")
else:
    # ranking si score_key existe y no es todo null
    has_score = df_target.select(pl.col("score_key").drop_nulls().count()).item() > 0
    df_rank = df_target.sort("score_key", descending=True) if has_score else df_target
    if int(SELECT_TOP_N) > 0:
        selected = df_rank.head(int(SELECT_TOP_N)).select(pl.col("symbol")).to_series().to_list()
    else:
        selected = df_rank.select(pl.col("symbol")).to_series().to_list()

# guardrail definitivo
bad2 = [s for s in selected if s not in basket_set]
if bad2:
    raise RuntimeError(f"[Celda 01] ERROR: selecci√≥n final fuera del basket: {bad2}")

# =========================
# Persistir a GLOBAL_STATE (y forzar source-of-truth)
# =========================
GLOBAL_STATE.setdefault("universe", {})
GLOBAL_STATE["universe"].update({
    "source": "NOTEBOOK2_BASKET_LOCKED",
    "basket_source": basket_source,
    "target_family": TARGET_FAMILY,
    "target_preset": TARGET_PRESET,
    "nb2_report_path": str(picked_report) if picked_report else None,
    "nb2_run_dir": str(picked_run_dir) if picked_run_dir else None,
    "basket_TREND": symbols_basket if TARGET_FAMILY == "TREND" else GLOBAL_STATE["universe"].get("basket_TREND", []),
    "selected_symbols_TREND": selected if TARGET_FAMILY == "TREND" else GLOBAL_STATE["universe"].get("selected_symbols_TREND", []),
    "resolved_utc": datetime.utcnow().isoformat(),
})

# FORZAR: evitar que celdas posteriores usen listas equivocadas
GLOBAL_STATE.setdefault("data_quality", {})
prev = GLOBAL_STATE["data_quality"].get("final_symbols")
GLOBAL_STATE["data_quality"]["final_symbols_prev"] = prev
GLOBAL_STATE["data_quality"]["final_symbols"] = selected
GLOBAL_STATE["data_quality"]["final_symbols_source"] = "FORCED_FROM_NB2_BASKET_LOCKED"
GLOBAL_STATE["data_quality"]["final_symbols_forced_utc"] = datetime.utcnow().isoformat()

# =========================
# Prints auditables
# =========================
print(f"[Celda 01] basket_source = {basket_source}")
print(f"[Celda 01] report_path   = {str(picked_report) if picked_report else 'NONE (fallback)'}")
print(f"[Celda 01] run_dir       = {str(picked_run_dir) if picked_run_dir else 'NONE (fallback)'}")
print(f"[Celda 01] BASKET({TARGET_FAMILY}/{TARGET_PRESET}) size={len(symbols_basket)} => {symbols_basket}")
print(f"[Celda 01] SELECTED size={len(selected)} => {selected}")
print(f"[Celda 01] data_quality.final_symbols FORZADO => {GLOBAL_STATE['data_quality']['final_symbols']}")

# Killer guard: si aparece GBPNZD en selected, esto debe explotar (no deber√≠a pasar)
if "GBPNZD" in set(selected):
    raise RuntimeError("[Celda 01] ERROR: GBPNZD apareci√≥ en SELECTED pero NO pertenece al basket NB2. STOP.")

print(">>> Celda 01 v1.3 :: OK")


>>> Celda 01 v1.3 :: Universe Resolver (NOTEBOOK 2 basket) [HARD-SOURCE-OF-TRUTH + NO MISMATCH]
[Celda 01] basket_source = FALLBACK_HARDCODED_FROM_SHARED_REPORT
[Celda 01] report_path   = NONE (fallback)
[Celda 01] run_dir       = NONE (fallback)
[Celda 01] BASKET(TREND/CORE) size=4 => ['BNBUSD', 'XAUAUD', 'BTCUSD', 'LVMH']
[Celda 01] SELECTED size=4 => ['BNBUSD', 'XAUAUD', 'BTCUSD', 'LVMH']
[Celda 01] data_quality.final_symbols FORZADO => ['BNBUSD', 'XAUAUD', 'BTCUSD', 'LVMH']
>>> Celda 01 v1.3 :: OK


In [29]:
# ===================== Celda 02 v2.2 ‚Äî Cargar OHLCV M5 + QA (robust time inference + polars engine) =====================
# OBJETIVO:
#   1) Cargar OHLCV M5 para s√≠mbolos seleccionados.
#   2) Inferir y normalizar columna de tiempo (incluye epoch int).
#   3) Normalizar OHLCV; QA; persistir parquet limpio por s√≠mbolo.
# =============================================================================

from __future__ import annotations
from pathlib import Path
import os
import re
from datetime import timedelta

import polars as pl

print(">>> Celda 02 v2.2 :: OHLCV M5 + QA (robust time inference + polars engine)")

# ========================= Validaciones GLOBAL_STATE =========================
if "GLOBAL_STATE" not in globals() or not isinstance(GLOBAL_STATE, dict):
    raise RuntimeError("[Celda 02] ERROR: GLOBAL_STATE no existe.")

paths = GLOBAL_STATE.get("paths", {}) or {}
for k in ("artifacts", "lab_root"):
    if k not in paths:
        raise RuntimeError(f"[Celda 02] ERROR: GLOBAL_STATE['paths'] incompleto; falta: {k}")

selected = (GLOBAL_STATE.get("universe", {}) or {}).get("selected_symbols_TREND", [])
if not selected:
    raise RuntimeError("[Celda 02] ERROR: no hay selected_symbols_TREND (ejecuta Celda 01).")

print("[Celda 02] selected_symbols_TREND =", selected)

# ========================= Par√°metros QA =========================
MAX_GAP_SEC = 2 * 3600
MACRO_GAP_SEC = 12 * 3600
MIN_COVERAGE_RECENT = 0.95

FALLBACK_MAX_PARQUETS = 3000
FALLBACK_MAX_MATCHES  = 50

OUT_DIR = Path(paths["artifacts"]).resolve() / "features" / "m5_ohlcv_clean"
OUT_DIR.mkdir(parents=True, exist_ok=True)

# ========================= Roots =========================
def _resolve_data_root(er_lab_root: Path) -> Path:
    env = os.getenv("MT5_PROJECT_ROOT") or os.getenv("MT5_DE_PROJECT_ROOT") or os.getenv("DATA_ROOT")
    if env:
        return Path(env).expanduser().resolve()

    if er_lab_root.name.upper() == "ER_STRATEGY_LAB":
        return er_lab_root.parent.resolve()

    for p in [er_lab_root, *er_lab_root.parents]:
        if (p / "data").exists():
            return p.resolve()

    common = Path(r"C:\Quant\MT5_Data_Extraction")
    if common.exists():
        return common.resolve()

    raise RuntimeError("[Celda 02] No se pudo resolver DATA_ROOT. Define MT5_PROJECT_ROOT o DATA_ROOT.")

DATA_ROOT = _resolve_data_root(Path(paths["lab_root"]))

RATES_ROOT_CANDIDATES = [
    DATA_ROOT / "data" / "historical_data" / "m5_clean",
    DATA_ROOT / "data" / "historical_data" / "rates_5m",
    DATA_ROOT / "data" / "rates_5m",
    DATA_ROOT / "data" / "bulk_data" / "m5_raw",
    DATA_ROOT / "bulk_data" / "rates_5m",
]

RATES_ROOT_EXISTING = [p for p in RATES_ROOT_CANDIDATES if p.exists() and p.is_dir()]
if not RATES_ROOT_EXISTING:
    raise RuntimeError(
        "[Celda 02] ERROR: no existe ning√∫n root de rates M5.\n"
        "Prob√©:\n  " + "\n  ".join(map(str, RATES_ROOT_CANDIDATES)) + "\n"
        f"DATA_ROOT={DATA_ROOT}"
    )

print("[Celda 02] DATA_ROOT =", DATA_ROOT)
print("[Celda 02] Roots rates existentes:")
for p in RATES_ROOT_EXISTING:
    print("   -", p)

# ========================= Helpers =========================
def _collect_compat(lf: pl.LazyFrame) -> pl.DataFrame:
    # Polars >=1.25: streaming param deprecated; usar engine="streaming"
    try:
        return lf.collect(engine="streaming")
    except TypeError:
        # Polars viejo
        return lf.collect(streaming=True)

def _first_dir_with_parquets(dirs: list[Path]) -> Path | None:
    for d in dirs:
        if not d.is_dir():
            continue
        if any(d.glob("*.parquet")) or any(d.glob("*/*.parquet")) or any(d.glob("**/*.parquet")):
            return d
    return None

def _scan_symbol_parquet(symbol: str, root: Path) -> pl.LazyFrame:
    sym_u = symbol.upper().strip()

    direct = root / sym_u
    if direct.exists() and direct.is_dir():
        return pl.scan_parquet(str(direct / "**" / "*.parquet"), hive_partitioning=True)

    exact_dirs = [p for p in root.rglob(sym_u) if p.is_dir()]
    chosen = _first_dir_with_parquets(sorted(exact_dirs, key=lambda x: len(str(x))))
    if chosen is not None:
        return pl.scan_parquet(str(chosen / "**" / "*.parquet"), hive_partitioning=True)

    part_keys = ("symbol=", "ticker=", "instrument=", "pair=")
    part_dirs = []
    for p in root.rglob(f"*{sym_u}*"):
        if p.is_dir():
            nu = p.name.upper()
            if any(k.upper() in nu for k in part_keys):
                part_dirs.append(p)
    chosen = _first_dir_with_parquets(sorted(part_dirs, key=lambda x: len(str(x))))
    if chosen is not None:
        return pl.scan_parquet(str(chosen / "**" / "*.parquet"), hive_partitioning=True)

    hits = list(root.rglob(f"*{sym_u}*.parquet"))
    if hits:
        return pl.scan_parquet([str(p) for p in hits], hive_partitioning=True)

    all_parqs = list(root.rglob("*.parquet"))
    if 0 < len(all_parqs) <= FALLBACK_MAX_PARQUETS:
        lf_all = pl.scan_parquet([str(p) for p in all_parqs], hive_partitioning=True)
        cols = lf_all.collect_schema().names()
        if "symbol" in cols:
            return lf_all.filter(pl.col("symbol").cast(pl.Utf8).str.to_uppercase() == pl.lit(sym_u))
        if "ticker" in cols:
            return lf_all.filter(pl.col("ticker").cast(pl.Utf8).str.to_uppercase() == pl.lit(sym_u))

    raise RuntimeError(f"[Celda 02] ERROR: no se encontr√≥ dataset parquet para {sym_u} en root: {root}")

def _pick_time_col(df: pl.DataFrame) -> str:
    # 1) nombres comunes (case-insensitive)
    candidates = [
        "time_utc","time","datetime","dt","timestamp",
        "Time","TIME","date","Date","DATE","date_time","DateTime","datetime_utc",
        "time_msc","time_ms","timestamp_ms","timestamp_us","timestamp_ns",
        "ts","ts_utc","t","open_time","close_time"
    ]
    cols = df.columns
    cols_lower = {c.lower(): c for c in cols}

    for c in candidates:
        if c.lower() in cols_lower:
            return cols_lower[c.lower()]

    # 2) regex heur√≠stica en nombre
    rx = re.compile(r"(time|date|datetime|timestamp|ts)", re.IGNORECASE)
    rx_hits = [c for c in cols if rx.search(c)]
    if len(rx_hits) == 1:
        return rx_hits[0]

    # 3) por dtype: si hay exactamente 1 Datetime/Date, √∫sala
    schema = df.schema
    dt_like = [c for c, t in schema.items() if isinstance(t, (pl.Datetime, pl.Date))]
    if len(dt_like) == 1:
        return dt_like[0]

    # 4) falla: imprime diagn√≥stico y corta
    print("\n[Celda 02][DIAG] Columnas disponibles (name: dtype):")
    for c, t in df.schema.items():
        print(f"  - {c}: {t}")
    if rx_hits:
        print("\n[Celda 02][DIAG] Candidatas por regex (time/date/ts...):", rx_hits)
    if dt_like:
        print("\n[Celda 02][DIAG] Candidatas por dtype Date/Datetime:", dt_like)

    raise RuntimeError("[Celda 02] ERROR: no encuentro columna de tiempo. Revisa [DIAG] arriba.")

def _to_time_utc(df: pl.DataFrame) -> pl.DataFrame:
    time_col = _pick_time_col(df)
    out = df

    if time_col != "time_utc":
        out = out.rename({time_col: "time_utc"})

    t = out.schema["time_utc"]

    # Si es string -> parse datetime
    if t == pl.Utf8:
        out = out.with_columns(pl.col("time_utc").str.to_datetime(strict=False, time_zone="UTC"))
        return out

    # Si es Date -> a Datetime UTC
    if isinstance(t, pl.Date):
        out = out.with_columns(pl.col("time_utc").cast(pl.Datetime(time_zone="UTC")))
        return out

    # Si ya es Datetime -> asegurar UTC si no tiene tz
    if isinstance(t, pl.Datetime):
        try:
            tz = out.schema["time_utc"].time_zone
            if tz is None:
                out = out.with_columns(pl.col("time_utc").dt.replace_time_zone("UTC"))
        except Exception:
            pass
        return out

    # Si es entero epoch -> inferir unidad por magnitud
    if t in (pl.Int64, pl.Int32, pl.UInt64, pl.UInt32, pl.Float64, pl.Float32):
        # Tomar una muestra para inferir escala
        sample = out.select(pl.col("time_utc").drop_nulls().head(50)).to_series()
        if sample.len() == 0:
            raise RuntimeError("[Celda 02] ERROR: columna tiempo sin valores (todo null).")

        vmax = float(sample.max())
        # heur√≠stica:
        # seconds ~ 1e9, ms ~ 1e12-1e13, us ~ 1e15, ns ~ 1e18
        if vmax < 1e11:
            unit = "s"
        elif vmax < 1e14:
            unit = "ms"
        elif vmax < 1e17:
            unit = "us"
        else:
            unit = "ns"

        out = out.with_columns(
            pl.from_epoch(pl.col("time_utc").cast(pl.Int64, strict=False), time_unit=unit).dt.replace_time_zone("UTC")
        )
        print(f"[Celda 02] INFO: time_utc inferido desde epoch int con unidad='{unit}'")
        return out

    # √∫ltimo intento gen√©rico
    out = out.with_columns(pl.col("time_utc").cast(pl.Datetime(time_zone="UTC"), strict=False))
    return out

def _ensure_ohlcv_cols(df: pl.DataFrame) -> pl.DataFrame:
    ren = {}
    for c in df.columns:
        cl = c.lower().strip()
        if cl in ("o","open"): ren[c] = "open"
        if cl in ("h","high"): ren[c] = "high"
        if cl in ("l","low"):  ren[c] = "low"
        if cl in ("c","close"): ren[c] = "close"
        if cl in ("vol","volume","tick_volume"): ren[c] = "volume"
        if cl in ("spr","spread"): ren[c] = "spread"
    out = df.rename(ren) if ren else df

    for req in ["open","high","low","close"]:
        if req not in out.columns:
            print("\n[Celda 02][DIAG] Columnas disponibles:", out.columns)
            raise RuntimeError(f"[Celda 02] ERROR: falta columna requerida OHLC: {req}")

    out = out.with_columns([
        pl.col("open").cast(pl.Float64, strict=False),
        pl.col("high").cast(pl.Float64, strict=False),
        pl.col("low").cast(pl.Float64, strict=False),
        pl.col("close").cast(pl.Float64, strict=False),
    ])
    if "volume" in out.columns:
        out = out.with_columns(pl.col("volume").cast(pl.Float64, strict=False))
    if "spread" in out.columns:
        out = out.with_columns(pl.col("spread").cast(pl.Float64, strict=False))
    return out

def _qa_time_gaps(df: pl.DataFrame) -> dict:
    t = df.get_column("time_utc").to_list()
    if len(t) < 3:
        return {"max_gap_sec": None, "macro_gaps": 0}

    gaps = []
    for i in range(1, len(t)):
        dt = (t[i] - t[i-1]).total_seconds()
        if dt is not None and dt >= 0:
            gaps.append(dt)
    if not gaps:
        return {"max_gap_sec": None, "macro_gaps": 0}

    max_gap = max(gaps)
    macro = sum(1 for g in gaps if g > MACRO_GAP_SEC)
    return {"max_gap_sec": float(max_gap), "macro_gaps": int(macro)}

def _coverage_recent(df: pl.DataFrame) -> float:
    if df.height < 10:
        return 0.0
    tmax = df.select(pl.col("time_utc").max()).item()
    tmin = tmax - timedelta(days=7)
    recent = df.filter(pl.col("time_utc") >= pl.lit(tmin))
    if recent.height < 10:
        return 0.0
    expected = 7 * 24 * 12
    return min(1.0, recent.height / expected)

# ========================= Main =========================
clean_paths = {}
qa_rows = []

for sym in selected:
    sym_u = sym.upper().strip()

    lf = None
    last_err = None
    for root in RATES_ROOT_EXISTING:
        try:
            lf = _scan_symbol_parquet(sym_u, root)
            print(f"[Celda 02] Found dataset for {sym_u} in root: {root}")
            break
        except Exception as e:
            last_err = e
            continue

    if lf is None:
        raise RuntimeError(f"[Celda 02] ERROR: no pude cargar {sym_u}. √öltimo error: {last_err}")

    df = _collect_compat(lf)

    # --- FIX central: inferir time correctamente ---
    df = _to_time_utc(df)
    df = _ensure_ohlcv_cols(df)

    df = df.sort("time_utc").unique(subset=["time_utc"], keep="last")

    qa = _qa_time_gaps(df)
    cov = _coverage_recent(df)

    qa_rows.append({
        "symbol": sym_u,
        "n_rows": int(df.height),
        "max_gap_sec": qa["max_gap_sec"],
        "macro_gaps": qa["macro_gaps"],
        "coverage_7d": float(cov),
        "time_min": df.select(pl.col("time_utc").min()).item(),
        "time_max": df.select(pl.col("time_utc").max()).item(),
    })

    if qa["max_gap_sec"] is not None and qa["max_gap_sec"] > MAX_GAP_SEC:
        print(f"[Celda 02] WARN: {sym_u} max_gap_sec={qa['max_gap_sec']:.0f}s (> {MAX_GAP_SEC}s)")
    if cov < MIN_COVERAGE_RECENT:
        print(f"[Celda 02] WARN: {sym_u} coverage_7d={cov:.2%} (< {MIN_COVERAGE_RECENT:.0%})")

    out_path = OUT_DIR / f"{sym_u}_M5_OHLCV_clean.parquet"
    df.write_parquet(str(out_path), compression="zstd")
    clean_paths[sym_u] = str(out_path)

    print(f"[Celda 02] OK: {sym_u} ‚Üí {out_path} | rows={df.height}")

qa_df = pl.DataFrame(qa_rows).sort("symbol")
qa_path = OUT_DIR / "_QA_summary.parquet"
qa_df.write_parquet(str(qa_path), compression="zstd")

GLOBAL_STATE["data"] = {
    "ohlcv_clean_dir": str(OUT_DIR),
    "ohlcv_clean_paths": clean_paths,
    "qa_summary_path": str(qa_path),
}

print(f"üíæ QA_summary ‚Üí {qa_path} (OK)")
print(">>> Celda 02 :: OK")


>>> Celda 02 v2.2 :: OHLCV M5 + QA (robust time inference + polars engine)
[Celda 02] selected_symbols_TREND = ['BNBUSD', 'XAUAUD', 'BTCUSD', 'LVMH']
[Celda 02] DATA_ROOT = C:\Quant\MT5_Data_Extraction
[Celda 02] Roots rates existentes:
   - C:\Quant\MT5_Data_Extraction\data\historical_data\m5_clean
   - C:\Quant\MT5_Data_Extraction\data\bulk_data\m5_raw
   - C:\Quant\MT5_Data_Extraction\bulk_data\rates_5m
[Celda 02] Found dataset for BNBUSD in root: C:\Quant\MT5_Data_Extraction\data\historical_data\m5_clean
[Celda 02] WARN: BNBUSD max_gap_sec=125100s (> 7200s)
[Celda 02] OK: BNBUSD ‚Üí C:\Quant\MT5_Data_Extraction\ER_STRATEGY_LAB\artifacts\features\m5_ohlcv_clean\BNBUSD_M5_OHLCV_clean.parquet | rows=409320
[Celda 02] Found dataset for XAUAUD in root: C:\Quant\MT5_Data_Extraction\data\historical_data\m5_clean
[Celda 02] WARN: XAUAUD max_gap_sec=264000s (> 7200s)
[Celda 02] WARN: XAUAUD coverage_7d=65.08% (< 95%)
[Celda 02] OK: XAUAUD ‚Üí C:\Quant\MT5_Data_Extraction\ER_STRATEGY_LAB\art

In [30]:
# ===================== Celda 02B v1.3 ‚Äî QA Gate INSTITUCIONAL (break-aware) + Coverage bounded + Timeframe sanity =====================
# FIXES vs v1.2:
# - coverage_day CLIP a [0,1] (nunca >1).
# - gap_p90 se calcula EXCLUYENDO el gap m√°ximo por d√≠a (remueve break diario / weekend gap).
# - sanity check de timeframe: median_gap_small ~300s para M5 (si no, NO_GO por dataset mal).
# - diagn√≥stico ampliado en print por s√≠mbolo (para auditar sin celdas extra).

from __future__ import annotations
from pathlib import Path
from datetime import timedelta
import json
import polars as pl

print(">>> Celda 02B v1.3 :: QA Gate INSTITUCIONAL (break-aware) + Coverage bounded + Timeframe sanity")

# ------------------------- Requisitos -------------------------
if "GLOBAL_STATE" not in globals() or not isinstance(GLOBAL_STATE, dict):
    raise RuntimeError("[Celda 02B] ERROR: GLOBAL_STATE no existe o no es dict.")

paths = GLOBAL_STATE.get("paths", {}) or {}
if "run_snapshots" not in paths:
    raise RuntimeError("[Celda 02B] ERROR: falta GLOBAL_STATE['paths']['run_snapshots'] (ejecuta Celda 00).")

data_state = GLOBAL_STATE.get("data", {}) or {}
if "ohlcv_clean_paths" not in data_state:
    raise RuntimeError("[Celda 02B] ERROR: falta GLOBAL_STATE['data']['ohlcv_clean_paths'] (ejecuta Celda 02).")

clean_paths = dict(data_state["ohlcv_clean_paths"])
symbols = sorted([str(s).upper().strip() for s in clean_paths.keys() if str(s).strip()])
if not symbols:
    raise RuntimeError("[Celda 02B] ERROR: no hay s√≠mbolos en ohlcv_clean_paths.")

snap_dir = Path(paths["run_snapshots"]).resolve()
snap_dir.mkdir(parents=True, exist_ok=True)

# ------------------------- Par√°metros institucionales -------------------------
FAIL_ON_NO_GO = True
REPLACE_SELECTED_WITH_FINAL = True

AUTO_CROP_ALWAYS = True
CROP_DAYS = 180
MIN_DAYS_AFTER_CROP = 30

LOOKBACK_DAYS_FOR_STATS = 60
WINDOW_CAL_DAYS = 14

MIN_BARS_DAY_ACTIVE = 30

# Thresholds M5 intra-day (excluyendo gap m√°ximo por d√≠a):
GAP_P90_EXCL_GO_SEC = 900
GAP_P90_EXCL_WARN_SEC = 1800

# Coverage (bounded) sobre d√≠as activos:
COV_MED_GO = 0.95
COV_MED_WARN = 0.85

# Timeframe sanity para M5:
# mediana de gaps "peque√±os" (filtrados <= 1800s) debe estar cerca de 300s
M5_TARGET_SEC = 300
M5_TOL_SEC = 30   # tolerancia amplia (broker quirks)
MAX_SMALL_GAP_SEC = 1800  # para estimar periodicidad sin contaminar con breaks

# Actividad esperada (14 d√≠as) por tipo:
MIN_ACTIVE_DAYS_SESSION = 7
MIN_ACTIVE_DAYS_WEEKDAY_24H = 8
MIN_ACTIVE_DAYS_24_7 = 12

# Force-continue (research) por defecto apagado
ALLOW_FORCE_CONTINUE = False
FORCE_CONTINUE_SYMBOLS: list[str] = []

# ------------------------- Helpers -------------------------
def _save_json(p: Path, obj: dict) -> None:
    p.write_text(json.dumps(obj, ensure_ascii=False, indent=2), encoding="utf-8")

def _classify(window_hours_median: float, weekend_share: float) -> tuple[str, int]:
    # SESSION: ventana <=12h
    if window_hours_median <= 12.0:
        return "SESSION", MIN_ACTIVE_DAYS_SESSION
    # 24h
    if weekend_share < 0.05:
        return "WEEKDAY_24H", MIN_ACTIVE_DAYS_WEEKDAY_24H
    return "ALLDAY_24_7", MIN_ACTIVE_DAYS_24_7

def _decision(m5_ok: bool, active_days_14: int, min_active_days_14: int, cov_med: float, gap_p90_excl: float) -> str:
    if not m5_ok:
        return "NO_GO"
    if active_days_14 < min_active_days_14:
        return "NO_GO"
    if (cov_med < COV_MED_WARN) or (gap_p90_excl > GAP_P90_EXCL_WARN_SEC):
        return "NO_GO"
    if (cov_med < COV_MED_GO) or (gap_p90_excl > GAP_P90_EXCL_GO_SEC):
        return "WARN"
    return "GO"

# ------------------------- Main -------------------------
decisions = []
final_symbols = []

for sym in symbols:
    p = Path(clean_paths[sym]).resolve()
    if not p.exists():
        raise RuntimeError(f"[Celda 02B] ERROR: no existe parquet para {sym}: {p}")

    df = pl.read_parquet(str(p))
    if "time_utc" not in df.columns:
        raise RuntimeError(f"[Celda 02B] ERROR: {sym} no tiene time_utc (revisa Celda 02).")

    df = df.sort("time_utc")

    # ---- Crop (siempre, si habilitado) ----
    crop_applied = False
    crop_path = None
    crop_days_effective = None

    if AUTO_CROP_ALWAYS:
        tmax0 = df.select(pl.col("time_utc").max()).item()
        tmin0 = tmax0 - timedelta(days=int(CROP_DAYS))
        df_crop = df.filter(pl.col("time_utc") >= pl.lit(tmin0))
        if df_crop.height >= 10:
            tmin_eff = df_crop.select(pl.col("time_utc").min()).item()
            crop_days_effective = int((tmax0 - tmin_eff).days)
            if crop_days_effective >= MIN_DAYS_AFTER_CROP:
                out_dir = Path(data_state.get("ohlcv_clean_dir") or p.parent).resolve()
                out_dir.mkdir(parents=True, exist_ok=True)
                out_p = out_dir / f"{sym}_M5_OHLCV_clean_CROP_{CROP_DAYS}d.parquet"
                df_crop.write_parquet(str(out_p), compression="zstd")
                crop_applied = True
                crop_path = str(out_p)
                clean_paths[sym] = crop_path
                df = df_crop.sort("time_utc")

    # ---- lookback stats ----
    tmax = df.select(pl.col("time_utc").max()).item()
    tmin_stats = tmax - timedelta(days=int(LOOKBACK_DAYS_FOR_STATS))
    df_s = df.filter(pl.col("time_utc") >= pl.lit(tmin_stats)).sort("time_utc")

    df_s = df_s.with_columns([
        pl.col("time_utc").dt.date().alias("_date"),
        pl.col("time_utc").dt.weekday().alias("_wday"),
        pl.col("time_utc").dt.hour().alias("_hour"),
        pl.col("time_utc").dt.minute().alias("_minute"),
        pl.col("time_utc").dt.epoch("s").alias("_ts_s"),
    ]).with_columns([
        (pl.col("_hour") * 60 + pl.col("_minute")).alias("_tod_min"),
        (pl.col("_wday") >= 6).alias("_is_weekend"),
    ]).with_columns([
        (pl.col("_ts_s") - pl.col("_ts_s").shift(1)).alias("_gap_s"),
    ]).with_columns([
        # limpiar gaps inv√°lidos
        pl.when((pl.col("_gap_s") > 0) & (pl.col("_gap_s") < 86400 * 10)).then(pl.col("_gap_s")).otherwise(None).alias("_gap_s"),
    ])

    # weekend share (por barras)
    weekend_share = float(
        df_s.select(
            (pl.col("_is_weekend").cast(pl.Int8).sum() / (pl.len() + 1e-12)).alias("wk_share")
        ).item()
    )

    # ---- Timeframe sanity (M5) ----
    gaps_small = df_s.select(pl.col("_gap_s")).drop_nulls().filter(pl.col("_gap_s") <= MAX_SMALL_GAP_SEC)
    med_gap_small = float(gaps_small.select(pl.col("_gap_s").median()).item() or 0.0)
    m5_ok = (abs(med_gap_small - M5_TARGET_SEC) <= M5_TOL_SEC) if med_gap_small > 0 else False

    # ---- Daily stats (ventana observada + expected bars) ----
    daily = (
        df_s.group_by("_date")
        .agg([
            pl.len().alias("bars"),
            pl.col("_ts_s").min().alias("ts_min"),
            pl.col("_ts_s").max().alias("ts_max"),
        ])
        .with_columns([
            (pl.col("ts_max") - pl.col("ts_min")).alias("window_sec"),
        ])
        .with_columns([
            (pl.when(pl.col("window_sec") <= 0)
               .then(1)
               .otherwise((pl.col("window_sec") / M5_TARGET_SEC).floor() + 1)
             ).cast(pl.Int64).alias("expected_bars"),
        ])
        .with_columns([
            # coverage bounded
            (pl.col("bars") / (pl.col("expected_bars") + 1e-12)).clip(0.0, 1.0).alias("coverage_day"),
            (pl.col("bars") >= MIN_BARS_DAY_ACTIVE).alias("is_active_day"),
        ])
        .sort("_date")
    )

    # ventana (horas) sobre d√≠as activos
    daily_active_all = daily.filter(pl.col("is_active_day"))
    window_hours_median = float(
        daily_active_all.select((pl.col("window_sec").median() / 3600.0).alias("wh")).item() or 0.0
    )

    cls_kind, min_active_days_14 = _classify(window_hours_median, weekend_share)

    # ---- gap_p90 excluyendo el gap m√°ximo por d√≠a ----
    # rank de gaps dentro del d√≠a (donde el gap pertenece al d√≠a del bar actual)
    gaps = df_s.select(["_date", "_gap_s"]).drop_nulls()
    if gaps.height > 0:
        gaps = gaps.with_columns([
            pl.col("_gap_s").rank(method="dense", descending=True).over("_date").alias("_gap_rank"),
        ]).with_columns([
            pl.when(pl.col("_gap_rank") > 1).then(pl.col("_gap_s")).otherwise(None).alias("_gap_excl_day_max"),
        ])
        gap_p90_raw = float(gaps.select(pl.col("_gap_s").quantile(0.90, interpolation="nearest")).item() or 0.0)
        gap_p90_excl = float(gaps.select(pl.col("_gap_excl_day_max").drop_nulls().quantile(0.90, interpolation="nearest")).item() or 0.0)

        # si no hay suficientes gaps tras excluir max (p.ej. muy pocos por d√≠a), fallback a raw
        if gap_p90_excl <= 0:
            gap_p90_excl = gap_p90_raw

        # mediana del gap m√°ximo diario (para auditar tama√±o del break)
        day_max_gap = gaps.group_by("_date").agg(pl.col("_gap_s").max().alias("day_max_gap"))
        day_max_gap_p50 = float(day_max_gap.select(pl.col("day_max_gap").median()).item() or 0.0)
    else:
        gap_p90_raw = 0.0
        gap_p90_excl = 0.0
        day_max_gap_p50 = 0.0

    # ---- ventana operativa 14 d√≠as ----
    tmin_win = tmax - timedelta(days=int(WINDOW_CAL_DAYS))
    daily_w = daily.filter(pl.col("_date") >= pl.lit(tmin_win.date()))
    daily_active = daily_w.filter(pl.col("is_active_day"))
    active_days_14 = int(daily_active.height)

    cov_med = float(daily_active.select(pl.col("coverage_day").median()).item() or 0.0)

    dec = _decision(m5_ok, active_days_14, min_active_days_14, cov_med, gap_p90_excl)

    decisions.append({
        "symbol": sym,
        "decision": dec,
        "m5_ok": bool(m5_ok),
        "med_gap_small_sec": float(med_gap_small),
        "class_inferred": cls_kind,
        "window_hours_median": float(window_hours_median),
        "weekend_share": float(weekend_share),
        "active_days_14": int(active_days_14),
        "min_active_days_14": int(min_active_days_14),
        "coverage_median_active": float(cov_med),
        "gap_p90_raw_sec": float(gap_p90_raw),
        "gap_p90_excl_day_max_sec": float(gap_p90_excl),
        "day_max_gap_p50_sec": float(day_max_gap_p50),
        "crop_applied": bool(crop_applied),
        "crop_days": int(CROP_DAYS) if crop_applied else None,
        "crop_days_effective": crop_days_effective,
        "active_ohlcv_path": str(clean_paths[sym]),
    })

    print(
        f"[Celda 02B] {sym} :: {dec} | m5_ok={m5_ok} med_gap_small={med_gap_small:.0f}s "
        f"| class={cls_kind} win_h_med={window_hours_median:.1f} wk_share={weekend_share:.2%} "
        f"| active_days_14={active_days_14}/{min_active_days_14} cov_med={cov_med:.3f} "
        f"| gap_p90_excl={gap_p90_excl:.0f}s (raw={gap_p90_raw:.0f}s, day_max_p50={day_max_gap_p50:.0f}s) "
        f"| crop={crop_applied}"
    )

    if dec in ("GO", "WARN"):
        final_symbols.append(sym)

# Force continue (research)
if ALLOW_FORCE_CONTINUE and FORCE_CONTINUE_SYMBOLS:
    force = [str(s).upper().strip() for s in FORCE_CONTINUE_SYMBOLS if str(s).strip()]
    for s in force:
        if s in symbols and s not in final_symbols:
            final_symbols.append(s)

final_symbols = sorted(final_symbols)

# Persistir paths activos (incluye crop)
GLOBAL_STATE["data"]["ohlcv_clean_paths"] = clean_paths
GLOBAL_STATE["data"]["m5_ohlcv_paths"] = dict(clean_paths)  # alias directo

GLOBAL_STATE["data_quality"] = {
    "final_symbols": final_symbols,
    "decisions": decisions,
    "params": {
        "fail_on_no_go": FAIL_ON_NO_GO,
        "replace_selected_with_final": REPLACE_SELECTED_WITH_FINAL,
        "auto_crop_always": AUTO_CROP_ALWAYS,
        "crop_days": CROP_DAYS,
        "min_days_after_crop": MIN_DAYS_AFTER_CROP,
        "lookback_days_for_stats": LOOKBACK_DAYS_FOR_STATS,
        "window_cal_days": WINDOW_CAL_DAYS,
        "min_bars_day_active": MIN_BARS_DAY_ACTIVE,
        "gap_p90_excl_go_sec": GAP_P90_EXCL_GO_SEC,
        "gap_p90_excl_warn_sec": GAP_P90_EXCL_WARN_SEC,
        "cov_med_go": COV_MED_GO,
        "cov_med_warn": COV_MED_WARN,
        "m5_target_sec": M5_TARGET_SEC,
        "m5_tol_sec": M5_TOL_SEC,
        "max_small_gap_sec": MAX_SMALL_GAP_SEC,
    }
}

if REPLACE_SELECTED_WITH_FINAL:
    GLOBAL_STATE.setdefault("universe", {})
    GLOBAL_STATE["universe"]["selected_symbols_TREND"] = final_symbols

# Snapshots
dec_df = pl.DataFrame(decisions).sort("symbol")
dec_path = snap_dir / "data_quality_decisions.parquet"
dec_df.write_parquet(str(dec_path), compression="zstd")

snap = {
    "cell": "02B",
    "version": "v1.3",
    "final_symbols": final_symbols,
    "decisions_path": str(dec_path),
    "notes": [
        "Coverage bounded a [0,1].",
        "gap_p90 excluye el gap m√°ximo por d√≠a (break-aware).",
        "Timeframe sanity: med_gap_small ~300s para validar M5.",
    ],
}
snap_path = snap_dir / "data_quality_snapshot.json"
_save_json(snap_path, snap)

print(f"\nüíæ [Celda 02B] decisions -> {dec_path} (OK)")
print(f"üíæ [Celda 02B] snapshot  -> {snap_path} (OK)")
print(f"[Celda 02B] FINAL_SYMBOLS = {final_symbols}")

if not final_symbols and FAIL_ON_NO_GO:
    raise RuntimeError("[Celda 02B] NO_GO: ning√∫n s√≠mbolo pas√≥ QA (revisa data_quality_decisions.parquet).")

print(">>> Celda 02B v1.3 :: OK")


>>> Celda 02B v1.3 :: QA Gate INSTITUCIONAL (break-aware) + Coverage bounded + Timeframe sanity
[Celda 02B] BNBUSD :: GO | m5_ok=True med_gap_small=300s | class=ALLDAY_24_7 win_h_med=23.9 wk_share=28.06% | active_days_14=14/12 cov_med=0.993 | gap_p90_excl=300s (raw=300s, day_max_p50=900s) | crop=True
[Celda 02B] BTCUSD :: GO | m5_ok=True med_gap_small=300s | class=ALLDAY_24_7 win_h_med=23.9 wk_share=28.05% | active_days_14=15/12 cov_med=0.993 | gap_p90_excl=300s (raw=300s, day_max_p50=900s) | crop=True
[Celda 02B] LVMH :: GO | m5_ok=True med_gap_small=300s | class=SESSION win_h_med=8.3 wk_share=0.00% | active_days_14=11/7 cov_med=1.000 | gap_p90_excl=300s (raw=300s, day_max_p50=56400s) | crop=True
[Celda 02B] XAUAUD :: WARN | m5_ok=True med_gap_small=300s | class=WEEKDAY_24H win_h_med=23.9 wk_share=3.71% | active_days_14=12/8 cov_med=0.948 | gap_p90_excl=300s (raw=300s, day_max_p50=4800s) | crop=True

üíæ [Celda 02B] decisions -> C:\Quant\MT5_Data_Extraction\ER_STRATEGY_LAB\research_l

In [31]:
# ===================== Celda 02C ‚Äî Compat: alias m5_ohlcv_paths ‚Üí ohlcv_clean_paths =====================
from __future__ import annotations
from pathlib import Path

print(">>> Celda 02C :: Compat aliases (m5_ohlcv_paths)")

if "GLOBAL_STATE" not in globals() or not isinstance(GLOBAL_STATE, dict):
    raise RuntimeError("[Celda 02C] ERROR: GLOBAL_STATE no existe.")

data_state = GLOBAL_STATE.get("data", {}) or GLOBAL_STATE.get("data", {})
# En tu pipeline actual, Celda 02 v2.2 guard√≥ en GLOBAL_STATE["data"] (no en root).
# Normalizamos:
if "data" not in GLOBAL_STATE or not isinstance(GLOBAL_STATE.get("data"), dict):
    GLOBAL_STATE["data"] = {}
data_state = GLOBAL_STATE["data"]

# Fuente preferida: paths ya ‚Äúactivos‚Äù (incluye crop si 02B lo aplic√≥)
src_paths = data_state.get("ohlcv_clean_paths") or data_state.get("ohlcv_clean_paths".lower()) or {}
src_dir   = data_state.get("ohlcv_clean_dir") or ""

if not src_paths:
    raise RuntimeError("[Celda 02C] ERROR: no encuentro ohlcv_clean_paths (ejecuta Celda 02 v2.2).")

# Alias esperados por Celda 05/07:
data_state["m5_ohlcv_paths"] = dict(src_paths)
data_state["m5_ohlcv_dir"] = str(Path(src_dir).resolve()) if src_dir else str(Path(list(src_paths.values())[0]).resolve().parent)

print("[Celda 02C] OK: m5_ohlcv_paths apunta a tus parquets limpios/activos.")
print(">>> Celda 02C :: OK")


>>> Celda 02C :: Compat aliases (m5_ohlcv_paths)
[Celda 02C] OK: m5_ohlcv_paths apunta a tus parquets limpios/activos.
>>> Celda 02C :: OK


In [32]:
# ===================== Celda 03 v1.3.3 ‚Äî Cost Model INSTRUMENT-AWARE (class + spread + coherent fallback) =====================
from __future__ import annotations

from pathlib import Path
import json
import os
import re
import math
import polars as pl

print(">>> Celda 03 v1.3.3 :: Cost Model INSTRUMENT-AWARE (class + spread + coherent fallback)")

# ========================= Requisitos previos =========================
if "GLOBAL_STATE" not in globals() or not isinstance(GLOBAL_STATE, dict):
    raise RuntimeError("[Celda 03] ERROR: GLOBAL_STATE no existe o no es dict.")

paths = GLOBAL_STATE.get("paths", {}) or {}
for k in ("run_snapshots",):
    if k not in paths:
        raise RuntimeError(f"[Celda 03] ERROR: falta GLOBAL_STATE['paths']['{k}'].")

univ = (GLOBAL_STATE.get("universe", {}) or {})
selected = univ.get("selected_symbols_TREND", []) or []
if not selected:
    raise RuntimeError("[Celda 03] ERROR: no hay selected_symbols_TREND. Ejecuta Celda 01/02B.")

selected_u = [str(s).upper().strip() for s in selected if str(s).strip()]
print(f"[Celda 03] selected_TREND = {selected_u}")

data_state = (GLOBAL_STATE.get("data", {}) or {})
m5_paths = (
    data_state.get("m5_ohlcv_paths")
    or data_state.get("ohlcv_clean_paths")
    or {}
)
if not m5_paths:
    raise RuntimeError("[Celda 03] ERROR: no hay m5_ohlcv_paths/ohlcv_clean_paths. Ejecuta Celda 02/02B/02C.")

# normaliza keys a UPPER
m5_paths_u = {str(k).upper().strip(): str(v) for k, v in m5_paths.items() if k is not None and v is not None}

snap_dir = Path(paths["run_snapshots"]).resolve()
snap_dir.mkdir(parents=True, exist_ok=True)

# ========================= Convenci√≥n de ejecuci√≥n (freeze) =========================
EXECUTION_CONVENTION = {
    "signal_timestamp": "close(t)",
    "fill_timestamp": "t+1",
    "entry_price_rule": "open(t+1) if exists else close(t+1)",
    "exit_price_rule": "open(t+1) if exists else close(t+1)",
    "notes": [
        "Convenci√≥n fija para todo c√°lculo downstream.",
        "El modelo de costos se aplica como roundtrip (entry+exit) si el costo es por-lado.",
    ],
}

# ========================= Config defaults =========================
cfg = GLOBAL_STATE.get("config", {}) or {}
costs_cfg = (cfg.get("costs", {}) or {})

def _cfg_float(d: dict, k: str, default: float) -> float:
    try:
        return float(d.get(k, default))
    except Exception:
        return float(default)

def _cfg_bool(d: dict, k: str, default: bool) -> bool:
    try:
        v = d.get(k, default)
        if isinstance(v, bool):
            return v
        if isinstance(v, str):
            return v.strip().lower() in ("1", "true", "yes", "y", "on")
        return bool(v)
    except Exception:
        return bool(default)

COST_DEFAULTS = {
    "min_cost_bps": _cfg_float(costs_cfg, "min_cost_bps", 0.0),
    "max_cost_bps": _cfg_float(costs_cfg, "max_cost_bps", 500.0),
    "cost_reported_is_roundtrip": _cfg_bool(costs_cfg, "cost_reported_is_roundtrip_default", False),
    "stress_mult_if_missing": _cfg_float(costs_cfg, "stress_mult_if_missing", 1.80),
    "slippage_extra_bps_base": _cfg_float(costs_cfg, "slippage_extra_bps_base", 0.0),
    "slippage_extra_bps_stress": _cfg_float(costs_cfg, "slippage_extra_bps_stress", 0.0),
}

print(f"[Celda 03] COST_DEFAULTS = {COST_DEFAULTS}")

def _clip_bps(x: float) -> float:
    return float(min(max(float(x), float(COST_DEFAULTS["min_cost_bps"])), float(COST_DEFAULTS["max_cost_bps"])))

def _bps_to_dec(bps: float) -> float:
    return float(bps) / 10000.0

def cost_roundtrip_dec(cost_bps: float, cost_reported_is_roundtrip: bool) -> float:
    c = _bps_to_dec(cost_bps)
    return c if cost_reported_is_roundtrip else (2.0 * c)

def apply_cost_to_gross_return(gross_ret_dec: float, cost_bps: float, cost_reported_is_roundtrip: bool) -> float:
    return float(gross_ret_dec) - cost_roundtrip_dec(cost_bps, cost_reported_is_roundtrip)

# ========================= Clasificaci√≥n de instrumento =========================
_CRYPTO_PREFIX = ("BTC","ETH","BNB","SOL","XRP","ADA","DOGE","LTC","AVAX","DOT","MATIC","LINK","TRX")
_METAL_PREFIX  = ("XAU","XAG","XPT","XPD")

def infer_asset_class(sym_u: str) -> str:
    s = sym_u.upper().strip()
    if s.startswith(_METAL_PREFIX):
        return "METAL"
    if s.startswith(_CRYPTO_PREFIX) or ("USD" in s and any(s.startswith(p) for p in _CRYPTO_PREFIX)):
        return "CRYPTO"
    if re.fullmatch(r"[A-Z]{6}", s):
        return "FX"
    # fallback institucional: si no es FX/crypto/metal, tratar como equity/other
    return "EQUITY"

# Fallback coherente por clase (bps, base/stress)
FALLBACK_BPS = {
    "CRYPTO": (8.0, 16.0),
    "FX":     (1.2, 2.5),
    "METAL":  (4.0, 8.0),
    "EQUITY": (12.0, 25.0),
}

# ========================= Derivaci√≥n desde spread (si existe) =========================
def _load_tail_close_spread(sym_u: str, max_rows: int = 200_000) -> pl.DataFrame:
    if sym_u not in m5_paths_u:
        raise KeyError(f"[Celda 03] ERROR: s√≠mbolo {sym_u} no existe en m5_paths. keys_sample={list(m5_paths_u)[:10]}")
    p = Path(m5_paths_u[sym_u]).resolve()
    # leer solo columnas necesarias
    df = pl.read_parquet(str(p), columns=[c for c in ("time_utc","close","spread") if c in pl.read_parquet(str(p), n_rows=1).columns])
    if "close" not in df.columns:
        raise RuntimeError(f"[Celda 03] ERROR: falta close en {sym_u}.")
    df = df.sort("time_utc") if "time_utc" in df.columns else df
    if df.height > max_rows:
        df = df.tail(max_rows)
    return df

def _derive_cost_from_spread_bps(sym_u: str) -> tuple[float | None, float | None, str | None, str | None]:
    # Devuelve: base_bps, stress_bps, base_src, stress_src
    try:
        df = _load_tail_close_spread(sym_u)
    except Exception:
        return None, None, None, None

    if "spread" not in df.columns:
        return None, None, None, None

    d = (
        df.select([
            pl.col("close").cast(pl.Float64, strict=False).alias("close"),
            pl.col("spread").cast(pl.Float64, strict=False).alias("spread"),
        ])
        .filter(pl.col("close") > 0)
        .filter(pl.col("spread").is_not_null() & (pl.col("spread") >= 0))
        .with_columns((pl.col("spread") / pl.col("close") * 10000.0).alias("spread_bps"))
        .filter(pl.col("spread_bps").is_not_null())
        .filter(pl.col("spread_bps") >= 0)
        .filter(pl.col("spread_bps") <= 500)   # guardrail institucional
    )

    if d.height < 500:
        return None, None, None, None

    med = float(d.select(pl.col("spread_bps").median()).item())
    p90 = float(d.select(pl.col("spread_bps").quantile(0.90, interpolation="nearest")).item())

    if not (math.isfinite(med) and 0 <= med <= 500):
        return None, None, None, None
    if not (math.isfinite(p90) and 0 <= p90 <= 500):
        p90 = med * float(COST_DEFAULTS["stress_mult_if_missing"])

    return med, max(p90, med), "derived_from_m5_spread_median", "derived_from_m5_spread_p90"

# ========================= Construcci√≥n por s√≠mbolo =========================
costs_by_symbol: dict[str, dict] = {}
slip_base = float(COST_DEFAULTS["slippage_extra_bps_base"])
slip_strs = float(COST_DEFAULTS["slippage_extra_bps_stress"])
stress_mult = float(COST_DEFAULTS["stress_mult_if_missing"])
is_roundtrip = bool(COST_DEFAULTS["cost_reported_is_roundtrip"])

for su in selected_u:
    asset_class = infer_asset_class(su)

    base_bps = None
    stress_bps = None
    base_src = None
    stress_src = None

    # A) intentar derivar de spread (si existe y es razonable)
    b, s, bs, ss = _derive_cost_from_spread_bps(su)
    if b is not None:
        base_bps, stress_bps, base_src, stress_src = float(b), float(s), bs, ss

    # B) fallback coherente por clase
    if base_bps is None:
        fb = FALLBACK_BPS.get(asset_class, (10.0, 20.0))
        base_bps = float(fb[0])
        stress_bps = float(fb[1])
        base_src = f"fallback_by_class:{asset_class}"
        stress_src = f"fallback_by_class:{asset_class}"

    # C) si stress faltara, derivar
    if stress_bps is None:
        stress_bps = float(base_bps) * stress_mult
        stress_src = f"derived_mult:{stress_mult:.2f}"

    # D) slippage extra + clip
    base_bps = _clip_bps(float(base_bps) + slip_base)
    stress_bps = _clip_bps(float(stress_bps) + slip_strs)

    # Guardrail: nunca ‚Äúcrypto fallback‚Äù en equity
    if asset_class == "EQUITY" and isinstance(base_src, str) and "fallback_by_class:CRYPTO" in base_src:
        raise RuntimeError(f"[Celda 03] ERROR: Equity {su} qued√≥ clasificado/valuado como CRYPTO. Revisa infer_asset_class().")

    costs_by_symbol[su] = {
        "ASSET_CLASS": asset_class,
        "COST_BASE_BPS": float(base_bps),
        "COST_STRESS_BPS": float(stress_bps),
        "COST_BASE_SOURCE": str(base_src),
        "COST_STRESS_SOURCE": str(stress_src),
        "SLIPPAGE_EXTRA_BPS_BASE": float(slip_base),
        "SLIPPAGE_EXTRA_BPS_STRESS": float(slip_strs),
    }

    print(f"[Celda 03] {su} ({asset_class}) :: BASE={base_bps:.3f}bps [{base_src}] | STRESS={stress_bps:.3f}bps [{stress_src}]")

# ========================= Gate 03 ‚Äî Sanity tests =========================
for su in selected_u:
    # usar close t0,t1 para validar net<=gross y cost=0 preserva
    p = Path(m5_paths_u[su]).resolve()
    dfp = pl.read_parquet(str(p), columns=[c for c in ("time_utc","close") if c in pl.read_parquet(str(p), n_rows=1).columns]).sort("time_utc")
    if dfp.height < 3:
        raise RuntimeError(f"[Celda 03] ERROR: muy pocas filas para sanity ({su}).")

    c0 = float(dfp["close"][0]); c1 = float(dfp["close"][1])
    if c0 <= 0 or c1 <= 0:
        raise RuntimeError(f"[Celda 03] ERROR: close inv√°lido en sanity ({su}).")

    gross = (c1 / c0) - 1.0
    net0 = apply_cost_to_gross_return(gross, cost_bps=0.0, cost_reported_is_roundtrip=is_roundtrip)
    if abs(net0 - gross) > 1e-12:
        raise RuntimeError(f"[Celda 03] GATE FAIL: cost=0 no preserva gross ({su}).")

    cb = float(costs_by_symbol[su]["COST_BASE_BPS"])
    netb = apply_cost_to_gross_return(gross, cost_bps=cb, cost_reported_is_roundtrip=is_roundtrip)
    if netb > gross + 1e-12:
        raise RuntimeError(f"[Celda 03] GATE FAIL: net>gross con costo ({su}). gross={gross}, net={netb}, cost_bps={cb}")

print("[Celda 03] Gate 03 :: sanity tests OK")

# ========================= Snapshot + GLOBAL_STATE =========================
snapshot = {
    "execution_convention": EXECUTION_CONVENTION,
    "cost_defaults": COST_DEFAULTS,
    "cost_reported_is_roundtrip": bool(is_roundtrip),
    "fallback_bps_by_class": FALLBACK_BPS,
    "costs_by_symbol": costs_by_symbol,
    "symbols": costs_by_symbol,  # compat
}

snap_path = snap_dir / "execution_and_cost_model.json"
snap_path.write_text(json.dumps(snapshot, indent=2, ensure_ascii=False), encoding="utf-8")

GLOBAL_STATE.setdefault("execution", {})
GLOBAL_STATE["execution"]["convention"] = EXECUTION_CONVENTION

GLOBAL_STATE.setdefault("cost_model", {})
GLOBAL_STATE["cost_model"]["cost_reported_is_roundtrip"] = bool(is_roundtrip)
GLOBAL_STATE["cost_model"]["costs_by_symbol"] = costs_by_symbol
GLOBAL_STATE["cost_model"]["symbols"] = costs_by_symbol
GLOBAL_STATE["cost_model"]["snapshot_path"] = str(snap_path)

print(f"[Celda 03] SNAPSHOT -> {snap_path} (OK)")
print(">>> Celda 03 v1.3.3 :: OK")


>>> Celda 03 v1.3.3 :: Cost Model INSTRUMENT-AWARE (class + spread + coherent fallback)
[Celda 03] selected_TREND = ['BNBUSD', 'BTCUSD', 'LVMH', 'XAUAUD']
[Celda 03] COST_DEFAULTS = {'min_cost_bps': 0.0, 'max_cost_bps': 500.0, 'cost_reported_is_roundtrip': False, 'stress_mult_if_missing': 1.8, 'slippage_extra_bps_base': 0.0, 'slippage_extra_bps_stress': 0.0}
[Celda 03] BNBUSD (CRYPTO) :: BASE=8.000bps [fallback_by_class:CRYPTO] | STRESS=16.000bps [fallback_by_class:CRYPTO]
[Celda 03] BTCUSD (CRYPTO) :: BASE=8.000bps [fallback_by_class:CRYPTO] | STRESS=16.000bps [fallback_by_class:CRYPTO]
[Celda 03] LVMH (EQUITY) :: BASE=12.000bps [fallback_by_class:EQUITY] | STRESS=25.000bps [fallback_by_class:EQUITY]
[Celda 03] XAUAUD (METAL) :: BASE=4.000bps [fallback_by_class:METAL] | STRESS=8.000bps [fallback_by_class:METAL]
[Celda 03] Gate 03 :: sanity tests OK
[Celda 03] SNAPSHOT -> C:\Quant\MT5_Data_Extraction\ER_STRATEGY_LAB\research_logs\runs\20251222_103043\snapshots\execution_and_cost_model.

In [33]:
# ===================== Celda 04 v1.3 ‚Äî WFO folds (hard-consistency + multi-symbol safe) =====================
from __future__ import annotations

from pathlib import Path
import json
from datetime import datetime, timedelta, timezone
import polars as pl

print(">>> Celda 04 v1.3 :: WFO folds (hard-consistency + multi-symbol safe)")

# ========================= Validaciones GLOBAL_STATE =========================
if "GLOBAL_STATE" not in globals() or not isinstance(GLOBAL_STATE, dict):
    raise RuntimeError("[Celda 04] ERROR: GLOBAL_STATE no existe.")

paths = GLOBAL_STATE.get("paths", {}) or {}
for k in ("artifacts", "run_snapshots"):
    if k not in paths:
        raise RuntimeError(f"[Celda 04] ERROR: falta GLOBAL_STATE['paths']['{k}'].")

univ = (GLOBAL_STATE.get("universe", {}) or {})
selected = [str(s).upper().strip() for s in (univ.get("selected_symbols_TREND", []) or []) if str(s).strip()]
if not selected:
    raise RuntimeError("[Celda 04] ERROR: selected_symbols_TREND vac√≠o. Ejecuta 02B (REPLACE_SELECTED_WITH_FINAL=True).")

dq = (GLOBAL_STATE.get("data_quality", {}) or {})
dq_final = [str(s).upper().strip() for s in (dq.get("final_symbols", []) or []) if str(s).strip()]

# Hard-consistency: si existe data_quality.final_symbols, debe coincidir con selected
if dq_final:
    if set(dq_final) != set(selected):
        raise RuntimeError(
            "[Celda 04] ERROR: mismatch entre universe.selected_symbols_TREND y data_quality.final_symbols.\n"
            f"  selected={selected}\n"
            f"  dq_final={dq_final}\n"
            "Soluci√≥n: re-ejecuta Celda 02B v1.3 (o revisa si ejecutaste celdas fuera de orden)."
        )

data_state = (GLOBAL_STATE.get("data", {}) or {})
m5_paths = (
    data_state.get("m5_ohlcv_paths")
    or data_state.get("ohlcv_clean_paths")
    or {}
)
if not m5_paths:
    raise RuntimeError("[Celda 04] ERROR: falta m5_ohlcv_paths/ohlcv_clean_paths. Ejecuta Celda 02B/02C.")

# normaliza keys
m5_paths_u = {str(k).upper().strip(): str(v) for k, v in m5_paths.items() if k is not None and v is not None}

print(f"[Celda 04] selected_TREND = {selected}")
print(f"[Celda 04] m5_paths keys sample = {list(m5_paths_u.keys())[:8]}")

OUT_WFO_DIR = Path(paths["artifacts"]).resolve() / "wfo"
OUT_WFO_DIR.mkdir(parents=True, exist_ok=True)
snap_dir = Path(paths["run_snapshots"]).resolve()
snap_dir.mkdir(parents=True, exist_ok=True)

# ========================= Par√°metros base =========================
PREF = {"n_folds": 4, "is_days": 365, "oos_days": 90, "embargo_days": 7}
FALLBACK = {"n_folds": 3, "is_days": 180, "oos_days": 60, "embargo_days": 7}

MIN_OOS_DAYS_HARD = 30
MIN_IS_DAYS_HARD = 60
MIN_OOS_DAYS_TARGET = 60
MIN_IS_DAYS_TARGET = 180

def _ensure_utc(dt: datetime) -> datetime:
    if dt.tzinfo is None:
        return dt.replace(tzinfo=timezone.utc)
    return dt.astimezone(timezone.utc)

def _read_time_range(sym_u: str) -> tuple[datetime, datetime, int]:
    if sym_u not in m5_paths_u:
        raise KeyError(f"[Celda 04] ERROR: no hay path para {sym_u} en m5_paths.")
    p = Path(m5_paths_u[sym_u]).resolve()
    lf = pl.scan_parquet(str(p)).select([
        pl.col("time_utc").min().alias("tmin"),
        pl.col("time_utc").max().alias("tmax"),
        pl.len().alias("n"),
    ])
    out = lf.collect()
    tmin = out["tmin"][0]; tmax = out["tmax"][0]; n = int(out["n"][0])
    if tmin is None or tmax is None:
        raise RuntimeError(f"[Celda 04] ERROR: rango temporal inv√°lido ({sym_u}).")
    return (_ensure_utc(tmin), _ensure_utc(tmax), n)

def _required_span(n_folds: int, is_days: int, oos_days: int, emb_days: int) -> float:
    return float(n_folds) * (float(is_days) + float(oos_days) + float(emb_days))

def _build_folds_for_range(tmin: datetime, tmax: datetime, n_folds: int, is_days: int, oos_days: int, emb_days: int) -> list[dict]:
    folds = []
    oos_end = tmax
    for i in range(int(n_folds)):
        oos_start = oos_end - timedelta(days=int(oos_days))
        is_end = oos_start - timedelta(days=int(emb_days))
        is_start = is_end - timedelta(days=int(is_days))
        if is_start < tmin:
            break
        folds.append({
            "fold_id": f"F{i+1}",
            "IS_start": is_start.isoformat(),
            "IS_end": is_end.isoformat(),
            "embargo_days": int(emb_days),
            "OOS_start": oos_start.isoformat(),
            "OOS_end": oos_end.isoformat(),
            "is_days": int(is_days),
            "oos_days": int(oos_days),
        })
        oos_end = oos_start
    return folds

def _auto_params(span_days: float) -> dict:
    emb = int(FALLBACK["embargo_days"])
    oos = int(max(MIN_OOS_DAYS_HARD, min(90, round(span_days * 0.20))))
    is_guess = int(max(MIN_IS_DAYS_HARD, round(span_days * 0.70)))
    is_days = int(min(is_guess, max(MIN_IS_DAYS_HARD, int(span_days) - oos - emb)))
    oos_days = int(max(MIN_OOS_DAYS_HARD, int(span_days) - is_days - emb))
    if oos_days < 10:
        oos_days = 10
        is_days = int(max(10, int(span_days) - oos_days - emb))
    return {"n_folds": 1, "is_days": is_days, "oos_days": oos_days, "embargo_days": emb}

def _gate_folds(folds: list[dict], label: str) -> dict:
    degraded = False
    for f in folds:
        is_s = _ensure_utc(datetime.fromisoformat(f["IS_start"]))
        is_e = _ensure_utc(datetime.fromisoformat(f["IS_end"]))
        o_s  = _ensure_utc(datetime.fromisoformat(f["OOS_start"]))
        o_e  = _ensure_utc(datetime.fromisoformat(f["OOS_end"]))
        is_len = (is_e - is_s).total_seconds() / 86400.0
        o_len  = (o_e - o_s).total_seconds() / 86400.0
        if is_len + 1e-9 < MIN_IS_DAYS_HARD or o_len + 1e-9 < MIN_OOS_DAYS_HARD:
            raise RuntimeError(f"[Celda 04] GATE FAIL ({label}): fold demasiado corto. IS={is_len:.1f}d OOS={o_len:.1f}d")
        if is_len + 1e-9 < MIN_IS_DAYS_TARGET or o_len + 1e-9 < MIN_OOS_DAYS_TARGET:
            degraded = True
    return {"degraded_minimums": degraded}

# ========================= Rango por s√≠mbolo + folds por s√≠mbolo =========================
ranges = []
for sym in selected:
    tmin, tmax, n = _read_time_range(sym)
    ranges.append((sym, tmin, tmax, n))
    print(f"[Celda 04] {sym} :: tmin={tmin} | tmax={tmax} | rows={n}")

folds_by_symbol: dict[str, list[dict]] = {}
params_by_symbol: dict[str, dict] = {}
flags_by_symbol: dict[str, dict] = {}

for sym, tmin, tmax, _ in ranges:
    span_days = (tmax - tmin).total_seconds() / 86400.0
    pref_need = _required_span(PREF["n_folds"], PREF["is_days"], PREF["oos_days"], PREF["embargo_days"])
    fb_need   = _required_span(FALLBACK["n_folds"], FALLBACK["is_days"], FALLBACK["oos_days"], FALLBACK["embargo_days"])

    if span_days >= pref_need:
        used = dict(PREF); mode = "WFO_pref"
    elif span_days >= fb_need:
        used = dict(FALLBACK); mode = "WFO_fallback"
    else:
        used = _auto_params(span_days); mode = "SINGLE_FOLD_AUTO"

    folds = _build_folds_for_range(tmin, tmax, used["n_folds"], used["is_days"], used["oos_days"], used["embargo_days"])
    if len(folds) < 1:
        raise RuntimeError(f"[Celda 04] GATE FAIL ({sym}): no se pudo construir ni 1 fold. span_days‚âà{span_days:.1f}")

    q = _gate_folds(folds, label=sym)

    folds_by_symbol[sym] = folds
    params_by_symbol[sym] = {"mode": mode, "params_used": used, "span_days": float(span_days)}
    flags_by_symbol[sym] = q

# ========================= Folds globales (intersecci√≥n) para compat =========================
global_tmin = max(r[1] for r in ranges)
global_tmax = min(r[2] for r in ranges)
if global_tmax <= global_tmin:
    raise RuntimeError("[Celda 04] ERROR: rango com√∫n inv√°lido (tmax<=tmin).")

span_days_g = (global_tmax - global_tmin).total_seconds() / 86400.0
pref_need_g = _required_span(PREF["n_folds"], PREF["is_days"], PREF["oos_days"], PREF["embargo_days"])
fb_need_g   = _required_span(FALLBACK["n_folds"], FALLBACK["is_days"], FALLBACK["oos_days"], FALLBACK["embargo_days"])

if span_days_g >= pref_need_g:
    used_g = dict(PREF); mode_g = "WFO_pref"
elif span_days_g >= fb_need_g:
    used_g = dict(FALLBACK); mode_g = "WFO_fallback"
else:
    used_g = _auto_params(span_days_g); mode_g = "SINGLE_FOLD_AUTO"

folds_global = _build_folds_for_range(global_tmin, global_tmax, used_g["n_folds"], used_g["is_days"], used_g["oos_days"], used_g["embargo_days"])
if len(folds_global) < 1:
    raise RuntimeError(f"[Celda 04] GATE FAIL (GLOBAL): no se pudo construir ni 1 fold. span_days‚âà{span_days_g:.1f}")

qg = _gate_folds(folds_global, label="GLOBAL")

print(f"[Celda 04] GLOBAL common_range :: tmin={global_tmin} | tmax={global_tmax} | span_days‚âà{span_days_g:.1f}")
print(f"[Celda 04] GLOBAL mode={mode_g} folds={len(folds_global)} degraded_minimums={qg['degraded_minimums']}")
for f in folds_global:
    print(f"  - {f['fold_id']} :: IS[{f['IS_start']} -> {f['IS_end']}] | OOS[{f['OOS_start']} -> {f['OOS_end']}] | embargo={f['embargo_days']}d")

# ========================= Persistencia =========================
wfo_payload = {
    "symbols": selected,
    "folds_global": folds_global,
    "folds_by_symbol": folds_by_symbol,
    "common_range_global": {"tmin": global_tmin.isoformat(), "tmax": global_tmax.isoformat(), "span_days": span_days_g},
    "global_mode": mode_g,
    "global_params_used": used_g,
    "by_symbol_meta": params_by_symbol,
    "by_symbol_quality": flags_by_symbol,
    "rules": [
        "Calibrar SOLO en IS.",
        "Aplicar thresholds frozen en OOS.",
        "No usar OOS en calibraci√≥n.",
    ],
}

out_path = OUT_WFO_DIR / "folds_wfo.json"
out_path.write_text(json.dumps(wfo_payload, indent=2, ensure_ascii=False), encoding="utf-8")

snap_path = snap_dir / "folds_wfo.json"
snap_path.write_text(json.dumps(wfo_payload, indent=2, ensure_ascii=False), encoding="utf-8")

GLOBAL_STATE.setdefault("wfo", {})
GLOBAL_STATE["wfo"]["folds_path"] = str(out_path)
GLOBAL_STATE["wfo"]["folds"] = folds_global                 # compat: lista
GLOBAL_STATE["wfo"]["folds_by_symbol"] = folds_by_symbol    # robust multi-asset
GLOBAL_STATE["wfo"]["common_range"] = wfo_payload["common_range_global"]
GLOBAL_STATE["wfo"]["params_used"] = used_g
GLOBAL_STATE["wfo"]["quality_flags"] = {"mode": mode_g, **qg}

print(f"üíæ OUTPUT   ‚Üí {out_path} (OK)")
print(f"üíæ SNAPSHOT ‚Üí {snap_path} (OK)")
print(">>> Celda 04 v1.3 :: OK")


>>> Celda 04 v1.3 :: WFO folds (hard-consistency + multi-symbol safe)
[Celda 04] selected_TREND = ['BNBUSD', 'BTCUSD', 'LVMH', 'XAUAUD']
[Celda 04] m5_paths keys sample = ['BNBUSD', 'XAUAUD', 'BTCUSD', 'LVMH']
[Celda 04] BNBUSD :: tmin=2025-06-05 05:50:00+00:00 | tmax=2025-12-02 05:50:00+00:00 | rows=49040
[Celda 04] BTCUSD :: tmin=2025-06-05 23:50:00+00:00 | tmax=2025-12-02 23:50:00+00:00 | rows=48551
[Celda 04] LVMH :: tmin=2025-06-04 18:25:00+00:00 | tmax=2025-12-01 18:25:00+00:00 | rows=12929
[Celda 04] XAUAUD :: tmin=2025-06-05 05:50:00+00:00 | tmax=2025-12-02 05:50:00+00:00 | rows=34788
[Celda 04] GLOBAL common_range :: tmin=2025-06-05 23:50:00+00:00 | tmax=2025-12-01 18:25:00+00:00 | span_days‚âà178.8
[Celda 04] GLOBAL mode=SINGLE_FOLD_AUTO folds=1 degraded_minimums=True
  - F1 :: IS[2025-06-06T18:25:00+00:00 -> 2025-10-09T18:25:00+00:00] | OOS[2025-10-16T18:25:00+00:00 -> 2025-12-01T18:25:00+00:00] | embargo=7d
üíæ OUTPUT   ‚Üí C:\Quant\MT5_Data_Extraction\ER_STRATEGY_LAB\arti

In [34]:
# ===================== Celda 05 v1.1 ‚Äî Features Causales (TREND, M5) [OHLCV-aware + state robust + ATR] =====================
# OBJETIVO:
#   1) Construir features base *causales* sobre series M5 limpias (Celda 02 v2.0).
#   2) Features:
#       - returns (logret, ret)
#       - volatilidad rolling (std de logret)
#       - momentum (ratio close/close.shift(L) - 1)
#       - ER_kaufman + PD_kaufman (=1-ER)
#       - ATR (Wilder) si hay OHLC
#   3) Persistir features por s√≠mbolo en artifacts/features/features_base_v11/
#
# CAUSALIDAD:
#   - Ventanas rolling usan SOLO pasado (t y anteriores).
#   - Nada de shift(-1) ni ventanas centradas.
#   - Dise√±ado para se√±al al close(t) y ejecuci√≥n t+1 (Celda 03).
#
# GATE 05:
#   - Spot-check anti-lookahead: perturbar √∫ltimo close y verificar que filas previas no cambian.
# ========================================================================================================

from __future__ import annotations

from pathlib import Path
import json
import math
import polars as pl

print(">>> Celda 05 v1.1 :: Features causales (TREND, M5) [OHLCV-aware + state robust + ATR]")

# ========================= Validaciones de estado =========================
if "GLOBAL_STATE" not in globals() or not isinstance(GLOBAL_STATE, dict):
    raise RuntimeError("[Celda 05] ERROR: GLOBAL_STATE no existe.")

for k in ("paths", "data", "universe"):
    if k not in GLOBAL_STATE:
        raise RuntimeError(f"[Celda 05] ERROR: falta GLOBAL_STATE['{k}'].")

paths = GLOBAL_STATE["paths"]
data_state = GLOBAL_STATE["data"]
uni_state = GLOBAL_STATE["universe"]

selected = (uni_state.get("selected_symbols_TREND") or [])
if not selected:
    raise RuntimeError("[Celda 05] ERROR: no hay selected_symbols_TREND. Ejecuta Celda 01.")

# Compatibilidad: aceptar m5_ohlcv_paths o m5_clean_paths
m5_paths = data_state.get("m5_ohlcv_paths") or data_state.get("m5_clean_paths") or {}
m5_key_used = "m5_ohlcv_paths" if data_state.get("m5_ohlcv_paths") else ("m5_clean_paths" if data_state.get("m5_clean_paths") else "NONE")
if not isinstance(m5_paths, dict) or not m5_paths:
    raise RuntimeError("[Celda 05] ERROR: no hay m5_ohlcv_paths ni m5_clean_paths en GLOBAL_STATE. Ejecuta Celda 02 v2.0.")

# Output dir (no pisar v1.0)
OUT_FEATURES_BASE = Path(paths["artifacts"]).resolve() / "features" / "features_base_v11"
OUT_FEATURES_BASE.mkdir(parents=True, exist_ok=True)

print(f"[Celda 05] selected_TREND          = {selected}")
print(f"[Celda 05] data paths key used    = {m5_key_used}")
print(f"[Celda 05] OUT_FEATURES_BASE      = {OUT_FEATURES_BASE}")

# ========================= Par√°metros =========================
# Ventanas t√≠picas M5:
BARS_1H  = 12
BARS_4H  = 48
BARS_1D  = 288
BARS_7D  = 2016

# ER Kaufman
ER_N_FAST = 10
ER_N_MAIN = 20

# Vol windows (std logret)
VOL_W1 = BARS_4H
VOL_W2 = BARS_1D
VOL_W3 = BARS_7D

# Momentum windows (ratio)
MOM_W1 = BARS_4H
MOM_W2 = BARS_1D
MOM_W3 = BARS_7D

# ATR (Wilder)
ATR_N = 72  # ~6h

# Spot-check anti-lookahead
SPOTCHECK_ENABLE = True
SPOTCHECK_MAX_SYMBOLS = 1
SPOTCHECK_PERTURB_MULT = 1.25

# ========================= Helpers =========================
def _ensure_contract(df: pl.DataFrame) -> pl.DataFrame:
    # Requeridos m√≠nimos
    if "time_utc" not in df.columns:
        raise RuntimeError("[Celda 05] ERROR: falta time_utc en dataset.")
    if "close" not in df.columns:
        raise RuntimeError("[Celda 05] ERROR: falta close en dataset.")

    # Tipos + orden + dedupe
    df = df.with_columns([
        pl.col("time_utc").cast(pl.Datetime("us", "UTC"), strict=False),
        pl.col("close").cast(pl.Float64, strict=False),
    ])

    # OHLC opcional pero si existe lo tipamos
    for c in ("open", "high", "low"):
        if c in df.columns:
            df = df.with_columns(pl.col(c).cast(pl.Float64, strict=False))

    df = df.sort("time_utc").unique(subset=["time_utc"], keep="last")
    return df

def _alpha_wilder(n: int) -> float:
    # Wilder smoothing alpha
    return 1.0 / float(n)

def _compute_atr_wilder(df: pl.DataFrame, n: int) -> pl.Series | None:
    # ATR requiere OHLC
    if not all(c in df.columns for c in ("high", "low", "close")):
        return None

    # True Range: max(high-low, abs(high-prev_close), abs(low-prev_close))
    prev_close = pl.col("close").shift(1)
    tr = pl.max_horizontal([
        (pl.col("high") - pl.col("low")).abs(),
        (pl.col("high") - prev_close).abs(),
        (pl.col("low") - prev_close).abs(),
    ]).alias("__tr__")

    # Wilder ATR = ewm_mean(alpha=1/n, adjust=False) sobre TR
    a = _alpha_wilder(n)
    atr_expr = tr.ewm_mean(alpha=a, adjust=False).alias(f"atr_{n}")
    out = df.select([atr_expr]).get_column(f"atr_{n}")
    return out

def _compute_features(df: pl.DataFrame) -> pl.DataFrame:
    df = _ensure_contract(df)

    # Returns
    df = df.with_columns([
        (pl.col("close") / pl.col("close").shift(1)).log().alias("logret_1"),
        (pl.col("close") / pl.col("close").shift(1) - 1.0).alias("ret_1"),
    ])

    # Volatilidad rolling (std) sobre logret (Polars: min_samples)
    df = df.with_columns([
        pl.col("logret_1").rolling_std(window_size=VOL_W1, min_samples=max(10, VOL_W1 // 5)).alias(f"vol_logret_{VOL_W1}"),
        pl.col("logret_1").rolling_std(window_size=VOL_W2, min_samples=max(20, VOL_W2 // 10)).alias(f"vol_logret_{VOL_W2}"),
        pl.col("logret_1").rolling_std(window_size=VOL_W3, min_samples=max(50, VOL_W3 // 20)).alias(f"vol_logret_{VOL_W3}"),
    ])

    # Momentum (ratio)
    df = df.with_columns([
        (pl.col("close") / pl.col("close").shift(MOM_W1) - 1.0).alias(f"mom_{MOM_W1}"),
        (pl.col("close") / pl.col("close").shift(MOM_W2) - 1.0).alias(f"mom_{MOM_W2}"),
        (pl.col("close") / pl.col("close").shift(MOM_W3) - 1.0).alias(f"mom_{MOM_W3}"),
    ])

    # Kaufman ER
    abs_diff_1 = (pl.col("close") - pl.col("close").shift(1)).abs()

    # fast
    change_fast = (pl.col("close") - pl.col("close").shift(ER_N_FAST)).abs()
    vol_fast = abs_diff_1.rolling_sum(window_size=ER_N_FAST, min_samples=ER_N_FAST).alias("__k_vol_fast__")
    df = df.with_columns([vol_fast])
    df = df.with_columns([
        pl.when(pl.col("__k_vol_fast__") > 0.0)
          .then((change_fast / pl.col("__k_vol_fast__")).clip(0.0, 1.0))
          .otherwise(None)
          .alias(f"ER_kaufman_{ER_N_FAST}")
    ]).with_columns([
        (1.0 - pl.col(f"ER_kaufman_{ER_N_FAST}")).alias(f"PD_kaufman_{ER_N_FAST}")
    ])

    # main
    change_main = (pl.col("close") - pl.col("close").shift(ER_N_MAIN)).abs()
    vol_main = abs_diff_1.rolling_sum(window_size=ER_N_MAIN, min_samples=ER_N_MAIN).alias("__k_vol_main__")
    df = df.with_columns([vol_main])
    df = df.with_columns([
        pl.when(pl.col("__k_vol_main__") > 0.0)
          .then((change_main / pl.col("__k_vol_main__")).clip(0.0, 1.0))
          .otherwise(None)
          .alias(f"ER_kaufman_{ER_N_MAIN}")
    ]).with_columns([
        (1.0 - pl.col(f"ER_kaufman_{ER_N_MAIN}")).alias(f"PD_kaufman_{ER_N_MAIN}")
    ])

    # Canon aliases
    df = df.with_columns([
        pl.col(f"ER_kaufman_{ER_N_MAIN}").alias("ER_kaufman"),
        pl.col(f"PD_kaufman_{ER_N_MAIN}").alias("PD_kaufman"),
    ])

    # ATR (si OHLC)
    atr_series = _compute_atr_wilder(df, ATR_N)
    if atr_series is not None:
        df = df.with_columns(pl.Series(name=f"atr_{ATR_N}", values=atr_series))
        df = df.with_columns(pl.col(f"atr_{ATR_N}").alias("atr"))

    # Limpieza helpers
    drop_cols = [c for c in ("__k_vol_fast__", "__k_vol_main__") if c in df.columns]
    if drop_cols:
        df = df.drop(drop_cols)

    return df

def _spotcheck_no_lookahead(df_raw: pl.DataFrame) -> None:
    df_a = _compute_features(df_raw)
    if df_a.height < 1000:
        print("[Celda 05][Gate] Spot-check omitido: muy pocas filas.")
        return

    df_b = df_raw.clone()
    last_close = df_b.select(pl.col("close").tail(1)).item()
    if last_close is None or not math.isfinite(float(last_close)):
        print("[Celda 05][Gate] Spot-check omitido: last_close no finito.")
        return

    # perturbar √∫ltimo close
    n = df_b.height
    df_b = df_b.with_columns([
        pl.when(pl.arange(0, n) == (n - 1))
          .then(pl.lit(float(last_close) * float(SPOTCHECK_PERTURB_MULT)))
          .otherwise(pl.col("close"))
          .alias("close")
    ])

    df_b = _compute_features(df_b)

    # comparar todas las filas excepto la √∫ltima
    df_a2 = df_a.slice(0, n - 1)
    df_b2 = df_b.slice(0, n - 1)

    feat_cols = [c for c in df_a2.columns if c not in ("time_utc", "open", "high", "low", "close", "volume", "spread")]
    eps = 1e-12
    diffs = []

    for c in feat_cols:
        a = df_a2.get_column(c)
        b = df_b2.get_column(c)
        if a.dtype in (pl.Float32, pl.Float64):
            cnt = (
                pl.DataFrame({"a": a, "b": b})
                .with_columns((pl.col("a") - pl.col("b")).abs().alias("d"))
                .filter(pl.col("d").is_not_null() & (pl.col("d") > eps))
                .height
            )
        else:
            cnt = int((a != b).sum() or 0)

        if cnt > 0:
            diffs.append((c, int(cnt)))

    if diffs:
        msg = "[Celda 05] GATE FAIL: posible lookahead (features cambiaron antes del √∫ltimo bar).\n"
        msg += "Columnas afectadas (top 10):\n" + "\n".join([f"  - {c}: {k} diffs" for c, k in diffs[:10]])
        raise RuntimeError(msg)

    print("[Celda 05] Gate 05 :: spot-check anti-lookahead OK (sin cambios en filas previas).")

# ========================= Construcci√≥n y persistencia =========================
features_paths: dict[str, str] = {}
features_meta: dict[str, dict] = {}

for sym in selected:
    sym_u = sym.upper().strip()
    src_path = m5_paths.get(sym_u) or m5_paths.get(sym)  # tolerancia
    if not src_path:
        # fallback case-insensitive
        keys = {k.upper(): k for k in m5_paths.keys()}
        if sym_u in keys:
            src_path = m5_paths[keys[sym_u]]
        else:
            raise RuntimeError(f"[Celda 05] ERROR: no encuentro dataset para {sym_u} en m5_paths.")

    df_raw = pl.read_parquet(str(src_path))
    df_raw = _ensure_contract(df_raw)

    if SPOTCHECK_ENABLE and (len(features_paths) < SPOTCHECK_MAX_SYMBOLS):
        _spotcheck_no_lookahead(df_raw)

    df_feat = _compute_features(df_raw)

    out_path = OUT_FEATURES_BASE / f"{sym_u}_features_base.parquet"
    df_feat.write_parquet(str(out_path), compression="zstd")
    if (not out_path.exists()) or out_path.stat().st_size == 0:
        raise RuntimeError(f"[Celda 05] ERROR: no se pudo escribir {out_path}")

    features_paths[sym_u] = str(out_path)
    features_meta[sym_u] = {
        "symbol": sym_u,
        "source_m5_path": str(src_path),
        "rows": int(df_feat.height),
        "tmin": str(df_feat.select(pl.col("time_utc").min()).item()),
        "tmax": str(df_feat.select(pl.col("time_utc").max()).item()),
        "ER_N_FAST": int(ER_N_FAST),
        "ER_N_MAIN": int(ER_N_MAIN),
        "VOL_WINDOWS": [int(VOL_W1), int(VOL_W2), int(VOL_W3)],
        "MOM_WINDOWS": [int(MOM_W1), int(MOM_W2), int(MOM_W3)],
        "ATR_N": int(ATR_N) if "atr" in df_feat.columns else None,
        "has_ohlc": all(c in df_raw.columns for c in ("open","high","low","close")),
    }

    print(f"[Celda 05] {sym_u} :: features saved ‚Üí {out_path.name} | rows={df_feat.height} | cols={len(df_feat.columns)}")

# Snapshot
snap_dir = Path(paths["run_snapshots"]).resolve()
snap_dir.mkdir(parents=True, exist_ok=True)
meta_path = snap_dir / "features_base_v11_meta.json"
meta_path.write_text(json.dumps(features_meta, indent=2, ensure_ascii=False), encoding="utf-8")

GLOBAL_STATE.setdefault("features", {})
GLOBAL_STATE["features"]["features_base_dir"] = str(OUT_FEATURES_BASE)
GLOBAL_STATE["features"]["features_base_paths"] = features_paths
GLOBAL_STATE["features"]["features_base_meta_path"] = str(meta_path)
GLOBAL_STATE["features"]["features_base_version"] = "v1.1"

print(f"üíæ OUTPUT   ‚Üí {OUT_FEATURES_BASE} (n_files={len(features_paths)})")
print(f"üíæ SNAPSHOT ‚Üí {meta_path} (OK)")
print(">>> Celda 05 v1.1 :: OK")


>>> Celda 05 v1.1 :: Features causales (TREND, M5) [OHLCV-aware + state robust + ATR]
[Celda 05] selected_TREND          = ['BNBUSD', 'BTCUSD', 'LVMH', 'XAUAUD']
[Celda 05] data paths key used    = m5_ohlcv_paths
[Celda 05] OUT_FEATURES_BASE      = C:\Quant\MT5_Data_Extraction\ER_STRATEGY_LAB\artifacts\features\features_base_v11
[Celda 05] Gate 05 :: spot-check anti-lookahead OK (sin cambios en filas previas).
[Celda 05] BNBUSD :: features saved ‚Üí BNBUSD_features_base.parquet | rows=49040 | cols=30
[Celda 05] BTCUSD :: features saved ‚Üí BTCUSD_features_base.parquet | rows=48551 | cols=30
[Celda 05] LVMH :: features saved ‚Üí LVMH_features_base.parquet | rows=12929 | cols=30
[Celda 05] XAUAUD :: features saved ‚Üí XAUAUD_features_base.parquet | rows=34788 | cols=30
üíæ OUTPUT   ‚Üí C:\Quant\MT5_Data_Extraction\ER_STRATEGY_LAB\artifacts\features\features_base_v11 (n_files=4)
üíæ SNAPSHOT ‚Üí C:\Quant\MT5_Data_Extraction\ER_STRATEGY_LAB\research_logs\runs\20251222_103043\snapshots\fe

In [35]:
# ===================== Celda 06 v1.0 ‚Äî Regime Gate por Fold (TREND, M5) [IS-only calibration, no leakage] =====================
# OBJETIVO:
#   - Calibrar un "regime gate" para TREND usando SOLO el tramo IS de cada fold (WFO).
#   - Gate basado en features causales ya calculadas (Celda 05): ER_kaufman, mom_1D, vol_logret_1D (y opcional ATR).
#   - Medir cobertura (share de barras donde gate=True) en IS y OOS como diagn√≥stico.
#
# PRODUCE:
#   - artifacts/regime_gates/regime_gate_by_fold.parquet
#   - snapshots/regime_gate_by_fold.parquet + .json
#   - GLOBAL_STATE["regime_gate"] con paths + thresholds por s√≠mbolo/fold
#
# NOTA:
#   - No mete s√≠mbolos fuera del universo: usa data_quality.final_symbols si existe.
#   - No usa datos futuros: todo threshold sale de IS, luego se eval√∫a en OOS.
# =========================================================================================================

from __future__ import annotations

from pathlib import Path
from datetime import datetime, timezone
import json
import math
import polars as pl

print(">>> Celda 06 v1.0 :: Regime Gate por fold (TREND, M5) [IS-only calibration, no leakage]")

# ========================= Validaciones GLOBAL_STATE =========================
if "GLOBAL_STATE" not in globals() or not isinstance(GLOBAL_STATE, dict):
    raise RuntimeError("[Celda 06] ERROR: GLOBAL_STATE no existe o no es dict.")

for k in ("paths", "wfo", "features", "universe"):
    if k not in GLOBAL_STATE:
        raise RuntimeError(f"[Celda 06] ERROR: falta GLOBAL_STATE['{k}'].")

paths = GLOBAL_STATE["paths"]
wfo_state = GLOBAL_STATE["wfo"]
feat_state = GLOBAL_STATE["features"]

# s√≠mbolos efectivos (preferir QA final)
dq = GLOBAL_STATE.get("data_quality", {}) or {}
final_symbols = dq.get("final_symbols") or (GLOBAL_STATE["universe"].get("selected_symbols_TREND") or [])
if not final_symbols:
    raise RuntimeError("[Celda 06] ERROR: no hay s√≠mbolos (data_quality.final_symbols / selected_symbols_TREND).")

symbols_u = [str(s).upper().strip() for s in final_symbols]

# folds
folds = wfo_state.get("folds") or []
if not folds:
    # fallback: leer desde archivo si existe
    folds_path = wfo_state.get("folds_path")
    if not folds_path:
        raise RuntimeError("[Celda 06] ERROR: no hay wfo.folds ni wfo.folds_path. Ejecuta Celda 04.")
    folds = json.loads(Path(folds_path).read_text(encoding="utf-8")).get("folds", [])
if not folds:
    raise RuntimeError("[Celda 06] ERROR: folds vac√≠o (Celda 04 no produjo folds).")

# features paths
feat_paths = feat_state.get("features_base_paths") or {}
feat_dir = feat_state.get("features_base_dir")
if (not feat_paths) and feat_dir:
    # reconstruir mapping desde dir
    pdir = Path(feat_dir).resolve()
    if pdir.exists():
        tmp = {}
        for p in pdir.glob("*_features_base.parquet"):
            sym = p.name.split("_features_base.parquet")[0].upper()
            tmp[sym] = str(p.resolve())
        feat_paths = tmp

if not feat_paths:
    raise RuntimeError("[Celda 06] ERROR: no encuentro features_base_paths/features_base_dir. Ejecuta Celda 05.")

# normalizar keys a upper
feat_paths_u = {str(k).upper().strip(): str(v) for k, v in feat_paths.items()}

# ========================= Par√°metros institucionales =========================
# Columnas esperadas (Celda 05)
ER_COL = "ER_kaufman"
VOL_COL = "vol_logret_288"   # 1D ~288 barras en M5
MOM_COL = "mom_288"          # 1D momentum

# quantiles base y reglas de ajuste para evitar gates degenerados
Q_SCHEMES = [
    {"name": "BASE",    "q_er": 0.60, "q_mom": 0.55, "q_vol": 0.90},
    {"name": "RELAX1",  "q_er": 0.50, "q_mom": 0.50, "q_vol": 0.95},
    {"name": "RELAX2",  "q_er": 0.40, "q_mom": 0.50, "q_vol": 0.99},
    {"name": "TIGHT1",  "q_er": 0.70, "q_mom": 0.60, "q_vol": 0.85},
]

# Cobertura deseada del gate en IS (evitar ‚Äúsiempre falso‚Äù o ‚Äúsiempre true‚Äù)
COV_IS_MIN = 0.05
COV_IS_MAX = 0.80

# m√≠nimos de filas √∫tiles por tramo (post drop_nulls)
MIN_IS_ROWS = 5_000
MIN_OOS_ROWS = 2_000

OUT_DIR = Path(paths["artifacts"]).resolve() / "regime_gates"
OUT_DIR.mkdir(parents=True, exist_ok=True)

SNAP_DIR = Path(paths.get("run_snapshots") or (Path(paths["artifacts"]).resolve() / "snapshots")).resolve()
SNAP_DIR.mkdir(parents=True, exist_ok=True)

# ========================= Helpers =========================
def _parse_iso_utc(s: str) -> datetime:
    dt = datetime.fromisoformat(s)
    if dt.tzinfo is None:
        dt = dt.replace(tzinfo=timezone.utc)
    return dt.astimezone(timezone.utc)

def _need_cols(df: pl.DataFrame, cols: list[str], sym: str) -> None:
    miss = [c for c in cols if c not in df.columns]
    if miss:
        raise RuntimeError(f"[Celda 06] ERROR: {sym} no tiene columnas requeridas: {miss}. cols={df.columns}")

def _quantile_safe(s: pl.Series, q: float) -> float | None:
    s2 = s.drop_nulls()
    if s2.len() == 0:
        return None
    v = s2.quantile(q, interpolation="nearest")
    if v is None:
        return None
    try:
        fv = float(v)
        if not math.isfinite(fv):
            return None
        return fv
    except Exception:
        return None

def _apply_gate(df: pl.DataFrame, thr_er: float, thr_mom: float, thr_vol: float) -> pl.Series:
    # long-only TREND: momentum nunca por debajo de 0
    thr_mom_eff = max(0.0, float(thr_mom))
    return (
        (pl.col(ER_COL) >= pl.lit(float(thr_er))) &
        (pl.col(MOM_COL) >= pl.lit(float(thr_mom_eff))) &
        (pl.col(VOL_COL) <= pl.lit(float(thr_vol)))
    ).alias("__gate__")

def _calibrate_thresholds_is(df_is: pl.DataFrame) -> dict:
    # devuelve thresholds + esquema usado + cobertura IS
    er_s = df_is.get_column(ER_COL)
    mom_s = df_is.get_column(MOM_COL)
    vol_s = df_is.get_column(VOL_COL)

    best = None

    for sch in Q_SCHEMES:
        thr_er = _quantile_safe(er_s, sch["q_er"])
        thr_mom = _quantile_safe(mom_s, sch["q_mom"])
        thr_vol = _quantile_safe(vol_s, sch["q_vol"])

        if thr_er is None or thr_mom is None or thr_vol is None:
            continue

        gate = df_is.select(_apply_gate(df_is, thr_er, thr_mom, thr_vol)).get_column("__gate__")
        cov = float(gate.mean()) if gate.len() > 0 else 0.0

        payload = {
            "scheme": sch["name"],
            "q_er": float(sch["q_er"]),
            "q_mom": float(sch["q_mom"]),
            "q_vol": float(sch["q_vol"]),
            "thr_er": float(thr_er),
            "thr_mom": float(max(0.0, float(thr_mom))),
            "thr_vol": float(thr_vol),
            "cov_is": float(cov),
        }

        # elegir el primer esquema que cae en rango deseado
        if COV_IS_MIN <= cov <= COV_IS_MAX:
            return payload

        # si ninguno cae en rango, guarda el ‚Äúmenos malo‚Äù por cercan√≠a al centro
        target = 0.25
        score = abs(cov - target)
        if best is None or score < best["score"]:
            best = {"score": score, "payload": payload}

    if best is None:
        # no se pudo calibrar por falta total de datos
        return {"scheme": "FAIL", "q_er": None, "q_mom": None, "q_vol": None, "thr_er": None, "thr_mom": None, "thr_vol": None, "cov_is": 0.0}

    return best["payload"]

# ========================= Main =========================
rows = []
thresholds_by_symbol_fold = {}

print(f"[Celda 06] symbols = {symbols_u}")
print(f"[Celda 06] folds   = {[f.get('fold_id') for f in folds]}")

for sym in symbols_u:
    if sym not in feat_paths_u:
        raise RuntimeError(f"[Celda 06] ERROR: no encuentro features parquet para {sym}. keys_sample={list(feat_paths_u)[:10]}")

    df = pl.read_parquet(feat_paths_u[sym])

    # contrato m√≠nimo
    _need_cols(df, ["time_utc", ER_COL, MOM_COL, VOL_COL], sym)
    df = df.with_columns(pl.col("time_utc").cast(pl.Datetime("us", "UTC"), strict=False)).sort("time_utc")

    for f in folds:
        fid = str(f["fold_id"])
        is_s = _parse_iso_utc(f["IS_start"])
        is_e = _parse_iso_utc(f["IS_end"])
        o_s  = _parse_iso_utc(f["OOS_start"])
        o_e  = _parse_iso_utc(f["OOS_end"])
        emb  = int(f.get("embargo_days", 0))

        df_is = df.filter((pl.col("time_utc") >= pl.lit(is_s)) & (pl.col("time_utc") <= pl.lit(is_e)))
        df_oos = df.filter((pl.col("time_utc") >= pl.lit(o_s)) & (pl.col("time_utc") <= pl.lit(o_e)))

        # drop nulls en columnas clave
        df_is_u = df_is.drop_nulls([ER_COL, MOM_COL, VOL_COL])
        df_oos_u = df_oos.drop_nulls([ER_COL, MOM_COL, VOL_COL])

        n_is = int(df_is_u.height)
        n_oos = int(df_oos_u.height)

        status = "OK"
        if n_is < MIN_IS_ROWS:
            status = "FAIL_IS_TOO_SMALL"
        if n_oos < MIN_OOS_ROWS:
            status = "FAIL_OOS_TOO_SMALL" if status == "OK" else status

        calib = _calibrate_thresholds_is(df_is_u) if status.startswith("OK") else {"scheme":"SKIP", "q_er":None,"q_mom":None,"q_vol":None,"thr_er":None,"thr_mom":None,"thr_vol":None,"cov_is":0.0}

        thr_er = calib.get("thr_er")
        thr_mom = calib.get("thr_mom")
        thr_vol = calib.get("thr_vol")

        cov_is = float(calib.get("cov_is") or 0.0)
        cov_oos = 0.0
        n_is_pass = 0
        n_oos_pass = 0

        if status == "OK" and thr_er is not None and thr_mom is not None and thr_vol is not None:
            gate_is = df_is_u.select(_apply_gate(df_is_u, thr_er, thr_mom, thr_vol)).get_column("__gate__")
            gate_oos = df_oos_u.select(_apply_gate(df_oos_u, thr_er, thr_mom, thr_vol)).get_column("__gate__")

            cov_is = float(gate_is.mean()) if gate_is.len() > 0 else 0.0
            cov_oos = float(gate_oos.mean()) if gate_oos.len() > 0 else 0.0
            n_is_pass = int(gate_is.sum())
            n_oos_pass = int(gate_oos.sum())

            # marcar warnings si el gate qued√≥ degenerado
            if cov_is < 0.02 or cov_is > 0.90:
                status = "WARN_DEGENERATE_COVERAGE_IS"

        row = {
            "symbol": sym,
            "fold_id": fid,
            "IS_start": is_s.isoformat(),
            "IS_end": is_e.isoformat(),
            "OOS_start": o_s.isoformat(),
            "OOS_end": o_e.isoformat(),
            "embargo_days": emb,
            "scheme": calib.get("scheme"),
            "q_er": calib.get("q_er"),
            "q_mom": calib.get("q_mom"),
            "q_vol": calib.get("q_vol"),
            "thr_er": thr_er,
            "thr_mom": thr_mom,
            "thr_vol": thr_vol,
            "n_is": n_is,
            "n_oos": n_oos,
            "n_is_pass": n_is_pass,
            "n_oos_pass": n_oos_pass,
            "cov_is": float(cov_is),
            "cov_oos": float(cov_oos),
            "status": status,
        }
        rows.append(row)
        thresholds_by_symbol_fold.setdefault(sym, {})[fid] = row

        print(f"[Celda 06] {sym} {fid} :: status={status} | scheme={row['scheme']} | cov_IS={row['cov_is']:.3f} cov_OOS={row['cov_oos']:.3f} | thr_ER={row['thr_er']} thr_MOM={row['thr_mom']} thr_VOL={row['thr_vol']}")

gate_df = pl.DataFrame(rows).sort(["symbol", "fold_id"])

out_parq = OUT_DIR / "regime_gate_by_fold.parquet"
gate_df.write_parquet(str(out_parq), compression="zstd")

snap_parq = SNAP_DIR / "regime_gate_by_fold.parquet"
gate_df.write_parquet(str(snap_parq), compression="zstd")

snapshot = {
    "created_utc": datetime.utcnow().replace(tzinfo=timezone.utc).isoformat(),
    "symbols": symbols_u,
    "fold_ids": [f.get("fold_id") for f in folds],
    "params": {
        "ER_COL": ER_COL,
        "MOM_COL": MOM_COL,
        "VOL_COL": VOL_COL,
        "COV_IS_MIN": COV_IS_MIN,
        "COV_IS_MAX": COV_IS_MAX,
        "MIN_IS_ROWS": MIN_IS_ROWS,
        "MIN_OOS_ROWS": MIN_OOS_ROWS,
        "Q_SCHEMES": Q_SCHEMES,
    },
    "outputs": {
        "artifacts_parquet": str(out_parq),
        "snapshot_parquet": str(snap_parq),
    }
}
snap_json = SNAP_DIR / "regime_gate_snapshot.json"
snap_json.write_text(json.dumps(snapshot, indent=2, ensure_ascii=False), encoding="utf-8")

GLOBAL_STATE["regime_gate"] = {
    "gate_table_path": str(out_parq),
    "gate_table_snapshot_path": str(snap_parq),
    "snapshot_json": str(snap_json),
    "thresholds_by_symbol_fold": thresholds_by_symbol_fold,
    "params": snapshot["params"],
}

print(f"üíæ OUTPUT   ‚Üí {out_parq} (OK)")
print(f"üíæ SNAPSHOT ‚Üí {snap_parq} (OK)")
print(f"üíæ SNAPSHOT ‚Üí {snap_json} (OK)")
print(">>> Celda 06 v1.0 :: OK")


>>> Celda 06 v1.0 :: Regime Gate por fold (TREND, M5) [IS-only calibration, no leakage]
[Celda 06] symbols = ['BNBUSD', 'BTCUSD', 'LVMH', 'XAUAUD']
[Celda 06] folds   = ['F1']
[Celda 06] BNBUSD F1 :: status=OK | scheme=BASE | cov_IS=0.166 cov_OOS=0.086 | thr_ER=0.24351851851851655 thr_MOM=0.007062234009745261 thr_VOL=0.0020915961985700417
[Celda 06] BTCUSD F1 :: status=OK | scheme=BASE | cov_IS=0.165 cov_OOS=0.074 | thr_ER=0.22960119184047928 thr_MOM=0.0031787892661669925 thr_VOL=0.0013088198411497986
[Celda 06] LVMH F1 :: status=OK | scheme=BASE | cov_IS=0.160 cov_OOS=0.153 | thr_ER=0.2665832290362983 thr_MOM=0.008020938872002725 thr_VOL=0.0022830000387667666
[Celda 06] XAUAUD F1 :: status=OK | scheme=BASE | cov_IS=0.168 cov_OOS=0.067 | thr_ER=0.2361707501894501 thr_MOM=0.0024392239682979877 thr_VOL=0.0006660874714588537
üíæ OUTPUT   ‚Üí C:\Quant\MT5_Data_Extraction\ER_STRATEGY_LAB\artifacts\regime_gates\regime_gate_by_fold.parquet (OK)
üíæ SNAPSHOT ‚Üí C:\Quant\MT5_Data_Extraction\

In [36]:
 # ===================== Celda 07 v1.0.2 ‚Äî Se√±al TREND + Ejecuci√≥n t+1 + Costos (BASE/STRESS) [WFO-safe] =====================
from __future__ import annotations

from pathlib import Path
from datetime import datetime, timezone
import json
import polars as pl

print(">>> Celda 07 v1.0.2 :: Se√±al TREND + Ejecuci√≥n t+1 + Costos (BASE/STRESS) [WFO-safe]")

# ========================= Validaciones GLOBAL_STATE =========================
if "GLOBAL_STATE" not in globals() or not isinstance(GLOBAL_STATE, dict):
    raise RuntimeError("[Celda 07] ERROR: GLOBAL_STATE no existe o no es dict.")

for k in ("paths", "data", "features", "wfo", "regime_gate", "cost_model", "universe"):
    if k not in GLOBAL_STATE:
        raise RuntimeError(f"[Celda 07] ERROR: falta GLOBAL_STATE['{k}'].")

paths = GLOBAL_STATE["paths"]
data_state = GLOBAL_STATE["data"]
feat_state = GLOBAL_STATE["features"]
wfo_state = GLOBAL_STATE["wfo"]
gate_state = GLOBAL_STATE["regime_gate"]
cost_state = GLOBAL_STATE["cost_model"]

# s√≠mbolos efectivos (preferir QA final)
dq = GLOBAL_STATE.get("data_quality", {}) or {}
symbols = dq.get("final_symbols") or (GLOBAL_STATE["universe"].get("selected_symbols_TREND") or [])
if not symbols:
    raise RuntimeError("[Celda 07] ERROR: no hay s√≠mbolos (data_quality.final_symbols / selected_symbols_TREND).")
symbols_u = [str(s).upper().strip() for s in symbols]

# folds
folds = wfo_state.get("folds") or []
if not folds:
    folds_path = wfo_state.get("folds_path")
    if not folds_path:
        raise RuntimeError("[Celda 07] ERROR: no hay wfo.folds ni wfo.folds_path. Ejecuta Celda 04.")
    folds = json.loads(Path(folds_path).read_text(encoding="utf-8")).get("folds", [])
if not folds:
    raise RuntimeError("[Celda 07] ERROR: folds vac√≠o (Celda 04 no produjo folds).")

# m5 paths
m5_paths = (
    data_state.get("m5_ohlcv_paths")
    or data_state.get("m5_clean_paths")
    or data_state.get("ohlcv_clean_paths")
    or {}
)
if not isinstance(m5_paths, dict) or not m5_paths:
    raise RuntimeError("[Celda 07] ERROR: no hay m5_ohlcv_paths/m5_clean_paths/ohlcv_clean_paths. Ejecuta Celda 02/02C.")

# features paths
feat_paths = feat_state.get("features_base_paths") or {}
if not feat_paths:
    raise RuntimeError("[Celda 07] ERROR: no hay features_base_paths. Ejecuta Celda 05.")

# gate table
gate_path = gate_state.get("gate_table_path")
if not gate_path:
    raise RuntimeError("[Celda 07] ERROR: no hay regime_gate.gate_table_path. Ejecuta Celda 06.")
gate_df = pl.read_parquet(str(gate_path))

# costs
costs_by_symbol = cost_state.get("costs_by_symbol") or cost_state.get("symbols") or {}
if not costs_by_symbol:
    raise RuntimeError("[Celda 07] ERROR: no hay cost_model.costs_by_symbol. Ejecuta Celda 03.")

cost_reported_is_roundtrip = bool(cost_state.get("cost_reported_is_roundtrip", False))

# ========================= Par√°metros (contract) =========================
ER_COL  = "ER_kaufman"
MOM_COL = "mom_288"
VOL_COL = "vol_logret_288"

REQ_FEAT_COLS = ["time_utc", ER_COL, MOM_COL, VOL_COL]

# ========================= Outputs =========================
OUT_DIR = Path(paths["artifacts"]).resolve() / "strategy_signals"
OUT_DIR.mkdir(parents=True, exist_ok=True)

SNAP_DIR = Path(paths.get("run_snapshots") or (Path(paths["artifacts"]).resolve() / "snapshots")).resolve()
SNAP_DIR.mkdir(parents=True, exist_ok=True)

# ========================= Helpers =========================
def _ensure_utc(dt: datetime) -> datetime:
    if dt.tzinfo is None:
        return dt.replace(tzinfo=timezone.utc)
    return dt.astimezone(timezone.utc)

def _parse_iso_utc(s: str) -> datetime:
    return _ensure_utc(datetime.fromisoformat(s))

def _pick_path_case_insensitive(d: dict, sym_u: str) -> str:
    if sym_u in d:
        return str(d[sym_u])
    keys = {str(k).upper().strip(): k for k in d.keys()}
    if sym_u in keys:
        return str(d[keys[sym_u]])
    raise KeyError(f"[Celda 07] ERROR: no encuentro path para {sym_u}. keys_sample={list(d)[:10]}")

def _need_cols(df: pl.DataFrame, cols: list[str], sym: str, tag: str) -> None:
    miss = [c for c in cols if c not in df.columns]
    if miss:
        raise RuntimeError(f"[Celda 07] ERROR: {sym} ({tag}) missing cols={miss}. cols={df.columns}")

def _cost_roundtrip_dec(cost_bps: float, reported_is_roundtrip: bool) -> float:
    c = float(cost_bps) / 10000.0
    return c if reported_is_roundtrip else (2.0 * c)

def _apply_gate_expr(thr_er: float, thr_mom: float, thr_vol: float) -> pl.Expr:
    thr_mom_eff = max(0.0, float(thr_mom))
    return (
        (pl.col(ER_COL) >= pl.lit(float(thr_er))) &
        (pl.col(MOM_COL) >= pl.lit(float(thr_mom_eff))) &
        (pl.col(VOL_COL) <= pl.lit(float(thr_vol)))
    ).alias("signal_gate")

def _entry_exit_prices(df: pl.DataFrame) -> pl.DataFrame:
    # entry at t+1, exit at t+2; prefer open else close
    return df.with_columns([
        pl.col("time_utc").shift(-1).alias("entry_time"),
        pl.col("time_utc").shift(-2).alias("exit_time"),
        pl.when(pl.col("open").shift(-1).is_not_null())
          .then(pl.col("open").shift(-1))
          .otherwise(pl.col("close").shift(-1))
          .alias("entry_price"),
        pl.when(pl.col("open").shift(-2).is_not_null())
          .then(pl.col("open").shift(-2))
          .otherwise(pl.col("close").shift(-2))
          .alias("exit_price"),
    ])

def _segment_label_expr(is_s: datetime, is_e: datetime, o_s: datetime, o_e: datetime) -> pl.Expr:
    return (
        pl.when((pl.col("entry_time") >= pl.lit(is_s)) & (pl.col("entry_time") <= pl.lit(is_e))).then(pl.lit("IS"))
         .when((pl.col("entry_time") >= pl.lit(o_s)) & (pl.col("entry_time") <= pl.lit(o_e))).then(pl.lit("OOS"))
         .otherwise(pl.lit(None))
         .alias("segment")
    )

# ========================= Main =========================
all_trades: list[pl.DataFrame] = []
summary_rows: list[dict] = []
per_symbol_outputs: dict[str, str] = {}
trades_by_symbol: dict[str, list[pl.DataFrame]] = {s: [] for s in symbols_u}

print(f"[Celda 07] symbols = {symbols_u}")
print(f"[Celda 07] folds   = {[f.get('fold_id') for f in folds]}")

for sym in symbols_u:
    sym_u = sym.upper().strip()

    # --- Load OHLCV ---
    m5_path = _pick_path_case_insensitive(m5_paths, sym_u)
    df_px = pl.read_parquet(str(m5_path))
    _need_cols(df_px, ["time_utc", "close"], sym_u, "OHLCV")
    if "open" not in df_px.columns:
        df_px = df_px.with_columns(pl.col("close").alias("open"))

    df_px = (
        df_px
        .with_columns([
            pl.col("time_utc").cast(pl.Datetime("us", "UTC"), strict=False),
            pl.col("open").cast(pl.Float64, strict=False),
            pl.col("close").cast(pl.Float64, strict=False),
        ])
        .sort("time_utc")
        .unique(subset=["time_utc"], keep="last")
    )

    # --- Load Features ---
    feat_path = _pick_path_case_insensitive(feat_paths, sym_u)
    df_f = pl.read_parquet(str(feat_path))
    _need_cols(df_f, REQ_FEAT_COLS, sym_u, "FEATURES")
    df_f = df_f.with_columns(pl.col("time_utc").cast(pl.Datetime("us", "UTC"), strict=False)).sort("time_utc")

    # --- Join ---
    df = df_px.join(df_f.select(REQ_FEAT_COLS), on="time_utc", how="inner")

    # cost for symbol
    cinfo = costs_by_symbol.get(sym_u) or costs_by_symbol.get(sym) or {}
    cost_base_bps = float(cinfo.get("COST_BASE_BPS", 0.0))
    cost_stress_bps = float(cinfo.get("COST_STRESS_BPS", 0.0))
    cost_base_dec = _cost_roundtrip_dec(cost_base_bps, cost_reported_is_roundtrip)
    cost_stress_dec = _cost_roundtrip_dec(cost_stress_bps, cost_reported_is_roundtrip)

    # --- Per fold build trades ---
    for f in folds:
        fid = str(f["fold_id"])
        is_s = _parse_iso_utc(f["IS_start"])
        is_e = _parse_iso_utc(f["IS_end"])
        o_s  = _parse_iso_utc(f["OOS_start"])
        o_e  = _parse_iso_utc(f["OOS_end"])
        emb  = int(f.get("embargo_days", 0))

        g = gate_df.filter((pl.col("symbol") == sym_u) & (pl.col("fold_id") == fid))
        if g.is_empty():
            raise RuntimeError(f"[Celda 07] ERROR: no hay thresholds en gate_df para {sym_u} {fid}.")

        thr_er = float(g.select(pl.col("thr_er")).item())
        thr_mom = float(g.select(pl.col("thr_mom")).item())
        thr_vol = float(g.select(pl.col("thr_vol")).item())

        dfx = df.with_columns([_apply_gate_expr(thr_er, thr_mom, thr_vol)])
        dfx = _entry_exit_prices(dfx)
        dfx = dfx.with_columns(_segment_label_expr(is_s, is_e, o_s, o_e))

        # trade list: solo filas donde gate=True
        trades = (
            dfx
            .filter(pl.col("segment").is_not_null())
            .filter(pl.col("entry_price").is_not_null() & pl.col("exit_price").is_not_null())
            .filter((pl.col("entry_price") > 0.0) & (pl.col("exit_price") > 0.0))
            .filter(pl.col("signal_gate") == True)
            .with_columns([
                pl.lit(sym_u).alias("symbol"),
                pl.lit(fid).alias("fold_id"),
                pl.lit(emb).alias("embargo_days"),
                (pl.col("exit_price") / pl.col("entry_price") - 1.0).alias("gross_ret"),
                (pl.col("exit_price") / pl.col("entry_price") - 1.0 - pl.lit(cost_base_dec)).alias("net_ret_base"),
                (pl.col("exit_price") / pl.col("entry_price") - 1.0 - pl.lit(cost_stress_dec)).alias("net_ret_stress"),
                pl.lit(cost_base_bps).alias("cost_base_bps"),
                pl.lit(cost_stress_bps).alias("cost_stress_bps"),
            ])
            .select([
                "symbol","fold_id","segment","embargo_days",
                "time_utc","entry_time","exit_time",
                "entry_price","exit_price",
                "signal_gate",
                "gross_ret","net_ret_base","net_ret_stress",
                "cost_base_bps","cost_stress_bps",
                ER_COL,MOM_COL,VOL_COL,
            ])
        )

        if trades.height == 0:
            print(f"[Celda 07] WARN: {sym_u} {fid} :: 0 trades (segment split por entry_time).")
            continue

        # summary por segmento (diagn√≥stico)
        seg_sum = (
            trades
            .group_by(["symbol","fold_id","segment"])
            .agg([
                pl.len().alias("n_trades"),
                pl.col("gross_ret").mean().alias("gross_mean"),
                pl.col("net_ret_base").mean().alias("net_base_mean"),
                pl.col("net_ret_stress").mean().alias("net_stress_mean"),
                pl.col("net_ret_base").std().alias("net_base_std"),
                pl.col("net_ret_base").median().alias("net_base_median"),
                pl.col("net_ret_base").quantile(0.05, interpolation="nearest").alias("net_base_p05"),
                pl.col("net_ret_base").quantile(0.95, interpolation="nearest").alias("net_base_p95"),
                (pl.col("net_ret_base") > 0).mean().alias("win_rate_base"),
            ])
            .with_columns([
                (pl.col("net_base_mean") / pl.col("net_base_std")).alias("sharpe_like_base"),
            ])
        )

        summary_rows.extend(seg_sum.to_dicts())
        all_trades.append(trades)
        trades_by_symbol[sym_u].append(trades)

        print(f"[Celda 07] {sym_u} {fid} :: trades={trades.height} | IS/OOS split OK | cost_base_bps={cost_base_bps} cost_stress_bps={cost_stress_bps}")

    # persist per-symbol
    if trades_by_symbol[sym_u]:
        df_sym = pl.concat(trades_by_symbol[sym_u], how="vertical")
        out_sym = OUT_DIR / f"trades_m5_{sym_u}.parquet"
        df_sym.write_parquet(str(out_sym), compression="zstd")
        per_symbol_outputs[sym_u] = str(out_sym)

# ========================= Persist all + summary =========================
if not all_trades:
    raise RuntimeError("[Celda 07] GATE FAIL: no se gener√≥ ning√∫n trade en ning√∫n s√≠mbolo.")

trades_all = pl.concat(all_trades, how="vertical")
out_all = OUT_DIR / "trades_m5_all.parquet"
trades_all.write_parquet(str(out_all), compression="zstd")

summary_df = pl.DataFrame(summary_rows).sort(["symbol","fold_id","segment"]) if summary_rows else pl.DataFrame()
out_sum = OUT_DIR / "summary_by_symbol_fold.parquet"
summary_df.write_parquet(str(out_sum), compression="zstd")

# snapshots
snap_all = SNAP_DIR / "trades_m5_all.parquet"
trades_all.write_parquet(str(snap_all), compression="zstd")

snap_sum = SNAP_DIR / "summary_by_symbol_fold.parquet"
summary_df.write_parquet(str(snap_sum), compression="zstd")

snap_json = SNAP_DIR / "signals_snapshot.json"
snapshot = {
    "created_utc": datetime.utcnow().replace(tzinfo=timezone.utc).isoformat(),
    "symbols": symbols_u,
    "fold_ids": [f.get("fold_id") for f in folds],
    "execution_convention": (GLOBAL_STATE.get("execution", {}) or {}).get("convention"),
    "cost_reported_is_roundtrip": cost_reported_is_roundtrip,
    "outputs": {
        "per_symbol": per_symbol_outputs,
        "all_trades": str(out_all),
        "summary": str(out_sum),
        "snap_all_trades": str(snap_all),
        "snap_summary": str(snap_sum),
    },
    "notes": [
        "trade list: solo filas con signal_gate=True",
        "segment split por entry_time para evitar leakage en bordes",
        "entry=open(t+1) fallback close(t+1); exit=open(t+2) fallback close(t+2)",
    ],
}
snap_json.write_text(json.dumps(snapshot, indent=2, ensure_ascii=False), encoding="utf-8")

GLOBAL_STATE["signals"] = {
    "trades_dir": str(OUT_DIR),
    "trades_all_path": str(out_all),
    "summary_path": str(out_sum),
    "per_symbol_paths": per_symbol_outputs,
    "snapshot_json": str(snap_json),
}

print(f"üíæ OUTPUT   ‚Üí {out_all} (OK) | rows={trades_all.height}")
print(f"üíæ OUTPUT   ‚Üí {out_sum} (OK) | rows={summary_df.height}")
print(f"üíæ SNAPSHOT ‚Üí {snap_all} (OK)")
print(f"üíæ SNAPSHOT ‚Üí {snap_sum} (OK)")
print(f"üíæ SNAPSHOT ‚Üí {snap_json} (OK)")
print(">>> Celda 07 v1.0.2 :: OK")


>>> Celda 07 v1.0.2 :: Se√±al TREND + Ejecuci√≥n t+1 + Costos (BASE/STRESS) [WFO-safe]
[Celda 07] symbols = ['BNBUSD', 'BTCUSD', 'LVMH', 'XAUAUD']
[Celda 07] folds   = ['F1']
[Celda 07] BNBUSD F1 :: trades=6685 | IS/OOS split OK | cost_base_bps=8.0 cost_stress_bps=16.0
[Celda 07] BTCUSD F1 :: trades=6408 | IS/OOS split OK | cost_base_bps=8.0 cost_stress_bps=16.0
[Celda 07] LVMH F1 :: trades=1919 | IS/OOS split OK | cost_base_bps=12.0 cost_stress_bps=25.0
[Celda 07] XAUAUD F1 :: trades=4640 | IS/OOS split OK | cost_base_bps=4.0 cost_stress_bps=8.0
üíæ OUTPUT   ‚Üí C:\Quant\MT5_Data_Extraction\ER_STRATEGY_LAB\artifacts\strategy_signals\trades_m5_all.parquet (OK) | rows=19652
üíæ OUTPUT   ‚Üí C:\Quant\MT5_Data_Extraction\ER_STRATEGY_LAB\artifacts\strategy_signals\summary_by_symbol_fold.parquet (OK) | rows=8
üíæ SNAPSHOT ‚Üí C:\Quant\MT5_Data_Extraction\ER_STRATEGY_LAB\research_logs\runs\20251222_103043\snapshots\trades_m5_all.parquet (OK)
üíæ SNAPSHOT ‚Üí C:\Quant\MT5_Data_Extraction\

In [37]:
# ===================== Celda 07B v1.0 ‚Äî QA timing trades (gap-aware diagnostics) =====================
from __future__ import annotations

from pathlib import Path
from datetime import datetime, timezone
import json
import polars as pl

print(">>> Celda 07B v1.0 :: QA timing trades (entry/hold gaps)")

if "GLOBAL_STATE" not in globals() or not isinstance(GLOBAL_STATE, dict):
    raise RuntimeError("[Celda 07B] ERROR: GLOBAL_STATE no existe o no es dict.")
if "paths" not in GLOBAL_STATE or "signals" not in GLOBAL_STATE:
    raise RuntimeError("[Celda 07B] ERROR: faltan GLOBAL_STATE['paths'] o GLOBAL_STATE['signals'].")

paths = GLOBAL_STATE["paths"]
sig = GLOBAL_STATE["signals"]

trades_all_path = sig.get("trades_all_path")
if not trades_all_path:
    raise RuntimeError("[Celda 07B] ERROR: no existe GLOBAL_STATE['signals']['trades_all_path']. Ejecuta Celda 07.")

OUT_DIR = Path(paths["artifacts"]).resolve() / "strategy_signals"
OUT_DIR.mkdir(parents=True, exist_ok=True)

SNAP_DIR = Path(paths.get("run_snapshots") or (Path(paths["artifacts"]).resolve() / "snapshots")).resolve()
SNAP_DIR.mkdir(parents=True, exist_ok=True)

df = pl.read_parquet(str(trades_all_path))

need = ["symbol","segment","time_utc","entry_time","exit_time","entry_price","exit_price","net_ret_base","net_ret_stress"]
miss = [c for c in need if c not in df.columns]
if miss:
    raise RuntimeError(f"[Celda 07B] ERROR: trades_all missing cols={miss}. cols={df.columns}")

# asegurar tipos datetime
df = df.with_columns([
    pl.col("time_utc").cast(pl.Datetime("us", "UTC"), strict=False),
    pl.col("entry_time").cast(pl.Datetime("us", "UTC"), strict=False),
    pl.col("exit_time").cast(pl.Datetime("us", "UTC"), strict=False),
])

# deltas en segundos
df = df.with_columns([
    (pl.col("entry_time") - pl.col("time_utc")).dt.total_seconds().alias("dt_signal_to_entry_s"),
    (pl.col("exit_time") - pl.col("entry_time")).dt.total_seconds().alias("dt_hold_s"),
    (pl.col("exit_time") - pl.col("time_utc")).dt.total_seconds().alias("dt_signal_to_exit_s"),
])

# thresholds institucionales para diagn√≥stico (M5 ~300s)
TH = {
    "entry_gt_900s": 900,   # > 15 min
    "hold_gt_900s": 900,    # > 15 min
    "hold_gt_3600s": 3600,  # > 1h
    "hold_gt_86400s": 86400 # > 1d
}

qa = (
    df.group_by(["symbol","segment"])
      .agg([
          pl.len().alias("n_trades"),
          pl.col("dt_signal_to_entry_s").median().alias("entry_med_s"),
          pl.col("dt_signal_to_entry_s").quantile(0.90, interpolation="nearest").alias("entry_p90_s"),
          pl.col("dt_signal_to_entry_s").max().alias("entry_max_s"),

          pl.col("dt_hold_s").median().alias("hold_med_s"),
          pl.col("dt_hold_s").quantile(0.90, interpolation="nearest").alias("hold_p90_s"),
          pl.col("dt_hold_s").quantile(0.99, interpolation="nearest").alias("hold_p99_s"),
          pl.col("dt_hold_s").max().alias("hold_max_s"),

          (pl.col("dt_signal_to_entry_s") > TH["entry_gt_900s"]).mean().alias("share_entry_gt_900s"),
          (pl.col("dt_hold_s") > TH["hold_gt_900s"]).mean().alias("share_hold_gt_900s"),
          (pl.col("dt_hold_s") > TH["hold_gt_3600s"]).mean().alias("share_hold_gt_1h"),
          (pl.col("dt_hold_s") > TH["hold_gt_86400s"]).mean().alias("share_hold_gt_1d"),
      ])
      .sort(["symbol","segment"])
)

print("\n[Celda 07B] QA timing por symbol/segment:")
print(qa)

out_parq = OUT_DIR / "qa_trade_timing.parquet"
qa.write_parquet(str(out_parq), compression="zstd")

snap_parq = SNAP_DIR / "qa_trade_timing.parquet"
qa.write_parquet(str(snap_parq), compression="zstd")

snap_json = SNAP_DIR / "qa_trade_timing_snapshot.json"
payload = {
    "created_utc": datetime.utcnow().replace(tzinfo=timezone.utc).isoformat(),
    "inputs": {"trades_all_path": str(trades_all_path)},
    "thresholds_seconds": TH,
    "outputs": {"artifacts_parquet": str(out_parq), "snapshot_parquet": str(snap_parq)},
}
snap_json.write_text(json.dumps(payload, indent=2, ensure_ascii=False), encoding="utf-8")

print(f"\nüíæ OUTPUT   ‚Üí {out_parq} (OK)")
print(f"üíæ SNAPSHOT ‚Üí {snap_parq} (OK)")
print(f"üíæ SNAPSHOT ‚Üí {snap_json} (OK)")
print(">>> Celda 07B v1.0 :: OK")


>>> Celda 07B v1.0 :: QA timing trades (entry/hold gaps)

[Celda 07B] QA timing por symbol/segment:
shape: (8, 14)
‚îå‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îê
‚îÇ symbol ‚îÜ segment ‚îÜ n_trades ‚îÜ entry_med_ ‚îÜ ‚Ä¶ ‚îÜ share_entr ‚îÜ share_hold ‚îÜ share_hold ‚îÜ share_hold ‚îÇ
‚îÇ ---    ‚îÜ ---     ‚îÜ ---      ‚îÜ s          ‚îÜ   ‚îÜ y_gt_900s  ‚îÜ _gt_900s   ‚îÜ _gt_1h     ‚îÜ _gt_1d     ‚îÇ
‚îÇ str    ‚îÜ str     ‚îÜ u32      ‚îÜ ---        ‚îÜ   ‚îÜ ---        ‚îÜ ---        ‚îÜ ---        ‚îÜ ---        ‚îÇ
‚îÇ        ‚îÜ         ‚îÜ          ‚îÜ f64        ‚îÜ   ‚îÜ f64        ‚îÜ f64        ‚îÜ f64        ‚îÜ f64        ‚îÇ
‚ïû‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ï™‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ï™‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ï™‚ïê‚ï

In [38]:
# ===================== Celda 07C v1.0 ‚Äî Alpha Report MULTI-HORIZON (LONG/SHORT) + Costs + Mon‚ÄìFri [WFO-safe] =====================
from __future__ import annotations

from pathlib import Path
from datetime import datetime, timezone
import json
import math
import polars as pl

print(">>> Celda 07C v1.0 :: Alpha Report MULTI-HORIZON (LONG/SHORT) + Costs + Mon‚ÄìFri [WFO-safe]")

# ========================= Validaciones GLOBAL_STATE =========================
if "GLOBAL_STATE" not in globals() or not isinstance(GLOBAL_STATE, dict):
    raise RuntimeError("[Celda 07C] ERROR: GLOBAL_STATE no existe o no es dict.")

for k in ("paths", "data", "features", "wfo", "regime_gate", "cost_model", "universe"):
    if k not in GLOBAL_STATE:
        raise RuntimeError(f"[Celda 07C] ERROR: falta GLOBAL_STATE['{k}'].")

paths = GLOBAL_STATE["paths"]
data_state = GLOBAL_STATE["data"]
feat_state = GLOBAL_STATE["features"]
wfo_state  = GLOBAL_STATE["wfo"]
gate_state = GLOBAL_STATE["regime_gate"]
cost_state = GLOBAL_STATE["cost_model"]

# s√≠mbolos efectivos (preferir QA final)
dq = GLOBAL_STATE.get("data_quality", {}) or {}
symbols = dq.get("final_symbols") or (GLOBAL_STATE["universe"].get("selected_symbols_TREND") or [])
if not symbols:
    raise RuntimeError("[Celda 07C] ERROR: no hay s√≠mbolos (data_quality.final_symbols / selected_symbols_TREND).")
symbols_u = [str(s).upper().strip() for s in symbols]

# folds
folds = wfo_state.get("folds") or []
if not folds:
    folds_path = wfo_state.get("folds_path")
    if not folds_path:
        raise RuntimeError("[Celda 07C] ERROR: no hay wfo.folds ni wfo.folds_path. Ejecuta Celda 04.")
    folds = json.loads(Path(folds_path).read_text(encoding="utf-8")).get("folds", [])
if not folds:
    raise RuntimeError("[Celda 07C] ERROR: folds vac√≠o (Celda 04 no produjo folds).")

# m5 paths
m5_paths = (
    data_state.get("m5_ohlcv_paths")
    or data_state.get("m5_clean_paths")
    or data_state.get("ohlcv_clean_paths")
    or {}
)
if not isinstance(m5_paths, dict) or not m5_paths:
    raise RuntimeError("[Celda 07C] ERROR: no hay m5 paths (Celda 02/02C).")

# features paths
feat_paths = feat_state.get("features_base_paths") or {}
if not feat_paths:
    raise RuntimeError("[Celda 07C] ERROR: no hay features_base_paths. Ejecuta Celda 05.")

# gate table
gate_path = gate_state.get("gate_table_path")
if not gate_path:
    raise RuntimeError("[Celda 07C] ERROR: no hay regime_gate.gate_table_path. Ejecuta Celda 06.")
gate_df = pl.read_parquet(str(gate_path))

# costs
costs_by_symbol = cost_state.get("costs_by_symbol") or cost_state.get("symbols") or {}
if not costs_by_symbol:
    raise RuntimeError("[Celda 07C] ERROR: no hay cost_model.costs_by_symbol. Ejecuta Celda 03.")
cost_reported_is_roundtrip = bool(cost_state.get("cost_reported_is_roundtrip", False))

# ========================= Par√°metros =========================
ER_COL  = "ER_kaufman"
MOM_COL = "mom_288"
VOL_COL = "vol_logret_288"
REQ_FEAT_COLS = ["time_utc", ER_COL, MOM_COL, VOL_COL]

# horizontes en ‚Äúbarras despu√©s de entry‚Äù (entry=t+1). Ej: h=1 => exit=t+2 (tu celda 07 original)
HORIZONS = [1, 3, 6, 12, 24, 48, 96, 288]

# Institucional: operar Lun‚ÄìVie (aplica tambi√©n a CRYPTO si quieres estandarizar)
ENFORCE_MON_FRI = True  # si True, descarta entries con weekday>=5

# ========================= Outputs =========================
OUT_DIR = Path(paths["artifacts"]).resolve() / "alpha_reports"
OUT_DIR.mkdir(parents=True, exist_ok=True)

SNAP_DIR = Path(paths.get("run_snapshots") or (Path(paths["artifacts"]).resolve() / "snapshots")).resolve()
SNAP_DIR.mkdir(parents=True, exist_ok=True)

# ========================= Helpers =========================
def _ensure_utc(dt: datetime) -> datetime:
    if dt.tzinfo is None:
        return dt.replace(tzinfo=timezone.utc)
    return dt.astimezone(timezone.utc)

def _parse_iso_utc(s: str) -> datetime:
    return _ensure_utc(datetime.fromisoformat(s))

def _pick_path_case_insensitive(d: dict, sym_u: str) -> str:
    if sym_u in d:
        return str(d[sym_u])
    keys = {str(k).upper().strip(): k for k in d.keys()}
    if sym_u in keys:
        return str(d[keys[sym_u]])
    raise KeyError(f"[Celda 07C] ERROR: no encuentro path para {sym_u}. keys_sample={list(d)[:10]}")

def _need_cols(df: pl.DataFrame, cols: list[str], sym: str, tag: str) -> None:
    miss = [c for c in cols if c not in df.columns]
    if miss:
        raise RuntimeError(f"[Celda 07C] ERROR: {sym} ({tag}) missing cols={miss}. cols={df.columns}")

def _cost_roundtrip_dec(cost_bps: float, reported_is_roundtrip: bool) -> float:
    c = float(cost_bps) / 10000.0
    return c if reported_is_roundtrip else (2.0 * c)

def _segment_label_expr(is_s: datetime, is_e: datetime, o_s: datetime, o_e: datetime) -> pl.Expr:
    return (
        pl.when((pl.col("entry_time") >= pl.lit(is_s)) & (pl.col("entry_time") <= pl.lit(is_e))).then(pl.lit("IS"))
         .when((pl.col("entry_time") >= pl.lit(o_s)) & (pl.col("entry_time") <= pl.lit(o_e))).then(pl.lit("OOS"))
         .otherwise(pl.lit(None))
         .alias("segment")
    )

def _weekday_expr() -> pl.Expr:
    # Polars: weekday() => Monday=0 ... Sunday=6 (en Datetime)
    return pl.col("entry_time").dt.weekday()

# ========================= Main =========================
rows = []
print(f"[Celda 07C] symbols   = {symbols_u}")
print(f"[Celda 07C] folds     = {[f.get('fold_id') for f in folds]}")
print(f"[Celda 07C] horizons  = {HORIZONS}")
print(f"[Celda 07C] mon_fri   = {ENFORCE_MON_FRI}")

for sym in symbols_u:
    sym_u = sym.upper().strip()

    # --- Load OHLCV ---
    m5_path = _pick_path_case_insensitive(m5_paths, sym_u)
    df_px = pl.read_parquet(str(m5_path))
    _need_cols(df_px, ["time_utc", "close"], sym_u, "OHLCV")
    if "open" not in df_px.columns:
        df_px = df_px.with_columns(pl.col("close").alias("open"))
    df_px = (
        df_px
        .with_columns([
            pl.col("time_utc").cast(pl.Datetime("us", "UTC"), strict=False),
            pl.col("open").cast(pl.Float64, strict=False),
            pl.col("close").cast(pl.Float64, strict=False),
        ])
        .sort("time_utc")
        .unique(subset=["time_utc"], keep="last")
    )

    # --- Load Features ---
    feat_path = _pick_path_case_insensitive(feat_paths, sym_u)
    df_f = pl.read_parquet(str(feat_path))
    _need_cols(df_f, REQ_FEAT_COLS, sym_u, "FEATURES")
    df_f = df_f.with_columns(pl.col("time_utc").cast(pl.Datetime("us", "UTC"), strict=False)).sort("time_utc")

    # --- Join ---
    df = df_px.join(df_f.select(REQ_FEAT_COLS), on="time_utc", how="inner")

    # costs
    cinfo = costs_by_symbol.get(sym_u) or costs_by_symbol.get(sym) or {}
    cost_base_bps   = float(cinfo.get("COST_BASE_BPS", 0.0))
    cost_stress_bps = float(cinfo.get("COST_STRESS_BPS", 0.0))
    cost_base_dec   = _cost_roundtrip_dec(cost_base_bps, cost_reported_is_roundtrip)
    cost_stress_dec = _cost_roundtrip_dec(cost_stress_bps, cost_reported_is_roundtrip)

    for f in folds:
        fid = str(f["fold_id"])
        is_s = _parse_iso_utc(f["IS_start"])
        is_e = _parse_iso_utc(f["IS_end"])
        o_s  = _parse_iso_utc(f["OOS_start"])
        o_e  = _parse_iso_utc(f["OOS_end"])

        g = gate_df.filter((pl.col("symbol") == sym_u) & (pl.col("fold_id") == fid))
        if g.is_empty():
            raise RuntimeError(f"[Celda 07C] ERROR: no hay thresholds en gate_df para {sym_u} {fid}.")

        thr_er  = float(g.select(pl.col("thr_er")).item())
        thr_mom = float(g.select(pl.col("thr_mom")).item())
        thr_vol = float(g.select(pl.col("thr_vol")).item())

        # NOTE: gate de r√©gimen es direccionalidad+vol; para SHORT usamos simetr√≠a simple con -thr_mom_eff.
        thr_mom_eff = max(0.0, float(thr_mom))

        df0 = (
            df
            .with_columns([
                pl.col("time_utc").shift(-1).alias("entry_time"),
                pl.when(pl.col("open").shift(-1).is_not_null()).then(pl.col("open").shift(-1)).otherwise(pl.col("close").shift(-1)).alias("entry_price"),
                (
                    (pl.col(ER_COL) >= pl.lit(thr_er)) &
                    (pl.col(MOM_COL) >= pl.lit(thr_mom_eff)) &
                    (pl.col(VOL_COL) <= pl.lit(thr_vol))
                ).alias("signal_long"),
                (
                    (pl.col(ER_COL) >= pl.lit(thr_er)) &
                    (pl.col(MOM_COL) <= pl.lit(-thr_mom_eff)) &
                    (pl.col(VOL_COL) <= pl.lit(thr_vol))
                ).alias("signal_short"),
            ])
            .with_columns(_segment_label_expr(is_s, is_e, o_s, o_e))
        )

        if ENFORCE_MON_FRI:
            df0 = df0.filter(_weekday_expr() < 5)

        # por cada horizonte, medir retorno desde entry (t+1) a exit (t+1+h)
        for h in HORIZONS:
            exit_shift = -(1 + int(h))
            tag_h = f"H{h}"

            dfx = (
                df0
                .with_columns([
                    pl.col("time_utc").shift(exit_shift).alias("exit_time"),
                    pl.when(pl.col("open").shift(exit_shift).is_not_null())
                      .then(pl.col("open").shift(exit_shift))
                      .otherwise(pl.col("close").shift(exit_shift))
                      .alias("exit_price"),
                ])
                .filter(pl.col("segment").is_not_null())
                .filter(pl.col("entry_time").is_not_null() & pl.col("exit_time").is_not_null())
                .filter(pl.col("entry_price").is_not_null() & pl.col("exit_price").is_not_null())
                .filter((pl.col("entry_price") > 0.0) & (pl.col("exit_price") > 0.0))
                .drop_nulls([ER_COL, MOM_COL, VOL_COL])
            )

            if dfx.height == 0:
                continue

            # retorno base
            dfx = dfx.with_columns([
                (pl.col("exit_price") / pl.col("entry_price") - 1.0).alias("fwd_ret"),
            ])

            # LONG sample
            long_tr = dfx.filter(pl.col("signal_long") == True).with_columns([
                pl.lit("LONG").alias("side"),
                pl.col("fwd_ret").alias("gross_ret"),
                (pl.col("fwd_ret") - pl.lit(cost_base_dec)).alias("net_ret_base"),
                (pl.col("fwd_ret") - pl.lit(cost_stress_dec)).alias("net_ret_stress"),
            ])

            # SHORT sample (PnL sim√©trico lineal)
            short_tr = dfx.filter(pl.col("signal_short") == True).with_columns([
                pl.lit("SHORT").alias("side"),
                (-pl.col("fwd_ret")).alias("gross_ret"),
                (-pl.col("fwd_ret") - pl.lit(cost_base_dec)).alias("net_ret_base"),
                (-pl.col("fwd_ret") - pl.lit(cost_stress_dec)).alias("net_ret_stress"),
            ])

            comb = []
            if long_tr.height > 0:
                comb.append(long_tr)
            if short_tr.height > 0:
                comb.append(short_tr)
            if not comb:
                continue

            tr = pl.concat(comb, how="vertical")

            # resumen
            agg = (
                tr
                .group_by(["segment", "side"])
                .agg([
                    pl.len().alias("n_trades"),
                    pl.col("gross_ret").mean().alias("gross_mean"),
                    pl.col("net_ret_base").mean().alias("net_base_mean"),
                    pl.col("net_ret_stress").mean().alias("net_stress_mean"),
                    pl.col("net_ret_base").std().alias("net_base_std"),
                    pl.col("net_ret_base").median().alias("net_base_median"),
                    pl.col("net_ret_base").quantile(0.05, interpolation="nearest").alias("net_base_p05"),
                    pl.col("net_ret_base").quantile(0.95, interpolation="nearest").alias("net_base_p95"),
                    (pl.col("net_ret_base") > 0).mean().alias("win_rate_base"),
                ])
                .with_columns([
                    pl.when(pl.col("net_base_std") > 0)
                      .then(pl.col("net_base_mean") / pl.col("net_base_std"))
                      .otherwise(None)
                      .alias("sharpe_like_base")
                ])
            )

            for r in agg.to_dicts():
                rows.append({
                    "symbol": sym_u,
                    "fold_id": fid,
                    "horizon_tag": tag_h,
                    "h_bars_after_entry": int(h),
                    "segment": r["segment"],
                    "side": r["side"],
                    "n_trades": int(r["n_trades"]),
                    "gross_mean": float(r["gross_mean"]) if r["gross_mean"] is not None else None,
                    "net_base_mean": float(r["net_base_mean"]) if r["net_base_mean"] is not None else None,
                    "net_stress_mean": float(r["net_stress_mean"]) if r["net_stress_mean"] is not None else None,
                    "net_base_std": float(r["net_base_std"]) if r["net_base_std"] is not None else None,
                    "net_base_median": float(r["net_base_median"]) if r["net_base_median"] is not None else None,
                    "net_base_p05": float(r["net_base_p05"]) if r["net_base_p05"] is not None else None,
                    "net_base_p95": float(r["net_base_p95"]) if r["net_base_p95"] is not None else None,
                    "win_rate_base": float(r["win_rate_base"]) if r["win_rate_base"] is not None else None,
                    "sharpe_like_base": float(r["sharpe_like_base"]) if r["sharpe_like_base"] is not None else None,
                    "cost_base_bps": float(cost_base_bps),
                    "cost_stress_bps": float(cost_stress_bps),
                    "thr_er": float(thr_er),
                    "thr_mom_eff": float(thr_mom_eff),
                    "thr_vol": float(thr_vol),
                })

alpha_df = pl.DataFrame(rows) if rows else pl.DataFrame()

out_parq = OUT_DIR / "alpha_multi_horizon_report.parquet"
alpha_df.write_parquet(str(out_parq), compression="zstd")

snap_parq = SNAP_DIR / "alpha_multi_horizon_report.parquet"
alpha_df.write_parquet(str(snap_parq), compression="zstd")

snap_json = SNAP_DIR / "alpha_multi_horizon_snapshot.json"
snapshot = {
    "created_utc": datetime.utcnow().replace(tzinfo=timezone.utc).isoformat(),
    "symbols": symbols_u,
    "fold_ids": [f.get("fold_id") for f in folds],
    "params": {
        "HORIZONS": HORIZONS,
        "ENFORCE_MON_FRI": ENFORCE_MON_FRI,
        "ER_COL": ER_COL,
        "MOM_COL": MOM_COL,
        "VOL_COL": VOL_COL,
        "NOTE": "SHORT usa simetr√≠a simple con -thr_mom_eff (mejora: calibrar umbral short sobre abs(mom) o cola negativa).",
    },
    "outputs": {
        "artifacts_parquet": str(out_parq),
        "snapshot_parquet": str(snap_parq),
    }
}
snap_json.write_text(json.dumps(snapshot, indent=2, ensure_ascii=False), encoding="utf-8")

GLOBAL_STATE["alpha_report"] = {
    "alpha_report_path": str(out_parq),
    "alpha_report_snapshot_path": str(snap_parq),
    "snapshot_json": str(snap_json),
    "params": snapshot["params"],
}

print(f"üíæ OUTPUT   ‚Üí {out_parq} (OK) | rows={alpha_df.height}")
print(f"üíæ SNAPSHOT ‚Üí {snap_parq} (OK)")
print(f"üíæ SNAPSHOT ‚Üí {snap_json} (OK)")
print(">>> Celda 07C v1.0 :: OK")


>>> Celda 07C v1.0 :: Alpha Report MULTI-HORIZON (LONG/SHORT) + Costs + Mon‚ÄìFri [WFO-safe]
[Celda 07C] symbols   = ['BNBUSD', 'BTCUSD', 'LVMH', 'XAUAUD']
[Celda 07C] folds     = ['F1']
[Celda 07C] horizons  = [1, 3, 6, 12, 24, 48, 96, 288]
[Celda 07C] mon_fri   = True
üíæ OUTPUT   ‚Üí C:\Quant\MT5_Data_Extraction\ER_STRATEGY_LAB\artifacts\alpha_reports\alpha_multi_horizon_report.parquet (OK) | rows=128
üíæ SNAPSHOT ‚Üí C:\Quant\MT5_Data_Extraction\ER_STRATEGY_LAB\research_logs\runs\20251222_103043\snapshots\alpha_multi_horizon_report.parquet (OK)
üíæ SNAPSHOT ‚Üí C:\Quant\MT5_Data_Extraction\ER_STRATEGY_LAB\research_logs\runs\20251222_103043\snapshots\alpha_multi_horizon_snapshot.json (OK)
>>> Celda 07C v1.0 :: OK


In [39]:
# ===================== Celda 08 v1.1.1 ‚Äî Backtest Engine (TREND, M5) [LONG/SHORT + SL/TP/Trail + Gate-Hysteresis + Cooldown + Mon‚ÄìFri FLATTEN + Costs + WFO-safe] =====================
from __future__ import annotations

from pathlib import Path
from datetime import datetime, timezone
import json
import math
import polars as pl

print(">>> Celda 08 v1.1.1 :: Backtest Engine (TREND, M5) [LONG/SHORT + SL/TP/Trail + Gate-Hysteresis + Cooldown + Mon‚ÄìFri FLATTEN + Costs + WFO-safe]")

# ========================= Validaciones GLOBAL_STATE =========================
if "GLOBAL_STATE" not in globals() or not isinstance(GLOBAL_STATE, dict):
    raise RuntimeError("[Celda 08] ERROR: GLOBAL_STATE no existe o no es dict.")

for k in ("paths", "data", "features", "wfo", "regime_gate", "cost_model", "universe"):
    if k not in GLOBAL_STATE:
        raise RuntimeError(f"[Celda 08] ERROR: falta GLOBAL_STATE['{k}'].")

paths      = GLOBAL_STATE["paths"]
data_state = GLOBAL_STATE["data"]
feat_state = GLOBAL_STATE["features"]
wfo_state  = GLOBAL_STATE["wfo"]
gate_state = GLOBAL_STATE["regime_gate"]
cost_state = GLOBAL_STATE["cost_model"]

dq = GLOBAL_STATE.get("data_quality", {}) or {}
symbols = dq.get("final_symbols") or (GLOBAL_STATE["universe"].get("selected_symbols_TREND") or [])
if not symbols:
    raise RuntimeError("[Celda 08] ERROR: no hay s√≠mbolos (data_quality.final_symbols / selected_symbols_TREND).")
symbols_u = [str(s).upper().strip() for s in symbols]

folds = wfo_state.get("folds") or []
if not folds:
    folds_path = wfo_state.get("folds_path")
    if not folds_path:
        raise RuntimeError("[Celda 08] ERROR: no hay wfo.folds ni wfo.folds_path. Ejecuta Celda 04.")
    folds = json.loads(Path(folds_path).read_text(encoding="utf-8")).get("folds", [])
if not folds:
    raise RuntimeError("[Celda 08] ERROR: folds vac√≠o (Celda 04 no produjo folds).")

m5_paths = (
    data_state.get("m5_ohlcv_paths")
    or data_state.get("m5_clean_paths")
    or data_state.get("ohlcv_clean_paths")
    or {}
)
if not isinstance(m5_paths, dict) or not m5_paths:
    raise RuntimeError("[Celda 08] ERROR: no hay m5_ohlcv_paths/m5_clean_paths/ohlcv_clean_paths. Ejecuta Celda 02/02C.")

feat_paths = feat_state.get("features_base_paths") or {}
if not feat_paths:
    raise RuntimeError("[Celda 08] ERROR: no hay features_base_paths. Ejecuta Celda 05.")

gate_path = gate_state.get("gate_table_path")
if not gate_path:
    raise RuntimeError("[Celda 08] ERROR: no hay regime_gate.gate_table_path. Ejecuta Celda 06.")
gate_df = pl.read_parquet(str(gate_path))

costs_by_symbol = cost_state.get("costs_by_symbol") or cost_state.get("symbols") or {}
if not costs_by_symbol:
    raise RuntimeError("[Celda 08] ERROR: no hay cost_model.costs_by_symbol. Ejecuta Celda 03.")
cost_reported_is_roundtrip = bool(cost_state.get("cost_reported_is_roundtrip", False))

# ========================= Par√°metros (engine v1.1.1) =========================
ENGINE_LOGIC_VERSION = "v1.1.1"

# Se√±al/r√©gimen (feature cols)
ER_COL   = "ER_kaufman"
MOM_COL  = "mom_288"
VOL_COL  = "vol_logret_288"

# Gesti√≥n (ATR)
ATR_COL_CANDIDATES = ["atr", "ATR", "atr_14", "atr_28"]
ATR_PCT_FALLBACK = 0.005  # si no hay ATR, proxy 0.5% del precio

# Motor anti-churn
SL_ATR    = 2.5
TP_ATR    = 5.0
TRAIL_ATR = 2.0
TIME_STOP_BARS = 288          # ~1 d√≠a M5

ENTRY_CONFIRM_BARS = 12       # gate ON sostenido (1h)
EXIT_GATE_OFF_BARS = 12       # gate OFF sostenido (1h)
MIN_HOLD_BARS      = 6        # 30 min antes de permitir REGIME_OFF
COOLDOWN_BARS      = 24       # 2h cooldown tras salida

# Trend filter
EMA_FILTER = True
EMA_FAST = 48
EMA_SLOW = 288

# Calendario
MON_FRI = True   # Mon‚ÄìFri only + flatten

# Sizing simple risk-based
USE_RISK_SIZING = True
RISK_PER_TRADE  = 0.01
MIN_POS_SIZE    = 0.25
MAX_POS_SIZE    = 3.00

print(f"[Celda 08] symbols = {symbols_u}")
print(f"[Celda 08] folds   = {[f.get('fold_id') for f in folds]}")
print(f"[Celda 08] params  = SL_ATR={SL_ATR} TP_ATR={TP_ATR} TRAIL_ATR={TRAIL_ATR} TIME_STOP_BARS={TIME_STOP_BARS} "
      f"ENTRY_CONFIRM_BARS={ENTRY_CONFIRM_BARS} EXIT_GATE_OFF_BARS={EXIT_GATE_OFF_BARS} MIN_HOLD_BARS={MIN_HOLD_BARS} COOLDOWN_BARS={COOLDOWN_BARS} "
      f"EMA_FILTER={EMA_FILTER} MON_FRI={MON_FRI}")

# ========================= Outputs (compat v10) =========================
OUT_DIR = Path(paths["artifacts"]).resolve() / "backtests" / "backtest_engine_v10"
OUT_DIR.mkdir(parents=True, exist_ok=True)

SNAP_DIR = Path(paths.get("run_snapshots") or (Path(paths["artifacts"]).resolve() / "snapshots")).resolve()
SNAP_DIR.mkdir(parents=True, exist_ok=True)

OUT_TRADES  = OUT_DIR / "trades_engine_v10.parquet"
OUT_SUMMARY = OUT_DIR / "summary_engine_v10.parquet"
SNAP_TRADES  = SNAP_DIR / "trades_engine_v10.parquet"
SNAP_SUMMARY = SNAP_DIR / "summary_engine_v10.parquet"
SNAP_JSON    = SNAP_DIR / "backtest_engine_v10_snapshot.json"

# ========================= Helpers =========================
def _ensure_utc(dt: datetime) -> datetime:
    if dt.tzinfo is None:
        return dt.replace(tzinfo=timezone.utc)
    return dt.astimezone(timezone.utc)

def _parse_iso_utc(s: str) -> datetime:
    return _ensure_utc(datetime.fromisoformat(s))

def _pick_path_case_insensitive(d: dict, sym_u: str) -> str:
    if sym_u in d:
        return str(d[sym_u])
    keys = {str(k).upper().strip(): k for k in d.keys()}
    if sym_u in keys:
        return str(d[keys[sym_u]])
    raise KeyError(f"[Celda 08] ERROR: no encuentro path para {sym_u}. keys_sample={list(d)[:10]}")

def _need_cols(df: pl.DataFrame, cols: list[str], sym: str, tag: str) -> None:
    miss = [c for c in cols if c not in df.columns]
    if miss:
        raise RuntimeError(f"[Celda 08] ERROR: {sym} ({tag}) missing cols={miss}. cols={df.columns}")

def _cost_roundtrip_dec(cost_bps: float, reported_is_roundtrip: bool) -> float:
    c = float(cost_bps) / 10000.0
    return c if reported_is_roundtrip else (2.0 * c)

def _get_atr_col(df: pl.DataFrame) -> str | None:
    for c in ATR_COL_CANDIDATES:
        if c in df.columns:
            return c
    return None

def _is_finite(x) -> bool:
    if x is None:
        return False
    try:
        return math.isfinite(float(x))
    except Exception:
        return False

def _is_weekend_dt(dt: datetime) -> bool:
    # Python: 0=Mon ... 6=Sun
    return int(dt.weekday()) >= 5

# ========================= Simulaci√≥n =========================
def _simulate_symbol_fold(
    sym_u: str,
    df_join: pl.DataFrame,
    fold: dict,
    thr_er: float,
    thr_mom: float,
    thr_vol: float,
    cost_base_dec: float,
    cost_stress_dec: float,
    cost_base_bps: float,
    cost_stress_bps: float,
) -> tuple[list[dict], dict]:
    fid = str(fold["fold_id"])
    is_s = _parse_iso_utc(fold["IS_start"])
    is_e = _parse_iso_utc(fold["IS_end"])
    o_s  = _parse_iso_utc(fold["OOS_start"])
    o_e  = _parse_iso_utc(fold["OOS_end"])

    # ATR
    atr_col = _get_atr_col(df_join)
    if atr_col is None:
        df_join = df_join.with_columns((pl.col("close") * pl.lit(ATR_PCT_FALLBACK)).alias("__atr_used"))
        atr_col = "__atr_used"

    # EMA filter
    if EMA_FILTER:
        df_join = df_join.with_columns([
            pl.col("close").ewm_mean(span=EMA_FAST, adjust=False).alias("__ema_fast"),
            pl.col("close").ewm_mean(span=EMA_SLOW, adjust=False).alias("__ema_slow"),
        ])
    else:
        df_join = df_join.with_columns([
            pl.lit(None).cast(pl.Float64).alias("__ema_fast"),
            pl.lit(None).cast(pl.Float64).alias("__ema_slow"),
        ])

    # Gate raw
    thr_mom_eff = max(0.0, float(thr_mom))
    long_raw = (
        (pl.col(ER_COL) >= pl.lit(float(thr_er))) &
        (pl.col(MOM_COL) >= pl.lit(float(thr_mom_eff))) &
        (pl.col(VOL_COL) <= pl.lit(float(thr_vol)))
    )
    short_raw = (
        (pl.col(ER_COL) >= pl.lit(float(thr_er))) &
        (pl.col(MOM_COL) <= pl.lit(-float(thr_mom_eff))) &
        (pl.col(VOL_COL) <= pl.lit(float(thr_vol)))
    )

    if EMA_FILTER:
        long_raw  = long_raw  & (pl.col("__ema_fast") > pl.col("__ema_slow"))
        short_raw = short_raw & (pl.col("__ema_fast") < pl.col("__ema_slow"))

    df_join = df_join.with_columns([
        long_raw.alias("__gate_long_raw"),
        short_raw.alias("__gate_short_raw"),
    ])

    # ========== DOW robusto (Polars weekday puede ser 0-6 o 1-7 seg√∫n versi√≥n) ==========
    # 1) calcular weekday raw
    df_join = df_join.with_columns(pl.col("time_utc").dt.weekday().cast(pl.Int16).alias("__dow_raw"))
    max_dow = df_join.select(pl.col("__dow_raw").max()).item()
    # Si max > 6, asumimos convenci√≥n 1..7 y convertimos a 0..6
    if (max_dow is not None) and int(max_dow) > 6:
        df_join = df_join.with_columns((pl.col("__dow_raw") - 1).alias("__dow0"))
    else:
        df_join = df_join.with_columns(pl.col("__dow_raw").alias("__dow0"))

    df_join = df_join.with_columns((pl.col("__dow0") >= 5).alias("__is_weekend"))

    # Confirmaci√≥n de entrada (rolling window)
    df_join = df_join.with_columns([
        (pl.col("__gate_long_raw").cast(pl.Int8)
            .rolling_sum(ENTRY_CONFIRM_BARS, min_samples=ENTRY_CONFIRM_BARS)
            .eq(pl.lit(ENTRY_CONFIRM_BARS))
        ).fill_null(False).alias("__gate_long_confirm"),
        (pl.col("__gate_short_raw").cast(pl.Int8)
            .rolling_sum(ENTRY_CONFIRM_BARS, min_samples=ENTRY_CONFIRM_BARS)
            .eq(pl.lit(ENTRY_CONFIRM_BARS))
        ).fill_null(False).alias("__gate_short_confirm"),
    ])

    # --- Listas ---
    t   = df_join.get_column("time_utc").to_list()
    o   = df_join.get_column("open").to_list()
    h   = df_join.get_column("high").to_list()
    l_  = df_join.get_column("low").to_list()
    c   = df_join.get_column("close").to_list()
    atr = df_join.get_column(atr_col).to_list()
    mom = df_join.get_column(MOM_COL).to_list()

    gateL = df_join.get_column("__gate_long_raw").to_list()
    gateS = df_join.get_column("__gate_short_raw").to_list()
    confL = df_join.get_column("__gate_long_confirm").to_list()
    confS = df_join.get_column("__gate_short_confirm").to_list()
    is_wk = df_join.get_column("__is_weekend").to_list()

    n = len(t)
    if n < 10:
        return [], {"IS": 0, "OOS": 0}

    # Bars por segmento (exposure)
    seg_bar_counts = {"IS": 0, "OOS": 0}
    for i in range(n):
        ti = t[i]
        if ti is None:
            continue
        if is_s <= ti <= is_e:
            seg_bar_counts["IS"] += 1
        elif o_s <= ti <= o_e:
            seg_bar_counts["OOS"] += 1

    trades_rows: list[dict] = []

    pos = 0
    side_str = None
    entry_idx = None
    entry_time = None
    entry_price = None
    seg = None
    seg_end = None

    stop = None
    tp = None
    trail_stop = None
    best_price = None
    sl_dist = None
    tp_dist = None
    trail_dist = None

    pos_size = 1.0
    gate_off_streak = 0
    cooldown = 0

    def _is_weekend_idx(ix: int) -> bool:
        if ix < 0 or ix >= n:
            return False
        v = is_wk[ix]
        return bool(v) if v is not None else False

    def _segment_for_entry_time(et: datetime):
        if is_s <= et <= is_e:
            return "IS", is_e
        if o_s <= et <= o_e:
            return "OOS", o_e
        return None, None

    for idx in range(n):
        # ========================= Exits =========================
        if pos != 0 and entry_idx is not None and idx >= entry_idx:
            bars_held = idx - entry_idx + 1

            gate_now = bool(gateL[idx]) if pos == 1 else bool(gateS[idx])
            if gate_now:
                gate_off_streak = 0
            else:
                gate_off_streak += 1

            exit_reason = None
            exit_price = None

            hi = float(h[idx]) if _is_finite(h[idx]) else float(c[idx])
            lo = float(l_[idx]) if _is_finite(l_[idx]) else float(c[idx])
            cl = float(c[idx]) if _is_finite(c[idx]) else float(o[idx])

            if pos == 1:
                if best_price is None:
                    best_price = float(entry_price)
                best_price = max(best_price, hi)
                if trail_dist is not None:
                    ts = best_price - float(trail_dist)
                    trail_stop = ts if trail_stop is None else max(float(trail_stop), ts)

                if stop is not None and lo <= float(stop):
                    exit_reason, exit_price = "SL", float(stop)
                elif trail_stop is not None and lo <= float(trail_stop):
                    exit_reason, exit_price = "TRAIL", float(trail_stop)
                elif tp is not None and hi >= float(tp):
                    exit_reason, exit_price = "TP", float(tp)

            else:
                if best_price is None:
                    best_price = float(entry_price)
                best_price = min(best_price, lo)
                if trail_dist is not None:
                    ts = best_price + float(trail_dist)
                    trail_stop = ts if trail_stop is None else min(float(trail_stop), ts)

                if stop is not None and hi >= float(stop):
                    exit_reason, exit_price = "SL", float(stop)
                elif trail_stop is not None and hi >= float(trail_stop):
                    exit_reason, exit_price = "TRAIL", float(trail_stop)
                elif tp is not None and lo <= float(tp):
                    exit_reason, exit_price = "TP", float(tp)

            if exit_reason is None:
                if bars_held >= int(TIME_STOP_BARS):
                    exit_reason, exit_price = "TIME", cl
                elif bars_held >= int(MIN_HOLD_BARS) and gate_off_streak >= int(EXIT_GATE_OFF_BARS):
                    exit_reason, exit_price = "REGIME_OFF", cl
                elif seg_end is not None and idx + 1 < n and t[idx + 1] > seg_end:
                    exit_reason, exit_price = "SEGMENT_END", cl
                elif MON_FRI and idx + 1 < n and _is_weekend_idx(idx + 1):
                    exit_reason, exit_price = "WEEKEND_FLATTEN", cl

            if exit_reason is not None:
                et = t[idx]
                ep = float(entry_price)
                xp = float(exit_price)
                if ep > 0.0 and xp > 0.0:
                    raw_ret = (xp / ep - 1.0) * float(pos)
                    gross_ret = raw_ret * float(pos_size)
                    net_ret_base = gross_ret - float(cost_base_dec) * float(pos_size)
                    net_ret_stress = gross_ret - float(cost_stress_dec) * float(pos_size)

                    trades_rows.append({
                        "symbol": sym_u,
                        "fold_id": str(fid),
                        "segment": str(seg),
                        "side": str(side_str),
                        "entry_time": _ensure_utc(entry_time).isoformat(),
                        "exit_time": _ensure_utc(et).isoformat(),
                        "entry_price": float(ep),
                        "exit_price": float(xp),
                        "bars_held": int(bars_held),
                        "exit_reason": str(exit_reason),
                        "gross_ret": float(gross_ret),
                        "net_ret_base": float(net_ret_base),
                        "net_ret_stress": float(net_ret_stress),
                        "cost_base_bps": float(cost_base_bps),
                        "cost_stress_bps": float(cost_stress_bps),
                        "pos_size": float(pos_size),
                        "sl_atr": float(SL_ATR),
                        "tp_atr": float(TP_ATR),
                        "trail_atr": float(TRAIL_ATR),
                        "time_stop_bars": int(TIME_STOP_BARS),
                    })

                pos = 0
                side_str = None
                entry_idx = None
                entry_time = None
                entry_price = None
                seg = None
                seg_end = None

                stop = None
                tp = None
                trail_stop = None
                best_price = None
                sl_dist = None
                tp_dist = None
                trail_dist = None

                gate_off_streak = 0
                cooldown = int(COOLDOWN_BARS)

        # ========================= Entradas =========================
        if pos == 0:
            if idx >= n - 1:
                continue

            if cooldown > 0:
                cooldown -= 1
                continue

            entry_ix = idx + 1
            if entry_ix >= n:
                continue

            et = t[entry_ix]
            if et is None:
                continue

            # Mon‚ÄìFri (doble blindaje):
            #  1) por flag Polars robusto
            #  2) por Python weekday() (fuente de verdad para QA)
            if MON_FRI and (_is_weekend_idx(entry_ix) or _is_weekend_dt(et)):
                continue

            seg2, seg_end2 = _segment_for_entry_time(et)
            if seg2 is None:
                continue

            can_long  = bool(confL[idx])
            can_short = bool(confS[idx])
            if not (can_long or can_short):
                continue

            chosen_pos = None
            if can_long and (not can_short):
                chosen_pos = 1
            elif can_short and (not can_long):
                chosen_pos = -1
            else:
                m = float(mom[idx]) if _is_finite(mom[idx]) else 0.0
                chosen_pos = 1 if m >= 0 else -1

            op = float(o[entry_ix]) if _is_finite(o[entry_ix]) else float(c[entry_ix])
            if op <= 0.0:
                continue

            atr_e = float(atr[entry_ix]) if _is_finite(atr[entry_ix]) else (float(atr[idx]) if _is_finite(atr[idx]) else (op * ATR_PCT_FALLBACK))
            if atr_e <= 0.0:
                atr_e = op * ATR_PCT_FALLBACK

            sl_d = float(SL_ATR) * float(atr_e)
            tp_d = float(TP_ATR) * float(atr_e)
            tr_d = float(TRAIL_ATR) * float(atr_e)
            if sl_d <= 0.0 or tp_d <= 0.0 or tr_d <= 0.0:
                continue

            ps = 1.0
            if USE_RISK_SIZING:
                stop_pct = sl_d / op
                if stop_pct > 0.0:
                    ps = float(RISK_PER_TRADE) / float(stop_pct)
                    ps = max(float(MIN_POS_SIZE), min(float(MAX_POS_SIZE), ps))

            if chosen_pos == 1:
                st = op - sl_d
                tpv = op + tp_d
                ts0 = op - tr_d
                side = "LONG"
            else:
                st = op + sl_d
                tpv = op - tp_d
                ts0 = op + tr_d
                side = "SHORT"

            pos = int(chosen_pos)
            side_str = side
            entry_idx = int(entry_ix)
            entry_time = et
            entry_price = float(op)
            seg = str(seg2)
            seg_end = seg_end2

            stop = float(st)
            tp = float(tpv)
            trail_stop = float(ts0)
            best_price = float(op)

            sl_dist = float(sl_d)
            tp_dist = float(tp_d)
            trail_dist = float(tr_d)

            pos_size = float(ps)
            gate_off_streak = 0

    return trades_rows, seg_bar_counts

# ========================= Main =========================
all_trades = []
segbars_rows = []

for sym in symbols_u:
    sym_u = sym.upper().strip()

    px_path = _pick_path_case_insensitive(m5_paths, sym_u)
    df_px = pl.read_parquet(str(px_path))
    _need_cols(df_px, ["time_utc", "close"], sym_u, "OHLCV")

    if "open" not in df_px.columns:
        df_px = df_px.with_columns(pl.col("close").alias("open"))
    if "high" not in df_px.columns:
        df_px = df_px.with_columns(pl.col("close").alias("high"))
    if "low" not in df_px.columns:
        df_px = df_px.with_columns(pl.col("close").alias("low"))

    df_px = (
        df_px
        .with_columns([
            pl.col("time_utc").cast(pl.Datetime("us", "UTC"), strict=False),
            pl.col("open").cast(pl.Float64, strict=False),
            pl.col("high").cast(pl.Float64, strict=False),
            pl.col("low").cast(pl.Float64, strict=False),
            pl.col("close").cast(pl.Float64, strict=False),
        ])
        .sort("time_utc")
        .unique(subset=["time_utc"], keep="last")
    )

    feat_path = _pick_path_case_insensitive(feat_paths, sym_u)
    df_f = pl.read_parquet(str(feat_path))
    _need_cols(df_f, ["time_utc", ER_COL, MOM_COL, VOL_COL], sym_u, "FEATURES")
    df_f = df_f.with_columns(pl.col("time_utc").cast(pl.Datetime("us", "UTC"), strict=False)).sort("time_utc")

    df = df_px.join(df_f, on="time_utc", how="inner")

    cinfo = costs_by_symbol.get(sym_u) or costs_by_symbol.get(sym) or {}
    cost_base_bps = float(cinfo.get("COST_BASE_BPS", 0.0))
    cost_stress_bps = float(cinfo.get("COST_STRESS_BPS", 0.0))
    cost_base_dec = _cost_roundtrip_dec(cost_base_bps, cost_reported_is_roundtrip)
    cost_stress_dec = _cost_roundtrip_dec(cost_stress_bps, cost_reported_is_roundtrip)

    for f in folds:
        fid = str(f["fold_id"])
        g = gate_df.filter((pl.col("symbol") == sym_u) & (pl.col("fold_id") == fid))
        if g.is_empty():
            raise RuntimeError(f"[Celda 08] ERROR: no hay thresholds en gate_df para {sym_u} {fid}.")

        thr_er = float(g.select(pl.col("thr_er")).item())
        thr_mom = float(g.select(pl.col("thr_mom")).item())
        thr_vol = float(g.select(pl.col("thr_vol")).item())

        rows, segbars = _simulate_symbol_fold(
            sym_u=sym_u,
            df_join=df,
            fold=f,
            thr_er=thr_er,
            thr_mom=thr_mom,
            thr_vol=thr_vol,
            cost_base_dec=cost_base_dec,
            cost_stress_dec=cost_stress_dec,
            cost_base_bps=cost_base_bps,
            cost_stress_bps=cost_stress_bps,
        )

        if rows:
            all_trades.extend(rows)

        for segk, nbars in (segbars or {}).items():
            segbars_rows.append({
                "symbol": sym_u,
                "fold_id": fid,
                "segment": segk,
                "segment_bars": int(nbars),
            })

        print(f"[Celda 08] {sym_u} {fid} :: done (sim) | trades={len(rows)} | costs(bps) base={cost_base_bps} stress={cost_stress_bps}")

# ========================= Persist trades + summary =========================
if not all_trades:
    raise RuntimeError("[Celda 08] GATE FAIL: no se gener√≥ ning√∫n trade (revisa thresholds / confirm / filtros).")

trades_df = pl.DataFrame(all_trades)

REQ_TRADES_COLS = [
    "symbol","fold_id","segment","side",
    "entry_time","exit_time",
    "entry_price","exit_price",
    "bars_held","exit_reason",
    "gross_ret","net_ret_base","net_ret_stress",
    "cost_base_bps","cost_stress_bps",
    "pos_size","sl_atr","tp_atr","trail_atr","time_stop_bars",
]
miss = [c for c in REQ_TRADES_COLS if c not in trades_df.columns]
if miss:
    raise RuntimeError(f"[Celda 08] ERROR: trades_df missing cols={miss}. cols={trades_df.columns}")

segbars_df = pl.DataFrame(segbars_rows).group_by(["symbol","fold_id","segment"]).agg(
    pl.col("segment_bars").max().alias("segment_bars")
)

summary_df = (
    trades_df
    .group_by(["symbol","fold_id","segment","side"])
    .agg([
        pl.len().alias("n_trades"),
        pl.col("gross_ret").mean().alias("gross_mean"),
        pl.col("net_ret_base").mean().alias("net_base_mean"),
        pl.col("net_ret_stress").mean().alias("net_stress_mean"),
        pl.col("net_ret_base").std().alias("net_base_std"),
        (pl.col("net_ret_base") > 0).mean().alias("win_rate_base"),
        (pl.col("net_ret_base").fill_null(0.0).log1p().sum().exp() - 1.0).alias("tot_ret_base"),
        (pl.col("net_ret_stress").fill_null(0.0).log1p().sum().exp() - 1.0).alias("tot_ret_stress"),
        pl.col("bars_held").median().alias("bars_held_med"),
        pl.col("bars_held").sum().alias("__held_bars"),
    ])
    .join(segbars_df, on=["symbol","fold_id","segment"], how="left")
    .with_columns([
        (pl.col("__held_bars") / pl.col("segment_bars").cast(pl.Float64)).alias("exposure_bar_share"),
    ])
    .drop(["__held_bars"])
    .sort(["symbol","fold_id","segment","side"])
)

trades_df.write_parquet(str(OUT_TRADES), compression="zstd")
summary_df.write_parquet(str(OUT_SUMMARY), compression="zstd")
trades_df.write_parquet(str(SNAP_TRADES), compression="zstd")
summary_df.write_parquet(str(SNAP_SUMMARY), compression="zstd")

snapshot = {
    "created_utc": datetime.utcnow().replace(tzinfo=timezone.utc).isoformat(),
    "engine_logic_version": ENGINE_LOGIC_VERSION,
    "symbols": symbols_u,
    "fold_ids": [f.get("fold_id") for f in folds],
    "params": {
        "SL_ATR": SL_ATR,
        "TP_ATR": TP_ATR,
        "TRAIL_ATR": TRAIL_ATR,
        "TIME_STOP_BARS": TIME_STOP_BARS,
        "ENTRY_CONFIRM_BARS": ENTRY_CONFIRM_BARS,
        "EXIT_GATE_OFF_BARS": EXIT_GATE_OFF_BARS,
        "MIN_HOLD_BARS": MIN_HOLD_BARS,
        "COOLDOWN_BARS": COOLDOWN_BARS,
        "EMA_FILTER": EMA_FILTER,
        "EMA_FAST": EMA_FAST,
        "EMA_SLOW": EMA_SLOW,
        "MON_FRI": MON_FRI,
        "USE_RISK_SIZING": USE_RISK_SIZING,
        "RISK_PER_TRADE": RISK_PER_TRADE,
        "MIN_POS_SIZE": MIN_POS_SIZE,
        "MAX_POS_SIZE": MAX_POS_SIZE,
        "COST_REPORTED_IS_ROUNDTRIP": cost_reported_is_roundtrip,
        "DOW_ROBUST_FIX": True,
    },
    "outputs": {
        "trades_path": str(OUT_TRADES),
        "summary_path": str(OUT_SUMMARY),
        "snap_trades": str(SNAP_TRADES),
        "snap_summary": str(SNAP_SUMMARY),
    },
    "notes": [
        "Engine v1.1.1: FIX DOW robusto (Polars weekday 0-6 o 1-7) + blindaje Python weekday para Mon‚ÄìFri.",
        "Entrada con confirmaci√≥n y salida por r√©gimen con histeresis + min-hold + cooldown.",
        "SL/TP/Trail intrabar (high/low).",
        "Mon‚ÄìFri FLATTEN + Segment boundary flatten.",
    ],
}
SNAP_JSON.write_text(json.dumps(snapshot, indent=2, ensure_ascii=False), encoding="utf-8")

GLOBAL_STATE["engine"] = {
    "engine_logic_version": ENGINE_LOGIC_VERSION,
    "out_dir": str(OUT_DIR),
    "trades_path": str(OUT_TRADES),
    "summary_path": str(OUT_SUMMARY),
    "snapshot_json": str(SNAP_JSON),
    "params": snapshot["params"],
}

print(f"üíæ OUTPUT   ‚Üí {OUT_TRADES} (OK) | rows={trades_df.height}")
print(f"üíæ OUTPUT   ‚Üí {OUT_SUMMARY} (OK) | rows={summary_df.height}")
print(f"üíæ SNAPSHOT ‚Üí {SNAP_TRADES} (OK)")
print(f"üíæ SNAPSHOT ‚Üí {SNAP_SUMMARY} (OK)")
print(f"üíæ SNAPSHOT ‚Üí {SNAP_JSON} (OK)")
print(">>> Celda 08 v1.1.1 :: OK")


>>> Celda 08 v1.1.1 :: Backtest Engine (TREND, M5) [LONG/SHORT + SL/TP/Trail + Gate-Hysteresis + Cooldown + Mon‚ÄìFri FLATTEN + Costs + WFO-safe]
[Celda 08] symbols = ['BNBUSD', 'BTCUSD', 'LVMH', 'XAUAUD']
[Celda 08] folds   = ['F1']
[Celda 08] params  = SL_ATR=2.5 TP_ATR=5.0 TRAIL_ATR=2.0 TIME_STOP_BARS=288 ENTRY_CONFIRM_BARS=12 EXIT_GATE_OFF_BARS=12 MIN_HOLD_BARS=6 COOLDOWN_BARS=24 EMA_FILTER=True MON_FRI=True
[Celda 08] BNBUSD F1 :: done (sim) | trades=184 | costs(bps) base=8.0 stress=16.0
[Celda 08] BTCUSD F1 :: done (sim) | trades=182 | costs(bps) base=8.0 stress=16.0
[Celda 08] LVMH F1 :: done (sim) | trades=68 | costs(bps) base=12.0 stress=25.0
[Celda 08] XAUAUD F1 :: done (sim) | trades=122 | costs(bps) base=4.0 stress=8.0
üíæ OUTPUT   ‚Üí C:\Quant\MT5_Data_Extraction\ER_STRATEGY_LAB\artifacts\backtests\backtest_engine_v10\trades_engine_v10.parquet (OK) | rows=556
üíæ OUTPUT   ‚Üí C:\Quant\MT5_Data_Extraction\ER_STRATEGY_LAB\artifacts\backtests\backtest_engine_v10\summary_eng

In [40]:
# ===================== Celda 08B v1.0.4 ‚Äî QA Institucional del Motor (Engine) [Mon‚ÄìFri evidence + hard gate | weekday-robusto + dtype-fix] =====================
from __future__ import annotations

from pathlib import Path
from datetime import datetime, timezone
import json
import polars as pl

print(">>> Celda 08B v1.0.4 :: QA Institucional del Motor (Engine) [Mon‚ÄìFri evidence + hard gate | weekday-robusto + dtype-fix]")

# ========================= Validaciones GLOBAL_STATE =========================
if "GLOBAL_STATE" not in globals() or not isinstance(GLOBAL_STATE, dict):
    raise RuntimeError("[Celda 08B] ERROR: GLOBAL_STATE no existe o no es dict.")

# tolerar nombres de estado (por compatibilidad)
bt = None
if "backtest_engine" in GLOBAL_STATE:
    bt = GLOBAL_STATE["backtest_engine"]
elif "engine" in GLOBAL_STATE:
    bt = GLOBAL_STATE["engine"]
else:
    raise RuntimeError("[Celda 08B] ERROR: falta GLOBAL_STATE['backtest_engine'] o GLOBAL_STATE['engine'].")

paths = GLOBAL_STATE.get("paths") or {}
if not paths:
    raise RuntimeError("[Celda 08B] ERROR: falta GLOBAL_STATE['paths'].")

trades_path = bt.get("trades_path")
summary_path = bt.get("summary_path")
if not trades_path or not Path(trades_path).exists():
    raise RuntimeError(f"[Celda 08B] ERROR: trades_path inv√°lido/no existe: {trades_path}")
if not summary_path or not Path(summary_path).exists():
    raise RuntimeError(f"[Celda 08B] ERROR: summary_path inv√°lido/no existe: {summary_path}")

# params (tal como los guarda Celda 08)
params = bt.get("params", {}) or {}
ENFORCE_MON_FRI_GATE = bool(params.get("MON_FRI", params.get("ENFORCE_MON_FRI", True)))

# ========================= Outputs =========================
OUT_DIR = Path(paths["artifacts"]).resolve() / "backtests" / "backtest_engine_v10"
OUT_DIR.mkdir(parents=True, exist_ok=True)

SNAP_DIR = Path(paths.get("run_snapshots") or (Path(paths["artifacts"]).resolve() / "snapshots")).resolve()
SNAP_DIR.mkdir(parents=True, exist_ok=True)

out_weekend = OUT_DIR / "qa_weekend_entries_engine_v10.parquet"
snap_weekend = SNAP_DIR / "qa_weekend_entries_engine_v10.parquet"
snap_json = SNAP_DIR / "qa_engine_monfri_evidence_snapshot.json"

# ========================= Helpers =========================
def _ensure_utc(dt: datetime) -> datetime:
    if dt.tzinfo is None:
        return dt.replace(tzinfo=timezone.utc)
    return dt.astimezone(timezone.utc)

def _parse_dt_any(s: str) -> datetime | None:
    """
    Robusto a ISO con 'Z' o con offset.
    Devuelve datetime UTC o None.
    """
    if s is None:
        return None
    try:
        ss = str(s).strip()
        if ss.endswith("Z"):
            ss = ss[:-1] + "+00:00"
        return _ensure_utc(datetime.fromisoformat(ss))
    except Exception:
        return None

def _py_weekday(dtobj: datetime | None) -> int | None:
    """
    Python weekday(): Mon=0 ... Sun=6 (estable).
    """
    if dtobj is None:
        return None
    try:
        return int(dtobj.weekday())
    except Exception:
        return None

# ========================= Load trades =========================
df = pl.read_parquet(str(trades_path))

REQ = ["symbol","fold_id","segment","side","entry_time","exit_time","bars_held","exit_reason","net_ret_base","net_ret_stress"]
miss = [c for c in REQ if c not in df.columns]
if miss:
    raise RuntimeError(f"[Celda 08B] ERROR: trades parquet no tiene columnas requeridas: {miss}")

# Cast a string y parse a datetime UTC (python)
df = (
    df
    .with_columns([
        pl.col("entry_time").cast(pl.Utf8),
        pl.col("exit_time").cast(pl.Utf8),
    ])
    .with_columns([
        pl.col("entry_time").map_elements(_parse_dt_any, return_dtype=pl.Datetime("us","UTC")).alias("entry_dt"),
        pl.col("exit_time").map_elements(_parse_dt_any, return_dtype=pl.Datetime("us","UTC")).alias("exit_dt"),
    ])
    # weekday robusto v√≠a Python; FIX dtype: declarar Int64 y luego castear a Int32
    .with_columns([
        pl.col("entry_dt").map_elements(_py_weekday, return_dtype=pl.Int64).cast(pl.Int32).alias("entry_dow"),  # Mon=0..Sun=6
        pl.col("exit_dt").map_elements(_py_weekday, return_dtype=pl.Int64).cast(pl.Int32).alias("exit_dow"),
    ])
    .with_columns([
        (pl.col("entry_dow") >= 5).alias("entry_is_weekend"),
        (pl.col("exit_dow") >= 5).alias("exit_is_weekend"),
    ])
)

print(f"[Celda 08B] trades rows = {df.height}")

weekend_entry_share = float(df.select(pl.col("entry_is_weekend").mean()).item() or 0.0)
weekend_exit_share  = float(df.select(pl.col("exit_is_weekend").mean()).item() or 0.0)
print(f"[Celda 08B] weekend_entry_share = {weekend_entry_share:.6f} weekend_exit_share={weekend_exit_share:.6f} (ENFORCE_MON_FRI_GATE={ENFORCE_MON_FRI_GATE})")

# ========================= Evidencia por s√≠mbolo =========================
by_sym = (
    df.group_by(["symbol","fold_id","segment"])
      .agg([
          pl.len().alias("n_trades"),
          pl.col("entry_is_weekend").mean().alias("weekend_entry_share"),
          pl.col("exit_is_weekend").mean().alias("weekend_exit_share"),
      ])
      .sort(["weekend_entry_share","n_trades"], descending=[True, True])
)

print("[Celda 08B] weekend_entry_share por symbol/segment (top):")
print(by_sym.head(20))

# Persist offenders (para inspecci√≥n)
off = (
    df.filter(pl.col("entry_is_weekend") == True)
      .select([
          "symbol","fold_id","segment","side",
          "entry_time","exit_time","bars_held","exit_reason",
          "net_ret_base","net_ret_stress","entry_dow","exit_dow"
      ])
      .sort(["symbol","entry_time"])
)

off.write_parquet(str(out_weekend), compression="zstd")
off.write_parquet(str(snap_weekend), compression="zstd")

snapshot = {
    "created_utc": datetime.utcnow().replace(tzinfo=timezone.utc).isoformat(),
    "inputs": {"trades_path": str(trades_path), "summary_path": str(summary_path)},
    "params": {"ENFORCE_MON_FRI_GATE": ENFORCE_MON_FRI_GATE},
    "metrics": {
        "weekend_entry_share": float(weekend_entry_share),
        "weekend_exit_share": float(weekend_exit_share),
        "n_weekend_entries": int(off.height),
    },
    "outputs": {"weekend_entries_artifacts": str(out_weekend), "weekend_entries_snapshot": str(snap_weekend)},
    "notes": [
        "weekday se calcula con python datetime.weekday(): Mon=0..Sun=6 (estable).",
        "FIX: map_elements devuelve Int64; se castea a Int32 para consistencia.",
        "El gate Mon‚ÄìFri debe aplicarse sobre entry_time (t+1), no sobre signal_time (t).",
    ],
}
snap_json.write_text(json.dumps(snapshot, indent=2, ensure_ascii=False), encoding="utf-8")

print(f"üíæ OUTPUT   ‚Üí {out_weekend} (OK) | rows={off.height}")
print(f"üíæ SNAPSHOT ‚Üí {snap_weekend} (OK)")
print(f"üíæ SNAPSHOT ‚Üí {snap_json} (OK)")

# ========================= Gate: Mon‚ÄìFri compliance =========================
if ENFORCE_MON_FRI_GATE and off.height > 0:
    top_bad = by_sym.select(["symbol","segment","weekend_entry_share","n_trades"]).head(8).to_dicts()
    raise RuntimeError(
        f"[Celda 08B] GATE FAIL: Hay entradas en fin de semana con Mon‚ÄìFri ON. "
        f"share={weekend_entry_share:.6f} | offenders={off.height} | top={top_bad}"
    )

print(">>> Celda 08B v1.0.4 :: OK")


>>> Celda 08B v1.0.4 :: QA Institucional del Motor (Engine) [Mon‚ÄìFri evidence + hard gate | weekday-robusto + dtype-fix]
[Celda 08B] trades rows = 524
[Celda 08B] weekend_entry_share = 0.000000 weekend_exit_share=0.000000 (ENFORCE_MON_FRI_GATE=True)
[Celda 08B] weekend_entry_share por symbol/segment (top):
shape: (8, 6)
‚îå‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îê
‚îÇ symbol ‚îÜ fold_id ‚îÜ segment ‚îÜ n_trades ‚îÜ weekend_entry_share ‚îÜ weekend_exit_share ‚îÇ
‚îÇ ---    ‚îÜ ---     ‚îÜ ---     ‚îÜ ---      ‚îÜ ---                 ‚îÜ ---                ‚îÇ
‚îÇ str    ‚îÜ str     ‚îÜ str     ‚îÜ u32      ‚îÜ f64                 ‚îÜ f64                ‚îÇ
‚ïû‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ï™‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ï™‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ï™‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ï™‚ïê

In [41]:
# ===================== Celda 09 v1.0.1 ‚Äî Engine Institutional Report (post-08B)
# [Equity curve + MDD + Exposure + Turnover + Exit reasons + WFO-safe | NO map_groups]
# FIX: reemplaza group_by().map_groups() por window functions (cum_sum/cum_max over group)
# =====================================================================================================

from __future__ import annotations

from pathlib import Path
from datetime import datetime, timezone
import json
import polars as pl

print(">>> Celda 09 v1.0.1 :: Engine Institutional Report (post-08B) [equity + MDD + exposure + turnover + exits | WFO-safe]")

# ========================= Validaciones GLOBAL_STATE =========================
if "GLOBAL_STATE" not in globals() or not isinstance(GLOBAL_STATE, dict):
    raise RuntimeError("[Celda 09] ERROR: GLOBAL_STATE no existe o no es dict.")

if "paths" not in GLOBAL_STATE:
    raise RuntimeError("[Celda 09] ERROR: falta GLOBAL_STATE['paths'].")

paths = GLOBAL_STATE["paths"]
ART = Path(paths["artifacts"]).resolve()
SNAP_DIR = Path(paths.get("run_snapshots") or (ART / "snapshots")).resolve()
SNAP_DIR.mkdir(parents=True, exist_ok=True)

# ========================= Localizar inputs del engine =========================
# Preferir rutas en GLOBAL_STATE si existen; si no, usar layout est√°ndar
engine_state = GLOBAL_STATE.get("backtest_engine", {}) or GLOBAL_STATE.get("engine", {}) or {}
engine_dir = ART / "backtests" / "backtest_engine_v10"

trades_path = (
    engine_state.get("trades_path")
    or engine_state.get("trades_engine_path")
    or str(engine_dir / "trades_engine_v10.parquet")
)
summary_path = (
    engine_state.get("summary_path")
    or engine_state.get("summary_engine_path")
    or str(engine_dir / "summary_engine_v10.parquet")
)

trades_path = str(Path(trades_path).resolve())
summary_path = str(Path(summary_path).resolve())

if not Path(trades_path).exists():
    raise RuntimeError(f"[Celda 09] ERROR: trades parquet no existe: {trades_path}")

df = pl.read_parquet(trades_path)

REQ = ["symbol","fold_id","segment","side","entry_time","exit_time","net_ret_base","net_ret_stress","exit_reason","bars_held"]
miss = [c for c in REQ if c not in df.columns]
if miss:
    raise RuntimeError(f"[Celda 09] ERROR: trades parquet no tiene columnas requeridas: {miss}. cols={df.columns}")

# ========================= Cast times robusto (sin map_elements) =========================
# entry_time/exit_time pueden venir como str o datetime; estandarizamos a datetime UTC
df = df.with_columns([
    pl.col("entry_time").cast(pl.Utf8, strict=False).str.to_datetime(time_zone="UTC", strict=False).alias("entry_dt"),
    pl.col("exit_time").cast(pl.Utf8, strict=False).str.to_datetime(time_zone="UTC", strict=False).alias("exit_dt"),
])

if df.select([pl.col("entry_dt").is_null().mean()]).item() > 0.0:
    # fallback adicional: si por alguna raz√≥n to_datetime no parsea, intenta cast directo
    df = df.with_columns([
        pl.col("entry_time").cast(pl.Datetime("us", "UTC"), strict=False).alias("entry_dt"),
        pl.col("exit_time").cast(pl.Datetime("us", "UTC"), strict=False).alias("exit_dt"),
    ])

# sanity
null_entry = float(df.select(pl.col("entry_dt").is_null().mean()).item())
null_exit  = float(df.select(pl.col("exit_dt").is_null().mean()).item())
if null_entry > 1e-9 or null_exit > 1e-9:
    raise RuntimeError(f"[Celda 09] ERROR: no se pudieron parsear fechas. null_entry={null_entry} null_exit={null_exit}")

BARS_PER_DAY = 288
df = df.with_columns([
    (pl.col("bars_held") * 5).cast(pl.Int64).alias("hold_minutes"),
    (pl.col("bars_held") >= pl.lit(BARS_PER_DAY)).alias("hold_ge_1d"),
])

# ========================= Mon‚ÄìFri evidence (deber√≠a estar en 0 tras 08 v1.0.1) =========================
df = df.with_columns([
    pl.col("entry_dt").dt.weekday().cast(pl.Int32).alias("entry_dow"),
    pl.col("exit_dt").dt.weekday().cast(pl.Int32).alias("exit_dow"),
]).with_columns([
    (pl.col("entry_dow") >= 5).alias("entry_is_weekend"),
    (pl.col("exit_dow") >= 5).alias("exit_is_weekend"),
])

weekend_entry_share = float(df.select(pl.col("entry_is_weekend").mean()).item())
weekend_exit_share  = float(df.select(pl.col("exit_is_weekend").mean()).item())

monfri = bool((GLOBAL_STATE.get("execution", {}) or {}).get("MON_FRI", True))
print(f"[Celda 09] weekend_entry_share={weekend_entry_share:.6f} weekend_exit_share={weekend_exit_share:.6f} (MON_FRI={monfri})")

# ========================= Outputs =========================
OUT_DIR = engine_dir / "reports_engine_v10"
OUT_DIR.mkdir(parents=True, exist_ok=True)

OUT_EQUITY = OUT_DIR / "equity_curve_engine_v10.parquet"
OUT_KPIS   = OUT_DIR / "kpis_engine_v10.parquet"
OUT_EXITS  = OUT_DIR / "exit_reasons_engine_v10.parquet"

SNAP_EQUITY = SNAP_DIR / "equity_curve_engine_v10.parquet"
SNAP_KPIS   = SNAP_DIR / "kpis_engine_v10.parquet"
SNAP_EXITS  = SNAP_DIR / "exit_reasons_engine_v10.parquet"

# ========================= Equity curve (FIX: window functions) =========================
group_cols = ["symbol","fold_id","segment","side"]

# Orden estricto para que cum_sum/cum_max sea determinista
df2 = (
    df.select([
        "symbol","fold_id","segment","side",
        "entry_dt","exit_dt",
        "net_ret_base","net_ret_stress",
        "exit_reason","bars_held","hold_minutes"
    ])
    .sort(group_cols + ["entry_dt","exit_dt"])
    .with_columns([
        # clip para evitar log1p inv√°lido si hay retornos extremos (< -100%)
        pl.col("net_ret_base").fill_null(0.0).clip(-0.999999999, 10.0).alias("__r_base"),
        pl.col("net_ret_stress").fill_null(0.0).clip(-0.999999999, 10.0).alias("__r_stress"),
    ])
    .with_columns([
        # trade_seq robusto (1..n) sin depender de cum_count/rank
        pl.lit(1).cum_sum().over(group_cols).cast(pl.Int32).alias("trade_seq"),
        pl.col("__r_base").log1p().cum_sum().over(group_cols).alias("__cumlog_base"),
        pl.col("__r_stress").log1p().cum_sum().over(group_cols).alias("__cumlog_stress"),
    ])
    .with_columns([
        pl.col("__cumlog_base").exp().alias("equity_base"),
        pl.col("__cumlog_stress").exp().alias("equity_stress"),
    ])
    .with_columns([
        pl.col("equity_base").cum_max().over(group_cols).alias("__peak_base"),
        pl.col("equity_stress").cum_max().over(group_cols).alias("__peak_stress"),
    ])
    .with_columns([
        (pl.col("equity_base") / pl.col("__peak_base") - 1.0).alias("dd_base"),
        (pl.col("equity_stress") / pl.col("__peak_stress") - 1.0).alias("dd_stress"),
    ])
    .drop(["__r_base","__r_stress","__cumlog_base","__cumlog_stress","__peak_base","__peak_stress"])
)

equity_df = df2.select([
    "symbol","fold_id","segment","side",
    "trade_seq","entry_dt","exit_dt",
    "equity_base","equity_stress","dd_base","dd_stress",
    "exit_reason","bars_held","hold_minutes"
])

equity_df.write_parquet(str(OUT_EQUITY), compression="zstd")
equity_df.write_parquet(str(SNAP_EQUITY), compression="zstd")

# ========================= KPIs por grupo =========================
kpis = (
    df2.group_by(group_cols)
       .agg([
           pl.len().alias("n_trades"),

           pl.col("net_ret_base").mean().alias("mean_ret_base"),
           pl.col("net_ret_base").std().alias("std_ret_base"),
           (pl.col("net_ret_base") > 0).mean().alias("win_rate_base"),

           pl.col("net_ret_stress").mean().alias("mean_ret_stress"),
           pl.col("net_ret_stress").std().alias("std_ret_stress"),
           (pl.col("net_ret_stress") > 0).mean().alias("win_rate_stress"),

           # total return por log-suma (m√°s estable)
           (pl.col("net_ret_base").fill_null(0.0).clip(-0.999999999, 10.0).log1p().sum().exp() - 1.0).alias("tot_ret_base"),
           (pl.col("net_ret_stress").fill_null(0.0).clip(-0.999999999, 10.0).log1p().sum().exp() - 1.0).alias("tot_ret_stress"),

           # MDD desde equity_df: como tenemos dd_base/dd_stress por trade, min es drawdown m√°ximo
           pl.col("dd_base").min().alias("mdd_base"),
           pl.col("dd_stress").min().alias("mdd_stress"),

           pl.col("bars_held").mean().alias("avg_bars_held"),
           pl.col("hold_minutes").sum().alias("sum_hold_minutes"),

           pl.col("entry_dt").min().alias("tmin"),
           pl.col("exit_dt").max().alias("tmax"),
           pl.col("entry_dt").dt.date().n_unique().alias("n_active_days"),
       ])
       .with_columns([
           # sharpe-like por trade (no anualizado; diagn√≥stico)
           (pl.col("mean_ret_base") / pl.col("std_ret_base")).alias("sharpe_like_base"),
           (pl.col("mean_ret_stress") / pl.col("std_ret_stress")).alias("sharpe_like_stress"),

           # turnover: trades por d√≠a activo
           (pl.col("n_trades") / pl.col("n_active_days").clip(1, 10_000)).alias("trades_per_active_day"),

           # exposure aproximada: minutos en mercado / minutos entre tmin..tmax
           (
               pl.col("sum_hold_minutes") /
               ((pl.col("tmax") - pl.col("tmin")).dt.total_minutes().clip(1, 10_000_000))
           ).alias("exposure_share"),
       ])
       .sort(group_cols)
)

kpis.write_parquet(str(OUT_KPIS), compression="zstd")
kpis.write_parquet(str(SNAP_KPIS), compression="zstd")

# ========================= Exit reasons =========================
exits = (
    df.group_by(["symbol","fold_id","segment","side","exit_reason"])
      .agg([pl.len().alias("n")])
      .with_columns([
          (pl.col("n") / pl.col("n").sum().over(["symbol","fold_id","segment","side"])).alias("share")
      ])
      .sort(["symbol","fold_id","segment","side","n"], descending=[False,False,False,False,True])
)

exits.write_parquet(str(OUT_EXITS), compression="zstd")
exits.write_parquet(str(SNAP_EXITS), compression="zstd")

# ========================= Snapshot JSON =========================
snap = {
    "created_utc": datetime.utcnow().replace(tzinfo=timezone.utc).isoformat(),
    "inputs": {
        "trades_path": trades_path,
        "summary_path": summary_path if Path(summary_path).exists() else None,
    },
    "mon_fri": {
        "enabled": monfri,
        "weekend_entry_share": weekend_entry_share,
        "weekend_exit_share": weekend_exit_share,
    },
    "outputs": {
        "equity_curve": str(OUT_EQUITY),
        "kpis": str(OUT_KPIS),
        "exit_reasons": str(OUT_EXITS),
        "snap_equity": str(SNAP_EQUITY),
        "snap_kpis": str(SNAP_KPIS),
        "snap_exit_reasons": str(SNAP_EXITS),
    },
    "notes": [
        "Equity curve computada con window functions (sin map_groups).",
        "tot_ret_* computado por suma log1p para estabilidad num√©rica.",
        "Exposure es aproximaci√≥n por minutos en mercado / minutos entre tmin..tmax (por grupo).",
    ],
}

snap_json = OUT_DIR / "engine_report_snapshot.json"
snap_json.write_text(json.dumps(snap, indent=2, ensure_ascii=False), encoding="utf-8")

snap_json2 = SNAP_DIR / "engine_report_snapshot.json"
snap_json2.write_text(json.dumps(snap, indent=2, ensure_ascii=False), encoding="utf-8")

GLOBAL_STATE["engine_report"] = {
    "equity_curve_path": str(OUT_EQUITY),
    "kpis_path": str(OUT_KPIS),
    "exit_reasons_path": str(OUT_EXITS),
    "snapshot_json": str(snap_json),
}

print(f"üíæ OUTPUT   ‚Üí {OUT_EQUITY} (OK) | rows={equity_df.height}")
print(f"üíæ OUTPUT   ‚Üí {OUT_KPIS} (OK)   | rows={kpis.height}")
print(f"üíæ OUTPUT   ‚Üí {OUT_EXITS} (OK)  | rows={exits.height}")
print(f"üíæ SNAPSHOT ‚Üí {SNAP_EQUITY} (OK)")
print(f"üíæ SNAPSHOT ‚Üí {SNAP_KPIS} (OK)")
print(f"üíæ SNAPSHOT ‚Üí {SNAP_EXITS} (OK)")
print(f"üíæ SNAPSHOT ‚Üí {snap_json2} (OK)")
print(">>> Celda 09 v1.0.1 :: OK")


>>> Celda 09 v1.0.1 :: Engine Institutional Report (post-08B) [equity + MDD + exposure + turnover + exits | WFO-safe]
[Celda 09] weekend_entry_share=0.122137 weekend_exit_share=0.124046 (MON_FRI=True)
üíæ OUTPUT   ‚Üí C:\Quant\MT5_Data_Extraction\ER_STRATEGY_LAB\artifacts\backtests\backtest_engine_v10\reports_engine_v10\equity_curve_engine_v10.parquet (OK) | rows=524
üíæ OUTPUT   ‚Üí C:\Quant\MT5_Data_Extraction\ER_STRATEGY_LAB\artifacts\backtests\backtest_engine_v10\reports_engine_v10\kpis_engine_v10.parquet (OK)   | rows=16
üíæ OUTPUT   ‚Üí C:\Quant\MT5_Data_Extraction\ER_STRATEGY_LAB\artifacts\backtests\backtest_engine_v10\reports_engine_v10\exit_reasons_engine_v10.parquet (OK)  | rows=47
üíæ SNAPSHOT ‚Üí C:\Quant\MT5_Data_Extraction\ER_STRATEGY_LAB\research_logs\runs\20251222_103043\snapshots\equity_curve_engine_v10.parquet (OK)
üíæ SNAPSHOT ‚Üí C:\Quant\MT5_Data_Extraction\ER_STRATEGY_LAB\research_logs\runs\20251222_103043\snapshots\kpis_engine_v10.parquet (OK)
üíæ SNAPSHOT 

In [42]:
# ===================== Celda 10 v1.0 ‚Äî Selecci√≥n Institucional post-Engine (TREND, M5)
# [OOS-first + gates + score + pick side per symbol | WFO-safe]
# =====================================================================================================

from __future__ import annotations

from pathlib import Path
from datetime import datetime, timezone
import json
import polars as pl

print(">>> Celda 10 v1.0 :: Selecci√≥n Institucional post-Engine [OOS-first + gates + score]")

# ========================= Validaciones GLOBAL_STATE =========================
if "GLOBAL_STATE" not in globals() or not isinstance(GLOBAL_STATE, dict):
    raise RuntimeError("[Celda 10] ERROR: GLOBAL_STATE no existe o no es dict.")

paths = GLOBAL_STATE.get("paths", {}) or {}
ART = Path(paths["artifacts"]).resolve()
SNAP_DIR = Path(paths.get("run_snapshots") or (ART / "snapshots")).resolve()
SNAP_DIR.mkdir(parents=True, exist_ok=True)

rep = GLOBAL_STATE.get("engine_report", {}) or {}
kpis_path = rep.get("kpis_path")
if not kpis_path or not Path(kpis_path).exists():
    raise RuntimeError("[Celda 10] ERROR: no encuentro engine_report.kpis_path. Ejecuta Celda 09 v1.0.1.")

kpis = pl.read_parquet(str(kpis_path))

REQ = ["symbol","fold_id","segment","side","n_trades","tot_ret_base","mdd_base","win_rate_base","exposure_share","trades_per_active_day","sharpe_like_base"]
miss = [c for c in REQ if c not in kpis.columns]
if miss:
    raise RuntimeError(f"[Celda 10] ERROR: kpis missing cols={miss}. cols={kpis.columns}")

# ========================= Par√°metros institucionales (gates) =========================
# Nota: Estos gates son ‚Äúprimer pase‚Äù (comit√©). Luego viene la etapa de WFO/optim por IS.
MIN_OOS_TRADES = 80
MAX_MDD_BASE   = -0.20     # no peor que -20% (trade-level equity, por grupo)
MIN_TOTRET_OOS = 0.00      # OOS >= 0 (base)
MIN_WINRATE    = 0.48      # permisivo (depende del payoff)
MAX_EXPOSURE   = 0.65      # no estar siempre en mercado

# Score institucional (simple, expl√≠cito, audit-able)
# - premia retorno OOS
# - penaliza drawdown
# - penaliza exceso de exposici√≥n
# - bonus leve por sharpe-like
def _score_expr():
    return (
        pl.col("tot_ret_base")
        + 0.15 * pl.col("sharpe_like_base").fill_null(0.0).clip(-5, 5)
        + 0.05 * (pl.col("win_rate_base").fill_null(0.0) - 0.5)
        - 1.25 * (-pl.col("mdd_base")).fill_null(0.0)   # mdd_base es negativo
        - 0.25 * pl.col("exposure_share").fill_null(0.0)
    ).alias("score_oos")

# ========================= Filtrar OOS y evaluar gates =========================
oos = kpis.filter(pl.col("segment") == "OOS").with_columns([_score_expr()])

oos = oos.with_columns([
    (pl.col("n_trades") >= pl.lit(MIN_OOS_TRADES)).alias("gate_trades"),
    (pl.col("mdd_base") >= pl.lit(MAX_MDD_BASE)).alias("gate_mdd"),
    (pl.col("tot_ret_base") >= pl.lit(MIN_TOTRET_OOS)).alias("gate_ret"),
    (pl.col("win_rate_base") >= pl.lit(MIN_WINRATE)).alias("gate_wr"),
    (pl.col("exposure_share") <= pl.lit(MAX_EXPOSURE)).alias("gate_exposure"),
]).with_columns([
    (pl.all_horizontal(["gate_trades","gate_mdd","gate_ret","gate_wr","gate_exposure"])).alias("gate_pass"),
])

# ========================= Elegir side por s√≠mbolo (mejor score OOS) =========================
# Si ning√∫n side pasa gate_pass, igual guardamos diagn√≥stico como FAIL.
best = (
    oos.sort(["symbol","fold_id","gate_pass","score_oos"], descending=[False,False,True,True])
       .group_by(["symbol","fold_id"], maintain_order=True)
       .agg([
           pl.first("side").alias("picked_side"),
           pl.first("gate_pass").alias("picked_gate_pass"),
           pl.first("score_oos").alias("picked_score_oos"),
           pl.first("n_trades").alias("picked_n_trades_oos"),
           pl.first("tot_ret_base").alias("picked_tot_ret_oos"),
           pl.first("mdd_base").alias("picked_mdd_oos"),
           pl.first("win_rate_base").alias("picked_win_rate_oos"),
           pl.first("exposure_share").alias("picked_exposure_oos"),
           pl.first("sharpe_like_base").alias("picked_sharpe_like_oos"),
           pl.first("trades_per_active_day").alias("picked_trades_per_day_oos"),
       ])
)

# diagn√≥stico: cu√°ntos pasan gates
diag = (
    oos.group_by(["symbol","fold_id"])
       .agg([
           pl.len().alias("n_candidates"),
           pl.col("gate_pass").sum().alias("n_gate_pass"),
           pl.col("score_oos").max().alias("best_score_any"),
       ])
)

sel = best.join(diag, on=["symbol","fold_id"], how="left").with_columns([
    pl.when(pl.col("picked_gate_pass") == True).then(pl.lit("GO")).otherwise(pl.lit("NO_GO")).alias("status"),
])

# ========================= Persistencia =========================
OUT_SEL_DIR = ART / "selection"
OUT_SEL_DIR.mkdir(parents=True, exist_ok=True)

OUT_SEL = OUT_SEL_DIR / "selection_engine_v10.parquet"
SNAP_SEL = SNAP_DIR / "selection_engine_v10.parquet"

sel.write_parquet(str(OUT_SEL), compression="zstd")
sel.write_parquet(str(SNAP_SEL), compression="zstd")

snapshot = {
    "created_utc": datetime.utcnow().replace(tzinfo=timezone.utc).isoformat(),
    "inputs": {"kpis_path": str(kpis_path)},
    "gates": {
        "MIN_OOS_TRADES": MIN_OOS_TRADES,
        "MAX_MDD_BASE": MAX_MDD_BASE,
        "MIN_TOTRET_OOS": MIN_TOTRET_OOS,
        "MIN_WINRATE": MIN_WINRATE,
        "MAX_EXPOSURE": MAX_EXPOSURE,
    },
    "outputs": {"selection": str(OUT_SEL), "snapshot_selection": str(SNAP_SEL)},
    "notes": [
        "Selecci√≥n basada en OOS (post-engine) por s√≠mbolo/fold: elige side con mejor score si pasa gates.",
        "Si ning√∫n side pasa, status=NO_GO (pero queda diagn√≥stico y best_score_any).",
    ],
}

snap_json = OUT_SEL_DIR / "selection_engine_v10_snapshot.json"
snap_json.write_text(json.dumps(snapshot, indent=2, ensure_ascii=False), encoding="utf-8")

snap_json2 = SNAP_DIR / "selection_engine_v10_snapshot.json"
snap_json2.write_text(json.dumps(snapshot, indent=2, ensure_ascii=False), encoding="utf-8")

GLOBAL_STATE["selection"] = {
    "selection_path": str(OUT_SEL),
    "snapshot_parquet": str(SNAP_SEL),
    "snapshot_json": str(snap_json),
}

print(f"üíæ OUTPUT   ‚Üí {OUT_SEL} (OK) | rows={sel.height}")
print(f"üíæ SNAPSHOT ‚Üí {SNAP_SEL} (OK)")
print(f"üíæ SNAPSHOT ‚Üí {snap_json2} (OK)")
print(">>> Celda 10 v1.0 :: OK")


>>> Celda 10 v1.0 :: Selecci√≥n Institucional post-Engine [OOS-first + gates + score]
üíæ OUTPUT   ‚Üí C:\Quant\MT5_Data_Extraction\ER_STRATEGY_LAB\artifacts\selection\selection_engine_v10.parquet (OK) | rows=4
üíæ SNAPSHOT ‚Üí C:\Quant\MT5_Data_Extraction\ER_STRATEGY_LAB\research_logs\runs\20251222_103043\snapshots\selection_engine_v10.parquet (OK)
üíæ SNAPSHOT ‚Üí C:\Quant\MT5_Data_Extraction\ER_STRATEGY_LAB\research_logs\runs\20251222_103043\snapshots\selection_engine_v10_snapshot.json (OK)
>>> Celda 10 v1.0 :: OK


In [43]:
# ===================== Celda 11 v1.0.2 ‚Äî Deploy Pack institucional (Freeze config)
# [robusto: GO-like -> si no hay, fallback TOPK por score (incluye picked_score_oos) -> si no hay score, export ALL]
# =====================================================================================================

from __future__ import annotations

from pathlib import Path
from datetime import datetime, timezone
import json
import polars as pl

print(">>> Celda 11 v1.0.2 :: Deploy Pack institucional (freeze) [fix score-detect + fallback TOPK | WFO-safe]")

# ========================= Validaciones GLOBAL_STATE =========================
if "GLOBAL_STATE" not in globals() or not isinstance(GLOBAL_STATE, dict):
    raise RuntimeError("[Celda 11] ERROR: GLOBAL_STATE no existe o no es dict.")

paths = GLOBAL_STATE.get("paths", {}) or {}
ART = Path(paths["artifacts"]).resolve()
SNAP_DIR = Path(paths.get("run_snapshots") or (ART / "snapshots")).resolve()
SNAP_DIR.mkdir(parents=True, exist_ok=True)

# ========================= Config =========================
FILTER_GO_LIKE_FIRST = True
GO_LIKE = {"GO", "OK", "PASS", "SELECTED", "SELECT"}   # ampliable

# Si NO hay GO-like, intentar TOPK por score OOS (institucional research deploy)
FALLBACK_TOPK = 2

# Si existe picked_gate_pass, puedes forzar que el TOPK salga de esos (si no quedan, usa todos)
FALLBACK_PREFER_GATE_PASS = True

# √öltimo fallback si no hay ninguna columna score detectada
FALLBACK_EXPORT_ALL_IF_NO_SCORE = True

# ========================= Load selection =========================
sel_state = GLOBAL_STATE.get("selection", {}) or {}
sel_path = sel_state.get("selection_path") or str((ART / "selection" / "selection_engine_v10.parquet").resolve())
sel_path = str(Path(sel_path).resolve())
if not Path(sel_path).exists():
    raise RuntimeError(f"[Celda 11] ERROR: selection parquet no existe: {sel_path}")

sel = pl.read_parquet(sel_path)
if sel.height == 0:
    raise RuntimeError("[Celda 11] ERROR: selection vac√≠o (0 filas).")

print(f"[Celda 11] selection rows = {sel.height}")
print(f"[Celda 11] selection cols = {sel.columns}")

REQ_BASE = ["symbol", "fold_id"]
miss = [c for c in REQ_BASE if c not in sel.columns]
if miss:
    raise RuntimeError(f"[Celda 11] ERROR: selection missing cols={miss}. cols={sel.columns}")

# side col
SIDE_COL_CANDS = ["picked_side", "best_side", "side"]
SIDE_COL = next((c for c in SIDE_COL_CANDS if c in sel.columns), None)
if SIDE_COL is None:
    raise RuntimeError(f"[Celda 11] ERROR: no encuentro columna de lado. cands={SIDE_COL_CANDS} cols={sel.columns}")

# status col
STATUS_COL = "status" if "status" in sel.columns else None
if STATUS_COL is None:
    sel = sel.with_columns(pl.lit("UNKNOWN").alias("status"))
    STATUS_COL = "status"

# evidencia de status
status_counts = sel.group_by(STATUS_COL).agg(pl.len().alias("n")).sort("n", descending=True)
print("[Celda 11] status counts:")
print(status_counts)

# ========================= Folds (WFO windows) =========================
wfo = GLOBAL_STATE.get("wfo", {}) or {}
folds = wfo.get("folds") or []
if not folds:
    folds_path = wfo.get("folds_path")
    if folds_path and Path(folds_path).exists():
        folds = json.loads(Path(folds_path).read_text(encoding="utf-8")).get("folds", [])
if not folds:
    raise RuntimeError("[Celda 11] ERROR: no hay folds en GLOBAL_STATE['wfo']. Ejecuta Celda 04.")

fold_map = {str(f["fold_id"]): f for f in folds if "fold_id" in f}
if not fold_map:
    raise RuntimeError("[Celda 11] ERROR: fold_map vac√≠o.")

def _fold_get(fid: str, k: str):
    f = fold_map.get(str(fid))
    return None if f is None else f.get(k)

# ========================= Gate thresholds (Celda 06) =========================
gate_state = GLOBAL_STATE.get("regime_gate", {}) or {}
gate_path = gate_state.get("gate_table_path")
if not gate_path or not Path(gate_path).exists():
    raise RuntimeError("[Celda 11] ERROR: no encuentro regime_gate.gate_table_path. Ejecuta Celda 06.")
gate_df = pl.read_parquet(str(gate_path))

REQ_GATE = ["symbol","fold_id","thr_er","thr_mom","thr_vol","scheme"]
miss = [c for c in REQ_GATE if c not in gate_df.columns]
if miss:
    raise RuntimeError(f"[Celda 11] ERROR: gate_table missing cols={miss}. cols={gate_df.columns}")

# ========================= Costs (Celda 03) =========================
cost_state = GLOBAL_STATE.get("cost_model", {}) or {}
costs_by_symbol = cost_state.get("costs_by_symbol") or cost_state.get("symbols") or {}
if not costs_by_symbol:
    raise RuntimeError("[Celda 11] ERROR: no encuentro cost_model.costs_by_symbol. Ejecuta Celda 03.")
cost_roundtrip = bool(cost_state.get("cost_reported_is_roundtrip", False))

def _cost(sym: str, key: str, default=0.0):
    d = costs_by_symbol.get(sym) or costs_by_symbol.get(sym.upper()) or {}
    return float(d.get(key, default))

# ========================= Engine params (Celda 08) =========================
engine_state = GLOBAL_STATE.get("backtest_engine", {}) or GLOBAL_STATE.get("engine", {}) or {}
engine_params = engine_state.get("params") or engine_state.get("engine_params") or {}

engine_dir = ART / "backtests" / "backtest_engine_v10"
snap_json = engine_dir / "backtest_engine_v10_snapshot.json"
if (not engine_params) and snap_json.exists():
    try:
        j = json.loads(snap_json.read_text(encoding="utf-8"))
        engine_params = j.get("params") or {}
    except Exception:
        engine_params = {}

SL_ATR = float(engine_params.get("SL_ATR", 2.5))
TP_ATR = float(engine_params.get("TP_ATR", 5.0))
TRAIL_ATR = float(engine_params.get("TRAIL_ATR", 2.0))
TIME_STOP_BARS = int(engine_params.get("TIME_STOP_BARS", 288))
EMA_FILTER = bool(engine_params.get("EMA_FILTER", True))
MON_FRI = bool(engine_params.get("MON_FRI", True))

exec_conv = (GLOBAL_STATE.get("execution", {}) or {}).get("convention") or "signal@close(t) -> entry@t+1"

# ========================= Selecci√≥n efectiva (GO-like o fallback TOPK) =========================
sel_norm = sel.with_columns([
    pl.col("symbol").cast(pl.Utf8).str.to_uppercase().alias("symbol"),
    pl.col("fold_id").cast(pl.Utf8).alias("fold_id"),
    pl.col(SIDE_COL).cast(pl.Utf8).alias("picked_side"),
    pl.col(STATUS_COL).cast(pl.Utf8).alias("status"),
])

mode = None
sel_eff = None

# GO-like first
if FILTER_GO_LIKE_FIRST:
    sel_go = sel_norm.filter(pl.col("status").is_in(list(GO_LIKE)))
    if sel_go.height > 0:
        sel_eff = sel_go
        mode = "GO_LIKE_ONLY"

# Fallback TOPK por score (incluye picked_score_oos)
if sel_eff is None:
    SCORE_CANDS = [
        # preferido (tu caso)
        "picked_score_oos",
        # alternativos razonables
        "best_score_any",
        "picked_sharpe_like_oos",
        "picked_tot_ret_oos",
        "picked_win_rate_oos",
        # gen√©ricos
        "score", "score_oos", "oos_score", "score_final", "score_engine",
        "score_total", "score_oos_final", "score_rank",
    ]
    score_col = next((c for c in SCORE_CANDS if c in sel_norm.columns), None)

    base_pool = sel_norm
    if FALLBACK_PREFER_GATE_PASS and ("picked_gate_pass" in sel_norm.columns):
        pool_gp = sel_norm.filter(pl.col("picked_gate_pass") == True)
        if pool_gp.height > 0:
            base_pool = pool_gp

    if score_col is not None:
        sel_eff = base_pool.sort(score_col, descending=True).head(int(min(FALLBACK_TOPK, base_pool.height)))
        mode = f"FALLBACK_TOPK_BY_{score_col}"
    else:
        if not FALLBACK_EXPORT_ALL_IF_NO_SCORE:
            raise RuntimeError("[Celda 11] GATE FAIL: no hay GO-like y no hay score para fallback TOPK.")
        sel_eff = base_pool
        mode = "FALLBACK_EXPORT_ALL_NO_SCORE"

print(f"[Celda 11] deploy selection mode = {mode} | rows={sel_eff.height}")

# ========================= Enrich + joins =========================
sel3 = sel_eff.join(
    gate_df.select(["symbol","fold_id","thr_er","thr_mom","thr_vol","scheme"]),
    on=["symbol","fold_id"],
    how="left"
)

sel3 = sel3.with_columns([
    pl.col("fold_id").map_elements(lambda x: _fold_get(str(x), "IS_start"), return_dtype=pl.Utf8).alias("IS_start"),
    pl.col("fold_id").map_elements(lambda x: _fold_get(str(x), "IS_end"), return_dtype=pl.Utf8).alias("IS_end"),
    pl.col("fold_id").map_elements(lambda x: _fold_get(str(x), "OOS_start"), return_dtype=pl.Utf8).alias("OOS_start"),
    pl.col("fold_id").map_elements(lambda x: _fold_get(str(x), "OOS_end"), return_dtype=pl.Utf8).alias("OOS_end"),
])

sel3 = sel3.with_columns([
    pl.col("symbol").map_elements(lambda s: _cost(str(s), "COST_BASE_BPS", 0.0), return_dtype=pl.Float64).alias("COST_BASE_BPS"),
    pl.col("symbol").map_elements(lambda s: _cost(str(s), "COST_STRESS_BPS", 0.0), return_dtype=pl.Float64).alias("COST_STRESS_BPS"),
])

sel3 = sel3.with_columns([
    pl.lit(mode).alias("deploy_mode"),
    pl.lit(SL_ATR).alias("SL_ATR"),
    pl.lit(TP_ATR).alias("TP_ATR"),
    pl.lit(TRAIL_ATR).alias("TRAIL_ATR"),
    pl.lit(TIME_STOP_BARS).alias("TIME_STOP_BARS"),
    pl.lit(EMA_FILTER).alias("EMA_FILTER"),
    pl.lit(MON_FRI).alias("MON_FRI"),
    pl.lit(cost_roundtrip).alias("COST_REPORTED_IS_ROUNDTRIP"),
    pl.lit(exec_conv).alias("EXEC_CONVENTION"),
])

# columnas de evidencia si existen (tu selection trae varias picked_*)
EVID_CANDS = [
    "picked_gate_pass","picked_score_oos","picked_n_trades_oos","picked_tot_ret_oos","picked_mdd_oos",
    "picked_win_rate_oos","picked_exposure_oos","picked_sharpe_like_oos","picked_trades_per_day_oos",
    "n_candidates","n_gate_pass","best_score_any"
]
EVID_PRESENT = [c for c in EVID_CANDS if c in sel3.columns]

DEPLOY_COLS = [
    "symbol","fold_id","picked_side","status","deploy_mode",
    *EVID_PRESENT,
    "IS_start","IS_end","OOS_start","OOS_end",
    "scheme","thr_er","thr_mom","thr_vol",
    "SL_ATR","TP_ATR","TRAIL_ATR","TIME_STOP_BARS",
    "EMA_FILTER","MON_FRI",
    "COST_BASE_BPS","COST_STRESS_BPS","COST_REPORTED_IS_ROUNDTRIP",
    "EXEC_CONVENTION",
]
deploy_df = sel3.select([c for c in DEPLOY_COLS if c in sel3.columns]).sort(["symbol","fold_id"])

# ========================= Persistencia =========================
OUT_DIR = ART / "deploy"
OUT_DIR.mkdir(parents=True, exist_ok=True)

OUT_PARQ = OUT_DIR / "deploy_pack_v10.parquet"
SNAP_PARQ = SNAP_DIR / "deploy_pack_v10.parquet"

deploy_df.write_parquet(str(OUT_PARQ), compression="zstd")
deploy_df.write_parquet(str(SNAP_PARQ), compression="zstd")

deploy_pack = {
    "created_utc": datetime.utcnow().replace(tzinfo=timezone.utc).isoformat(),
    "source_selection": sel_path,
    "deploy_mode": mode,
    "engine_params_frozen": {
        "SL_ATR": SL_ATR,
        "TP_ATR": TP_ATR,
        "TRAIL_ATR": TRAIL_ATR,
        "TIME_STOP_BARS": TIME_STOP_BARS,
        "EMA_FILTER": EMA_FILTER,
        "MON_FRI": MON_FRI,
        "execution_convention": exec_conv,
        "cost_reported_is_roundtrip": cost_roundtrip,
    },
    "rows": deploy_df.to_dicts(),
    "notes": [
        "Si deploy_mode != GO_LIKE_ONLY, esto es RESEARCH fallback (no producci√≥n).",
        "Thresholds de regime gate por fold (IS-calibrated).",
        "Incluye costos y par√°metros del motor.",
    ],
}

OUT_JSON = OUT_DIR / "deploy_pack_v10.json"
SNAP_JSON = SNAP_DIR / "deploy_pack_v10.json"
OUT_JSON.write_text(json.dumps(deploy_pack, indent=2, ensure_ascii=False), encoding="utf-8")
SNAP_JSON.write_text(json.dumps(deploy_pack, indent=2, ensure_ascii=False), encoding="utf-8")

GLOBAL_STATE["deploy"] = {
    "deploy_pack_parquet": str(OUT_PARQ),
    "deploy_pack_json": str(OUT_JSON),
    "snapshot_parquet": str(SNAP_PARQ),
    "snapshot_json": str(SNAP_JSON),
    "deploy_mode": mode,
}

print(f"[Celda 11] exported rows = {deploy_df.height} | mode={mode}")
print(f"üíæ OUTPUT   ‚Üí {OUT_PARQ} (OK)")
print(f"üíæ OUTPUT   ‚Üí {OUT_JSON} (OK)")
print(f"üíæ SNAPSHOT ‚Üí {SNAP_PARQ} (OK)")
print(f"üíæ SNAPSHOT ‚Üí {SNAP_JSON} (OK)")
print(">>> Celda 11 v1.0.2 :: OK")


>>> Celda 11 v1.0.2 :: Deploy Pack institucional (freeze) [fix score-detect + fallback TOPK | WFO-safe]
[Celda 11] selection rows = 4
[Celda 11] selection cols = ['symbol', 'fold_id', 'picked_side', 'picked_gate_pass', 'picked_score_oos', 'picked_n_trades_oos', 'picked_tot_ret_oos', 'picked_mdd_oos', 'picked_win_rate_oos', 'picked_exposure_oos', 'picked_sharpe_like_oos', 'picked_trades_per_day_oos', 'n_candidates', 'n_gate_pass', 'best_score_any', 'status']
[Celda 11] status counts:
shape: (1, 2)
‚îå‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îê
‚îÇ status ‚îÜ n   ‚îÇ
‚îÇ ---    ‚îÜ --- ‚îÇ
‚îÇ str    ‚îÜ u32 ‚îÇ
‚ïû‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ï™‚ïê‚ïê‚ïê‚ïê‚ïê‚ï°
‚îÇ NO_GO  ‚îÜ 4   ‚îÇ
‚îî‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¥‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îò
[Celda 11] deploy selection mode = FALLBACK_TOPK_BY_picked_score_oos | rows=2
[Celda 11] exported rows = 2 | mode=FALLBACK_TOPK_BY_picked_score_oos
üíæ OUTPUT   ‚Üí C:\Quant\MT5_Data_Extraction\ER_STRATEGY_LAB\artifacts\deploy\deploy_pack_v10.parquet (OK)
ü

In [44]:
# ===================== Celda 12 v1.0 ‚Äî Deploy Pack QA + Materializaci√≥n por s√≠mbolo [schema-check + exports] =====================
from __future__ import annotations

from pathlib import Path
from datetime import datetime, timezone
import json
import polars as pl

print(">>> Celda 12 v1.0 :: Deploy Pack QA + Materializaci√≥n por s√≠mbolo [schema-check + exports]")

# ========================= Validaciones GLOBAL_STATE =========================
if "GLOBAL_STATE" not in globals() or not isinstance(GLOBAL_STATE, dict):
    raise RuntimeError("[Celda 12] ERROR: GLOBAL_STATE no existe o no es dict.")

if "paths" not in GLOBAL_STATE or not isinstance(GLOBAL_STATE["paths"], dict):
    raise RuntimeError("[Celda 12] ERROR: falta GLOBAL_STATE['paths'].")

paths = GLOBAL_STATE["paths"]
ART_DIR = Path(paths["artifacts"]).resolve()
SNAP_DIR = Path(paths.get("run_snapshots") or (ART_DIR / "snapshots")).resolve()
SNAP_DIR.mkdir(parents=True, exist_ok=True)

# ========================= Localizar deploy_pack =========================
DEPLOY_DIR = ART_DIR / "deploy"
DEPLOY_PARQ = DEPLOY_DIR / "deploy_pack_v10.parquet"
DEPLOY_JSON = DEPLOY_DIR / "deploy_pack_v10.json"

if not DEPLOY_PARQ.exists():
    raise RuntimeError(f"[Celda 12] ERROR: no existe {DEPLOY_PARQ}. Ejecuta Celda 11.")
if not DEPLOY_JSON.exists():
    print(f"[Celda 12] WARN: no existe {DEPLOY_JSON}. Continuo solo con parquet.")

df = pl.read_parquet(str(DEPLOY_PARQ))

print(f"[Celda 12] deploy_pack rows = {df.height}")
print(f"[Celda 12] deploy_pack cols = {df.columns}")

# ========================= Gate: esquema m√≠nimo esperado =========================
# (No forzamos columnas que puedan variar entre versiones; pero s√≠ lo m√≠nimo institucional)
MIN_REQ = ["symbol", "fold_id"]
miss = [c for c in MIN_REQ if c not in df.columns]
if miss:
    raise RuntimeError(f"[Celda 12] GATE FAIL: faltan columnas m√≠nimas {miss} en deploy_pack.")

# Tip: columnas t√≠picas que deber√≠an existir si el freeze est√° completo (no estrictas)
TYPICAL = [
    "picked_side", "picked_score_oos", "picked_gate_pass", "status",
    "thr_er", "thr_mom", "thr_vol",
    "COST_BASE_BPS", "COST_STRESS_BPS",
]
present_typical = [c for c in TYPICAL if c in df.columns]
missing_typical = [c for c in TYPICAL if c not in df.columns]
print(f"[Celda 12] typical_present={present_typical}")
print(f"[Celda 12] typical_missing={missing_typical}")

# ========================= Normalizaci√≥n + vista r√°pida =========================
df2 = df.with_columns([
    pl.col("symbol").cast(pl.Utf8).str.to_uppercase().alias("symbol"),
    pl.col("fold_id").cast(pl.Utf8).alias("fold_id"),
])

# Orden: mejores primero si existe score
if "picked_score_oos" in df2.columns:
    df2 = df2.sort("picked_score_oos", descending=True)
else:
    df2 = df2.sort(["symbol", "fold_id"])

# Resumen breve
cols_preview = [c for c in ["symbol","fold_id","status","picked_side","picked_score_oos","picked_gate_pass"] if c in df2.columns]
if cols_preview:
    print("[Celda 12] preview (top 10):")
    print(df2.select(cols_preview).head(10))

# ========================= Export por s√≠mbolo (JSON) =========================
OUT_CFG_DIR = DEPLOY_DIR / "per_symbol_configs_v10"
OUT_CFG_DIR.mkdir(parents=True, exist_ok=True)

rows = df2.to_dicts()
by_symbol = {}
for r in rows:
    sym = str(r.get("symbol", "")).upper().strip()
    if not sym:
        continue
    by_symbol.setdefault(sym, []).append(r)

# Si hay m√∫ltiples filas por s√≠mbolo (ej. multi-fold), guardamos lista; si es 1, guardamos dict
exported = []
for sym, items in by_symbol.items():
    payload = items[0] if len(items) == 1 else {"symbol": sym, "rows": items}
    outp = OUT_CFG_DIR / f"{sym}_deploy_config_v10.json"
    outp.write_text(json.dumps(payload, indent=2, ensure_ascii=False), encoding="utf-8")
    exported.append(str(outp))

print(f"[Celda 12] exported per-symbol json = {len(exported)} files ‚Üí {OUT_CFG_DIR}")

# ========================= Snapshot: copia deploy_pack + meta =========================
ts = datetime.utcnow().replace(tzinfo=timezone.utc).isoformat()
meta = {
    "created_utc": ts,
    "deploy_pack_parquet": str(DEPLOY_PARQ),
    "deploy_pack_json": str(DEPLOY_JSON) if DEPLOY_JSON.exists() else None,
    "per_symbol_configs_dir": str(OUT_CFG_DIR),
    "n_rows": int(df2.height),
    "symbols": sorted(list(by_symbol.keys())),
    "notes": [
        "Este export NO implica GO. Si status=NO_GO, es para an√°lisis/iteraci√≥n.",
        "Configs por s√≠mbolo pensadas para conectar con motor/EA o notebooks posteriores.",
    ],
}
META_PATH = SNAP_DIR / "deploy_pack_v10_materialization_snapshot.json"
META_PATH.write_text(json.dumps(meta, indent=2, ensure_ascii=False), encoding="utf-8")

# Guardar tambi√©n una copia del parquet como snapshot
SNAP_PARQ = SNAP_DIR / "deploy_pack_v10.parquet"
df2.write_parquet(str(SNAP_PARQ), compression="zstd")

GLOBAL_STATE["deploy_pack"] = {
    "deploy_pack_parquet": str(DEPLOY_PARQ),
    "deploy_pack_json": str(DEPLOY_JSON) if DEPLOY_JSON.exists() else None,
    "per_symbol_configs_dir": str(OUT_CFG_DIR),
    "materialization_snapshot": str(META_PATH),
    "snapshot_parquet": str(SNAP_PARQ),
    "n_rows": int(df2.height),
    "symbols": sorted(list(by_symbol.keys())),
}

print(f"üíæ SNAPSHOT ‚Üí {SNAP_PARQ} (OK)")
print(f"üíæ SNAPSHOT ‚Üí {META_PATH} (OK)")
print(">>> Celda 12 v1.0 :: OK")


>>> Celda 12 v1.0 :: Deploy Pack QA + Materializaci√≥n por s√≠mbolo [schema-check + exports]
[Celda 12] deploy_pack rows = 2
[Celda 12] deploy_pack cols = ['symbol', 'fold_id', 'picked_side', 'status', 'deploy_mode', 'picked_gate_pass', 'picked_score_oos', 'picked_n_trades_oos', 'picked_tot_ret_oos', 'picked_mdd_oos', 'picked_win_rate_oos', 'picked_exposure_oos', 'picked_sharpe_like_oos', 'picked_trades_per_day_oos', 'n_candidates', 'n_gate_pass', 'best_score_any', 'IS_start', 'IS_end', 'OOS_start', 'OOS_end', 'scheme', 'thr_er', 'thr_mom', 'thr_vol', 'SL_ATR', 'TP_ATR', 'TRAIL_ATR', 'TIME_STOP_BARS', 'EMA_FILTER', 'MON_FRI', 'COST_BASE_BPS', 'COST_STRESS_BPS', 'COST_REPORTED_IS_ROUNDTRIP', 'EXEC_CONVENTION']
[Celda 12] typical_present=['picked_side', 'picked_score_oos', 'picked_gate_pass', 'status', 'thr_er', 'thr_mom', 'thr_vol', 'COST_BASE_BPS', 'COST_STRESS_BPS']
[Celda 12] typical_missing=[]
[Celda 12] preview (top 10):
shape: (2, 6)
‚îå‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î

In [45]:
# ===================== Celda 13 v1.1.0 ‚Äî Diagn√≥stico de Rentabilidad + Edge Alignment (alpha‚Üîmotor) [AUTO-RECOVERY + schema canonical | WFO-safe] =====================
from __future__ import annotations

from pathlib import Path
from datetime import datetime, timezone
import json
import math
import polars as pl

print(">>> Celda 13 v1.1.0 :: Diagn√≥stico de Rentabilidad + Edge Alignment (alpha‚Üîmotor) [WFO-safe]")

# ========================= Utilidades =========================
def _utc_now_iso() -> str:
    return datetime.utcnow().replace(tzinfo=timezone.utc).isoformat()

def _ensure_utc(dt: datetime) -> datetime:
    if dt.tzinfo is None:
        return dt.replace(tzinfo=timezone.utc)
    return dt.astimezone(timezone.utc)

def _parse_dt_any(s: str) -> datetime:
    dt0 = datetime.fromisoformat(s)
    return _ensure_utc(dt0)

def _py_weekday(dt0: datetime) -> int:
    return int(dt0.weekday())  # Mon=0..Sun=6

def _pick_latest(files: list[Path]) -> Path | None:
    files2 = [p for p in files if p.exists()]
    if not files2:
        return None
    return sorted(files2, key=lambda p: p.stat().st_mtime, reverse=True)[0]

def _find_latest_by_glob(root: Path, pattern: str) -> Path | None:
    if not root.exists():
        return None
    hits = list(root.rglob(pattern))
    return _pick_latest(hits)

def _ensure_backtest_engine_state() -> None:
    """Garantiza GLOBAL_STATE['backtest_engine'] incluso despu√©s de restart."""
    global GLOBAL_STATE

    if "GLOBAL_STATE" not in globals() or not isinstance(GLOBAL_STATE, dict):
        GLOBAL_STATE = {}

    if "paths" not in GLOBAL_STATE or not isinstance(GLOBAL_STATE["paths"], dict):
        cwd = Path.cwd().resolve()
        GLOBAL_STATE["paths"] = {
            "artifacts": str((cwd / "artifacts").resolve()),
            "run_snapshots": str((cwd / "snapshots").resolve()),
        }

    paths = GLOBAL_STATE["paths"]
    artifacts = Path(paths.get("artifacts", "")).resolve()
    snap_dir  = Path(paths.get("run_snapshots", "")).resolve()

    bt = GLOBAL_STATE.get("backtest_engine", {}) if isinstance(GLOBAL_STATE.get("backtest_engine", {}), dict) else {}
    tp = bt.get("trades_path"); sp = bt.get("summary_path")

    if tp and Path(tp).exists() and sp and Path(sp).exists():
        return

    # Recuperaci√≥n desde snapshot (prioriza overlay)
    cand_snap = []
    if snap_dir.exists():
        cand_snap += [
            snap_dir / "overlay_engine_v16_snapshot.json",
            snap_dir / "backtest_engine_v10_snapshot.json",
        ]
    if artifacts.exists():
        cand_snap += [
            artifacts / "snapshots" / "overlay_engine_v16_snapshot.json",
            artifacts / "snapshots" / "backtest_engine_v10_snapshot.json",
        ]

    snap_file = _pick_latest(cand_snap)
    params = {}

    if snap_file and snap_file.exists():
        try:
            j = json.loads(snap_file.read_text(encoding="utf-8"))
            params = j.get("params", {}) if isinstance(j.get("params", {}), dict) else {}
        except Exception:
            pass

    # Paths por glob (overlay primero)
    tp2 = _find_latest_by_glob(artifacts, "trades_engine_v10*_overlay_*.parquet") \
          or _find_latest_by_glob(artifacts, "trades_engine_v10*.parquet")
    sp2 = _find_latest_by_glob(artifacts, "summary_engine_v10*_overlay_*.parquet") \
          or _find_latest_by_glob(artifacts, "summary_engine_v10*.parquet")

    if not tp2 or not tp2.exists():
        raise RuntimeError(f"[Celda 13] ERROR: no encuentro trades_engine_v10*.parquet bajo artifacts={artifacts}")
    if not sp2 or not sp2.exists():
        raise RuntimeError(f"[Celda 13] ERROR: no encuentro summary_engine_v10*.parquet bajo artifacts={artifacts}")

    GLOBAL_STATE["backtest_engine"] = {
        "trades_path": str(tp2),
        "summary_path": str(sp2),
        "params": params,
        "recovered_utc": _utc_now_iso(),
        "recovered_from": str(snap_file) if snap_file else None,
    }

# ========================= Estado m√≠nimo =========================
_ensure_backtest_engine_state()

paths = GLOBAL_STATE["paths"]
bt = GLOBAL_STATE["backtest_engine"]

trades_path = bt.get("trades_path")
summary_path = bt.get("summary_path")
if not trades_path or not Path(trades_path).exists():
    raise RuntimeError(f"[Celda 13] ERROR: trades_path inv√°lido/no existe: {trades_path}")
if not summary_path or not Path(summary_path).exists():
    raise RuntimeError(f"[Celda 13] ERROR: summary_path inv√°lido/no existe: {summary_path}")

params = bt.get("params", {}) or {}
MON_FRI = bool(params.get("MON_FRI", True))

# Alpha report (07C) ‚Äî localizar robusto
alpha_path = (
    (GLOBAL_STATE.get("alpha_reports", {}) or {}).get("alpha_multi_horizon_report_path")
    or (GLOBAL_STATE.get("alpha_report", {}) or {}).get("report_path")
)
if not alpha_path:
    alpha_path = str(Path(paths["artifacts"]).resolve() / "alpha_reports" / "alpha_multi_horizon_report.parquet")
alpha_exists = Path(alpha_path).exists()

# ========================= Outputs =========================
OUT_DIR = Path(paths["artifacts"]).resolve() / "backtests" / "backtest_engine_v10" / "diagnostics_engine_v10"
OUT_DIR.mkdir(parents=True, exist_ok=True)

SNAP_DIR = Path(paths.get("run_snapshots") or (Path(paths["artifacts"]).resolve() / "snapshots")).resolve()
SNAP_DIR.mkdir(parents=True, exist_ok=True)

OUT_KPIS = OUT_DIR / "diag_kpis_engine_v10.parquet"
OUT_EXITS = OUT_DIR / "diag_exit_reason_mix_engine_v10.parquet"
OUT_HOLDS = OUT_DIR / "diag_hold_stats_engine_v10.parquet"
OUT_ALPHA_PICK = OUT_DIR / "alpha_best_horizon_side_engine_v10.parquet"
OUT_TUNING_JSON = OUT_DIR / "tuning_plan_engine_v10.json"

SNAP_KPIS = SNAP_DIR / "diag_kpis_engine_v10.parquet"
SNAP_JSON = SNAP_DIR / "diag_engine_alignment_snapshot.json"

# ========================= Load trades =========================
df = pl.read_parquet(str(trades_path))

REQ = ["symbol","fold_id","segment","side","entry_time","exit_time","bars_held","exit_reason","net_ret_base","net_ret_stress"]
miss = [c for c in REQ if c not in df.columns]
if miss:
    raise RuntimeError(f"[Celda 13] ERROR: trades parquet no tiene columnas requeridas: {miss}")

df = (
    df
    .with_columns([
        pl.col("entry_time").cast(pl.Utf8),
        pl.col("exit_time").cast(pl.Utf8),
    ])
    .with_columns([
        pl.col("entry_time").map_elements(_parse_dt_any, return_dtype=pl.Datetime("us","UTC")).alias("entry_dt"),
        pl.col("exit_time").map_elements(_parse_dt_any, return_dtype=pl.Datetime("us","UTC")).alias("exit_dt"),
    ])
    .with_columns([
        pl.col("entry_dt").map_elements(_py_weekday, return_dtype=pl.Int64).cast(pl.Int32).alias("entry_dow"),
        pl.col("exit_dt").map_elements(_py_weekday, return_dtype=pl.Int64).cast(pl.Int32).alias("exit_dow"),
    ])
    .with_columns([
        (pl.col("entry_dow") >= 5).alias("entry_is_weekend"),
        (pl.col("exit_dow") >= 5).alias("exit_is_weekend"),
    ])
)

n_rows = df.height
weekend_entry_share = float(df.select(pl.col("entry_is_weekend").mean()).item() or 0.0)
weekend_exit_share  = float(df.select(pl.col("exit_is_weekend").mean()).item() or 0.0)

print(f"[Celda 13] trades rows = {n_rows}")
print(f"[Celda 13] weekend_entry_share={weekend_entry_share:.6f} weekend_exit_share={weekend_exit_share:.6f} (MON_FRI={MON_FRI})")

# ========================= KPIs (trade-level aggregation) =========================
agg_exprs = [
    pl.len().alias("n_trades"),
    pl.col("net_ret_base").mean().alias("mean_net_base"),
    pl.col("net_ret_base").std().alias("std_net_base"),
    (pl.col("net_ret_base") > 0).mean().alias("win_rate_base"),
    pl.col("net_ret_stress").mean().alias("mean_net_stress"),
    (pl.col("net_ret_stress") > 0).mean().alias("win_rate_stress"),
    pl.col("bars_held").mean().alias("mean_bars_held"),
    pl.col("bars_held").median().alias("med_bars_held"),
    pl.col("bars_held").quantile(0.90, interpolation="nearest").alias("p90_bars_held"),
]

kpis = (
    df.group_by(["symbol","fold_id","segment","side"])
      .agg(agg_exprs)
      .with_columns([
          (pl.col("mean_net_base") / pl.col("std_net_base")).alias("sharpe_like_base"),
      ])
      .sort(["segment","sharpe_like_base","n_trades"], descending=[False, True, True])
)

print("[Celda 13] KPIs (top 20):")
print(kpis.head(20))

# ========================= Exit reason mix + trailing dominance =========================
exit_mix = (
    df.group_by(["symbol","fold_id","segment","side","exit_reason"])
      .agg([pl.len().alias("n")])
      .with_columns([
          (pl.col("n") / pl.col("n").sum().over(["symbol","fold_id","segment","side"])).alias("share"),
      ])
      .sort(["symbol","segment","side","n"], descending=[False,False,False,True])
)

trail_dom = (
    exit_mix.filter(pl.col("exit_reason") == "TRAIL")
            .select(["symbol","fold_id","segment","side","share"])
            .rename({"share":"trail_share"})
            .sort(["segment","trail_share"], descending=[False, True])
)

print("[Celda 13] Exit reason mix (top 30):")
print(exit_mix.head(30))
print("[Celda 13] TRAIL dominance (top 20):")
print(trail_dom.head(20))

# ========================= Hold stats distribution =========================
holds = (
    df.group_by(["symbol","fold_id","segment","side"])
      .agg([
          pl.len().alias("n_trades"),
          pl.col("bars_held").mean().alias("mean_bars"),
          pl.col("bars_held").median().alias("med_bars"),
          pl.col("bars_held").quantile(0.90, interpolation="nearest").alias("p90_bars"),
          (pl.col("bars_held") <= 6).mean().alias("share_le_6"),
          (pl.col("bars_held") <= 12).mean().alias("share_le_12"),
          (pl.col("bars_held") >= 288).mean().alias("share_ge_1d"),
      ])
      .sort(["segment","mean_bars"], descending=[False, True])
)

print("[Celda 13] Hold stats (top 20):")
print(holds.head(20))

# ========================= Alpha report canonicalization + best horizon/side =========================
alpha_best = pl.DataFrame()
alpha_metric_used = None

def _canonicalize_alpha(ar: pl.DataFrame) -> pl.DataFrame:
    # Mapeo desde tu schema real 07C ‚Üí schema can√≥nico
    rename_map = {}
    if "h_bars_after_entry" in ar.columns and "horizon_bars" not in ar.columns:
        rename_map["h_bars_after_entry"] = "horizon_bars"
    if "net_base_mean" in ar.columns and "mean_net_base" not in ar.columns:
        rename_map["net_base_mean"] = "mean_net_base"
    if "net_stress_mean" in ar.columns and "mean_net_stress" not in ar.columns:
        rename_map["net_stress_mean"] = "mean_net_stress"
    if "sharpe_like_base" not in ar.columns and "sharpe_like_base" in ar.columns:
        pass
    if rename_map:
        ar = ar.rename(rename_map)
    return ar

if alpha_exists:
    ar0 = pl.read_parquet(str(alpha_path))
    ar = _canonicalize_alpha(ar0)

    needed = {"symbol","fold_id","segment","side","horizon_bars"}
    if needed.issubset(set(ar.columns)):
        # M√©trica preferida: stress (institucional); si no existe, base.
        if "mean_net_stress" in ar.columns:
            metric = "mean_net_stress"
        elif "mean_net_base" in ar.columns:
            metric = "mean_net_base"
        else:
            metric = None

        if metric:
            alpha_metric_used = metric
            alpha_best = (
                ar.filter(pl.col("segment") == "OOS")
                  .sort([metric], descending=True)
                  .group_by(["symbol","fold_id"])
                  .agg([
                      pl.first("side").alias("best_side_oos"),
                      pl.first("horizon_bars").alias("best_horizon_bars_oos"),
                      pl.first(metric).alias(f"best_{metric}_oos"),
                      pl.first("n_trades").alias("best_n_trades_oos") if "n_trades" in ar.columns else pl.lit(None).alias("best_n_trades_oos"),
                  ])
                  .sort([f"best_{metric}_oos"], descending=True)
            )
            alpha_best.write_parquet(str(OUT_ALPHA_PICK), compression="zstd")
            print("[Celda 13] Alpha best (OOS) por s√≠mbolo:")
            print(alpha_best)
        else:
            print(f"[Celda 13] WARN: alpha report existe pero no encuentro mean_net_stress/mean_net_base. cols={ar.columns}")
    else:
        print(f"[Celda 13] WARN: alpha report existe pero faltan cols esperadas (canonical). cols={ar.columns}")
else:
    print(f"[Celda 13] WARN: no encuentro alpha report en: {alpha_path}")

# ========================= Alignment QA: horizonte objetivo vs holds reales =========================
alignment = pl.DataFrame()
if alpha_best.height > 0:
    # Unimos con holds (OOS) para ver si el engine est√° dejando respirar al horizonte ganador
    oos_holds = holds.filter(pl.col("segment") == "OOS").select(["symbol","fold_id","side","p90_bars","mean_bars","n_trades"])
    alignment = (
        alpha_best.join(oos_holds, left_on=["symbol","fold_id","best_side_oos"], right_on=["symbol","fold_id","side"], how="left")
                 .with_columns([
                     (pl.col("p90_bars") / pl.col("best_horizon_bars_oos")).alias("p90_vs_target"),
                 ])
                 .sort(["p90_vs_target"], descending=True)
    )
    # Flags institucionales
    bad = alignment.filter((pl.col("p90_vs_target").is_not_null()) & (pl.col("p90_vs_target") < 0.10))
    if bad.height > 0:
        print("[Celda 13] WARN: Horizon mismatch detectado (p90 < 10% del horizonte objetivo):")
        print(bad.select(["symbol","best_side_oos","best_horizon_bars_oos","p90_bars","p90_vs_target"]).head(20))

# ========================= Tuning plan (sugerido) =========================
DEFAULT_GRID = {
    "SL_ATR":    [2.0, 3.0, 5.0, 8.0],            # m√°s tipo catastrophic para swing
    "TP_ATR":    [4.0, 6.0, 8.0],
    "TRAIL_ATR": [3.0, 4.0, 5.0],
    "TRAIL_START_ATR": [2.0, 3.0],                # evita trailing temprano
    "TIME_STOP_BARS": [96, 288],                  # enfoca en horizontes con edge
    "MIN_HOLD_BARS":  [24, 72],                   # ‚âà 0.25*288
    "ENTRY_CONFIRM_BARS": [6, 12],
    "COOLDOWN_BARS": [12, 24, 48],
    "EMA_FILTER": [True],
    "MON_FRI": [True],
    "EVAL_SIDE_MODE": ["ALPHA_SIDE_ONLY"],        # no mezclar lados
}

tuning = {
    "created_utc": _utc_now_iso(),
    "inputs": {
        "trades_path": str(trades_path),
        "summary_path": str(summary_path),
        "alpha_report_path": str(alpha_path),
        "alpha_report_exists": bool(alpha_exists),
        "alpha_metric_used": alpha_metric_used,
    },
    "current_engine_params": params,
    "monfri_metrics": {
        "MON_FRI": MON_FRI,
        "weekend_entry_share": weekend_entry_share,
        "weekend_exit_share": weekend_exit_share,
    },
    "red_flags": [
        "Si TRAIL domina y p90_bars << horizonte ganador, el motor est√° matando el alpha.",
        "Si el mejor lado OOS del alpha es SHORT y t√∫ ejecutas LONG, est√°s optimizando el error.",
    ],
    "suggested_param_grid": DEFAULT_GRID,
    "alpha_best_oos": alpha_best.to_dicts() if alpha_best.height > 0 else [],
    "alignment_preview": alignment.to_dicts() if alignment.height > 0 else [],
    "next_steps": [
        "1) Baseline engine que replique 07C: entrada t+1, salida fija a H (p.ej. 288), costos STRESS, mismas restricciones reales.",
        "2) Ejecutar engine solo en el lado ganador por s√≠mbolo (OOS-first) y con MIN_HOLD‚âà0.25H, TIME_STOP=H, trailing con START alto o apagado.",
        "3) Exigir GO con m√©tricas STRESS OOS + >=3 folds + N OOS suficiente; eliminar fallback TOPK en producci√≥n.",
    ],
}

OUT_TUNING_JSON.write_text(json.dumps(tuning, indent=2, ensure_ascii=False), encoding="utf-8")

# ========================= Persist =========================
kpis.write_parquet(str(OUT_KPIS), compression="zstd")
kpis.write_parquet(str(SNAP_KPIS), compression="zstd")
exit_mix.write_parquet(str(OUT_EXITS), compression="zstd")
holds.write_parquet(str(OUT_HOLDS), compression="zstd")

snap = {
    "created_utc": tuning["created_utc"],
    "outputs": {
        "kpis": str(OUT_KPIS),
        "exit_mix": str(OUT_EXITS),
        "holds": str(OUT_HOLDS),
        "alpha_best": str(OUT_ALPHA_PICK) if alpha_best.height > 0 else None,
        "tuning_plan": str(OUT_TUNING_JSON),
    },
    "monfri": tuning["monfri_metrics"],
    "alpha_report_exists": bool(alpha_exists),
    "alpha_metric_used": alpha_metric_used,
}
SNAP_JSON.write_text(json.dumps(snap, indent=2, ensure_ascii=False), encoding="utf-8")

GLOBAL_STATE["diagnostics_engine"] = {
    "kpis_path": str(OUT_KPIS),
    "exit_mix_path": str(OUT_EXITS),
    "holds_path": str(OUT_HOLDS),
    "alpha_best_path": str(OUT_ALPHA_PICK) if alpha_best.height > 0 else None,
    "tuning_plan_path": str(OUT_TUNING_JSON),
    "snapshot_json": str(SNAP_JSON),
}

print(f"üíæ OUTPUT   ‚Üí {OUT_KPIS} (OK)")
print(f"üíæ OUTPUT   ‚Üí {OUT_EXITS} (OK)")
print(f"üíæ OUTPUT   ‚Üí {OUT_HOLDS} (OK)")
if alpha_best.height > 0:
    print(f"üíæ OUTPUT   ‚Üí {OUT_ALPHA_PICK} (OK)")
print(f"üíæ OUTPUT   ‚Üí {OUT_TUNING_JSON} (OK)")
print(f"üíæ SNAPSHOT ‚Üí {SNAP_KPIS} (OK)")
print(f"üíæ SNAPSHOT ‚Üí {SNAP_JSON} (OK)")
print(">>> Celda 13 v1.1.0 :: OK")


>>> Celda 13 v1.1.0 :: Diagn√≥stico de Rentabilidad + Edge Alignment (alpha‚Üîmotor) [WFO-safe]
[Celda 13] trades rows = 524
[Celda 13] weekend_entry_share=0.000000 weekend_exit_share=0.000000 (MON_FRI=True)
[Celda 13] KPIs (top 20):
shape: (16, 14)
‚îå‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îê
‚îÇ symbol ‚îÜ fold_id ‚îÜ segment ‚îÜ side  ‚îÜ ‚Ä¶ ‚îÜ mean_bars_he ‚îÜ med_bars_hel ‚îÜ p90_bars_he ‚îÜ sharpe_like ‚îÇ
‚îÇ ---    ‚îÜ ---     ‚îÜ ---     ‚îÜ ---   ‚îÜ   ‚îÜ ld           ‚îÜ d            ‚îÜ ld          ‚îÜ _base       ‚îÇ
‚îÇ str    ‚îÜ str     ‚îÜ str     ‚îÜ str   ‚îÜ   ‚îÜ ---          ‚îÜ ---          ‚îÜ ---         ‚îÜ ---         ‚îÇ
‚îÇ        ‚îÜ         ‚îÜ         ‚îÜ       ‚îÜ   ‚îÜ f64          ‚îÜ f64         

In [46]:
# ===================== Celda 14 v1.0 ‚Äî Engine Tuning (IS-only) [alpha‚Üîmotor alignment + TRAIL_START + longer holds | WFO-safe] =====================
from __future__ import annotations

from pathlib import Path
from datetime import datetime, timezone
import json
import math
import itertools
import numpy as np
import polars as pl

print(">>> Celda 14 v1.0 :: Engine Tuning (IS-only) [alpha‚Üîmotor alignment + TRAIL_START + longer holds | WFO-safe]")

# ========================= Validaciones GLOBAL_STATE =========================
if "GLOBAL_STATE" not in globals() or not isinstance(GLOBAL_STATE, dict):
    raise RuntimeError("[Celda 14] ERROR: GLOBAL_STATE no existe o no es dict.")

for k in ("paths", "data", "features", "wfo", "regime_gate", "cost_model", "universe"):
    if k not in GLOBAL_STATE:
        raise RuntimeError(f"[Celda 14] ERROR: falta GLOBAL_STATE['{k}'].")

paths = GLOBAL_STATE["paths"]
data_state = GLOBAL_STATE["data"]
feat_state = GLOBAL_STATE["features"]
wfo_state  = GLOBAL_STATE["wfo"]
gate_state = GLOBAL_STATE["regime_gate"]
cost_state = GLOBAL_STATE["cost_model"]

dq = GLOBAL_STATE.get("data_quality", {}) or {}
symbols = dq.get("final_symbols") or (GLOBAL_STATE["universe"].get("selected_symbols_TREND") or [])
if not symbols:
    raise RuntimeError("[Celda 14] ERROR: no hay s√≠mbolos (data_quality.final_symbols / selected_symbols_TREND).")
symbols_u = [str(s).upper().strip() for s in symbols]

folds = wfo_state.get("folds") or []
if not folds:
    folds_path = wfo_state.get("folds_path")
    if not folds_path:
        raise RuntimeError("[Celda 14] ERROR: no hay wfo.folds ni wfo.folds_path. Ejecuta Celda 04.")
    folds = json.loads(Path(folds_path).read_text(encoding="utf-8")).get("folds", [])
if not folds:
    raise RuntimeError("[Celda 14] ERROR: folds vac√≠o.")

m5_paths = (
    data_state.get("m5_ohlcv_paths")
    or data_state.get("m5_clean_paths")
    or data_state.get("ohlcv_clean_paths")
    or {}
)
if not isinstance(m5_paths, dict) or not m5_paths:
    raise RuntimeError("[Celda 14] ERROR: no hay m5_paths. Ejecuta Celda 02/02C.")

feat_paths = feat_state.get("features_base_paths") or {}
if not feat_paths:
    raise RuntimeError("[Celda 14] ERROR: no hay features_base_paths. Ejecuta Celda 05.")

gate_path = gate_state.get("gate_table_path")
if not gate_path or not Path(gate_path).exists():
    raise RuntimeError("[Celda 14] ERROR: no hay regime_gate.gate_table_path. Ejecuta Celda 06.")
gate_df = pl.read_parquet(str(gate_path))

costs_by_symbol = cost_state.get("costs_by_symbol") or cost_state.get("symbols") or {}
if not costs_by_symbol:
    raise RuntimeError("[Celda 14] ERROR: no hay cost_model.costs_by_symbol. Ejecuta Celda 03.")
cost_reported_is_roundtrip = bool(cost_state.get("cost_reported_is_roundtrip", False))

# ========================= Columnas contract =========================
ER_COL  = "ER_kaufman"
MOM_COL = "mom_288"
VOL_COL = "vol_logret_288"
REQ_FEAT_COLS = ["time_utc", ER_COL, MOM_COL, VOL_COL]

# ATR: preferir feature "atr" (Celda 05) o "atr_72"; si no existe, aproximar con TR/ewm
ATR_PREF_COLS = ["atr", "atr_72"]

# OHLC m√≠nimos
REQ_PX_COLS = ["time_utc", "open", "high", "low", "close"]

# ========================= Outputs =========================
OUT_DIR = Path(paths["artifacts"]).resolve() / "backtests" / "backtest_engine_v10" / "tuning_engine_v14"
OUT_DIR.mkdir(parents=True, exist_ok=True)

SNAP_DIR = Path(paths.get("run_snapshots") or (Path(paths["artifacts"]).resolve() / "snapshots")).resolve()
SNAP_DIR.mkdir(parents=True, exist_ok=True)

OUT_RES  = OUT_DIR / "tuning_results_engine_v14.parquet"
OUT_BEST = OUT_DIR / "best_params_engine_v14.parquet"
SNAP_RES  = SNAP_DIR / "tuning_results_engine_v14.parquet"
SNAP_BEST = SNAP_DIR / "best_params_engine_v14.parquet"
SNAP_JSON = SNAP_DIR / "tuning_engine_v14_snapshot.json"

print(f"[Celda 14] symbols = {symbols_u}")
print(f"[Celda 14] folds   = {[f.get('fold_id') for f in folds]}")
print(f"[Celda 14] OUT_DIR = {OUT_DIR}")

# ========================= Helpers =========================
def _ensure_utc(dt: datetime) -> datetime:
    if dt.tzinfo is None:
        return dt.replace(tzinfo=timezone.utc)
    return dt.astimezone(timezone.utc)

def _parse_iso_utc(s: str) -> datetime:
    return _ensure_utc(datetime.fromisoformat(s))

def _pick_path_case_insensitive(d: dict, sym_u: str) -> str:
    if sym_u in d:
        return str(d[sym_u])
    keys = {str(k).upper().strip(): k for k in d.keys()}
    if sym_u in keys:
        return str(d[keys[sym_u]])
    raise KeyError(f"[Celda 14] ERROR: no encuentro path para {sym_u}. keys_sample={list(d)[:10]}")

def _need_cols(df: pl.DataFrame, cols: list[str], sym: str, tag: str) -> None:
    miss = [c for c in cols if c not in df.columns]
    if miss:
        raise RuntimeError(f"[Celda 14] ERROR: {sym} ({tag}) missing cols={miss}. cols={df.columns}")

def _cost_roundtrip_dec(cost_bps: float, reported_is_roundtrip: bool) -> float:
    c = float(cost_bps) / 10000.0
    return c if reported_is_roundtrip else (2.0 * c)

def _ema_np(x: np.ndarray, span: int) -> np.ndarray:
    # EMA est√°ndar (alpha = 2/(span+1))
    span = int(span)
    if span <= 1:
        return x.copy()
    a = 2.0 / (span + 1.0)
    out = np.empty_like(x, dtype=np.float64)
    out[:] = np.nan
    # init
    first = np.where(np.isfinite(x))[0]
    if first.size == 0:
        return out
    i0 = int(first[0])
    out[i0] = float(x[i0])
    for i in range(i0 + 1, x.size):
        xi = x[i]
        out[i] = (a * xi + (1.0 - a) * out[i - 1]) if math.isfinite(float(xi)) else out[i - 1]
    return out

def _compute_atr_wilder_from_ohlc(df: pl.DataFrame, n: int = 72) -> pl.Series:
    # True Range + ewm_mean(alpha=1/n, adjust=False)
    prev_close = pl.col("close").shift(1)
    tr = pl.max_horizontal([
        (pl.col("high") - pl.col("low")).abs(),
        (pl.col("high") - prev_close).abs(),
        (pl.col("low") - prev_close).abs(),
    ]).alias("__tr__")
    atr = tr.ewm_mean(alpha=(1.0 / float(n)), adjust=False).alias("atr")
    return df.select([atr]).get_column("atr")

def _segment_of_entry(entry_dt: datetime, is_s: datetime, is_e: datetime, o_s: datetime, o_e: datetime) -> str | None:
    if is_s <= entry_dt <= is_e:
        return "IS"
    if o_s <= entry_dt <= o_e:
        return "OOS"
    return None

def _kpis_from_rets(rets: np.ndarray) -> dict:
    # rets = net returns por trade (decimal)
    rets = rets.astype(np.float64)
    rets = rets[np.isfinite(rets)]
    n = int(rets.size)
    if n == 0:
        return {"n": 0, "mean": None, "std": None, "tot": None, "mdd": None, "win": None, "sharpe_like": None}
    mean = float(np.mean(rets))
    std = float(np.std(rets, ddof=1)) if n >= 2 else 0.0
    # total return compuesta
    tot = float(np.expm1(np.sum(np.log1p(rets))))
    win = float(np.mean(rets > 0.0))
    sharpe_like = float(mean / std) if std > 1e-12 else None

    # MDD sobre equity compuesta
    eq = np.exp(np.cumsum(np.log1p(rets)))
    peak = np.maximum.accumulate(eq)
    dd = (eq / peak) - 1.0
    mdd = float(np.min(dd)) if dd.size else 0.0

    return {"n": n, "mean": mean, "std": std, "tot": tot, "mdd": mdd, "win": win, "sharpe_like": sharpe_like}

def _score_is(k: dict, min_trades: int, max_mdd_abs: float) -> float:
    # score para elegir params SIN tocar OOS
    if k["n"] < min_trades:
        return -1e9
    if k["tot"] is None or k["mean"] is None:
        return -1e9
    if k["tot"] <= 0.0:
        return -1e6 + float(k["tot"])
    if k["mdd"] is not None and abs(float(k["mdd"])) > max_mdd_abs:
        return -1e6 - abs(float(k["mdd"]))
    # t-stat like (mean/std * sqrt(n)) penaliza drawdown
    if k["std"] is None or float(k["std"]) <= 1e-12:
        core = 0.0
    else:
        core = (float(k["mean"]) / float(k["std"])) * math.sqrt(float(k["n"]))
    pen = 2.0 * abs(float(k["mdd"] or 0.0))
    return float(core - pen)

# ========================= Motor sim (stateful, numpy) =========================
def _simulate_engine(
    df: pl.DataFrame,
    sym: str,
    fid: str,
    is_s: datetime, is_e: datetime, o_s: datetime, o_e: datetime,
    thr_er: float, thr_mom: float, thr_vol: float,
    cost_base_dec: float, cost_stress_dec: float,
    params: dict,
) -> list[dict]:
    """
    Simula trades usando convenci√≥n:
      - se√±al al close(t)
      - entrada en open(t+1)
      - stops/tp/trail evaluados intrabar (OHLC del bar de holding)
    """
    # arrays
    t = df.get_column("time_utc").to_list()  # list[datetime]
    n = len(t)
    if n < 10:
        return []

    op = df.get_column("open").to_numpy()
    hi = df.get_column("high").to_numpy()
    lo = df.get_column("low").to_numpy()
    cl = df.get_column("close").to_numpy()

    er  = df.get_column(ER_COL).to_numpy()
    mom = df.get_column(MOM_COL).to_numpy()
    vol = df.get_column(VOL_COL).to_numpy()

    # weekday robusto desde time_utc (ya UTC)
    dow = df.get_column("__dow__").to_numpy()  # Mon=0..Sun=6

    # ATR
    atr = df.get_column("__atr__").to_numpy()

    # EMA filter (opcional)
    ema_filter = bool(params.get("EMA_FILTER", True))
    if ema_filter:
        ema_fast = _ema_np(cl.astype(np.float64), span=48)
        ema_slow = _ema_np(cl.astype(np.float64), span=288)
    else:
        ema_fast = None
        ema_slow = None

    thr_mom_eff = max(0.0, float(thr_mom))
    sig_long = (er >= thr_er) & (mom >= thr_mom_eff) & (vol <= thr_vol)
    sig_short = (er >= thr_er) & (mom <= -thr_mom_eff) & (vol <= thr_vol)

    if ema_filter:
        sig_long = sig_long & (ema_fast > ema_slow)
        sig_short = sig_short & (ema_fast < ema_slow)

    # params
    SL_ATR = float(params["SL_ATR"])
    TP_ATR = float(params["TP_ATR"])
    TRAIL_ATR = float(params["TRAIL_ATR"])
    TRAIL_START_ATR = float(params["TRAIL_START_ATR"])
    TIME_STOP_BARS = int(params["TIME_STOP_BARS"])
    ENTRY_CONFIRM_BARS = int(params["ENTRY_CONFIRM_BARS"])
    EXIT_GATE_OFF_BARS = int(params["EXIT_GATE_OFF_BARS"])
    MIN_HOLD_BARS = int(params["MIN_HOLD_BARS"])
    COOLDOWN_BARS = int(params["COOLDOWN_BARS"])
    MON_FRI = bool(params.get("MON_FRI", True))

    trades = []

    pos = 0  # 0 flat, +1 long, -1 short
    entry_idx = -1
    entry_px = None
    entry_atr = None

    stop_px = None
    tp_px = None
    trail_px = None
    peak = None
    trough = None
    trail_armed = False
    gate_off_count = 0
    cooldown = 0

    long_consec = 0
    short_consec = 0

    # loop: i es √≠ndice del bar ACTUAL; la entrada ocurre en i usando se√±al de i-1
    for i in range(1, n):
        # cooldown update
        if cooldown > 0 and pos == 0:
            cooldown -= 1

        # ====== gestionar posici√≥n abierta ======
        if pos != 0 and entry_idx >= 0:
            bars_held = i - entry_idx

            # weekend flatten: si MON_FRI, forzar salida antes del fin de semana
            if MON_FRI:
                # si el bar actual ya es fin de semana, salimos al close (deber√≠a no ocurrir si entry filtrado bien)
                if int(dow[i]) >= 5:
                    exit_px = float(cl[i])
                    exit_dt = t[i]
                    reason = "WEEKEND_FLATTEN"
                else:
                    # si hoy es viernes y el siguiente bar cae en fin de semana, flatten ahora al close
                    if i + 1 < n and int(dow[i]) == 4 and int(dow[i + 1]) >= 5:
                        exit_px = float(cl[i])
                        exit_dt = t[i]
                        reason = "WEEKEND_FLATTEN"
                    else:
                        exit_px = None
                        exit_dt = None
                        reason = None
            else:
                exit_px = None
                exit_dt = None
                reason = None

            # stops/tp/trail intrabar (solo si no salimos ya por weekend)
            if reason is None:
                # protecci√≥n: ATR debe ser finito
                a_i = float(atr[i]) if math.isfinite(float(atr[i])) else (float(entry_atr) if entry_atr else 0.0)
                if a_i <= 0.0 or not math.isfinite(a_i):
                    a_i = float(entry_atr) if entry_atr and entry_atr > 0 else 0.0

                if pos == 1:
                    # actualizar peak/trailing
                    peak = float(max(peak, hi[i])) if peak is not None else float(hi[i])
                    if (not trail_armed) and entry_px is not None and entry_atr is not None:
                        if peak >= float(entry_px) + (TRAIL_START_ATR * float(entry_atr)):
                            trail_armed = True
                            # inicializar trail desde peak
                            trail_px = float(peak - (TRAIL_ATR * float(entry_atr)))

                    if trail_armed and entry_atr is not None:
                        cand = float(peak - (TRAIL_ATR * float(entry_atr)))
                        trail_px = float(max(trail_px, cand)) if trail_px is not None else cand

                    # prioridades conservadoras: SL -> TP -> TRAIL
                    sl_hit = (stop_px is not None) and (float(lo[i]) <= float(stop_px))
                    tp_hit = (tp_px is not None) and (float(hi[i]) >= float(tp_px))
                    tr_hit = (trail_armed and trail_px is not None) and (float(lo[i]) <= float(trail_px))

                    if sl_hit:
                        exit_px = float(stop_px)
                        exit_dt = t[i]
                        reason = "SL"
                    elif tp_hit:
                        exit_px = float(tp_px)
                        exit_dt = t[i]
                        reason = "TP"
                    elif tr_hit:
                        exit_px = float(trail_px)
                        exit_dt = t[i]
                        reason = "TRAIL"

                else:  # pos == -1 short
                    trough = float(min(trough, lo[i])) if trough is not None else float(lo[i])
                    if (not trail_armed) and entry_px is not None and entry_atr is not None:
                        if trough <= float(entry_px) - (TRAIL_START_ATR * float(entry_atr)):
                            trail_armed = True
                            trail_px = float(trough + (TRAIL_ATR * float(entry_atr)))

                    if trail_armed and entry_atr is not None:
                        cand = float(trough + (TRAIL_ATR * float(entry_atr)))
                        trail_px = float(min(trail_px, cand)) if trail_px is not None else cand

                    # prioridades conservadoras: SL -> TP -> TRAIL
                    sl_hit = (stop_px is not None) and (float(hi[i]) >= float(stop_px))
                    tp_hit = (tp_px is not None) and (float(lo[i]) <= float(tp_px))
                    tr_hit = (trail_armed and trail_px is not None) and (float(hi[i]) >= float(trail_px))

                    if sl_hit:
                        exit_px = float(stop_px)
                        exit_dt = t[i]
                        reason = "SL"
                    elif tp_hit:
                        exit_px = float(tp_px)
                        exit_dt = t[i]
                        reason = "TP"
                    elif tr_hit:
                        exit_px = float(trail_px)
                        exit_dt = t[i]
                        reason = "TRAIL"

            # gate-off / time-stop (al close), respetando MIN_HOLD_BARS
            if reason is None and bars_held >= MIN_HOLD_BARS:
                # gate-off hysteresis (miramos la se√±al del bar i-1)
                if pos == 1:
                    gate_on = bool(sig_long[i - 1])
                else:
                    gate_on = bool(sig_short[i - 1])

                gate_off_count = 0 if gate_on else (gate_off_count + 1)

                if gate_off_count >= EXIT_GATE_OFF_BARS:
                    exit_px = float(cl[i])
                    exit_dt = t[i]
                    reason = "REGIME_OFF"
                elif bars_held >= TIME_STOP_BARS:
                    exit_px = float(cl[i])
                    exit_dt = t[i]
                    reason = "TIME_STOP"

            # ejecutar salida si aplica
            if reason is not None and exit_px is not None and entry_px is not None:
                entry_dt = t[entry_idx]
                seg = _segment_of_entry(entry_dt, is_s, is_e, o_s, o_e)
                if seg is not None:
                    if pos == 1:
                        gross = (float(exit_px) / float(entry_px)) - 1.0
                    else:
                        gross = (float(entry_px) / float(exit_px)) - 1.0

                    trades.append({
                        "symbol": sym,
                        "fold_id": fid,
                        "segment": seg,
                        "side": "LONG" if pos == 1 else "SHORT",
                        "entry_time": entry_dt.isoformat(),
                        "exit_time": exit_dt.isoformat(),
                        "entry_price": float(entry_px),
                        "exit_price": float(exit_px),
                        "bars_held": int(exit_dt == exit_dt and (i - entry_idx)),
                        "exit_reason": str(reason),
                        "gross_ret": float(gross),
                        "net_ret_base": float(gross - cost_base_dec),
                        "net_ret_stress": float(gross - cost_stress_dec),
                    })

                # reset state
                pos = 0
                entry_idx = -1
                entry_px = None
                entry_atr = None
                stop_px = None
                tp_px = None
                trail_px = None
                peak = None
                trough = None
                trail_armed = False
                gate_off_count = 0
                cooldown = COOLDOWN_BARS

        # ====== si estamos flat, evaluar entrada (en open[i]) usando se√±al de i-1 ======
        if pos == 0 and cooldown == 0:
            # confirmar consecutivos con se√±al del bar i-1
            long_consec = (long_consec + 1) if bool(sig_long[i - 1]) else 0
            short_consec = (short_consec + 1) if bool(sig_short[i - 1]) else 0

            # entrada ocurre en open[i]; por lo tanto el filtro Mon‚ÄìFri debe aplicarse sobre el bar i (entry_time)
            if MON_FRI and int(dow[i]) >= 5:
                continue

            # ATR en entry bar
            a_entry = float(atr[i]) if math.isfinite(float(atr[i])) else 0.0
            if (not math.isfinite(a_entry)) or a_entry <= 0.0:
                continue

            # preferencia: si ambos confirman (posible si thr_mom_eff=0), resolvemos por signo mom
            enter_long = long_consec >= ENTRY_CONFIRM_BARS
            enter_short = short_consec >= ENTRY_CONFIRM_BARS

            if enter_long and enter_short:
                enter_long = bool(mom[i - 1] >= 0.0)
                enter_short = not enter_long

            if enter_long:
                px = float(op[i]) if float(op[i]) > 0 else float(cl[i])
                if px <= 0:
                    continue
                pos = 1
                entry_idx = i
                entry_px = px
                entry_atr = a_entry
                stop_px = float(entry_px - SL_ATR * entry_atr)
                tp_px   = float(entry_px + TP_ATR * entry_atr)
                peak = float(hi[i])
                trail_armed = (TRAIL_START_ATR <= 0.0)
                trail_px = float(peak - TRAIL_ATR * entry_atr) if trail_armed else None
                gate_off_count = 0

                # reset counters para evitar re-entradas inmediatas
                long_consec = 0
                short_consec = 0

            elif enter_short:
                px = float(op[i]) if float(op[i]) > 0 else float(cl[i])
                if px <= 0:
                    continue
                pos = -1
                entry_idx = i
                entry_px = px
                entry_atr = a_entry
                stop_px = float(entry_px + SL_ATR * entry_atr)
                tp_px   = float(entry_px - TP_ATR * entry_atr)
                trough = float(lo[i])
                trail_armed = (TRAIL_START_ATR <= 0.0)
                trail_px = float(trough + TRAIL_ATR * entry_atr) if trail_armed else None
                gate_off_count = 0

                long_consec = 0
                short_consec = 0

    return trades

# ========================= Par√°metros tuning (acotados) =========================
# Institucional: tuning en IS, reporte OOS. Grilla intencional para corregir:
#   - holding demasiado corto
#   - trailing dominante demasiado temprano
GRID = {
    "SL_ATR":            [2.0, 2.5, 3.0],
    "TP_ATR":            [4.0, 6.0, 8.0],
    "TRAIL_ATR":         [3.0, 4.0, 5.0],
    "TRAIL_START_ATR":   [0.0, 1.0, 2.0],     # clave: no activar trailing hasta que avance
    "MIN_HOLD_BARS":     [6, 24],            # fuerza capturar ‚Äúleg‚Äù
    "ENTRY_CONFIRM_BARS":[6, 12],
    "EXIT_GATE_OFF_BARS":[12],               # mantener estable para no mezclar efectos
    "COOLDOWN_BARS":     [12, 24],
    "TIME_STOP_BARS":    [288, 576],         # 1d / 2d (M5)
    "EMA_FILTER":        [True],
    "MON_FRI":           [True],
}

MAX_COMBOS_PER_SYMBOL = 220  # seguridad
MIN_TRADES_IS = 25
MAX_MDD_ABS_IS = 0.35

# ========================= Main: per s√≠mbolo/fold tuning =========================
rows = []
best_rows = []

for sym in symbols_u:
    # ---- Load OHLCV ----
    px_path = _pick_path_case_insensitive(m5_paths, sym)
    df_px = pl.read_parquet(str(px_path))
    # normalizar columnas faltantes
    if "open" not in df_px.columns:
        df_px = df_px.with_columns(pl.col("close").alias("open"))
    if "high" not in df_px.columns:
        df_px = df_px.with_columns(pl.max_horizontal([pl.col("open"), pl.col("close")]).alias("high"))
    if "low" not in df_px.columns:
        df_px = df_px.with_columns(pl.min_horizontal([pl.col("open"), pl.col("close")]).alias("low"))

    _need_cols(df_px, ["time_utc", "open", "high", "low", "close"], sym, "OHLCV")
    df_px = (
        df_px
        .with_columns([
            pl.col("time_utc").cast(pl.Datetime("us","UTC"), strict=False),
            pl.col("open").cast(pl.Float64, strict=False),
            pl.col("high").cast(pl.Float64, strict=False),
            pl.col("low").cast(pl.Float64, strict=False),
            pl.col("close").cast(pl.Float64, strict=False),
        ])
        .sort("time_utc")
        .unique(subset=["time_utc"], keep="last")
    )

    # ---- Load Features ----
    f_path = _pick_path_case_insensitive(feat_paths, sym)
    df_f = pl.read_parquet(str(f_path))
    _need_cols(df_f, REQ_FEAT_COLS, sym, "FEATURES")
    df_f = df_f.with_columns(pl.col("time_utc").cast(pl.Datetime("us","UTC"), strict=False)).sort("time_utc")

    # ---- Join ----
    df = df_px.join(df_f, on="time_utc", how="inner")
    if df.height < 2000:
        print(f"[Celda 14] WARN: {sym} muy pocas filas post-join: {df.height}")
        continue

    # weekday robusto desde time_utc (no strings)
    df = df.with_columns(pl.col("time_utc").dt.weekday().cast(pl.Int16).alias("__dow__"))

    # ATR: usar col existente si hay; si no, calcular de OHLC
    atr_col = None
    for c in ATR_PREF_COLS:
        if c in df.columns:
            atr_col = c
            break
    if atr_col is None:
        atr_s = _compute_atr_wilder_from_ohlc(df.select(["high","low","close"]), n=72)
        df = df.with_columns(pl.Series(name="__atr__", values=atr_s))
    else:
        df = df.with_columns(pl.col(atr_col).cast(pl.Float64, strict=False).alias("__atr__"))

    # costs
    cinfo = costs_by_symbol.get(sym) or {}
    cost_base_bps = float(cinfo.get("COST_BASE_BPS", 0.0))
    cost_stress_bps = float(cinfo.get("COST_STRESS_BPS", 0.0))
    cost_base_dec = _cost_roundtrip_dec(cost_base_bps, cost_reported_is_roundtrip)
    cost_stress_dec = _cost_roundtrip_dec(cost_stress_bps, cost_reported_is_roundtrip)

    # grid combos (determin√≠stico)
    keys = list(GRID.keys())
    combos = list(itertools.product(*[GRID[k] for k in keys]))
    if len(combos) > MAX_COMBOS_PER_SYMBOL:
        combos = combos[:MAX_COMBOS_PER_SYMBOL]

    for f in folds:
        fid = str(f["fold_id"])
        is_s = _parse_iso_utc(f["IS_start"])
        is_e = _parse_iso_utc(f["IS_end"])
        o_s  = _parse_iso_utc(f["OOS_start"])
        o_e  = _parse_iso_utc(f["OOS_end"])

        g = gate_df.filter((pl.col("symbol") == sym) & (pl.col("fold_id") == fid))
        if g.is_empty():
            raise RuntimeError(f"[Celda 14] ERROR: no hay thresholds en gate_df para {sym} {fid}.")

        thr_er = float(g.select(pl.col("thr_er")).item())
        thr_mom = float(g.select(pl.col("thr_mom")).item())
        thr_vol = float(g.select(pl.col("thr_vol")).item())

        best = None

        for tpl in combos:
            params = {k: tpl[i] for i, k in enumerate(keys)}
            params["COST_BASE_BPS"] = cost_base_bps
            params["COST_STRESS_BPS"] = cost_stress_bps

            trades = _simulate_engine(
                df=df,
                sym=sym, fid=fid,
                is_s=is_s, is_e=is_e, o_s=o_s, o_e=o_e,
                thr_er=thr_er, thr_mom=thr_mom, thr_vol=thr_vol,
                cost_base_dec=cost_base_dec, cost_stress_dec=cost_stress_dec,
                params=params,
            )

            if not trades:
                k_is = {"n": 0, "mean": None, "std": None, "tot": None, "mdd": None, "win": None, "sharpe_like": None}
                k_oos = k_is.copy()
            else:
                # separar IS/OOS por segment (ya asignado con entry_time)
                is_rets = np.array([t["net_ret_base"] for t in trades if t["segment"] == "IS"], dtype=np.float64)
                oos_rets = np.array([t["net_ret_base"] for t in trades if t["segment"] == "OOS"], dtype=np.float64)
                k_is = _kpis_from_rets(is_rets)
                k_oos = _kpis_from_rets(oos_rets)

            score_is = _score_is(k_is, min_trades=MIN_TRADES_IS, max_mdd_abs=MAX_MDD_ABS_IS)

            row = {
                "symbol": sym,
                "fold_id": fid,
                "score_is": float(score_is),
                "n_is": int(k_is["n"]),
                "tot_is": k_is["tot"],
                "mdd_is": k_is["mdd"],
                "win_is": k_is["win"],
                "sharpe_like_is": k_is["sharpe_like"],
                "n_oos": int(k_oos["n"]),
                "tot_oos": k_oos["tot"],
                "mdd_oos": k_oos["mdd"],
                "win_oos": k_oos["win"],
                "sharpe_like_oos": k_oos["sharpe_like"],
                "thr_er": thr_er,
                "thr_mom": thr_mom,
                "thr_vol": thr_vol,
                "cost_base_bps": cost_base_bps,
                "cost_stress_bps": cost_stress_bps,
                **{k: params[k] for k in keys},
            }
            rows.append(row)

            if best is None or score_is > best["score_is"]:
                best = row

        if best is not None:
            best_rows.append(best)
            print(
                f"[Celda 14] BEST {sym} {fid} :: score_IS={best['score_is']:.3f} "
                f"| IS(n={best['n_is']}, tot={best['tot_is']}) "
                f"| OOS(n={best['n_oos']}, tot={best['tot_oos']}) "
                f"| SL={best['SL_ATR']} TP={best['TP_ATR']} TRAIL={best['TRAIL_ATR']} START={best['TRAIL_START_ATR']} "
                f"| MINH={best['MIN_HOLD_BARS']} EC={best['ENTRY_CONFIRM_BARS']} TS={best['TIME_STOP_BARS']}"
            )

# ========================= Persist =========================
res_df = pl.DataFrame(rows).sort(["symbol","fold_id","score_is"], descending=[False, False, True]) if rows else pl.DataFrame()
best_df = pl.DataFrame(best_rows).sort(["symbol","fold_id"]) if best_rows else pl.DataFrame()

if res_df.height == 0 or best_df.height == 0:
    raise RuntimeError("[Celda 14] GATE FAIL: tuning vac√≠o (sin resultados).")

res_df.write_parquet(str(OUT_RES), compression="zstd")
best_df.write_parquet(str(OUT_BEST), compression="zstd")
res_df.write_parquet(str(SNAP_RES), compression="zstd")
best_df.write_parquet(str(SNAP_BEST), compression="zstd")

snapshot = {
    "created_utc": datetime.utcnow().replace(tzinfo=timezone.utc).isoformat(),
    "symbols": symbols_u,
    "fold_ids": [f.get("fold_id") for f in folds],
    "grid": GRID,
    "limits": {"MAX_COMBOS_PER_SYMBOL": MAX_COMBOS_PER_SYMBOL, "MIN_TRADES_IS": MIN_TRADES_IS, "MAX_MDD_ABS_IS": MAX_MDD_ABS_IS},
    "inputs": {"gate_table_path": str(gate_path)},
    "outputs": {"tuning_results": str(OUT_RES), "best_params": str(OUT_BEST), "snap_results": str(SNAP_RES), "snap_best": str(SNAP_BEST)},
    "notes": [
        "Selecci√≥n de par√°metros: SOLO por IS (score_is).",
        "OOS se reporta para validaci√≥n, no se optimiza sobre OOS.",
        "Mon‚ÄìFri se aplica sobre entry_time (bar de entrada).",
        "TRAIL_START_ATR evita que el trailing mate trades antes de que maduren."
    ],
}
SNAP_JSON.write_text(json.dumps(snapshot, indent=2, ensure_ascii=False), encoding="utf-8")

GLOBAL_STATE["tuning_engine_v14"] = {
    "tuning_results_path": str(OUT_RES),
    "best_params_path": str(OUT_BEST),
    "snapshot_json": str(SNAP_JSON),
    "grid": GRID,
    "limits": snapshot["limits"],
}

print(f"üíæ OUTPUT   ‚Üí {OUT_RES} (OK) | rows={res_df.height}")
print(f"üíæ OUTPUT   ‚Üí {OUT_BEST} (OK) | rows={best_df.height}")
print(f"üíæ SNAPSHOT ‚Üí {SNAP_RES} (OK)")
print(f"üíæ SNAPSHOT ‚Üí {SNAP_BEST} (OK)")
print(f"üíæ SNAPSHOT ‚Üí {SNAP_JSON} (OK)")
print(">>> Celda 14 v1.0 :: OK")


>>> Celda 14 v1.0 :: Engine Tuning (IS-only) [alpha‚Üîmotor alignment + TRAIL_START + longer holds | WFO-safe]
[Celda 14] symbols = ['BNBUSD', 'BTCUSD', 'LVMH', 'XAUAUD']
[Celda 14] folds   = ['F1']
[Celda 14] OUT_DIR = C:\Quant\MT5_Data_Extraction\ER_STRATEGY_LAB\artifacts\backtests\backtest_engine_v10\tuning_engine_v14
[Celda 14] BEST BNBUSD F1 :: score_IS=-1000000.066 | IS(n=102, tot=-0.0662798607020612) | OOS(n=28, tot=-0.030626756246091243) | SL=2.0 TP=6.0 TRAIL=4.0 START=0.0 | MINH=6 EC=12 TS=288
[Celda 14] BEST BTCUSD F1 :: score_IS=-1000000.165 | IS(n=112, tot=-0.1645285823913961) | OOS(n=16, tot=-0.03948348129233368) | SL=2.0 TP=4.0 TRAIL=3.0 START=2.0 | MINH=6 EC=12 TS=288
[Celda 14] BEST LVMH F1 :: score_IS=-1000000.055 | IS(n=47, tot=-0.05502471212782521) | OOS(n=14, tot=-0.021841151967083493) | SL=2.0 TP=6.0 TRAIL=3.0 START=2.0 | MINH=24 EC=12 TS=288
[Celda 14] BEST XAUAUD F1 :: score_IS=-1000000.068 | IS(n=100, tot=-0.06755642775164171) | OOS(n=9, tot=0.004039572464097953

In [47]:
# ===================== Celda 15 v1.0.1 ‚Äî Alpha Design (IS-only) [side + horizon selection ‚Üí motor targets] [WFO-safe] =====================
from __future__ import annotations

from pathlib import Path
from datetime import datetime, timezone
import json
import polars as pl

print(">>> Celda 15 v1.0.1 :: Alpha Design (IS-only) [side + horizon selection ‚Üí motor targets] [WFO-safe]")

# ========================= Validaciones GLOBAL_STATE =========================
if "GLOBAL_STATE" not in globals() or not isinstance(GLOBAL_STATE, dict):
    raise RuntimeError("[Celda 15] ERROR: GLOBAL_STATE no existe o no es dict.")

for k in ("paths", "wfo", "universe"):
    if k not in GLOBAL_STATE:
        raise RuntimeError(f"[Celda 15] ERROR: falta GLOBAL_STATE['{k}'].")

paths = GLOBAL_STATE["paths"]
wfo_state = GLOBAL_STATE["wfo"]

# s√≠mbolos efectivos (preferir QA final)
dq = GLOBAL_STATE.get("data_quality", {}) or {}
symbols = dq.get("final_symbols") or (GLOBAL_STATE["universe"].get("selected_symbols_TREND") or [])
if not symbols:
    raise RuntimeError("[Celda 15] ERROR: no hay s√≠mbolos (data_quality.final_symbols / selected_symbols_TREND).")
symbols_u = [str(s).upper().strip() for s in symbols]

# folds
folds = wfo_state.get("folds") or []
if not folds:
    folds_path = wfo_state.get("folds_path")
    if not folds_path:
        raise RuntimeError("[Celda 15] ERROR: no hay wfo.folds ni wfo.folds_path. Ejecuta Celda 04.")
    folds = json.loads(Path(folds_path).read_text(encoding="utf-8")).get("folds", [])
if not folds:
    raise RuntimeError("[Celda 15] ERROR: folds vac√≠o (Celda 04 no produjo folds).")

fold_ids = [str(f.get("fold_id")) for f in folds if f.get("fold_id") is not None]
if not fold_ids:
    raise RuntimeError("[Celda 15] ERROR: fold_ids vac√≠o.")

print(f"[Celda 15] symbols = {symbols_u}")
print(f"[Celda 15] folds   = {fold_ids}")

# ========================= Inputs =========================
alpha_report_path = (
    (GLOBAL_STATE.get("alpha_reports", {}) or {}).get("alpha_multi_horizon_report_path")
    or str(Path(paths["artifacts"]).resolve() / "alpha_reports" / "alpha_multi_horizon_report.parquet")
)
alpha_report_path = str(Path(alpha_report_path).resolve())

if not Path(alpha_report_path).exists():
    raise RuntimeError(f"[Celda 15] ERROR: alpha report no existe: {alpha_report_path}. Ejecuta Celda 07C.")

alpha = pl.read_parquet(alpha_report_path)

# ========================= Contract / Required cols =========================
REQ = [
    "symbol","fold_id","segment","side",
    "h_bars_after_entry","horizon_tag",
    "n_trades",
    "net_base_mean","net_stress_mean",
    "net_base_std",
    "win_rate_base",
    "sharpe_like_base",
    "cost_base_bps","cost_stress_bps",
]
miss = [c for c in REQ if c not in alpha.columns]
if miss:
    raise RuntimeError(f"[Celda 15] ERROR: alpha report missing cols={miss}. cols={alpha.columns}")

alpha = (
    alpha
    .with_columns([
        pl.col("symbol").cast(pl.Utf8).str.to_uppercase().alias("symbol"),
        pl.col("fold_id").cast(pl.Utf8),
        pl.col("segment").cast(pl.Utf8),
        pl.col("side").cast(pl.Utf8),
        pl.col("h_bars_after_entry").cast(pl.Int64),
        pl.col("n_trades").cast(pl.Int64),
    ])
    .filter(pl.col("symbol").is_in(symbols_u))
    .filter(pl.col("fold_id").is_in(fold_ids))
)

# ========================= Par√°metros de selecci√≥n (institucionales) =========================
MIN_TRADES_IS  = int((GLOBAL_STATE.get("alpha_design", {}) or {}).get("MIN_TRADES_IS", 80))
MIN_TRADES_OOS = int((GLOBAL_STATE.get("alpha_design", {}) or {}).get("MIN_TRADES_OOS", 20))
MIN_NET_MEAN_IS = float((GLOBAL_STATE.get("alpha_design", {}) or {}).get("MIN_NET_MEAN_IS", 0.0))

def _score_expr() -> pl.Expr:
    # FIX: Polars antiguo puede no tener pl.sqrt(). Usamos pow(0.5) sobre Expr.
    n_sqrt = pl.when(pl.col("n_trades") > 0).then(pl.col("n_trades").cast(pl.Float64).pow(0.5)).otherwise(pl.lit(0.0))
    return (pl.col("sharpe_like_base") * n_sqrt).alias("score_is")

# ========================= Selecci√≥n IS-only (lado + horizonte) =========================
is_tbl = (
    alpha
    .filter(pl.col("segment") == "IS")
    .filter(pl.col("n_trades") >= pl.lit(MIN_TRADES_IS))
    .with_columns([
        _score_expr(),
        (pl.col("net_base_mean") > 0).alias("is_pos_base"),
        (pl.col("net_stress_mean") > 0).alias("is_pos_stress"),
    ])
    .filter(pl.col("net_base_mean") >= pl.lit(MIN_NET_MEAN_IS))
)

if is_tbl.height == 0:
    raise RuntimeError(
        f"[Celda 15] GATE FAIL: no hay candidatos IS con n>={MIN_TRADES_IS} y net_base_mean>={MIN_NET_MEAN_IS}. "
        f"Esto sugiere que NO hay alpha neto bajo el gate actual o que el universo es demasiado chico."
    )

best_is = (
    is_tbl
    .sort(["symbol","fold_id","score_is"], descending=[False, False, True])
    .group_by(["symbol","fold_id"])
    .agg([
        pl.first("side").alias("picked_side_alpha"),
        pl.first("h_bars_after_entry").alias("picked_h_bars"),
        pl.first("horizon_tag").alias("picked_h_tag"),

        pl.first("n_trades").alias("is_n"),
        pl.first("net_base_mean").alias("is_net_base_mean"),
        pl.first("net_stress_mean").alias("is_net_stress_mean"),
        pl.first("win_rate_base").alias("is_win_rate_base"),
        pl.first("sharpe_like_base").alias("is_sharpe_like_base"),
        pl.first("score_is").alias("is_score"),
        pl.first("cost_base_bps").alias("cost_base_bps"),
        pl.first("cost_stress_bps").alias("cost_stress_bps"),
    ])
)

oos_tbl = (
    alpha
    .filter(pl.col("segment") == "OOS")
    .select([
        "symbol","fold_id","side","h_bars_after_entry",
        "n_trades","net_base_mean","net_stress_mean","win_rate_base","sharpe_like_base"
    ])
    .rename({
        "side": "oos_side",
        "h_bars_after_entry": "oos_h_bars",
        "n_trades": "oos_n",
        "net_base_mean": "oos_net_base_mean",
        "net_stress_mean": "oos_net_stress_mean",
        "win_rate_base": "oos_win_rate_base",
        "sharpe_like_base": "oos_sharpe_like_base",
    })
)

def _cap_min(expr: pl.Expr, minv: int) -> pl.Expr:
    return pl.when(expr < pl.lit(minv)).then(pl.lit(minv)).otherwise(expr)

design = (
    best_is
    .join(
        oos_tbl,
        left_on=["symbol","fold_id","picked_side_alpha","picked_h_bars"],
        right_on=["symbol","fold_id","oos_side","oos_h_bars"],
        how="left"
    )
    .with_columns([
        pl.when(pl.col("oos_n").is_null()).then(pl.lit("NO_OOS_DATA"))
         .when(pl.col("oos_n") < pl.lit(MIN_TRADES_OOS)).then(pl.lit("LOW_OOS_N"))
         .when((pl.col("oos_net_base_mean") > 0) & (pl.col("oos_net_stress_mean") > 0)).then(pl.lit("OOS_OK"))
         .when(pl.col("oos_net_base_mean") > 0).then(pl.lit("OOS_WEAK_STRESS"))
         .otherwise(pl.lit("OOS_NEG"))
         .alias("oos_status_evidence"),
    ])
    .with_columns([
        # Targets de motor alineados al horizonte (swing permitido: NO fuerza cierre diario)
        pl.col("picked_h_bars").cast(pl.Int64).alias("TIME_STOP_BARS_target"),
        _cap_min((pl.col("picked_h_bars").cast(pl.Float64) * 0.25).cast(pl.Int64), 6).alias("MIN_HOLD_BARS_target"),
        _cap_min((pl.col("picked_h_bars").cast(pl.Float64) * 0.10).cast(pl.Int64), 3).alias("ENTRY_CONFIRM_BARS_target"),
        _cap_min((pl.col("picked_h_bars").cast(pl.Float64) * 0.10).cast(pl.Int64), 6).alias("EXIT_GATE_OFF_BARS_target"),
        _cap_min((pl.col("picked_h_bars").cast(pl.Float64) * 0.10).cast(pl.Int64), 12).alias("COOLDOWN_BARS_target"),
    ])
)

# ========================= Outputs =========================
OUT_DIR = Path(paths["artifacts"]).resolve() / "alpha_design"
OUT_DIR.mkdir(parents=True, exist_ok=True)

SNAP_DIR = Path(paths.get("run_snapshots") or (Path(paths["artifacts"]).resolve() / "snapshots")).resolve()
SNAP_DIR.mkdir(parents=True, exist_ok=True)

OUT_PARQ = OUT_DIR / "alpha_design_v15.parquet"
OUT_JSON = OUT_DIR / "alpha_design_v15.json"
SNAP_PARQ = SNAP_DIR / "alpha_design_v15.parquet"
SNAP_JSON = SNAP_DIR / "alpha_design_v15_snapshot.json"

design = design.sort(["symbol","fold_id"])

design.write_parquet(str(OUT_PARQ), compression="zstd")
design.write_parquet(str(SNAP_PARQ), compression="zstd")

snapshot = {
    "created_utc": datetime.utcnow().replace(tzinfo=timezone.utc).isoformat(),
    "inputs": {"alpha_report_path": alpha_report_path},
    "symbols": symbols_u,
    "fold_ids": fold_ids,
    "params": {
        "MIN_TRADES_IS": MIN_TRADES_IS,
        "MIN_TRADES_OOS": MIN_TRADES_OOS,
        "MIN_NET_MEAN_IS": MIN_NET_MEAN_IS,
        "selection_rule": "Pick max(score_is) using IS-only; attach OOS as evidence only.",
        "score_is": "sharpe_like_base * sqrt(n_trades) [implemented as pow(0.5)]",
        "motor_targets": ["TIME_STOP_BARS_target","MIN_HOLD_BARS_target","ENTRY_CONFIRM_BARS_target","EXIT_GATE_OFF_BARS_target","COOLDOWN_BARS_target"],
    },
    "outputs": {"alpha_design_path": str(OUT_PARQ), "alpha_design_json": str(OUT_JSON), "snapshot_parquet": str(SNAP_PARQ)},
}
SNAP_JSON.write_text(json.dumps(snapshot, indent=2, ensure_ascii=False), encoding="utf-8")
OUT_JSON.write_text(json.dumps(design.to_dicts(), indent=2, ensure_ascii=False), encoding="utf-8")

print("[Celda 15] Alpha design preview (top 20):")
print(design.select([
    "symbol","fold_id","picked_side_alpha","picked_h_bars",
    "is_n","is_net_base_mean","is_net_stress_mean","is_sharpe_like_base","is_score",
    "oos_n","oos_net_base_mean","oos_net_stress_mean","oos_status_evidence",
    "TIME_STOP_BARS_target","MIN_HOLD_BARS_target","ENTRY_CONFIRM_BARS_target","COOLDOWN_BARS_target"
]).head(20))

GLOBAL_STATE["alpha_design"] = {
    "alpha_report_path": alpha_report_path,
    "alpha_design_path": str(OUT_PARQ),
    "alpha_design_json": str(OUT_JSON),
    "snapshot_json": str(SNAP_JSON),
    "MIN_TRADES_IS": MIN_TRADES_IS,
    "MIN_TRADES_OOS": MIN_TRADES_OOS,
    "MIN_NET_MEAN_IS": MIN_NET_MEAN_IS,
}

print(f"üíæ OUTPUT   ‚Üí {OUT_PARQ} (OK) | rows={design.height}")
print(f"üíæ OUTPUT   ‚Üí {OUT_JSON} (OK)")
print(f"üíæ SNAPSHOT ‚Üí {SNAP_PARQ} (OK)")
print(f"üíæ SNAPSHOT ‚Üí {SNAP_JSON} (OK)")
print(">>> Celda 15 v1.0.1 :: OK")


>>> Celda 15 v1.0.1 :: Alpha Design (IS-only) [side + horizon selection ‚Üí motor targets] [WFO-safe]
[Celda 15] symbols = ['BNBUSD', 'BTCUSD', 'LVMH', 'XAUAUD']
[Celda 15] folds   = ['F1']
[Celda 15] Alpha design preview (top 20):
shape: (3, 17)
‚îå‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îê
‚îÇ symbol ‚îÜ fold_id ‚îÜ picked_sid ‚îÜ picked_h_b ‚îÜ ‚Ä¶ ‚îÜ TIME_STOP_ ‚îÜ MIN_HOLD_B ‚îÜ ENTRY_CON ‚îÜ COOLDOWN_ ‚îÇ
‚îÇ ---    ‚îÜ ---     ‚îÜ e_alpha    ‚îÜ ars        ‚îÜ   ‚îÜ BARS_targe ‚îÜ ARS_target ‚îÜ FIRM_BARS ‚îÜ BARS_targ ‚îÇ
‚îÇ str    ‚îÜ str     ‚îÜ ---        ‚îÜ ---        ‚îÜ   ‚îÜ t          ‚îÜ ---        ‚îÜ _target   ‚îÜ et        ‚îÇ
‚îÇ        ‚îÜ         ‚îÜ str        ‚îÜ i64        ‚îÜ   ‚îÜ ---        ‚îÜ i64      

In [48]:
# ===================== Celda 16 v1.0 ‚Äî Execution & Risk Overlay institucional (post-engine) [hours + daily stops + profit lock + max trades/day] [WFO-safe] =====================
from __future__ import annotations

from pathlib import Path
from datetime import datetime, timezone
import json
import polars as pl

print(">>> Celda 16 v1.0 :: Execution & Risk Overlay institucional (post-engine) [hours + daily stops + profit lock + max trades/day] [WFO-safe]")

# ========================= Validaciones GLOBAL_STATE =========================
if "GLOBAL_STATE" not in globals() or not isinstance(GLOBAL_STATE, dict):
    raise RuntimeError("[Celda 16] ERROR: GLOBAL_STATE no existe o no es dict.")

for k in ("paths", "backtest_engine"):
    if k not in GLOBAL_STATE:
        raise RuntimeError(f"[Celda 16] ERROR: falta GLOBAL_STATE['{k}'].")

paths = GLOBAL_STATE["paths"]
bt = GLOBAL_STATE["backtest_engine"]

trades_path = bt.get("trades_path")
summary_path = bt.get("summary_path")
if not trades_path or not Path(trades_path).exists():
    raise RuntimeError(f"[Celda 16] ERROR: trades_path inv√°lido/no existe: {trades_path}")
if not summary_path or not Path(summary_path).exists():
    raise RuntimeError(f"[Celda 16] ERROR: summary_path inv√°lido/no existe: {summary_path}")

# ========================= Config institucional (defaults razonables) =========================
# NOTA: todo se puede sobreescribir desde GLOBAL_STATE["execution_policy"] si ya existe.
pol = GLOBAL_STATE.get("execution_policy", {}) or {}

# 1) Entradas por d√≠a/horas (NO fuerza flatten)
ENTRY_WEEKDAYS_ONLY = bool(pol.get("ENTRY_WEEKDAYS_ONLY", True))  # si True: entradas solo Lun-Vie
# Ventanas por s√≠mbolo en UTC: {"LVMH": {"start_hour": 7, "end_hour": 16}, ...}
# end_hour es EXCLUSIVO (start<=hour<end).
TRADING_HOURS_UTC_BY_SYMBOL = pol.get("TRADING_HOURS_UTC_BY_SYMBOL", {}) or {}

# 2) Overlay diario (por s√≠mbolo, por segmento/fold): stops sobre retornos netos (unidad notional = 1)
DAILY_MAX_LOSS_BASE   = float(pol.get("DAILY_MAX_LOSS_BASE", 0.02))   # -2% (corta nuevas entradas)
DAILY_MAX_PROFIT_BASE = float(pol.get("DAILY_MAX_PROFIT_BASE", 0.03)) # +3% (bloquea nuevas entradas)
MAX_TRADES_PER_DAY    = int(pol.get("MAX_TRADES_PER_DAY", 3))

# Si True, al llegar a daily stop/profit, se bloquean entradas restantes del d√≠a (comportamiento institucional)
HARD_DAILY_CUTOFF = bool(pol.get("HARD_DAILY_CUTOFF", True))

# Si True, el overlay se vuelve la salida oficial del engine (para re-ejecutar 09‚Üí12 sin tocar celdas)
USE_OVERLAY_AS_ENGINE_OUTPUT = bool(pol.get("USE_OVERLAY_AS_ENGINE_OUTPUT", True))

print(f"[Celda 16] policy :: ENTRY_WEEKDAYS_ONLY={ENTRY_WEEKDAYS_ONLY} | DAILY_MAX_LOSS_BASE={DAILY_MAX_LOSS_BASE} | DAILY_MAX_PROFIT_BASE={DAILY_MAX_PROFIT_BASE} | MAX_TRADES_PER_DAY={MAX_TRADES_PER_DAY} | HARD_DAILY_CUTOFF={HARD_DAILY_CUTOFF}")
print(f"[Celda 16] policy :: TRADING_HOURS_UTC_BY_SYMBOL keys={list(TRADING_HOURS_UTC_BY_SYMBOL.keys())[:10]}")

# ========================= Outputs =========================
OUT_DIR = Path(paths["artifacts"]).resolve() / "backtests" / "backtest_engine_v10" / "overlay_engine_v16"
OUT_DIR.mkdir(parents=True, exist_ok=True)

SNAP_DIR = Path(paths.get("run_snapshots") or (Path(paths["artifacts"]).resolve() / "snapshots")).resolve()
SNAP_DIR.mkdir(parents=True, exist_ok=True)

OUT_TRADES = OUT_DIR / "trades_engine_v10_overlay_v16.parquet"
OUT_SUM    = OUT_DIR / "summary_engine_v10_overlay_v16.parquet"
OUT_QA     = OUT_DIR / "qa_overlay_drop_reasons_v16.parquet"
OUT_JSON   = OUT_DIR / "overlay_policy_v16.json"

SNAP_TRADES = SNAP_DIR / "trades_engine_v10_overlay_v16.parquet"
SNAP_SUM    = SNAP_DIR / "summary_engine_v10_overlay_v16.parquet"
SNAP_QA     = SNAP_DIR / "qa_overlay_drop_reasons_v16.parquet"
SNAP_JSON   = SNAP_DIR / "overlay_engine_v16_snapshot.json"

# ========================= Helpers (robustos) =========================
def _ensure_utc(dt: datetime) -> datetime:
    if dt.tzinfo is None:
        return dt.replace(tzinfo=timezone.utc)
    return dt.astimezone(timezone.utc)

def _parse_dt_any(s: str) -> datetime:
    # soporta iso con/sin tz; lo normaliza a UTC
    dt = datetime.fromisoformat(s)
    return _ensure_utc(dt)

def _py_weekday_int(dt: datetime) -> int:
    # Mon=0 ... Sun=6
    return int(dt.weekday())

def _py_hour_int(dt: datetime) -> int:
    return int(dt.hour)

# ========================= Load trades =========================
df = pl.read_parquet(str(trades_path))

REQ = ["symbol","fold_id","segment","side","entry_time","exit_time","net_ret_base","net_ret_stress"]
miss = [c for c in REQ if c not in df.columns]
if miss:
    raise RuntimeError(f"[Celda 16] ERROR: trades parquet missing cols={miss}. cols={df.columns}")

df = (
    df
    .with_columns([
        pl.col("symbol").cast(pl.Utf8).str.to_uppercase().alias("symbol"),
        pl.col("fold_id").cast(pl.Utf8),
        pl.col("segment").cast(pl.Utf8),
        pl.col("side").cast(pl.Utf8),
        pl.col("entry_time").cast(pl.Utf8),
        pl.col("exit_time").cast(pl.Utf8),
        pl.col("net_ret_base").cast(pl.Float64, strict=False),
        pl.col("net_ret_stress").cast(pl.Float64, strict=False),
    ])
    .with_columns([
        pl.col("entry_time").map_elements(_parse_dt_any, return_dtype=pl.Datetime("us","UTC")).alias("entry_dt"),
        pl.col("exit_time").map_elements(_parse_dt_any, return_dtype=pl.Datetime("us","UTC")).alias("exit_dt"),
    ])
    # weekday/hour via Python para evitar inconsistencias backend
    .with_columns([
        pl.col("entry_dt").map_elements(lambda x: int(_py_weekday_int(x)), return_dtype=pl.Int64).alias("entry_dow"),
        pl.col("exit_dt").map_elements(lambda x: int(_py_weekday_int(x)), return_dtype=pl.Int64).alias("exit_dow"),
        pl.col("entry_dt").map_elements(lambda x: int(_py_hour_int(x)), return_dtype=pl.Int64).alias("entry_hour"),
    ])
    .with_columns([
        pl.col("entry_dt").dt.date().alias("entry_date"),
    ])
    .sort(["symbol","fold_id","segment","entry_dt"])
)

print(f"[Celda 16] loaded trades rows = {df.height}")

# ========================= Pre-filtros (horas + weekdays) =========================
drop_rows = []

def _in_hours(sym: str, hour: int) -> bool:
    w = TRADING_HOURS_UTC_BY_SYMBOL.get(sym)
    if not w:
        return True
    sh = int(w.get("start_hour", 0))
    eh = int(w.get("end_hour", 24))
    # end_hour exclusivo
    return (hour >= sh) and (hour < eh)

def _weekday_ok(dow: int) -> bool:
    if not ENTRY_WEEKDAYS_ONLY:
        return True
    return int(dow) < 5  # Mon-Fri

# Convertimos por grupos (pocos trades, gating secuencial exacto)
kept_dicts = []

for (sym, fid, seg), gdf in df.group_by(["symbol","fold_id","segment"], maintain_order=True):
    rows = gdf.to_dicts()

    # Estado diario
    cur_day = None
    day_pnl = 0.0
    day_trades = 0
    day_blocked = False

    for r in rows:
        reason = None

        # horarios + weekday (solo para ENTRADAS)
        if not _weekday_ok(r["entry_dow"]):
            reason = "DROP_ENTRY_WEEKEND"
        elif not _in_hours(sym, int(r["entry_hour"])):
            reason = "DROP_OUTSIDE_HOURS"

        # Reset d√≠a
        d = r["entry_date"]
        if cur_day is None or d != cur_day:
            cur_day = d
            day_pnl = 0.0
            day_trades = 0
            day_blocked = False

        # Overlay diario (aplica tras filtros de calendario)
        if reason is None:
            if HARD_DAILY_CUTOFF and day_blocked:
                reason = "DROP_DAILY_CUTOFF_ACTIVE"
            elif day_trades >= MAX_TRADES_PER_DAY:
                reason = "DROP_MAX_TRADES_PER_DAY"

        # Si pasa, se ejecuta y actualiza pnl del d√≠a
        if reason is None:
            kept_dicts.append(r)
            day_trades += 1
            nr = float(r.get("net_ret_base") or 0.0)
            day_pnl += nr

            # gatillos de cutoff
            if HARD_DAILY_CUTOFF:
                if day_pnl <= -abs(DAILY_MAX_LOSS_BASE):
                    day_blocked = True
                if day_pnl >= abs(DAILY_MAX_PROFIT_BASE):
                    day_blocked = True
        else:
            drop_rows.append({
                "symbol": sym,
                "fold_id": fid,
                "segment": seg,
                "entry_dt": r["entry_dt"],
                "entry_date": r["entry_date"],
                "side": r["side"],
                "net_ret_base": float(r.get("net_ret_base") or 0.0),
                "drop_reason": reason,
            })

df_keep = pl.DataFrame(kept_dicts) if kept_dicts else pl.DataFrame(schema=df.schema)
df_drop = pl.DataFrame(drop_rows) if drop_rows else pl.DataFrame(schema={
    "symbol": pl.Utf8, "fold_id": pl.Utf8, "segment": pl.Utf8,
    "entry_dt": pl.Datetime("us","UTC"), "entry_date": pl.Date,
    "side": pl.Utf8, "net_ret_base": pl.Float64, "drop_reason": pl.Utf8
})

print(f"[Celda 16] kept trades = {df_keep.height} | dropped trades = {df_drop.height}")

# ========================= Summary overlay =========================
if df_keep.height == 0:
    raise RuntimeError("[Celda 16] GATE FAIL: overlay elimin√≥ todos los trades. Relaja horarios/stops/max_trades o revisa datos.")

summary = (
    df_keep
    .group_by(["symbol","fold_id","segment","side"])
    .agg([
        pl.len().alias("n_trades"),
        pl.col("net_ret_base").sum().alias("tot_ret_base"),
        pl.col("net_ret_stress").sum().alias("tot_ret_stress"),
        pl.col("net_ret_base").mean().alias("mean_ret_base"),
        pl.col("net_ret_base").std().alias("std_ret_base"),
        (pl.col("net_ret_base") > 0).mean().alias("win_rate_base"),
    ])
    .with_columns([
        (pl.col("mean_ret_base") / pl.col("std_ret_base")).alias("sharpe_like_base"),
    ])
    .sort(["symbol","fold_id","segment","side"])
)

drop_qa = (
    df_drop
    .group_by(["symbol","fold_id","segment","drop_reason"])
    .agg([pl.len().alias("n_drop")])
    .sort(["n_drop"], descending=True)
)

# ========================= Persist =========================
df_keep.write_parquet(str(OUT_TRADES), compression="zstd")
summary.write_parquet(str(OUT_SUM), compression="zstd")
drop_qa.write_parquet(str(OUT_QA), compression="zstd")

df_keep.write_parquet(str(SNAP_TRADES), compression="zstd")
summary.write_parquet(str(SNAP_SUM), compression="zstd")
drop_qa.write_parquet(str(SNAP_QA), compression="zstd")

policy_payload = {
    "ENTRY_WEEKDAYS_ONLY": ENTRY_WEEKDAYS_ONLY,
    "TRADING_HOURS_UTC_BY_SYMBOL": TRADING_HOURS_UTC_BY_SYMBOL,
    "DAILY_MAX_LOSS_BASE": DAILY_MAX_LOSS_BASE,
    "DAILY_MAX_PROFIT_BASE": DAILY_MAX_PROFIT_BASE,
    "MAX_TRADES_PER_DAY": MAX_TRADES_PER_DAY,
    "HARD_DAILY_CUTOFF": HARD_DAILY_CUTOFF,
    "USE_OVERLAY_AS_ENGINE_OUTPUT": USE_OVERLAY_AS_ENGINE_OUTPUT,
}
OUT_JSON.write_text(json.dumps(policy_payload, indent=2, ensure_ascii=False), encoding="utf-8")

snap = {
    "created_utc": datetime.utcnow().replace(tzinfo=timezone.utc).isoformat(),
    "inputs": {"trades_path_raw": str(trades_path), "summary_path_raw": str(summary_path)},
    "policy": policy_payload,
    "stats": {
        "raw_trades": int(df.height),
        "kept_trades": int(df_keep.height),
        "dropped_trades": int(df_drop.height),
        "drop_share": float(df_drop.height / max(1, df.height)),
    },
    "outputs": {
        "trades_overlay": str(OUT_TRADES),
        "summary_overlay": str(OUT_SUM),
        "drop_qa": str(OUT_QA),
        "policy_json": str(OUT_JSON),
        "snap_trades": str(SNAP_TRADES),
        "snap_summary": str(SNAP_SUM),
        "snap_drop_qa": str(SNAP_QA),
    },
    "notes": [
        "Overlay aplicado sobre trades ya generados por el motor: filtra entradas por calendario/horas y corta nuevas entradas por reglas diarias.",
        "Swing permitido: NO fuerza cierre intrad√≠a. Solo controla cu√°ndo entrar y cu√°ndo dejar de abrir m√°s ese d√≠a.",
    ],
}
SNAP_JSON.write_text(json.dumps(snap, indent=2, ensure_ascii=False), encoding="utf-8")

print(f"üíæ OUTPUT   ‚Üí {OUT_TRADES} (OK) | rows={df_keep.height}")
print(f"üíæ OUTPUT   ‚Üí {OUT_SUM} (OK) | rows={summary.height}")
print(f"üíæ OUTPUT   ‚Üí {OUT_QA} (OK) | rows={drop_qa.height}")
print(f"üíæ OUTPUT   ‚Üí {OUT_JSON} (OK)")
print(f"üíæ SNAPSHOT ‚Üí {SNAP_TRADES} (OK)")
print(f"üíæ SNAPSHOT ‚Üí {SNAP_SUM} (OK)")
print(f"üíæ SNAPSHOT ‚Üí {SNAP_QA} (OK)")
print(f"üíæ SNAPSHOT ‚Üí {SNAP_JSON} (OK)")

# ========================= Wire-up (para reusar Celdas 09‚Üí12 sin tocar c√≥digo) =========================
GLOBAL_STATE["execution_policy"] = policy_payload
GLOBAL_STATE["backtest_engine_overlay"] = {
    "trades_path_raw": str(trades_path),
    "summary_path_raw": str(summary_path),
    "trades_path_overlay": str(OUT_TRADES),
    "summary_path_overlay": str(OUT_SUM),
    "drop_qa_path": str(OUT_QA),
    "policy_json": str(OUT_JSON),
    "snapshot_json": str(SNAP_JSON),
}

if USE_OVERLAY_AS_ENGINE_OUTPUT:
    # respaldar
    bt["trades_path_raw"] = str(trades_path)
    bt["summary_path_raw"] = str(summary_path)
    # reemplazar para que Celda 09/10/11/12 lean el overlay
    bt["trades_path"] = str(OUT_TRADES)
    bt["summary_path"] = str(OUT_SUM)
    GLOBAL_STATE["backtest_engine"] = bt
    print("[Celda 16] INFO: backtest_engine actualizado a OVERLAY outputs (para re-ejecutar 09‚Üí12 sin cambiar celdas).")

print(">>> Celda 16 v1.0 :: OK")


>>> Celda 16 v1.0 :: Execution & Risk Overlay institucional (post-engine) [hours + daily stops + profit lock + max trades/day] [WFO-safe]
[Celda 16] policy :: ENTRY_WEEKDAYS_ONLY=True | DAILY_MAX_LOSS_BASE=0.02 | DAILY_MAX_PROFIT_BASE=0.03 | MAX_TRADES_PER_DAY=3 | HARD_DAILY_CUTOFF=True
[Celda 16] policy :: TRADING_HOURS_UTC_BY_SYMBOL keys=[]
[Celda 16] loaded trades rows = 524
[Celda 16] kept trades = 524 | dropped trades = 0
üíæ OUTPUT   ‚Üí C:\Quant\MT5_Data_Extraction\ER_STRATEGY_LAB\artifacts\backtests\backtest_engine_v10\overlay_engine_v16\trades_engine_v10_overlay_v16.parquet (OK) | rows=524
üíæ OUTPUT   ‚Üí C:\Quant\MT5_Data_Extraction\ER_STRATEGY_LAB\artifacts\backtests\backtest_engine_v10\overlay_engine_v16\summary_engine_v10_overlay_v16.parquet (OK) | rows=16
üíæ OUTPUT   ‚Üí C:\Quant\MT5_Data_Extraction\ER_STRATEGY_LAB\artifacts\backtests\backtest_engine_v10\overlay_engine_v16\qa_overlay_drop_reasons_v16.parquet (OK) | rows=0
üíæ OUTPUT   ‚Üí C:\Quant\MT5_Data_Extractio

In [52]:
# ===================== Celda 17 v1.0.2 ‚Äî QA Alineaci√≥n Alpha‚ÜîMotor (OOS-first + mismatch report) [WFO-safe] =====================
from __future__ import annotations

from pathlib import Path
from datetime import datetime, timezone
import json
import polars as pl

print(">>> Celda 17 v1.0.2 :: QA Alineaci√≥n Alpha‚ÜîMotor (OOS-first + mismatch report) [WFO-safe]")

# ========================= Helpers =========================
def _utc_now_iso() -> str:
    return datetime.utcnow().replace(tzinfo=timezone.utc).isoformat()

def _safe_exists(p: str | Path | None) -> bool:
    try:
        return p is not None and Path(p).exists()
    except Exception:
        return False

def _first_existing(paths: list[str | Path | None]) -> str | None:
    for p in paths:
        if _safe_exists(p):
            return str(Path(p))
    return None

def _canonicalize_alpha(ar: pl.DataFrame) -> pl.DataFrame:
    """
    Normaliza schemas t√≠picos de alpha_multi_horizon_report.
    Canonical:
      - horizon_bars (int)
      - mean_net_base (float)
      - mean_net_stress (float)
      - n_trades (int)
      - symbol, fold_id, segment, side
    """
    cols = set(ar.columns)

    # horizon
    if "horizon_bars" not in cols:
        if "h_bars_after_entry" in cols:
            ar = ar.with_columns(pl.col("h_bars_after_entry").cast(pl.Int64).alias("horizon_bars"))
        elif "horizon" in cols:
            ar = ar.with_columns(pl.col("horizon").cast(pl.Int64).alias("horizon_bars"))

    # means (map desde nombres reales del 07C t√≠pico)
    if "mean_net_base" not in cols and "net_base_mean" in cols:
        ar = ar.with_columns(pl.col("net_base_mean").cast(pl.Float64).alias("mean_net_base"))
    if "mean_net_stress" not in cols and "net_stress_mean" in cols:
        ar = ar.with_columns(pl.col("net_stress_mean").cast(pl.Float64).alias("mean_net_stress"))

    # n_trades
    if "n_trades" in ar.columns:
        ar = ar.with_columns(pl.col("n_trades").cast(pl.Int64))
    elif "n" in ar.columns:
        ar = ar.with_columns(pl.col("n").cast(pl.Int64).alias("n_trades"))

    # required minimal
    req = ["symbol", "fold_id", "segment", "side", "horizon_bars"]
    miss = [c for c in req if c not in ar.columns]
    if miss:
        raise RuntimeError(f"[Celda 17] ERROR: alpha report no tiene cols requeridas (post-canonical): {miss}. cols={ar.columns}")

    # normalizar tipos y side uppercase
    ar = ar.with_columns([
        pl.col("symbol").cast(pl.Utf8),
        pl.col("fold_id").cast(pl.Utf8),
        pl.col("segment").cast(pl.Utf8),
        pl.col("side").cast(pl.Utf8).str.to_uppercase(),
        pl.col("horizon_bars").cast(pl.Int64),
    ])

    for c in ["mean_net_base", "mean_net_stress"]:
        if c in ar.columns:
            ar = ar.with_columns(pl.col(c).cast(pl.Float64))

    return ar

# ========================= Validaciones GLOBAL_STATE (con fallback) =========================
if "GLOBAL_STATE" not in globals() or not isinstance(GLOBAL_STATE, dict):
    GLOBAL_STATE = {}

paths = GLOBAL_STATE.get("paths", {}) or {}
art_root = Path(paths.get("artifacts", r"C:\Quant\MT5_Data_Extraction\ER_STRATEGY_LAB\artifacts")).resolve()

bt = GLOBAL_STATE.get("backtest_engine", {}) or {}
trades_path = bt.get("trades_path")
summary_path = bt.get("summary_path")

trades_path = _first_existing([
    trades_path,
    art_root / "backtests" / "backtest_engine_v10" / "overlay_engine_v16" / "trades_engine_v10_overlay_v16.parquet",
    art_root / "backtests" / "backtest_engine_v10" / "trades_engine_v10.parquet",
])
summary_path = _first_existing([
    summary_path,
    art_root / "backtests" / "backtest_engine_v10" / "overlay_engine_v16" / "summary_engine_v10_overlay_v16.parquet",
    art_root / "backtests" / "backtest_engine_v10" / "summary_engine_v10.parquet",
])

if not trades_path or not Path(trades_path).exists():
    raise RuntimeError(f"[Celda 17] ERROR: no encuentro trades parquet. trades_path={trades_path}")
if not summary_path or not Path(summary_path).exists():
    raise RuntimeError(f"[Celda 17] ERROR: no encuentro summary parquet. summary_path={summary_path}")

alpha_path = (
    (GLOBAL_STATE.get("alpha_reports", {}) or {}).get("alpha_multi_horizon_report_path")
    or (GLOBAL_STATE.get("alpha_report", {}) or {}).get("report_path")
)
alpha_path = _first_existing([
    alpha_path,
    art_root / "alpha_reports" / "alpha_multi_horizon_report.parquet",
])
if not alpha_path or not Path(alpha_path).exists():
    raise RuntimeError(f"[Celda 17] ERROR: no encuentro alpha_multi_horizon_report.parquet. alpha_path={alpha_path}")

# ========================= Outputs =========================
OUT_DIR = art_root / "backtests" / "backtest_engine_v10" / "diagnostics_engine_v10"
OUT_DIR.mkdir(parents=True, exist_ok=True)

run_snaps = Path(paths.get("run_snapshots", art_root / "snapshots")).resolve()
run_snaps.mkdir(parents=True, exist_ok=True)

OUT_ALIGN = OUT_DIR / "qa_alpha_engine_alignment_v17.parquet"
OUT_JSON  = OUT_DIR / "qa_alpha_engine_alignment_v17.json"
SNAP_JSON = run_snaps / "qa_alpha_engine_alignment_v17_snapshot.json"

# ========================= Load data =========================
df = pl.read_parquet(trades_path)
ar = pl.read_parquet(alpha_path)
ar = _canonicalize_alpha(ar)

# normalizar side en trades tambi√©n
if "side" not in df.columns:
    raise RuntimeError(f"[Celda 17] ERROR: trades no tiene columna 'side'. cols={df.columns}")

df = df.with_columns([
    pl.col("symbol").cast(pl.Utf8),
    pl.col("fold_id").cast(pl.Utf8),
    pl.col("segment").cast(pl.Utf8),
    pl.col("side").cast(pl.Utf8).str.to_uppercase(),
])

print(f"[Celda 17] trades rows={df.height} | alpha rows={ar.height}")
print(f"[Celda 17] trades_path={trades_path}")
print(f"[Celda 17] alpha_path ={alpha_path}")

# ========================= Alpha best OOS (stress-first si existe) =========================
metric = "mean_net_stress" if "mean_net_stress" in ar.columns else "mean_net_base"
if metric not in ar.columns:
    raise RuntimeError(f"[Celda 17] ERROR: alpha report no tiene mean_net_stress ni mean_net_base post-canonical. cols={ar.columns}")

alpha_best_oos = (
    ar.filter(pl.col("segment") == "OOS")
      .sort([metric], descending=True)
      .group_by(["symbol","fold_id"])
      .agg([
          pl.first("side").alias("alpha_best_side_oos"),
          pl.first("horizon_bars").alias("alpha_best_horizon_bars_oos"),
          pl.first(metric).alias(f"alpha_best_{metric}_oos"),
          pl.first("n_trades").alias("alpha_best_n_trades_oos") if "n_trades" in ar.columns else pl.lit(None).alias("alpha_best_n_trades_oos"),
      ])
      # llave can√≥nica para join con engine
      .with_columns(pl.col("alpha_best_side_oos").cast(pl.Utf8).str.to_uppercase().alias("side"))
)

# ========================= Engine behavior OOS (holds + trailing share) =========================
req_tr = ["symbol","fold_id","segment","side","bars_held","exit_reason"]
miss_tr = [c for c in req_tr if c not in df.columns]
if miss_tr:
    raise RuntimeError(f"[Celda 17] ERROR: trades no tiene columnas requeridas: {miss_tr}")

engine_oos = (
    df.filter(pl.col("segment") == "OOS")
      .group_by(["symbol","fold_id","side"])
      .agg([
          pl.len().alias("engine_n_trades_oos"),
          pl.col("bars_held").quantile(0.90, interpolation="nearest").alias("engine_hold_p90_bars_oos"),
          pl.col("bars_held").median().alias("engine_hold_med_bars_oos"),
          (pl.col("exit_reason") == "TRAIL").mean().alias("engine_trail_share_oos"),
          (pl.col("exit_reason") == "TP").mean().alias("engine_tp_share_oos"),
          (pl.col("exit_reason") == "SL").mean().alias("engine_sl_share_oos"),
          (pl.col("exit_reason") == "TIME_STOP").mean().alias("engine_time_share_oos"),
      ])
)

# ========================= Join + mismatch metrics (2-step with_columns; FIX polars) =========================
align0 = alpha_best_oos.join(engine_oos, on=["symbol","fold_id","side"], how="left")

align1 = (
    align0
    .with_columns([
        pl.col("engine_n_trades_oos").is_null().alias("engine_missing_for_best_side"),
        pl.when(pl.col("engine_hold_p90_bars_oos").is_not_null() & (pl.col("alpha_best_horizon_bars_oos") > 0))
          .then(pl.col("engine_hold_p90_bars_oos") / pl.col("alpha_best_horizon_bars_oos"))
          .otherwise(None)
          .alias("hold_p90_over_alphaH"),
        (pl.col(f"alpha_best_{metric}_oos") <= 0).alias("alpha_edge_nonpos_oos"),
    ])
)

align = (
    align1
    .with_columns([
        # ahora s√≠ podemos referenciar hold_p90_over_alphaH
        pl.when(pl.col("hold_p90_over_alphaH").is_not_null())
          .then(pl.col("hold_p90_over_alphaH") < 0.25)
          .otherwise(None)
          .alias("mismatch_hold_lt_25pctH"),
        pl.when(pl.col("hold_p90_over_alphaH").is_not_null() & pl.col("engine_trail_share_oos").is_not_null())
          .then((pl.col("engine_trail_share_oos") > 0.60) & (pl.col("hold_p90_over_alphaH") < 0.25))
          .otherwise(None)
          .alias("mismatch_trail_dominates_short_hold"),
    ])
    .sort([f"alpha_best_{metric}_oos"], descending=True)
)

print("[Celda 17] Alignment table:")
print(align)

# Vista auxiliar: engine OOS por ambos lados (para ver si el motor est√° ‚Äúvivo‚Äù)
engine_side_view = (
    engine_oos
    .join(alpha_best_oos.select(["symbol","fold_id","alpha_best_side_oos","alpha_best_horizon_bars_oos"]), on=["symbol","fold_id"], how="left")
    .with_columns([
        (pl.col("side") == pl.col("alpha_best_side_oos")).alias("is_alpha_best_side"),
    ])
    .sort(["symbol","fold_id","is_alpha_best_side"], descending=[False,False,True])
)

print("[Celda 17] Engine OOS by side (with alpha-best marker):")
print(engine_side_view)

# ========================= Persist =========================
align.write_parquet(str(OUT_ALIGN), compression="zstd")

payload = {
    "created_utc": _utc_now_iso(),
    "inputs": {"trades_path": str(trades_path), "summary_path": str(summary_path), "alpha_path": str(alpha_path)},
    "metric_used": metric,
    "rules": {
        "hold_mismatch_flag": "hold_p90_over_alphaH < 0.25",
        "trail_short_hold_flag": "engine_trail_share_oos > 0.60 AND hold_p90_over_alphaH < 0.25",
        "alpha_nonpos_flag": f"alpha_best_{metric}_oos <= 0",
        "engine_missing_for_best_side": "No hay trades OOS del motor en el lado que alpha marca como best",
    },
    "output": {"align_parquet": str(OUT_ALIGN), "align_json": str(OUT_JSON)},
}

OUT_JSON.write_text(json.dumps(payload, indent=2, ensure_ascii=False), encoding="utf-8")
SNAP_JSON.write_text(json.dumps(payload, indent=2, ensure_ascii=False), encoding="utf-8")

print(f"üíæ OUTPUT   ‚Üí {OUT_ALIGN} (OK)")
print(f"üíæ OUTPUT   ‚Üí {OUT_JSON} (OK)")
print(f"üíæ SNAPSHOT ‚Üí {SNAP_JSON} (OK)")
print(">>> Celda 17 v1.0.2 :: OK")
 

>>> Celda 17 v1.0.2 :: QA Alineaci√≥n Alpha‚ÜîMotor (OOS-first + mismatch report) [WFO-safe]
[Celda 17] trades rows=524 | alpha rows=128
[Celda 17] trades_path=C:\Quant\MT5_Data_Extraction\ER_STRATEGY_LAB\artifacts\backtests\backtest_engine_v10\overlay_engine_v16\trades_engine_v10_overlay_v16.parquet
[Celda 17] alpha_path =C:\Quant\MT5_Data_Extraction\ER_STRATEGY_LAB\artifacts\alpha_reports\alpha_multi_horizon_report.parquet
[Celda 17] Alignment table:
shape: (4, 19)
‚îå‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚î¨‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îê
‚îÇ symbol ‚îÜ fold_id ‚îÜ alpha_best ‚îÜ alpha_best ‚îÜ ‚Ä¶ ‚îÜ hold_p90_o ‚îÜ alpha_edge ‚îÜ mismatch_ ‚îÜ mismatch_ ‚îÇ
‚îÇ ---    ‚îÜ ---     ‚îÜ _side_oos  ‚îÜ _horizon_b ‚îÜ   ‚îÜ ver_alphaH ‚îÜ _nonpos_oo ‚îÜ hold_lt_2 ‚