<a href="https://colab.research.google.com/github/racoope70/exploratory-daytrading/blob/main/ppo_alpaca_paper_trading_v5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:

#Remove any partially installed or conflicting RL/gym packages before pinning versions
!pip uninstall -y stable-baselines3 shimmy gymnasium gym autorom AutoROM.accept-rom-license ale-py

#Install the compatible trio (no [extra] to avoid Atari deps)
!pip install "gymnasium==0.29.1" "shimmy==1.3.0" "stable-baselines3==2.3.0"

#Your other libs (safe to keep separate)
!pip install alpaca-trade-api ta python-dotenv gym-anytrading


[0mFound existing installation: gymnasium 1.2.2
Uninstalling gymnasium-1.2.2:
  Successfully uninstalled gymnasium-1.2.2
Found existing installation: gym 0.25.2
Uninstalling gym-0.25.2:
  Successfully uninstalled gym-0.25.2
[0mFound existing installation: ale-py 0.11.2
Uninstalling ale-py-0.11.2:
  Successfully uninstalled ale-py-0.11.2
Collecting gymnasium==0.29.1
  Downloading gymnasium-0.29.1-py3-none-any.whl.metadata (10 kB)
Collecting shimmy==1.3.0
  Downloading Shimmy-1.3.0-py3-none-any.whl.metadata (3.7 kB)
Collecting stable-baselines3==2.3.0
  Downloading stable_baselines3-2.3.0-py3-none-any.whl.metadata (5.1 kB)
Reason for being yanked: Loading broken with PyTorch 1.13[0m[33m
[0mDownloading gymnasium-0.29.1-py3-none-any.whl (953 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m953.9/953.9 kB[0m [31m15.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading Shimmy-1.3.0-py3-none-any.whl (37 kB)
Downloading stable_baselines3-2.3.0-py3-none-any.whl (182 kB)
[2K

In [None]:
import torch
import gymnasium
import shimmy
import stable_baselines3 as sb3
import alpaca_trade_api
import websockets
import pywt

#Print the resolved version of each runtime dependency so the cell output
#records what this session is actually running against.
print(f"torch: {torch.__version__}")
print(f"gymnasium: {gymnasium.__version__}")
print(f"shimmy: {shimmy.__version__}")
print(f"stable-baselines3: {sb3.__version__}")
print(f"alpaca-trade-api: {alpaca_trade_api.__version__}")
print(f"websockets: {websockets.__version__}")
print(f"pywavelets: {pywt.__version__}")


torch: 2.9.0+cu126
gymnasium: 0.29.1
shimmy: 1.3.0
stable-baselines3: 2.3.0
alpaca-trade-api: 3.2.0
websockets: 10.4
pywavelets: 1.8.0


In [None]:
from __future__ import annotations

#Python standard library
import csv
import gc
import json
import logging
import math
import os
import pickle
import re
import shutil
import time
import warnings
from dataclasses import dataclass
from datetime import datetime, timedelta, timezone
from decimal import Decimal, ROUND_DOWN, ROUND_HALF_UP
from functools import lru_cache
from pathlib import Path
from typing import Any, Dict, List, Mapping, Optional, Tuple, Union

#Scientific / data stack
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from dotenv import load_dotenv

#Alpaca trading API
import alpaca_trade_api as tradeapi
from alpaca_trade_api.rest import APIError, TimeFrame

#Reinforcement learning models
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import VecNormalize


#(Optional) Colab helpers
try:
    import google.colab  #type: ignore
    from google.colab import drive, files  #type: ignore
except Exception:
    #Not in Colab (or the helpers failed to import): run with local paths only
    IN_COLAB = False
else:
    IN_COLAB = True

#Utils / Paths
def round_to_cents(x: float) -> float:
    """Truncate ``x`` toward zero to two decimal places and return it as a float.

    Despite the name this does not round to nearest: ``ROUND_DOWN`` discards
    the digits beyond the cent, so 1.239 becomes 1.23 and -1.239 becomes -1.23.
    The value goes through ``str`` first so the Decimal sees the printed digits
    of the float rather than its binary expansion.
    """
    one_cent = Decimal("0.01")
    exact = Decimal(str(x))
    truncated = exact.quantize(one_cent, rounding=ROUND_DOWN)
    return float(truncated)

#Project root: Google Drive when running in Colab, ./AlpacaPaper under the cwd otherwise
if IN_COLAB:
    try:
        drive.mount("/content/drive", force_remount=False)
    except Exception:
        #Best-effort mount: a failure is ignored and the Drive path below is still used
        pass
    PROJECT_ROOT = Path("/content/drive/MyDrive/AlpacaPaper")
else:
    PROJECT_ROOT = Path.cwd() / "AlpacaPaper"
PROJECT_ROOT.mkdir(parents=True, exist_ok=True)

#--- lightweight per-symbol cooldown to prevent rapid re-fires ---
#Maps symbol → time.time() recorded by _too_soon() the last time that symbol cleared the cooldown.
_LAST_ORDER_TS: dict = {}

#Session-level state; both are only initialised here and are updated by code outside this section.
#NOTE(review): presumably the equity captured at session open and the time of the last
#kill-switch trigger (see the kill-switch knobs on Knobs) — confirm against the trading loop.
SESSION_OPEN_EQUITY: Optional[float] = None
_last_kill_ts: float = 0.0

#Faster cooldown (seconds) for the very first seed fill; _too_soon() defaults to 30s otherwise
_SEED_COOLDOWN_SEC = 10

#Per-symbol counter, keyed by ticker.
#NOTE(review): name suggests "consecutive cycles with no position" — confirm where it is incremented.
_NO_POS_CYCLE_COUNT: Dict[str, int] = {}

#Re-entry cooldown after a forced flatten (end-of-day, kill-switch, manual flatten).
#_REENTRY_BLOCK_UNTIL is keyed by symbol; REENTRY_COOLDOWN_SEC is read once at import
#from the environment (default 300 seconds).
_REENTRY_BLOCK_UNTIL: Dict[str, float] = {}
REENTRY_COOLDOWN_SEC = int(os.getenv("REENTRY_COOLDOWN_SEC", "300"))

#End-of-day flatten policy. If enabled, flatten positions into the final minutes.
#Both flags are parsed at import time and default to off; accepted truthy values are
#"1", "true", "yes", "on" (case-insensitive). Note this set omits "y", which _to_bool() accepts.
FLATTEN_INTO_CLOSE = os.getenv("FLATTEN_INTO_CLOSE", "0").strip().lower() in ("1", "true", "yes", "on")
FORCE_FIRST_BUY = os.getenv("FORCE_FIRST_BUY", "0").strip().lower() in ("1","true","yes","on")

def _too_soon(symbol: str, min_gap_sec: int = 30) -> bool:
    """Report whether ``symbol`` is still inside its order cooldown.

    Returns True when fewer than ``min_gap_sec`` seconds have passed since the
    timestamp stored for ``symbol`` in ``_LAST_ORDER_TS``. Otherwise the current
    time is stored as the new timestamp and False is returned, so a False
    result also starts the next cooldown window.
    """
    current = time.time()
    previous = _LAST_ORDER_TS.get(symbol, 0.0)
    inside_gap = (current - previous) < float(min_gap_sec)
    if not inside_gap:
        #Cooldown cleared: remember this moment as the start of the next window
        _LAST_ORDER_TS[symbol] = current
    return inside_gap

#---------------------------------- Upload / Conversion Helpers --------------------------------
def upload_env_and_artifacts_in_colab():
    """Interactively upload the env file and model artifacts (Colab only).

    Does nothing outside Colab. The first upload prompt is for the credentials
    file: ``Alpaca_keys.env.txt`` is preferred, then ``.env``, and failing both
    the first uploaded file is used; whichever is chosen is moved to
    ``PROJECT_ROOT/.env``. The second prompt is for artifacts, each of which is
    moved into ``ARTIFACTS_DIR`` (default ``PROJECT_ROOT/artifacts``).
    """
    if not IN_COLAB:
        return

    artifacts_dest = Path(os.getenv("ARTIFACTS_DIR", str(PROJECT_ROOT / "artifacts")))
    artifacts_dest.mkdir(parents=True, exist_ok=True)

    print("Upload your .env (or Alpaca_keys.env.txt). Cancel if already on Drive.")
    env_upload = files.upload()
    if env_upload:
        env_dest = PROJECT_ROOT / ".env"
        #Known file names win, in this order, over whatever else was uploaded
        recognised = next(
            (candidate for candidate in ("Alpaca_keys.env.txt", ".env") if candidate in env_upload),
            None,
        )
        if recognised is not None:
            shutil.move(str(Path(recognised)), str(env_dest))
            print(f"Saved env → {env_dest}")
        else:
            first_name = next(iter(env_upload.keys()))
            shutil.move(str(Path(first_name)), str(env_dest))
            print(f"Saved env (renamed {first_name}) → {env_dest}")

    print("Upload your artifacts (ppo_*_model.zip, *_vecnorm*.pkl, *_features*.json or .txt).")
    artifact_upload = files.upload()
    for uploaded_name in artifact_upload.keys():
        shutil.move(uploaded_name, artifacts_dest / uploaded_name)
    print("Artifacts now in:", sorted(entry.name for entry in artifacts_dest.iterdir()))

def _maybe_convert_features_txt_to_json():
    """Convert every ``features_<TICKER>.txt`` artifact to ``ppo_<TICKER>_features.json``.

    The text file may separate feature names with commas and/or newlines; blank
    entries are dropped. The JSON written is an object of the form
    ``{"features": [...]}``. A failure on one file is printed and does not stop
    the remaining conversions.
    """
    artifacts = Path(os.getenv("ARTIFACTS_DIR", str(PROJECT_ROOT / "artifacts")))
    artifacts.mkdir(parents=True, exist_ok=True)
    for txt_path in artifacts.glob("features_*.txt"):
        #Strip the "features_" prefix and ".txt" suffix to recover the ticker
        ticker = re.sub(r"^features_|\.txt$", "", txt_path.name, flags=re.IGNORECASE)
        try:
            tokens = txt_path.read_text().strip().replace(",", "\n").splitlines()
            items = [token.strip() for token in tokens if token.strip()]
            json_path = artifacts / f"ppo_{ticker}_features.json"
            json_path.write_text(json.dumps({"features": items}, indent=2))
            print(f"Converted {txt_path.name} → {json_path.name}  ({len(items)} features)")
        except Exception as e:
            print(f"Could not convert {txt_path.name}: {e}")

def _maybe_rename_vecnorm_scaler():
    """Rename each ``scaler_<TICKER>.pkl`` artifact to ``ppo_<TICKER>_vecnorm.pkl``.

    A scaler file is left untouched when the destination name already exists.
    """
    artifacts = Path(os.getenv("ARTIFACTS_DIR", str(PROJECT_ROOT / "artifacts")))
    artifacts.mkdir(parents=True, exist_ok=True)
    for scaler_path in artifacts.glob("scaler_*.pkl"):
        ticker = re.sub(r"^scaler_|\.pkl$", "", scaler_path.name, flags=re.IGNORECASE)
        target = artifacts / f"ppo_{ticker}_vecnorm.pkl"
        if target.exists():
            #Never overwrite an existing vecnorm file
            continue
        shutil.move(str(scaler_path), str(target))
        print(f"Renamed {scaler_path.name} → {target.name}")

#---------------------------------- Env & logging ---------------------------------------------
#Silence every Python warning process-wide for the rest of the session
warnings.filterwarnings("ignore")

#Load env (supports PROJECT_ROOT/.env)
#The first existing candidate wins: PROJECT_ROOT/.env, then ./.env in the working directory.
#override=True is passed so values from the file take precedence over variables already set.
env_candidates = [PROJECT_ROOT / ".env", Path(".env")]
for env_path in env_candidates:
    if env_path.exists():
        load_dotenv(dotenv_path=env_path, override=True)
        break
else:
    #No candidate file exists: call load_dotenv without an explicit path
    load_dotenv(override=True)  #fallback

#Default timeout for portfolio history fetches (overridable via env)
#setdefault runs after the .env load, so a value from .env or the shell is kept.
os.environ.setdefault("PH_TIMEOUT_SEC", "8")

#DEBUG idle-seed knobs (env-driven defaults). Override in .env or shell.
os.environ.setdefault("DEBUG_FORCE_SEED_IF_IDLE", "0")
os.environ.setdefault("DEBUG_SEED_IDLE_CYCLES", "10")  #leave default 10; set 3 in .env while testing

#Configure the root logger, then apply LOG_LEVEL from the environment.
#An unrecognised LOG_LEVEL name falls back to INFO via the getattr default.
logging.basicConfig(level=logging.INFO, format="%(asctime)s | %(levelname)s | %(message)s")
logging.getLogger().setLevel(
    getattr(logging, os.getenv("LOG_LEVEL", "INFO").upper(), logging.INFO)
)


#---------------------------------- Config dataclass -------------------------------------------
def _to_bool(x: str) -> bool:
    """Interpret ``x`` as a flag: True only for 1/true/yes/y/on (case-insensitive).

    The value is converted with ``str`` and stripped first, so booleans and
    padded strings are accepted; anything else, including "", is False.
    """
    truthy = ("1", "true", "yes", "y", "on")
    normalized = str(x).strip().lower()
    return normalized in truthy

def _to_list_csv(x: str) -> list:
    """Split a comma-separated string into upper-cased, stripped, non-empty items."""
    pieces = (chunk.strip() for chunk in str(x).split(","))
    return [piece.upper() for piece in pieces if piece]

@dataclass
class Knobs:
    """Runtime configuration for the paper-trading session.

    Field defaults live here; ``from_env`` layers environment variables and
    explicit overrides on top, and ``apply_to_globals`` publishes the result as
    module-level names (``BASE_URL``, ``TICKERS``, ``RESULTS_DIR``, ...) that
    the rest of the file reads.
    """
    #API / mode
    APCA_API_BASE_URL: str = "https://paper-api.alpaca.markets"
    DRY_RUN: bool          = False        #False => place PAPER orders on PAPER endpoint
    AUTO_RUN_LIVE: bool    = True
    INF_DETERMINISTIC: bool= True

    #Equity logging controls
    EQUITY_LOG_THROTTLE_SEC: int = 900     #log at most every 15m unless a trade happens
    SKIP_EQUITY_WHEN_DRY_RUN: bool = True  #don’t log equity during dry-run

    #Universe / files
    TICKERS: list          = None
    ARTIFACTS_DIR: str     = ""
    RESULTS_ROOT: str      = ""

    #Data feed / cadence / staleness
    BARS_FEED: str         = "iex"          #"" lets Alpaca choose; "iex" for IEX
    COOLDOWN_MIN: int      = 1
    STALE_MAX_SEC: int     = 600

    #Sizing & entry/exit sensitivity
    SIZING_MODE: str       = "linear"    #"linear" | "threshold"
    WEIGHT_CAP: float      = 0.35
    CONF_FLOOR: float      = 0.20        #threshold-mode only
    ENTER_CONF_MIN: float  = 0.00
    ENTER_WEIGHT_MIN: float= 0.003
    EXIT_WEIGHT_MAX: float = 0.004
    REBALANCE_MIN_NOTIONAL: float = 5.00
    USE_FRACTIONALS: bool  = True
    SEED_FIRST_SHARE: bool = True
    ALLOW_SHORTS: bool     = False

    #add-ons
    DELTA_WEIGHT_MIN: float = 0.000
    RAW_POS_MIN: float = -1.0
    RAW_NEG_MAX: float = 0.00

    #Risk
    TAKE_PROFIT_PCT: float = 0.05
    STOP_LOSS_PCT: float   = 0.03

    #Misc
    STALE_BEST_WINDOW: str = ""    #e.g. "3" (exposed as BEST_WINDOW_ENV)

    #Secrets
    APCA_API_KEY_ID: str   = ""
    APCA_API_SECRET_KEY: str = ""

    #kill-switch
    MAX_DAILY_DRAWDOWN_PCT: float = 0.05   #flatten if equity falls 5% from session open
    KILL_SWITCH_COOLDOWN_MIN: int = 30

    #exit
    EXIT_AFTER_CLOSE: bool = False

    @classmethod
    def from_env(cls, defaults: "Knobs", project_root: Path, env: Mapping[str, str], overrides: Mapping[str, object] = None):
        """Build a ``Knobs`` from defaults, then environment, then overrides.

        Args:
            defaults: instance whose field values are used when ``env`` has no entry.
            project_root: base directory for the default ``artifacts`` / ``results`` paths.
            env: mapping of variable name to string value (typically ``os.environ``).
            overrides: optional final-say values keyed by field name; a string
                ``TICKERS`` override is split as CSV.

        Returns:
            A new ``Knobs`` instance.

        Raises:
            ValueError: a numeric variable is set to a non-blank, non-numeric string.
            TypeError: ``overrides`` contains a key that is not a ``Knobs`` field.

        Notes:
            * Numeric variables that are missing or blank keep the default.
            * ``COOLDOWN_MIN`` and ``STALE_MAX_SEC`` additionally treat 0 as "use the default".
            * Boolean and string variables are taken as-is, so an empty string
              still means False / "" (``BARS_FEED=""`` is a meaningful value).
            * API keys are read from ``APCA_API_*`` first, then ``ALPACA_API_*``,
              then the value already present on ``defaults``.
        """
        kv = {**defaults.__dict__}

        def _flag(name: str) -> bool:
            #Booleans: any value outside the truthy set, including "", is False
            return _to_bool(env.get(name, str(kv[name])))

        def _number(cast, name: str):
            #A missing or blank variable keeps the default instead of failing on cast("")
            raw = env.get(name)
            if raw is None or not str(raw).strip():
                return cast(kv[name])
            return cast(raw)

        kv.update({
            "APCA_API_BASE_URL": env.get("APCA_API_BASE_URL", kv["APCA_API_BASE_URL"]),
            "AUTO_RUN_LIVE":     _flag("AUTO_RUN_LIVE"),
            "DRY_RUN":           _flag("DRY_RUN"),
            "INF_DETERMINISTIC": _flag("INF_DETERMINISTIC"),

            "EQUITY_LOG_THROTTLE_SEC":  _number(int, "EQUITY_LOG_THROTTLE_SEC"),
            "SKIP_EQUITY_WHEN_DRY_RUN": _flag("SKIP_EQUITY_WHEN_DRY_RUN"),

            "USE_FRACTIONALS":   _flag("USE_FRACTIONALS"),
            "SEED_FIRST_SHARE":  _flag("SEED_FIRST_SHARE"),
            "ALLOW_SHORTS":      _flag("ALLOW_SHORTS"),

            "TICKERS":           _to_list_csv(env.get("TICKERS", ",".join(kv["TICKERS"] or ["UNH","GE"]))),
            "ARTIFACTS_DIR":     env.get("ARTIFACTS_DIR", kv["ARTIFACTS_DIR"] or str(project_root / "artifacts")),
            "RESULTS_ROOT":      env.get("RESULTS_ROOT",  kv["RESULTS_ROOT"]  or str(project_root / "results")),

            "BARS_FEED":         env.get("BARS_FEED", kv["BARS_FEED"]),
            #0 is treated as "not configured" for these two, as before
            "COOLDOWN_MIN":      _number(int, "COOLDOWN_MIN") or kv["COOLDOWN_MIN"],
            "STALE_MAX_SEC":     _number(int, "STALE_MAX_SEC") or kv["STALE_MAX_SEC"],

            "SIZING_MODE":       env.get("SIZING_MODE", kv["SIZING_MODE"]),
            "WEIGHT_CAP":        _number(float, "WEIGHT_CAP"),
            "CONF_FLOOR":        _number(float, "CONF_FLOOR"),
            "ENTER_CONF_MIN":    _number(float, "ENTER_CONF_MIN"),
            "ENTER_WEIGHT_MIN":  _number(float, "ENTER_WEIGHT_MIN"),
            "EXIT_WEIGHT_MAX":   _number(float, "EXIT_WEIGHT_MAX"),
            "REBALANCE_MIN_NOTIONAL": _number(float, "REBALANCE_MIN_NOTIONAL"),

            "TAKE_PROFIT_PCT":   _number(float, "TAKE_PROFIT_PCT"),
            "STOP_LOSS_PCT":     _number(float, "STOP_LOSS_PCT"),

            "DELTA_WEIGHT_MIN":  _number(float, "DELTA_WEIGHT_MIN"),
            "RAW_POS_MIN":       _number(float, "RAW_POS_MIN"),
            "RAW_NEG_MAX":       _number(float, "RAW_NEG_MAX"),
            "EXIT_AFTER_CLOSE":  _flag("EXIT_AFTER_CLOSE"),

            #Kill-switch knobs are env-overridable like every other numeric knob
            "MAX_DAILY_DRAWDOWN_PCT":   _number(float, "MAX_DAILY_DRAWDOWN_PCT"),
            "KILL_SWITCH_COOLDOWN_MIN": _number(int, "KILL_SWITCH_COOLDOWN_MIN"),

            #The env variable is BEST_WINDOW; the field keeps its historical name
            "STALE_BEST_WINDOW": env.get("BEST_WINDOW", kv["STALE_BEST_WINDOW"]),
        })
        kv["APCA_API_KEY_ID"] = (
            env.get("APCA_API_KEY_ID")
            or env.get("ALPACA_API_KEY_ID", "")
            or kv.get("APCA_API_KEY_ID")
            or ""
        )
        kv["APCA_API_SECRET_KEY"] = (
            env.get("APCA_API_SECRET_KEY")
            or env.get("ALPACA_API_SECRET_KEY", "")
            or kv.get("APCA_API_SECRET_KEY")
            or ""
        )
        if overrides:
            for k, v in overrides.items():
                key = str(k)
                if key.upper() == "TICKERS" and isinstance(v, str):
                    v = _to_list_csv(v)
                kv[key] = v
        return cls(**kv)

    def apply_to_globals(self):
        """Publish this configuration as module globals and selected env variables.

        Side effects:
            * creates the artifacts directory, today's (UTC date) results
              directory and ``<RESULTS_ROOT>/latest``;
            * sets the module-level names the rest of the file reads
              (``BASE_URL``, ``TICKERS``, paths, thresholds, API keys, log paths);
            * resets ``_LAST_EQUITY_LOG_TS`` to 0 and ``_TRADE_EVENT_FLAG`` to False;
            * writes ``APCA_API_BASE_URL``, ``DRY_RUN``, ``AUTO_RUN_LIVE``,
              ``BARS_FEED`` and ``EXIT_AFTER_CLOSE`` back into ``os.environ``.
        """
        g = globals()

        #Resolve paths as locals first so nothing depends on reading back a global set a line earlier
        artifacts_dir = Path(self.ARTIFACTS_DIR)
        results_root = Path(self.RESULTS_ROOT)
        results_dir = results_root / datetime.now(timezone.utc).strftime("%Y-%m-%d")
        latest_dir = results_root / "latest"
        for p in (artifacts_dir, results_dir, latest_dir):
            p.mkdir(parents=True, exist_ok=True)

        g["BASE_URL"]           = self.APCA_API_BASE_URL
        g["DRY_RUN"]            = bool(self.DRY_RUN)
        g["INF_DETERMINISTIC"]  = bool(self.INF_DETERMINISTIC)

        g["TICKERS"]            = list(self.TICKERS or ["UNH","GE"])
        g["ARTIFACTS_DIR"]      = artifacts_dir
        g["RESULTS_ROOT"]       = results_root
        g["RESULTS_DIR"]        = results_dir
        g["LATEST_DIR"]         = latest_dir

        g["BARS_FEED"]          = str(self.BARS_FEED).strip()
        g["COOLDOWN_MIN"]       = int(self.COOLDOWN_MIN)
        g["STALE_MAX_SEC"]      = int(self.STALE_MAX_SEC)

        g["SIZING_MODE"]        = self.SIZING_MODE
        g["WEIGHT_CAP"]         = float(self.WEIGHT_CAP)
        g["ENTER_CONF_MIN"]     = float(self.ENTER_CONF_MIN)
        g["ENTER_WEIGHT_MIN"]   = float(self.ENTER_WEIGHT_MIN)
        g["EXIT_WEIGHT_MAX"]    = float(self.EXIT_WEIGHT_MAX)
        g["REBALANCE_MIN_NOTIONAL"] = float(self.REBALANCE_MIN_NOTIONAL)
        g["USE_FRACTIONALS"]    = bool(self.USE_FRACTIONALS)
        g["SEED_FIRST_SHARE"]   = bool(self.SEED_FIRST_SHARE)
        g["ALLOW_SHORTS"]       = bool(self.ALLOW_SHORTS)
        g["CONF_FLOOR"]         = float(self.CONF_FLOOR)
        g["TAKE_PROFIT_PCT"]    = float(self.TAKE_PROFIT_PCT)
        g["STOP_LOSS_PCT"]      = float(self.STOP_LOSS_PCT)

        #An empty BEST_WINDOW is published as None
        g["BEST_WINDOW_ENV"]    = (self.STALE_BEST_WINDOW or None)

        g["API_KEY"]    = self.APCA_API_KEY_ID or ""
        g["API_SECRET"] = self.APCA_API_SECRET_KEY or ""

        g["TRADE_LOG_CSV"]      = results_dir / "trade_log_master.csv"
        g["EQUITY_LOG_CSV"]     = results_dir / "equity_log.csv"
        g["PLOT_PATH"]          = results_dir / "equity_curve.png"
        g["PLOT_PATH_LATEST"]   = latest_dir / "equity_curve.png"
        g["EQUITY_LOG_LATEST"]  = latest_dir / "equity_log.csv"
        g["TRADE_LOG_LATEST"]   = latest_dir / "trade_log_master.csv"
        g["DELTA_WEIGHT_MIN"]   = float(self.DELTA_WEIGHT_MIN)
        g["RAW_POS_MIN"]        = float(self.RAW_POS_MIN)
        g["RAW_NEG_MAX"]        = float(self.RAW_NEG_MAX)

        os.environ["APCA_API_BASE_URL"] = self.APCA_API_BASE_URL
        os.environ["DRY_RUN"]           = "1" if self.DRY_RUN else "0"
        os.environ["AUTO_RUN_LIVE"]     = "1" if self.AUTO_RUN_LIVE else "0"
        os.environ["BARS_FEED"]         = self.BARS_FEED

        g["EQUITY_LOG_THROTTLE_SEC"]   = int(self.EQUITY_LOG_THROTTLE_SEC)
        g["SKIP_EQUITY_WHEN_DRY_RUN"]  = bool(self.SKIP_EQUITY_WHEN_DRY_RUN)
        g["_LAST_EQUITY_LOG_TS"]       = 0
        g["_TRADE_EVENT_FLAG"]         = False   #set true when an order is (would be) submitted

        g["MAX_DAILY_DRAWDOWN_PCT"]  = float(self.MAX_DAILY_DRAWDOWN_PCT)
        g["KILL_SWITCH_COOLDOWN_MIN"] = int(self.KILL_SWITCH_COOLDOWN_MIN)
        g["EXIT_AFTER_CLOSE"] = bool(self.EXIT_AFTER_CLOSE)
        os.environ["EXIT_AFTER_CLOSE"] = "1" if self.EXIT_AFTER_CLOSE else "0"


def configure_knobs(overrides: Mapping[str, object] = None) -> Knobs:
    """Resolve the session configuration and publish it to module globals.

    Seeds a ``Knobs`` with tickers and directories taken from the environment
    (falling back to ``UNH,GE`` and folders under ``PROJECT_ROOT``), layers
    ``os.environ`` and ``overrides`` on top via ``Knobs.from_env``, applies the
    result with ``apply_to_globals`` and returns it.
    """
    artifacts_default = str(PROJECT_ROOT / "artifacts")
    results_default = str(PROJECT_ROOT / "results")
    seed = Knobs(
        TICKERS=_to_list_csv(os.getenv("TICKERS", "UNH,GE")),
        ARTIFACTS_DIR=os.getenv("ARTIFACTS_DIR", artifacts_default),
        RESULTS_ROOT=os.getenv("RESULTS_ROOT", results_default),
    )
    resolved = Knobs.from_env(seed, PROJECT_ROOT, os.environ, overrides=overrides)
    resolved.apply_to_globals()
    return resolved

#---------------------------------- Time helpers -----------------------------------------------
def now_utc() -> datetime:
    """Return the current time as a timezone-aware UTC ``datetime``."""
    return datetime.now(tz=timezone.utc)

def utc_ts(dt_like) -> int:
    """Convert a timestamp-like value to whole seconds since the Unix epoch (UTC).

    Python and NumPy integers and floats are treated as epoch seconds already
    and are only passed through ``int``. Anything else goes through
    ``pd.Timestamp``: naive values are taken to be UTC, aware values are
    converted to UTC.
    """
    already_numeric = (int, np.integer, float, np.floating)
    if isinstance(dt_like, already_numeric):
        return int(dt_like)

    stamp = pd.Timestamp(dt_like)
    stamp = stamp.tz_localize("UTC") if stamp.tzinfo is None else stamp.tz_convert("UTC")
    #Timestamp.value is in nanoseconds
    return int(stamp.value // 10**9)

def utcnow_iso() -> str:
    """Return the current UTC time as an ISO-8601 string with a ``+00:00`` offset."""
    stamp = datetime.now(tz=timezone.utc)
    return stamp.isoformat()

def _sleep_to_next_minute_block(n: int):
    """Sleep until the next wall-clock minute that is a multiple of ``n`` (UTC).

    ``n`` is clamped to at least 1. When the current minute is already on a
    boundary the target is the following boundary, a full ``n`` minutes after
    the start of the current minute.
    """
    step = max(1, int(n))
    started = now_utc()
    minute_floor = started.replace(second=0, microsecond=0)
    #step - (minute % step) is a full step when the remainder is 0
    minutes_ahead = step - (minute_floor.minute % step)
    wake_at = minute_floor + timedelta(minutes=minutes_ahead)
    time.sleep(max(0, (wake_at - started).total_seconds()))

#--------------------------------- CSV logging (master, optional) ------------------------------
#Column order of the master trade log; used as the CSV header by ensure_trade_log_header()
#and as the DictWriter field list by log_trade().
TRADE_FIELDS = ["datetime_utc","ticker","signal","action","price","equity","qty","comment"]

def ensure_trade_log_header():
    """Create the master trade log with only its header row if the file is missing."""
    if TRADE_LOG_CSV.exists():
        return
    header_only = pd.DataFrame(columns=TRADE_FIELDS)
    header_only.to_csv(TRADE_LOG_CSV, index=False)

def log_trade(ticker:str, signal:float, action:str, price:float, equity:float, qty:float=None, comment:str=""):
    """Append one row to the master trade log and refresh the ``latest`` copy.

    ``price``, ``equity`` and ``qty`` are written as floats when finite and as
    empty cells when None, NaN or infinite. ``signal`` is written as an int, or
    as an empty cell when None. Copying the log to ``TRADE_LOG_LATEST`` is
    best-effort: any error there is ignored.
    """
    ensure_trade_log_header()

    def _finite_or_blank(value):
        #CSV cell value: float for finite numbers, "" for None / NaN / inf
        if value is None or not np.isfinite(value):
            return ""
        return float(value)

    record = {
        "datetime_utc": utcnow_iso(),
        "ticker": ticker,
        "signal": "" if signal is None else int(signal),
        "action": action,
        "price": _finite_or_blank(price),
        "equity": _finite_or_blank(equity),
        "qty": _finite_or_blank(qty),
        "comment": str(comment) if comment else "",
    }
    with TRADE_LOG_CSV.open("a", newline="", encoding="utf-8") as handle:
        writer = csv.DictWriter(handle, fieldnames=TRADE_FIELDS)
        writer.writerow(record)
    try:
        shutil.copy2(TRADE_LOG_CSV, TRADE_LOG_LATEST)
    except Exception:
        pass

#--------------------------------- Alpaca API init --------------------------------------------
def init_alpaca() -> "tradeapi.REST":
    """Create an Alpaca REST client from the configured globals.

    Raises:
        RuntimeError: ``API_KEY`` or ``API_SECRET`` is missing or empty in the
            module globals.
    """
    scope = globals()
    if not scope.get("API_KEY") or not scope.get("API_SECRET"):
        raise RuntimeError("Missing Alpaca API keys (check your .env).")
    return tradeapi.REST(API_KEY, API_SECRET, base_url=BASE_URL)

#-------- Timeout-safe Alpaca calls for portfolio history --------
from concurrent.futures import ThreadPoolExecutor, TimeoutError as FuturesTimeoutError

def _call_with_timeout(func, timeout_sec: int, *args, **kwargs):
    """Run ``func(*args, **kwargs)`` in a worker thread and wait at most ``timeout_sec``.

    Returns:
        Whatever ``func`` returns.

    Raises:
        TimeoutError: the call did not finish within ``timeout_sec`` seconds.
        Exception: any exception raised by ``func`` is propagated unchanged.

    The executor is not used as a context manager: leaving a ``with`` block
    calls ``shutdown(wait=True)``, which blocks until the worker finishes and
    would turn the timeout into an unbounded wait. A thread cannot be killed,
    so after a timeout the worker keeps running in the background until
    ``func`` returns; only the caller is released.
    """
    ex = ThreadPoolExecutor(max_workers=1)
    fut = ex.submit(func, *args, **kwargs)
    try:
        return fut.result(timeout=timeout_sec)
    except FuturesTimeoutError:
        #Has no effect once the call is running, but drops it if it never started
        fut.cancel()
        raise TimeoutError(f"Timed out after {timeout_sec}s")
    finally:
        #Release the executor without waiting for a possibly stuck worker
        ex.shutdown(wait=False)

def get_portfolio_history_safe(api, period="1M", timeframe="1H", timeout_sec: int = 8, retries: int = 1):
    """Call ``api.get_portfolio_history`` with a timeout and a light retry.

    Args:
        api: client object exposing ``get_portfolio_history(period=..., timeframe=...)``.
        period, timeframe: forwarded to the API call unchanged.
        timeout_sec: per-attempt timeout passed to ``_call_with_timeout``.
        retries: extra attempts after the first; at least one attempt is always made.

    Returns:
        The API result, or None when every attempt failed (the last error is
        logged as a warning).

    A 0.5 s pause separates consecutive attempts. There is no pause after the
    final attempt, so a definitive failure is reported without extra delay.
    """
    attempts = max(1, retries + 1)
    last_exc = None
    for attempt in range(attempts):
        try:
            return _call_with_timeout(api.get_portfolio_history, timeout_sec, period=period, timeframe=timeframe)
        except Exception as e:
            last_exc = e
            if attempt < attempts - 1:
                time.sleep(0.5)
    logging.warning("get_portfolio_history_safe failed: %s", last_exc)
    return None

#------------------------- Portfolio equity logging + metrics ---------------------------------
def fetch_portfolio_history(period="1M", timeframe="1H", api_in=None):
    """Return portfolio equity history as a ``timestamp_utc`` / ``equity`` DataFrame.

    The client is ``api_in`` or, when that is None, the module-level ``api``.
    With no client an empty frame is returned. When the API yields nothing
    usable, the local equity log CSV is used if it exists and can be read;
    otherwise an empty frame is returned. The request timeout comes from the
    ``PH_TIMEOUT_SEC`` environment variable (default 8 seconds).
    """
    columns = ["timestamp_utc", "equity"]

    client = api_in if api_in is not None else globals().get("api", None)
    if client is None:
        return pd.DataFrame(columns=columns)

    hist = get_portfolio_history_safe(
        client,
        period=period,
        timeframe=timeframe,
        timeout_sec=int(os.getenv("PH_TIMEOUT_SEC", "8")),
        retries=1,
    )

    usable = (
        bool(hist)
        and bool(getattr(hist, "timestamp", None))
        and bool(getattr(hist, "equity", None))
    )
    if usable:
        frame = pd.DataFrame({
            "timestamp_utc": pd.to_datetime(hist.timestamp, unit="s", utc=True),
            "equity": pd.Series(hist.equity, dtype="float64")
        })
        return frame.dropna()

    #API gave nothing usable: fall back to the locally logged equity, if any
    if EQUITY_LOG_CSV.exists():
        try:
            logged = pd.read_csv(EQUITY_LOG_CSV, parse_dates=["datetime_utc"])
            return logged.rename(columns={"datetime_utc": "timestamp_utc"})[["timestamp_utc", "equity"]]
        except Exception:
            pass
    return pd.DataFrame(columns=columns)

def log_equity_snapshot(api_in=None):
    """Append the most recent equity point to the equity log CSV.

    Fetches one day of 5-minute portfolio history and keeps only its last row.
    Nothing is written when the history is empty or when that row's timestamp
    equals the last timestamp already in the log. After a write the log is
    copied to ``EQUITY_LOG_LATEST``; a failure of that copy is ignored.
    """
    history = fetch_portfolio_history(period="1D", timeframe="5Min", api_in=api_in)
    if history.empty:
        return
    newest = history.iloc[-1:].copy().rename(columns={"timestamp_utc": "datetime_utc"})

    if not EQUITY_LOG_CSV.exists():
        newest.to_csv(EQUITY_LOG_CSV, index=False)
    else:
        previous = pd.read_csv(EQUITY_LOG_CSV, parse_dates=["datetime_utc"])
        if not previous.empty:
            last_logged = pd.to_datetime(previous["datetime_utc"].iloc[-1])
            if last_logged == newest["datetime_utc"].iloc[0]:
                return  #nothing new; skip write/copy
        merged = pd.concat([previous, newest], ignore_index=True)
        merged = merged.drop_duplicates(subset=["datetime_utc"], keep="last")
        merged.to_csv(EQUITY_LOG_CSV, index=False)

    try:
        shutil.copy2(EQUITY_LOG_CSV, EQUITY_LOG_LATEST)
    except Exception:
        pass

def maybe_log_equity_snapshot(api_in=None, reason: str = "cycle"):
    """Log an equity snapshot when the run mode and throttle allow it.

    Skipped entirely when both ``DRY_RUN`` and ``SKIP_EQUITY_WHEN_DRY_RUN`` are
    set. Otherwise a snapshot is taken when ``reason`` is "trade", "finalize"
    or "close", or when at least ``EQUITY_LOG_THROTTLE_SEC`` seconds have
    passed since the last successful snapshot. Errors while logging are
    reported at debug level. A "trade" reason also clears ``_TRADE_EVENT_FLAG``.
    """
    global _LAST_EQUITY_LOG_TS, _TRADE_EVENT_FLAG
    settings = globals()
    dry_run = bool(settings.get("DRY_RUN", False))
    skip_in_dry_run = bool(settings.get("SKIP_EQUITY_WHEN_DRY_RUN", True))
    if dry_run and skip_in_dry_run:
        return

    now_ts = time.time()
    if reason in {"trade", "finalize", "close"}:
        due = True
    else:
        #Throttle only applies to routine (non-forced) calls
        elapsed = now_ts - float(_LAST_EQUITY_LOG_TS)
        due = elapsed >= int(settings.get("EQUITY_LOG_THROTTLE_SEC", 900))

    if due:
        try:
            log_equity_snapshot(api_in=api_in)
            _LAST_EQUITY_LOG_TS = now_ts
        except Exception as e:
            logging.debug(f"maybe_log_equity_snapshot skipped/log failed: {e}")

    #reset trade flag after we had a chance to log
    if reason == "trade":
        _TRADE_EVENT_FLAG = False

def plot_equity_curve(from_equity_csv: bool = True):
    """Render the equity curve to ``PLOT_PATH`` and ``PLOT_PATH_LATEST``.

    Data comes from the local equity log when ``from_equity_csv`` is true and
    the file exists; otherwise three months of hourly portfolio history are
    fetched. Prints a notice and returns without plotting when there is no data.
    """
    with plt.ioff():
        read_local = from_equity_csv and EQUITY_LOG_CSV.exists()
        if read_local:
            curve = pd.read_csv(EQUITY_LOG_CSV, parse_dates=["datetime_utc"]).sort_values("datetime_utc")
        else:
            fetched = fetch_portfolio_history(period="3M", timeframe="1H")
            curve = fetched.rename(columns={"timestamp_utc":"datetime_utc"})

        if curve.empty:
            print("No equity data to plot yet.")
            return

        fig, ax = plt.subplots(figsize=(10, 4))
        ax.plot(curve["datetime_utc"], curve["equity"])
        ax.set_title("Portfolio Value Over Time (Paper)")
        ax.set_xlabel("Time (UTC)")
        ax.set_ylabel("Equity ($)")
        fig.tight_layout()
        for destination in (PLOT_PATH, PLOT_PATH_LATEST):
            fig.savefig(destination, bbox_inches="tight")
        plt.close(fig)
        print(f"Saved equity curve → {PLOT_PATH}")
        print(f"Updated latest copy → {PLOT_PATH_LATEST}")

def compute_performance_metrics(df_equity: pd.DataFrame):
    """Compute cumulative return, annualised Sharpe ratio and max drawdown.

    Args:
        df_equity: frame with a ``datetime_utc`` column and an ``equity`` column.

    Returns:
        dict with keys ``cum_return``, ``sharpe`` and ``max_drawdown``:
        * all NaN when the frame is empty or has no equity values;
        * ``cum_return`` 0.0 and the rest NaN when no return can be formed
          (e.g. a single row);
        * otherwise floats, with ``max_drawdown`` <= 0.

    Annualisation assumes 252 trading days of 6.5 hours; the sampling period
    is the median spacing of ``datetime_utc``, falling back to 5-minute bars
    when that spacing is unavailable. Drawdown is measured on the equity series
    itself, so a loss from the first observation counts against its own peak.
    """
    if df_equity.empty or df_equity["equity"].isna().all():
        return {"cum_return": np.nan, "sharpe": np.nan, "max_drawdown": np.nan}

    df = df_equity.sort_values("datetime_utc")
    e = df["equity"].astype(float)
    r = e.pct_change().dropna()
    if r.empty:
        return {"cum_return": 0.0, "sharpe": np.nan, "max_drawdown": np.nan}

    #estimate periods/year from median spacing
    dt_sec = float(df["datetime_utc"].diff().dt.total_seconds().dropna().median())
    if not (np.isfinite(dt_sec) and dt_sec > 0):
        periods_per_year = 252 * 78  #~5-min bars as fallback
    else:
        periods_per_day = (6.5 * 3600) / dt_sec
        periods_per_year = 252 * periods_per_day

    #tiny epsilon keeps a flat equity curve from dividing by zero
    sharpe = (r.mean() / (r.std() + 1e-12)) * math.sqrt(periods_per_year)

    #Peak-to-trough on equity, not on the cumulative product of returns: that
    #product starts after the first return and would hide an opening loss.
    valid = e.dropna()
    dd = (valid / valid.cummax() - 1.0).min()
    cum_return = valid.iloc[-1] / valid.iloc[0] - 1.0

    return {"cum_return": float(cum_return), "sharpe": float(sharpe), "max_drawdown": float(dd)}

#-------------------------------- Per-ticker CSV logging --------------------------------------
def _append_csv_row(path: Path, row: dict):
    """Append ``row`` to the CSV at ``path``, creating or re-heading the file as needed.

    * Missing file: written with a header taken from ``row``'s keys, then the row.
    * Header differs from ``row``'s keys (or cannot be read): the file is
      rewritten through a ``.tmp`` sibling using the new column list. Existing
      rows keep values for columns that still exist, get "" for new columns,
      and lose columns no longer present.
    * Finally the row is appended.
    """
    columns = list(row.keys())

    if not path.exists():
        with path.open("w", newline="") as handle:
            writer = csv.DictWriter(handle, fieldnames=columns)
            writer.writeheader()
            writer.writerow(row)
        return

    #Read the current header; an unreadable or empty file counts as "no header"
    try:
        with path.open("r", newline="") as handle:
            current_header = next(csv.reader(handle))
    except Exception:
        current_header = []

    if current_header != columns:
        scratch = path.with_suffix(".tmp")
        with scratch.open("w", newline="") as dst, path.open("r", newline="") as src:
            writer = csv.DictWriter(dst, fieldnames=columns)
            writer.writeheader()
            if current_header:
                for previous in csv.DictReader(src):
                    writer.writerow({col: previous.get(col, "") for col in columns})
        scratch.replace(path)

    with path.open("a", newline="") as handle:
        csv.DictWriter(handle, fieldnames=columns).writerow(row)

def log_trade_symbol(symbol: str,
                     bar_time,
                     signal: int,
                     raw_action: float,
                     weight: float,
                     confidence: float,
                     price: float,
                     equity: float,
                     dry_run: bool,
                     note: str = ""):
    """Record one decision for ``symbol`` in its per-ticker CSV and in the master log.

    The per-ticker row goes to ``RESULTS_DIR/trade_log_<symbol>.csv`` and holds
    the bar timestamp and its age in seconds, the resolved data feed, the
    signal label, the raw model output, weight, confidence, price, equity, the
    dry-run flag and a decision label. Non-finite numbers are written as empty
    cells. The decision is ``note`` when given; otherwise "rebalance",
    "flatten" or "hold", derived from the weight and confidence against the
    ``ENTER_WEIGHT_MIN`` / ``ENTER_CONF_MIN`` / ``EXIT_WEIGHT_MAX`` globals.
    Writing to the master log is best-effort and never raises.
    """
    #Bar timestamp → ISO string plus age in whole seconds; blank when missing or unparseable
    bt_iso, age_sec = "", ""
    try:
        if bar_time is not None and not pd.isna(bar_time):
            bar_ts = pd.to_datetime(bar_time, utc=True)
            bt_iso = bar_ts.isoformat()
            age_sec = max(0, int((now_utc() - bar_ts).total_seconds()))
    except Exception:
        bt_iso, age_sec = "", ""

    resolved_feed = (os.getenv("BARS_FEED", "") or "").strip() or "default"

    #Derive a simple decision label (unless 'note' is explicitly set)
    try:
        ew = float(weight) if np.isfinite(weight) else 0.0
        cf = float(confidence) if np.isfinite(confidence) else 0.0
    except Exception:
        ew, cf = 0.0, 0.0

    if note:
        decision = note
    elif (abs(ew) >= float(globals().get("ENTER_WEIGHT_MIN", 0.0))
          and cf >= float(globals().get("ENTER_CONF_MIN", 0.0))):
        decision = "rebalance"
    elif abs(ew) <= float(globals().get("EXIT_WEIGHT_MAX", 0.0)):
        decision = "flatten"
    else:
        decision = "hold"

    def _finite_or_blank(value):
        #CSV cell value: float when finite, "" for NaN / inf
        return float(value) if np.isfinite(value) else ""

    is_buy = int(signal) == 1
    row = {
        "log_time": now_utc().isoformat(),
        "symbol": symbol,
        "bar_time": bt_iso,
        "bar_age_sec": age_sec,
        "feed": resolved_feed,
        "signal": "BUY" if is_buy else "NEUTRAL_OR_SELL",
        "raw_action": _finite_or_blank(raw_action),
        "weight": _finite_or_blank(weight),
        "confidence": _finite_or_blank(confidence),
        "price": _finite_or_blank(price),
        "equity": _finite_or_blank(equity),
        "dry_run": int(bool(dry_run)),
        "decision": decision,
        "note": note,
    }

    _append_csv_row(RESULTS_DIR / f"trade_log_{symbol}.csv", row)

    #Mirror into the master log; blanks become None so log_trade never sees ""
    try:
        ensure_trade_log_header()
        master_price = row["price"] if row["price"] != "" else None
        master_equity = row["equity"] if row["equity"] != "" else None
        log_trade(
            ticker=symbol,
            signal=1 if is_buy else 0,
            action=row["decision"],
            price=master_price,
            equity=master_equity,
            qty=None,  #or compute from position if you prefer
            comment=row["note"] or row["decision"],
        )
    except Exception:
        pass


#-------------------------------- Artifacts: picker & loaders ---------------------------------
def _extract_window_idx(path: Path) -> Optional[int]:
    """Return N from a ``_window<N>_`` marker in ``path.stem``, or None when absent."""
    m = re.search(r"_window(\d+)_", path.stem, re.IGNORECASE)
    return int(m.group(1)) if m else None

def pick_artifacts_for_ticker(
    ticker: str,
    artifacts_dir: str,
    best_window: Optional[str] = None
) -> Dict[str, Optional[Path]]:
    """Select the model zip for ``ticker`` and the vecnorm / features files that go with it.

    Args:
        ticker: symbol used in the artifact file names.
        artifacts_dir: directory to search.
        best_window: optional window index (as text) to prefer; when no model
            matches, a warning is logged and the default choice is used.

    Returns:
        ``{"model": Path, "vecnorm": Path | None, "features": Path | None}``.

    Raises:
        FileNotFoundError: the directory does not exist or holds no model zip
            for ``ticker``.

    Model choice: ``ppo_<T>_window*_model*.zip`` first, then
    ``ppo_<T>_model*.zip``, then ``*<T>*model*.zip``; without ``best_window``
    the highest window index wins. Companion files are looked up in priority
    order so the chosen model's own files always win over another window's:
    the exact model base name, that name without a `` (n)`` download suffix,
    any ``ppo_<T>_*`` file, and finally the window-less ``ppo_<T>_vecnorm*.pkl``
    / ``ppo_<T>_features*.json`` names produced by the converter helpers.
    """
    p = Path(artifacts_dir)
    if not p.exists():
        raise FileNotFoundError(f"Artifacts directory not found: {p.resolve()}")

    models = (
        sorted(p.glob(f"ppo_{ticker}_window*_model*.zip"))
        or sorted(p.glob(f"ppo_{ticker}_model*.zip"))
        or sorted(p.glob(f"*{ticker}*model*.zip"))
    )
    if not models:
        raise FileNotFoundError(f"No PPO model zip found for {ticker} in {p}")

    def _model_sort_key(path: Path):
        #Window index ascending; duplicate downloads ("name (1)") after the original
        w = _extract_window_idx(path)
        return (w if w is not None else -1, " (1)" in path.stem)

    models = sorted(models, key=_model_sort_key)

    chosen: Optional[Path] = None
    if best_window:
        chosen = next((m for m in models if f"_window{best_window}_" in m.stem), None)
        if chosen is None:
            logging.warning("BEST_WINDOW=%s not found; falling back to best available.", best_window)

    if chosen is None:
        with_idx = [(m, _extract_window_idx(m)) for m in models]
        with_idx = [(m, w) for (m, w) in with_idx if w is not None]
        chosen = max(with_idx, key=lambda t: t[1])[0] if with_idx else models[-1]

    base = chosen.stem.replace("_model", "")
    base_nodup = re.sub(r"\s\(\d+\)$", "", base)

    def _first_match(suffix_glob: str) -> Optional[Path]:
        #Try the most specific prefix first; stop at the first pattern with a hit
        for prefix in (base, base_nodup, f"ppo_{ticker}_*", f"ppo_{ticker}"):
            hits = sorted(p.glob(prefix + suffix_glob))
            if hits:
                return hits[0]
        return None

    vecnorm = _first_match("_vecnorm*.pkl")
    feats = _first_match("_features*.json")

    logging.info("[%s] model=%s | vecnorm=%s | features=%s", ticker, chosen.name, bool(vecnorm), bool(feats))
    return {"model": chosen, "vecnorm": vecnorm, "features": feats}

def load_vecnormalize(path: Optional[Path]):
    """Best-effort load of saved observation-normalization statistics.

    Returns None when *path* is None or when every loader fails; otherwise
    returns whatever object the file deserializes to.
    """
    if path is None:
        return None

    try:
        with open(path, "rb") as fh:
            stats = pickle.load(fh)
        return stats
    except Exception as e:
        # Plain unpickling failed; give SB3's own loader a chance before giving up.
        # NOTE(review): VecNormalize.load is normally called with a venv as well --
        # confirm this single-argument call can actually succeed.
        try:
            from stable_baselines3.common import vec_env as _sb3_vec_env
            return _sb3_vec_env.VecNormalize.load(str(path))
        except Exception:
            logging.warning("VecNormalize load failed (%s). Proceeding without it.", e)
            return None

def load_features(path: Optional[Path]):
    """Load the JSON feature manifest stored alongside a model.

    Args:
        path: Location of the JSON file, or None when no manifest was found.

    Returns:
        The decoded JSON document (its schema is not checked here), or None
        when *path* is None.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    if path is None:
        return None
    # JSON is UTF-8 by specification; do not depend on the platform's locale encoding.
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)

def load_ppo_model(model_path: Path):
    """Load a saved PPO model from *model_path* and return it."""
    archive = str(model_path)  # PPO.load is handed a plain string path
    return PPO.load(archive)

#---- Cached asset flags (tradable / fractionable / shortable) ----
@lru_cache(maxsize=256)
def _asset_flags(symbol: str) -> Tuple[bool, bool, bool]:
    """Return tradable, fractionable, and shortable flags for a symbol."""
    try:
        _api = globals().get("api") or init_alpaca()
        a = _api.get_asset(symbol)
        return (
            bool(getattr(a, "tradable", True)),
            bool(getattr(a, "fractionable", False)),
            bool(getattr(a, "shortable", False)),
        )
    except Exception:
        #conservative fallback
        return True, False, False

def _can_seed_short(api, symbol: str) -> Tuple[bool, str]:
    """Decide whether short seeding is allowed and why."""
    if not globals().get("ALLOW_SHORTS", False):
        return False, "shorts_disabled_seed"
    try:
        a = api.get_asset(symbol)
        if not getattr(a, "shortable", False):
            return False, "not_shortable_seed"
        return True, ""
    except Exception as e:
        logging.info(f"[{symbol}] get_asset shortable check failed: {e}")
        return False, "shortable_check_error"

#---------------------------- Market data + account helpers -----------------------------------
def get_recent_bars(api, symbol: str, limit: int = 200, timeframe=TimeFrame.Minute) -> pd.DataFrame:
    """Fetch recent bars for *symbol* as a DataFrame with Open/High/Low/Close/Volume columns.

    Strategy, in order:
      1. ``api.get_bars(..., limit=limit)``, passing ``feed`` when the
         ``BARS_FEED`` environment variable is set.
      2. If an explicit feed returned nothing, the same call without ``feed``.
      3. A start/end query covering the last five days.

    Never raises for API errors: failures are logged and an empty frame with
    the five standard columns is returned.
    """
    def _as_df(bars):
        """Normalize a get_bars() result (``.df`` holder or iterable of bars) to a sorted frame."""
        if hasattr(bars, "df"):
            df = bars.df.copy()
            if not df.empty:
                if isinstance(df.index, pd.MultiIndex):
                    # Multi-level index: select this symbol's rows, or just drop level 0
                    # when the symbol is not a key at that level.
                    try:
                        df = df.xs(symbol, level=0)
                    except KeyError:
                        df = df.reset_index(level=0, drop=True)
                df.index = pd.to_datetime(df.index, utc=True, errors="coerce")
                df = df.rename(columns={"open": "Open", "high": "High", "low": "Low",
                                        "close": "Close", "volume": "Volume"})
                # Keep only the standard columns that are actually present.
                cols = [c for c in ["Open","High","Low","Close","Volume"] if c in df.columns]
                return df[cols].sort_index()
            return pd.DataFrame(columns=["Open","High","Low","Close","Volume"])

        # No .df attribute: treat the result as an iterable of bar objects and
        # read either the short (o/h/l/c/v) or long attribute names.
        rows = []
        for b in bars:
            ts = getattr(b, "t", None)
            ts = pd.to_datetime(ts, utc=True) if ts is not None else pd.NaT
            rows.append({
                "timestamp": ts,
                "Open":   float(getattr(b, "o", getattr(b, "open",  np.nan))),
                "High":   float(getattr(b, "h", getattr(b, "high",  np.nan))),
                "Low":    float(getattr(b, "l", getattr(b, "low",   np.nan))),
                "Close":  float(getattr(b, "c", getattr(b, "close", np.nan))),
                "Volume": float(getattr(b, "v", getattr(b, "volume",np.nan))),
            })
        df = pd.DataFrame(rows)
        if df.empty:
            return pd.DataFrame(columns=["Open","High","Low","Close","Volume"])
        return df.set_index(pd.to_datetime(df["timestamp"], utc=True)).drop(columns=["timestamp"]).sort_index()

    # Empty string means "let the API pick its default feed".
    feed = os.getenv("BARS_FEED", "").strip()
    try:
        logging.info(f"[{symbol}] fetching {limit} {timeframe} bars (feed='{feed or 'default'}')")
        bars = api.get_bars(symbol, timeframe, limit=limit, feed=feed) if feed else api.get_bars(symbol, timeframe, limit=limit)
        df = _as_df(bars)
        if not df.empty:
            return df
        if feed:
            logging.info(f"[{symbol}] explicit feed empty; retrying with default feed")
            df2 = _as_df(api.get_bars(symbol, timeframe, limit=limit))
            if not df2.empty:
                return df2
    except Exception as e:
        logging.warning(f"[{symbol}] get_bars(limit) failed: {e}")

    # Reached when the limit-based attempts returned nothing or raised:
    # retry with an explicit five-day start/end window.
    try:
        end_dt = datetime.now(timezone.utc).replace(microsecond=0)
        start_dt = end_dt - timedelta(days=5)
        # Render the UTC offset as a trailing "Z".
        end = end_dt.isoformat().replace("+00:00", "Z")
        start = start_dt.isoformat().replace("+00:00", "Z")
        logging.info(f"[{symbol}] retry window start={start} end={end} (feed='{feed or 'default'}')")
        bars = api.get_bars(symbol, timeframe, start=start, end=end, feed=feed) if feed else api.get_bars(symbol, timeframe, start=start, end=end)
        return _as_df(bars)
    except Exception as e:
        logging.warning(f"[{symbol}] get_bars(start/end) failed: {e}")
        return pd.DataFrame(columns=["Open","High","Low","Close","Volume"])

def get_account_equity(api) -> float:
    """Return the account's reported equity converted to float."""
    account = api.get_account()
    return float(account.equity)

def get_position(api, symbol: str):
    """Return the API's position object for *symbol*, or None if the lookup raises."""
    try:
        position = api.get_position(symbol)
    except Exception:
        return None
    return position

def get_position_qty(api, symbol: str):
    """Return the signed position size for *symbol*.

    The result is a float when USE_FRACTIONALS is set, otherwise the quantity
    rounded to an int. Any lookup or parse failure yields a zero of the
    matching type.
    """
    fractional = USE_FRACTIONALS
    flat = 0.0 if fractional else 0

    try:
        held = api.get_position(symbol)
    except Exception:
        return flat
    if not held:
        return flat

    try:
        shares = float(held.qty)
        if fractional:
            return shares
        return int(round(shares))
    except Exception:
        return flat

def get_last_price(api, symbol: str) -> float:
    """Return a best-effort current price for *symbol*, or NaN if none is available.

    Sources are tried in order, each one only if the previous yielded nothing:
      1. latest trade price,
      2. close of the most recent one-minute bar,
      3. quote midpoint (or whichever single side is present),
      4. average entry price of the open position.
    """
    # 1) Latest trade: accept either the long ("price") or short ("p") attribute name.
    try:
        tr = api.get_latest_trade(symbol)
        price = getattr(tr, "price", None)
        if price is None:
            price = getattr(tr, "p", None)
        if price is not None and np.isfinite(price):
            return float(price)
    except Exception:
        pass

    # 2) Most recent one-minute bar, honoring BARS_FEED when it is set.
    try:
        feed = os.getenv("BARS_FEED", "").strip() or None
        bars = api.get_bars(symbol, TimeFrame.Minute, limit=1, feed=feed) if feed else api.get_bars(symbol, TimeFrame.Minute, limit=1)
        if hasattr(bars, "df"):
            df = bars.df.copy()
            if isinstance(df.index, pd.MultiIndex):
                try:
                    df = df.xs(symbol, level=0)
                except Exception:
                    df = df.reset_index(level=0, drop=True)
            if not df.empty:
                if "close" in df.columns: return float(df["close"].iloc[-1])
                if "Close" in df.columns: return float(df["Close"].iloc[-1])
        elif bars:
            b = bars[0]
            close = getattr(b, "c", getattr(b, "close", None))
            if close is not None:
                return float(close)
    except Exception as e:
        logging.warning(f"[{symbol}] get_last_price via bars failed: {e}")

    # 3) Latest quote: midpoint when both sides are truthy, otherwise the side that is.
    try:
        qt = api.get_latest_quote(symbol)
        ap = getattr(qt, "ap", None) or getattr(qt, "ask_price", None)
        bp = getattr(qt, "bp", None) or getattr(qt, "bid_price", None)
        if ap and bp:
            return float((float(ap) + float(bp)) / 2.0)
        if ap: return float(ap)
        if bp: return float(bp)
    except Exception:
        pass

    # 4) Last resort: the open position's average entry price; NaN when that fails too.
    try:
        pos = api.get_position(symbol)
        return float(pos.avg_entry_price)
    except Exception:
        return float("nan")

def cancel_open_symbol_orders(api, symbol: str):
    """Cancel every open order for *symbol*, best-effort.

    Each cancellation is guarded on its own, so one order that can no longer
    be cancelled does not leave the remaining orders for the symbol open.
    Failures are logged and never raised.
    """
    try:
        open_orders = api.list_orders(status="open")
    except Exception as e:
        logging.warning(f"[{symbol}] cancel orders failed: {e}")
        return

    for o in open_orders:
        if getattr(o, "symbol", None) != symbol:
            continue
        try:
            api.cancel_order(o.id)
        except Exception as e:
            logging.warning(f"[{symbol}] cancel order {getattr(o, 'id', '?')} failed: {e}")

def to_2dp_str(x) -> str:
    """Render *x* as a fixed-point string with two decimals, rounding half up."""
    cents = Decimal("0.01")
    rounded = Decimal(str(x)).quantize(cents, rounding=ROUND_HALF_UP)
    return format(rounded, "f")

def to_6dp_str(x) -> str:
    """Render *x* as a fixed-point string with six decimals, truncating toward zero."""
    micro = Decimal("0.000001")
    truncated = Decimal(str(x)).quantize(micro, rounding=ROUND_DOWN)
    return format(truncated, "f")

def market_order(api, symbol: str, side: str, qty=None, notional: float=None):
    """Submit a day market order sized by share quantity or by dollar notional.

    Args:
        api: Client exposing ``submit_order``.
        symbol: Ticker to trade.
        side: Order side, passed through to the API unchanged.
        qty: Share quantity (number or numeric string). Ignored when
            ``notional`` is also given.
        notional: Dollar amount to trade.

    Returns:
        The object returned by ``api.submit_order``, or None when the order was
        skipped (no size, non-positive / non-finite / dust size), when running
        under DRY_RUN, or when submission raised.

    Side effects:
        Sets the module global ``_TRADE_EVENT_FLAG`` to True on a DRY_RUN
        "would submit" and on a successful submission.
    """
    # Only one sizing mode may be sent; notional wins when both are supplied.
    if qty is not None and notional is not None:
        logging.warning(f"[{symbol}] Both qty and notional provided; preferring notional and ignoring qty.")
        qty = None

    if qty is None and notional is None:
        logging.warning(f"[{symbol}] No order size provided; skipping.")
        return None
    if qty is not None:
        try:
            if float(qty) <= 0:
                logging.warning(f"[{symbol}] Non-positive qty ({qty}); skipping.")
                return None
        except Exception:
            # An unparseable qty is rejected by the size check further down.
            pass
    if notional is not None and notional <= 0:
        logging.warning(f"[{symbol}] Non-positive notional (${notional}); skipping.")
        return None

    #ignore dust-sized or non-finite orders
    try:
        if (notional is not None and (not np.isfinite(float(notional)) or float(notional) < 0.01)) or \
           (qty is not None and (not np.isfinite(float(qty)) or float(qty) == 0.0)):
            logging.info(f"[{symbol}] Order size ~0; skipping.")
            return None
    except Exception:
        logging.info(f"[{symbol}] Order size parse issue; skipping.")
        return None

    if DRY_RUN:
        #Safe stringification for display
        notional_str = to_2dp_str(notional) if notional is not None else None
        logging.info(
            f"[DRY_RUN] Would submit {side} "
            f"{('notional=$' + str(notional_str)) if notional_str is not None else ('qty=' + str(qty))} "
            f"{symbol} (market, day)"
        )
        globals()["_TRADE_EVENT_FLAG"] = True  #ensures equity snapshot logs this cycle
        return None

    try:
        # Format sizes for the API: qty as a 6-dp string (fractional mode) or an int,
        # notional as a 2-dp string.
        qty_arg = None
        if qty is not None:
            qty_arg = to_6dp_str(float(qty)) if USE_FRACTIONALS else int(qty)
        notional_arg = to_2dp_str(float(notional)) if notional is not None else None

        o = api.submit_order(
            symbol=symbol,
            side=side,
            type="market",
            time_in_force="day",
            qty=qty_arg,
            notional=notional_arg,
        )

        logging.info(
            f"[{symbol}] Submitted {side} "
            f"{('notional=$' + str(notional_arg)) if notional_arg is not None else ('qty=' + str(qty_arg))}"
        )
        globals()["_TRADE_EVENT_FLAG"] = True
        return o

    except Exception as e:
        logging.error(f"[{symbol}] submit_order failed: {e}")
        return None

def market_order_to_qty(api, symbol: str, side: str, qty: Union[int, float, str]):
    """Place a quantity-sized market order, normalizing *qty* first.

    Without fractionals the quantity is truncated to an int. With fractionals,
    a value within 1e-8 of a whole number is sent as an int and anything else
    as a six-decimal string.
    """
    if not USE_FRACTIONALS:
        order_qty = int(qty)
    else:
        as_float = float(qty)
        nearest = round(as_float)
        is_whole = abs(as_float - nearest) < 1e-8
        order_qty = int(nearest) if is_whole else to_6dp_str(as_float)
    return market_order(api, symbol, side=side, qty=order_qty)

#----------------------------- Sizing / risk + (un)flatten / rebalance ------------------------
def action_to_weight(action) -> Tuple[float, float, float]:
    """Map a model action to target weight and confidence.

    Args:
        action: Scalar or single-element array-like produced by the policy.

    Returns:
        ``(weight, confidence, raw_action)``:
          * ``confidence`` is ``|tanh(raw_action)|``.
          * ``weight`` has the sign of the action and magnitude at most
            ``WEIGHT_CAP``. In ``"linear"`` sizing it is ``WEIGHT_CAP * confidence``;
            in any other mode it is zero below ``CONF_FLOOR`` and scales
            linearly from there up to the cap.
          * Negative actions map to weight 0 unless ``ALLOW_SHORTS`` is set.
          * A non-finite action (NaN / inf) maps to weight 0 and confidence 0.
    """
    a = float(np.array(action).squeeze())
    if not np.isfinite(a):
        # NaN fails every comparison, so it used to fall through to the long branch,
        # where min()/max() clamp a NaN weight to +WEIGHT_CAP (a full-size long).
        # Treat any non-finite policy output as "no position".
        return 0.0, 0.0, a

    conf = float(abs(np.tanh(a)))
    if a == 0:
        return 0.0, conf, a
    if a < 0 and not globals().get("ALLOW_SHORTS", False):
        return 0.0, conf, a

    # Size the magnitude once; long and short differ only in sign.
    if SIZING_MODE == "linear":
        magnitude = WEIGHT_CAP * conf
    elif conf < CONF_FLOOR:
        magnitude = 0.0
    else:
        magnitude = WEIGHT_CAP * (conf - CONF_FLOOR) / (1.0 - CONF_FLOOR)
    magnitude = max(0.0, min(WEIGHT_CAP, float(magnitude)))

    # Avoid returning -0.0 for a zero-sized short.
    w = -magnitude if (a < 0 and magnitude > 0) else magnitude
    return w, conf, a

def compute_target_qty_by_cash(equity: float, price: float, target_weight: float, api=None) -> int:
    """Convert a target portfolio weight into a signed whole-share quantity.

    Args:
        equity: Account equity used to turn the weight into a notional.
        price: Current share price.
        target_weight: Desired exposure as a fraction of equity (negative = short).
        api: Optional client; when given, the account's ``buying_power``
            (falling back to ``cash``, then to *equity*) caps the notional.

    Returns:
        Whole shares to hold: positive for long targets, negative for short
        targets when ``ALLOW_SHORTS`` is set, otherwise 0. Also 0 when price,
        equity or weight is non-finite, or when price is not positive.
    """
    if not np.isfinite(price) or price <= 0:
        return 0

    weight = float(target_weight)
    # NaN fails "weight > 0", so a NaN weight used to land in the short branch with a
    # full-budget quantity; NaN equity crashed in int(). Treat both as "no position".
    if not (np.isfinite(equity) and np.isfinite(weight)) or weight == 0.0:
        return 0

    #Determine budget (buying power if available, else equity fallback)
    budget = equity
    if api:
        try:
            acct = api.get_account()
            budget = float(getattr(acct, "buying_power", getattr(acct, "cash", equity)))
        except Exception:
            budget = equity
        if not np.isfinite(budget):
            budget = equity

    target_notional = equity * weight                         #desired exposure (can be negative)
    allowed = min(budget, abs(target_notional))               #cap by budget
    qty = int(allowed // price)                               #whole shares only

    #Return signed qty (negative only if shorts are allowed)
    if weight > 0:
        return max(0, qty)
    return min(0, -qty) if globals().get("ALLOW_SHORTS", False) else 0

def _close_symbol_position(
    api,
    symbol: str,
    qty_hint: Optional[float] = None,
    cancel_orders: bool = True,
):
    qty = qty_hint if qty_hint is not None else get_position_qty(api, symbol)

    if (USE_FRACTIONALS and abs(qty) < 1e-8) or (not USE_FRACTIONALS and int(qty) == 0):
        return

    if cancel_orders:
        cancel_open_symbol_orders(api, symbol)

    if DRY_RUN:
        logging.info(f"[DRY_RUN] Would close position {symbol} (qty={qty})")
        globals()["_TRADE_EVENT_FLAG"] = True
        return

    try:
        globals()["_TRADE_EVENT_FLAG"] = True
        api.close_position(symbol)
        logging.info(f"[{symbol}] close_position submitted")
    except Exception as e:
        logging.warning(f"[{symbol}] close_position failed ({e}); falling back to market order")
        side = "sell" if qty > 0 else "buy"
        market_order_to_qty(api, symbol, side=side, qty=abs(qty))


def flatten_symbol(api, symbol: str):
    """Cancel the symbol's open orders, then close its position."""
    # No quantity hint is passed, so the helper looks up the position size itself.
    _close_symbol_position(api, symbol, None, True)


def flatten_all_positions(api) -> None:
    """Close every open position, cancelling related open orders first.

    Order cancellation is skipped under DRY_RUN. Each position is then handed
    to ``_close_symbol_position`` with its parsed quantity as a hint (None if
    the quantity cannot be parsed) and with per-symbol cancellation disabled.
    """
    try:
        positions = api.list_positions()
    except Exception as e:
        logging.warning("Flatten-all failed (list_positions): %s", e)
        return

    def _qty_hint(position):
        # None tells the close helper to look the quantity up itself.
        try:
            return float(getattr(position, "qty", 0.0) or 0.0)
        except Exception:
            return None

    # Under DRY_RUN nothing that mutates the account is called.
    if not DRY_RUN:
        # One list_orders call covers all symbols, instead of one per position.
        try:
            open_orders = api.list_orders(status="open")
            held_symbols = {getattr(pos, "symbol", None) for pos in positions}
            for order in open_orders:
                if getattr(order, "symbol", None) not in held_symbols:
                    continue
                try:
                    api.cancel_order(order.id)
                except Exception:
                    pass
        except Exception as e:
            logging.warning("Flatten-all cancel pass failed: %s", e)

    # The close helper handles DRY_RUN on its own.
    for pos in positions:
        sym = getattr(pos, "symbol", None)
        if not sym:
            continue
        _close_symbol_position(api, sym, qty_hint=_qty_hint(pos), cancel_orders=False)

def rebalance_to_weight(api, symbol: str, equity: float, target_weight: float):
    """Adjust the position toward a target weight, applying safety guards.

    Sends at most one order per call and returns None in every case.

    The call is a no-op when no valid price is available, the asset is not
    tradable, the change in weight is below ``DELTA_WEIGHT_MIN``, or the
    change in notional is below ``REBALANCE_MIN_NOTIONAL``.

    Two sizing paths exist:
      * Fractional (``USE_FRACTIONALS`` set and the asset is fractionable):
        long-side changes are sent as a dollar notional; opening/increasing a
        short and covering a short are sent as whole-share quantities.
      * Whole-share: the target quantity comes from
        ``compute_target_qty_by_cash`` and the difference to the current
        quantity is traded.

    Args:
        api: Trading client.
        symbol: Ticker to rebalance.
        equity: Account equity used to convert the weight into a notional.
        target_weight: Desired exposure as a fraction of equity (negative = short).
    """
    price = get_last_price(api, symbol)
    if not np.isfinite(price) or price <= 0:
        logging.warning(f"[{symbol}] Price unavailable; skipping rebalance this cycle.")
        return

    tradable, fractionable, shortable = _asset_flags(symbol)
    if not tradable:
        logging.info(f"[{symbol}] Not tradable; skipping rebalance.")
        return
    # Fractional sizing needs both the global switch and a fractionable asset.
    use_fractionals = bool(USE_FRACTIONALS and fractionable)

    have_qty        = get_position_qty(api, symbol)          #signed (negative if short)
    have_notional   = have_qty * price                       #current exposure
    target_notional = equity * float(target_weight)          #desired exposure
    delta_notional  = target_notional - have_notional        #change in exposure

    if abs(delta_notional) < 1e-9:
        return

    #Compute delta_weight safely and log gates
    # max(..., 1e-9) guards against division by a zero equity.
    delta_weight = abs(delta_notional) / max(float(equity), 1e-9)
    logging.debug(
        f"[{symbol}] have_notional={have_notional:.2f} "
        f"target_notional={target_notional:.2f} delta_notional={delta_notional:.2f} "
        f"delta_weight={delta_weight:.4f} gates: "
        f"Δw_min={DELTA_WEIGHT_MIN} notional_min={REBALANCE_MIN_NOTIONAL}"
    )
    if delta_weight < float(globals().get("DELTA_WEIGHT_MIN", 0.0)):
        return

    if use_fractionals:
        dn = round_to_cents(abs(delta_notional))
        if dn < float(globals().get("REBALANCE_MIN_NOTIONAL", 0.0)):
            return

        side = "buy" if delta_notional > 0 else "sell"
        shorting = (target_notional < 0) and (side == "sell")  #increasing a short
        covering = (have_qty < 0) and (side == "buy")         #reducing a short

        if shorting:
            if not shortable:
                logging.info(f"[{symbol}] Not shortable; skipping rebalance toward short.")
                return
            # Short sales go out as whole shares (rounded down, but at least one).
            qty = max(1, int(math.floor(dn / price))) if np.isfinite(price) and price > 0 else 1
            market_order_to_qty(api, symbol, side="sell", qty=qty)
            return

        if covering:
            #Covering shorts: buy whole shares (avoid fractional buy vs integer short)
            qty = max(1, int(math.ceil(dn / price))) if np.isfinite(price) and price > 0 else 1
            # Never buy more than the whole-share size of the existing short.
            qty = min(int(abs(have_qty)), qty) if have_qty < 0 else qty
            market_order_to_qty(api, symbol, side="buy", qty=qty)
            return

        #Long exposure changes can safely use notional
        market_order(api, symbol, side=side, notional=dn)
        return

    #---- Non-fractional mode (whole shares only) ----
    want_qty  = compute_target_qty_by_cash(equity, price, target_weight, api)
    delta_qty = want_qty - have_qty
    if delta_qty == 0:
        return

    # Re-apply both gates to the whole-share delta, which can differ from the
    # ideal notional delta after rounding to shares.
    approx_delta_notional = abs(delta_qty) * price
    if equity > 0 and approx_delta_notional / equity < float(globals().get("DELTA_WEIGHT_MIN", 0.0)):
        return
    if approx_delta_notional < float(globals().get("REBALANCE_MIN_NOTIONAL", 0.0)):
        return

    side = "buy" if delta_qty > 0 else "sell"
    shorting = (target_notional < 0) and (side == "sell")
    if shorting and not shortable:
        logging.info(f"[{symbol}] Not shortable; skipping rebalance toward short.")
        return

    market_order_to_qty(api, symbol, side=side, qty=int(abs(delta_qty)))

def check_tp_sl_and_maybe_flatten(api, symbol: str) -> bool:
    """Flatten *symbol* if its unrealized P/L percentage crossed take-profit or stop-loss.

    Returns True when a threshold was hit and a flatten was requested, False
    otherwise (both thresholds disabled, no position, unreadable P/L, or P/L
    inside the band).
    """
    if TAKE_PROFIT_PCT <= 0 and STOP_LOSS_PCT <= 0:
        return False

    position = get_position(api, symbol)
    if not position:
        return False

    try:
        pnl_pct = float(position.unrealized_plpc)
    except Exception:
        return False

    # Take-profit is checked first; stop-loss only if take-profit did not fire.
    hit_msg = None
    if TAKE_PROFIT_PCT > 0 and pnl_pct >= TAKE_PROFIT_PCT:
        hit_msg = f"[{symbol}] TP hit ({pnl_pct:.4f} >= {TAKE_PROFIT_PCT:.4f}). Flattening."
    elif STOP_LOSS_PCT > 0 and pnl_pct <= -abs(STOP_LOSS_PCT):
        hit_msg = f"[{symbol}] SL hit ({pnl_pct:.4f} <= {-abs(STOP_LOSS_PCT):.4f}). Flattening."

    if hit_msg is None:
        return False

    logging.info(hit_msg)
    flatten_symbol(api, symbol)
    return True

#----------------------------- Inference / obs building ---------------------------------------
def expected_obs_shape(model, vecnorm) -> Optional[tuple]:
    """Return the first non-empty observation-space shape found on vecnorm, then model.

    Returns None when neither object exposes a usable ``observation_space.shape``.
    """
    def _shape_of(obj):
        try:
            return tuple(obj.observation_space.shape)
        except Exception:
            return None

    # Lazy on purpose: the model is only inspected if vecnorm yields nothing.
    shapes = (_shape_of(source) for source in (vecnorm, model))
    return next((shape for shape in shapes if shape), None)

def compute_art_feat_order(features_hint: Any, df: pd.DataFrame) -> List[str]:
    """Resolve the ordered list of feature columns to feed the model.

    With no hint, every numeric column of *df* is used in frame order. With a
    hint (a dict carrying a "features" entry, a plain dict, or any iterable of
    names), the hint's order is kept, restricted to numeric columns present in
    *df* and excluding the bookkeeping names datetime/symbol/target/return.
    """
    is_numeric = pd.api.types.is_numeric_dtype

    if features_hint is None:
        return [col for col in df.columns if is_numeric(df[col])]

    if isinstance(features_hint, dict):
        names = features_hint.get("features", features_hint)
    else:
        names = list(features_hint)

    excluded = {"datetime", "symbol", "target", "return"}
    ordered = []
    for name in names:
        if name in excluded:
            continue
        if name not in df.columns:
            continue
        if not is_numeric(df[name]):
            continue
        ordered.append(name)
    return ordered

def build_obs_from_row(row: pd.Series, order: List[str]) -> np.ndarray:
    """Build a float32 observation vector from *row*, following *order*.

    Entries that are missing, NaN, None or False become 0.0.
    """
    def _clean(value):
        if pd.isna(value) or value is None or value is False:
            return 0.0
        return float(value)

    return np.array([_clean(row.get(col, np.nan)) for col in order], dtype=np.float32)

def _pick_columns_for_channels(features_hint: Any, df: pd.DataFrame, channels: int) -> List[str]:
    numeric = [c for c in df.columns if pd.api.types.is_numeric_dtype(df[c])]
    cols: List[str] = []
    if isinstance(features_hint, dict) and "features" in features_hint:
        cand = [c for c in features_hint["features"] if c in df.columns and pd.api.types.is_numeric_dtype(df[c])]
        if len(cand) >= channels:
            cols = cand[:channels]
    if not cols:
        pref = ["Close", "Volume", "Adj Close", "Open", "High", "Low"]
        cols = [c for c in pref if c in numeric]
        cols += [c for c in numeric if c not in cols]
        cols = cols[:channels]
    if len(cols) < channels and cols:
        while len(cols) < channels:
            cols.append(cols[-1])
    return cols[:channels]

def add_regime(df: pd.DataFrame) -> pd.DataFrame:
    """Add Vol20, Ret20 and a 0-3 Regime4 label to *df* in place and return it.

    Regime4 is ``2 * high_vol + strong_trend``, where each flag is 1 when the
    row's value exceeds that series' median over the frame.
    """
    close = df["Close"]
    df["Vol20"] = close.pct_change().rolling(20).std()
    df["Ret20"] = close.pct_change(20)

    abs_ret = df["Ret20"].abs()
    high_vol = (df["Vol20"] > df["Vol20"].median()).astype(int)
    strong_trend = (abs_ret > abs_ret.median()).astype(int)

    df["Regime4"] = 2 * high_vol + strong_trend
    return df

def denoise_wavelet(series: pd.Series, wavelet: str = "db1", level: int = 2) -> pd.Series:
    """Smooth *series* by keeping only the wavelet approximation coefficients.

    Gaps are forward- then back-filled first. If PyWavelets cannot be imported
    or the transform fails, an EMA (span 5) of the filled series is returned
    instead. If the series is too short for even one decomposition level, the
    filled series is returned unchanged.
    """
    filled = pd.Series(series).astype(float).ffill().bfill()

    def _ema_fallback():
        return filled.ewm(span=5, adjust=False).mean()

    try:
        import pywt
    except Exception:
        return _ema_fallback()

    values = filled.to_numpy()
    try:
        wav = pywt.Wavelet(wavelet)
        deepest = pywt.dwt_max_level(len(values), wav.dec_len)
        depth = int(max(0, min(level, deepest)))
        if depth < 1:
            return filled
        coeffs = pywt.wavedec(values, wav, mode="symmetric", level=depth)
        # Zero every detail band; only the coarse approximation survives.
        approx_only = [coeffs[0]] + [np.zeros_like(band) for band in coeffs[1:]]
        rebuilt = pywt.waverec(approx_only, wav, mode="symmetric")
        # Trim to the input length before re-attaching the index.
        return pd.Series(rebuilt[:len(values)], index=filled.index)
    except Exception:
        return _ema_fallback()

def add_features_live(
    df: pd.DataFrame,
    use_sentiment: bool = False,
    rsi_wilder: bool = True,
    atr_wilder: bool = True,
) -> pd.DataFrame:
    """Compute the technical-indicator feature set on a copy of an OHLCV frame.

    The input is copied and sorted by index, price/volume columns are renamed
    to canonical capitalized names (matched case-insensitively, with a few
    aliases), and indicator columns are appended. Infinite values are replaced
    by NaN before returning.

    Args:
        df: Bar data containing at least Close, High, Low and Volume columns
            (under any of the accepted spellings).
        use_sentiment: Keep an existing ``SentimentScore`` column (default 0.0
            when absent) instead of forcing the column to 0.0.
        rsi_wilder: Use an exponential average with alpha 1/14 for RSI instead
            of a simple 14-period rolling mean.
        atr_wilder: Same choice for ATR.

    Returns:
        A new DataFrame with the original columns plus the computed features.
    """
    df = df.copy().sort_index()
    # Map lower-cased column names to their actual spelling for case-insensitive matching.
    cols_ci = {c.lower(): c for c in df.columns}
    rename = {}
    for final, alts in {
        "Open": ["open"], "High": ["high"], "Low": ["low"],
        "Close": ["close","close*","last"], "Adj Close":["adj close","adj_close","adjclose","adjusted close"],
        "Volume":["volume","vol"]
    }.items():
        # The first matching alias wins for each canonical name.
        for a in [final.lower()] + alts:
            if a in cols_ci:
                rename[cols_ci[a]] = final
                break
    df = df.rename(columns=rename)
    if "Adj Close" not in df.columns and "Close" in df.columns:
        df["Adj Close"] = df["Close"]

    #--- Classic techs ---
    # 20-period mean and standard deviation with +/- 2 sigma bands.
    df["SMA_20"] = df["Close"].rolling(20).mean()
    df["STD_20"] = df["Close"].rolling(20).std()
    df["Upper_Band"] = df["SMA_20"] + 2 * df["STD_20"]
    df["Lower_Band"] = df["SMA_20"] - 2 * df["STD_20"]

    # Stochastic over 14 periods; a zero range becomes NaN to avoid division by zero.
    df["Lowest_Low"]   = df["Low"].rolling(14).min()
    df["Highest_High"] = df["High"].rolling(14).max()
    denom = (df["Highest_High"] - df["Lowest_Low"]).replace(0, np.nan)
    df["Stoch"] = ((df["Close"] - df["Lowest_Low"]) / denom) * 100

    # 10-period rate of change, and on-balance volume (volume signed by the close-to-close move).
    df["ROC"] = df["Close"].pct_change(10)
    sign = np.sign(df["Close"].diff().fillna(0))
    df["OBV"] = (sign * df["Volume"].fillna(0)).cumsum()

    # CCI from the typical price, its 20-period mean, and the rolling mean of the absolute deviation.
    tp = (df["High"] + df["Low"] + df["Close"]) / 3.0
    sma_tp = tp.rolling(20).mean()
    md = (tp - sma_tp).abs().rolling(20).mean().replace(0, np.nan)
    df["CCI"] = (tp - sma_tp) / (0.015 * md)

    # Exponential averages and MACD (12/26 line, 9-period signal).
    df["EMA_10"] = df["Close"].ewm(span=10, adjust=False).mean()
    df["EMA_50"] = df["Close"].ewm(span=50, adjust=False).mean()
    ema12 = df["Close"].ewm(span=12, adjust=False).mean()
    ema26 = df["Close"].ewm(span=26, adjust=False).mean()
    df["MACD_Line"]   = ema12 - ema26
    df["MACD_Signal"] = df["MACD_Line"].ewm(span=9, adjust=False).mean()

    # RSI: split close-to-close changes into gains and losses, then average each.
    d = df["Close"].diff()
    gain = d.clip(lower=0)
    loss = (-d.clip(upper=0))
    if rsi_wilder:
        avg_gain = gain.ewm(alpha=1/14, adjust=False).mean()
        avg_loss = loss.ewm(alpha=1/14, adjust=False).mean()
    else:
        avg_gain = gain.rolling(14).mean()
        avg_loss = loss.rolling(14).mean()
    # A zero average loss becomes NaN so the ratio does not divide by zero.
    rs = avg_gain / avg_loss.replace(0, np.nan)
    df["RSI"] = 100 - (100 / (1 + rs))

    # True range: the largest of high-low and the two gaps against the previous close.
    tr = pd.concat([
        (df["High"] - df["Low"]),
        (df["High"] - df["Close"].shift()).abs(),
        (df["Low"]  - df["Close"].shift()).abs(),
    ], axis=1).max(axis=1)
    df["ATR"] = tr.ewm(alpha=1/14, adjust=False).mean() if atr_wilder else tr.rolling(14).mean()

    df["Volatility"]     = df["Close"].pct_change().rolling(20).std()
    df["Denoised_Close"] = denoise_wavelet(df["Close"])

    df = add_regime(df)
    df["SentimentScore"] = (df.get("SentimentScore", 0.0) if use_sentiment else 0.0)
    # "Delta" is the one-bar return and "Gamma" its first difference.
    df["Delta"] = df["Close"].pct_change(1).fillna(0.0)
    df["Gamma"] = df["Delta"].diff().fillna(0.0)

    df.replace([np.inf, -np.inf], np.nan, inplace=True)
    return df

def prepare_observation_from_bars(
    bars_df: pd.DataFrame,
    features_hint: Any = None,
    min_required_rows: int = 60,
    expected_shape: Optional[tuple] = None,
) -> Tuple[np.ndarray, int]:
    """Turn recent bars into a model observation plus its Unix timestamp.

    Args:
        bars_df: OHLCV bars, oldest first.
        features_hint: Optional feature manifest used to pick and order columns.
        min_required_rows: Minimum number of rows (never below 20) required by
            the flat-vector paths.
        expected_shape: Observation shape the model expects.
            * ``(lookback, channels)``: the last *lookback* rows of *channels*
              columns, NaN filled with 0 and zero-padded at the top if short.
            * ``(n,)``: the last row's first *n* resolved features, NaN as 0,
              zero-padded to length *n*.
            * None or any other rank: the last fully populated row over all
              resolved features.

    Returns:
        ``(observation, ts)`` where the observation is float32 and ``ts`` is
        the epoch second of the last bar (the current UTC time if the index
        cannot be read as a timestamp).

    Raises:
        ValueError: In the flat-vector paths, when too few rows are available
            or no usable feature columns can be resolved.
    """
    feats_df = add_features_live(bars_df).replace([np.inf, -np.inf], np.nan)

    # Timestamp.utcnow() is deprecated in pandas >= 2.2; now(tz="UTC") is the
    # documented equivalent and also yields a tz-aware UTC timestamp.
    ts = pd.Timestamp.now(tz="UTC")
    try:
        idx_ts = pd.Timestamp(feats_df.index[-1])
        ts = idx_ts.tz_convert("UTC") if idx_ts.tzinfo else idx_ts.tz_localize("UTC")
    except Exception:
        # Empty or non-datetime index: keep the wall-clock fallback.
        pass

    if expected_shape is not None:
        if len(expected_shape) == 2:
            lookback, channels = int(expected_shape[0]), int(expected_shape[1])
            cols = _pick_columns_for_channels(features_hint, feats_df, channels)
            window_df = feats_df[cols].tail(lookback).fillna(0.0)
            arr = window_df.to_numpy(dtype=np.float32)
            if arr.shape[0] < lookback:
                # Not enough history: left-pad with zero rows so the newest bar stays last.
                pad_rows = lookback - arr.shape[0]
                arr = np.vstack([np.zeros((pad_rows, channels), dtype=np.float32), arr])
            arr = arr[-lookback:, :channels]
            return arr.reshape(lookback, channels), int(ts.timestamp())

        elif len(expected_shape) == 1:
            n = int(expected_shape[0])
            cand = compute_art_feat_order(features_hint, feats_df)
            if len(feats_df) < max(20, min_required_rows):
                raise ValueError(f"Not enough bars to compute features robustly (have {len(feats_df)}).")
            last = feats_df.iloc[-1]
            vals = []
            for c in cand[:n]:
                v = last.get(c, np.nan)
                vals.append(0.0 if (pd.isna(v) or v is None) else float(v))
            if len(vals) < n:
                # Fewer features than the model expects: right-pad with zeros.
                vals += [0.0] * (n - len(vals))
            return np.asarray(vals, dtype=np.float32), int(ts.timestamp())

    # No usable shape hint: use every resolved feature from the last complete row.
    order = compute_art_feat_order(features_hint, feats_df)
    if not order:
        raise ValueError("No usable features after resolving artifact order.")
    feats_df = feats_df.dropna(subset=order)
    if len(feats_df) < max(20, min_required_rows):
        raise ValueError(f"Not enough bars to compute features robustly (have {len(feats_df)}).")
    last = feats_df.iloc[-1]
    obs = build_obs_from_row(last, order)
    return obs.astype(np.float32), int(ts.timestamp())

#-------------------------------- Live loop helpers -------------------------------------------
def ensure_market_open(api) -> bool:
    """Return True only if the API clock reports the market as open; False on any error."""
    try:
        clock = api.get_clock()
        is_open = bool(clock.is_open)
    except Exception:
        return False
    return is_open

def _sleep_until_open(api):
    try:
        clock = api.get_clock()
        if getattr(clock, "is_open", False):
            return
        nxt = pd.to_datetime(getattr(clock, "next_open"), utc=True, errors="coerce")
        if pd.isna(nxt):
            time.sleep(60)
            return
        wait = max(1, int((nxt - now_utc()).total_seconds()))
        logging.info("Market closed. Sleeping %ds until next open.", wait)
        time.sleep(wait)
    except Exception:
        time.sleep(60)

def write_account_info_to_run_config(api) -> None:
    """Record selected Alpaca account fields under "alpaca_account" in run_config.json.

    Existing keys in the file are kept; an unreadable file is treated as empty.
    Any failure is logged as a warning and never raised.
    """
    # (key written to the config, attribute read from the account object)
    field_map = (
        ("account_id", "id"),
        ("status", "status"),
        ("equity", "equity"),
        ("cash", "cash"),
        ("pattern_day_trader", "pattern_day_trader"),
    )
    try:
        acct = api.get_account()
        acct_info = {key: getattr(acct, attr, "") for key, attr in field_map}

        cfg_path = RESULTS_DIR / "run_config.json"
        meta = {}
        try:
            if cfg_path.exists():
                meta = json.loads(cfg_path.read_text())
        except Exception:
            meta = {}

        meta["alpaca_account"] = acct_info

        # Write to a sibling temp file, then swap it into place.
        scratch = cfg_path.with_suffix(".tmp")
        scratch.write_text(json.dumps(meta, indent=2))
        scratch.replace(cfg_path)

    except Exception as e:
        logging.warning("Could not augment run_config.json with account info: %s", e)


def infer_target_weight(model: PPO, vecnorm: Optional[VecNormalize], obs: np.ndarray) -> Tuple[float, float, float]:
    """Run the policy on one observation and convert its action into a target weight.

    Args:
        model: Policy exposing ``predict(obs, deterministic=...)``.
        vecnorm: Optional normalizer; used only if it exposes ``normalize_obs``.
        obs: Raw (un-normalized) observation.

    Returns:
        Whatever ``action_to_weight`` returns for the predicted action:
        ``(weight, confidence, raw_action)``.

    Notes:
        Normalization and prediction are each tried on the observation as
        given and, if that raises, again with a leading batch axis. If
        normalization fails both ways the raw observation is used and a
        warning is logged.
    """
    x = obs
    if vecnorm is not None and hasattr(vecnorm, "normalize_obs"):
        try:
            x = vecnorm.normalize_obs(x)
        except Exception:
            try:
                # Retry with a leading batch axis and strip it again afterwards.
                x_b = np.expand_dims(obs, axis=0)
                x = vecnorm.normalize_obs(x_b)[0]
            except Exception as e:
                logging.warning("Observation normalization failed (%s); using raw observation.", e)
                x = obs

    x = np.asarray(x, dtype=np.float32)
    try:
        action, _ = model.predict(x, deterministic=INF_DETERMINISTIC)
    except Exception:
        # A batched action needs no unwrapping: action_to_weight squeezes it.
        action, _ = model.predict(np.expand_dims(x, axis=0), deterministic=INF_DETERMINISTIC)

    # The original repeated the prediction block after this return; it could never run.
    return action_to_weight(action)

def maybe_patch_stale_with_latest_trade(api, symbol: str, bars_df: pd.DataFrame, max_age_sec: int = None) -> pd.DataFrame:
    """Append a synthetic bar built from the latest trade when the bars are stale.

    The input is returned unchanged when it is empty, when the newest bar is
    at most *max_age_sec* old (default: global STALE_MAX_SEC, else 600), when
    the latest trade is unusable or itself too old, or when anything raises.
    Otherwise a flat OHLC bar at the trade price with zero volume is appended.
    """
    if bars_df.empty:
        return bars_df
    if not max_age_sec:
        max_age_sec = int(globals().get("STALE_MAX_SEC", 600))

    try:
        newest_bar = pd.Timestamp(bars_df.index[-1])
        if newest_bar.tzinfo:
            newest_bar = newest_bar.tz_convert("UTC")
        else:
            newest_bar = newest_bar.tz_localize("UTC")

        bar_age = int((now_utc() - newest_bar).total_seconds())
        if bar_age <= max_age_sec:
            return bars_df

        trade = api.get_latest_trade(symbol)
        price = float(getattr(trade, "price", getattr(trade, "p", float("nan"))))
        trade_ts = pd.to_datetime(getattr(trade, "timestamp", getattr(trade, "t", None)), utc=True)
        if pd.isna(trade_ts) or not np.isfinite(price):
            return bars_df

        trade_age = int((now_utc() - trade_ts).total_seconds())
        if trade_age > max_age_sec:
            return bars_df

        # Place the synthetic bar at the trade's minute, but never earlier than
        # one minute after the newest real bar.
        synth_time = max(newest_bar + pd.Timedelta(minutes=1), trade_ts.floor("min"))
        flat_bar = {name: [price] for name in ("Open", "High", "Low", "Close")}
        flat_bar["Volume"] = [0.0]
        synthetic = pd.DataFrame(flat_bar, index=pd.DatetimeIndex([synth_time], tz="UTC"))

        patched = pd.concat([bars_df, synthetic]).sort_index()
        is_dup = patched.index.duplicated(keep="last")
        patched = patched[~is_dup]
        logging.info(f"[{symbol}] Patched stale bars with synthetic trade bar @ {synth_time.isoformat()} px={price:.2f}")
        return patched
    except Exception as e:
        logging.debug(f"[{symbol}] maybe_patch_stale_with_latest_trade failed: {e}")
        return bars_df

#-------------------------------- Single-symbol live step -------------------------------------
def _skip_cycle_price_equity(api, symbol: str, bars_df: pd.DataFrame, cycle_equity: Optional[float] = None):
    """Return a best-effort ``(price, equity)`` pair for logging a skipped cycle.

    Equity is ``cycle_equity`` when supplied, otherwise ``get_account_equity(api)``.
    Price is the last ``Close`` in ``bars_df`` (``get_last_price`` if the frame
    is empty). Each value independently falls back to NaN on any error so that
    writing a skip row never raises.
    """
    try:
        eq = float(cycle_equity) if cycle_equity is not None else float(get_account_equity(api))
    except Exception:
        eq = float("nan")
    try:
        px = float(bars_df["Close"].iloc[-1]) if not bars_df.empty else float(get_last_price(api, symbol))
    except Exception:
        px = float("nan")
    return px, eq


def run_live_once_for_symbol(
    api,
    symbol: str,
    model: PPO,
    vecnorm: Optional[VecNormalize],
    features_hint: Optional[dict] = None,
    cycle_equity: Optional[float] = None,
):
    """Fetch recent data and run one live decision cycle for a single symbol.

    Steps, in order: fetch minute bars (patching a stale last bar), honour any
    re-entry cooldown, build the observation, skip if it is stale, run the
    TP/SL guard, infer the target weight, then apply the raw-action, flatten,
    confidence and entry/rebalance gates. Each skip/decision path below writes
    a row via ``log_trade_symbol`` with a ``note`` naming the path; the "no
    bars", TP/SL and final "hold" paths do not.

    Args:
        api: Broker client passed through to the order/position helpers.
        symbol: Ticker to process.
        model: Loaded PPO policy.
        vecnorm: Optional normalisation wrapper paired with ``model``.
        features_hint: Optional feature metadata forwarded to
            ``prepare_observation_from_bars``.
        cycle_equity: Account equity captured once per cycle by the caller;
            when None the equity is fetched on demand.

    Returns:
        None. All effects are orders, module-state updates and log rows.
    """
    # Resolve expected obs shape and fetch bars
    shape = expected_obs_shape(model, vecnorm)
    lookback = int(shape[0]) if (shape and len(shape) == 2) else None
    bars_need = max(200, (lookback or 0) * 3)
    bars_df = get_recent_bars(api, symbol, limit=bars_need, timeframe=TimeFrame.Minute)
    if bars_df is None or bars_df.empty:
        logging.warning("[%s] No recent bars; skipping.", symbol)
        return

    # Patch stale last bar if needed
    bars_df = maybe_patch_stale_with_latest_trade(api, symbol, bars_df)
    bar_ts = bars_df.index[-1] if not bars_df.empty else pd.NaT

    # Respect any re-entry cooldown (e.g., after EOD flatten)
    block_until = _REENTRY_BLOCK_UNTIL.get(symbol, 0.0)
    if time.time() < block_until:
        remaining = int(max(0, block_until - time.time()))
        logging.info(f"[{symbol}] Re-entry cooldown active ({remaining}s left); skipping this cycle.")
        px, eq = _skip_cycle_price_equity(api, symbol, bars_df, cycle_equity)

        _NO_POS_CYCLE_COUNT[symbol] = 0
        log_trade_symbol(
            symbol,
            bar_ts,
            signal=0,
            raw_action=0.0,
            weight=0.0,
            confidence=0.0,
            price=px,
            equity=eq,
            dry_run=DRY_RUN,
            note="reentry_cooldown"
        )
        return

    # ---- Build observation ----
    min_rows_needed = max(20, lookback if lookback is not None else 60)
    try:
        obs, obs_ts = prepare_observation_from_bars(
            bars_df,
            features_hint=features_hint,
            min_required_rows=min_rows_needed,
            expected_shape=shape,
        )
    except Exception as e:
        logging.info("[%s] Could not prepare observation (%s); skipping.", symbol, e)
        px, eq = _skip_cycle_price_equity(api, symbol, bars_df, cycle_equity)
        log_trade_symbol(
            symbol,
            bar_ts,
            signal=0,
            raw_action=0.0,
            weight=0.0,
            confidence=0.0,
            price=px,
            equity=eq,
            dry_run=DRY_RUN,
            note="obs_build_failed"
        )
        return

    # Observation diagnostics
    _obs_shape = getattr(obs, "shape", None)
    _vecnorm_str = (
        f"{type(vecnorm).__name__}(training={getattr(vecnorm,'training',None)}, "
        f"norm_reward={getattr(vecnorm,'norm_reward',None)})"
    ) if vecnorm is not None else "None"
    _now_ts = utc_ts(now_utc())
    _age = _now_ts - int(obs_ts)
    logging.info("[%s] obs_shape=%s | exp_shape=%s | age=%ss | vecnorm=%s",
                 symbol, _obs_shape, shape, _age, _vecnorm_str)

    # Staleness guard
    if _now_ts - obs_ts >= STALE_MAX_SEC:
        logging.info(f"[{symbol}] Observation stale (age={_now_ts-obs_ts}s ≥ {STALE_MAX_SEC}s); skipping.")
        # Reuse the per-cycle equity like the other skip paths instead of an extra account call.
        px, eq = _skip_cycle_price_equity(api, symbol, bars_df, cycle_equity)
        log_trade_symbol(
            symbol,
            bar_ts,
            0, 0.0, 0.0, 0.0, px, eq, DRY_RUN,
            note="skip_stale"
        )
        return

    # TP/SL guard
    if check_tp_sl_and_maybe_flatten(api, symbol):
        return

    # Inference
    target_w, conf, raw = infer_target_weight(model, vecnorm, obs)
    eq = float(cycle_equity) if cycle_equity is not None else get_account_equity(api)
    px = float(bars_df["Close"].iloc[-1]) if not bars_df.empty else get_last_price(api, symbol)
    have = get_position_qty(api, symbol)

    def _log_decision(signal: int, note: str) -> None:
        # Every post-inference row shares the same bar/price/equity/model context.
        log_trade_symbol(symbol, bar_ts, signal, raw, target_w, conf, px, eq, DRY_RUN, note=note)

    logging.info(
        f"[{symbol}] raw={raw:.4f} conf={conf:.3f} → target_w={target_w:.4f} "
        f"px=${px:.2f} eq=${eq:,.2f} have={have}"
    )
    logging.debug(
        f"[{symbol}] Gates: conf≥ENTER_CONF_MIN? {conf>=ENTER_CONF_MIN} | "
        f"|target_w|≥ENTER_WEIGHT_MIN? {abs(target_w)>=ENTER_WEIGHT_MIN} | "
        f"|target_w|≤EXIT_WEIGHT_MAX? {abs(target_w)<=EXIT_WEIGHT_MAX} | "
        f"Δw floor: {float(globals().get('DELTA_WEIGHT_MIN',0.0))}"
    )

    # Optional: auto-seed after N idle cycles (env-controlled)
    if os.getenv("DEBUG_FORCE_SEED_IF_IDLE", "0").lower() in ("1", "true", "yes"):
        if have != 0:
            _NO_POS_CYCLE_COUNT[symbol] = 0
        else:
            _NO_POS_CYCLE_COUNT[symbol] = _NO_POS_CYCLE_COUNT.get(symbol, 0) + 1

        idle_cycles = int(os.getenv("DEBUG_SEED_IDLE_CYCLES", "10"))
        if have == 0 and _NO_POS_CYCLE_COUNT[symbol] >= idle_cycles and ensure_market_open(api):
            tradable, fractionable, _ = _asset_flags(symbol)
            if not tradable:
                _log_decision(0, "not_tradable_seed")
                return
            seed_amt = round_to_cents(REBALANCE_MIN_NOTIONAL)
            if USE_FRACTIONALS and fractionable:
                market_order(api, symbol, side="buy", notional=seed_amt)
            else:
                market_order_to_qty(api, symbol, side="buy", qty=1)
            _log_decision(1, "debug_force_seed")
            return

    # Raw-action gates
    raw_pos_min = float(globals().get("RAW_POS_MIN", 0.0))
    if target_w > 0 and raw < raw_pos_min:
        logging.info(f"[{symbol}] Raw {raw:.4f} < RAW_POS_MIN {raw_pos_min:.4f}; no action.")
        _log_decision(0, "raw_gate_long")
        return

    raw_neg_gate = float(globals().get("RAW_NEG_MAX", 0.0))
    if target_w < 0 and abs(raw) < raw_neg_gate:
        logging.info(f"[{symbol}] |raw| {abs(raw):.4f} < RAW_NEG_GATE {raw_neg_gate:.4f}; no action.")
        _log_decision(0, "raw_gate_short")
        return

    # Flatten if model near-flat and we have a position
    pos = get_position(api, symbol)
    if abs(target_w) <= EXIT_WEIGHT_MAX and pos:
        logging.info(f"[{symbol}] Model near-flat (≤{EXIT_WEIGHT_MAX:.3f}); flattening.")
        flatten_symbol(api, symbol)
        _log_decision(int(target_w > 0), "flatten")
        return

    # Low confidence and near-flat → no action
    if conf < ENTER_CONF_MIN and abs(target_w) <= EXIT_WEIGHT_MAX:
        logging.info(f"[{symbol}] Below conf/near-flat gates; no action.")
        _log_decision(int(target_w > 0), "no_action")
        return

    # Entry / rebalance
    if abs(target_w) >= ENTER_WEIGHT_MIN and conf >= ENTER_CONF_MIN:
        if SEED_FIRST_SHARE and have == 0:
            if _too_soon(f"{symbol}#seed", _SEED_COOLDOWN_SEC):
                _log_decision(0, "seed_cooldown")
                return

            seed_notional = round_to_cents(REBALANCE_MIN_NOTIONAL)
            side = "buy" if target_w > 0 else "sell"

            tradable, fractionable, _shortable = _asset_flags(symbol)
            if not tradable:
                _log_decision(0, "not_tradable_seed")
                return

            if side == "sell":
                ok, note = _can_seed_short(api, symbol)
                if not ok:
                    _log_decision(0, note)
                    return
                market_order_to_qty(api, symbol, side="sell", qty=1)
            else:
                if USE_FRACTIONALS and fractionable:
                    market_order(api, symbol, side="buy", notional=seed_notional)
                else:
                    market_order_to_qty(api, symbol, side="buy", qty=1)

            # Seed first, then size the position to the model's target weight.
            rebalance_to_weight(api, symbol, eq, target_w)
            _log_decision(int(target_w > 0), "seed_open")
            _LAST_ORDER_TS[symbol] = time.time()
            return

        if _too_soon(symbol, 30):
            _log_decision(0, "rebalance_cooldown")
            return

        rebalance_to_weight(api, symbol, eq, target_w)
        _log_decision(int(target_w > 0), "rebalance_try")
        return

    # None of the gates fired (weight between the exit and entry thresholds,
    # or confidence below the entry minimum): leave the position untouched.
    logging.info(f"[{symbol}] No entry/exit gate matched; holding current position.")

#--------------------------------- Live runner -------------------------------------------------
def run_live(tickers: List[str], api: tradeapi.REST):
    """Run the live trading loop for ``tickers`` until it is stopped.

    Artifacts (model, optional VecNormalize, optional feature hints) are loaded
    once per ticker; tickers whose artifacts fail to load are skipped. While
    the market is open each cycle:

    1. anchors ``SESSION_OPEN_EQUITY`` once per session,
    2. runs ``run_live_once_for_symbol`` for every loaded ticker (a failure in
       one ticker is logged and does not stop the others),
    3. logs an equity snapshot,
    4. evaluates the daily-drawdown kill-switch (flatten all + cooldown),
    5. flattens once into the close when ``FLATTEN_INTO_CLOSE`` is set,
    6. periodically plots/logs performance metrics, then sleeps via
       ``_sleep_to_next_minute_block(COOLDOWN_MIN)``.

    The loop ends on ``KeyboardInterrupt`` (with optional flatten + finalise),
    on ``EXIT_AFTER_CLOSE`` after the close flatten, or when an exception
    escapes a cycle (logged, snapshot attempted, then the function returns).

    Args:
        tickers: Symbols to trade.
        api: Alpaca REST client.

    Raises:
        RuntimeError: If no ticker's artifacts could be loaded.
    """
    # Declared up front: the kill-switch writes its de-bounce timestamp to module state.
    global _last_kill_ts

    def minutes_to_close(api: tradeapi.REST) -> Optional[int]:
        """Whole minutes until the next close, or None when the clock is not open."""
        clk = api.get_clock()
        if getattr(clk, "is_open", False):
            close = pd.to_datetime(clk.next_close, utc=True)
            return int(max(0, (close - now_utc()).total_seconds() // 60))
        return None

    api_local = api

    per_ticker: Dict[str, Tuple[PPO, Optional[VecNormalize], Optional[dict]]] = {}
    best = (globals().get("BEST_WINDOW_ENV") or None)

    for t in tickers:
        try:
            picks   = pick_artifacts_for_ticker(t, os.getenv("ARTIFACTS_DIR", str(ARTIFACTS_DIR)), best_window=best)
            model   = load_ppo_model(picks["model"])
            vecnorm = load_vecnormalize(picks.get("vecnorm"))
            # Inference only: freeze the normaliser's running stats and reward scaling.
            if vecnorm and hasattr(vecnorm, "training"):
                vecnorm.training = False
            if vecnorm and hasattr(vecnorm, "norm_reward"):
                vecnorm.norm_reward = False
            feats   = load_features(picks.get("features"))
            per_ticker[t] = (model, vecnorm, feats)
            logging.info("[%s] Artifacts loaded and ready.", t)
        except Exception as e:
            logging.exception("[%s] Failed to load artifacts: %s", t, e)

    if not per_ticker:
        raise RuntimeError("No models loaded for any ticker. Check artifacts directory and names.")

    loaded_syms = list(per_ticker.keys())
    logging.info("Starting live execution for (loaded): %s", loaded_syms)

    cycle = 0
    last_plot_ts = 0
    flattened_today = False  #ensure we only flatten once into the close

    try:
        while True:
            if not ensure_market_open(api_local):
                flattened_today = False  #reset for next session
                #reset the anchor so we re-capture at the next open
                globals()["SESSION_OPEN_EQUITY"] = None
                _sleep_until_open(api_local)
                continue

            #Anchor the day's opening equity once per session
            if globals().get("SESSION_OPEN_EQUITY") is None:
                try:
                    globals()["SESSION_OPEN_EQUITY"] = float(api_local.get_account().equity)
                    logging.info("Session open equity anchor set: %.2f", globals().get("SESSION_OPEN_EQUITY", np.nan))
                except Exception as e:
                    logging.debug(f"Could not set SESSION_OPEN_EQUITY: {e}")

            t_cycle_start = time.perf_counter()
            # A failed account fetch should not end the session: with None the
            # per-symbol step fetches equity itself when it needs it.
            try:
                cycle_equity = float(api_local.get_account().equity)
            except Exception as e:
                logging.warning("Could not fetch cycle equity (%s); symbols will fetch on demand.", e)
                cycle_equity = None

            #---- per-symbol work (no sleeping here) ----
            for t, (model, vecnorm, feat_hint) in per_ticker.items():
                t_sym_start = time.perf_counter()
                try:
                    run_live_once_for_symbol(api_local, t, model, vecnorm, features_hint=feat_hint, cycle_equity=cycle_equity)
                except Exception as e:
                    # Isolate failures so one symbol cannot stop trading for the rest.
                    logging.exception("[%s] Live step failed; continuing with remaining symbols: %s", t, e)
                logging.info("[TIMER] %s symbol work: %.3fs", t, time.perf_counter() - t_sym_start)

            #---- once per cycle (AFTER the for-loop) ----
            maybe_log_equity_snapshot(api_in=api_local, reason=("trade" if globals().get("_TRADE_EVENT_FLAG", False) else "cycle"))

            try:
                anchor = globals().get("SESSION_OPEN_EQUITY", None)
                if anchor is not None:
                    eq_now = float(api_local.get_account().equity)
                    dd = (eq_now / max(1e-9, float(anchor))) - 1.0

                    # Looked up via globals() so a missing `cfg` object falls
                    # through to the defaults instead of raising NameError.
                    cfg_obj = globals().get("cfg")
                    max_dd = float(os.getenv(
                        "MAX_DAILY_DRAWDOWN_PCT",
                        getattr(cfg_obj, "MAX_DAILY_DRAWDOWN_PCT", globals().get("MAX_DAILY_DRAWDOWN_PCT", 0.05))
                    ))
                    if dd <= -abs(max_dd):
                        last_kill = float(globals().get("_last_kill_ts", 0.0) or 0.0)
                        if time.time() - last_kill > 60:  #de-bounce
                            for sym in per_ticker:
                                flatten_symbol(api_local, sym)
                            logging.warning("KILL-SWITCH: daily drawdown %.2f%% reached. Flattening & pausing.",
                                            100.0 * dd)
                            _last_kill_ts = time.time()

                            #Cooldown; if DRY_RUN, skip sleeping so your loop remains responsive
                            cooldown_min = int(os.getenv(
                                "KILL_SWITCH_COOLDOWN_MIN",
                                getattr(cfg_obj, "KILL_SWITCH_COOLDOWN_MIN", globals().get("KILL_SWITCH_COOLDOWN_MIN", 30))
                            ))
                            if not DRY_RUN:
                                time.sleep(60 * cooldown_min)
                            continue  #start next cycle after cooldown
            except Exception as e:
                # A broken safety check must be visible at the default log level.
                logging.warning(f"kill-switch check failed: {e}")

            #Flatten into the close (≤5 min), but only once per session
            m2c = minutes_to_close(api_local)
            if FLATTEN_INTO_CLOSE and not flattened_today and m2c is not None and m2c <= 5:
                for sym in per_ticker:
                    flatten_symbol(api_local, sym)
                    #start re-entry cooldown after end-of-day flatten
                    _REENTRY_BLOCK_UNTIL[sym] = time.time() + REENTRY_COOLDOWN_SEC
                logging.info("Flattened all positions into the close.")
                maybe_log_equity_snapshot(api_in=api_local, reason="close")
                flattened_today = True

                if bool(globals().get("EXIT_AFTER_CLOSE", False)):
                    logging.info("EXIT_AFTER_CLOSE=True — exiting live loop after close flatten.")
                    break

            cycle += 1

            #Throttled plot/metrics (~15 min)
            now_ts = time.time()
            if now_ts - last_plot_ts >= 900:
                try:
                    plot_equity_curve(from_equity_csv=True)
                    df = pd.read_csv(EQUITY_LOG_CSV, parse_dates=["datetime_utc"])
                    m = compute_performance_metrics(df)
                    logging.info("Perf: cum_return=%.2f%% | sharpe=%.2f | maxDD=%.2f%%",
                                 100*m["cum_return"], m["sharpe"], 100*m["max_drawdown"])
                except Exception as e:
                    logging.warning("Plot/metrics failed: %s", e)
                last_plot_ts = now_ts

            logging.info("[TIMER] full-cycle active time: %.3fs (cooldown=%d min)",
                         time.perf_counter() - t_cycle_start, COOLDOWN_MIN)

            if (cycle % 12) == 0:
                gc.collect()

            _sleep_to_next_minute_block(COOLDOWN_MIN)

    except KeyboardInterrupt:
        logging.info("KeyboardInterrupt: stopping live loop.")
        try:
            if os.getenv("FORCE_FLATTEN_ON_EXIT", "0").lower() in ("1", "true", "yes"):
                flatten_all_positions(api_local)
        except Exception as e:
            logging.warning("Flatten-on-exit skipped: %s", e)

        #Finalize logs/plots
        try:
            maybe_log_equity_snapshot(api_in=api_local, reason="finalize")
            plot_equity_curve(from_equity_csv=True)
        except Exception as e:
            logging.warning("Finalization failed: %s", e)

    except Exception as e:
        logging.exception("Live loop exception: %s", e)
        try:
            log_equity_snapshot(api_in=api_local)
        except Exception:
            pass
        time.sleep(5)



#--------------------------------- Diagnostic runner -------------------------------------------
def ticker_diagnostic(ticker: str,
                      dry_run: Optional[bool] = None,
                      timeframe: TimeFrame = TimeFrame.Minute,
                      limit: int = 300,
                      api: Optional[tradeapi.REST] = None):
    """Run a one-shot diagnostic for a single ticker and print a summary.

    Loads the ticker's artifacts, fetches bars, builds an observation, runs one
    inference and logs a "diagnostic" trade row. When the market is open and
    ``dry_run`` is false it may also submit a single demo/seed order.

    Args:
        ticker: Symbol to diagnose.
        dry_run: When None, taken from the module global ``DRY_RUN`` (True if
            that global is absent). When true, no orders are submitted.
        timeframe: Bar timeframe passed to ``get_recent_bars``.
        limit: Minimum number of bars to request.
        api: Existing Alpaca client; ``init_alpaca()`` is used when omitted.

    Returns:
        A dict with ``signal``, ``target_w``, ``conf``, ``raw``, ``bar_time``,
        ``price``, ``equity`` and ``dry_run``; or None when the Alpaca client
        or the model artifacts could not be initialised.
    """
    if dry_run is None:
        dry_run = bool(globals().get("DRY_RUN", True))

    print(f"\nRunning strategy for {ticker}...")

    #Init Alpaca + baseline state
    try:
        api_local = api or init_alpaca()
        positions_start = len(api_local.list_positions())
        orders_start    = len(api_local.list_orders(status="open"))
    except Exception as e:
        print(f"Error initializing Alpaca: {e}")
        return

    #Load artifacts
    try:
        best   = (globals().get("BEST_WINDOW_ENV") or None)
        picks  = pick_artifacts_for_ticker(
            ticker,
            os.getenv("ARTIFACTS_DIR", str(globals().get("ARTIFACTS_DIR", PROJECT_ROOT / "artifacts"))),
            best_window=best
        )
        model   = load_ppo_model(picks["model"])
        vecnorm = load_vecnormalize(picks.get("vecnorm")) if picks.get("vecnorm") else None
        # Inference only: freeze the normaliser's running stats and reward scaling.
        if vecnorm and hasattr(vecnorm, "training"):
            vecnorm.training = False
        if vecnorm and hasattr(vecnorm, "norm_reward"):
            vecnorm.norm_reward = False
        feats   = load_features(picks.get("features"))
        print(f"Model artifacts loaded for {ticker}")
    except Exception as e:
        print(f"Could not load model for {ticker}: {e}")
        return

    #Fetch bars & build initial observation
    min_rows_needed = 60
    shape = None  # stays defined even if expected_obs_shape raises below
    try:
        shape     = expected_obs_shape(model, vecnorm)
        lookback  = int(shape[0]) if (shape is not None and len(shape) == 2) else None
        bars_need = max(200, (lookback or 0) * 3)
        bars_df   = get_recent_bars(api_local, ticker, limit=max(limit, bars_need), timeframe=timeframe)
        min_rows_needed = max(20, int(shape[0]) if (shape and len(shape)==2) else 20)
        if len(bars_df) < min_rows_needed:
            print(f"Not enough data for {ticker}: {len(bars_df)} rows (need ≥ {min_rows_needed})")
            bars_df = pd.DataFrame()
    except Exception as e:
        print(f"Error fetching bars for {ticker}: {e}")
        bars_df = pd.DataFrame()

    obs, obs_ts = None, None
    bars_df = maybe_patch_stale_with_latest_trade(api_local, ticker, bars_df)
    if not bars_df.empty:
        try:
            obs, obs_ts = prepare_observation_from_bars(
                bars_df,
                features_hint=feats,
                min_required_rows=min_rows_needed,
                expected_shape=shape,
            )
        except Exception as e:
            print(f"Error preparing observation for {ticker}: {e}")

    signal = None
    target_w = conf = raw = float("nan")
    predictions_made = 0
    bar_time = pd.NaT
    price = float("nan")
    equity = float("nan")

    orders_submitted = 0
    market_closed = 0

    if obs is not None:
        try:
            #Predict once for the diagnostic summary
            target_w, conf, raw = infer_target_weight(model, vecnorm, obs)
            signal = int(target_w > 0.0)  #(diagnostic display only)
            predictions_made = 1
            print(f"Prediction for {ticker}: {signal} (1 = Buy, 0 = Sell)")

            bar_time = bars_df.index[-1] if not bars_df.empty else pd.NaT
            price    = float(bars_df["Close"].iloc[-1]) if not bars_df.empty else get_last_price(api_local, ticker)
            equity   = get_account_equity(api_local)
            print(f"raw={raw:.4f} conf={conf:.3f} target_w={target_w:.3f} price=${price:.2f} equity=${equity:,.2f}")

            #Log a diagnostic row (even if we don't submit an order)
            log_trade_symbol(
                ticker, bar_time, signal, raw, target_w, conf, price, equity,
                dry_run=dry_run, note="diagnostic"
            )

            #--- Inner "clock/orders" section ---
            # The `else` below pairs with the market-open check; it must sit
            # inside the `try`, because `try: ... else:` with no `except` in
            # between is a SyntaxError.
            try:
                clock = api_local.get_clock()
                if not getattr(clock, "is_open", False):
                    print("Market is closed.")
                    market_closed = 1
                else:
                    # Rebuild obs right before order logic (same bars; refreshes obs_ts for the age check)
                    obs, obs_ts = prepare_observation_from_bars(
                        bars_df,
                        features_hint=feats,
                        min_required_rows=min_rows_needed,
                        expected_shape=shape,
                    )

                    _age = utc_ts(now_utc()) - int(obs_ts)
                    logging.info("[%s] obs_shape=%s | exp_shape=%s | age=%ss | vecnorm=%s",
                                 ticker, getattr(obs, "shape", None), shape, _age,
                                 (f"{type(vecnorm).__name__}(training={getattr(vecnorm,'training',None)}, "
                                  f"norm_reward={getattr(vecnorm,'norm_reward',None)})") if vecnorm else "None")

                    if utc_ts(now_utc()) - obs_ts > STALE_MAX_SEC:
                        print("Stale observation; skipping order submission.")
                        log_trade_symbol(
                            ticker, bar_time, 0, raw, target_w, conf, price, equity,
                            dry_run, note="skip_stale_diag"
                        )

                    elif signal is not None and not dry_run:
                        # Do we already hold the ticker?
                        try:
                            pos = api_local.get_position(ticker)
                            has_position = float(pos.qty) != 0.0
                        except APIError:
                            has_position = False

                        have = get_position_qty(api_local, ticker)

                        # Optional "first buy" helper for diagnostics only
                        if FORCE_FIRST_BUY and not has_position and signal == 1:
                            market_order(
                                api_local, symbol=ticker, side="buy",
                                qty=(1 if not USE_FRACTIONALS else None),
                                notional=(price if USE_FRACTIONALS else None),
                            )
                            print(f"BUY order submitted for {ticker} (FORCE_FIRST_BUY)")
                            orders_submitted += 1

                        # Seed logic when no position but confidence/weight pass gates
                        elif (
                            SEED_FIRST_SHARE
                            and have == 0
                            and abs(target_w) >= ENTER_WEIGHT_MIN
                            and conf >= ENTER_CONF_MIN
                        ):
                            seed_notional = max(REBALANCE_MIN_NOTIONAL, round_to_cents(price if np.isfinite(price) else 1.00))
                            side = "buy" if target_w > 0 else "sell"

                            if side == "sell":
                                if not globals().get("ALLOW_SHORTS", False):
                                    print(f"[{ticker}] Shorts disabled (ALLOW_SHORTS=False); skipping seed short.")
                                    log_trade_symbol(ticker, bar_time, 0, raw, target_w, conf, price, equity, dry_run, note="shorts_disabled_seed")
                                else:
                                    try:
                                        a = api_local.get_asset(ticker)
                                        if not getattr(a, "shortable", False):
                                            print(f"[{ticker}] Not shortable; skipping seed short.")
                                            log_trade_symbol(ticker, bar_time, 0, raw, target_w, conf, price, equity, dry_run, note="not_shortable_seed")
                                        else:
                                            market_order_to_qty(api_local, ticker, side="sell", qty=1)
                                            log_trade_symbol(ticker, bar_time, int(target_w > 0), raw, target_w, conf, price, equity, dry_run, note="seed_open")
                                            orders_submitted += 1
                                    except Exception as e:
                                        print(f"[{ticker}] get_asset shortable check failed: {e}")
                                        log_trade_symbol(ticker, bar_time, 0, raw, target_w, conf, price, equity, dry_run, note="shortable_check_error")
                            else:
                                if USE_FRACTIONALS:
                                    market_order(api_local, ticker, side="buy", notional=seed_notional)
                                else:
                                    market_order_to_qty(api_local, ticker, side="buy", qty=1)
                                log_trade_symbol(ticker, bar_time, int(target_w > 0), raw, target_w, conf, price, equity, dry_run, note="seed_open")
                                orders_submitted += 1

                        # Simple directional demo for diagnostics
                        elif signal == 1 and not has_position:
                            market_order(api_local, symbol=ticker, side="buy",
                                         qty=(1 if not USE_FRACTIONALS else None),
                                         notional=(price if USE_FRACTIONALS else None))
                            print(f"BUY order submitted for {ticker}")
                            orders_submitted += 1

                        elif signal == 0 and has_position and have > 0:
                            market_order(api_local, symbol=ticker, side="sell",
                                         qty=(1 if not USE_FRACTIONALS else None),
                                         notional=(price if USE_FRACTIONALS else None))
                            print(f"SELL order submitted for {ticker}")
                            orders_submitted += 1

                        else:
                            print(f"No action taken for {ticker}")

                    else:
                        # dry_run path
                        print(f"(dry-run) No order submitted for {ticker} — signal={signal}")
            except Exception as e:
                print(f"Trade/clock error for {ticker}: {e}")

        except Exception as e:
            print(f"Inference error for {ticker}: {e}")

    #Final summary
    try:
        positions_end = len(api_local.list_positions())
        orders_end    = len(api_local.list_orders(status="open"))
        print("\n========== SUMMARY ==========")
        print("Processed:         1")
        print("Models loaded:     1")
        print(f"Predictions made:  {predictions_made}")
        print(f"Market closed:     {market_closed}")
        print(f"Orders submitted:  {orders_submitted} (dry_run={dry_run})")
        print(f"Existing positions (start -> end): {positions_start} -> {positions_end}")
        print(f"Open orders        (start -> end): {orders_start} -> {orders_end}")
        print("=============================")
    except Exception:
        # The summary is informational only; a failed listing must not hide the result.
        pass

    return {
        "signal": signal,
        "target_w": target_w,
        "conf": conf,
        "raw": raw,
        "bar_time": bar_time,
        "price": price,
        "equity": equity,
        "dry_run": dry_run,
    }

#--------------------------------- Config banner -----------------------------------------------
def log_config_banner():
    """Write the run configuration to the log as a series of INFO lines.

    Most values are read from module-level settings; a handful are read
    directly from environment variables (with the literal defaults shown in
    each call). If ARTIFACTS_DIR exists and can be listed, the sorted names
    of its entries are logged as the final line.
    """
    # Listing the artifact folder is best effort: any failure simply means
    # the trailing "Artifacts present" line is not emitted.
    artifacts_list = []
    try:
        if ARTIFACTS_DIR.exists():
            artifacts_list = sorted(entry.name for entry in ARTIFACTS_DIR.iterdir())
    except Exception:
        artifacts_list = []

    emit = logging.info
    env = os.getenv
    module_vars = globals()

    emit("EXIT_AFTER_CLOSE     : %s", env("EXIT_AFTER_CLOSE", "0"))
    emit("FORCE_FIRST_BUY       : %s", FORCE_FIRST_BUY)
    emit("CONFIG")
    emit("Project root        : %s", PROJECT_ROOT)
    emit("ARTIFACTS_DIR       : %s", ARTIFACTS_DIR)
    emit("RESULTS_DIR         : %s", RESULTS_DIR)
    emit("Tickers             : %s", TICKERS)
    emit("API base            : %s", BASE_URL)
    emit("AUTO_RUN_LIVE       : %s", env("AUTO_RUN_LIVE", ""))
    emit("INF_DETERMINISTIC   : %s", INF_DETERMINISTIC)
    emit("ALLOW_SHORTS        : %s", ALLOW_SHORTS)

    # For these two the environment variable wins; the module value is only
    # the fallback shown when the variable is unset.
    flatten_fallback = str(FLATTEN_INTO_CLOSE)
    emit("FLATTEN_INTO_CLOSE  : %s", env("FLATTEN_INTO_CLOSE", flatten_fallback))
    reentry_fallback = str(REENTRY_COOLDOWN_SEC)
    emit("REENTRY_COOLDOWN_SEC: %s", env("REENTRY_COOLDOWN_SEC", reentry_fallback))

    emit(
        "DRY_RUN: %s | BARS_FEED: %s | USE_FRACTIONALS: %s | COOLDOWN_MIN: %s | STALE_MAX_SEC: %s",
        DRY_RUN,
        BARS_FEED,
        USE_FRACTIONALS,
        COOLDOWN_MIN,
        STALE_MAX_SEC,
    )

    seed_if_idle = env("DEBUG_FORCE_SEED_IF_IDLE","1")
    seed_idle_cycles = env("DEBUG_SEED_IDLE_CYCLES","10")
    emit(
        "DEBUG_FORCE_SEED_IF_IDLE: %s | DEBUG_SEED_IDLE_CYCLES: %s",
        seed_if_idle,
        seed_idle_cycles,
    )

    emit("PH_TIMEOUT_SEC       : %s", env("PH_TIMEOUT_SEC", "8"))

    # Optional knobs are looked up through globals() so that a missing name
    # falls back to a default instead of raising NameError.
    max_dd = float(module_vars.get("MAX_DAILY_DRAWDOWN_PCT", 0.05))
    kill_cooldown = env(
        "KILL_SWITCH_COOLDOWN_MIN",
        str(module_vars.get("KILL_SWITCH_COOLDOWN_MIN", 30)),
    )
    emit("MAX_DD_PCT: %.3f | KILL_SWITCH_COOLDOWN_MIN: %s", max_dd, kill_cooldown)

    emit(
        "WEIGHT_CAP: %.3f | SIZING_MODE: %s | ENTER_CONF_MIN: %.3f | ENTER_WEIGHT_MIN: %.3f | "
        "EXIT_WEIGHT_MAX: %.3f | REBALANCE_MIN_NOTIONAL: %.2f",
        WEIGHT_CAP,
        SIZING_MODE,
        ENTER_CONF_MIN,
        ENTER_WEIGHT_MIN,
        EXIT_WEIGHT_MAX,
        REBALANCE_MIN_NOTIONAL,
    )
    emit(
        "TAKE_PROFIT_PCT: %.3f | STOP_LOSS_PCT: %.3f | BEST_WINDOW_ENV: %s",
        TAKE_PROFIT_PCT,
        STOP_LOSS_PCT,
        (BEST_WINDOW_ENV or ""),
    )

    delta_weight_min = float(module_vars.get("DELTA_WEIGHT_MIN", 0.0))
    raw_pos_min = float(module_vars.get("RAW_POS_MIN", 0.0))
    raw_neg_max = float(module_vars.get("RAW_NEG_MAX", 0.0))
    emit(
        "DELTA_WEIGHT_MIN: %.3f | RAW_POS_MIN: %.3f | RAW_NEG_MAX: %.3f",
        delta_weight_min,
        raw_pos_min,
        raw_neg_max,
    )

    if artifacts_list:
        emit("Artifacts present (%d): %s", len(artifacts_list), ", ".join(artifacts_list))

if __name__ == "__main__":
    # Script entry point: prepare the environment, apply the run
    # configuration, record it, verify the broker account is safe to trade
    # on, and optionally start the live loop.
    #
    # The safety guards below raise RuntimeError explicitly instead of using
    # `assert`: assert statements are removed when Python runs with -O /
    # PYTHONOPTIMIZE, which would silently disable the checks.
    if IN_COLAB:
        upload_env_and_artifacts_in_colab()
        _maybe_convert_features_txt_to_json()
        _maybe_rename_vecnorm_scaler()
        load_dotenv(dotenv_path=PROJECT_ROOT / ".env", override=True)

    #Always configure (both local and Colab)
    cfg = configure_knobs(overrides={
        #data freshness
        "BARS_FEED": "iex",
        "STALE_MAX_SEC": 600,

        #entry/exit sensitivity (add gentle friction)
        "ENTER_CONF_MIN": 0.02,
        "ENTER_WEIGHT_MIN": 0.010,
        "EXIT_WEIGHT_MAX": 0.007,

        #sizing & minimums
        "WEIGHT_CAP": 0.25,
        "DELTA_WEIGHT_MIN": 0.003,
        "REBALANCE_MIN_NOTIONAL": 7.50,

        #posture
        "ALLOW_SHORTS": False,
        "COOLDOWN_MIN": 1,

        #raw-action gates
        "RAW_POS_MIN": 0.00,
        "RAW_NEG_MAX": 0.00,

        #risk
        "TAKE_PROFIT_PCT": 0.02,
        "STOP_LOSS_PCT": 0.01,

        #logging cadence
        "EQUITY_LOG_THROTTLE_SEC": 300,
        "SKIP_EQUITY_WHEN_DRY_RUN": False,

        #sane kill-switch
        "MAX_DAILY_DRAWDOWN_PCT": 0.05,

    })
    globals()["cfg"] = cfg  #so functions that reference `cfg` can see it

    #Only block the live loop (not diagnostics) if DRY_RUN is on
    if cfg.AUTO_RUN_LIVE and cfg.DRY_RUN:
        raise RuntimeError("Refusing to start live loop with DRY_RUN=True")

    log_config_banner()

    #Save a one-file snapshot of the run config BEFORE starting the loop
    try:
        cfg_path = RESULTS_DIR / "run_config.json"
        payload = {
            "time": utcnow_iso(),
            "tickers": TICKERS,
            "dry_run": DRY_RUN,
            "bars_feed": BARS_FEED,
            "weight_cap": WEIGHT_CAP,
            "enter_conf_min": ENTER_CONF_MIN,
            "enter_weight_min": ENTER_WEIGHT_MIN,
            "exit_weight_max": EXIT_WEIGHT_MAX,
            "rebalance_min_notional": REBALANCE_MIN_NOTIONAL,
            "delta_weight_min": DELTA_WEIGHT_MIN,
            "tp": TAKE_PROFIT_PCT,
            "sl": STOP_LOSS_PCT,
            "allow_shorts": ALLOW_SHORTS,
        }
        # Write to a sibling temp file, then rename over the target, so a
        # partially written snapshot never replaces a previous good one.
        tmp = cfg_path.with_suffix(".tmp")
        tmp.write_text(json.dumps(payload, indent=2))
        tmp.replace(cfg_path)
    except Exception as e:
        # The snapshot is informational only; failing to write it must not
        # prevent the run.
        logging.warning("Could not write run_config.json: %s", e)

    #Paper safety (BASE_URL is set by cfg.apply_to_globals())
    if "paper-api" not in BASE_URL.lower():
        raise RuntimeError(f"Refusing to trade: BASE_URL is not paper ({BASE_URL})")

    #Single init
    api = init_alpaca()
    acct = api.get_account()

    #Optional sanity: don't run if Alpaca says trading is blocked
    if bool(getattr(acct, "trading_blocked", False)):
        raise RuntimeError(f"Trading is blocked on this account: {getattr(acct,'status','')}")

    logging.info("Account status: %s | equity=%s | cash=%s", acct.status, acct.equity, acct.cash)
    write_account_info_to_run_config(api)

    if cfg.AUTO_RUN_LIVE:
        run_live(TICKERS, api)   #pass the client in
    else:
        logging.info("AUTO_RUN_LIVE disabled; live loop not started.")


Mounted at /content/drive
Upload your .env (or Alpaca_keys.env.txt). Cancel if already on Drive.


Saving Alpaca_keys.env.txt to Alpaca_keys.env.txt
Saved env → /content/drive/MyDrive/AlpacaPaper/.env
Upload your artifacts (ppo_*_model.zip, *_vecnorm*.pkl, *_features*.json or .txt).


Saving ppo_GE_window1_features.json to ppo_GE_window1_features.json
Saving ppo_GE_window1_model_info.json to ppo_GE_window1_model_info.json
Saving ppo_GE_window1_model.zip to ppo_GE_window1_model.zip
Saving ppo_GE_window1_probability_config.json to ppo_GE_window1_probability_config.json
Saving ppo_GE_window1_vecnorm.pkl to ppo_GE_window1_vecnorm.pkl
Saving ppo_UNH_window3_features.json to ppo_UNH_window3_features.json
Saving ppo_UNH_window3_model_info.json to ppo_UNH_window3_model_info.json
Saving ppo_UNH_window3_model.zip to ppo_UNH_window3_model.zip
Saving ppo_UNH_window3_probability_config.json to ppo_UNH_window3_probability_config.json
Saving ppo_UNH_window3_vecnorm.pkl to ppo_UNH_window3_vecnorm.pkl
Artifacts now in: ['ppo_GE_window1_features.json', 'ppo_GE_window1_model.zip', 'ppo_GE_window1_model_info.json', 'ppo_GE_window1_probability_config.json', 'ppo_GE_window1_vecnorm.pkl', 'ppo_UNH_window3_features.json', 'ppo_UNH_window3_model.zip', 'ppo_UNH_window3_model_info.json', 'ppo

INFO:root:EXIT_AFTER_CLOSE     : 0
INFO:root:=== CONFIG ===
INFO:root:Project root        : /content/drive/MyDrive/AlpacaPaper
INFO:root:ARTIFACTS_DIR       : /content/drive/MyDrive/AlpacaPaper/artifacts
INFO:root:RESULTS_DIR         : /content/drive/MyDrive/AlpacaPaper/results/2025-12-10
INFO:root:Tickers             : ['UNH', 'GE']
INFO:root:API base            : https://paper-api.alpaca.markets
INFO:root:AUTO_RUN_LIVE       : 1
INFO:root:INF_DETERMINISTIC   : True
INFO:root:ALLOW_SHORTS        : False
INFO:root:DRY_RUN: False | BARS_FEED: iex | USE_FRACTIONALS: True | COOLDOWN_MIN: 1 | STALE_MAX_SEC: 600
INFO:root:DEBUG_FORCE_SEED_IF_IDLE: 0 | DEBUG_SEED_IDLE_CYCLES: 10
INFO:root:PH_TIMEOUT_SEC       : 8
INFO:root:MAX_DD_PCT: 0.050 | KILL_SWITCH_COOLDOWN_MIN: 30
INFO:root:WEIGHT_CAP: 0.250 | SIZING_MODE: linear | ENTER_CONF_MIN: 0.020 | ENTER_WEIGHT_MIN: 0.010 | EXIT_WEIGHT_MAX: 0.007 | REBALANCE_MIN_NOTIONAL: 7.50
INFO:root:TAKE_PROFIT_PCT: 0.020 | STOP_LOSS_PCT: 0.010 | BEST_WINDO

Saved equity curve → /content/drive/MyDrive/AlpacaPaper/results/2025-12-10/equity_curve.png
Updated latest copy → /content/drive/MyDrive/AlpacaPaper/results/latest/equity_curve.png


INFO:root:[UNH] fetching 200 1Min bars (feed='iex')
INFO:root:[UNH] obs_shape=(10, 2) | exp_shape=(10, 2) | age=60s | vecnorm=VecNormalize(training=False, norm_reward=False)
INFO:root:[UNH] raw=-0.3828 conf=0.365 → target_w=0.0000 px=$325.12 eq=$99,205.42 have=0.0
INFO:root:[TIMER] UNH symbol work: 0.307s
INFO:root:[GE] fetching 200 1Min bars (feed='iex')
INFO:root:[GE] Patched stale bars with synthetic trade bar @ 2025-12-10T18:20:00+00:00 px=282.24
INFO:root:[GE] obs_shape=(10, 2) | exp_shape=(10, 2) | age=60s | vecnorm=VecNormalize(training=False, norm_reward=False)
INFO:root:[GE] raw=0.1832 conf=0.181 → target_w=0.0453 px=$282.24 eq=$99,205.42 have=0.0
INFO:root:[GE] Submitted buy notional=$7.50
INFO:root:[GE] Submitted buy notional=$4485.19
INFO:root:[TIMER] GE symbol work: 1.434s
INFO:root:[TIMER] full-cycle active time: 2.024s (cooldown=1 min)
INFO:root:[UNH] fetching 200 1Min bars (feed='iex')
INFO:root:[UNH] obs_shape=(10, 2) | exp_shape=(10, 2) | age=120s | vecnorm=VecNormali

Saved equity curve → /content/drive/MyDrive/AlpacaPaper/results/2025-12-10/equity_curve.png
Updated latest copy → /content/drive/MyDrive/AlpacaPaper/results/latest/equity_curve.png


INFO:root:[UNH] fetching 200 1Min bars (feed='iex')
INFO:root:[UNH] obs_shape=(10, 2) | exp_shape=(10, 2) | age=120s | vecnorm=VecNormalize(training=False, norm_reward=False)
INFO:root:[UNH] raw=-0.3836 conf=0.366 → target_w=0.0000 px=$324.40 eq=$99,207.01 have=0.0
INFO:root:[TIMER] UNH symbol work: 0.295s
INFO:root:[GE] fetching 200 1Min bars (feed='iex')
INFO:root:[GE] Patched stale bars with synthetic trade bar @ 2025-12-10T18:36:00+00:00 px=282.31
INFO:root:[GE] obs_shape=(10, 2) | exp_shape=(10, 2) | age=60s | vecnorm=VecNormalize(training=False, norm_reward=False)
INFO:root:[GE] raw=0.1832 conf=0.181 → target_w=0.0453 px=$282.31 eq=$99,207.01 have=15.918472631
INFO:root:[TIMER] GE symbol work: 0.514s
INFO:root:[TIMER] full-cycle active time: 1.002s (cooldown=1 min)
INFO:root:[UNH] fetching 200 1Min bars (feed='iex')
INFO:root:[UNH] obs_shape=(10, 2) | exp_shape=(10, 2) | age=180s | vecnorm=VecNormalize(training=False, norm_reward=False)
INFO:root:[UNH] raw=-0.3836 conf=0.366 → ta

Saved equity curve → /content/drive/MyDrive/AlpacaPaper/results/2025-12-10/equity_curve.png
Updated latest copy → /content/drive/MyDrive/AlpacaPaper/results/latest/equity_curve.png


INFO:root:[UNH] fetching 200 1Min bars (feed='iex')
INFO:root:[UNH] Patched stale bars with synthetic trade bar @ 2025-12-10T18:51:00+00:00 px=324.18
INFO:root:[UNH] obs_shape=(10, 2) | exp_shape=(10, 2) | age=120s | vecnorm=VecNormalize(training=False, norm_reward=False)
INFO:root:[UNH] raw=-0.3836 conf=0.366 → target_w=0.0000 px=$324.18 eq=$99,214.81 have=0.0
INFO:root:[TIMER] UNH symbol work: 0.360s
INFO:root:[GE] fetching 200 1Min bars (feed='iex')
INFO:root:[GE] Patched stale bars with synthetic trade bar @ 2025-12-10T18:52:00+00:00 px=282.84
INFO:root:[GE] obs_shape=(10, 2) | exp_shape=(10, 2) | age=60s | vecnorm=VecNormalize(training=False, norm_reward=False)
INFO:root:[GE] raw=0.1831 conf=0.181 → target_w=0.0453 px=$282.84 eq=$99,214.81 have=15.918472631
INFO:root:[TIMER] GE symbol work: 0.515s
INFO:root:[TIMER] full-cycle active time: 1.075s (cooldown=1 min)
INFO:root:[UNH] fetching 200 1Min bars (feed='iex')
INFO:root:[UNH] Patched stale bars with synthetic trade bar @ 2025-1

Saved equity curve → /content/drive/MyDrive/AlpacaPaper/results/2025-12-10/equity_curve.png
Updated latest copy → /content/drive/MyDrive/AlpacaPaper/results/latest/equity_curve.png


INFO:root:[UNH] fetching 200 1Min bars (feed='iex')
INFO:root:[UNH] Patched stale bars with synthetic trade bar @ 2025-12-10T19:08:00+00:00 px=325.49
INFO:root:[UNH] obs_shape=(10, 2) | exp_shape=(10, 2) | age=60s | vecnorm=VecNormalize(training=False, norm_reward=False)
INFO:root:[UNH] raw=-0.3834 conf=0.366 → target_w=0.0000 px=$325.49 eq=$99,231.13 have=0.0
INFO:root:[TIMER] UNH symbol work: 0.392s
INFO:root:[GE] fetching 200 1Min bars (feed='iex')
INFO:root:[GE] Patched stale bars with synthetic trade bar @ 2025-12-10T19:09:00+00:00 px=283.76
INFO:root:[GE] obs_shape=(10, 2) | exp_shape=(10, 2) | age=0s | vecnorm=VecNormalize(training=False, norm_reward=False)
INFO:root:[GE] raw=0.1830 conf=0.181 → target_w=0.0453 px=$283.76 eq=$99,231.13 have=15.918472631
INFO:root:[TIMER] GE symbol work: 0.514s
INFO:root:[TIMER] full-cycle active time: 1.102s (cooldown=1 min)
INFO:root:[UNH] fetching 200 1Min bars (feed='iex')
INFO:root:[UNH] Patched stale bars with synthetic trade bar @ 2025-12-

Saved equity curve → /content/drive/MyDrive/AlpacaPaper/results/2025-12-10/equity_curve.png
Updated latest copy → /content/drive/MyDrive/AlpacaPaper/results/latest/equity_curve.png


INFO:root:[UNH] fetching 200 1Min bars (feed='iex')
INFO:root:[UNH] Patched stale bars with synthetic trade bar @ 2025-12-10T19:23:00+00:00 px=325.06
INFO:root:[UNH] obs_shape=(10, 2) | exp_shape=(10, 2) | age=60s | vecnorm=VecNormalize(training=False, norm_reward=False)
INFO:root:[UNH] raw=-0.3835 conf=0.366 → target_w=0.0000 px=$325.06 eq=$99,205.74 have=0.0
INFO:root:[TIMER] UNH symbol work: 0.386s
INFO:root:[GE] fetching 200 1Min bars (feed='iex')
INFO:root:[GE] Patched stale bars with synthetic trade bar @ 2025-12-10T19:23:00+00:00 px=282.34
INFO:root:[GE] obs_shape=(10, 2) | exp_shape=(10, 2) | age=60s | vecnorm=VecNormalize(training=False, norm_reward=False)
INFO:root:[GE] raw=0.1832 conf=0.181 → target_w=0.0453 px=$282.34 eq=$99,205.74 have=15.918472631
INFO:root:[TIMER] GE symbol work: 0.529s
INFO:root:[TIMER] full-cycle active time: 1.116s (cooldown=1 min)
INFO:root:[UNH] fetching 200 1Min bars (feed='iex')
INFO:root:[UNH] Patched stale bars with synthetic trade bar @ 2025-12

Saved equity curve → /content/drive/MyDrive/AlpacaPaper/results/2025-12-10/equity_curve.png
Updated latest copy → /content/drive/MyDrive/AlpacaPaper/results/latest/equity_curve.png


INFO:root:[UNH] fetching 200 1Min bars (feed='iex')
INFO:root:[UNH] Patched stale bars with synthetic trade bar @ 2025-12-10T19:39:00+00:00 px=327.03
INFO:root:[UNH] obs_shape=(10, 2) | exp_shape=(10, 2) | age=60s | vecnorm=VecNormalize(training=False, norm_reward=False)
INFO:root:[UNH] raw=-0.3832 conf=0.365 → target_w=0.0000 px=$327.03 eq=$99,200.01 have=0.0
INFO:root:[TIMER] UNH symbol work: 0.364s
INFO:root:[GE] fetching 200 1Min bars (feed='iex')
INFO:root:[GE] Patched stale bars with synthetic trade bar @ 2025-12-10T19:39:00+00:00 px=282.03
INFO:root:[GE] obs_shape=(10, 2) | exp_shape=(10, 2) | age=60s | vecnorm=VecNormalize(training=False, norm_reward=False)
INFO:root:[GE] raw=0.1832 conf=0.181 → target_w=0.0453 px=$282.03 eq=$99,200.01 have=15.918472631
INFO:root:[TIMER] GE symbol work: 0.541s
INFO:root:[TIMER] full-cycle active time: 1.103s (cooldown=1 min)
INFO:root:[UNH] fetching 200 1Min bars (feed='iex')
INFO:root:[UNH] Patched stale bars with synthetic trade bar @ 2025-12

Saved equity curve → /content/drive/MyDrive/AlpacaPaper/results/2025-12-10/equity_curve.png
Updated latest copy → /content/drive/MyDrive/AlpacaPaper/results/latest/equity_curve.png


INFO:root:[UNH] fetching 200 1Min bars (feed='iex')
INFO:root:[UNH] Patched stale bars with synthetic trade bar @ 2025-12-10T19:54:00+00:00 px=326.70
INFO:root:[UNH] obs_shape=(10, 2) | exp_shape=(10, 2) | age=60s | vecnorm=VecNormalize(training=False, norm_reward=False)
INFO:root:[UNH] raw=-0.3832 conf=0.366 → target_w=0.0000 px=$326.70 eq=$99,216.88 have=0.0
INFO:root:[TIMER] UNH symbol work: 0.371s
INFO:root:[GE] fetching 200 1Min bars (feed='iex')
INFO:root:[GE] Patched stale bars with synthetic trade bar @ 2025-12-10T19:54:00+00:00 px=283.02
INFO:root:[GE] obs_shape=(10, 2) | exp_shape=(10, 2) | age=60s | vecnorm=VecNormalize(training=False, norm_reward=False)
INFO:root:[GE] raw=0.1831 conf=0.181 → target_w=0.0453 px=$283.02 eq=$99,216.88 have=15.918472631
INFO:root:[TIMER] GE symbol work: 0.520s
INFO:root:[TIMER] full-cycle active time: 1.088s (cooldown=1 min)
INFO:root:[UNH] fetching 200 1Min bars (feed='iex')
INFO:root:[UNH] Patched stale bars with synthetic trade bar @ 2025-12

Saved equity curve → /content/drive/MyDrive/AlpacaPaper/results/2025-12-10/equity_curve.png
Updated latest copy → /content/drive/MyDrive/AlpacaPaper/results/latest/equity_curve.png


INFO:root:[UNH] fetching 200 1Min bars (feed='iex')
INFO:root:[UNH] Patched stale bars with synthetic trade bar @ 2025-12-10T20:08:00+00:00 px=327.26
INFO:root:[UNH] obs_shape=(10, 2) | exp_shape=(10, 2) | age=120s | vecnorm=VecNormalize(training=False, norm_reward=False)
INFO:root:[UNH] raw=-0.3831 conf=0.365 → target_w=0.0000 px=$327.26 eq=$99,218.31 have=0.0
INFO:root:[TIMER] UNH symbol work: 0.375s
INFO:root:[GE] fetching 200 1Min bars (feed='iex')
INFO:root:[GE] Patched stale bars with synthetic trade bar @ 2025-12-10T20:09:00+00:00 px=283.04
INFO:root:[GE] obs_shape=(10, 2) | exp_shape=(10, 2) | age=60s | vecnorm=VecNormalize(training=False, norm_reward=False)
INFO:root:[GE] raw=0.1831 conf=0.181 → target_w=0.0453 px=$283.04 eq=$99,218.31 have=15.918472631
INFO:root:[TIMER] GE symbol work: 0.507s
INFO:root:[TIMER] full-cycle active time: 1.074s (cooldown=1 min)
INFO:root:[UNH] fetching 200 1Min bars (feed='iex')
INFO:root:[UNH] Patched stale bars with synthetic trade bar @ 2025-1

Saved equity curve → /content/drive/MyDrive/AlpacaPaper/results/2025-12-10/equity_curve.png
Updated latest copy → /content/drive/MyDrive/AlpacaPaper/results/latest/equity_curve.png


INFO:root:[UNH] fetching 200 1Min bars (feed='iex')
INFO:root:[UNH] Patched stale bars with synthetic trade bar @ 2025-12-10T20:25:00+00:00 px=328.03
INFO:root:[UNH] obs_shape=(10, 2) | exp_shape=(10, 2) | age=60s | vecnorm=VecNormalize(training=False, norm_reward=False)
INFO:root:[UNH] raw=-0.3830 conf=0.365 → target_w=0.0000 px=$328.03 eq=$99,245.06 have=0.0
INFO:root:[TIMER] UNH symbol work: 0.365s
INFO:root:[GE] fetching 200 1Min bars (feed='iex')
INFO:root:[GE] Patched stale bars with synthetic trade bar @ 2025-12-10T20:25:00+00:00 px=284.73
INFO:root:[GE] obs_shape=(10, 2) | exp_shape=(10, 2) | age=60s | vecnorm=VecNormalize(training=False, norm_reward=False)
INFO:root:[GE] raw=0.1829 conf=0.181 → target_w=0.0452 px=$284.73 eq=$99,245.06 have=15.918472631
INFO:root:[TIMER] GE symbol work: 0.554s
INFO:root:[TIMER] full-cycle active time: 1.222s (cooldown=1 min)
INFO:root:[UNH] fetching 200 1Min bars (feed='iex')
INFO:root:[UNH] Patched stale bars with synthetic trade bar @ 2025-12

Saved equity curve → /content/drive/MyDrive/AlpacaPaper/results/2025-12-10/equity_curve.png
Updated latest copy → /content/drive/MyDrive/AlpacaPaper/results/latest/equity_curve.png


INFO:root:[UNH] fetching 200 1Min bars (feed='iex')
INFO:root:[UNH] Patched stale bars with synthetic trade bar @ 2025-12-10T20:41:00+00:00 px=327.72
INFO:root:[UNH] obs_shape=(10, 2) | exp_shape=(10, 2) | age=60s | vecnorm=VecNormalize(training=False, norm_reward=False)
INFO:root:[UNH] raw=-0.3831 conf=0.365 → target_w=0.0000 px=$327.72 eq=$99,232.96 have=0.0
INFO:root:[TIMER] UNH symbol work: 0.365s
INFO:root:[GE] fetching 200 1Min bars (feed='iex')
INFO:root:[GE] Patched stale bars with synthetic trade bar @ 2025-12-10T20:41:00+00:00 px=284.08
INFO:root:[GE] obs_shape=(10, 2) | exp_shape=(10, 2) | age=60s | vecnorm=VecNormalize(training=False, norm_reward=False)
INFO:root:[GE] raw=0.1830 conf=0.181 → target_w=0.0452 px=$284.08 eq=$99,232.96 have=15.918472631
INFO:root:[TIMER] GE symbol work: 0.511s
INFO:root:[TIMER] full-cycle active time: 1.072s (cooldown=1 min)
INFO:root:[UNH] fetching 200 1Min bars (feed='iex')
INFO:root:[UNH] Patched stale bars with synthetic trade bar @ 2025-12

Saved equity curve → /content/drive/MyDrive/AlpacaPaper/results/2025-12-10/equity_curve.png
Updated latest copy → /content/drive/MyDrive/AlpacaPaper/results/latest/equity_curve.png


INFO:root:[UNH] fetching 200 1Min bars (feed='iex')
INFO:root:[UNH] Patched stale bars with synthetic trade bar @ 2025-12-10T20:57:00+00:00 px=328.17
INFO:root:[UNH] obs_shape=(10, 2) | exp_shape=(10, 2) | age=60s | vecnorm=VecNormalize(training=False, norm_reward=False)
INFO:root:[UNH] raw=-0.3830 conf=0.365 → target_w=0.0000 px=$328.17 eq=$99,229.48 have=0.0
INFO:root:[TIMER] UNH symbol work: 0.388s
INFO:root:[GE] fetching 200 1Min bars (feed='iex')
INFO:root:[GE] Patched stale bars with synthetic trade bar @ 2025-12-10T20:58:00+00:00 px=283.82
INFO:root:[GE] obs_shape=(10, 2) | exp_shape=(10, 2) | age=0s | vecnorm=VecNormalize(training=False, norm_reward=False)
INFO:root:[GE] raw=0.1830 conf=0.181 → target_w=0.0453 px=$283.82 eq=$99,229.48 have=15.807926036
INFO:root:[TIMER] GE symbol work: 0.535s
INFO:root:[TIMER] full-cycle active time: 1.116s (cooldown=1 min)
INFO:root:[UNH] fetching 200 1Min bars (feed='iex')
INFO:root:[UNH] Patched stale bars with synthetic trade bar @ 2025-12-

In [None]:
# PPO walkforward training + selector
import os, gc, time, json, logging, glob
import shutil
from threading import Lock
from datetime import datetime, timedelta

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt  # optional for ad-hoc plots

import torch
import gymnasium as gym
from gymnasium.spaces import Box as GBox

import yfinance as yf
from gym_anytrading.envs import StocksEnv

from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
from stable_baselines3.common.utils import set_random_seed

import warnings
warnings.filterwarnings("ignore", category=UserWarning, module="gymnasium")
# ---- Sharpe annualization helper (intraday heuristic: 6.5 hrs * 252) ----
def _annualization_factor(_df_like=None) -> float:
    """Return the constant Sharpe annualization factor sqrt(252 * 6.5).

    The argument is accepted for call-site compatibility and is ignored.
    """
    trading_days = 252
    session_hours = 6.5
    return np.sqrt(trading_days * session_hours)

warnings.filterwarnings("ignore", category=DeprecationWarning, module="jupyter_client.session")
warnings.filterwarnings("ignore", message=".*Gym has been unmaintained.*")

try:
    compute_enhanced_features  # type: ignore
except NameError:
    # Only defined when no earlier cell/module has provided the real function.
    def compute_enhanced_features(df_in: pd.DataFrame) -> pd.DataFrame:
        """Minimal stand-in: normalise ordering and validate the frame.

        Works on a copy of ``df_in``. When a "Datetime" column is present it
        is parsed to datetimes and the rows are sorted by it with a fresh
        0..n-1 index. No feature columns are added.

        Raises:
            ValueError: if the frame has no "Close" column.
        """
        frame = df_in.copy()
        has_timestamps = "Datetime" in frame.columns
        if has_timestamps:
            frame["Datetime"] = pd.to_datetime(frame["Datetime"])
            frame = frame.sort_values("Datetime").reset_index(drop=True)
        if "Close" in frame.columns:
            return frame
        raise ValueError("compute_enhanced_features: missing required column 'Close'")

# Fix the global random seed through SB3's set_random_seed helper.
set_random_seed(42)

# Output layout: everything lives under BASE_RESULTS_DIR. Each run gets its
# own folder named with the local start time (minute resolution); the two
# model folders are shared across runs.
BASE_RESULTS_DIR = "/content/drive/MyDrive/Results_May_2025"
RUN_TAG = datetime.now().strftime("%Y%m%d_%H%M")

RUN_RESULTS_DIR = os.path.join(BASE_RESULTS_DIR, f"ppo_walkforward_results_{RUN_TAG}")
FINAL_MODEL_DIR = os.path.join(BASE_RESULTS_DIR, "ppo_models_master")
QC_TOP_DIR      = os.path.join(BASE_RESULTS_DIR, "ppo_models_QC_TOP")

# Create the folders up front (no error if they already exist).
os.makedirs(QC_TOP_DIR, exist_ok=True)
os.makedirs(RUN_RESULTS_DIR, exist_ok=True)
os.makedirs(FINAL_MODEL_DIR, exist_ok=True)

# Aggregated selector outputs
SELECTOR_FULL_PATH = os.path.join(BASE_RESULTS_DIR, "ppo_model_selector_FULL.csv")
SELECTOR_JSON_PATH = os.path.join(BASE_RESULTS_DIR, "ppo_model_selector_final.json")
MODEL_NAME = "PPO"

# Global skip aggregation (thread-safe)
# SKIP_LOCK serialises appends to SKIP_AGG_PATH (see record_skips_global).
SKIP_AGG_PATH = os.path.join(RUN_RESULTS_DIR, "skipped_windows_global.csv")
SKIP_LOCK = Lock()

# Logging Setup
# force=True replaces any handlers already attached to the root logger, so
# re-running this cell applies this format instead of being a no-op.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    force=True
)

# Flags
# NOTE(review): ENABLE_SLO, ENABLE_PLOTS, LIVE_MODE, SIM_LATENCY_MS and BROKER
# are only defined here; their consumers are outside this section.
ENABLE_SENTIMENT = False
ENABLE_SLO       = True
ENABLE_WAVELET   = True
test_mode        = True            # set False for full universe
ENABLE_PLOTS     = False
LIVE_MODE        = False           # set True to run simple live/paper loop
SIM_LATENCY_MS   = 0               # broker latency simulation; 0 = off
BROKER           = "log"           # "log" = do not place orders, just log

# Global training settings
# WINDOW_SIZE / STEP_SIZE match the defaults of get_walk_forward_windows.
WINDOW_SIZE = 3500
STEP_SIZE   = 500
TIMESTEPS   = 150_000  # overridden in test_mode block to smaller value


# Load the pre-built feature dataset; the path is relative to the current
# working directory and the run aborts immediately if the file is absent.
DATA_PATH = "multi_stock_feature_engineered_dataset.csv"
if not os.path.exists(DATA_PATH):
    raise FileNotFoundError("Required feature-engineered dataset not found!")

df = pd.read_csv(DATA_PATH)
df["Datetime"] = pd.to_datetime(df["Datetime"])

# Wavelet fallback
# Without a precomputed Denoised_Close column, the raw Close is used in its
# place so code that reads Denoised_Close still works (no denoising happens).
if ENABLE_WAVELET and "Denoised_Close" not in df.columns:
    logging.warning("ENABLE_WAVELET=True but 'Denoised_Close' missing; "
                    "falling back to Close->Denoised_Close.")
    df["Denoised_Close"] = df["Close"]


def record_skips_global(ticker: str, skipped_windows: list,
                        total_windows: int = None, fully_skipped: bool = False):
    """Append skip records for one ticker to the run-wide CSV at SKIP_AGG_PATH.

    Nothing is written when there are no skipped windows and the ticker was
    not fully skipped. A header row is written first if the file does not
    exist yet. A fully skipped ticker produces a single "ALL" row; otherwise
    one row is written per entry of ``skipped_windows``.

    Args:
        ticker: Symbol the rows belong to.
        skipped_windows: Names of the form "<prefix>_window<N>"; the integer
            N is stored, or an empty cell when the name cannot be parsed.
        total_windows: Optional window count stored on every row (empty
            cell when None).
        fully_skipped: True when the whole ticker was skipped.
    """
    if not (skipped_windows or fully_skipped):
        return

    import csv

    def _window_number(label):
        # Anything that does not split into exactly two parts around
        # "_window", or whose suffix is not an integer, becomes a blank cell.
        try:
            _prefix, suffix = label.split("_window")
            return int(suffix)
        except Exception:
            return ""

    total_cell = "" if total_windows is None else total_windows

    # The lock covers the existence check and the append together so that
    # concurrent callers cannot both decide to write the header.
    with SKIP_LOCK:
        needs_header = not os.path.exists(SKIP_AGG_PATH)
        with open(SKIP_AGG_PATH, "a", newline="") as handle:
            writer = csv.writer(handle)
            if needs_header:
                writer.writerow(["Ticker", "Window", "FullySkipped", "TotalWindows"])
            if not fully_skipped:
                for label in skipped_windows:
                    writer.writerow([ticker, _window_number(label), False, total_cell])
            else:
                writer.writerow([ticker, "ALL", True, total_cell])


# Keyword arguments passed to ContinuousPositionEnv. Every key here is one
# the env's __init__ pops; unknown keys make the env raise.
ENV_KWARGS = {
    # Observation window length forwarded to the parent StocksEnv.
    "window_size": 10,
    # Per-unit-of-position-change costs; the env charges their sum.
    "cost_rate": 0.0002,
    "slip_rate": 0.0003,

    # Reward-shaping weights and momentum signal settings.
    "k_alpha": 0.0,
    "k_mom": 0.15,
    "k_sent": (0.01 if ENABLE_SENTIMENT else 0.0),
    "mom_source": "denoised",
    "mom_lookback": 20,

    # Trade gating: minimum position change and minimum ticks between trades.
    "min_trade_delta": 0.08,
    "cooldown": 10,

    # Symmetric clip applied to the shaped reward; k_vol / k_dd are stored
    # by the env but set to zero here.
    "reward_clip": 0.05,
    "k_vol": 0.00,
    "k_dd": 0.00,
}


class ContinuousPositionEnv(StocksEnv):
    """StocksEnv variant whose action is a continuous target position.

    The action is a single float in [-1, 1] interpreted as the desired
    position. Each step first books the return of the position already held
    over the latest bar, then (if the change is large enough and the cooldown
    has elapsed) switches to the requested position and charges a cost
    proportional to the size of the change. The parent env is only used to
    advance the tick and build observations.

    State kept by this class: ``nav`` (starts at 1.0), ``pos``,
    ``trade_count``, ``nav_history`` and ``peak_nav``. ``ret_history``,
    ``k_vol`` and ``k_dd`` are stored but not otherwise used in this class.
    """

    def __init__(self, df, frame_bound, **kwargs):
        """Parse env settings, build the parent env, and initialise state.

        Args:
            df: Price frame; it is re-indexed 0..n-1 before being handed to
                the parent. Must contain "Close" for the return calculation.
            frame_bound: Passed through to the parent env unchanged.
            **kwargs: ``window_size`` (required) plus the optional settings
                listed below. Any other key is rejected.

        Raises:
            ValueError: if ``window_size`` is missing or an unknown keyword
                is supplied.
        """
        if "window_size" not in kwargs:
            raise ValueError("ContinuousPositionEnv requires window_size (pass via ENV_KWARGS).")
        window_size = int(kwargs.pop("window_size"))

        # (name, converter, fallback). Callers normally pass every value via
        # ENV_KWARGS; the fallbacks only apply when a key is omitted.
        setting_specs = (
            ("cost_rate", float, 0.0002),
            ("slip_rate", float, 0.0003),
            ("k_alpha", float, 0.0),
            ("k_mom", float, 0.15),
            ("k_sent", float, 0.0),
            ("mom_source", str, "denoised"),
            ("mom_lookback", int, 20),
            ("min_trade_delta", float, 0.04),
            ("cooldown", int, 6),
            ("reward_clip", float, 0.05),
            ("k_vol", float, 0.0),
            ("k_dd", float, 0.0),
        )
        settings = {}
        for name, convert, fallback in setting_specs:
            settings[name] = convert(kwargs.pop(name, fallback))

        # Whatever is left was not recognised: fail fast rather than ignore it.
        if kwargs:
            raise ValueError(f"Unexpected env kwargs: {list(kwargs.keys())}")

        super().__init__(
            df=df.reset_index(drop=True),
            frame_bound=frame_bound,
            window_size=window_size,
        )

        # Rebuild the parent's Box observation space as a gymnasium Box with
        # the same bounds, shape and dtype.
        parent_space = self.observation_space
        if isinstance(parent_space, gym.spaces.Box):
            self.observation_space = GBox(
                low=parent_space.low,
                high=parent_space.high,
                shape=parent_space.shape,
                dtype=parent_space.dtype,
            )

        # One continuous action: the target position.
        self.action_space = GBox(low=-1.0, high=1.0, shape=(1,), dtype=np.float32)

        # Expose every parsed setting as an attribute of the same name.
        for name, value in settings.items():
            setattr(self, name, value)

        # Episode bookkeeping (re-initialised again in reset()).
        self.ret_history = []
        self.nav_history = []
        self.peak_nav = 1.0
        self.trade_count = 0
        self.nav = 1.0
        self.pos = 0.0
        # Start "cooldown" ticks in the past so the first trade is not blocked.
        self._last_trade_step = -self.cooldown

    def reset(self, **kwargs):
        """Reset the parent env and this class's bookkeeping.

        Returns:
            ``(obs, info)`` where ``info`` carries "nav", "pos" and
            "trade_count" in addition to whatever the parent supplied.
        """
        parent_out = super().reset(**kwargs)
        # Accept both the (obs, info) tuple form and a bare observation.
        obs, info = parent_out if isinstance(parent_out, tuple) else (parent_out, {})

        self.pos = 0.0
        self.nav = 1.0
        self.peak_nav = self.nav
        self.nav_history = [self.nav]
        self.ret_history = []
        self.trade_count = 0
        self._last_trade_step = -self.cooldown

        info = info or {}
        info.update(nav=self.nav, pos=self.pos, trade_count=int(self.trade_count))
        return obs, info

    def _step_parent_hold(self):
        """Advance the parent env by one tick and discard its reward.

        The parent is always stepped with the constant action ``2``.
        NOTE(review): presumably 2 matches neither of the parent's discrete
        actions and therefore never flips its internal position — confirm
        against the installed gym_anytrading version.

        Returns:
            ``(obs, terminated, truncated, info)``; a 4-tuple result from the
            parent is mapped to ``terminated=bool(done), truncated=False``.
        """
        result = super().step(2)
        if len(result) != 5:
            obs, _parent_reward, done, info = result
            return obs, bool(done), False, info
        obs, _parent_reward, terminated, truncated, info = result
        return obs, terminated, truncated, info

    def _ret_t(self):
        """Simple return of "Close" from the previous tick to the current one.

        Returns 0.0 when the previous close is not positive.
        """
        tick = self._current_tick
        cur = float(self.df.loc[tick, "Close"])
        prev = float(self.df.loc[max(tick - 1, 0), "Close"])
        if prev <= 0:
            return 0.0
        return (cur - prev) / prev

    def _mom_signal(self):
        """Momentum signal squashed through tanh, or 0.0 when unavailable.

        With ``mom_source == "macd"`` and a "MACD_Line" column, the current
        MACD value is scaled by the standard deviation of the trailing (up to
        201-row) MACD slice. Otherwise, if "Denoised_Close" exists and enough
        history is available, the per-bar slope of Denoised_Close over
        ``mom_lookback`` ticks is scaled by the previous "Close".
        """
        frame = self.df
        tick = self._current_tick
        columns = frame.columns

        if self.mom_source == "macd" and "MACD_Line" in columns:
            trailing = frame["MACD_Line"].iloc[max(tick - 200, 0):tick + 1]
            current_macd = float(frame.loc[tick, "MACD_Line"])
            scale = 1e-6 + float(trailing.std())
            return float(np.tanh(current_macd / scale))

        lookback = self.mom_lookback
        if "Denoised_Close" not in columns or tick - lookback < 0:
            return 0.0

        now = float(frame.loc[tick, "Denoised_Close"])
        then = float(frame.loc[tick - lookback, "Denoised_Close"])
        base = float(frame.loc[max(tick - 1, 0), "Close"])
        slope = (now - then) / max(lookback, 1)
        return float(np.tanh(10.0 * (slope / max(abs(base), 1e-6))))

    def step(self, action):
        """Apply one action and advance the environment by one tick.

        Order of operations matters: the reward and NAV update use the
        position held *before* this action; only afterwards is the position
        switched and the parent env advanced.

        Args:
            action: Scalar or 1-element array; clipped to [-1, 1].

        Returns:
            ``(obs, reward, terminated, truncated, info)``. ``reward`` is the
            shaped reward clipped to ``±reward_clip``; ``info`` is the
            parent's info extended with diagnostics for this step.
        """
        raw_action = float(np.array(action).squeeze())
        target_pos = float(np.clip(raw_action, -1.0, 1.0))
        tick = self._current_tick
        held_pos = self.pos

        # Return earned over the latest bar by the position already held.
        bar_return = self._ret_t()
        held_return = held_pos * bar_return

        # A trade happens only when the requested change is large enough and
        # the cooldown since the last trade has elapsed.
        large_enough = abs(target_pos - held_pos) >= self.min_trade_delta
        changed = large_enough and ((tick - self._last_trade_step) >= self.cooldown)

        delta_pos = (target_pos - held_pos) if changed else 0.0
        trade_cost = (self.cost_rate + self.slip_rate) * abs(delta_pos)

        rel_alpha = held_return - bar_return
        mom_term = held_pos * self._mom_signal()
        alpha_term = self.k_alpha * rel_alpha

        # Sentiment only contributes when enabled globally and present in df.
        sent_term = 0.0
        if ENABLE_SENTIMENT and "SentimentScore" in self.df.columns:
            sent_term = self.k_sent * float(self.df.loc[tick, "SentimentScore"])

        shaped = held_return + alpha_term + (self.k_mom * mom_term) + sent_term - trade_cost
        reward = float(np.clip(shaped, -self.reward_clip, self.reward_clip))

        # NAV tracks the unshaped P&L: held return minus trading cost.
        self.nav *= (1.0 + held_return - trade_cost)
        self.nav_history.append(self.nav)
        self.peak_nav = max(self.peak_nav, self.nav)

        executed_trade = False
        if changed:
            executed_trade = True
            self.trade_count += 1
            self._last_trade_step = tick
            self.pos = target_pos

        obs, terminated, truncated, info = self._step_parent_hold()
        info = info or {}
        info.update({
            "ret_t": bar_return,
            "nav": self.nav,
            "pos": self.pos,
            "trade_cost": trade_cost,
            "base_ret": held_return,
            "rel_alpha": rel_alpha,
            "mom": mom_term,
            "changed": bool(changed),
            "executed_trade": bool(executed_trade),
            "trade_count": int(self.trade_count),
            "delta_pos": float(delta_pos),
        })
        return obs, reward, terminated, truncated, info

def get_mu_sigma(model, obs):
    """Return the policy's Gaussian mean and std for ``obs`` as Python floats.

    Walks the actor path of the policy by hand (``obs_to_tensor`` ->
    ``extract_features`` -> ``mlp_extractor`` -> ``action_net``) and reads
    the policy's ``log_std`` parameter. Both results are squeezed to
    scalars, so this only works for a single one-dimensional action.
    """
    policy = model.policy
    with torch.no_grad():
        obs_tensor, _vectorized = policy.obs_to_tensor(obs)
        pi_latent, _vf_latent = policy.mlp_extractor(
            policy.extract_features(obs_tensor)
        )
        mean_np = policy.action_net(pi_latent).detach().cpu().numpy()
        std_np = policy.log_std.exp().detach().cpu().numpy()
        mu = float(mean_np.squeeze())
        sigma = float(std_np.squeeze())
    return mu, sigma

def get_walk_forward_windows(df_in, window_size=3500, step_size=500, min_len=1200):
    """Return ``(start, end)`` row bounds for rolling walk-forward windows.

    Args:
        df_in: Any sized container (only ``len(df_in)`` is used).
        window_size: Number of rows in every window.
        step_size: Distance in rows between consecutive window starts.
        min_len: No window starts within the last ``min_len`` rows.

    Returns:
        List of half-open ``(start, start + window_size)`` tuples suitable
        for ``df.iloc[start:end]``. Only complete windows are returned.
    """
    n_rows = len(df_in)
    return [
        (start, start + window_size)
        for start in range(0, n_rows - min_len, step_size)
        # ``end`` is exclusive, so a window ending exactly at ``n_rows`` is
        # still complete; the former strict ``<`` dropped it (and produced
        # no windows at all when ``n_rows == window_size``).
        if start + window_size <= n_rows
    ]

def save_quantconnect_model(artifact, prefix, save_dir):
    """Save/copy QC-compatible artifacts for one model into ``save_dir``.

    Every step is best-effort: a failure is logged as a warning and the
    remaining artifacts are still written.

    Args:
        artifact: Mapping with optional keys ``model`` (in-memory model with
            a ``save`` method), ``model_path`` (existing model zip),
            ``vecnorm_path``, ``features`` (list) and ``result`` (summary
            row mapping).
        prefix: File-name stem shared by all written artifacts.
        save_dir: Destination directory (created if missing).

    Files written: ``{prefix}_model.zip``, ``{prefix}_vecnorm.pkl``,
    ``{prefix}_features.json``, ``{prefix}_probability_config.json`` and
    ``{prefix}_model_info.json``.
    """
    import math
    import shutil

    os.makedirs(save_dir, exist_ok=True)

    # Tolerate a missing/None result or feature list.
    result = artifact.get("result") or {}
    features = artifact.get("features")
    if features is None:
        features = []

    def _write_json(suffix, payload):
        """Serialise first, then write, so a failure never leaves a truncated file."""
        path = os.path.join(save_dir, f"{prefix}_{suffix}.json")
        try:
            text = json.dumps(payload)
            with open(path, "w") as f:
                f.write(text)
        except (OSError, TypeError, ValueError) as e:
            logging.warning(f"[QC SAVE] Could not write {suffix}.json for {prefix}: {e}")

    # --- Model zip: save or copy ---
    model_dst = os.path.join(save_dir, f"{prefix}_model.zip")

    model_obj = artifact.get("model", None)
    model_src = artifact.get("model_path", None)

    try:
        if model_obj is not None:
            # Save from in-memory model; an existing destination is kept.
            if not os.path.exists(model_dst):
                model_obj.save(model_dst)
        elif model_src and os.path.exists(model_src):
            # Copy from an existing trained window model zip
            if os.path.abspath(model_src) != os.path.abspath(model_dst):
                shutil.copyfile(model_src, model_dst)
        elif not os.path.exists(model_dst):
            # If neither provided, warn loudly
            logging.warning(f"[QC SAVE] Missing model for {prefix}: no model_obj and no valid model_path.")
    except Exception as e:  # model.save() may raise anything; stay best-effort
        logging.warning(f"[QC SAVE] Model handling issue for {prefix}: {e}")

    # --- VecNormalize: copy ---
    vecnorm_src = artifact.get("vecnorm_path")
    if vecnorm_src and os.path.exists(vecnorm_src):
        try:
            vecnorm_dst = os.path.join(save_dir, f"{prefix}_vecnorm.pkl")
            if os.path.abspath(vecnorm_src) != os.path.abspath(vecnorm_dst):
                shutil.copyfile(vecnorm_src, vecnorm_dst)
        except OSError as e:
            logging.warning(f"[QC SAVE] VecNormalize handling issue for {prefix}: {e}")
    else:
        logging.warning(f"[QC SAVE] VecNormalize missing for {prefix}: vecnorm_path not found.")

    # --- Features ---
    _write_json("features", {"features": features})

    # --- Probability config ---
    default_thr = 0.2
    try:
        thr = float(result.get("Action_Threshold", default_thr))
    except (AttributeError, TypeError, ValueError):
        thr = default_thr
    # A NaN/inf threshold (e.g. a summary row from a run that had no
    # Action_Threshold column) would make every `a > thr` comparison false.
    if not math.isfinite(thr):
        thr = default_thr

    _write_json(
        "probability_config",
        {"threshold": thr, "use_confidence": True, "inference_mode": "deterministic"},
    )

    # --- Model info ---
    try:
        model_info = {
            "model": "PPO",
            "ticker": result.get("Ticker"),
            "window": result.get("Window"),
            "date_trained": datetime.today().strftime("%Y-%m-%d"),
            "framework": "stable-baselines3",
            "input_features": features,
            "final_portfolio": result.get("PPO_Portfolio"),
            "buy_hold": result.get("BuyHold"),
            "sharpe": result.get("Sharpe"),
        }
    except AttributeError as e:
        logging.warning(f"[QC SAVE] Could not write model_info.json for {prefix}: {e}")
    else:
        _write_json("model_info", model_info)

    logging.info(f"[QC SAVE] Saved QC artifacts for {prefix}")

def load_model_and_env(prefix):
    """Load a trained PPO and return it with a factory for matching envs.

    Args:
        prefix: Artifact stem; ``{prefix}_model.zip`` and
            ``{prefix}_vecnorm.pkl`` are looked up in ``FINAL_MODEL_DIR``.

    Returns:
        ``(model, make_env)`` where ``make_env(df_window)`` builds a
        single-env ``DummyVecEnv`` over ``df_window``, wrapped in the saved
        ``VecNormalize`` statistics (frozen, rewards un-normalised) when the
        stats file exists.
    """
    model_path = os.path.join(FINAL_MODEL_DIR, f"{prefix}_model.zip")
    vec_path   = os.path.join(FINAL_MODEL_DIR, f"{prefix}_vecnorm.pkl")
    model = PPO.load(model_path, device="cpu")

    def make_env(df_window):
        frame_bound = (50, len(df_window) - 3)
        venv = DummyVecEnv([lambda: ContinuousPositionEnv(
            df=df_window, frame_bound=frame_bound, **ENV_KWARGS
        )])
        if not os.path.exists(vec_path):
            # Without the saved statistics the policy is fed raw
            # observations; make that visible instead of failing silently.
            logging.warning(
                f"VecNormalize stats not found at {vec_path}; "
                f"running {prefix} on un-normalized observations."
            )
            return venv

        venv = VecNormalize.load(vec_path, venv)
        # Inference only: freeze running statistics and return raw rewards.
        venv.training = False
        venv.norm_reward = False
        return venv

    return model, make_env

def latest_df_for_symbol(symbol, horizon_days=5, interval="1m"):
    """Fetch fresh bars for ``symbol`` and rebuild features like training.

    Args:
        symbol: Ticker passed to ``yf.download``.
        horizon_days: Look-back in calendar days from now (UTC).
        interval: Bar interval passed to ``yf.download``.

    Returns:
        Feature DataFrame from ``compute_enhanced_features`` with a
        ``Symbol`` column, or ``None`` when the download returned no rows.
    """
    now = datetime.utcnow()
    start = now - timedelta(days=horizon_days)
    # yfinance treats ``end`` as exclusive; passing today's date would drop
    # all of today's bars, so request up to tomorrow.
    end = now + timedelta(days=1)
    df_live = yf.download(
        symbol,
        start=start.strftime("%Y-%m-%d"),
        end=end.strftime("%Y-%m-%d"),
        interval=interval,
        progress=False,
        auto_adjust=False,
    )
    if df_live is None or df_live.empty:
        return None

    # Newer yfinance releases return (field, ticker) MultiIndex columns even
    # for a single symbol; flatten to plain field names. No-op otherwise.
    if isinstance(df_live.columns, pd.MultiIndex):
        df_live.columns = df_live.columns.get_level_values(0)

    df_live = df_live.reset_index()
    df_live["Symbol"] = symbol
    df_live = compute_enhanced_features(df_live)
    # Fall back to the raw close when the wavelet-denoised column is absent.
    if ENABLE_WAVELET and "Denoised_Close" not in df_live.columns:
        df_live["Denoised_Close"] = df_live["Close"]
    return df_live

def predict_latest(symbol, prefix):
    """Build the latest window, fast-forward the env, and return a signal.

    Args:
        symbol: Ticker to fetch live data for.
        prefix: Artifact stem of the model to load from ``FINAL_MODEL_DIR``.

    Returns:
        ``None`` when fewer than 100 live rows are available, otherwise a
        dict with ``signal`` ("BUY"/"SELL"/"HOLD"), ``confidence``
        (``abs(action)``), ``action``, ``threshold``, ``ts``, ``price``,
        ``mu`` and ``sigma``.
    """
    # --- load per-model threshold ---
    default_thr = 0.2
    cfg_path = os.path.join(FINAL_MODEL_DIR, f"{prefix}_probability_config.json")
    thr = default_thr
    if os.path.exists(cfg_path):
        try:
            with open(cfg_path, "r") as f:
                thr = float(json.load(f).get("threshold", default_thr))
        except (OSError, ValueError, TypeError, AttributeError):
            thr = default_thr
    # A NaN/inf threshold would make every comparison below false.
    if not np.isfinite(thr):
        thr = default_thr

    model, make_env = load_model_and_env(prefix)
    live_df = latest_df_for_symbol(symbol)
    if live_df is None or len(live_df) < 100:
        logging.warning("No fresh data yet for live inference.")
        return None

    df_window = live_df.iloc[-2500:].reset_index(drop=True) if len(live_df) > 2500 else live_df.copy()

    env = make_env(df_window)
    try:
        obs = env.reset()
        if isinstance(obs, tuple):
            obs, _ = obs

        # fast-forward with HOLD
        hold_action = [np.array([0.0], dtype=np.float32)]
        for _ in range(len(df_window) - 1):
            next_obs, _, dones, infos = env.step(hold_action)
            if isinstance(dones, (np.ndarray, list, tuple)) and len(dones) and dones[0]:
                # SB3 VecEnvs auto-reset on done, so ``next_obs`` is already
                # the first observation of a new episode. The real last
                # observation is in info["terminal_observation"]; if it is
                # missing, keep the last pre-reset observation.
                info0 = infos[0] if isinstance(infos, (list, tuple)) and len(infos) else {}
                terminal = info0.get("terminal_observation") if isinstance(info0, dict) else None
                if terminal is not None:
                    obs = np.expand_dims(np.asarray(terminal), axis=0)
                break
            obs = next_obs

        action, _ = model.predict(obs, deterministic=True)
        mu, sigma = get_mu_sigma(model, obs)
    finally:
        env.close()

    a = float(np.array(action).squeeze())

    # --- thresholded signal using loaded thr ---
    if a > thr:
        signal = "BUY"
    elif a < -thr:
        signal = "SELL"
    else:
        signal = "HOLD"

    conf = abs(a)
    ts = df_window["Datetime"].iloc[-1] if "Datetime" in df_window.columns else None
    price = float(df_window["Close"].iloc[-1])

    return dict(
        signal=signal,
        confidence=conf,
        action=a,
        threshold=thr,
        ts=ts,
        price=price,
        mu=mu,
        sigma=sigma,
    )

def place_order(signal, qty=1):
    """Log an order for ``signal``/``qty`` after an optional simulated delay.

    Sleeps ``SIM_LATENCY_MS`` milliseconds when that setting is positive.
    No real order is sent: the "log" broker writes a paper-trade line and
    any other ``BROKER`` value is logged as not implemented.
    """
    if SIM_LATENCY_MS > 0:
        time.sleep(SIM_LATENCY_MS / 1000.0)

    message = (
        f"[PAPER] {signal} x{qty}"
        if BROKER == "log"
        else f"[BROKER={BROKER}] {signal} x{qty} (not implemented)"
    )
    logging.info(message)

def live_loop(symbol, best_prefix):
    """Poll once a minute for a prediction and route actionable signals.

    Runs while the module-level ``LIVE_MODE`` flag is truthy. Each pass
    calls ``predict_latest``, logs the prediction and forwards BUY/SELL
    signals to ``place_order``. Errors are logged with their traceback and
    the loop continues.
    """
    while LIVE_MODE:
        try:
            pred = predict_latest(symbol, best_prefix)
            if pred:
                logging.info(
                    f"{symbol} {pred['ts']} | {pred['signal']} "
                    f"@ {pred['price']:.2f} (conf {pred['confidence']:.2f})"
                )
                # HOLD means "do nothing"; it is not an order to send.
                if pred["signal"] != "HOLD":
                    place_order(pred["signal"], qty=1)
        except Exception as e:
            # logging.exception keeps the traceback of the failed pass.
            logging.exception("Live loop error: %s", e)
        time.sleep(60)  # Poll each minute

# How many of the best-Sharpe windows per ticker are exported as QC artifacts.
TOP_N_WINDOWS = 3

# PPO hyper-parameter presets. walkforward_ppo reads these keys through
# ppo_overrides.get(...): lr, n_steps, batch, clip, ent.
FAST = dict(lr=8e-5, n_steps=3072, batch=512, clip=0.2, ent=0.01)
SLOW = dict(lr=3e-5, n_steps=3072, batch=512, clip=0.16, ent=0.005)

# Symbols trained with the FAST preset (see pick_params).
fast_names = set(
    "TSLA NVDA AMD AVGO AAPL MSFT AMZN GOOGL META ADBE CRM "
    "INTC QCOM TXN ORCL NEE GE XOM CVX LLY NKE SBUX".split()
)
# Symbols listed for the SLOW preset. pick_params does not consult this set:
# every symbol outside fast_names gets SLOW.
slow_names = set(
    "BRK-B JPM BAC JNJ UNH MRK PFE ABBV ABT AMGN PG PEP KO "
    "V MA WMT MCD TMO DHR ACN IBM LIN PM RTX UPS UNP COST HD LOW".split()
)

def pick_params(symbol: str):
    """Return the FAST preset for symbols in ``fast_names``, else SLOW."""
    if symbol in fast_names:
        return FAST
    return SLOW

def export_qc_top_from_existing(ticker: str, top_n: int = 3):
    """Populate QC_TOP_DIR for a ticker whose models already exist on disk.

    Reads every ``summary*.csv`` under the walk-forward result folders,
    picks the ``top_n`` distinct windows of ``ticker`` with the highest
    Sharpe, and copies their artifacts from FINAL_MODEL_DIR via
    ``save_quantconnect_model``.

    Windows are identified by the ``Prefix`` column when any row has one
    (rows without a prefix are then ignored). Otherwise the prefix is
    rebuilt from the window's position when ordered by start row, which is
    only right if the summaries cover every window of the ticker.

    Args:
        ticker: Value matched against the ``Ticker`` summary column.
        top_n: Maximum number of windows to export.
    """
    summary_files = glob.glob(os.path.join(BASE_RESULTS_DIR, "ppo_walkforward_results_*", "summary*.csv"))
    if not summary_files:
        logging.warning(f"[QC_TOP] No summary files found; cannot export QC_TOP for {ticker}.")
        return

    frames = []
    for p in summary_files:
        try:
            frames.append(pd.read_csv(p))
        except Exception as e:  # unreadable/corrupt file: skip it, keep the rest
            logging.warning(f"[QC_TOP] Could not read {p}: {e}")

    if not frames:
        logging.warning(f"[QC_TOP] Could not read any summary files; cannot export QC_TOP for {ticker}.")
        return

    combo = pd.concat(frames, ignore_index=True)

    if "Ticker" not in combo.columns:
        logging.warning("[QC_TOP] Summary files missing 'Ticker' column; cannot export.")
        return

    combo = combo[combo["Ticker"] == ticker].copy()
    if combo.empty or "Sharpe" not in combo.columns:
        logging.warning(f"[QC_TOP] No rows for {ticker} in summaries (or missing Sharpe); cannot export QC_TOP.")
        return

    # Ensure Sharpe is numeric so sorting works reliably
    combo["Sharpe"] = pd.to_numeric(combo["Sharpe"], errors="coerce")
    combo = combo.dropna(subset=["Sharpe"])
    if combo.empty:
        logging.warning(f"[QC_TOP] All Sharpe values were non-numeric for {ticker}; cannot export.")
        return

    use_prefix = ("Prefix" in combo.columns) and combo["Prefix"].notna().any()

    if use_prefix:
        # Robust path: use saved Prefix directly. Rows from older runs have
        # no Prefix; casting their NaN to str would give the bogus prefix
        # "nan", so they are dropped.
        ranked = combo[combo["Prefix"].notna()].copy()
        ranked["__prefix__"] = ranked["Prefix"].astype(str)
    else:
        # Fallback: reconstruct WindowIdx (less robust)
        if "Window" not in combo.columns:
            logging.warning(f"[QC_TOP] Summaries for {ticker} have neither 'Prefix' nor 'Window'; cannot export.")
            return

        def _window_start(w):
            s = str(w)
            try:
                return int(s.split("-")[0]) if "-" in s else np.nan
            except ValueError:
                return np.nan

        ranked = combo.copy()
        ranked["WindowStart"] = ranked["Window"].apply(_window_start)
        # Keep one row per window start (the best Sharpe) so that repeated
        # rows across runs do not shift the reconstructed indices.
        ranked = (
            ranked.dropna(subset=["WindowStart"])
            .sort_values("Sharpe", ascending=False)
            .drop_duplicates(subset="WindowStart", keep="first")
            .sort_values("WindowStart")
            .reset_index(drop=True)
        )
        if ranked.empty:
            logging.warning(f"[QC_TOP] No parsable 'Window' values for {ticker}; cannot export.")
            return
        ranked["WindowIdx"] = np.arange(1, len(ranked) + 1)
        ranked["__prefix__"] = [f"ppo_{ticker}_window{int(widx)}" for widx in ranked["WindowIdx"]]

    # One row per prefix, so the same window seen in several summaries does
    # not take more than one top-N slot.
    top = (
        ranked.sort_values("Sharpe", ascending=False)
        .drop_duplicates(subset="__prefix__", keep="first")
        .head(top_n)
    )

    exported = 0

    for _, r in top.iterrows():
        prefix = str(r["__prefix__"])

        model_path = os.path.join(FINAL_MODEL_DIR, f"{prefix}_model.zip")
        vec_path   = os.path.join(FINAL_MODEL_DIR, f"{prefix}_vecnorm.pkl")

        if not (os.path.exists(model_path) and os.path.exists(vec_path)):
            logging.warning(f"[QC_TOP] Missing model/vecnorm for {prefix}; cannot export.")
            continue

        # Features are not stored in the summaries. Re-use a feature list
        # already exported for this prefix instead of overwriting it with [].
        features = []
        try:
            with open(os.path.join(QC_TOP_DIR, f"{prefix}_features.json"), "r") as f:
                features = list(json.load(f).get("features") or [])
        except (OSError, ValueError, TypeError, AttributeError):
            features = []

        # NaN cells (columns absent in some runs) become None so the JSON
        # written downstream stays valid and defaults can apply.
        result = {
            k: (None if pd.isna(v) else v)
            for k, v in r.to_dict().items()
            if k != "__prefix__"
        }

        artifact_for_save = {
            "model": None,
            "model_path": model_path,
            "vecnorm_path": vec_path,
            "features": features,   # may be empty if unknown; QC can load features elsewhere
            "result": result,       # includes Sharpe, Action_Threshold, etc if present
            "prefix": prefix,
        }
        save_quantconnect_model(artifact_for_save, prefix, QC_TOP_DIR)
        exported += 1

    logging.info(f"[QC_TOP] Exported {exported}/{len(top)} QC artifacts for {ticker}.")

def walkforward_ppo(df_sym, ticker,
                    window_size=3500, step_size=500,
                    timesteps=150_000, learning_rate=1e-4,
                    ppo_overrides=None):
    """Train and evaluate one PPO model per rolling window of ``df_sym``.

    For every window that does not already have both a model zip and a
    VecNormalize pickle in FINAL_MODEL_DIR, a PPO agent is trained on the
    window, replayed deterministically over the same window's env to
    collect metrics, and saved together with per-step prediction CSVs in
    RUN_RESULTS_DIR. The ``TOP_N_WINDOWS`` best windows (by Sharpe) trained
    in this call are exported to QC_TOP_DIR.

    Args:
        df_sym: Feature frame for a single ticker.
        ticker: Symbol used in file prefixes and summary rows.
        window_size: Rows per window.
        step_size: Rows between window starts.
        timesteps: ``total_timesteps`` passed to ``model.learn``.
        learning_rate: Used when ``ppo_overrides`` has no ``"lr"``.
        ppo_overrides: Optional dict with any of ``lr``, ``n_steps``,
            ``batch``, ``clip``, ``ent``.

    Returns:
        List of summary-row dicts, one per window trained in this call
        (empty when the ticker is too short, has no windows, or everything
        was already trained).
    """
    import heapq

    if ppo_overrides is None:
        ppo_overrides = {}

    if len(df_sym) < window_size:
        logging.warning(
            f"Skipping {ticker}: only {len(df_sym)} rows (min required: {window_size})"
        )
        return []

    results = []
    windows = get_walk_forward_windows(df_sym, window_size, step_size)
    top_heap = []
    skipped_windows = []

    # No windows at all must not be mistaken for "all windows complete"
    # (the completeness check below is vacuously true on an empty list).
    if not windows:
        logging.warning(
            f"Skipping {ticker}: no walk-forward windows for {len(df_sym)} rows "
            f"(window_size={window_size}, step_size={step_size})"
        )
        return []

    def _artifacts_exist(window_number):
        """True when both model zip and vecnorm pickle exist for the window."""
        stem = f"ppo_{ticker}_window{window_number}"
        return (
            os.path.exists(os.path.join(FINAL_MODEL_DIR, f"{stem}_model.zip"))
            and os.path.exists(os.path.join(FINAL_MODEL_DIR, f"{stem}_vecnorm.pkl"))
        )

    # quick check: all windows already have model+vecnorm?
    all_done = all(_artifacts_exist(idx + 1) for idx in range(len(windows)))

    if all_done:
        logging.info(f"Ticker {ticker} fully skipped (all {len(windows)} windows already complete).")
        record_skips_global(ticker, skipped_windows=[], total_windows=len(windows), fully_skipped=True)

        export_qc_top_from_existing(ticker, top_n=TOP_N_WINDOWS)
        return []

    for w_idx, (start, end) in enumerate(windows):
        window_start_time = time.time()
        gc.collect()

        prefix = f"ppo_{ticker}_window{w_idx+1}"
        model_path   = os.path.join(FINAL_MODEL_DIR, f"{prefix}_model.zip")
        vecnorm_path = os.path.join(FINAL_MODEL_DIR, f"{prefix}_vecnorm.pkl")

        if os.path.exists(model_path) and os.path.exists(vecnorm_path):
            logging.info(f"Skipping {ticker} | Window {w_idx+1}, already trained.")
            skipped_windows.append(f"{ticker}_window{w_idx+1}")
            continue

        missing = []
        if not os.path.exists(model_path):   missing.append("model.zip")
        if not os.path.exists(vecnorm_path): missing.append("vecnorm.pkl")
        logging.info(
            f"Will train {ticker} | Window {w_idx+1} because missing: {', '.join(missing)}"
        )

        df_window = df_sym.iloc[start:end].reset_index(drop=True)
        if len(df_window) <= 52 or len(df_window) % 2 != 0:
            df_window = df_window.iloc[:-1]

        frame_bound = (50, len(df_window) - 3)

        env = DummyVecEnv([lambda: ContinuousPositionEnv(
          df=df_window, frame_bound=frame_bound, **ENV_KWARGS
        )])

        env = VecNormalize(env, norm_obs=True, norm_reward=True, clip_obs=10.0)

        try:
            model = PPO(
                "MlpPolicy",
                env,
                verbose=0,
                device=("cuda" if torch.cuda.is_available() else "cpu"),
                learning_rate=ppo_overrides.get("lr", learning_rate),
                n_steps=ppo_overrides.get("n_steps", 256),
                batch_size=ppo_overrides.get("batch", 64),
                n_epochs=5,
                gamma=0.99,
                gae_lambda=0.95,
                clip_range=ppo_overrides.get("clip", 0.2),
                ent_coef=ppo_overrides.get("ent", 0.005),
                policy_kwargs=dict(net_arch=[64, 64]),
            )

            logging.info(f"Training {ticker} Window {w_idx+1}/{len(windows)}")
            model.learn(total_timesteps=timesteps)

            # Evaluation pass: freeze normalisation stats, use raw rewards.
            env.training = False
            env.norm_reward = False
            obs = env.reset()
            if isinstance(obs, tuple):
                obs, _ = obs

            nav_track = [1.0]
            bh_track  = [1.0]
            step_log  = []
            executed_trade_count = 0
            signal_trade_count   = 0
            signal_trade_count_dyn = 0   # dynamic-threshold diagnostic

            DIAG_THR = 0.2  # diagnostic-only fixed threshold

            for i in range(len(df_window) - 1):
                action, _ = model.predict(obs, deterministic=True)
                mu, sigma = get_mu_sigma(model, obs)

                obs, rew, dones, infos = env.step(action)

                if isinstance(infos, (list, tuple)):
                    info = infos[0] if len(infos) else {}
                elif isinstance(infos, dict):
                    info = infos
                else:
                    info = {}

                nav_track.append(float(info.get("nav", nav_track[-1])))
                bh_track.append(bh_track[-1] * (1.0 + float(info.get("ret_t", 0.0))))

                a = float(np.array(action).squeeze())
                dt_val = df_window["Datetime"].iloc[i+1] if "Datetime" in df_window.columns else None
                px     = float(df_window["Close"].iloc[i+1]) if "Close" in df_window.columns else np.nan

                # “signal” trades (fixed threshold) — diagnostic only
                if a > DIAG_THR or a < -DIAG_THR:
                    signal_trade_count += 1

                # real trades executed by env friction logic
                if bool(info.get("executed_trade", False)):
                    executed_trade_count += 1

                # next-bar return (to score BUY/SELL vs the *next* move)
                if i + 2 < len(df_window):
                    p0 = float(df_window["Close"].iloc[i+1])
                    p1 = float(df_window["Close"].iloc[i+2])
                    next_ret = 0.0 if p0 <= 0 else (p1 - p0) / p0
                else:
                    next_ret = 0.0

                rew_val = float(rew[0]) if isinstance(rew, (list, tuple, np.ndarray)) else float(rew)

                step_log.append({
                    "Index": i+1,
                    "Datetime": dt_val,
                    "Close": px,
                    "Action": a,
                    "mu": mu,
                    "sigma": sigma,
                    "nav": nav_track[-1],
                    "ret_t": float(info.get("ret_t", 0.0)),
                    "next_ret": float(next_ret),
                    "reward": rew_val,
                    "pos": float(info.get("pos", 0.0)),
                    "trade_cost": float(info.get("trade_cost", 0.0)),
                    "base_ret": float(info.get("base_ret", 0.0)),
                    "rel_alpha": float(info.get("rel_alpha", 0.0)),
                    "mom": float(info.get("mom", 0.0)),
                })

                # done handling (VecEnv)
                if isinstance(dones, (np.ndarray, list, tuple)):
                    if dones[0]:
                        break
                elif dones:
                    break

            # --- Metrics ---
            final_value = float(nav_track[-1]) * 100_000.0
            hold_value  = float(bh_track[-1])  * 100_000.0

            # dynamic action threshold for this window (prevents “no signals” windows)
            abs_actions = np.array([abs(float(r["Action"])) for r in step_log], dtype=float)
            if len(abs_actions) > 0:
                thr = float(np.quantile(abs_actions, 0.70))  # 70th percentile
                thr = float(np.clip(thr, 0.08, 0.30))
            else:
                thr = 0.2

            # Dynamic signal trade count (post-hoc diagnostic)
            signal_trade_count_dyn = int(np.sum(abs_actions > thr)) if len(abs_actions) > 0 else 0

            returns = pd.Series(nav_track).pct_change().fillna(0.0)
            sharpe  = float((returns.mean() / (returns.std() + 1e-9)) * _annualization_factor(df_window))
            drawdown = float(
                ((pd.Series(nav_track).cummax() - pd.Series(nav_track)) /
                pd.Series(nav_track).cummax()).max() * 100.0
            )

            # Classification stats (now using thr)
            correct = 0
            total   = 0
            tp_buy = fp_buy = 0
            tp_sell = fp_sell = 0

            for r in step_log:
                a = float(r["Action"])
                ret_t = float(r.get("next_ret", 0.0))

                if a > thr:
                    sig = "BUY"
                elif a < -thr:
                    sig = "SELL"
                else:
                    sig = "HOLD"

                if sig == "BUY":
                    if ret_t > 0:
                        tp_buy += 1; correct += 1
                    else:
                        fp_buy += 1
                    total += 1
                elif sig == "SELL":
                    if ret_t < 0:
                        tp_sell += 1; correct += 1
                    else:
                        fp_sell += 1
                    total += 1
                # HOLD not counted

            precision_long  = tp_buy  / (tp_buy  + fp_buy  + 1e-9)
            precision_short = tp_sell / (tp_sell + fp_sell + 1e-9)
            precision_trades = (tp_buy + tp_sell) / (
                (tp_buy + tp_sell) + (fp_buy + fp_sell) + 1e-9
            )
            step_accuracy = round(correct / total, 4) if total > 0 else 0.0
            # Trade_count reflects REAL executed trades (cooldown/min_trade_delta)
            trade_count = int(executed_trade_count)

            # Save VecNormalize
            try:
                env.save(vecnorm_path)
            except Exception as e:
                logging.warning(f"Could not save VecNormalize for {ticker} {start}-{end}: {e}")
                vecnorm_path = None

            # Save model
            model.save(model_path)

            # Save detailed predictions
            pred_path = os.path.join(RUN_RESULTS_DIR, f"{prefix}_predictions.csv")
            pd.DataFrame(step_log).to_csv(pred_path, index=False)
            logging.info(f"Saved predictions to {pred_path}")

            # Save compat predictions with same thresholds as metrics
            compat_rows = []
            for r in step_log:
                a = r["Action"]

                if a > thr:
                    signal = "BUY"
                elif a < -thr:
                    signal = "SELL"
                else:
                    signal = "HOLD"
                compat_rows.append({
                    "Index": r["Index"],
                    "Datetime": r["Datetime"],
                    "Close": r["Close"],
                    "Action": a,
                    "Signal": signal,
                    "PortfolioValue": r["nav"],
                    "Reward": r.get("reward", np.nan),
                })
            compat_path = os.path.join(RUN_RESULTS_DIR, f"{prefix}_predictions_compat.csv")
            pd.DataFrame(compat_rows).to_csv(compat_path, index=False)
            logging.info(f"Saved compatibility predictions to {compat_path}")

            # Summary row
            result_row = {
                "Ticker": ticker,
                "Window": f"{start}-{end}",
                "WindowIdx": int(w_idx + 1),
                "Prefix": prefix,
                "PPO_Portfolio": round(final_value, 2),
                "BuyHold": round(hold_value, 2),
                "Sharpe": round(sharpe, 3),
                "Drawdown_%": round(drawdown, 2),
                "Winner": "PPO" if final_value > hold_value else "Buy & Hold",
                "Action_Threshold": round(thr, 4),
                "Accuracy": step_accuracy,
                "Trade_Count": trade_count,
                "Signal_Trade_Count": int(signal_trade_count),
                "Signal_Trade_Count_Dyn": int(signal_trade_count_dyn),
                "Executed_Trade_Count": int(executed_trade_count),
                "Precision_Long": round(precision_long, 4),
                "Precision_Short": round(precision_short, 4),
                "Precision_Trades": round(precision_trades, 4),
            }

            results.append(result_row)

            meta = {
                "result": result_row,
                "features": df_window.columns.tolist(),
                "prefix": prefix,
                "model_path": model_path,
                "vecnorm_path": vecnorm_path,
            }

            # Min-heap of the best TOP_N_WINDOWS; the unique prefix breaks
            # Sharpe ties so the meta dicts are never compared.
            item = (result_row["Sharpe"], prefix, meta)
            if len(top_heap) < TOP_N_WINDOWS:
                heapq.heappush(top_heap, item)
            else:
                if item[0] > top_heap[0][0]:
                    heapq.heapreplace(top_heap, item)

            logging.info(
                f"{ticker} | Window {w_idx+1} runtime: "
                f"{round(time.time() - window_start_time, 2)}s"
            )
        except Exception:
            # One bad window must not discard the summary rows and top-N
            # export of windows already trained in this call; log with
            # traceback and move on to the next window.
            logging.exception(
                f"{ticker} | Window {w_idx+1} failed; continuing with next window."
            )
        finally:
            try:
                env.close()
            except Exception:
                pass
            del env
            try:
                del model
            except Exception:
                pass
            gc.collect()
            try:
                torch.cuda.empty_cache()
            except Exception:
                pass

    if skipped_windows:
        logging.info(
            f"{ticker} skipped windows (already complete): {', '.join(skipped_windows)}"
        )
        record_skips_global(
            ticker,
            skipped_windows=skipped_windows,
            total_windows=len(windows),
            fully_skipped=False,
        )

    # Save top-N QC-compatible
    top_list = sorted(top_heap, key=lambda t: t[0], reverse=True)
    for _, _, meta in top_list:
        artifact_for_save = {
            "model": None,  # we're copying from disk, not re-saving an in-memory model
            "model_path": meta["model_path"],
            "vecnorm_path": meta["vecnorm_path"],
            "features": meta["features"],
            "result": meta["result"],
            "prefix": meta["prefix"],
        }
        save_quantconnect_model(artifact_for_save, meta["prefix"], QC_TOP_DIR)

    return results

def process_ticker(ticker):
    """Run the walk-forward training for one ticker; never raises.

    Takes the ticker's rows from the module-level ``df``, picks the
    hyper-parameter preset via ``pick_params`` and calls
    ``walkforward_ppo``. Any exception is logged and turned into an empty
    result list.
    """
    try:
        preset = pick_params(ticker)
        ticker_rows = df[df["Symbol"] == ticker].copy()
        summary_rows = walkforward_ppo(
            ticker_rows,
            ticker,
            window_size=WINDOW_SIZE,
            step_size=STEP_SIZE,
            timesteps=TIMESTEPS,
            learning_rate=preset["lr"],
            ppo_overrides=preset,
        )
    except Exception as err:
        logging.error(f"{ticker}: training failed with {err}")
        return []
    return summary_rows


from concurrent.futures import ThreadPoolExecutor

def run_parallel_tickers(tickers,
                         out_path=os.path.join(RUN_RESULTS_DIR, "summary.csv"),
                         max_workers=8):
    """Process all tickers on a thread pool and write one summary CSV.

    Args:
        tickers: Iterable of symbols handed to ``process_ticker``.
        out_path: Destination CSV; written only when there is at least one
            summary row.
        max_workers: Thread-pool size.

    Returns:
        Flat list of the summary rows of all tickers, in ticker order.
    """
    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        results = [
            row
            for ticker_rows in pool.map(process_ticker, tickers)
            if ticker_rows
            for row in ticker_rows
        ]

    if not results:
        logging.warning("No results produced; summary not written.")
    else:
        pd.DataFrame(results).to_csv(out_path, index=False)
        logging.info(f"Saved summary to {out_path}")

    logging.info("All tickers processed.")
    return results

def build_ppo_selector():
    """Aggregate all summary*.csv across runs and build selector JSON.

    Steps:
      1. Read every ``summary*.csv`` under ``ppo_walkforward_results_*`` in
         ``BASE_RESULTS_DIR`` (unreadable files are reported and skipped).
      2. Keep one row per (Ticker, window start) -- the one from the latest
         file in sorted path order -- and number each ticker's windows 1..N.
      3. Pick the single highest-Sharpe row per ticker, write the flat table
         to ``SELECTOR_FULL_PATH``.
      4. Apply the safety gates and write the surviving tickers to
         ``SELECTOR_JSON_PATH``.

    Returns None. If no summaries are found, or the combined table lacks the
    ``Ticker`` / ``Window`` / ``Sharpe`` columns, or no row has a numeric
    Sharpe, a warning is logged and nothing is written.
    """
    import math

    # Sorted so "last" below is deterministic. Run folders are named with a
    # timestamp tag, so path order is presumably chronological -- verify if
    # the folder naming ever changes.
    summary_files = sorted(glob.glob(
        os.path.join(BASE_RESULTS_DIR, "ppo_walkforward_results_*", "summary*.csv")
    ))
    all_summaries = []

    for p in summary_files:
        try:
            tmp = pd.read_csv(p)
            tmp["RunFolder"] = os.path.dirname(p)
            all_summaries.append(tmp)
        except Exception as e:
            print(f"⚠️ Skipping {p} due to error: {e}")

    if not all_summaries:
        logging.warning("No PPO summaries found across walkforward results folders.")
        return

    combo = pd.concat(all_summaries, ignore_index=True)

    # Everything below keys on these columns; bail out with a clear message
    # instead of failing later with a bare KeyError.
    missing_required = [c for c in ("Ticker", "Window", "Sharpe") if c not in combo.columns]
    if missing_required:
        logging.warning(
            f"Combined summaries are missing required columns {missing_required}; "
            "selector not built."
        )
        return

    # Robust Sharpe handling (avoid string sorting / bad rows from older runs)
    combo["Sharpe"] = pd.to_numeric(combo["Sharpe"], errors="coerce")
    combo = combo.dropna(subset=["Sharpe", "Ticker"])
    if combo.empty:
        logging.warning("No rows with a numeric Sharpe in combined summaries; selector not built.")
        return

    # Ensure key columns exist for robust ratios
    if "BuyHold" not in combo.columns:
        combo["BuyHold"] = np.nan
    if "PPO_Portfolio" not in combo.columns:
        combo["PPO_Portfolio"] = np.nan

    # parse Window "start-end" to WindowStart
    def _parse_window_start(w):
        if pd.isna(w):
            return None
        if isinstance(w, (int, float)):
            return int(w)
        parts = str(w).split("-")
        try:
            return int(parts[0])
        except Exception:
            return None

    combo["WindowStart"] = pd.to_numeric(
        combo["Window"].apply(_parse_window_start), errors="coerce"
    )

    # De-duplicate BEFORE numbering. The same window can appear in several
    # run summaries; numbering first would give each copy its own WindowIdx
    # and shift every later window, so the rebuilt prefix would point at the
    # wrong model file. Rows whose window start could not be parsed are kept.
    repeated = combo["WindowStart"].notna() & combo.duplicated(
        subset=["Ticker", "WindowStart"], keep="last"
    )
    combo = combo[~repeated]

    combo = combo.sort_values(["Ticker", "WindowStart"]).reset_index(drop=True)
    combo["WindowIdx"] = combo.groupby("Ticker").cumcount() + 1

    # Best window per ticker = the whole highest-Sharpe ROW.
    # (groupby().first() would take the first non-null value of each column
    # independently and could stitch together values from different windows.)
    best_by_symbol = (
        combo
        .sort_values("Sharpe", ascending=False, kind="mergesort")
        .drop_duplicates(subset="Ticker", keep="first")
        .sort_values("Ticker")
        .reset_index(drop=True)
    )
    best_by_symbol = best_by_symbol[
        ["Ticker"] + [c for c in best_by_symbol.columns if c != "Ticker"]
    ]

    # If Drawdown_% missing (older runs), create it so rename won't break
    if "Drawdown_%" not in best_by_symbol.columns:
        best_by_symbol["Drawdown_%"] = np.nan

    # Ensure precision cols exist
    for col in ["Precision_Long", "Precision_Short", "Precision_Trades"]:
        if col not in best_by_symbol.columns:
            best_by_symbol[col] = None  # or np.nan if you prefer

    # Rename columns so everything downstream uses consistent names
    best_by_symbol = best_by_symbol.rename(columns={
        "Drawdown_%": "Drawdown",
        "PPO_Portfolio": "Final_Portfolio",
    })

    # Ensure Accuracy / Trade_Count exist
    if "Accuracy" not in best_by_symbol.columns:
        best_by_symbol["Accuracy"] = 0.0
    if "Trade_Count" not in best_by_symbol.columns:
        best_by_symbol["Trade_Count"] = None

    best_by_symbol["Model"] = MODEL_NAME

    # PPO vs Buy & Hold ratio (safe division): a zero or missing BuyHold
    # becomes NaN in the denominator, so the ratio is NaN rather than inf.
    final_value = pd.to_numeric(best_by_symbol["Final_Portfolio"], errors="coerce")
    buy_hold = pd.to_numeric(best_by_symbol["BuyHold"], errors="coerce")
    best_by_symbol["Rel_vs_BH"] = final_value / buy_hold.where(buy_hold != 0)

    # Save flat CSV for debugging
    best_by_symbol.to_csv(SELECTOR_FULL_PATH, index=False)
    print(f"Aggregated PPO selector saved to → {SELECTOR_FULL_PATH}")

    # Safety filters (tune as needed); missing values always fail their gate
    df_sel = best_by_symbol.copy()
    gates = (
        (df_sel["Sharpe"].fillna(-999) > 0.0) &
        (df_sel["Drawdown"].fillna(999) < 50.0) &
        (df_sel["Final_Portfolio"].fillna(0) > 80_000) &
        (df_sel["Rel_vs_BH"].fillna(0) >= 0.95)   # PPO ≥ 95% of B&H; change to >1.0 to enforce beat
    )
    df_sel = df_sel[gates].copy()

    rebuilt_prefix = (
        "ppo_"
        + df_sel["Ticker"].astype(str)
        + "_window"
        + df_sel["WindowIdx"].astype(int).astype(str)
    )
    # Prefer the prefix recorded at training time when the summaries carry
    # one (same preference as export_qc_top_from_existing); fall back to the
    # reconstructed name for older summaries without it.
    if "Prefix" in df_sel.columns:
        saved_prefix = df_sel["Prefix"]
        df_sel["prefix"] = saved_prefix.where(saved_prefix.notna(), rebuilt_prefix).astype(str)
    else:
        df_sel["prefix"] = rebuilt_prefix

    df_sel["artifact_path"] = df_sel["prefix"].apply(
        lambda p: os.path.join(FINAL_MODEL_DIR, f"{p}_model.zip")
    )
    df_sel["vecnorm_path"] = df_sel["prefix"].apply(
        lambda p: os.path.join(FINAL_MODEL_DIR, f"{p}_vecnorm.pkl")
    )

    EPS = 0.03  # 3% of top-sharpe for "close enough"
    selected_models = {}

    def _coerce(v, caster, default):
        """Cast ``v`` with ``caster``; None, NaN and uncastable values give ``default``."""
        try:
            if v is None or (isinstance(v, float) and math.isnan(v)):
                return caster(default)
            return caster(v)
        except (ValueError, TypeError, OverflowError):
            return caster(default)

    def safe_int(v, default=0):
        return _coerce(v, int, default)

    def safe_float(v, default=0.0):
        return _coerce(v, float, default)

    # NOTE(review): df_sel holds exactly one row per ticker at this point, so
    # `second` is always None and every entry is written with mode "single".
    # The ensemble branch is kept for when more than one candidate per ticker
    # is fed in.
    for ticker, group in df_sel.groupby("Ticker"):
        group_sorted = group.sort_values("Sharpe", ascending=False)
        top = group_sorted.iloc[0]
        second = group_sorted.iloc[1] if len(group_sorted) > 1 else None

        if (second is not None) and (
            abs(top["Sharpe"] - second["Sharpe"]) <= abs(top["Sharpe"]) * EPS
        ):
            mode = "ensemble"
            primary, secondary = top["Model"], second["Model"]
        else:
            mode = "single"
            primary, secondary = top["Model"], None

        selected_models[ticker] = {
            "model": MODEL_NAME,
            "score": round(safe_float(top["Sharpe"]), 4),
            "return": round(safe_float(top["Final_Portfolio"]), 2),
            "sharpe": round(safe_float(top["Sharpe"]), 3),
            "drawdown": round(safe_float(top["Drawdown"]), 2),
            "sortino": None,
            "turnover": None,
            "trade_count": safe_int(top.get("Trade_Count", 0)),
            "precision": {
                "long":   safe_float(top.get("Precision_Long", 0.0)),
                "short":  safe_float(top.get("Precision_Short", 0.0)),
                "trades": safe_float(top.get("Precision_Trades", 0.0)),
            },
            "stability": {},
            "regime": "unknown",
            "rl_profile": "fast",
            "artifact": {
                "path": top["artifact_path"],
                "vecnorm": top["vecnorm_path"],
                "features": None,
                "load_ms": 180,
                "mem_mb": 512,
                "exists": os.path.exists(top["artifact_path"]),
            },
            "selection": {
                "mode": mode,
                "primary": primary,
                "secondary": secondary,
            },
        }

    with open(SELECTOR_JSON_PATH, "w") as f:
        json.dump(selected_models, f, indent=2)

    print(f"Final enhanced PPO selector JSON saved to → {SELECTOR_JSON_PATH}")

if __name__ == "__main__":
    # Entry point: choose the training universe, run the walk-forward
    # training (serial test mode or parallel full mode), then rebuild the
    # cross-run model selector.
    logging.info(f"RUN_RESULTS_DIR   = {RUN_RESULTS_DIR}")
    logging.info(f"FINAL_MODEL_DIR  = {FINAL_MODEL_DIR}")
    logging.info(f"BASE_RESULTS_DIR = {BASE_RESULTS_DIR}")

    # --- 1) Keep only symbols with enough rows for at least one window ---
    min_rows = WINDOW_SIZE + 50  # small buffer so we have at least one window
    all_symbols = df["Symbol"].value_counts()
    candidate_symbols = []

    for sym, n in all_symbols.items():
        if n >= min_rows:
            candidate_symbols.append(sym)
        else:
            logging.warning(f"Skipping {sym}: only {n} rows (< {min_rows} required)")

    if not candidate_symbols:
        logging.error("No symbols have enough rows for the current WINDOW_SIZE. Nothing to train.")
    else:
        logging.info(f"Training candidate symbols: {candidate_symbols}")

    # --- 2) Required-column check ---
    needed_cols = ["Close", "Datetime"]
    if ENABLE_WAVELET:
        needed_cols.append("Denoised_Close")
    if ENABLE_SENTIMENT:
        needed_cols.append("SentimentScore")

    # Column names are identical for every symbol's slice of `df`, so the
    # check is done once instead of building a boolean row mask per symbol.
    cols = set(df.columns)
    missing = [c for c in needed_cols if c not in cols]

    valid_symbols = []
    for sym in candidate_symbols:
        if missing:
            logging.warning(f"Skipping {sym}: missing required cols {missing}")
        else:
            valid_symbols.append(sym)

    if not valid_symbols:
        logging.error("No symbols passed the feature/column checks. Nothing to train.")
    else:
        logging.info(f"Final training universe: {valid_symbols}")

    all_results = []

    if test_mode:
        # Optional: shrink timesteps and/or window size in test mode.
        # This rebinds the module-level TIMESTEPS that process_ticker reads.
        TIMESTEPS = 100_000   # lighter test
        # WINDOW_SIZE = 2000  # uncomment if you want faster test runs
        # STEP_SIZE   = 500

        test_stocks = ["AAPL", "NVDA", "MSFT"]
        present = [s for s in test_stocks if s in valid_symbols]
        if not present:
            logging.warning("Test mode: none of ['AAPL','NVDA','MSFT'] present after filters.")
        else:
            logging.info(f"Test mode: running on {present}")

        for sym in present:
            logging.info(f">>> [TEST_MODE] Processing {sym}")
            res = process_ticker(sym)
            logging.info(f"{sym}: produced {len(res)} window summaries")
            if res:
                all_results.extend(res)

        summary_path = os.path.join(RUN_RESULTS_DIR, "summary_test_mode.csv")
        if all_results:
            pd.DataFrame(all_results).to_csv(summary_path, index=False)
            logging.info(f"Test-mode summary saved to {summary_path}")
        else:
            logging.warning("Test mode finished but no results were generated (no windows, or all skipped).")

    else:
        logging.info("Starting full parallel PPO walkforward run...")
        summary_path = os.path.join(RUN_RESULTS_DIR, "summary.csv")
        # run_parallel_tickers writes the summary CSV itself; writing it a
        # second time here was redundant, so the path is passed explicitly.
        summary_results = run_parallel_tickers(valid_symbols, out_path=summary_path)
        if not summary_results:
            logging.warning("No results generated in full run (check logs for skips/length issues).")

    # Rebuild the selector even if this run produced nothing: it aggregates
    # the summaries of all runs on disk.
    try:
        build_ppo_selector()
    except Exception as e:
        # logging.exception keeps the traceback, which the bare message lost.
        logging.exception(f"build_ppo_selector failed: {e}")

Gym has been unmaintained since 2022 and does not support NumPy 2.0 amongst other critical functionality.
Please upgrade to Gymnasium, the maintained drop-in replacement of Gym, or contact the authors of your software and request that they upgrade.
See the migration guide at https://gymnasium.farama.org/introduction/migration_guide/ for additional information.
2025-12-26 00:44:30,958 - INFO - RUN_RESULTS_DIR   = /content/drive/MyDrive/Results_May_2025/ppo_walkforward_results_20251226_0044
2025-12-26 00:44:30,960 - INFO - FINAL_MODEL_DIR  = /content/drive/MyDrive/Results_May_2025/ppo_models_master
2025-12-26 00:44:30,961 - INFO - BASE_RESULTS_DIR = /content/drive/MyDrive/Results_May_2025
2025-12-26 00:44:31,078 - INFO - Training candidate symbols: ['AAPL', 'QCOM', 'COST', 'RTX', 'BRK-B', 'SBUX', 'TMO', 'BAC', 'TSLA', 'CRM', 'AVGO', 'TXN', 'UNH', 'ADBE', 'AMGN', 'UNP', 'AMD', 'ACN', 'AMZN', 'ABT', 'WMT', 'ABBV', 'XOM', 'PFE', 'PM', 'CSCO', 'PG', 'LLY', 'MA', 'MCD', 'META', 'MRK', 'JPM', 

Aggregated PPO selector saved to → /content/drive/MyDrive/Results_May_2025/ppo_model_selector_FULL.csv
Final enhanced PPO selector JSON saved to → /content/drive/MyDrive/Results_May_2025/ppo_model_selector_final.json


In [None]:
# PPO walkforward training + selector
import os, gc, time, json, logging, glob
import shutil
from threading import Lock
from datetime import datetime, timedelta

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt  # optional for ad-hoc plots

import torch
import gymnasium as gym
from gymnasium.spaces import Box as GBox

import yfinance as yf
from gym_anytrading.envs import StocksEnv

from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
from stable_baselines3.common.utils import set_random_seed

import warnings
warnings.filterwarnings("ignore", category=UserWarning, module="gymnasium")
# ---- Sharpe annualization helper (intraday heuristic: 6.5 hrs * 252) ----
def _annualization_factor(_df_like=None) -> float:
    """Return sqrt(252 * 6.5), the factor used to annualize a per-bar Sharpe.

    The argument is accepted for call-site compatibility and ignored.
    NOTE(review): 252 * 6.5 is the number of bars per year only for hourly
    bars -- confirm this matches the bar interval of the dataset.
    """
    trading_days_per_year = 252
    trading_hours_per_day = 6.5
    return np.sqrt(trading_days_per_year * trading_hours_per_day)

warnings.filterwarnings("ignore", category=DeprecationWarning, module="jupyter_client.session")
warnings.filterwarnings("ignore", message=".*Gym has been unmaintained.*")

try:
    compute_enhanced_features  # type: ignore
except NameError:
    # No feature builder was defined by an earlier cell: install a minimal
    # stand-in so the live-inference helpers still have something to call.
    def compute_enhanced_features(df_in: pd.DataFrame) -> pd.DataFrame:
        """Fallback feature builder: copy, order by time, validate 'Close'.

        Works on a copy of ``df_in``. When a 'Datetime' column exists it is
        parsed to datetimes and the rows are sorted by it with a fresh
        0..n-1 index. Raises ValueError when 'Close' is absent.
        """
        frame = df_in.copy()
        has_timestamps = "Datetime" in frame.columns
        if has_timestamps:
            frame["Datetime"] = pd.to_datetime(frame["Datetime"])
            frame = frame.sort_values("Datetime").reset_index(drop=True)
        if "Close" in frame.columns:
            return frame
        raise ValueError("compute_enhanced_features: missing required column 'Close'")

# Seed via SB3's set_random_seed helper so repeated runs start from the same seed.
set_random_seed(42)

# Root folder for everything this notebook writes (Google Drive mount in Colab).
BASE_RESULTS_DIR = "/content/drive/MyDrive/Results_May_2025"
# Minute-resolution tag, so each run gets its own results folder.
RUN_TAG = datetime.now().strftime("%Y%m%d_%H%M")

# Per-run outputs (summaries, skip log) vs. folders shared across runs.
RUN_RESULTS_DIR = os.path.join(BASE_RESULTS_DIR, f"ppo_walkforward_results_{RUN_TAG}")
FINAL_MODEL_DIR = os.path.join(BASE_RESULTS_DIR, "ppo_models_master")
QC_TOP_DIR      = os.path.join(BASE_RESULTS_DIR, "ppo_models_QC_TOP")

os.makedirs(QC_TOP_DIR, exist_ok=True)
os.makedirs(RUN_RESULTS_DIR, exist_ok=True)
os.makedirs(FINAL_MODEL_DIR, exist_ok=True)

# Aggregated selector outputs
SELECTOR_FULL_PATH = os.path.join(BASE_RESULTS_DIR, "ppo_model_selector_FULL.csv")
SELECTOR_JSON_PATH = os.path.join(BASE_RESULTS_DIR, "ppo_model_selector_final.json")
MODEL_NAME = "PPO"

# Global skip aggregation (thread-safe): record_skips_global appends to this
# CSV while holding SKIP_LOCK.
SKIP_AGG_PATH = os.path.join(RUN_RESULTS_DIR, "skipped_windows_global.csv")
SKIP_LOCK = Lock()

# Logging Setup
# force=True replaces any handlers already installed on the root logger, so
# re-running this cell does not keep stale logging configuration.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    force=True
)

# Flags
ENABLE_SENTIMENT = False
ENABLE_SLO       = True
ENABLE_WAVELET   = True
test_mode        = True            # set False for full universe
ENABLE_PLOTS     = False
LIVE_MODE        = False           # set True to run simple live/paper loop
SIM_LATENCY_MS   = 0               # broker latency simulation; 0 = off
BROKER           = "log"           # "log" = do not place orders, just log

# Global training settings
WINDOW_SIZE = 3500     # rows per walk-forward window
STEP_SIZE   = 500      # rows between consecutive window starts
TIMESTEPS   = 150_000  # overridden in test_mode block to smaller value


# The dataset is loaded once at module level; process_ticker slices this
# global frame per symbol.
DATA_PATH = "multi_stock_feature_engineered_dataset.csv"
if not os.path.exists(DATA_PATH):
    raise FileNotFoundError("Required feature-engineered dataset not found!")

df = pd.read_csv(DATA_PATH)
df["Datetime"] = pd.to_datetime(df["Datetime"])

# Wavelet fallback: without a denoised series, use the raw Close so code
# that reads 'Denoised_Close' still works (no denoising is applied here).
if ENABLE_WAVELET and "Denoised_Close" not in df.columns:
    logging.warning("ENABLE_WAVELET=True but 'Denoised_Close' missing; "
                    "falling back to Close->Denoised_Close.")
    df["Denoised_Close"] = df["Close"]


def record_skips_global(ticker: str, skipped_windows: list,
                        total_windows: int = None, fully_skipped: bool = False):
    """Append this ticker's skipped windows to the run-wide skip CSV.

    Does nothing when there are no skipped windows and the ticker was not
    fully skipped. Otherwise, while holding ``SKIP_LOCK``, appends to
    ``SKIP_AGG_PATH`` (writing the header first if the file is new):

    * one ``ALL`` row when ``fully_skipped`` is true, or
    * one row per entry in ``skipped_windows``, with the integer that
      follows ``_window`` in the entry (blank if it cannot be parsed).

    ``total_windows`` is written as a blank cell when it is None.
    """
    if not (skipped_windows or fully_skipped):
        return
    import csv

    total_cell = "" if total_windows is None else total_windows

    def _window_number(window_name):
        # "<anything>_window<N>" -> N; any other shape -> blank cell
        try:
            _, suffix = window_name.split("_window")
            return int(suffix)
        except Exception:
            return ""

    with SKIP_LOCK:
        needs_header = not os.path.exists(SKIP_AGG_PATH)
        with open(SKIP_AGG_PATH, "a", newline="") as f:
            writer = csv.writer(f)
            if needs_header:
                writer.writerow(["Ticker", "Window", "FullySkipped", "TotalWindows"])
            if fully_skipped:
                writer.writerow([ticker, "ALL", True, total_cell])
            else:
                for window_name in skipped_windows:
                    writer.writerow([ticker, _window_number(window_name), False, total_cell])


# Keyword arguments passed to ContinuousPositionEnv. The env rejects any key
# it does not recognise, so additions here must be mirrored in its __init__.
ENV_KWARGS = dict(
    window_size=10,     # forwarded to the parent StocksEnv as its window_size
    cost_rate=0.0002,   # cost_rate + slip_rate is charged per unit of |position change|
    slip_rate=0.0003,

    k_alpha=0.0,        # reward weight on (position return - asset return)
    k_mom=0.15,         # reward weight on position * momentum signal
    k_sent=(0.01 if ENABLE_SENTIMENT else 0.0),  # reward weight on SentimentScore
    mom_source="denoised",  # "macd" uses MACD_Line; anything else uses the Denoised_Close slope
    mom_lookback=20,        # bars between the two Denoised_Close samples of the slope

    min_trade_delta=0.08,   # smallest |target - current| position change that counts as a trade
    cooldown=10,            # minimum ticks between two executed trades

    reward_clip=0.05,   # shaped reward is clipped to [-reward_clip, +reward_clip]
    k_vol=0.00,         # stored on the env; not read by its step() reward
    k_dd=0.00,          # stored on the env; not read by its step() reward
)


class ContinuousPositionEnv(StocksEnv):
    """StocksEnv variant driven by a continuous target position in [-1, 1].

    The action is a single float interpreted as the desired position. The
    environment keeps its own NAV / position bookkeeping and computes its own
    shaped reward; the parent environment is only stepped to advance the tick
    and produce the observation (its reward is discarded).

    A position change is executed only when it is at least
    ``min_trade_delta`` in size and at least ``cooldown`` ticks have passed
    since the previous executed trade.
    """

    def __init__(self, df, frame_bound, **kwargs):
        """Build the env over ``df`` restricted to ``frame_bound``.

        ``kwargs`` must contain ``window_size`` and may contain the tuning
        keys popped below. Raises ValueError when ``window_size`` is missing
        or when any unrecognised key is left over.
        """
        # Require window_size from ENV_KWARGS
        if "window_size" not in kwargs:
            raise ValueError("ContinuousPositionEnv requires window_size (pass via ENV_KWARGS).")

        window_size = int(kwargs.pop("window_size"))

        # Pull params (all defaults live in ENV_KWARGS; these are just safety fallbacks)
        # NOTE(review): the fallbacks for min_trade_delta (0.04) and cooldown (6)
        # differ from the ENV_KWARGS values (0.08 and 10).
        cost_rate       = float(kwargs.pop("cost_rate", 0.0002))
        slip_rate       = float(kwargs.pop("slip_rate", 0.0003))
        k_alpha         = float(kwargs.pop("k_alpha", 0.0))
        k_mom           = float(kwargs.pop("k_mom", 0.15))
        k_sent          = float(kwargs.pop("k_sent", 0.0))
        mom_source      = str(kwargs.pop("mom_source", "denoised"))
        mom_lookback    = int(kwargs.pop("mom_lookback", 20))
        min_trade_delta = float(kwargs.pop("min_trade_delta", 0.04))
        cooldown        = int(kwargs.pop("cooldown", 6))
        reward_clip     = float(kwargs.pop("reward_clip", 0.05))
        k_vol           = float(kwargs.pop("k_vol", 0.0))
        k_dd            = float(kwargs.pop("k_dd", 0.0))

        # Fail fast on unexpected env kwargs
        if kwargs:
            raise ValueError(f"Unexpected env kwargs: {list(kwargs.keys())}")

        # The index is reset so that the .loc lookups by tick below address
        # rows by position 0..n-1.
        super().__init__(
            df=df.reset_index(drop=True),
            frame_bound=frame_bound,
            window_size=window_size
        )

        # Rebuild the observation space as a Box with the same bounds, shape
        # and dtype as the one the parent created.
        if isinstance(self.observation_space, gym.spaces.Box):
            self.observation_space = GBox(
                low=self.observation_space.low,
                high=self.observation_space.high,
                shape=self.observation_space.shape,
                dtype=self.observation_space.dtype,
            )

        # Stored for completeness; step() below does not use either weight.
        self.k_vol = k_vol
        self.k_dd  = k_dd

        # Per-episode bookkeeping (re-initialised in reset()).
        # ret_history is created and cleared but never appended to in this class.
        self.ret_history = []
        self.nav_history = []
        self.peak_nav    = 1.0
        self.trade_count = 0

        # Replace the parent's action space: one float, the target position.
        self.action_space = GBox(low=-1.0, high=1.0, shape=(1,), dtype=np.float32)


        self.cost_rate       = cost_rate
        self.slip_rate       = slip_rate
        self.k_alpha         = k_alpha
        self.k_mom           = k_mom
        self.k_sent          = k_sent
        self.mom_source      = mom_source
        self.mom_lookback    = mom_lookback
        self.min_trade_delta = min_trade_delta
        self.cooldown        = cooldown
        self.reward_clip     = reward_clip

        self.nav = 1.0   # net asset value, normalised to 1.0 at episode start
        self.pos = 0.0   # current position in [-1, 1]
        # Start one full cooldown "in the past" so the cooldown test in step()
        # passes for any tick >= 0.
        self._last_trade_step = -self.cooldown

    def reset(self, **kwargs):
        """Reset parent and local bookkeeping; always returns ``(obs, info)``.

        Accepts either a tuple or a bare observation from the parent's reset
        and adds ``nav``, ``pos`` and ``trade_count`` to the info dict.
        """
        out = super().reset(**kwargs)
        if isinstance(out, tuple):
            obs, info = out
        else:
            obs, info = out, {}

        self.nav = 1.0
        self.pos = 0.0
        self._last_trade_step = -self.cooldown

        self.trade_count = 0
        self.ret_history = []
        self.nav_history = [self.nav]
        self.peak_nav    = self.nav

        info = info or {}
        info.update({
            "nav": self.nav,
            "pos": self.pos,
            "trade_count": int(self.trade_count),

        })
        return obs, info

    def _step_parent_hold(self):
        """Advance the parent env one tick and normalise its return shape.

        The parent's reward is dropped. Both the 5-tuple
        (obs, reward, terminated, truncated, info) and the 4-tuple
        (obs, reward, done, info) step signatures are accepted.
        """
        # NOTE(review): action 2 is presumably outside the parent's buy/sell
        # actions and therefore acts as a no-op there -- confirm against the
        # installed gym_anytrading version.
        step_result = super().step(2)
        if len(step_result) == 5:
            obs, _env_rew, terminated, truncated, info = step_result
        else:
            obs, _env_rew, done, info = step_result
            terminated, truncated = bool(done), False
        return obs, terminated, truncated, info

    def _ret_t(self):
        """Simple return of 'Close' from the previous tick to the current tick.

        Returns 0.0 when the previous close is not positive. At tick 0 the
        previous tick is clamped to 0, which also yields 0.0.
        """
        cur  = float(self.df.loc[self._current_tick, "Close"])
        prev = float(self.df.loc[max(self._current_tick - 1, 0), "Close"])
        return 0.0 if prev <= 0 else (cur - prev) / prev

    def _mom_signal(self):
        """Momentum signal squashed into (-1, 1) with tanh.

        * ``mom_source == "macd"`` and a 'MACD_Line' column: current MACD
          divided by its standard deviation over up to the last 201 rows.
        * Otherwise, with a 'Denoised_Close' column and enough history: the
          per-bar slope of Denoised_Close over ``mom_lookback`` bars, relative
          to the previous close, scaled by 10.
        * Otherwise 0.0.
        """
        if self.mom_source == "macd" and "MACD_Line" in self.df.columns:
            recent = self.df["MACD_Line"].iloc[max(self._current_tick - 200, 0):self._current_tick + 1]
            return float(np.tanh(
                float(self.df.loc[self._current_tick, "MACD_Line"]) /
                (1e-6 + float(recent.std()))
            ))

        if "Denoised_Close" in self.df.columns and self._current_tick - self.mom_lookback >= 0:
            now  = float(self.df.loc[self._current_tick, "Denoised_Close"])
            then = float(self.df.loc[self._current_tick - self.mom_lookback, "Denoised_Close"])
            base = float(self.df.loc[max(self._current_tick - 1, 0), "Close"])
            slope = (now - then) / max(self.mom_lookback, 1)
            return float(np.tanh(10.0 * (slope / max(abs(base), 1e-6))))

        return 0.0

    def step(self, action):
        """Apply a target position and return ``(obs, reward, terminated, truncated, info)``.

        Order of operations:
          1. P&L for this tick is earned on the position held coming in.
          2. The requested change is accepted only if it clears the
             ``min_trade_delta`` and ``cooldown`` gates; its cost is charged.
          3. Reward = P&L + shaping terms - cost, clipped to ``reward_clip``.
             NAV compounds on P&L minus cost only (no shaping terms).
          4. The position is updated, then the parent env is advanced.
        """
        a = float(np.array(action).squeeze())
        target_pos = float(np.clip(a, -1.0, 1.0))

        r_t = self._ret_t()
        base_ret = self.pos * r_t   # return earned by the pre-trade position

        changed = (
            abs(target_pos - self.pos) >= self.min_trade_delta
        ) and (
            (self._current_tick - self._last_trade_step) >= self.cooldown
        )

        # A rejected change costs nothing and leaves the position untouched.
        delta_pos = (target_pos - self.pos) if changed else 0.0
        trade_cost = (self.cost_rate + self.slip_rate) * abs(delta_pos)

        rel_alpha = base_ret - r_t   # position return minus the asset's own return
        mom_term = self.pos * self._mom_signal()

        alpha_term = self.k_alpha * rel_alpha

        sent_term = 0.0
        # Gated on the module-level ENABLE_SENTIMENT flag as well as on k_sent.
        if ENABLE_SENTIMENT and "SentimentScore" in self.df.columns:
            sent_term = self.k_sent * float(self.df.loc[self._current_tick, "SentimentScore"])

        shaped = base_ret + alpha_term + (self.k_mom * mom_term) + sent_term - trade_cost
        reward = float(np.clip(shaped, -self.reward_clip, self.reward_clip))


        self.nav *= (1.0 + base_ret - trade_cost)
        self.nav_history.append(self.nav)
        self.peak_nav = max(self.peak_nav, self.nav)

        executed_trade = False
        if changed:
            self.pos = target_pos
            self._last_trade_step = self._current_tick
            self.trade_count += 1
            executed_trade = True

        # Advance the tick last, so everything above used this step's tick.
        obs, terminated, truncated, info = self._step_parent_hold()
        info = info or {}
        info.update({
            "ret_t": r_t,
            "nav": self.nav,
            "pos": self.pos,
            "trade_cost": trade_cost,
            "base_ret": base_ret,
            "rel_alpha": rel_alpha,
            "mom": mom_term,
            "changed": bool(changed),
            "executed_trade": bool(executed_trade),
            "trade_count": int(self.trade_count),
            "delta_pos": float(delta_pos),
        })
        return obs, reward, terminated, truncated, info

def get_mu_sigma(model, obs):
    """Return ``(mu, sigma)`` of the policy's Gaussian action distribution.

    Runs the actor path of ``model.policy`` by hand (obs -> features ->
    policy latent -> action mean) without gradients and reads the standard
    deviation from the policy's ``log_std`` parameter. Both values are
    squeezed to Python floats, so this only works for a single observation
    and a one-dimensional action.
    """
    policy = model.policy
    with torch.no_grad():
        obs_tensor, _vectorized = policy.obs_to_tensor(obs)
        latent_pi, _latent_vf = policy.mlp_extractor(policy.extract_features(obs_tensor))
        mean_tensor = policy.action_net(latent_pi)
        std_tensor = policy.log_std.exp()
        mu = float(mean_tensor.detach().cpu().numpy().squeeze())
        sigma = float(std_tensor.detach().cpu().numpy().squeeze())
    return mu, sigma

def get_walk_forward_windows(df_in, window_size=3500, step_size=500, min_len=1200):
    """List the ``(start, end)`` row ranges of the walk-forward windows.

    Starts run 0, step_size, 2*step_size, ... while ``start < len - min_len``.
    A window is kept only if ``end = start + window_size`` is strictly less
    than the number of rows, so a window that would end exactly on the last
    row is left out.
    """
    n_rows = len(df_in)
    windows = []
    for start in range(0, n_rows - min_len, step_size):
        end = start + window_size
        if end < n_rows:
            windows.append((start, end))
    return windows

def save_quantconnect_model(artifact, prefix, save_dir):
    """Save/copy QC-compatible artifacts into save_dir.

    ``artifact`` is a dict that may carry:
      * ``model``        -- in-memory model with a ``save(path)`` method, or None
      * ``model_path``   -- path of an existing model zip to copy when ``model`` is None
      * ``vecnorm_path`` -- path of the VecNormalize pickle to copy
      * ``features``     -- list of input feature names
      * ``result``       -- summary dict (Ticker, Window, Sharpe, ...), or None

    Files written to ``save_dir``, all named ``{prefix}_*``: ``model.zip``,
    ``vecnorm.pkl``, ``features.json``, ``probability_config.json`` and
    ``model_info.json``. Every step is best-effort: a failure is logged as a
    warning and the remaining steps still run. Returns None.
    """
    import math
    import shutil

    os.makedirs(save_dir, exist_ok=True)

    # An explicit ``"result": None`` must behave like a missing result.
    result = artifact.get("result")
    if result is None:
        result = {}
    features = artifact.get("features", [])

    def _jsonable(obj):
        """json ``default`` hook: unwrap numpy scalars/arrays via ``tolist()``."""
        if hasattr(obj, "tolist"):
            return obj.tolist()
        raise TypeError(f"Object of type {type(obj).__name__} is not JSON serializable")

    def _write_json(suffix, payload):
        """Write ``{prefix}_{suffix}.json``; serialize first so that a failure
        never leaves an empty or truncated file behind."""
        try:
            text = json.dumps(payload, default=_jsonable)
            with open(os.path.join(save_dir, f"{prefix}_{suffix}.json"), "w") as f:
                f.write(text)
        except Exception as e:
            logging.warning(f"[QC SAVE] Could not write {suffix}.json for {prefix}: {e}")

    # --- Model zip: save or copy ---
    model_dst = os.path.join(save_dir, f"{prefix}_model.zip")

    model_obj = artifact.get("model", None)
    model_src = artifact.get("model_path", None)

    try:
        if model_obj is not None:
            # Save from in-memory SB3 model (an existing file is left alone)
            if not os.path.exists(model_dst):
                model_obj.save(model_dst)

        else:
            # Copy from an existing trained window model zip
            if model_src and os.path.exists(model_src):
                if os.path.abspath(model_src) != os.path.abspath(model_dst):
                    shutil.copyfile(model_src, model_dst)
            else:
                # If neither provided, warn loudly
                if not os.path.exists(model_dst):
                    logging.warning(f"[QC SAVE] Missing model for {prefix}: no model_obj and no valid model_path.")
    except Exception as e:
        logging.warning(f"[QC SAVE] Model handling issue for {prefix}: {e}")

    # --- VecNormalize: copy ---
    vecnorm_src = artifact.get("vecnorm_path")
    if vecnorm_src and os.path.exists(vecnorm_src):
        try:
            vecnorm_dst = os.path.join(save_dir, f"{prefix}_vecnorm.pkl")
            if os.path.abspath(vecnorm_src) != os.path.abspath(vecnorm_dst):
                shutil.copyfile(vecnorm_src, vecnorm_dst)
        except Exception as e:
            logging.warning(f"[QC SAVE] VecNormalize handling issue for {prefix}: {e}")
    else:
        logging.warning(f"[QC SAVE] VecNormalize missing for {prefix}: vecnorm_path not found.")

    # --- Features ---
    _write_json("features", {"features": features})

    # --- Probability config ---
    try:
        thr = float(result.get("Action_Threshold", 0.2))
    except Exception:
        thr = 0.2
    # A NaN/inf threshold would make every later "action > threshold" test
    # false (permanent HOLD) and is not valid strict JSON: use the default.
    if not math.isfinite(thr):
        thr = 0.2
    _write_json(
        "probability_config",
        {"threshold": thr, "use_confidence": True, "inference_mode": "deterministic"},
    )

    # --- Model info ---
    try:
        info = {
            "model": "PPO",
            "ticker": result.get("Ticker"),
            "window": result.get("Window"),
            "date_trained": datetime.today().strftime("%Y-%m-%d"),
            "framework": "stable-baselines3",
            "input_features": features,
            "final_portfolio": result.get("PPO_Portfolio"),
            "buy_hold": result.get("BuyHold"),
            "sharpe": result.get("Sharpe"),
        }
    except Exception as e:
        logging.warning(f"[QC SAVE] Could not write model_info.json for {prefix}: {e}")
    else:
        _write_json("model_info", info)

    logging.info(f"[QC SAVE] Saved QC artifacts for {prefix}")

def load_model_and_env(prefix):
    """Load a trained PPO and create a factory to build a matching env window.

    Reads ``{prefix}_model.zip`` from ``FINAL_MODEL_DIR`` (on CPU) and returns
    ``(model, make_env)``. ``make_env(df_window)`` wraps a
    ``ContinuousPositionEnv`` over ``df_window`` in a ``DummyVecEnv`` and, when
    ``{prefix}_vecnorm.pkl`` exists, restores the saved ``VecNormalize``
    statistics in inference mode (no statistics updates, raw rewards).
    """
    model_path = os.path.join(FINAL_MODEL_DIR, f"{prefix}_model.zip")
    vec_path   = os.path.join(FINAL_MODEL_DIR, f"{prefix}_vecnorm.pkl")
    model = PPO.load(model_path, device="cpu")

    def make_env(df_window):
        frame_bound = (50, len(df_window) - 3)
        e = DummyVecEnv([lambda: ContinuousPositionEnv(
            df=df_window, frame_bound=frame_bound, **ENV_KWARGS
        )])
        if os.path.exists(vec_path):
            e = VecNormalize.load(vec_path, e)
        else:
            # Without the saved statistics the policy is fed raw observations,
            # which is wrong for a model trained behind VecNormalize -- make
            # that visible instead of proceeding silently.
            logging.warning(
                f"VecNormalize stats not found for {prefix} at {vec_path}; "
                "observations will NOT be normalized."
            )
        # Inference mode: freeze running statistics and return raw rewards.
        e.training = False
        e.norm_reward = False
        return e

    return model, make_env

def latest_df_for_symbol(symbol, horizon_days=5, interval="1m"):
    """Fetch fresh bars and rebuild features exactly like training.

    Downloads roughly the last ``horizon_days`` days of ``interval`` bars for
    ``symbol`` from yfinance, including the current day, and runs them through
    ``compute_enhanced_features``.

    Returns the feature frame, or None when the download is empty.
    NOTE(review): the requested span is ``horizon_days + 1`` calendar dates;
    yfinance limits how much 1-minute history one request may cover, so keep
    ``horizon_days`` small for ``interval="1m"``.
    """
    from datetime import timezone

    now_utc = datetime.now(timezone.utc)
    start = now_utc - timedelta(days=horizon_days)
    # yfinance treats `end` as exclusive: passing today's date would drop all
    # of today's bars, so request up to (not including) tomorrow.
    end_exclusive = now_utc + timedelta(days=1)
    df_live = yf.download(
        symbol,
        start=start.strftime("%Y-%m-%d"),
        end=end_exclusive.strftime("%Y-%m-%d"),
        interval=interval,
        progress=False,
        auto_adjust=False,
    )
    if df_live is None or df_live.empty:
        return None
    # Newer yfinance versions return (field, ticker) MultiIndex columns even
    # for one symbol; keep only the field level so "Close" etc. are plain names.
    if isinstance(df_live.columns, pd.MultiIndex):
        df_live.columns = df_live.columns.get_level_values(0)
    df_live = df_live.reset_index()
    df_live["Symbol"] = symbol
    df_live = compute_enhanced_features(df_live)
    if ENABLE_WAVELET and "Denoised_Close" not in df_live.columns:
        df_live["Denoised_Close"] = df_live["Close"]
    return df_live

def predict_latest(symbol, prefix):
    """Build last window, fast-forward env, call model.predict(), return a signal.

    Returns None when there is not enough fresh data (< 100 rows). Otherwise
    returns a dict with ``signal`` ("BUY" / "SELL" / "HOLD"), ``confidence``
    (|action|), ``action``, ``threshold``, ``ts``, ``price``, ``mu`` and
    ``sigma``.
    """
    # --- load per-model threshold ---
    cfg_path = os.path.join(FINAL_MODEL_DIR, f"{prefix}_probability_config.json")
    thr = 0.2
    if os.path.exists(cfg_path):
        try:
            with open(cfg_path, "r") as f:
                thr = float(json.load(f).get("threshold", 0.2))
        except Exception:
            thr = 0.2
    # A NaN/inf threshold would turn every comparison below into HOLD.
    if not np.isfinite(thr):
        thr = 0.2

    model, make_env = load_model_and_env(prefix)
    live_df = latest_df_for_symbol(symbol)
    if live_df is None or len(live_df) < 100:
        logging.warning("No fresh data yet for live inference.")
        return None

    df_window = live_df.iloc[-2500:].reset_index(drop=True) if len(live_df) > 2500 else live_df.copy()

    env = make_env(df_window)
    obs = env.reset()
    if isinstance(obs, tuple):
        obs, _ = obs

    # fast-forward with HOLD
    # SB3 vectorized envs reset automatically when an episode ends and then
    # return the FIRST observation of the new episode. The observation that
    # ended the episode is delivered in infos[i]["terminal_observation"], so
    # that is what has to be fed to the model -- otherwise the prediction is
    # made on the oldest bars of the window instead of the newest.
    hold_action = [np.array([0.0], dtype=np.float32)]
    for _ in range(len(df_window) - 1):
        next_obs, _, dones, infos = env.step(hold_action)
        if isinstance(dones, (np.ndarray, list, tuple)) and len(dones) and dones[0]:
            terminal = None
            if len(infos) and isinstance(infos[0], dict):
                terminal = infos[0].get("terminal_observation")
            if terminal is not None:
                # restore the leading batch dimension of a 1-env VecEnv
                obs = np.expand_dims(np.asarray(terminal), axis=0)
            # else: keep the last observation seen before the reset
            break
        obs = next_obs

    action, _ = model.predict(obs, deterministic=True)
    mu, sigma = get_mu_sigma(model, obs)

    a = float(np.array(action).squeeze())

    # --- thresholded signal using loaded thr ---
    if a > thr:
        signal = "BUY"
    elif a < -thr:
        signal = "SELL"
    else:
        signal = "HOLD"

    conf = abs(a)
    ts = df_window["Datetime"].iloc[-1] if "Datetime" in df_window.columns else None
    price = float(df_window["Close"].iloc[-1])

    return dict(
        signal=signal,
        confidence=conf,
        action=a,
        threshold=thr,
        ts=ts,
        price=price,
        mu=mu,
        sigma=sigma,
    )

def place_order(signal, qty=1):
    """Route an order to the configured broker; currently log-only.

    Sleeps for ``SIM_LATENCY_MS`` milliseconds first when that is positive,
    then logs the order. No broker other than ``"log"`` is implemented; any
    other ``BROKER`` value just logs a "not implemented" line.
    """
    if SIM_LATENCY_MS > 0:
        time.sleep(SIM_LATENCY_MS / 1000.0)

    if BROKER != "log":
        logging.info(f"[BROKER={BROKER}] {signal} x{qty} (not implemented)")
        return
    logging.info(f"[PAPER] {signal} x{qty}")

def live_loop(symbol, best_prefix):
    """Poll for a fresh prediction once a minute while ``LIVE_MODE`` is true.

    Each cycle asks ``predict_latest`` for a signal, logs it and passes it to
    ``place_order`` with a quantity of 1. Any exception inside a cycle is
    logged and the loop carries on after the usual sleep.
    """
    poll_seconds = 60  # Poll each minute
    while LIVE_MODE:
        try:
            prediction = predict_latest(symbol, best_prefix)
            if prediction:
                summary_line = (
                    f"{symbol} {prediction['ts']} | {prediction['signal']} "
                    f"@ {prediction['price']:.2f} (conf {prediction['confidence']:.2f})"
                )
                logging.info(summary_line)
                place_order(prediction["signal"], qty=1)
        except Exception as e:
            logging.error(f"Live loop error: {e}")
        time.sleep(poll_seconds)

# How many of the best windows per ticker are kept for the QC export.
TOP_N_WINDOWS = 3

# PPO hyper-parameter profiles. FAST has the higher learning rate, wider clip
# range and larger entropy bonus; SLOW is the conservative counterpart.
FAST = dict(lr=8e-5, n_steps=3072, batch=512, clip=0.2, ent=0.01)
SLOW = dict(lr=3e-5, n_steps=3072, batch=512, clip=0.16, ent=0.005)

# Symbols trained with the FAST profile.
fast_names = {
    "AAPL", "ADBE", "AMD", "AMZN", "AVGO", "CRM", "CVX", "GE", "GOOGL",
    "INTC", "LLY", "META", "MSFT", "NEE", "NKE", "NVDA", "ORCL", "QCOM",
    "SBUX", "TSLA", "TXN", "XOM",
}
# Symbols listed for the SLOW profile. pick_params does not consult this set:
# every symbol that is not in fast_names gets SLOW.
slow_names = {
    "ABBV", "ABT", "ACN", "AMGN", "BAC", "BRK-B", "COST", "DHR", "HD", "IBM",
    "JNJ", "JPM", "KO", "LIN", "LOW", "MA", "MCD", "MRK", "PEP", "PFE", "PG",
    "PM", "RTX", "TMO", "UNH", "UNP", "UPS", "V", "WMT",
}


def pick_params(symbol: str):
    """Return the hyper-parameter profile for ``symbol``: FAST if listed in
    ``fast_names``, SLOW for everything else."""
    if symbol in fast_names:
        return FAST
    return SLOW

def export_qc_top_from_existing(ticker: str, top_n: int = 3):
    """
    Populate QC_TOP_DIR for a ticker whose models already exist on disk.

    Reads every ``summary*.csv`` under ``BASE_RESULTS_DIR/ppo_walkforward_results_*``,
    keeps the rows for *ticker*, ranks them by Sharpe and hands the ``top_n``
    best windows to ``save_quantconnect_model`` using the artifacts stored in
    FINAL_MODEL_DIR.

    The model prefix is resolved per row, in this order of preference:
      1. the saved ``Prefix`` column,
      2. the saved ``WindowIdx`` column,
      3. a window index rebuilt from the order of the ``Window`` start offsets
         (least robust: only correct when the summaries cover windows 1..k).
    Rows that cannot be resolved are ignored with a warning. When the same
    window appears in several summary files only its best-Sharpe row is kept,
    so duplicates cannot use up several of the ``top_n`` slots.

    Args:
        ticker: Symbol whose windows should be exported.
        top_n: Maximum number of distinct windows to export.

    Returns:
        None. Every problem is reported through ``logging.warning``.
    """
    summary_files = glob.glob(os.path.join(BASE_RESULTS_DIR, "ppo_walkforward_results_*", "summary*.csv"))
    if not summary_files:
        logging.warning(f"[QC_TOP] No summary files found; cannot export QC_TOP for {ticker}.")
        return

    frames = []
    for p in summary_files:
        try:
            frames.append(pd.read_csv(p))
        except Exception as e:
            logging.warning(f"[QC_TOP] Could not read {p}: {e}")

    if not frames:
        logging.warning(f"[QC_TOP] Could not read any summary files; cannot export QC_TOP for {ticker}.")
        return

    combo = pd.concat(frames, ignore_index=True)

    if "Ticker" not in combo.columns:
        logging.warning("[QC_TOP] Summary files missing 'Ticker' column; cannot export.")
        return

    combo = combo[combo["Ticker"] == ticker].copy()
    if combo.empty or "Sharpe" not in combo.columns:
        logging.warning(f"[QC_TOP] No rows for {ticker} in summaries (or missing Sharpe); cannot export QC_TOP.")
        return

    # Ensure Sharpe is numeric so sorting works reliably
    combo["Sharpe"] = pd.to_numeric(combo["Sharpe"], errors="coerce")
    combo = combo.dropna(subset=["Sharpe"])
    if combo.empty:
        logging.warning(f"[QC_TOP] All Sharpe values were non-numeric for {ticker}; cannot export.")
        return

    def _window_start(w):
        """Parse the start offset out of a 'start-end' window label."""
        try:
            s = str(w)
            return int(s.split("-")[0]) if "-" in s else np.nan
        except Exception:
            return np.nan

    # Window index per row: the saved value wins, a rebuilt one fills the gaps.
    if "WindowIdx" in combo.columns:
        window_idx = pd.to_numeric(combo["WindowIdx"], errors="coerce")
    else:
        window_idx = pd.Series(np.nan, index=combo.index, dtype=float)

    if window_idx.isna().any() and "Window" in combo.columns:
        starts = combo["Window"].apply(_window_start)
        # 1-based position of each distinct start offset; duplicates of the
        # same window therefore share one index.
        position = {s: i + 1 for i, s in enumerate(sorted(starts.dropna().unique()))}
        window_idx = window_idx.fillna(starts.map(position))

    rebuilt_prefix = window_idx.map(
        lambda widx: f"ppo_{ticker}_window{int(widx)}" if pd.notna(widx) else np.nan
    )
    if "Prefix" in combo.columns:
        # Row-wise fallback: a missing Prefix must not become the string "nan".
        resolved_prefix = combo["Prefix"].where(combo["Prefix"].notna(), rebuilt_prefix)
    else:
        resolved_prefix = rebuilt_prefix
    combo["__prefix__"] = resolved_prefix

    unresolved = combo["__prefix__"].isna()
    if unresolved.any():
        logging.warning(
            f"[QC_TOP] Ignoring {int(unresolved.sum())} summary row(s) for {ticker} "
            f"without a usable Prefix/WindowIdx/Window."
        )
        combo = combo[~unresolved].copy()
    combo["__prefix__"] = combo["__prefix__"].astype(str)

    top = (
        combo.sort_values("Sharpe", ascending=False)
        .drop_duplicates(subset="__prefix__", keep="first")
        .head(top_n)
        .copy()
    )

    exported = 0

    for _, r in top.iterrows():
        prefix = str(r["__prefix__"])

        model_path = os.path.join(FINAL_MODEL_DIR, f"{prefix}_model.zip")
        vec_path   = os.path.join(FINAL_MODEL_DIR, f"{prefix}_vecnorm.pkl")

        if not (os.path.exists(model_path) and os.path.exists(vec_path)):
            logging.warning(f"[QC_TOP] Missing model/vecnorm for {prefix}; cannot export.")
            continue

        artifact_for_save = {
            "model": None,
            "model_path": model_path,
            "vecnorm_path": vec_path,
            "features": [],         # ok if unknown; QC can load features elsewhere
            "result": r.to_dict(),  # includes Sharpe, Action_Threshold, etc if present
            "prefix": prefix,
        }
        save_quantconnect_model(artifact_for_save, prefix, QC_TOP_DIR)
        exported += 1

    logging.info(f"[QC_TOP] Exported {exported}/{len(top)} QC artifacts for {ticker}.")

def walkforward_ppo(df_sym, ticker,
                    window_size=3500, step_size=500,
                    timesteps=150_000, learning_rate=1e-4,
                    ppo_overrides=None):
    """Train and evaluate one PPO model per walk-forward window of a ticker.

    For every (start, end) pair returned by ``get_walk_forward_windows`` the
    function skips the window if both its model and VecNormalize files already
    exist in FINAL_MODEL_DIR; otherwise it trains a PPO agent on that slice,
    replays the slice deterministically to collect per-step diagnostics,
    computes summary metrics, and saves the model, the VecNormalize statistics
    and two prediction CSVs. The best TOP_N_WINDOWS windows (by Sharpe) trained
    in THIS call are then passed to ``save_quantconnect_model``.

    Args:
        df_sym: Rows for a single ticker. "Close" is read unconditionally;
            "Datetime" is used when present.
        ticker: Symbol used in file prefixes, log lines and result rows.
        window_size: Minimum number of rows required; forwarded to
            ``get_walk_forward_windows``.
        step_size: Forwarded to ``get_walk_forward_windows``.
        timesteps: ``total_timesteps`` given to ``model.learn``.
        learning_rate: PPO learning rate used when ``ppo_overrides`` has no "lr".
        ppo_overrides: Optional dict; the keys "lr", "n_steps", "batch",
            "clip" and "ent" override the corresponding PPO arguments.

    Returns:
        A list with one summary dict per window trained in this call. The list
        is empty when the ticker has too few rows or when every window was
        already complete.

    Raises:
        Anything raised while training/evaluating a window propagates to the
        caller after that window's environment and model have been released;
        there is no ``except`` clause around the per-window work.
    """
    import heapq

    if ppo_overrides is None:
        ppo_overrides = {}

    if len(df_sym) < window_size:
        logging.warning(
            f"Skipping {ticker}: only {len(df_sym)} rows (min required: {window_size})"
        )
        return []

    results = []
    windows = get_walk_forward_windows(df_sym, window_size, step_size)
    top_heap = []          # min-heap of (Sharpe, prefix, meta), capped at TOP_N_WINDOWS
    skipped_windows = []

    # quick check: all windows already have model+vecnorm?
    # NOTE(review): if `windows` is empty this loop never runs and the ticker
    # is reported as fully skipped — confirm that cannot happen here.
    all_done = True
    for idx in range(len(windows)):
        prefix = f"ppo_{ticker}_window{idx+1}"
        model_ok   = os.path.exists(os.path.join(FINAL_MODEL_DIR, f"{prefix}_model.zip"))
        vecnorm_ok = os.path.exists(os.path.join(FINAL_MODEL_DIR, f"{prefix}_vecnorm.pkl"))
        if not (model_ok and vecnorm_ok):
            all_done = False
            break

    if all_done:
        logging.info(f"Ticker {ticker} fully skipped (all {len(windows)} windows already complete).")
        record_skips_global(ticker, skipped_windows=[], total_windows=len(windows), fully_skipped=True)

        # Nothing will be trained, so fill QC_TOP_DIR from earlier summaries instead.
        export_qc_top_from_existing(ticker, top_n=TOP_N_WINDOWS)
        return []



    for w_idx, (start, end) in enumerate(windows):
        window_start_time = time.time()
        gc.collect()

        # Artifact names are 1-based: window index w_idx -> "..._window{w_idx+1}".
        prefix = f"ppo_{ticker}_window{w_idx+1}"
        model_path   = os.path.join(FINAL_MODEL_DIR, f"{prefix}_model.zip")
        vecnorm_path = os.path.join(FINAL_MODEL_DIR, f"{prefix}_vecnorm.pkl")

        if os.path.exists(model_path) and os.path.exists(vecnorm_path):
            logging.info(f"Skipping {ticker} | Window {w_idx+1}, already trained.")
            skipped_windows.append(f"{ticker}_window{w_idx+1}")
            continue

        # Log which of the two artifacts is missing (this only feeds the log line).
        missing = []
        if not os.path.exists(model_path):   missing.append("model.zip")
        if not os.path.exists(vecnorm_path): missing.append("vecnorm.pkl")
        logging.info(
            f"Will train {ticker} | Window {w_idx+1} because missing: {', '.join(missing)}"
        )

        df_window = df_sym.iloc[start:end].reset_index(drop=True)
        # Drops the last row when the slice has <= 52 rows or an odd length.
        # NOTE(review): the reason for this trimming is not visible here — confirm.
        if len(df_window) <= 52 or len(df_window) % 2 != 0:
            df_window = df_window.iloc[:-1]

        frame_bound = (50, len(df_window) - 3)

        # The factory lambda captures df_window/frame_bound from this iteration.
        env = DummyVecEnv([lambda: ContinuousPositionEnv(
          df=df_window, frame_bound=frame_bound, **ENV_KWARGS
        )])

        env = VecNormalize(env, norm_obs=True, norm_reward=True, clip_obs=10.0)

        try:
            model = PPO(
                "MlpPolicy",
                env,
                verbose=0,
                device=("cuda" if torch.cuda.is_available() else "cpu"),
                learning_rate=ppo_overrides.get("lr", learning_rate),
                n_steps=ppo_overrides.get("n_steps", 256),
                batch_size=ppo_overrides.get("batch", 64),
                n_epochs=5,
                gamma=0.99,
                gae_lambda=0.95,
                clip_range=ppo_overrides.get("clip", 0.2),
                ent_coef=ppo_overrides.get("ent", 0.005),
                policy_kwargs=dict(net_arch=[64, 64]),
            )

            logging.info(f"Training {ticker} Window {w_idx+1}/{len(windows)}")
            model.learn(total_timesteps=timesteps)

            # Evaluation pass: the replay runs on the same df_window that was
            # just used for training. VecNormalize statistics are frozen and
            # rewards are no longer normalized.
            env.training = False
            env.norm_reward = False
            obs = env.reset()
            if isinstance(obs, tuple):
                obs, _ = obs

            nav_track = [1.0]   # strategy NAV, starting at 1.0
            bh_track  = [1.0]   # buy-and-hold NAV compounded from info["ret_t"]
            step_log  = []      # one dict of diagnostics per evaluation step
            executed_trade_count = 0
            signal_trade_count   = 0
            signal_trade_count_dyn   = 0   # dynamic-threshold diagnostic

            DIAG_THR = 0.2  # fixed |action| threshold used for signal_trade_count
            for i in range(len(df_window) - 1):
                action, _ = model.predict(obs, deterministic=True)
                mu, sigma = get_mu_sigma(model, obs)

                obs, rew, dones, infos = env.step(action)
                # VecEnv returns list/tuple of infos; otherwise it may be a dict
                if isinstance(infos, (list, tuple)):
                    info = infos[0] if len(infos) else {}
                elif isinstance(infos, dict):
                    info = infos
                else:
                    info = {}


                # If the env did not report "nav", carry the previous value forward.
                nav_track.append(float(info.get("nav", nav_track[-1])))
                bh_track.append(
                    bh_track[-1] * (1.0 + float(info.get("ret_t", 0.0)))
                )

                a = float(np.array(action).squeeze())
                # Step i is logged against row i+1 of the window.
                dt_val = df_window["Datetime"].iloc[i+1] if "Datetime" in df_window.columns else None
                px     = float(df_window["Close"].iloc[i+1]) if "Close" in df_window.columns else np.nan
                # "signal" trades (fixed-threshold based) — diagnostic only
                if a > DIAG_THR or a < -DIAG_THR:
                    signal_trade_count += 1
                # real trades, as flagged by the env through info["executed_trade"]
                if bool(info.get("executed_trade", False)):
                    executed_trade_count += 1

                # next-bar return (to score BUY/SELL vs the *next* move)
                if i + 2 < len(df_window):
                    p0 = float(df_window["Close"].iloc[i+1])
                    p1 = float(df_window["Close"].iloc[i+2])
                    next_ret = 0.0 if p0 <= 0 else (p1 - p0) / p0
                else:
                    next_ret = 0.0

                # reward scalar (VecEnv returns arrays)
                rew_val = float(rew[0]) if isinstance(rew, (list, tuple, np.ndarray)) else float(rew)

                step_log.append({
                    "Index": i+1,
                    "Datetime": dt_val,
                    "Close": px,
                    "Action": a,
                    "mu": mu,
                    "sigma": sigma,
                    "nav": nav_track[-1],
                    "ret_t": float(info.get("ret_t", 0.0)),
                    "next_ret": float(next_ret),
                    "reward": rew_val,
                    "pos": float(info.get("pos", 0.0)),
                    "trade_cost": float(info.get("trade_cost", 0.0)),
                    "base_ret": float(info.get("base_ret", 0.0)),
                    "rel_alpha": float(info.get("rel_alpha", 0.0)),
                    "mom": float(info.get("mom", 0.0)),
                })

                # done handling (VecEnv): stop the replay at the end of the episode
                if isinstance(dones, (np.ndarray, list, tuple)):
                    if dones[0]:
                        break
                elif dones:
                    break


            # --- Metrics ---
            # NAVs are scaled by 100_000 to express them as portfolio values.
            final_value = float(nav_track[-1]) * 100_000.0
            hold_value  = float(bh_track[-1])  * 100_000.0

            # dynamic action threshold for this window (prevents "no signals" windows):
            # 70th percentile of |action|, clipped to [0.08, 0.30]
            abs_actions = np.array([abs(float(r["Action"])) for r in step_log], dtype=float)
            if len(abs_actions) > 0:
                thr = float(np.quantile(abs_actions, 0.70))  # 70th percentile
                thr = float(np.clip(thr, 0.08, 0.30))
            else:
                thr = 0.2

            # Dynamic signal trade count (post-hoc diagnostic)
            signal_trade_count_dyn = int(np.sum(abs_actions > thr)) if len(abs_actions) > 0 else 0


            # Sharpe is mean/std of per-step NAV returns times _annualization_factor;
            # the 1e-9 guards against a zero standard deviation.
            returns = pd.Series(nav_track).pct_change().fillna(0.0)
            sharpe  = float((returns.mean() / (returns.std() + 1e-9)) * _annualization_factor(df_window))
            # Maximum drawdown from the running NAV peak, in percent.
            drawdown = float(
                ((pd.Series(nav_track).cummax() - pd.Series(nav_track)) /
                pd.Series(nav_track).cummax()).max() * 100.0
            )

            # Classification stats (now using thr): a BUY is correct when the
            # next-bar return is > 0, a SELL when it is < 0.
            correct = 0
            total   = 0
            tp_buy = fp_buy = 0
            tp_sell = fp_sell = 0

            for r in step_log:
                a = float(r["Action"])
                ret_t = float(r.get("next_ret", 0.0))

                if a > thr:
                    sig = "BUY"
                elif a < -thr:
                    sig = "SELL"
                else:
                    sig = "HOLD"

                if sig == "BUY":
                    if ret_t > 0:
                        tp_buy += 1; correct += 1
                    else:
                        fp_buy += 1
                    total += 1
                elif sig == "SELL":
                    if ret_t < 0:
                        tp_sell += 1; correct += 1
                    else:
                        fp_sell += 1
                    total += 1
                # HOLD not counted

            # The 1e-9 terms keep the ratios defined when there were no signals.
            precision_long  = tp_buy  / (tp_buy  + fp_buy  + 1e-9)
            precision_short = tp_sell / (tp_sell + fp_sell + 1e-9)
            precision_trades = (tp_buy + tp_sell) / (
                (tp_buy + tp_sell) + (fp_buy + fp_sell) + 1e-9
            )
            step_accuracy = round(correct / total, 4) if total > 0 else 0.0
            # Trade_Count reflects the trades the env flagged as executed
            trade_count = int(executed_trade_count)

            # Save VecNormalize; on failure the path is recorded as None in `meta`
            try:
                env.save(vecnorm_path)
            except Exception as e:
                logging.warning(f"Could not save VecNormalize for {ticker} {start}-{end}: {e}")
                vecnorm_path = None

            # Save model
            model.save(model_path)

            # Save detailed predictions
            pred_path = os.path.join(RUN_RESULTS_DIR, f"{prefix}_predictions.csv")
            pd.DataFrame(step_log).to_csv(pred_path, index=False)
            logging.info(f"Saved predictions to {pred_path}")

            # Save compat predictions with same thresholds as metrics
            compat_rows = []
            for r in step_log:
                a = r["Action"]

                if a > thr:
                    signal = "BUY"
                elif a < -thr:
                    signal = "SELL"
                else:
                    signal = "HOLD"
                compat_rows.append({
                    "Index": r["Index"],
                    "Datetime": r["Datetime"],
                    "Close": r["Close"],
                    "Action": a,
                    "Signal": signal,
                    "PortfolioValue": r["nav"],
                    "Reward": r.get("reward", np.nan),
                })
            compat_path = os.path.join(RUN_RESULTS_DIR, f"{prefix}_predictions_compat.csv")
            pd.DataFrame(compat_rows).to_csv(compat_path, index=False)
            logging.info(f"Saved compatibility predictions to {compat_path}")

            # Summary row (one per trained window; this is what gets returned)
            result_row = {
                "Ticker": ticker,
                "Window": f"{start}-{end}",
                "WindowIdx": int(w_idx + 1),
                "Prefix": prefix,
                "PPO_Portfolio": round(final_value, 2),
                "BuyHold": round(hold_value, 2),
                "Sharpe": round(sharpe, 3),
                "Drawdown_%": round(drawdown, 2),
                "Winner": "PPO" if final_value > hold_value else "Buy & Hold",
                "Action_Threshold": round(thr, 4),
                "Accuracy": step_accuracy,
                "Trade_Count": trade_count,
                "Signal_Trade_Count": int(signal_trade_count),
                "Signal_Trade_Count_Dyn": int(signal_trade_count_dyn),
                "Executed_Trade_Count": int(executed_trade_count),
                "Precision_Long": round(precision_long, 4),
                "Precision_Short": round(precision_short, 4),
                "Precision_Trades": round(precision_trades, 4),
            }

            results.append(result_row)

            meta = {
                "result": result_row,
                "features": df_window.columns.tolist(),
                "prefix": prefix,
                "model_path": model_path,
                "vecnorm_path": vecnorm_path,
            }

            # Keep only the TOP_N_WINDOWS best Sharpe values: the heap root is
            # the weakest kept entry and is replaced when a better one arrives.
            # The unique prefix breaks Sharpe ties before the dicts are compared.
            item = (result_row["Sharpe"], prefix, meta)
            if len(top_heap) < TOP_N_WINDOWS:
                heapq.heappush(top_heap, item)
            else:
                if item[0] > top_heap[0][0]:
                    heapq.heapreplace(top_heap, item)

            logging.info(
                f"{ticker} | Window {w_idx+1} runtime: "
                f"{round(time.time() - window_start_time, 2)}s"
            )
        finally:
            # Always release the env, model and CUDA cache, even if the window
            # failed; each step is best-effort so cleanup never masks the error.
            try:
                env.close()
            except Exception:
                pass
            del env
            try:
                del model
            except Exception:
                pass
            gc.collect()
            try:
                torch.cuda.empty_cache()
            except Exception:
                pass

    if skipped_windows:
        logging.info(
            f"{ticker} skipped windows (already complete): {', '.join(skipped_windows)}"
        )
        record_skips_global(
            ticker,
            skipped_windows=skipped_windows,
            total_windows=len(windows),
            fully_skipped=False,
        )

    # Save top-N QC-compatible artifacts, best Sharpe first. Only windows
    # trained in this call are in the heap; skipped windows are not considered.
    top_list = sorted(top_heap, key=lambda t: t[0], reverse=True)
    for _, _, meta in top_list:
        artifact_for_save = {
            "model": None,  # we're copying from disk, not re-saving an in-memory model
            "model_path": meta["model_path"],
            "vecnorm_path": meta["vecnorm_path"],
            "features": meta["features"],
            "result": meta["result"],
            "prefix": meta["prefix"],
        }
        save_quantconnect_model(artifact_for_save, meta["prefix"], QC_TOP_DIR)

    return results

def process_ticker(ticker):
    """Run the walk-forward PPO pipeline for one ticker without raising.

    Picks the hyperparameter profile with ``pick_params``, slices the global
    ``df`` down to the ticker's rows and delegates to ``walkforward_ppo`` with
    the module-level WINDOW_SIZE, STEP_SIZE and TIMESTEPS.

    Args:
        ticker: Symbol to train.

    Returns:
        The list of window summary dicts from ``walkforward_ppo``, or an empty
        list when anything in the pipeline raised.
    """
    try:
        hp = pick_params(ticker)
        return walkforward_ppo(
            df[df["Symbol"] == ticker].copy(),
            ticker,
            window_size=WINDOW_SIZE,
            step_size=STEP_SIZE,
            timesteps=TIMESTEPS,
            learning_rate=hp["lr"],
            ppo_overrides=hp,
        )
    except Exception as e:
        # This handler is the only place a training failure surfaces, so log
        # the traceback as well as the message before returning "no results".
        logging.exception(f"{ticker}: training failed with {e}")
        return []


from concurrent.futures import ThreadPoolExecutor

def run_parallel_tickers(tickers,
                         out_path=os.path.join(RUN_RESULTS_DIR, "summary.csv"),
                         max_workers=8):
    """Process tickers on a thread pool and write one combined summary CSV.

    ``process_ticker`` is mapped over *tickers*; the per-ticker row lists are
    concatenated in input order and empty/falsy results are ignored.

    Args:
        tickers: Iterable of symbols handed to ``process_ticker``.
        out_path: CSV destination. The default is computed once, when the
            function is defined, from RUN_RESULTS_DIR.
        max_workers: Size of the thread pool.

    Returns:
        The combined list of result rows (possibly empty). The CSV is only
        written when at least one row was produced.
    """
    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        combined = [
            row
            for ticker_rows in pool.map(process_ticker, tickers)
            if ticker_rows
            for row in ticker_rows
        ]

    if not combined:
        logging.warning("No results produced; summary not written.")
    else:
        pd.DataFrame(combined).to_csv(out_path, index=False)
        logging.info(f"Saved summary to {out_path}")

    logging.info("All tickers processed.")
    return combined

def build_ppo_selector():
    """Aggregate all summary*.csv across runs and build the selector CSV + JSON.

    Steps:
      1. Read every ``summary*.csv`` under
         ``BASE_RESULTS_DIR/ppo_walkforward_results_*``.
      2. Drop rows without a numeric Sharpe and duplicate (Ticker, Window) rows.
      3. Keep the single best-Sharpe row per ticker and write the flat table
         to SELECTOR_FULL_PATH.
      4. Apply the safety gates (Sharpe > 0, drawdown < 50 %, final portfolio
         > 80,000, PPO >= 95 % of buy-and-hold) and write one JSON entry per
         surviving ticker to SELECTOR_JSON_PATH.

    Artifact paths are derived from the ``Prefix`` saved in the summaries when
    available, then from the saved ``WindowIdx``; only rows that have neither
    fall back to an index rebuilt from the window order.

    Returns:
        None. Returns early (with a warning) when no summaries can be read or
        when the required columns are missing.
    """
    import math

    summary_files = glob.glob(
        os.path.join(BASE_RESULTS_DIR, "ppo_walkforward_results_*", "summary*.csv")
    )
    all_summaries = []

    for p in summary_files:
        try:
            tmp = pd.read_csv(p)
            tmp["RunFolder"] = os.path.dirname(p)
            all_summaries.append(tmp)
        except Exception as e:
            print(f"⚠️ Skipping {p} due to error: {e}")

    if not all_summaries:
        logging.warning("No PPO summaries found across walkforward results folders.")
        return

    combo = pd.concat(all_summaries, ignore_index=True)

    # Everything below sorts/groups on these columns, so fail soft up front
    # instead of raising a KeyError half-way through.
    missing_cols = [c for c in ("Ticker", "Window", "Sharpe") if c not in combo.columns]
    if missing_cols:
        logging.warning(
            f"PPO summaries are missing required columns {missing_cols}; selector not built."
        )
        return

    combo["Sharpe"] = pd.to_numeric(combo["Sharpe"], errors="coerce")
    combo = combo.dropna(subset=["Sharpe"])

    # Ensure key columns exist for robust ratios
    if "BuyHold" not in combo.columns:
        combo["BuyHold"] = np.nan
    if "PPO_Portfolio" not in combo.columns:
        combo["PPO_Portfolio"] = np.nan

    # parse Window "start-end" to WindowStart
    def _parse_window_start(w):
        if pd.isna(w):
            return None
        if isinstance(w, (int, float)):
            return int(w)
        parts = str(w).split("-")
        try:
            return int(parts[0])
        except Exception:
            return None

    combo["WindowStart"] = combo["Window"].apply(_parse_window_start)
    combo = combo.sort_values(["Ticker", "WindowStart"]).reset_index(drop=True)

    # The same window can be listed in several summary files. De-duplicate on
    # the window label itself: a cumcount-based index is unique by
    # construction, so de-duplicating on it would never remove anything.
    combo = combo.drop_duplicates(subset=["Ticker", "Window"], keep="last")

    # Prefer the window index saved at training time (it matches the artifact
    # file names). Rebuild it from the window order only where it is missing.
    rebuilt_idx = combo.groupby("Ticker").cumcount() + 1
    if "WindowIdx" in combo.columns:
        saved_idx = pd.to_numeric(combo["WindowIdx"], errors="coerce")
        combo["WindowIdx"] = saved_idx.fillna(rebuilt_idx)
    else:
        combo["WindowIdx"] = rebuilt_idx

    # Best row per ticker. drop_duplicates keeps whole rows, whereas
    # groupby().first() takes the first non-null value of each column and can
    # therefore stitch together values from different windows.
    best_by_symbol = (
        combo
        .sort_values("Sharpe", ascending=False)
        .drop_duplicates(subset="Ticker", keep="first")
        .sort_values("Ticker")
        .reset_index(drop=True)
    )

    # If Drawdown_% missing (older runs), create it so rename won't break
    if "Drawdown_%" not in best_by_symbol.columns:
        best_by_symbol["Drawdown_%"] = np.nan

    # Ensure precision cols exist
    for col in ["Precision_Long", "Precision_Short", "Precision_Trades"]:
        if col not in best_by_symbol.columns:
            best_by_symbol[col] = None  # or np.nan if you prefer

    # Rename columns so everything downstream uses consistent names
    best_by_symbol = best_by_symbol.rename(columns={
        "Drawdown_%": "Drawdown",
        "PPO_Portfolio": "Final_Portfolio",
    })

    # Ensure Accuracy / Trade_Count exist
    if "Accuracy" not in best_by_symbol.columns:
        best_by_symbol["Accuracy"] = 0.0
    if "Trade_Count" not in best_by_symbol.columns:
        best_by_symbol["Trade_Count"] = None

    best_by_symbol["Model"] = MODEL_NAME

    # PPO vs Buy & Hold ratio (safe division): NaN when BuyHold is missing or 0.
    # Vectorised so it also works when the table is empty.
    buy_hold = pd.to_numeric(best_by_symbol["BuyHold"], errors="coerce")
    final_pf = pd.to_numeric(best_by_symbol["Final_Portfolio"], errors="coerce")
    best_by_symbol["Rel_vs_BH"] = (final_pf / buy_hold).where(
        buy_hold.notna() & (buy_hold != 0), np.nan
    )

    # Save flat CSV for debugging
    best_by_symbol.to_csv(SELECTOR_FULL_PATH, index=False)
    print(f"Aggregated PPO selector saved to → {SELECTOR_FULL_PATH}")

    # Safety filters (tune as needed)
    df_sel = best_by_symbol.copy()
    gates = (
        (df_sel["Sharpe"].fillna(-999) > 0.0) &
        (df_sel["Drawdown"].fillna(999) < 50.0) &
        (df_sel["Final_Portfolio"].fillna(0) > 80_000) &
        (df_sel["Rel_vs_BH"].fillna(0) >= 0.95)   # PPO ≥ 95% of B&H; change to >1.0 to enforce beat
    )
    df_sel = df_sel[gates].copy()

    rebuilt_prefix = (
        "ppo_"
        + df_sel["Ticker"].astype(str)
        + "_window"
        + df_sel["WindowIdx"].astype(int).astype(str)
    )
    if "Prefix" in df_sel.columns:
        # Use the prefix recorded at training time wherever it is present.
        df_sel["prefix"] = (
            df_sel["Prefix"].where(df_sel["Prefix"].notna(), rebuilt_prefix).astype(str)
        )
    else:
        df_sel["prefix"] = rebuilt_prefix

    df_sel["artifact_path"] = df_sel["prefix"].map(
        lambda p: os.path.join(FINAL_MODEL_DIR, f"{p}_model.zip")
    )
    df_sel["vecnorm_path"] = df_sel["prefix"].map(
        lambda p: os.path.join(FINAL_MODEL_DIR, f"{p}_vecnorm.pkl")
    )

    EPS = 0.03  # 3% of top-sharpe for "close enough"
    selected_models = {}

    def safe_int(v, default=0):
        """int(v), or *default* for None / NaN / inf / unparsable values."""
        try:
            return int(v)
        except (ValueError, TypeError, OverflowError):
            return int(default)

    def safe_float(v, default=0.0):
        """float(v), or *default* for None / NaN / unparsable values."""
        try:
            out = float(v)
        except (ValueError, TypeError):
            return float(default)
        return float(default) if math.isnan(out) else out

    # NOTE: df_sel holds exactly one row per ticker at this point, so `second`
    # is always None and every entry is written with mode "single". The
    # ensemble branch is kept so the JSON schema stays unchanged.
    for ticker, group in df_sel.groupby("Ticker"):
        group_sorted = group.sort_values("Sharpe", ascending=False)
        top = group_sorted.iloc[0]
        second = group_sorted.iloc[1] if len(group_sorted) > 1 else None

        if (second is not None) and (
            abs(top["Sharpe"] - second["Sharpe"]) <= abs(top["Sharpe"]) * EPS
        ):
            mode = "ensemble"
            primary, secondary = top["Model"], second["Model"]
        else:
            mode = "single"
            primary, secondary = top["Model"], None

        selected_models[ticker] = {
            "model": MODEL_NAME,
            "score": round(safe_float(top["Sharpe"]), 4),
            "return": round(safe_float(top["Final_Portfolio"]), 2),
            "sharpe": round(safe_float(top["Sharpe"]), 3),
            "drawdown": round(safe_float(top["Drawdown"]), 2),
            "sortino": None,
            "turnover": None,
            "trade_count": safe_int(top.get("Trade_Count", 0)),
            "precision": {
                "long":   safe_float(top.get("Precision_Long", 0.0)),
                "short":  safe_float(top.get("Precision_Short", 0.0)),
                "trades": safe_float(top.get("Precision_Trades", 0.0)),
            },
            "stability": {},
            "regime": "unknown",
            "rl_profile": "fast",
            "artifact": {
                "path": top["artifact_path"],
                "vecnorm": top["vecnorm_path"],
                "features": None,
                "load_ms": 180,
                "mem_mb": 512,
                "exists": os.path.exists(top["artifact_path"]),
            },
            "selection": {
                "mode": mode,
                "primary": primary,
                "secondary": secondary,
            },
        }

    with open(SELECTOR_JSON_PATH, "w") as f:
        json.dump(selected_models, f, indent=2)

    print(f"Final enhanced PPO selector JSON saved to → {SELECTOR_JSON_PATH}")

if __name__ == "__main__":
    # Echo the resolved output locations at the top of the run log.
    logging.info(f"RUN_RESULTS_DIR   = {RUN_RESULTS_DIR}")
    logging.info(f"FINAL_MODEL_DIR  = {FINAL_MODEL_DIR}")
    logging.info(f"BASE_RESULTS_DIR = {BASE_RESULTS_DIR}")

    # --- Stage 1: keep symbols with enough rows for at least one window ---
    min_rows = WINDOW_SIZE + 50  # small buffer so we have at least one window
    all_symbols = df["Symbol"].value_counts()
    candidate_symbols = []

    for sym, n in all_symbols.items():
        if n < min_rows:
            logging.warning(f"Skipping {sym}: only {n} rows (< {min_rows} required)")
            continue
        candidate_symbols.append(sym)

    if candidate_symbols:
        logging.info(f"Training candidate symbols: {candidate_symbols}")
    else:
        logging.error("No symbols have enough rows for the current WINDOW_SIZE. Nothing to train.")

    # --- Stage 2: keep symbols whose frame carries every required column ---
    needed_cols = ["Close", "Datetime"]
    if ENABLE_WAVELET:
        needed_cols.append("Denoised_Close")
    if ENABLE_SENTIMENT:
        needed_cols.append("SentimentScore")

    valid_symbols = []
    for sym in candidate_symbols:
        cols = set(df.loc[df["Symbol"] == sym].columns)
        missing = [c for c in needed_cols if c not in cols]
        if not missing:
            valid_symbols.append(sym)
            continue
        logging.warning(f"Skipping {sym}: missing required cols {missing}")

    if valid_symbols:
        logging.info(f"Final training universe: {valid_symbols}")
    else:
        logging.error("No symbols passed the feature/column checks. Nothing to train.")

    # --- Stage 3: train (full parallel run, or sequential test-mode run) ---
    all_results = []

    if not test_mode:
        logging.info("Starting full parallel PPO walkforward run...")
        summary_results = run_parallel_tickers(valid_symbols)
        if summary_results:
            summary_path = os.path.join(RUN_RESULTS_DIR, "summary.csv")
            pd.DataFrame(summary_results).to_csv(summary_path, index=False)
            logging.info(f"Summary saved to {summary_path}")
        else:
            logging.warning("No results generated in full run (check logs for skips/length issues).")

    else:
        # Test mode trains fewer timesteps. WINDOW_SIZE / STEP_SIZE could be
        # reduced here as well (e.g. 2000 / 500) for even faster test runs.
        TIMESTEPS = 100_000   # lighter test

        test_stocks = ["AAPL", "NVDA", "MSFT"]
        present = [s for s in test_stocks if s in valid_symbols]
        if present:
            logging.info(f"Test mode: running on {present}")
        else:
            logging.warning("Test mode: none of ['AAPL','NVDA','MSFT'] present after filters.")

        for sym in present:
            logging.info(f">>> [TEST_MODE] Processing {sym}")
            res = process_ticker(sym)
            logging.info(f"{sym}: produced {len(res)} window summaries")
            if res:
                all_results.extend(res)

        summary_path = os.path.join(RUN_RESULTS_DIR, "summary_test_mode.csv")
        if not all_results:
            logging.warning("Test mode finished but no results were generated (no windows, or all skipped).")
        else:
            pd.DataFrame(all_results).to_csv(summary_path, index=False)
            logging.info(f"Test-mode summary saved to {summary_path}")

    # --- Stage 4: rebuild the selector from every summary found on disk ---
    try:
        build_ppo_selector()
    except Exception as e:
        logging.error(f"build_ppo_selector failed: {e}")

Gym has been unmaintained since 2022 and does not support NumPy 2.0 amongst other critical functionality.
Please upgrade to Gymnasium, the maintained drop-in replacement of Gym, or contact the authors of your software and request that they upgrade.
See the migration guide at https://gymnasium.farama.org/introduction/migration_guide/ for additional information.
2025-12-26 00:44:30,958 - INFO - RUN_RESULTS_DIR   = /content/drive/MyDrive/Results_May_2025/ppo_walkforward_results_20251226_0044
2025-12-26 00:44:30,960 - INFO - FINAL_MODEL_DIR  = /content/drive/MyDrive/Results_May_2025/ppo_models_master
2025-12-26 00:44:30,961 - INFO - BASE_RESULTS_DIR = /content/drive/MyDrive/Results_May_2025
2025-12-26 00:44:31,078 - INFO - Training candidate symbols: ['AAPL', 'QCOM', 'COST', 'RTX', 'BRK-B', 'SBUX', 'TMO', 'BAC', 'TSLA', 'CRM', 'AVGO', 'TXN', 'UNH', 'ADBE', 'AMGN', 'UNP', 'AMD', 'ACN', 'AMZN', 'ABT', 'WMT', 'ABBV', 'XOM', 'PFE', 'PM', 'CSCO', 'PG', 'LLY', 'MA', 'MCD', 'META', 'MRK', 'JPM', 

Aggregated PPO selector saved to → /content/drive/MyDrive/Results_May_2025/ppo_model_selector_FULL.csv
Final enhanced PPO selector JSON saved to → /content/drive/MyDrive/Results_May_2025/ppo_model_selector_final.json


In [None]:
# PPO walkforward training + selector
import os, gc, time, json, logging, glob
import shutil
from threading import Lock
from datetime import datetime, timedelta

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt  # optional for ad-hoc plots

import torch
import gymnasium as gym
from gymnasium.spaces import Box as GBox

import yfinance as yf
from gym_anytrading.envs import StocksEnv

from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
from stable_baselines3.common.utils import set_random_seed

import warnings
warnings.filterwarnings("ignore", category=UserWarning, module="gymnasium")
# ---- Sharpe annualization helper (intraday heuristic: 6.5 hrs * 252) ----
def _annualization_factor(_df_like=None) -> float:
    """Annualization factor for intraday bars (6.5 trading hours × 252 days)."""
    return np.sqrt(252 * 6.5)

# Hide DeprecationWarning messages raised from the jupyter_client.session module.
warnings.filterwarnings("ignore", category=DeprecationWarning, module="jupyter_client.session")
# Hide any warning whose text matches the legacy "Gym has been unmaintained" notice.
warnings.filterwarnings("ignore", message=".*Gym has been unmaintained.*")

try:
    # Reuse the notebook's feature builder when an earlier cell already defined it.
    compute_enhanced_features  # type: ignore
except NameError:
    def compute_enhanced_features(df_in: pd.DataFrame) -> pd.DataFrame:
        """Minimal stand-in feature builder used when no real one is defined.

        Works on a copy of ``df_in``. When a ``Datetime`` column exists it is
        parsed to datetimes and the rows are sorted chronologically with a fresh
        0..n-1 index. No features are added.

        Raises:
            ValueError: if the frame has no ``Close`` column.
        """
        frame = df_in.copy()
        has_timestamps = "Datetime" in frame.columns
        if has_timestamps:
            frame["Datetime"] = pd.to_datetime(frame["Datetime"])
            frame = frame.sort_values("Datetime").reset_index(drop=True)
        if "Close" in frame.columns:
            return frame
        raise ValueError("compute_enhanced_features: missing required column 'Close'")

# Fix the SB3 random seed so repeated runs start from the same RNG state.
set_random_seed(42)

# Root folder for all outputs; RUN_TAG is a minute-resolution timestamp that
# gives every run its own results directory.
BASE_RESULTS_DIR = "/content/drive/MyDrive/Results_May_2025"
RUN_TAG = datetime.now().strftime("%Y%m%d_%H%M")

# Per-run results, the shared model store, and the QC export folder.
RUN_RESULTS_DIR = os.path.join(BASE_RESULTS_DIR, f"ppo_walkforward_results_{RUN_TAG}")
FINAL_MODEL_DIR = os.path.join(BASE_RESULTS_DIR, "ppo_models_master")
QC_TOP_DIR      = os.path.join(BASE_RESULTS_DIR, "ppo_models_QC_TOP")

# Create the output folders up front (no error if they already exist).
os.makedirs(QC_TOP_DIR, exist_ok=True)
os.makedirs(RUN_RESULTS_DIR, exist_ok=True)
os.makedirs(FINAL_MODEL_DIR, exist_ok=True)

# Aggregated selector outputs
SELECTOR_FULL_PATH = os.path.join(BASE_RESULTS_DIR, "ppo_model_selector_FULL.csv")
SELECTOR_JSON_PATH = os.path.join(BASE_RESULTS_DIR, "ppo_model_selector_final.json")
MODEL_NAME = "PPO"

# Global skip aggregation: one CSV per run, appended to under SKIP_LOCK
# (see record_skips_global).
SKIP_AGG_PATH = os.path.join(RUN_RESULTS_DIR, "skipped_windows_global.csv")
SKIP_LOCK = Lock()

# Logging Setup; force=True replaces any handlers already attached to the
# root logger (e.g. from an earlier notebook cell).
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    force=True
)

# Flags
ENABLE_SENTIMENT = False
ENABLE_SLO       = True
ENABLE_WAVELET   = True
test_mode        = True            # set False for full universe
ENABLE_PLOTS     = False
LIVE_MODE        = False           # set True to run simple live/paper loop
SIM_LATENCY_MS   = 0               # broker latency simulation; 0 = off
BROKER           = "log"           # "log" = do not place orders, just log

# Global training settings
WINDOW_SIZE = 3500
STEP_SIZE   = 500
TIMESTEPS   = 150_000  # overridden in test_mode block to smaller value


# The feature-engineered dataset is resolved relative to the working directory;
# the run aborts immediately when it is absent.
DATA_PATH = "multi_stock_feature_engineered_dataset.csv"
if not os.path.exists(DATA_PATH):
    raise FileNotFoundError("Required feature-engineered dataset not found!")

# Load the dataset and parse the Datetime column into real timestamps.
df = pd.read_csv(DATA_PATH)
df["Datetime"] = pd.to_datetime(df["Datetime"])

# Wavelet fallback: when the denoised series is requested but missing, use the
# raw Close so code that reads "Denoised_Close" still works.
if ENABLE_WAVELET and "Denoised_Close" not in df.columns:
    logging.warning("ENABLE_WAVELET=True but 'Denoised_Close' missing; "
                    "falling back to Close->Denoised_Close.")
    df["Denoised_Close"] = df["Close"]


def record_skips_global(ticker: str, skipped_windows: list,
                        total_windows: int = None, fully_skipped: bool = False):
    """Append skip records for ``ticker`` to the run-wide skip CSV.

    Nothing is written when there are no skipped windows and the ticker was not
    fully skipped. A fully skipped ticker produces one row with Window="ALL";
    otherwise one row is written per entry of ``skipped_windows``, with the
    window number parsed from the text after "_window" (blank if unparsable).
    The header row is emitted only when the CSV does not exist yet. All file
    access happens while holding SKIP_LOCK.
    """
    if not (skipped_windows or fully_skipped):
        return
    import csv

    # Blank cell when the caller did not supply a total.
    total_cell = "" if total_windows is None else total_windows

    def _window_number(label):
        # Expected shape is "<ticker>_window<k>"; anything else yields a blank.
        try:
            _, tail = label.split("_window")
            return int(tail)
        except Exception:
            return ""

    with SKIP_LOCK:
        needs_header = not os.path.exists(SKIP_AGG_PATH)
        with open(SKIP_AGG_PATH, "a", newline="") as handle:
            out = csv.writer(handle)
            if needs_header:
                out.writerow(["Ticker", "Window", "FullySkipped", "TotalWindows"])
            if fully_skipped:
                out.writerow([ticker, "ALL", True, total_cell])
                return
            out.writerows(
                [ticker, _window_number(label), False, total_cell]
                for label in skipped_windows
            )


# Keyword arguments forwarded to ContinuousPositionEnv wherever an env is built.
ENV_KWARGS = {
    # Look-back window handed to the parent StocksEnv.
    "window_size": 10,
    # Per-unit commission and slippage; the env charges their sum times |delta position|.
    "cost_rate": 0.0002,
    "slip_rate": 0.0003,

    # Reward-shaping weights: relative alpha, momentum alignment, sentiment.
    "k_alpha": 0.0,
    "k_mom": 0.15,
    "k_sent": 0.01 if ENABLE_SENTIMENT else 0.0,
    # Momentum source ("macd" uses MACD_Line, otherwise the Denoised_Close slope)
    # and the slope look-back in bars.
    "mom_source": "denoised",
    "mom_lookback": 20,

    # Trade gating: minimum position change and minimum ticks between trades.
    "min_trade_delta": 0.08,
    "cooldown": 10,

    # Symmetric clip applied to the shaped reward.
    "reward_clip": 0.05,
    # Stored on the env; not used by the reward in the env's step().
    "k_vol": 0.0,
    "k_dd": 0.0,
}


class ContinuousPositionEnv(StocksEnv):
    """StocksEnv variant driven by one continuous action in [-1, 1].

    The action is read as a target position. The env keeps its own NAV
    (starting at 1.0), charges ``cost_rate + slip_rate`` per unit of executed
    position change, only executes a change when it is at least
    ``min_trade_delta`` and ``cooldown`` ticks have passed since the last
    trade, and returns a shaped reward clipped to ``±reward_clip``. The parent
    env supplies observations and termination flags; its reward is discarded.
    """
    def __init__(self, df, frame_bound, **kwargs):
        """Build the env over ``df`` restricted to ``frame_bound``.

        ``kwargs`` must contain ``window_size`` and may contain the tuning keys
        popped below; any other key raises ValueError.
        """
        # Require window_size from ENV_KWARGS
        if "window_size" not in kwargs:
            raise ValueError("ContinuousPositionEnv requires window_size (pass via ENV_KWARGS).")

        window_size = int(kwargs.pop("window_size"))

        # Pull params (all defaults live in ENV_KWARGS; these are just safety fallbacks)
        # NOTE(review): the fallbacks for min_trade_delta (0.04) and cooldown (6)
        # differ from the ENV_KWARGS values (0.08 / 10) — confirm this is intended.
        cost_rate       = float(kwargs.pop("cost_rate", 0.0002))
        slip_rate       = float(kwargs.pop("slip_rate", 0.0003))
        k_alpha         = float(kwargs.pop("k_alpha", 0.0))
        k_mom           = float(kwargs.pop("k_mom", 0.15))
        k_sent          = float(kwargs.pop("k_sent", 0.0))
        mom_source      = str(kwargs.pop("mom_source", "denoised"))
        mom_lookback    = int(kwargs.pop("mom_lookback", 20))
        min_trade_delta = float(kwargs.pop("min_trade_delta", 0.04))
        cooldown        = int(kwargs.pop("cooldown", 6))
        reward_clip     = float(kwargs.pop("reward_clip", 0.05))
        k_vol           = float(kwargs.pop("k_vol", 0.0))
        k_dd            = float(kwargs.pop("k_dd", 0.0))

        # Fail fast on unexpected env kwargs
        if kwargs:
            raise ValueError(f"Unexpected env kwargs: {list(kwargs.keys())}")

        # The frame is re-indexed 0..n-1 so the tick counter can be used as a
        # .loc label in the helpers below.
        super().__init__(
            df=df.reset_index(drop=True),
            frame_bound=frame_bound,
            window_size=window_size
        )

        # Rebuild the observation space as a gymnasium Box with the same
        # bounds, shape and dtype.
        if isinstance(self.observation_space, gym.spaces.Box):
            self.observation_space = GBox(
                low=self.observation_space.low,
                high=self.observation_space.high,
                shape=self.observation_space.shape,
                dtype=self.observation_space.dtype,
            )

        # Stored for completeness; step() does not read k_vol / k_dd.
        self.k_vol = k_vol
        self.k_dd  = k_dd

        # Episode bookkeeping (re-initialised in reset()).
        self.ret_history = []
        self.nav_history = []
        self.peak_nav    = 1.0
        self.trade_count = 0

        # Replace the parent's action space with a single continuous value.
        self.action_space = GBox(low=-1.0, high=1.0, shape=(1,), dtype=np.float32)


        self.cost_rate       = cost_rate
        self.slip_rate       = slip_rate
        self.k_alpha         = k_alpha
        self.k_mom           = k_mom
        self.k_sent          = k_sent
        self.mom_source      = mom_source
        self.mom_lookback    = mom_lookback
        self.min_trade_delta = min_trade_delta
        self.cooldown        = cooldown
        self.reward_clip     = reward_clip

        # Start flat with unit NAV; the negative last-trade step lets the very
        # first trade pass the cooldown check.
        self.nav = 1.0
        self.pos = 0.0
        self._last_trade_step = -self.cooldown

    def reset(self, **kwargs):
        """Reset parent and portfolio state; always returns ``(obs, info)``."""
        out = super().reset(**kwargs)
        # Accept both the (obs, info) tuple and a bare observation from the parent.
        if isinstance(out, tuple):
            obs, info = out
        else:
            obs, info = out, {}

        self.nav = 1.0
        self.pos = 0.0
        self._last_trade_step = -self.cooldown

        self.trade_count = 0
        self.ret_history = []
        self.nav_history = [self.nav]
        self.peak_nav    = self.nav

        info = info or {}
        info.update({
            "nav": self.nav,
            "pos": self.pos,
            "trade_count": int(self.trade_count),

        })
        return obs, info

    def _step_parent_hold(self):
        """Advance the parent env one step and return (obs, terminated, truncated, info).

        The parent's reward is dropped. Both 5-tuple and 4-tuple step results
        are handled.
        """
        # NOTE(review): action code 2 is presumably outside the parent's discrete
        # action set, so the parent only advances its tick — confirm against
        # gym_anytrading.
        step_result = super().step(2)
        if len(step_result) == 5:
            obs, _env_rew, terminated, truncated, info = step_result
        else:
            obs, _env_rew, done, info = step_result
            terminated, truncated = bool(done), False
        return obs, terminated, truncated, info

    def _ret_t(self):
        """Simple Close-to-Close return from the previous row to the current tick.

        Returns 0.0 when the previous close is not positive.
        """
        cur  = float(self.df.loc[self._current_tick, "Close"])
        prev = float(self.df.loc[max(self._current_tick - 1, 0), "Close"])
        return 0.0 if prev <= 0 else (cur - prev) / prev

    def _mom_signal(self):
        """Momentum signal squashed by tanh, or 0.0 when it cannot be computed."""
        # MACD variant: current MACD_Line scaled by its std over (up to) the
        # last 201 rows.
        if self.mom_source == "macd" and "MACD_Line" in self.df.columns:
            recent = self.df["MACD_Line"].iloc[max(self._current_tick - 200, 0):self._current_tick + 1]
            return float(np.tanh(
                float(self.df.loc[self._current_tick, "MACD_Line"]) /
                (1e-6 + float(recent.std()))
            ))

        # Denoised variant: per-bar slope of Denoised_Close over mom_lookback
        # bars, relative to the previous Close, scaled by 10 before tanh.
        if "Denoised_Close" in self.df.columns and self._current_tick - self.mom_lookback >= 0:
            now  = float(self.df.loc[self._current_tick, "Denoised_Close"])
            then = float(self.df.loc[self._current_tick - self.mom_lookback, "Denoised_Close"])
            base = float(self.df.loc[max(self._current_tick - 1, 0), "Close"])
            slope = (now - then) / max(self.mom_lookback, 1)
            return float(np.tanh(10.0 * (slope / max(abs(base), 1e-6))))

        return 0.0

    def step(self, action):
        """Apply a target-position action and return (obs, reward, terminated, truncated, info).

        Reward and NAV are computed with the position held *before* this
        step's trade; the position is updated afterwards, then the parent env
        is advanced.
        """
        a = float(np.array(action).squeeze())
        target_pos = float(np.clip(a, -1.0, 1.0))

        # PnL of the currently held position over the latest bar.
        r_t = self._ret_t()
        base_ret = self.pos * r_t

        # A trade executes only if the change is large enough AND the cooldown
        # since the last executed trade has elapsed.
        changed = (
            abs(target_pos - self.pos) >= self.min_trade_delta
        ) and (
            (self._current_tick - self._last_trade_step) >= self.cooldown
        )

        delta_pos = (target_pos - self.pos) if changed else 0.0
        trade_cost = (self.cost_rate + self.slip_rate) * abs(delta_pos)

        # Return relative to holding one full unit of the asset.
        rel_alpha = base_ret - r_t
        # Position times momentum signal (its sign depends on whether they agree).
        mom_term = self.pos * self._mom_signal()

        alpha_term = self.k_alpha * rel_alpha

        # Sentiment is added only when the module-level flag is on and the
        # column exists.
        sent_term = 0.0
        if ENABLE_SENTIMENT and "SentimentScore" in self.df.columns:
            sent_term = self.k_sent * float(self.df.loc[self._current_tick, "SentimentScore"])

        shaped = base_ret + alpha_term + (self.k_mom * mom_term) + sent_term - trade_cost
        reward = float(np.clip(shaped, -self.reward_clip, self.reward_clip))


        # NAV compounds on realised return minus costs only (no shaping terms).
        self.nav *= (1.0 + base_ret - trade_cost)
        self.nav_history.append(self.nav)
        self.peak_nav = max(self.peak_nav, self.nav)

        executed_trade = False
        if changed:
            self.pos = target_pos
            self._last_trade_step = self._current_tick
            self.trade_count += 1
            executed_trade = True

        # Advance the underlying env; "pos" in info is the post-trade position.
        obs, terminated, truncated, info = self._step_parent_hold()
        info = info or {}
        info.update({
            "ret_t": r_t,
            "nav": self.nav,
            "pos": self.pos,
            "trade_cost": trade_cost,
            "base_ret": base_ret,
            "rel_alpha": rel_alpha,
            "mom": mom_term,
            "changed": bool(changed),
            "executed_trade": bool(executed_trade),
            "trade_count": int(self.trade_count),
            "delta_pos": float(delta_pos),
        })
        return obs, reward, terminated, truncated, info

def get_mu_sigma(model, obs):
    """Return the policy's Gaussian action mean and std for ``obs`` as floats.

    Runs the actor path of ``model.policy`` by hand (features -> actor latent
    -> action_net) without tracking gradients; the std is ``exp(log_std)``.
    Both values are squeezed to scalars, so a single one-dimensional action is
    expected.
    """
    policy = model.policy
    with torch.no_grad():
        obs_tensor, _ = policy.obs_to_tensor(obs)
        actor_latent, _ = policy.mlp_extractor(policy.extract_features(obs_tensor))
        mean_tensor = policy.action_net(actor_latent)
        std_tensor = policy.log_std.exp()
        mu = float(mean_tensor.detach().cpu().numpy().squeeze())
        sigma = float(std_tensor.detach().cpu().numpy().squeeze())
    return mu, sigma

def get_walk_forward_windows(df_in, window_size=3500, step_size=500, min_len=1200):
    """Return ``(start, end)`` row bounds for rolling walk-forward windows.

    Windows start at 0, step_size, 2*step_size, ... and each spans
    ``window_size`` rows. ``end`` is exclusive (suitable for ``iloc[start:end]``),
    so a window that ends exactly on the last row (``end == len(df_in)``) is
    valid and is included. Start offsets are additionally limited to
    ``len(df_in) - min_len``.

    Args:
        df_in: any sized object (only ``len()`` is used).
        window_size: number of rows per window.
        step_size: distance between consecutive window starts.
        min_len: start offsets stop before ``len(df_in) - min_len``.

    Returns:
        List of ``(start, end)`` tuples, possibly empty.
    """
    n_rows = len(df_in)
    return [
        (start, start + window_size)
        for start in range(0, n_rows - min_len, step_size)
        # `<=` (not `<`): the end bound is exclusive, so a window flush with the
        # end of the data must not be dropped.
        if start + window_size <= n_rows
    ]

def save_quantconnect_model(artifact, prefix, save_dir):
    """Save/copy QC-compatible artifacts for one model into ``save_dir``.

    Files produced (all named ``{prefix}_*``):
      * ``_model.zip``  - saved from ``artifact["model"]`` if given (and the file
        does not exist yet), otherwise copied from ``artifact["model_path"]``.
      * ``_vecnorm.pkl`` - copied from ``artifact["vecnorm_path"]`` when present.
      * ``_features.json``, ``_probability_config.json``, ``_model_info.json``.

    Every step is best-effort: failures are logged as warnings and never raised.
    JSON payloads are serialized before the target file is opened so a failed
    serialization cannot leave a truncated file, and non-finite numbers are
    never emitted (a NaN/inf threshold falls back to 0.2; NaN/inf metrics
    become ``null``) so the output is strictly valid JSON.

    Args:
        artifact: dict with optional keys ``model``, ``model_path``,
            ``vecnorm_path``, ``features`` and ``result`` (a mapping of metrics).
        prefix: file-name prefix identifying the model.
        save_dir: destination directory; created if missing.
    """
    import math
    import shutil

    os.makedirs(save_dir, exist_ok=True)

    def _finite_float(value, fallback):
        # Coerce to float; anything unparsable or non-finite yields the fallback.
        try:
            number = float(value)
        except (TypeError, ValueError):
            return fallback
        return number if math.isfinite(number) else fallback

    def _json_safe(value):
        # NaN/inf are not valid JSON tokens; store them as null instead.
        if isinstance(value, float) and not math.isfinite(value):
            return None
        return value

    def _write_json(suffix, payload):
        # Serialize first so an unserializable payload never truncates the file.
        try:
            text = json.dumps(payload)
            with open(os.path.join(save_dir, f"{prefix}_{suffix}.json"), "w") as f:
                f.write(text)
        except Exception as e:
            logging.warning(f"[QC SAVE] Could not write {suffix}.json for {prefix}: {e}")

    result = artifact.get("result")
    if result is None:
        result = {}

    # --- Model zip: save or copy ---
    model_dst = os.path.join(save_dir, f"{prefix}_model.zip")

    model_obj = artifact.get("model", None)
    model_src = artifact.get("model_path", None)

    try:
        if model_obj is not None:
            # Save from in-memory SB3 model (an existing export is kept as is)
            if not os.path.exists(model_dst):
                model_obj.save(model_dst)

        else:
            # Copy from an existing trained window model zip
            if model_src and os.path.exists(model_src):
                if os.path.abspath(model_src) != os.path.abspath(model_dst):
                    shutil.copyfile(model_src, model_dst)
            else:
                # If neither provided, warn loudly
                if not os.path.exists(model_dst):
                    logging.warning(f"[QC SAVE] Missing model for {prefix}: no model_obj and no valid model_path.")
    except Exception as e:
        logging.warning(f"[QC SAVE] Model handling issue for {prefix}: {e}")

    # --- VecNormalize: copy ---
    vecnorm_src = artifact.get("vecnorm_path")
    if vecnorm_src and os.path.exists(vecnorm_src):
        try:
            vecnorm_dst = os.path.join(save_dir, f"{prefix}_vecnorm.pkl")
            if os.path.abspath(vecnorm_src) != os.path.abspath(vecnorm_dst):
                shutil.copyfile(vecnorm_src, vecnorm_dst)
        except Exception as e:
            logging.warning(f"[QC SAVE] VecNormalize handling issue for {prefix}: {e}")
    else:
        logging.warning(f"[QC SAVE] VecNormalize missing for {prefix}: vecnorm_path not found.")

    # --- Features ---
    _write_json("features", {"features": artifact.get("features", [])})

    # --- Probability config ---
    # A missing, unparsable or NaN threshold falls back to 0.2; a NaN threshold
    # would make every `action > thr` comparison false downstream.
    try:
        thr = _finite_float(result.get("Action_Threshold", 0.2), 0.2)
    except Exception:
        thr = 0.2
    _write_json(
        "probability_config",
        {"threshold": thr, "use_confidence": True, "inference_mode": "deterministic"},
    )

    # --- Model info ---
    try:
        model_info = {
            "model": "PPO",
            "ticker": _json_safe(result.get("Ticker")),
            "window": _json_safe(result.get("Window")),
            "date_trained": datetime.today().strftime("%Y-%m-%d"),
            "framework": "stable-baselines3",
            "input_features": artifact.get("features", []),
            "final_portfolio": _json_safe(result.get("PPO_Portfolio")),
            "buy_hold": _json_safe(result.get("BuyHold")),
            "sharpe": _json_safe(result.get("Sharpe")),
        }
    except Exception as e:
        logging.warning(f"[QC SAVE] Could not write model_info.json for {prefix}: {e}")
    else:
        _write_json("model_info", model_info)

    logging.info(f"[QC SAVE] Saved QC artifacts for {prefix}")

def load_model_and_env(prefix):
    """Load the PPO model saved under ``prefix`` and return ``(model, make_env)``.

    ``make_env(df_window)`` wraps a ContinuousPositionEnv over ``df_window`` in a
    DummyVecEnv, restores the saved VecNormalize statistics when the pickle
    exists, and switches the wrapper to inference mode (no statistics updates,
    raw rewards).
    """
    vecnorm_file = os.path.join(FINAL_MODEL_DIR, f"{prefix}_vecnorm.pkl")
    model = PPO.load(os.path.join(FINAL_MODEL_DIR, f"{prefix}_model.zip"), device="cpu")

    def make_env(df_window):
        # Same frame bounds as the training windows use.
        bounds = (50, len(df_window) - 3)

        def _build_env():
            return ContinuousPositionEnv(df=df_window, frame_bound=bounds, **ENV_KWARGS)

        vec_env = DummyVecEnv([_build_env])
        if os.path.exists(vecnorm_file):
            vec_env = VecNormalize.load(vecnorm_file, vec_env)
        vec_env.training = False
        vec_env.norm_reward = False
        return vec_env

    return model, make_env

def latest_df_for_symbol(symbol, horizon_days=5, interval="1m"):
    """Fetch recent bars for ``symbol`` and run them through the feature builder.

    Args:
        symbol: ticker passed to ``yf.download``.
        horizon_days: how many calendar days back from today (UTC) to request.
        interval: yfinance bar interval.

    Returns:
        Feature DataFrame with a ``Symbol`` column (and ``Denoised_Close`` when
        ENABLE_WAVELET is on), or ``None`` when no bars were returned.
    """
    from datetime import timezone

    now_utc = datetime.now(timezone.utc)
    start = now_utc - timedelta(days=horizon_days)
    # yfinance treats `end` as exclusive, so passing today's date would drop all
    # of today's bars; request up to tomorrow to include the current session.
    end_exclusive = now_utc + timedelta(days=1)
    df_live = yf.download(
        symbol,
        start=start.strftime("%Y-%m-%d"),
        end=end_exclusive.strftime("%Y-%m-%d"),
        interval=interval,
        progress=False,
        auto_adjust=False,
    )
    if df_live is None or df_live.empty:
        return None
    # Newer yfinance versions return (field, ticker) MultiIndex columns even for
    # a single symbol; flatten to plain field names so "Close" is a Series.
    if isinstance(df_live.columns, pd.MultiIndex):
        df_live.columns = df_live.columns.get_level_values(0)
    df_live = df_live.reset_index()
    df_live["Symbol"] = symbol
    df_live = compute_enhanced_features(df_live)
    # Same fallback as the training data: use raw Close when no denoised series exists.
    if ENABLE_WAVELET and "Denoised_Close" not in df_live.columns:
        df_live["Denoised_Close"] = df_live["Close"]
    return df_live

def predict_latest(symbol, prefix):
    """Produce a BUY/SELL/HOLD signal for ``symbol`` from the model saved as ``prefix``.

    Steps: load the per-model action threshold (default 0.2), fetch fresh bars,
    replay the most recent window through the env with a zero action so the
    observation (and env state) reach the end of the window, then query the
    policy deterministically on that final observation.

    Returns:
        dict with ``signal``, ``confidence`` (|action|), ``action``, ``threshold``,
        ``ts``, ``price``, ``mu``, ``sigma`` — or ``None`` when fewer than 100
        fresh rows are available.
    """
    # --- load per-model threshold ---
    cfg_path = os.path.join(FINAL_MODEL_DIR, f"{prefix}_probability_config.json")
    thr = 0.2
    if os.path.exists(cfg_path):
        try:
            with open(cfg_path, "r") as f:
                thr = float(json.load(f).get("threshold", 0.2))
        except Exception:
            thr = 0.2
    # A NaN/inf threshold would make both comparisons below false forever.
    if not np.isfinite(thr):
        thr = 0.2

    # Fetch data first: no point loading the model when there is nothing to score.
    live_df = latest_df_for_symbol(symbol)
    if live_df is None or len(live_df) < 100:
        logging.warning("No fresh data yet for live inference.")
        return None

    model, make_env = load_model_and_env(prefix)

    df_window = live_df.iloc[-2500:].reset_index(drop=True) if len(live_df) > 2500 else live_df.copy()

    env = make_env(df_window)
    obs = env.reset()
    if isinstance(obs, tuple):
        obs, _ = obs

    # fast-forward with HOLD (zero target position)
    hold_action = [np.array([0.0], dtype=np.float32)]
    for _ in range(len(df_window) - 1):
        obs, _, dones, infos = env.step(hold_action)
        if isinstance(dones, (np.ndarray, list, tuple)) and len(dones) and dones[0]:
            # SB3 VecEnvs auto-reset on episode end: the `obs` returned here is
            # already the FIRST observation of a new episode. The true last
            # observation is delivered in infos[0]["terminal_observation"]
            # (already normalized when wrapped by VecNormalize).
            terminal = None
            if isinstance(infos, (list, tuple)) and len(infos) and isinstance(infos[0], dict):
                terminal = infos[0].get("terminal_observation")
            if terminal is not None:
                obs = np.asarray(terminal)[None, ...]  # restore the batch dimension
            break

    action, _ = model.predict(obs, deterministic=True)
    mu, sigma = get_mu_sigma(model, obs)

    a = float(np.array(action).squeeze())

    # --- thresholded signal using loaded thr ---
    if a > thr:
        signal = "BUY"
    elif a < -thr:
        signal = "SELL"
    else:
        signal = "HOLD"

    conf = abs(a)
    # NOTE(review): ts/price are taken from the last row of df_window, while the
    # env's frame_bound (see load_model_and_env) ends a few rows earlier, so the
    # final observation may not include the very last bar — confirm acceptable.
    ts = df_window["Datetime"].iloc[-1] if "Datetime" in df_window.columns else None
    price = float(df_window["Close"].iloc[-1])

    return dict(
        signal=signal,
        confidence=conf,
        action=a,
        threshold=thr,
        ts=ts,
        price=price,
        mu=mu,
        sigma=sigma,
    )

def place_order(signal, qty=1):
    """Log an order instead of sending it; optionally sleeps to mimic broker latency.

    Sleeps SIM_LATENCY_MS milliseconds when that setting is positive. With
    BROKER == "log" the order is logged as a paper trade; any other broker name
    is logged as not implemented.
    """
    latency_ms = SIM_LATENCY_MS
    if latency_ms > 0:
        time.sleep(latency_ms / 1000.0)
    message = (
        f"[PAPER] {signal} x{qty}"
        if BROKER == "log"
        else f"[BROKER={BROKER}] {signal} x{qty} (not implemented)"
    )
    logging.info(message)

def live_loop(symbol, best_prefix, poll_seconds=60):
    """Poll for a fresh prediction and route actionable signals while LIVE_MODE is true.

    Each iteration asks ``predict_latest`` for a signal, logs it, and forwards
    BUY/SELL signals to ``place_order``. HOLD is logged but not sent as an
    order. Errors inside an iteration are logged with their traceback and the
    loop keeps running.

    Args:
        symbol: ticker to score.
        best_prefix: model prefix passed to ``predict_latest``.
        poll_seconds: pause between iterations (defaults to one minute).
    """
    while LIVE_MODE:
        try:
            pred = predict_latest(symbol, best_prefix)
            if pred:
                logging.info(
                    f"{symbol} {pred['ts']} | {pred['signal']} "
                    f"@ {pred['price']:.2f} (conf {pred['confidence']:.2f})"
                )
                # Only actionable signals become orders; HOLD means "do nothing".
                if pred["signal"] in ("BUY", "SELL"):
                    place_order(pred["signal"], qty=1)
        except Exception as e:
            # exc_info keeps the traceback so failures in the loop can be diagnosed.
            logging.error(f"Live loop error: {e}", exc_info=True)
        time.sleep(poll_seconds)

# Number of best-Sharpe windows exported per ticker.
TOP_N_WINDOWS = 3

# PPO hyperparameter presets, selected per symbol by pick_params().
FAST = dict(lr=8e-5, n_steps=3072, batch=512, clip=0.2, ent=0.01)
SLOW = dict(lr=3e-5, n_steps=3072, batch=512, clip=0.16, ent=0.005)

# Symbols that get the FAST preset.
fast_names = {
    "TSLA", "NVDA", "AMD", "AVGO", "AAPL", "MSFT",
    "AMZN", "GOOGL", "META", "ADBE", "CRM", "INTC",
    "QCOM", "TXN", "ORCL", "NEE", "GE", "XOM",
    "CVX", "LLY", "NKE", "SBUX",
}
# Symbols listed for the SLOW preset (also the preset for any unlisted symbol).
slow_names = {
    "BRK-B", "JPM", "BAC", "JNJ", "UNH", "MRK",
    "PFE", "ABBV", "ABT", "AMGN", "PG", "PEP",
    "KO", "V", "MA", "WMT", "MCD", "TMO",
    "DHR", "ACN", "IBM", "LIN", "PM", "RTX",
    "UPS", "UNP", "COST", "HD", "LOW",
}

def pick_params(symbol: str):
    """Return the FAST preset for symbols in ``fast_names``, otherwise SLOW."""
    if symbol in fast_names:
        return FAST
    return SLOW

def export_qc_top_from_existing(ticker: str, top_n: int = 3):
    """Populate QC_TOP_DIR for a ticker whose models already exist.

    Reads every ``summary*.csv`` under ``ppo_walkforward_results_*`` in
    BASE_RESULTS_DIR, ranks the ticker's rows by Sharpe, and exports the top
    ``top_n`` distinct models from FINAL_MODEL_DIR via
    ``save_quantconnect_model``.

    Model prefixes come from the ``Prefix`` column when any row has one; rows
    lacking a prefix are then ignored (they cannot be mapped to files
    reliably). Only when no prefix is available at all is the window index
    reconstructed from the ``Window`` column, which is approximate. Repeated
    rows for the same prefix (e.g. overlapping summary files) count once,
    keeping the best Sharpe.

    Every failure mode logs a warning and returns; nothing is raised for
    missing or malformed summaries.
    """
    summary_files = glob.glob(os.path.join(BASE_RESULTS_DIR, "ppo_walkforward_results_*", "summary*.csv"))
    if not summary_files:
        logging.warning(f"[QC_TOP] No summary files found; cannot export QC_TOP for {ticker}.")
        return

    frames = []
    for p in summary_files:
        try:
            frames.append(pd.read_csv(p))
        except Exception as e:
            logging.warning(f"[QC_TOP] Could not read {p}: {e}")

    if not frames:
        logging.warning(f"[QC_TOP] Could not read any summary files; cannot export QC_TOP for {ticker}.")
        return

    combo = pd.concat(frames, ignore_index=True)

    if "Ticker" not in combo.columns:
        logging.warning("[QC_TOP] Summary files missing 'Ticker' column; cannot export.")
        return

    combo = combo[combo["Ticker"] == ticker].copy()
    if combo.empty or "Sharpe" not in combo.columns:
        logging.warning(f"[QC_TOP] No rows for {ticker} in summaries (or missing Sharpe); cannot export QC_TOP.")
        return

    # Ensure Sharpe is numeric so sorting works reliably
    combo["Sharpe"] = pd.to_numeric(combo["Sharpe"], errors="coerce")
    combo = combo.dropna(subset=["Sharpe"])
    if combo.empty:
        logging.warning(f"[QC_TOP] All Sharpe values were non-numeric for {ticker}; cannot export.")
        return

    use_prefix = ("Prefix" in combo.columns) and combo["Prefix"].notna().any()

    if use_prefix:
        # Robust path: use saved Prefix directly. Rows without one (e.g. from
        # older summaries) would otherwise turn into the literal prefix "nan"
        # and waste a top-N slot, so they are dropped before ranking.
        has_prefix = combo["Prefix"].notna()
        n_missing = int((~has_prefix).sum())
        if n_missing:
            logging.warning(
                f"[QC_TOP] Ignoring {n_missing} summary row(s) without Prefix for {ticker}."
            )
        ranked = combo[has_prefix].copy()
        ranked["__prefix__"] = ranked["Prefix"].astype(str)
    else:
        # Fallback: reconstruct WindowIdx (less robust)
        if "Window" not in combo.columns:
            logging.warning(
                f"[QC_TOP] Summaries for {ticker} have neither Prefix nor Window; cannot export."
            )
            return

        def _window_start(w):
            try:
                s = str(w)
                return int(s.split("-")[0]) if "-" in s else np.nan
            except Exception:
                return np.nan

        combo["WindowStart"] = combo["Window"].apply(_window_start)
        combo = combo.sort_values(["WindowStart"]).reset_index(drop=True)
        combo["WindowIdx"] = combo.groupby("Ticker").cumcount() + 1

        ranked = combo.copy()
        ranked["__prefix__"] = ranked["WindowIdx"].apply(lambda widx: f"ppo_{ticker}_window{int(widx)}")

    # Best Sharpe first; one row per model so top_n means top_n distinct models.
    top = (
        ranked.sort_values("Sharpe", ascending=False)
        .drop_duplicates(subset="__prefix__", keep="first")
        .head(top_n)
        .copy()
    )

    exported = 0

    for _, r in top.iterrows():
        prefix = str(r["__prefix__"])

        model_path = os.path.join(FINAL_MODEL_DIR, f"{prefix}_model.zip")
        vec_path   = os.path.join(FINAL_MODEL_DIR, f"{prefix}_vecnorm.pkl")

        if not (os.path.exists(model_path) and os.path.exists(vec_path)):
            logging.warning(f"[QC_TOP] Missing model/vecnorm for {prefix}; cannot export.")
            continue

        artifact_for_save = {
            "model": None,
            "model_path": model_path,
            "vecnorm_path": vec_path,
            "features": [],         # ok if unknown; QC can load features elsewhere
            "result": r.to_dict(),  # includes Sharpe, Action_Threshold, etc if present
            "prefix": prefix,
        }
        save_quantconnect_model(artifact_for_save, prefix, QC_TOP_DIR)
        exported += 1

    logging.info(f"[QC_TOP] Exported {exported}/{len(top)} QC artifacts for {ticker}.")

def walkforward_ppo(df_sym, ticker,
                    window_size=3500, step_size=500,
                    timesteps=150_000, learning_rate=1e-4,
                    ppo_overrides=None):
    import heapq

    if ppo_overrides is None:
        ppo_overrides = {}

    if len(df_sym) < window_size:
        logging.warning(
            f"Skipping {ticker}: only {len(df_sym)} rows (min required: {window_size})"
        )
        return []

    results = []
    windows = get_walk_forward_windows(df_sym, window_size, step_size)
    top_heap = []
    skipped_windows = []

    # quick check: all windows already have model+vecnorm?
    all_done = True
    for idx in range(len(windows)):
        prefix = f"ppo_{ticker}_window{idx+1}"
        model_ok   = os.path.exists(os.path.join(FINAL_MODEL_DIR, f"{prefix}_model.zip"))
        vecnorm_ok = os.path.exists(os.path.join(FINAL_MODEL_DIR, f"{prefix}_vecnorm.pkl"))
        if not (model_ok and vecnorm_ok):
            all_done = False
            break

    if all_done:
        logging.info(f"Ticker {ticker} fully skipped (all {len(windows)} windows already complete).")
        record_skips_global(ticker, skipped_windows=[], total_windows=len(windows), fully_skipped=True)

        export_qc_top_from_existing(ticker, top_n=TOP_N_WINDOWS)
        return []



    for w_idx, (start, end) in enumerate(windows):
        window_start_time = time.time()
        gc.collect()

        prefix = f"ppo_{ticker}_window{w_idx+1}"
        model_path   = os.path.join(FINAL_MODEL_DIR, f"{prefix}_model.zip")
        vecnorm_path = os.path.join(FINAL_MODEL_DIR, f"{prefix}_vecnorm.pkl")

        if os.path.exists(model_path) and os.path.exists(vecnorm_path):
            logging.info(f"Skipping {ticker} | Window {w_idx+1}, already trained.")
            skipped_windows.append(f"{ticker}_window{w_idx+1}")
            continue

        missing = []
        if not os.path.exists(model_path):   missing.append("model.zip")
        if not os.path.exists(vecnorm_path): missing.append("vecnorm.pkl")
        logging.info(
            f"Will train {ticker} | Window {w_idx+1} because missing: {', '.join(missing)}"
        )

        df_window = df_sym.iloc[start:end].reset_index(drop=True)
        if len(df_window) <= 52 or len(df_window) % 2 != 0:
            df_window = df_window.iloc[:-1]

        frame_bound = (50, len(df_window) - 3)

        env = DummyVecEnv([lambda: ContinuousPositionEnv(
          df=df_window, frame_bound=frame_bound, **ENV_KWARGS
        )])

        env = VecNormalize(env, norm_obs=True, norm_reward=True, clip_obs=10.0)

        try:
            model = PPO(
                "MlpPolicy",
                env,
                verbose=0,
                device=("cuda" if torch.cuda.is_available() else "cpu"),
                learning_rate=ppo_overrides.get("lr", learning_rate),
                n_steps=ppo_overrides.get("n_steps", 256),
                batch_size=ppo_overrides.get("batch", 64),
                n_epochs=5,
                gamma=0.99,
                gae_lambda=0.95,
                clip_range=ppo_overrides.get("clip", 0.2),
                ent_coef=ppo_overrides.get("ent", 0.005),
                policy_kwargs=dict(net_arch=[64, 64]),
            )

            logging.info(f"Training {ticker} Window {w_idx+1}/{len(windows)}")
            model.learn(total_timesteps=timesteps)

            # Evaluation pass
            env.training = False
            env.norm_reward = False
            obs = env.reset()
            if isinstance(obs, tuple):
                obs, _ = obs

            nav_track = [1.0]
            bh_track  = [1.0]
            step_log  = []
            executed_trade_count = 0
            signal_trade_count   = 0
            signal_trade_count_dyn   = 0   # dynamic-threshold diagnostic

            DIAG_THR = 0.2
            for i in range(len(df_window) - 1):
                action, _ = model.predict(obs, deterministic=True)
                mu, sigma = get_mu_sigma(model, obs)

                obs, rew, dones, infos = env.step(action)
                # VecEnv returns list/tuple of infos; otherwise it may be a dict
                if isinstance(infos, (list, tuple)):
                    info = infos[0] if len(infos) else {}
                elif isinstance(infos, dict):
                    info = infos
                else:
                    info = {}


                nav_track.append(float(info.get("nav", nav_track[-1])))
                bh_track.append(
                    bh_track[-1] * (1.0 + float(info.get("ret_t", 0.0)))
                )

                a = float(np.array(action).squeeze())
                dt_val = df_window["Datetime"].iloc[i+1] if "Datetime" in df_window.columns else None
                px     = float(df_window["Close"].iloc[i+1]) if "Close" in df_window.columns else np.nan
                #“signal” trades (threshold-based) — diagnostic only
                if a > DIAG_THR or a < -DIAG_THR:
                    signal_trade_count += 1
                #real trades executed by env friction logic
                if bool(info.get("executed_trade", False)):
                    executed_trade_count += 1

                # next-bar return (to score BUY/SELL vs the *next* move)
                if i + 2 < len(df_window):
                    p0 = float(df_window["Close"].iloc[i+1])
                    p1 = float(df_window["Close"].iloc[i+2])
                    next_ret = 0.0 if p0 <= 0 else (p1 - p0) / p0
                else:
                    next_ret = 0.0

                # reward scalar (VecEnv returns arrays)
                rew_val = float(rew[0]) if isinstance(rew, (list, tuple, np.ndarray)) else float(rew)

                step_log.append({
                    "Index": i+1,
                    "Datetime": dt_val,
                    "Close": px,
                    "Action": a,
                    "mu": mu,
                    "sigma": sigma,
                    "nav": nav_track[-1],
                    "ret_t": float(info.get("ret_t", 0.0)),
                    "next_ret": float(next_ret),
                    "reward": rew_val,
                    "pos": float(info.get("pos", 0.0)),
                    "trade_cost": float(info.get("trade_cost", 0.0)),
                    "base_ret": float(info.get("base_ret", 0.0)),
                    "rel_alpha": float(info.get("rel_alpha", 0.0)),
                    "mom": float(info.get("mom", 0.0)),
                })

                # done handling (VecEnv)
                if isinstance(dones, (np.ndarray, list, tuple)):
                    if dones[0]:
                        break
                elif dones:
                    break


            # --- Metrics ---
            final_value = float(nav_track[-1]) * 100_000.0
            hold_value  = float(bh_track[-1])  * 100_000.0

            #dynamic action threshold for this window (prevents “no signals” windows)
            abs_actions = np.array([abs(float(r["Action"])) for r in step_log], dtype=float)
            if len(abs_actions) > 0:
                thr = float(np.quantile(abs_actions, 0.70))  # 70th percentile
                thr = float(np.clip(thr, 0.08, 0.30))
            else:
                thr = 0.2

            # Dynamic signal trade count (post-hoc diagnostic)
            signal_trade_count_dyn = int(np.sum(abs_actions > thr)) if len(abs_actions) > 0 else 0


            returns = pd.Series(nav_track).pct_change().fillna(0.0)
            sharpe  = float((returns.mean() / (returns.std() + 1e-9)) * _annualization_factor(df_window))
            drawdown = float(
                ((pd.Series(nav_track).cummax() - pd.Series(nav_track)) /
                pd.Series(nav_track).cummax()).max() * 100.0
            )

            # Classification stats (now using thr)
            correct = 0
            total   = 0
            tp_buy = fp_buy = 0
            tp_sell = fp_sell = 0

            for r in step_log:
                a = float(r["Action"])
                ret_t = float(r.get("next_ret", 0.0))

                if a > thr:
                    sig = "BUY"
                elif a < -thr:
                    sig = "SELL"
                else:
                    sig = "HOLD"

                if sig == "BUY":
                    if ret_t > 0:
                        tp_buy += 1; correct += 1
                    else:
                        fp_buy += 1
                    total += 1
                elif sig == "SELL":
                    if ret_t < 0:
                        tp_sell += 1; correct += 1
                    else:
                        fp_sell += 1
                    total += 1
                # HOLD not counted

            precision_long  = tp_buy  / (tp_buy  + fp_buy  + 1e-9)
            precision_short = tp_sell / (tp_sell + fp_sell + 1e-9)
            precision_trades = (tp_buy + tp_sell) / (
                (tp_buy + tp_sell) + (fp_buy + fp_sell) + 1e-9
            )
            step_accuracy = round(correct / total, 4) if total > 0 else 0.0
            #Trade_count reflect REAL executed trades (cooldown/min_trade_delta)
            trade_count = int(executed_trade_count)

            # Save VecNormalize
            try:
                env.save(vecnorm_path)
            except Exception as e:
                logging.warning(f"Could not save VecNormalize for {ticker} {start}-{end}: {e}")
                vecnorm_path = None

            # Save model
            model.save(model_path)

            # Save detailed predictions
            pred_path = os.path.join(RUN_RESULTS_DIR, f"{prefix}_predictions.csv")
            pd.DataFrame(step_log).to_csv(pred_path, index=False)
            logging.info(f"Saved predictions to {pred_path}")

            # Save compat predictions with same thresholds as metrics
            compat_rows = []
            for r in step_log:
                a = r["Action"]

                if a > thr:
                    signal = "BUY"
                elif a < -thr:
                    signal = "SELL"
                else:
                    signal = "HOLD"
                compat_rows.append({
                    "Index": r["Index"],
                    "Datetime": r["Datetime"],
                    "Close": r["Close"],
                    "Action": a,
                    "Signal": signal,
                    "PortfolioValue": r["nav"],
                    "Reward": r.get("reward", np.nan),
                })
            compat_path = os.path.join(RUN_RESULTS_DIR, f"{prefix}_predictions_compat.csv")
            pd.DataFrame(compat_rows).to_csv(compat_path, index=False)
            logging.info(f"Saved compatibility predictions to {compat_path}")

            # Summary row
            result_row = {
                "Ticker": ticker,
                "Window": f"{start}-{end}",
                "WindowIdx": int(w_idx + 1),
                "Prefix": prefix,
                "PPO_Portfolio": round(final_value, 2),
                "BuyHold": round(hold_value, 2),
                "Sharpe": round(sharpe, 3),
                "Drawdown_%": round(drawdown, 2),
                "Winner": "PPO" if final_value > hold_value else "Buy & Hold",
                "Action_Threshold": round(thr, 4),
                "Accuracy": step_accuracy,
                "Trade_Count": trade_count,
                "Signal_Trade_Count": int(signal_trade_count),
                "Signal_Trade_Count_Dyn": int(signal_trade_count_dyn),
                "Executed_Trade_Count": int(executed_trade_count),
                "Precision_Long": round(precision_long, 4),
                "Precision_Short": round(precision_short, 4),
                "Precision_Trades": round(precision_trades, 4),
            }

            results.append(result_row)

            meta = {
                "result": result_row,
                "features": df_window.columns.tolist(),
                "prefix": prefix,
                "model_path": model_path,
                "vecnorm_path": vecnorm_path,
            }

            item = (result_row["Sharpe"], prefix, meta)
            if len(top_heap) < TOP_N_WINDOWS:
                heapq.heappush(top_heap, item)
            else:
                if item[0] > top_heap[0][0]:
                    heapq.heapreplace(top_heap, item)

            logging.info(
                f"{ticker} | Window {w_idx+1} runtime: "
                f"{round(time.time() - window_start_time, 2)}s"
            )
        finally:
            try:
                env.close()
            except Exception:
                pass
            del env
            try:
                del model
            except Exception:
                pass
            gc.collect()
            try:
                torch.cuda.empty_cache()
            except Exception:
                pass

    if skipped_windows:
        logging.info(
            f"{ticker} skipped windows (already complete): {', '.join(skipped_windows)}"
        )
        record_skips_global(
            ticker,
            skipped_windows=skipped_windows,
            total_windows=len(windows),
            fully_skipped=False,
        )

    # Save top-N QC-compatible
    top_list = sorted(top_heap, key=lambda t: t[0], reverse=True)
    for _, _, meta in top_list:
        artifact_for_save = {
            "model": None,  # we're copying from disk, not re-saving an in-memory model
            "model_path": meta["model_path"],
            "vecnorm_path": meta["vecnorm_path"],
            "features": meta["features"],
            "result": meta["result"],
            "prefix": meta["prefix"],
        }
        save_quantconnect_model(artifact_for_save, meta["prefix"], QC_TOP_DIR)

    return results

def process_ticker(ticker):
    """Run the walk-forward PPO training for one ticker.

    Looks up the per-ticker hyper-parameters with ``pick_params`` and calls
    ``walkforward_ppo`` on the rows of the module-level ``df`` whose
    ``Symbol`` equals ``ticker``.

    Args:
        ticker: Symbol to train.

    Returns:
        The list returned by ``walkforward_ppo``, or ``[]`` when anything in
        the run raises. Failures never propagate, so one bad ticker cannot
        abort a batch.
    """
    try:
        hp = pick_params(ticker)
        return walkforward_ppo(
            df[df["Symbol"] == ticker].copy(),
            ticker,
            window_size=WINDOW_SIZE,
            step_size=STEP_SIZE,
            timesteps=TIMESTEPS,
            learning_rate=hp["lr"],
            ppo_overrides=hp,
        )
    except Exception as e:
        # exc_info keeps the traceback: the message alone does not say where
        # in the training run the failure happened.
        logging.error(f"{ticker}: training failed with {e}", exc_info=True)
        return []


from concurrent.futures import ThreadPoolExecutor

def run_parallel_tickers(tickers,
                         out_path=os.path.join(RUN_RESULTS_DIR, "summary.csv"),
                         max_workers=8):
    """Train every ticker on a thread pool and write one combined summary CSV.

    Args:
        tickers: Iterable of symbols handed to ``process_ticker``.
        out_path: Destination CSV for the combined rows.
        max_workers: Size of the thread pool.

    Returns:
        The flat list of per-window result rows from all tickers (possibly
        empty, in which case no CSV is written).
    """
    combined = []
    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        # map() yields results in the order of ``tickers``.
        for ticker_rows in pool.map(process_ticker, tickers):
            if not ticker_rows:
                continue
            combined.extend(ticker_rows)

    if not combined:
        logging.warning("No results produced; summary not written.")
    else:
        pd.DataFrame(combined).to_csv(out_path, index=False)
        logging.info(f"Saved summary to {out_path}")

    logging.info("All tickers processed.")
    return combined

def build_ppo_selector():
    """Aggregate all summary*.csv across runs and build selector JSON.

    Reads every ``summary*.csv`` under
    ``BASE_RESULTS_DIR/ppo_walkforward_results_*``, keeps the highest-Sharpe
    window of each ticker, writes that table to ``SELECTOR_FULL_PATH``, then
    applies the safety gates and writes the surviving tickers to
    ``SELECTOR_JSON_PATH``.

    Returns:
        None. Returns early, after a warning, when no summary file could be
        read or when the combined table lacks ``Ticker``/``Window``/``Sharpe``.
    """
    summary_files = sorted(glob.glob(
        os.path.join(BASE_RESULTS_DIR, "ppo_walkforward_results_*", "summary*.csv")
    ))
    all_summaries = []

    for p in summary_files:
        try:
            tmp = pd.read_csv(p)
            tmp["RunFolder"] = os.path.dirname(p)
            all_summaries.append(tmp)
        except Exception as e:
            print(f"⚠️ Skipping {p} due to error: {e}")

    if not all_summaries:
        logging.warning("No PPO summaries found across walkforward results folders.")
        return

    combo = pd.concat(all_summaries, ignore_index=True)

    # Everything below sorts/groups on these columns; bail out with a clear
    # message instead of a KeyError further down.
    missing_required = [c for c in ("Ticker", "Window", "Sharpe") if c not in combo.columns]
    if missing_required:
        logging.warning(
            f"PPO summaries are missing required columns {missing_required}; selector not built."
        )
        return

    combo["Sharpe"] = pd.to_numeric(combo["Sharpe"], errors="coerce")
    combo = combo.dropna(subset=["Ticker", "Sharpe"])

    # Ensure key columns exist for robust ratios
    if "BuyHold" not in combo.columns:
        combo["BuyHold"] = np.nan
    if "PPO_Portfolio" not in combo.columns:
        combo["PPO_Portfolio"] = np.nan

    # parse Window "start-end" to WindowStart
    def _parse_window_start(w):
        if pd.isna(w):
            return None
        if isinstance(w, (int, float)):
            return int(w)
        parts = str(w).split("-")
        try:
            return int(parts[0])
        except Exception:
            return None

    combo["WindowStart"] = combo["Window"].apply(_parse_window_start)

    # RunFolder is the tie-breaker so that, for the same ticker/window,
    # keep="last" below keeps the row from the last folder in name order.
    combo = combo.sort_values(["Ticker", "WindowStart", "RunFolder"]).reset_index(drop=True)

    # The window index is part of the artifact file name, so trust the index
    # recorded in the summaries. Renumbering by position mislabels windows as
    # soon as several runs (or runs with skipped windows) are combined.
    recorded_idx = (
        pd.to_numeric(combo["WindowIdx"], errors="coerce")
        if "WindowIdx" in combo.columns else None
    )
    if recorded_idx is not None and recorded_idx.notna().all():
        combo["WindowIdx"] = recorded_idx.astype(int)
    else:
        # Summaries without a usable recorded index: number by position.
        combo["WindowIdx"] = combo.groupby("Ticker").cumcount() + 1

    combo = combo.drop_duplicates(subset=["Ticker", "WindowIdx"], keep="last")

    # Take the whole top-Sharpe ROW per ticker. GroupBy.first() would take the
    # first non-null value column by column and could blend several windows.
    best_by_symbol = (
        combo
        .sort_values("Sharpe", ascending=False)
        .drop_duplicates(subset="Ticker", keep="first")
        .sort_values("Ticker")
        .reset_index(drop=True)
    )
    # Keep Ticker as the leading column of the flat CSV.
    best_by_symbol = best_by_symbol[
        ["Ticker"] + [c for c in best_by_symbol.columns if c != "Ticker"]
    ]

    # If Drawdown_% missing (older runs), create it so rename won't break
    if "Drawdown_%" not in best_by_symbol.columns:
        best_by_symbol["Drawdown_%"] = np.nan

    # Ensure precision cols exist
    for col in ["Precision_Long", "Precision_Short", "Precision_Trades"]:
        if col not in best_by_symbol.columns:
            best_by_symbol[col] = None

    # Rename columns so everything downstream uses consistent names
    best_by_symbol = best_by_symbol.rename(columns={
        "Drawdown_%": "Drawdown",
        "PPO_Portfolio": "Final_Portfolio",
    })

    # Ensure Accuracy / Trade_Count exist
    if "Accuracy" not in best_by_symbol.columns:
        best_by_symbol["Accuracy"] = 0.0
    if "Trade_Count" not in best_by_symbol.columns:
        best_by_symbol["Trade_Count"] = None

    best_by_symbol["Model"] = MODEL_NAME

    # PPO vs Buy & Hold ratio; a missing or zero BuyHold yields NaN.
    # Vectorised so an empty table still produces a (empty) column.
    buy_hold = pd.to_numeric(best_by_symbol["BuyHold"], errors="coerce")
    final_pf = pd.to_numeric(best_by_symbol["Final_Portfolio"], errors="coerce")
    best_by_symbol["Rel_vs_BH"] = final_pf / buy_hold.where(buy_hold != 0)

    # Save flat CSV for debugging
    best_by_symbol.to_csv(SELECTOR_FULL_PATH, index=False)
    print(f"Aggregated PPO selector saved to → {SELECTOR_FULL_PATH}")

    # Safety filters (tune as needed); missing values always fail their gate.
    df_sel = best_by_symbol.copy()
    gates = (
        (df_sel["Sharpe"].fillna(-999) > 0.0) &
        (df_sel["Drawdown"].fillna(999) < 50.0) &
        (df_sel["Final_Portfolio"].fillna(0) > 80_000) &
        (df_sel["Rel_vs_BH"].fillna(0) >= 0.95)   # PPO ≥ 95% of B&H; change to >1.0 to enforce beat
    )
    df_sel = df_sel[gates].copy()

    df_sel["prefix"] = (
        "ppo_"
        + df_sel["Ticker"].astype(str)
        + "_window"
        + df_sel["WindowIdx"].astype(int).astype(str)
    )

    df_sel["artifact_path"] = df_sel["prefix"].apply(
        lambda p: os.path.join(FINAL_MODEL_DIR, f"{p}_model.zip")
    )
    df_sel["vecnorm_path"] = df_sel["prefix"].apply(
        lambda p: os.path.join(FINAL_MODEL_DIR, f"{p}_vecnorm.pkl")
    )

    EPS = 0.03  # 3% of top-sharpe for "close enough"
    selected_models = {}

    def safe_int(v, default=0):
        """Coerce ``v`` to int; None, NaN and unparseable values give ``default``."""
        try:
            if v is None or pd.isna(v):
                return int(default)
            return int(v)
        except (ValueError, TypeError, OverflowError):
            return int(default)

    def safe_float(v, default=0.0):
        """Coerce ``v`` to float; None, NaN and unparseable values give ``default``."""
        try:
            if v is None or pd.isna(v):
                return float(default)
            return float(v)
        except (ValueError, TypeError):
            return float(default)

    # df_sel holds one row per ticker at this point, so ``second`` is None and
    # the mode is "single"; the ensemble branch only fires if more than one
    # candidate per ticker is ever kept upstream.
    for ticker, group in df_sel.groupby("Ticker"):
        group_sorted = group.sort_values("Sharpe", ascending=False)
        top = group_sorted.iloc[0]
        second = group_sorted.iloc[1] if len(group_sorted) > 1 else None

        if (second is not None) and (
            abs(top["Sharpe"] - second["Sharpe"]) <= abs(top["Sharpe"]) * EPS
        ):
            mode = "ensemble"
            primary, secondary = top["Model"], second["Model"]
        else:
            mode = "single"
            primary, secondary = top["Model"], None

        selected_models[ticker] = {
            "model": MODEL_NAME,
            "score": round(safe_float(top["Sharpe"]), 4),
            "return": round(safe_float(top["Final_Portfolio"]), 2),
            "sharpe": round(safe_float(top["Sharpe"]), 3),
            "drawdown": round(safe_float(top["Drawdown"]), 2),
            "sortino": None,
            "turnover": None,
            "trade_count": safe_int(top.get("Trade_Count", 0)),
            "precision": {
                "long":   safe_float(top.get("Precision_Long", 0.0)),
                "short":  safe_float(top.get("Precision_Short", 0.0)),
                "trades": safe_float(top.get("Precision_Trades", 0.0)),
            },
            "stability": {},
            "regime": "unknown",
            "rl_profile": "fast",
            "artifact": {
                "path": top["artifact_path"],
                "vecnorm": top["vecnorm_path"],
                "features": None,
                "load_ms": 180,
                "mem_mb": 512,
                "exists": os.path.exists(top["artifact_path"]),
            },
            "selection": {
                "mode": mode,
                "primary": primary,
                "secondary": secondary,
            },
        }

    with open(SELECTOR_JSON_PATH, "w") as f:
        json.dump(selected_models, f, indent=2)

    print(f"Final enhanced PPO selector JSON saved to → {SELECTOR_JSON_PATH}")

if __name__ == "__main__":
    # Entry point: choose the symbols that can be trained, run the
    # walk-forward training (one ticker at a time in test mode, on a thread
    # pool otherwise), then rebuild the aggregated selector files.
    logging.info(f"RUN_RESULTS_DIR   = {RUN_RESULTS_DIR}")
    logging.info(f"FINAL_MODEL_DIR  = {FINAL_MODEL_DIR}")
    logging.info(f"BASE_RESULTS_DIR = {BASE_RESULTS_DIR}")

    # --- Filter 1: enough rows per symbol ---
    min_rows = WINDOW_SIZE + 50  # small buffer so we have at least one window
    all_symbols = df["Symbol"].value_counts()
    candidate_symbols = []

    for sym, n in all_symbols.items():
        if n >= min_rows:
            candidate_symbols.append(sym)
        else:
            logging.warning(f"Skipping {sym}: only {n} rows (< {min_rows} required)")

    # An empty candidate list is only logged; execution continues below.
    if not candidate_symbols:
        logging.error("No symbols have enough rows for the current WINDOW_SIZE. Nothing to train.")
    else:
        logging.info(f"Training candidate symbols: {candidate_symbols}")

    # --- Filter 2: required columns (depends on the feature flags) ---
    needed_cols = ["Close", "Datetime"]
    if ENABLE_WAVELET:
        needed_cols.append("Denoised_Close")
    if ENABLE_SENTIMENT:
        needed_cols.append("SentimentScore")

    valid_symbols = []
    for sym in candidate_symbols:
        # NOTE(review): selecting rows does not change the column set, so this
        # check gives the same answer for every symbol.
        cols = set(df.loc[df["Symbol"] == sym].columns)
        missing = [c for c in needed_cols if c not in cols]
        if missing:
            logging.warning(f"Skipping {sym}: missing required cols {missing}")
        else:
            valid_symbols.append(sym)

    if not valid_symbols:
        logging.error("No symbols passed the feature/column checks. Nothing to train.")
    else:
        logging.info(f"Final training universe: {valid_symbols}")

    all_results = []

    if test_mode:
        # Optional: shrink timesteps and/or window size in test mode
        # This rebinds the module-level TIMESTEPS, which process_ticker reads
        # when it is called.
        TIMESTEPS = 100_000   # lighter test
        # WINDOW_SIZE = 2000  # uncomment if you want faster test runs
        # STEP_SIZE   = 500

        # Only the test tickers that survived both filters are trained.
        test_stocks = ["AAPL", "NVDA", "MSFT"]
        present = [s for s in test_stocks if s in valid_symbols]
        if not present:
            logging.warning("Test mode: none of ['AAPL','NVDA','MSFT'] present after filters.")
        else:
            logging.info(f"Test mode: running on {present}")

        # Sequential run; process_ticker returns [] on failure.
        for sym in present:
            logging.info(f">>> [TEST_MODE] Processing {sym}")
            res = process_ticker(sym)
            logging.info(f"{sym}: produced {len(res)} window summaries")
            if res:
                all_results.extend(res)

        summary_path = os.path.join(RUN_RESULTS_DIR, "summary_test_mode.csv")
        if all_results:
            pd.DataFrame(all_results).to_csv(summary_path, index=False)
            logging.info(f"Test-mode summary saved to {summary_path}")
        else:
            logging.warning("Test mode finished but no results were generated (no windows, or all skipped).")

    else:
        logging.info("Starting full parallel PPO walkforward run...")
        # run_parallel_tickers already writes summary.csv under RUN_RESULTS_DIR
        # through its default out_path; the write below targets the same file.
        summary_results = run_parallel_tickers(valid_symbols)
        if not summary_results:
            logging.warning("No results generated in full run (check logs for skips/length issues).")
        else:
            summary_path = os.path.join(RUN_RESULTS_DIR, "summary.csv")
            pd.DataFrame(summary_results).to_csv(summary_path, index=False)
            logging.info(f"Summary saved to {summary_path}")

    # The selector is rebuilt in both modes; a failure is logged, not raised.
    try:
        build_ppo_selector()
    except Exception as e:
        logging.error(f"build_ppo_selector failed: {e}")

Gym has been unmaintained since 2022 and does not support NumPy 2.0 amongst other critical functionality.
Please upgrade to Gymnasium, the maintained drop-in replacement of Gym, or contact the authors of your software and request that they upgrade.
See the migration guide at https://gymnasium.farama.org/introduction/migration_guide/ for additional information.
2025-12-26 00:44:30,958 - INFO - RUN_RESULTS_DIR   = /content/drive/MyDrive/Results_May_2025/ppo_walkforward_results_20251226_0044
2025-12-26 00:44:30,960 - INFO - FINAL_MODEL_DIR  = /content/drive/MyDrive/Results_May_2025/ppo_models_master
2025-12-26 00:44:30,961 - INFO - BASE_RESULTS_DIR = /content/drive/MyDrive/Results_May_2025
2025-12-26 00:44:31,078 - INFO - Training candidate symbols: ['AAPL', 'QCOM', 'COST', 'RTX', 'BRK-B', 'SBUX', 'TMO', 'BAC', 'TSLA', 'CRM', 'AVGO', 'TXN', 'UNH', 'ADBE', 'AMGN', 'UNP', 'AMD', 'ACN', 'AMZN', 'ABT', 'WMT', 'ABBV', 'XOM', 'PFE', 'PM', 'CSCO', 'PG', 'LLY', 'MA', 'MCD', 'META', 'MRK', 'JPM', 

Aggregated PPO selector saved to → /content/drive/MyDrive/Results_May_2025/ppo_model_selector_FULL.csv
Final enhanced PPO selector JSON saved to → /content/drive/MyDrive/Results_May_2025/ppo_model_selector_final.json


In [None]:
# PPO walkforward training + selector
import os, gc, time, json, logging, glob
import shutil
from threading import Lock
from datetime import datetime, timedelta

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt  # optional for ad-hoc plots

import torch
import gymnasium as gym
from gymnasium.spaces import Box as GBox

import yfinance as yf
from gym_anytrading.envs import StocksEnv

from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
from stable_baselines3.common.utils import set_random_seed

import warnings
warnings.filterwarnings("ignore", category=UserWarning, module="gymnasium")
# ---- Sharpe annualization: sqrt of the assumed bars per year (252 days x 6.5 hours) ----
def _annualization_factor(_df_like=None) -> float:
    """Return sqrt(252 * 6.5), the factor used to annualize per-bar Sharpe.

    ``_df_like`` is accepted so call sites can pass their data window, but it
    is not inspected.
    """
    trading_days = 252
    hours_per_session = 6.5
    return np.sqrt(trading_days * hours_per_session)

warnings.filterwarnings("ignore", category=DeprecationWarning, module="jupyter_client.session")
warnings.filterwarnings("ignore", message=".*Gym has been unmaintained.*")

# Keep a compute_enhanced_features that is already defined in this session;
# only install the minimal stand-in below when the name does not exist yet.
try:
    compute_enhanced_features  # type: ignore
except NameError:
    def compute_enhanced_features(df_in: pd.DataFrame) -> pd.DataFrame:
        """Fallback feature builder: normalise timestamps and require ``Close``.

        Works on a copy of ``df_in``. When a ``Datetime`` column is present it
        is parsed to datetimes and the rows are sorted by it with a fresh
        0..n-1 index.

        Raises:
            ValueError: if the frame has no ``Close`` column.
        """
        frame = df_in.copy()
        has_timestamps = "Datetime" in frame.columns
        if has_timestamps:
            frame["Datetime"] = pd.to_datetime(frame["Datetime"])
            frame = frame.sort_values("Datetime").reset_index(drop=True)
        if "Close" in frame.columns:
            return frame
        raise ValueError("compute_enhanced_features: missing required column 'Close'")

# Fixed seed, set once for the whole cell.
set_random_seed(42)

# --- Output locations ---
# Each execution gets its own results folder (minute-resolution tag); the
# model and QC folders are shared across executions.
BASE_RESULTS_DIR = "/content/drive/MyDrive/Results_May_2025"
RUN_TAG = datetime.now().strftime("%Y%m%d_%H%M")

RUN_RESULTS_DIR = os.path.join(BASE_RESULTS_DIR, f"ppo_walkforward_results_{RUN_TAG}")
FINAL_MODEL_DIR = os.path.join(BASE_RESULTS_DIR, "ppo_models_master")
QC_TOP_DIR      = os.path.join(BASE_RESULTS_DIR, "ppo_models_QC_TOP")

os.makedirs(QC_TOP_DIR, exist_ok=True)
os.makedirs(RUN_RESULTS_DIR, exist_ok=True)
os.makedirs(FINAL_MODEL_DIR, exist_ok=True)

# Aggregated selector outputs
SELECTOR_FULL_PATH = os.path.join(BASE_RESULTS_DIR, "ppo_model_selector_FULL.csv")
SELECTOR_JSON_PATH = os.path.join(BASE_RESULTS_DIR, "ppo_model_selector_final.json")
MODEL_NAME = "PPO"

# Global skip aggregation (thread-safe)
# record_skips_global appends to this CSV while holding SKIP_LOCK.
SKIP_AGG_PATH = os.path.join(RUN_RESULTS_DIR, "skipped_windows_global.csv")
SKIP_LOCK = Lock()

# Logging Setup
# force=True replaces any handlers already installed on the root logger.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    force=True
)

# Flags
# NOTE(review): ENABLE_SLO, ENABLE_PLOTS, LIVE_MODE, SIM_LATENCY_MS and BROKER
# are not read in the part of the file visible here - confirm where they apply.
ENABLE_SENTIMENT = False
ENABLE_SLO       = True
ENABLE_WAVELET   = True
test_mode        = True            # set False for full universe
ENABLE_PLOTS     = False
LIVE_MODE        = False           # set True to run simple live/paper loop
SIM_LATENCY_MS   = 0               # broker latency simulation; 0 = off
BROKER           = "log"           # "log" = do not place orders, just log

# Global training settings
WINDOW_SIZE = 3500
STEP_SIZE   = 500
TIMESTEPS   = 150_000  # overridden in test_mode block to smaller value


# --- Dataset ---
# The path is relative to the working directory; a missing file stops the
# cell here.
DATA_PATH = "multi_stock_feature_engineered_dataset.csv"
if not os.path.exists(DATA_PATH):
    raise FileNotFoundError("Required feature-engineered dataset not found!")

df = pd.read_csv(DATA_PATH)
df["Datetime"] = pd.to_datetime(df["Datetime"])

# Wavelet fallback
# Without a precomputed Denoised_Close the raw Close is copied into it, so
# code that reads Denoised_Close still runs (on un-denoised prices).
if ENABLE_WAVELET and "Denoised_Close" not in df.columns:
    logging.warning("ENABLE_WAVELET=True but 'Denoised_Close' missing; "
                    "falling back to Close->Denoised_Close.")
    df["Denoised_Close"] = df["Close"]


def record_skips_global(ticker: str, skipped_windows: list,
                        total_windows: int = None, fully_skipped: bool = False):
    """Append skipped windows to the global skip log.

    Writes to the CSV at ``SKIP_AGG_PATH`` while holding ``SKIP_LOCK``; the
    header row is written when the file does not exist yet.

    Args:
        ticker: Symbol the skips belong to.
        skipped_windows: Window names; the integer after ``_window`` is
            logged, or a blank cell when the name cannot be parsed.
        total_windows: Window count for the ticker; logged blank when None.
        fully_skipped: When true, a single ``ALL`` row is written instead of
            one row per window.

    Returns:
        None. Does nothing when there are no windows and ``fully_skipped`` is
        false.
    """
    if not (skipped_windows or fully_skipped):
        return
    import csv

    total_cell = "" if total_windows is None else total_windows

    def _window_number(name):
        # Expected shape: "<anything>_window<N>".
        try:
            _, suffix = name.split("_window")
            return int(suffix)
        except Exception:
            return ""

    with SKIP_LOCK:
        needs_header = not os.path.exists(SKIP_AGG_PATH)
        with open(SKIP_AGG_PATH, "a", newline="") as fh:
            writer = csv.writer(fh)
            if needs_header:
                writer.writerow(["Ticker", "Window", "FullySkipped", "TotalWindows"])
            if fully_skipped:
                writer.writerow([ticker, "ALL", True, total_cell])
            else:
                for name in skipped_windows:
                    writer.writerow([ticker, _window_number(name), False, total_cell])


# Keyword arguments passed to ContinuousPositionEnv wherever an env is built.
# The env rejects any key it does not know.
ENV_KWARGS = dict(
    window_size=10,          # forwarded to the parent StocksEnv constructor
    cost_rate=0.0002,        # cost_rate + slip_rate is charged per unit of
    slip_rate=0.0003,        # |position change| on an executed trade

    k_alpha=0.0,             # weight of (position return - asset return) in the reward
    k_mom=0.15,              # weight of position * momentum signal in the reward
    k_sent=(0.01 if ENABLE_SENTIMENT else 0.0),  # weight of SentimentScore in the reward
    mom_source="denoised",   # "macd" uses MACD_Line if present; otherwise Denoised_Close slope
    mom_lookback=20,         # bars spanned by the Denoised_Close slope

    min_trade_delta=0.08,    # smallest |target - current position| that triggers a trade
    cooldown=10,             # minimum ticks between two executed trades

    reward_clip=0.05,        # reward is clipped to [-reward_clip, +reward_clip]
    k_vol=0.00,              # stored on the env; not used by the visible step()
    k_dd=0.00,               # stored on the env; not used by the visible step()
)


class ContinuousPositionEnv(StocksEnv):
    """StocksEnv variant driven by a continuous target position in [-1, 1].

    The env keeps its own bookkeeping (``nav`` starting at 1.0, ``pos``,
    ``trade_count``) and computes its own reward. The parent class is only
    stepped to advance the tick and produce the observation; the parent's
    reward is discarded.
    """

    def __init__(self, df, frame_bound, **kwargs):
        """Build the env.

        Args:
            df: Price/feature frame; its index is reset before it is handed
                to the parent.
            frame_bound: Passed through to the parent constructor.
            **kwargs: Must contain ``window_size``; the optional keys are the
                ones popped below.

        Raises:
            ValueError: if ``window_size`` is missing or an unknown key is
                passed.
        """
        # Require window_size from ENV_KWARGS
        if "window_size" not in kwargs:
            raise ValueError("ContinuousPositionEnv requires window_size (pass via ENV_KWARGS).")

        window_size = int(kwargs.pop("window_size"))

        # Pull params (all defaults live in ENV_KWARGS; these are just safety fallbacks)
        # NOTE(review): the fallbacks for min_trade_delta (0.04) and cooldown (6)
        # differ from the ENV_KWARGS values (0.08 and 10).
        cost_rate       = float(kwargs.pop("cost_rate", 0.0002))
        slip_rate       = float(kwargs.pop("slip_rate", 0.0003))
        k_alpha         = float(kwargs.pop("k_alpha", 0.0))
        k_mom           = float(kwargs.pop("k_mom", 0.15))
        k_sent          = float(kwargs.pop("k_sent", 0.0))
        mom_source      = str(kwargs.pop("mom_source", "denoised"))
        mom_lookback    = int(kwargs.pop("mom_lookback", 20))
        min_trade_delta = float(kwargs.pop("min_trade_delta", 0.04))
        cooldown        = int(kwargs.pop("cooldown", 6))
        reward_clip     = float(kwargs.pop("reward_clip", 0.05))
        k_vol           = float(kwargs.pop("k_vol", 0.0))
        k_dd            = float(kwargs.pop("k_dd", 0.0))

        # Fail fast on unexpected env kwargs
        if kwargs:
            raise ValueError(f"Unexpected env kwargs: {list(kwargs.keys())}")

        super().__init__(
            df=df.reset_index(drop=True),
            frame_bound=frame_bound,
            window_size=window_size
        )

        # Rebuild the observation space as a gymnasium Box with the same
        # bounds, shape and dtype.
        if isinstance(self.observation_space, gym.spaces.Box):
            self.observation_space = GBox(
                low=self.observation_space.low,
                high=self.observation_space.high,
                shape=self.observation_space.shape,
                dtype=self.observation_space.dtype,
            )

        # Stored only; the visible step() does not read k_vol or k_dd.
        self.k_vol = k_vol
        self.k_dd  = k_dd

        self.ret_history = []
        self.nav_history = []
        self.peak_nav    = 1.0
        self.trade_count = 0

        # One continuous action: the desired position.
        self.action_space = GBox(low=-1.0, high=1.0, shape=(1,), dtype=np.float32)


        self.cost_rate       = cost_rate
        self.slip_rate       = slip_rate
        self.k_alpha         = k_alpha
        self.k_mom           = k_mom
        self.k_sent          = k_sent
        self.mom_source      = mom_source
        self.mom_lookback    = mom_lookback
        self.min_trade_delta = min_trade_delta
        self.cooldown        = cooldown
        self.reward_clip     = reward_clip

        self.nav = 1.0
        self.pos = 0.0
        # Starting at -cooldown means the cooldown test in step() passes for
        # any tick >= 0, so the first trade is not blocked.
        self._last_trade_step = -self.cooldown

    def reset(self, **kwargs):
        """Reset the parent and this env's bookkeeping; return ``(obs, info)``.

        ``info`` carries ``nav``, ``pos`` and ``trade_count``.
        """
        out = super().reset(**kwargs)
        # Accept a parent that returns either (obs, info) or obs alone.
        if isinstance(out, tuple):
            obs, info = out
        else:
            obs, info = out, {}

        self.nav = 1.0
        self.pos = 0.0
        self._last_trade_step = -self.cooldown

        self.trade_count = 0
        self.ret_history = []
        self.nav_history = [self.nav]
        self.peak_nav    = self.nav

        info = info or {}
        info.update({
            "nav": self.nav,
            "pos": self.pos,
            "trade_count": int(self.trade_count),

        })
        return obs, info

    def _step_parent_hold(self):
        """Step the parent env and return ``(obs, terminated, truncated, info)``.

        The parent's reward is dropped. Both the 5-tuple and the 4-tuple step
        return shapes are handled.
        """
        # presumably action 2 is a no-op for the parent - TODO confirm against StocksEnv
        step_result = super().step(2)
        if len(step_result) == 5:
            obs, _env_rew, terminated, truncated, info = step_result
        else:
            obs, _env_rew, done, info = step_result
            terminated, truncated = bool(done), False
        return obs, terminated, truncated, info

    def _ret_t(self):
        """Return the Close-to-Close simple return from the previous row to the current tick.

        Returns 0.0 when the previous Close is not positive.
        """
        cur  = float(self.df.loc[self._current_tick, "Close"])
        prev = float(self.df.loc[max(self._current_tick - 1, 0), "Close"])
        return 0.0 if prev <= 0 else (cur - prev) / prev

    def _mom_signal(self):
        """Return a momentum signal squashed by tanh, or 0.0 when no source applies."""
        # MACD source: current MACD_Line scaled by the std of up to the last
        # 201 values.
        if self.mom_source == "macd" and "MACD_Line" in self.df.columns:
            recent = self.df["MACD_Line"].iloc[max(self._current_tick - 200, 0):self._current_tick + 1]
            return float(np.tanh(
                float(self.df.loc[self._current_tick, "MACD_Line"]) /
                (1e-6 + float(recent.std()))
            ))

        # Otherwise: per-bar slope of Denoised_Close over mom_lookback bars,
        # relative to the previous Close.
        if "Denoised_Close" in self.df.columns and self._current_tick - self.mom_lookback >= 0:
            now  = float(self.df.loc[self._current_tick, "Denoised_Close"])
            then = float(self.df.loc[self._current_tick - self.mom_lookback, "Denoised_Close"])
            base = float(self.df.loc[max(self._current_tick - 1, 0), "Close"])
            slope = (now - then) / max(self.mom_lookback, 1)
            return float(np.tanh(10.0 * (slope / max(abs(base), 1e-6))))

        return 0.0

    def step(self, action):
        """Apply one action and return ``(obs, reward, terminated, truncated, info)``.

        The action is clipped to [-1, 1] and treated as the target position.
        The return of the current bar is earned by the position held before
        this step; the new position takes effect only after NAV and reward
        are computed.
        """
        a = float(np.array(action).squeeze())
        target_pos = float(np.clip(a, -1.0, 1.0))

        r_t = self._ret_t()
        base_ret = self.pos * r_t

        # A trade executes only if the change is large enough AND the
        # cooldown since the last executed trade has elapsed.
        changed = (
            abs(target_pos - self.pos) >= self.min_trade_delta
        ) and (
            (self._current_tick - self._last_trade_step) >= self.cooldown
        )

        delta_pos = (target_pos - self.pos) if changed else 0.0
        trade_cost = (self.cost_rate + self.slip_rate) * abs(delta_pos)

        # Position return minus the asset return itself.
        rel_alpha = base_ret - r_t
        mom_term = self.pos * self._mom_signal()

        alpha_term = self.k_alpha * rel_alpha

        sent_term = 0.0
        if ENABLE_SENTIMENT and "SentimentScore" in self.df.columns:
            sent_term = self.k_sent * float(self.df.loc[self._current_tick, "SentimentScore"])

        shaped = base_ret + alpha_term + (self.k_mom * mom_term) + sent_term - trade_cost
        reward = float(np.clip(shaped, -self.reward_clip, self.reward_clip))


        # NAV uses only the realised return and cost, not the shaping terms.
        self.nav *= (1.0 + base_ret - trade_cost)
        self.nav_history.append(self.nav)
        self.peak_nav = max(self.peak_nav, self.nav)

        executed_trade = False
        if changed:
            self.pos = target_pos
            self._last_trade_step = self._current_tick
            self.trade_count += 1
            executed_trade = True

        # Advance the parent after the bookkeeping above, which is based on
        # the pre-step tick.
        obs, terminated, truncated, info = self._step_parent_hold()
        info = info or {}
        info.update({
            "ret_t": r_t,
            "nav": self.nav,
            "pos": self.pos,
            "trade_cost": trade_cost,
            "base_ret": base_ret,
            "rel_alpha": rel_alpha,
            "mom": mom_term,
            "changed": bool(changed),
            "executed_trade": bool(executed_trade),
            "trade_count": int(self.trade_count),
            "delta_pos": float(delta_pos),
        })
        return obs, reward, terminated, truncated, info

def get_mu_sigma(model, obs):
    """Return ``(mu, sigma)`` of the policy's Gaussian for a single scalar action.

    Runs the policy's own pipeline (obs_to_tensor -> extract_features ->
    mlp_extractor -> action_net) without gradients; sigma is
    ``exp(policy.log_std)``. Both values are returned as Python floats, so the
    mean and the std must each squeeze to one element.
    """
    policy = model.policy
    with torch.no_grad():
        obs_tensor, _ = policy.obs_to_tensor(obs)
        latent_pi, _ = policy.mlp_extractor(policy.extract_features(obs_tensor))
        mean_np = policy.action_net(latent_pi).detach().cpu().numpy()
        std_np = policy.log_std.exp().detach().cpu().numpy()
    return float(mean_np.squeeze()), float(std_np.squeeze())

def get_walk_forward_windows(df_in, window_size=3500, step_size=500, min_len=1200):
    """Return the ``(start, end)`` row bounds of the walk-forward windows.

    Starts advance by ``step_size`` from 0 while ``start < len(df_in) - min_len``;
    a window is kept only when ``start + window_size`` is strictly less than
    ``len(df_in)``.
    """
    n_rows = len(df_in)
    windows = []
    for begin in range(0, n_rows - min_len, step_size):
        finish = begin + window_size
        if finish < n_rows:
            windows.append((begin, finish))
    return windows

def save_quantconnect_model(artifact, prefix, save_dir):
    """Save/copy QC-compatible artifacts into save_dir.

    Every output is named ``<prefix>_<suffix>`` inside ``save_dir``:
    ``model.zip``, ``vecnorm.pkl``, ``features.json``,
    ``probability_config.json`` and ``model_info.json``. Each step is
    best-effort: a failure is logged as a warning and the remaining steps
    still run.

    Args:
        artifact: Dict read for the keys ``model``, ``model_path``,
            ``vecnorm_path``, ``features`` and ``result``.
        prefix: File-name prefix for every output.
        save_dir: Target directory; created when missing.
    """
    import shutil

    os.makedirs(save_dir, exist_ok=True)

    def _target(suffix):
        return os.path.join(save_dir, f"{prefix}_{suffix}")

    # --- Model zip ---
    # An in-memory model is saved only when no zip exists yet; a zip on disk
    # is copied over the destination unless source and destination coincide.
    zip_dst = _target("model.zip")
    live_model = artifact.get("model", None)
    zip_src = artifact.get("model_path", None)

    try:
        if live_model is not None:
            if not os.path.exists(zip_dst):
                live_model.save(zip_dst)
        elif zip_src and os.path.exists(zip_src):
            if os.path.abspath(zip_src) != os.path.abspath(zip_dst):
                shutil.copyfile(zip_src, zip_dst)
        elif not os.path.exists(zip_dst):
            logging.warning(f"[QC SAVE] Missing model for {prefix}: no model_obj and no valid model_path.")
    except Exception as e:
        logging.warning(f"[QC SAVE] Model handling issue for {prefix}: {e}")

    # --- VecNormalize statistics ---
    norm_src = artifact.get("vecnorm_path")
    if not (norm_src and os.path.exists(norm_src)):
        logging.warning(f"[QC SAVE] VecNormalize missing for {prefix}: vecnorm_path not found.")
    else:
        try:
            norm_dst = _target("vecnorm.pkl")
            if os.path.abspath(norm_src) != os.path.abspath(norm_dst):
                shutil.copyfile(norm_src, norm_dst)
        except Exception as e:
            logging.warning(f"[QC SAVE] VecNormalize handling issue for {prefix}: {e}")

    # --- Feature list ---
    try:
        with open(_target("features.json"), "w") as fh:
            json.dump({"features": artifact.get("features", [])}, fh)
    except Exception as e:
        logging.warning(f"[QC SAVE] Could not write features.json for {prefix}: {e}")

    # --- Probability config ---
    # The threshold comes from the window's result row; 0.2 when unavailable.
    try:
        try:
            threshold = float(artifact.get("result", {}).get("Action_Threshold", 0.2))
        except Exception:
            threshold = 0.2

        prob_cfg = {"threshold": threshold, "use_confidence": True, "inference_mode": "deterministic"}
        with open(_target("probability_config.json"), "w") as fh:
            json.dump(prob_cfg, fh)
    except Exception as e:
        logging.warning(f"[QC SAVE] Could not write probability_config.json for {prefix}: {e}")

    # --- Model info ---
    try:
        summary = artifact.get("result", {})
        with open(_target("model_info.json"), "w") as fh:
            json.dump({
                "model": "PPO",
                "ticker": summary.get("Ticker"),
                "window": summary.get("Window"),
                "date_trained": datetime.today().strftime("%Y-%m-%d"),
                "framework": "stable-baselines3",
                "input_features": artifact.get("features", []),
                "final_portfolio": summary.get("PPO_Portfolio"),
                "buy_hold": summary.get("BuyHold"),
                "sharpe": summary.get("Sharpe"),
            }, fh)
    except Exception as e:
        logging.warning(f"[QC SAVE] Could not write model_info.json for {prefix}: {e}")

    logging.info(f"[QC SAVE] Saved QC artifacts for {prefix}")

def load_model_and_env(prefix):
    """Load the saved PPO policy for *prefix* and return it with an env factory.

    Returns:
        (model, make_env): ``model`` is the PPO loaded on CPU from
        ``<FINAL_MODEL_DIR>/<prefix>_model.zip``; ``make_env(df_window)``
        builds a single-env DummyVecEnv over ``df_window`` and, when
        ``<prefix>_vecnorm.pkl`` exists, wraps it with the saved VecNormalize
        statistics. The returned env always has ``training`` and
        ``norm_reward`` set to False.
    """
    vec_path = os.path.join(FINAL_MODEL_DIR, f"{prefix}_vecnorm.pkl")
    model = PPO.load(os.path.join(FINAL_MODEL_DIR, f"{prefix}_model.zip"), device="cpu")

    def make_env(df_window):
        # Same frame bound as used at training time: skip the first 50 rows,
        # stop 3 rows before the end.
        bounds = (50, len(df_window) - 3)

        def _build_single_env():
            return ContinuousPositionEnv(df=df_window, frame_bound=bounds, **ENV_KWARGS)

        vec_env = DummyVecEnv([_build_single_env])
        if os.path.exists(vec_path):
            vec_env = VecNormalize.load(vec_path, vec_env)
        # Inference only: freeze running statistics and leave rewards raw.
        vec_env.training = False
        vec_env.norm_reward = False
        return vec_env

    return model, make_env

def latest_df_for_symbol(symbol, horizon_days=5, interval="1m"):
    """Fetch recent bars for *symbol* and rebuild features like training.

    Args:
        symbol: Ticker passed to ``yf.download``.
        horizon_days: How many calendar days back from now (UTC) to request.
        interval: yfinance bar interval string.

    Returns:
        A DataFrame with the index reset to a column, a ``Symbol`` column and
        whatever ``compute_enhanced_features`` adds; ``None`` when the
        download returns nothing.
    """
    now_utc = datetime.utcnow()
    start = now_utc - timedelta(days=horizon_days)
    # yfinance treats `end` as exclusive. Passing today's date would drop
    # every bar from the current session, so request up to tomorrow.
    end_exclusive = now_utc + timedelta(days=1)
    df_live = yf.download(
        symbol,
        start=start.strftime("%Y-%m-%d"),
        end=end_exclusive.strftime("%Y-%m-%d"),
        interval=interval,
        progress=False,
        auto_adjust=False,
    )
    if df_live is None or df_live.empty:
        return None
    # Newer yfinance releases return (Price, Ticker) MultiIndex columns even
    # for a single symbol; flatten so df_live["Close"] is a Series again.
    # No-op when the columns are already flat.
    if isinstance(df_live.columns, pd.MultiIndex):
        df_live.columns = df_live.columns.get_level_values(0)
    df_live = df_live.reset_index()
    df_live["Symbol"] = symbol
    df_live = compute_enhanced_features(df_live)
    # Without a wavelet-denoised series, fall back to the raw close.
    if ENABLE_WAVELET and "Denoised_Close" not in df_live.columns:
        df_live["Denoised_Close"] = df_live["Close"]
    return df_live

def predict_latest(symbol, prefix):
    """Produce a BUY/SELL/HOLD signal for the most recent bars of *symbol*.

    Loads the model saved under *prefix*, fetches fresh bars, replays the last
    (up to 2500) rows through the environment with a zero action, then asks
    the policy for a deterministic action on the final observation.

    Returns:
        ``None`` when fewer than 100 rows of data are available, otherwise a
        dict with keys ``signal``, ``confidence``, ``action``, ``threshold``,
        ``ts``, ``price``, ``mu`` and ``sigma``.
    """
    # --- load per-model threshold ---
    cfg_path = os.path.join(FINAL_MODEL_DIR, f"{prefix}_probability_config.json")
    thr = 0.2
    if os.path.exists(cfg_path):
        try:
            with open(cfg_path, "r") as f:
                thr = float(json.load(f).get("threshold", 0.2))
        except Exception as e:
            # Best-effort: a broken config must not stop inference.
            logging.warning(f"Could not read threshold from {cfg_path}; using 0.2: {e}")
            thr = 0.2

    model, make_env = load_model_and_env(prefix)
    live_df = latest_df_for_symbol(symbol)
    if live_df is None or len(live_df) < 100:
        logging.warning("No fresh data yet for live inference.")
        return None

    df_window = live_df.iloc[-2500:].reset_index(drop=True) if len(live_df) > 2500 else live_df.copy()

    env = make_env(df_window)
    try:
        obs = env.reset()
        if isinstance(obs, tuple):
            obs, _ = obs

        # fast-forward with HOLD
        for _ in range(len(df_window) - 1):
            obs, _, dones, infos = env.step([np.array([0.0], dtype=np.float32)])
            if isinstance(dones, (np.ndarray, list, tuple)) and len(dones) and dones[0]:
                # A Stable-Baselines3 VecEnv auto-resets on done, so `obs` is
                # already the first observation of a NEW episode. The real
                # last observation is in infos[0]["terminal_observation"]
                # (normalized by VecNormalize when that wrapper is present).
                info0 = infos[0] if isinstance(infos, (list, tuple)) and len(infos) else {}
                terminal = info0.get("terminal_observation") if isinstance(info0, dict) else None
                if isinstance(terminal, np.ndarray):
                    # Restore the leading batch dimension the VecEnv obs has.
                    obs = terminal[None, ...]
                break

        action, _ = model.predict(obs, deterministic=True)
        mu, sigma = get_mu_sigma(model, obs)
    finally:
        # Always release the environment; this runs on every poll.
        try:
            env.close()
        except Exception as e:
            logging.debug(f"env.close() failed for {prefix}: {e}")

    a = float(np.array(action).squeeze())

    # --- thresholded signal using loaded thr ---
    if a > thr:
        signal = "BUY"
    elif a < -thr:
        signal = "SELL"
    else:
        signal = "HOLD"

    conf = abs(a)
    ts = df_window["Datetime"].iloc[-1] if "Datetime" in df_window.columns else None
    price = float(df_window["Close"].iloc[-1])

    return dict(
        signal=signal,
        confidence=conf,
        action=a,
        threshold=thr,
        ts=ts,
        price=price,
        mu=mu,
        sigma=sigma,
    )

def place_order(signal, qty=1):
    """Route an order to the configured broker stub.

    Sleeps for SIM_LATENCY_MS milliseconds when that value is positive, then
    only logs: a paper-trade line when BROKER is "log", otherwise a
    "not implemented" line naming the broker.
    """
    if SIM_LATENCY_MS > 0:
        delay_seconds = SIM_LATENCY_MS / 1000.0
        time.sleep(delay_seconds)

    if BROKER != "log":
        logging.info(f"[BROKER={BROKER}] {signal} x{qty} (not implemented)")
        return
    logging.info(f"[PAPER] {signal} x{qty}")

def live_loop(symbol, best_prefix):
    """Poll once a minute while LIVE_MODE is truthy.

    Each pass asks predict_latest for a signal, logs it and forwards it to
    place_order with a quantity of 1. Any exception raised during a pass is
    logged and the loop keeps going.
    """
    poll_seconds = 60
    while LIVE_MODE:
        try:
            prediction = predict_latest(symbol, best_prefix)
            if prediction:
                stamp = prediction["ts"]
                side = prediction["signal"]
                price = prediction["price"]
                confidence = prediction["confidence"]
                logging.info(f"{symbol} {stamp} | {side} @ {price:.2f} (conf {confidence:.2f})")
                place_order(side, qty=1)
        except Exception as e:
            logging.error(f"Live loop error: {e}")
        time.sleep(poll_seconds)  # one poll per minute

# Number of best-Sharpe windows per ticker that get exported to QC_TOP_DIR.
TOP_N_WINDOWS = 3

# PPO hyper-parameter preset returned by pick_params for symbols in fast_names.
# walkforward_ppo reads these keys through ppo_overrides.get(...):
#   lr -> learning_rate, n_steps -> n_steps, batch -> batch_size,
#   clip -> clip_range, ent -> ent_coef.
FAST = {
    "lr": 8e-5,
    "n_steps": 3072,
    "batch": 512,
    "clip": 0.2,
    "ent": 0.01,
}

# Preset returned by pick_params for every symbol NOT in fast_names:
# lower learning rate, tighter clip range and smaller entropy bonus than FAST.
SLOW = {
    "lr": 3e-5,
    "n_steps": 3072,
    "batch": 512,
    "clip": 0.16,
    "ent": 0.005,
}

# Symbols trained with the FAST preset.
fast_names = {
    "TSLA","NVDA","AMD","AVGO","AAPL","MSFT","AMZN","GOOGL","META","ADBE","CRM",
    "INTC","QCOM","TXN","ORCL","NEE","GE","XOM","CVX","LLY","NKE","SBUX"
}
# NOTE(review): pick_params never consults slow_names; any symbol absent from
# fast_names gets SLOW whether or not it is listed here.
slow_names = {
    "BRK-B","JPM","BAC","JNJ","UNH","MRK","PFE","ABBV","ABT","AMGN","PG","PEP","KO",
    "V","MA","WMT","MCD","TMO","DHR","ACN","IBM","LIN","PM","RTX","UPS","UNP","COST","HD","LOW"
}

def pick_params(symbol: str):
    """Return the FAST preset for symbols in fast_names, otherwise SLOW."""
    if symbol in fast_names:
        return FAST
    return SLOW

def export_qc_top_from_existing(ticker: str, top_n: int = 3):
    """Populate QC_TOP_DIR for a ticker whose models already exist on disk.

    Reads every ``summary*.csv`` under the walk-forward result folders, keeps
    the rows for *ticker*, ranks them by Sharpe and hands the best *top_n*
    distinct windows to ``save_quantconnect_model``, pointing it at the
    artifacts in FINAL_MODEL_DIR.

    The artifact prefix comes from the ``Prefix`` column when any row has one
    (rows without it are left out of the ranking instead of turning into the
    literal prefix "nan"). Otherwise the window index is reconstructed from
    the order of the ``Window`` start offsets, which is less robust.

    Args:
        ticker: Symbol to export.
        top_n: Maximum number of windows to export.

    Returns:
        None. Problems are reported through ``logging.warning``.
    """
    summary_files = glob.glob(os.path.join(BASE_RESULTS_DIR, "ppo_walkforward_results_*", "summary*.csv"))
    if not summary_files:
        logging.warning(f"[QC_TOP] No summary files found; cannot export QC_TOP for {ticker}.")
        return

    frames = []
    for p in summary_files:
        try:
            frames.append(pd.read_csv(p))
        except Exception as e:
            logging.warning(f"[QC_TOP] Could not read {p}: {e}")

    if not frames:
        logging.warning(f"[QC_TOP] Could not read any summary files; cannot export QC_TOP for {ticker}.")
        return

    combo = pd.concat(frames, ignore_index=True)

    if "Ticker" not in combo.columns:
        logging.warning("[QC_TOP] Summary files missing 'Ticker' column; cannot export.")
        return

    combo = combo[combo["Ticker"] == ticker].copy()
    if combo.empty or "Sharpe" not in combo.columns:
        logging.warning(f"[QC_TOP] No rows for {ticker} in summaries (or missing Sharpe); cannot export QC_TOP.")
        return

    # Ensure Sharpe is numeric so sorting works reliably
    combo["Sharpe"] = pd.to_numeric(combo["Sharpe"], errors="coerce")
    combo = combo.dropna(subset=["Sharpe"])
    if combo.empty:
        logging.warning(f"[QC_TOP] All Sharpe values were non-numeric for {ticker}; cannot export.")
        return

    use_prefix = ("Prefix" in combo.columns) and combo["Prefix"].notna().any()

    if use_prefix:
        # Robust path: use the saved Prefix directly. Rows from older
        # summaries that lack it are excluded so they cannot occupy a top-N
        # slot under the bogus prefix "nan".
        ranked = combo[combo["Prefix"].notna()].copy()
        ranked["__prefix__"] = ranked["Prefix"].astype(str)
    else:
        # Fallback: reconstruct WindowIdx from the window start offsets.
        if "Window" not in combo.columns:
            logging.warning(
                f"[QC_TOP] Summaries for {ticker} have neither 'Prefix' nor 'Window'; cannot export."
            )
            return

        def _window_start(w):
            try:
                s = str(w)
                return int(s.split("-")[0]) if "-" in s else np.nan
            except Exception:
                return np.nan

        combo["WindowStart"] = combo["Window"].apply(_window_start)
        combo = combo.sort_values(["WindowStart"]).reset_index(drop=True)
        combo["WindowIdx"] = combo.groupby("Ticker").cumcount() + 1

        ranked = combo.copy()
        ranked["__prefix__"] = ranked["WindowIdx"].apply(
            lambda widx: f"ppo_{ticker}_window{int(widx)}"
        )

    # The same window can appear in several summary files; keep only its best
    # row so top_n counts distinct models.
    top = (
        ranked.sort_values("Sharpe", ascending=False)
        .drop_duplicates(subset="__prefix__", keep="first")
        .head(top_n)
        .copy()
    )

    exported = 0

    for _, r in top.iterrows():
        prefix = str(r["__prefix__"])

        model_path = os.path.join(FINAL_MODEL_DIR, f"{prefix}_model.zip")
        vec_path   = os.path.join(FINAL_MODEL_DIR, f"{prefix}_vecnorm.pkl")

        if not (os.path.exists(model_path) and os.path.exists(vec_path)):
            logging.warning(f"[QC_TOP] Missing model/vecnorm for {prefix}; cannot export.")
            continue

        artifact_for_save = {
            "model": None,
            "model_path": model_path,
            "vecnorm_path": vec_path,
            "features": [],         # ok if unknown; QC can load features elsewhere
            "result": r.to_dict(),  # includes Sharpe, Action_Threshold, etc if present
            "prefix": prefix,
        }
        save_quantconnect_model(artifact_for_save, prefix, QC_TOP_DIR)
        exported += 1

    logging.info(f"[QC_TOP] Exported {exported}/{len(top)} QC artifacts for {ticker}.")

def walkforward_ppo(df_sym, ticker,
                    window_size=3500, step_size=500,
                    timesteps=150_000, learning_rate=1e-4,
                    ppo_overrides=None):
    """Train and evaluate one PPO model per walk-forward window of ``df_sym``.

    Windows come from ``get_walk_forward_windows``. A window whose
    ``<prefix>_model.zip`` and ``<prefix>_vecnorm.pkl`` both exist in
    FINAL_MODEL_DIR is skipped. Every other window is trained, then replayed
    deterministically on the same rows to collect NAV, buy-and-hold and a
    per-step log, scored (Sharpe, drawdown, signal precision/accuracy) and
    saved: model, VecNormalize statistics and two prediction CSVs. The
    TOP_N_WINDOWS windows with the highest Sharpe among those trained in this
    call are then passed to ``save_quantconnect_model`` for QC_TOP_DIR.

    Args:
        df_sym: Rows for a single ticker.
        ticker: Symbol, used in prefixes and log lines.
        window_size: Rows per window; also the minimum length of ``df_sym``.
        step_size: Offset between windows, forwarded to
            ``get_walk_forward_windows``.
        timesteps: ``total_timesteps`` for ``model.learn``.
        learning_rate: Used when ``ppo_overrides`` has no ``"lr"`` key.
        ppo_overrides: Optional dict with any of ``lr``, ``n_steps``,
            ``batch``, ``clip``, ``ent``.

    Returns:
        A list with one summary dict per window trained in this call. Empty
        when ``df_sym`` is too short or every window was already trained.

    Note:
        The per-window ``try`` has only a ``finally``: an exception during
        training or evaluation propagates to the caller after cleanup.
    """
    import heapq

    if ppo_overrides is None:
        ppo_overrides = {}

    if len(df_sym) < window_size:
        logging.warning(
            f"Skipping {ticker}: only {len(df_sym)} rows (min required: {window_size})"
        )
        return []

    results = []
    windows = get_walk_forward_windows(df_sym, window_size, step_size)
    # Min-heap of (Sharpe, prefix, meta); holds at most TOP_N_WINDOWS entries.
    top_heap = []
    skipped_windows = []

    # quick check: all windows already have model+vecnorm?
    all_done = True
    for idx in range(len(windows)):
        prefix = f"ppo_{ticker}_window{idx+1}"
        model_ok   = os.path.exists(os.path.join(FINAL_MODEL_DIR, f"{prefix}_model.zip"))
        vecnorm_ok = os.path.exists(os.path.join(FINAL_MODEL_DIR, f"{prefix}_vecnorm.pkl"))
        if not (model_ok and vecnorm_ok):
            all_done = False
            break

    if all_done:
        logging.info(f"Ticker {ticker} fully skipped (all {len(windows)} windows already complete).")
        record_skips_global(ticker, skipped_windows=[], total_windows=len(windows), fully_skipped=True)

        # Nothing is trained in this call, so QC_TOP_DIR is filled from
        # earlier summary CSVs instead.
        export_qc_top_from_existing(ticker, top_n=TOP_N_WINDOWS)
        return []



    for w_idx, (start, end) in enumerate(windows):
        window_start_time = time.time()
        gc.collect()

        prefix = f"ppo_{ticker}_window{w_idx+1}"
        model_path   = os.path.join(FINAL_MODEL_DIR, f"{prefix}_model.zip")
        vecnorm_path = os.path.join(FINAL_MODEL_DIR, f"{prefix}_vecnorm.pkl")

        if os.path.exists(model_path) and os.path.exists(vecnorm_path):
            logging.info(f"Skipping {ticker} | Window {w_idx+1}, already trained.")
            skipped_windows.append(f"{ticker}_window{w_idx+1}")
            continue

        missing = []
        if not os.path.exists(model_path):   missing.append("model.zip")
        if not os.path.exists(vecnorm_path): missing.append("vecnorm.pkl")
        logging.info(
            f"Will train {ticker} | Window {w_idx+1} because missing: {', '.join(missing)}"
        )

        df_window = df_sym.iloc[start:end].reset_index(drop=True)
        # Drops the last row when the window is very short or has odd length.
        # NOTE(review): the reason for requiring an even length is not
        # visible in this file - confirm.
        if len(df_window) <= 52 or len(df_window) % 2 != 0:
            df_window = df_window.iloc[:-1]

        frame_bound = (50, len(df_window) - 3)

        env = DummyVecEnv([lambda: ContinuousPositionEnv(
          df=df_window, frame_bound=frame_bound, **ENV_KWARGS
        )])

        env = VecNormalize(env, norm_obs=True, norm_reward=True, clip_obs=10.0)

        try:
            model = PPO(
                "MlpPolicy",
                env,
                verbose=0,
                device=("cuda" if torch.cuda.is_available() else "cpu"),
                learning_rate=ppo_overrides.get("lr", learning_rate),
                n_steps=ppo_overrides.get("n_steps", 256),
                batch_size=ppo_overrides.get("batch", 64),
                n_epochs=5,
                gamma=0.99,
                gae_lambda=0.95,
                clip_range=ppo_overrides.get("clip", 0.2),
                ent_coef=ppo_overrides.get("ent", 0.005),
                policy_kwargs=dict(net_arch=[64, 64]),
            )

            logging.info(f"Training {ticker} Window {w_idx+1}/{len(windows)}")
            model.learn(total_timesteps=timesteps)

            # Evaluation pass (on the same window the model was trained on)
            env.training = False
            env.norm_reward = False
            obs = env.reset()
            if isinstance(obs, tuple):
                obs, _ = obs

            # Both tracks start at 1.0 and are scaled by 100_000 for reporting.
            nav_track = [1.0]
            bh_track  = [1.0]
            step_log  = []
            executed_trade_count = 0
            signal_trade_count   = 0
            signal_trade_count_dyn   = 0   # dynamic-threshold diagnostic

            # Fixed threshold used only for the Signal_Trade_Count diagnostic.
            DIAG_THR = 0.2
            for i in range(len(df_window) - 1):
                action, _ = model.predict(obs, deterministic=True)
                mu, sigma = get_mu_sigma(model, obs)

                obs, rew, dones, infos = env.step(action)
                # VecEnv returns list/tuple of infos; otherwise it may be a dict
                if isinstance(infos, (list, tuple)):
                    info = infos[0] if len(infos) else {}
                elif isinstance(infos, dict):
                    info = infos
                else:
                    info = {}


                # NAV comes from the env; buy-and-hold compounds the env's ret_t.
                nav_track.append(float(info.get("nav", nav_track[-1])))
                bh_track.append(
                    bh_track[-1] * (1.0 + float(info.get("ret_t", 0.0)))
                )

                a = float(np.array(action).squeeze())
                dt_val = df_window["Datetime"].iloc[i+1] if "Datetime" in df_window.columns else None
                px     = float(df_window["Close"].iloc[i+1]) if "Close" in df_window.columns else np.nan
                # "signal" trades (threshold-based) - diagnostic only
                if a > DIAG_THR or a < -DIAG_THR:
                    signal_trade_count += 1
                # real trades, as reported by the env in info["executed_trade"]
                if bool(info.get("executed_trade", False)):
                    executed_trade_count += 1

                # next-bar return (to score BUY/SELL vs the *next* move)
                if i + 2 < len(df_window):
                    p0 = float(df_window["Close"].iloc[i+1])
                    p1 = float(df_window["Close"].iloc[i+2])
                    next_ret = 0.0 if p0 <= 0 else (p1 - p0) / p0
                else:
                    next_ret = 0.0

                # reward scalar (VecEnv returns arrays)
                rew_val = float(rew[0]) if isinstance(rew, (list, tuple, np.ndarray)) else float(rew)

                step_log.append({
                    "Index": i+1,
                    "Datetime": dt_val,
                    "Close": px,
                    "Action": a,
                    "mu": mu,
                    "sigma": sigma,
                    "nav": nav_track[-1],
                    "ret_t": float(info.get("ret_t", 0.0)),
                    "next_ret": float(next_ret),
                    "reward": rew_val,
                    "pos": float(info.get("pos", 0.0)),
                    "trade_cost": float(info.get("trade_cost", 0.0)),
                    "base_ret": float(info.get("base_ret", 0.0)),
                    "rel_alpha": float(info.get("rel_alpha", 0.0)),
                    "mom": float(info.get("mom", 0.0)),
                })

                # done handling (VecEnv)
                if isinstance(dones, (np.ndarray, list, tuple)):
                    if dones[0]:
                        break
                elif dones:
                    break


            # --- Metrics ---
            final_value = float(nav_track[-1]) * 100_000.0
            hold_value  = float(bh_track[-1])  * 100_000.0

            # dynamic action threshold for this window (prevents "no signals" windows)
            abs_actions = np.array([abs(float(r["Action"])) for r in step_log], dtype=float)
            if len(abs_actions) > 0:
                thr = float(np.quantile(abs_actions, 0.70))  # 70th percentile
                thr = float(np.clip(thr, 0.08, 0.30))
            else:
                thr = 0.2

            # Dynamic signal trade count (post-hoc diagnostic)
            signal_trade_count_dyn = int(np.sum(abs_actions > thr)) if len(abs_actions) > 0 else 0


            # Per-step NAV returns; the 1e-9 keeps a flat NAV from dividing by zero.
            returns = pd.Series(nav_track).pct_change().fillna(0.0)
            sharpe  = float((returns.mean() / (returns.std() + 1e-9)) * _annualization_factor(df_window))
            # Maximum peak-to-trough NAV decline, in percent.
            drawdown = float(
                ((pd.Series(nav_track).cummax() - pd.Series(nav_track)) /
                pd.Series(nav_track).cummax()).max() * 100.0
            )

            # Classification stats (now using thr)
            correct = 0
            total   = 0
            tp_buy = fp_buy = 0
            tp_sell = fp_sell = 0

            for r in step_log:
                a = float(r["Action"])
                ret_t = float(r.get("next_ret", 0.0))

                if a > thr:
                    sig = "BUY"
                elif a < -thr:
                    sig = "SELL"
                else:
                    sig = "HOLD"

                # A BUY is correct when the next bar rises, a SELL when it
                # falls; a flat next bar counts against either signal.
                if sig == "BUY":
                    if ret_t > 0:
                        tp_buy += 1; correct += 1
                    else:
                        fp_buy += 1
                    total += 1
                elif sig == "SELL":
                    if ret_t < 0:
                        tp_sell += 1; correct += 1
                    else:
                        fp_sell += 1
                    total += 1
                # HOLD not counted

            precision_long  = tp_buy  / (tp_buy  + fp_buy  + 1e-9)
            precision_short = tp_sell / (tp_sell + fp_sell + 1e-9)
            precision_trades = (tp_buy + tp_sell) / (
                (tp_buy + tp_sell) + (fp_buy + fp_sell) + 1e-9
            )
            step_accuracy = round(correct / total, 4) if total > 0 else 0.0
            # Trade_Count reports the trades the env flagged as executed.
            trade_count = int(executed_trade_count)

            # Save VecNormalize
            try:
                env.save(vecnorm_path)
            except Exception as e:
                logging.warning(f"Could not save VecNormalize for {ticker} {start}-{end}: {e}")
                # Recorded as None in meta["vecnorm_path"] for the QC export.
                vecnorm_path = None

            # Save model
            model.save(model_path)

            # Save detailed predictions
            pred_path = os.path.join(RUN_RESULTS_DIR, f"{prefix}_predictions.csv")
            pd.DataFrame(step_log).to_csv(pred_path, index=False)
            logging.info(f"Saved predictions to {pred_path}")

            # Save compat predictions with same thresholds as metrics
            compat_rows = []
            for r in step_log:
                a = r["Action"]

                if a > thr:
                    signal = "BUY"
                elif a < -thr:
                    signal = "SELL"
                else:
                    signal = "HOLD"
                compat_rows.append({
                    "Index": r["Index"],
                    "Datetime": r["Datetime"],
                    "Close": r["Close"],
                    "Action": a,
                    "Signal": signal,
                    "PortfolioValue": r["nav"],
                    "Reward": r.get("reward", np.nan),
                })
            compat_path = os.path.join(RUN_RESULTS_DIR, f"{prefix}_predictions_compat.csv")
            pd.DataFrame(compat_rows).to_csv(compat_path, index=False)
            logging.info(f"Saved compatibility predictions to {compat_path}")

            # Summary row
            result_row = {
                "Ticker": ticker,
                "Window": f"{start}-{end}",
                "WindowIdx": int(w_idx + 1),
                "Prefix": prefix,
                "PPO_Portfolio": round(final_value, 2),
                "BuyHold": round(hold_value, 2),
                "Sharpe": round(sharpe, 3),
                "Drawdown_%": round(drawdown, 2),
                "Winner": "PPO" if final_value > hold_value else "Buy & Hold",
                "Action_Threshold": round(thr, 4),
                "Accuracy": step_accuracy,
                "Trade_Count": trade_count,
                "Signal_Trade_Count": int(signal_trade_count),
                "Signal_Trade_Count_Dyn": int(signal_trade_count_dyn),
                "Executed_Trade_Count": int(executed_trade_count),
                "Precision_Long": round(precision_long, 4),
                "Precision_Short": round(precision_short, 4),
                "Precision_Trades": round(precision_trades, 4),
            }

            results.append(result_row)

            meta = {
                "result": result_row,
                "features": df_window.columns.tolist(),
                "prefix": prefix,
                "model_path": model_path,
                "vecnorm_path": vecnorm_path,
            }

            # Keep only the TOP_N_WINDOWS highest-Sharpe windows. The prefix is
            # unique per window, so tuple comparison never reaches the dict.
            item = (result_row["Sharpe"], prefix, meta)
            if len(top_heap) < TOP_N_WINDOWS:
                heapq.heappush(top_heap, item)
            else:
                if item[0] > top_heap[0][0]:
                    heapq.heapreplace(top_heap, item)

            logging.info(
                f"{ticker} | Window {w_idx+1} runtime: "
                f"{round(time.time() - window_start_time, 2)}s"
            )
        finally:
            # Release env/model (and cached CUDA memory) before the next
            # window, whether or not this one succeeded.
            try:
                env.close()
            except Exception:
                pass
            del env
            try:
                del model
            except Exception:
                pass
            gc.collect()
            try:
                torch.cuda.empty_cache()
            except Exception:
                pass

    if skipped_windows:
        logging.info(
            f"{ticker} skipped windows (already complete): {', '.join(skipped_windows)}"
        )
        record_skips_global(
            ticker,
            skipped_windows=skipped_windows,
            total_windows=len(windows),
            fully_skipped=False,
        )

    # Save top-N QC-compatible
    top_list = sorted(top_heap, key=lambda t: t[0], reverse=True)
    for _, _, meta in top_list:
        artifact_for_save = {
            "model": None,  # we're copying from disk, not re-saving an in-memory model
            "model_path": meta["model_path"],
            "vecnorm_path": meta["vecnorm_path"],
            "features": meta["features"],
            "result": meta["result"],
            "prefix": meta["prefix"],
        }
        save_quantconnect_model(artifact_for_save, meta["prefix"], QC_TOP_DIR)

    return results

def process_ticker(ticker):
    """Run the walk-forward PPO pipeline for one ticker.

    Picks the hyper-parameter preset for *ticker*, slices its rows out of the
    module-level ``df`` and calls ``walkforward_ppo``. Any exception is logged
    and turned into an empty result list.
    """
    try:
        preset = pick_params(ticker)
        ticker_rows = df[df["Symbol"] == ticker].copy()
        window_results = walkforward_ppo(
            ticker_rows,
            ticker,
            window_size=WINDOW_SIZE,
            step_size=STEP_SIZE,
            timesteps=TIMESTEPS,
            learning_rate=preset["lr"],
            ppo_overrides=preset,
        )
    except Exception as e:
        logging.error(f"{ticker}: training failed with {e}")
        return []
    return window_results


from concurrent.futures import ThreadPoolExecutor

def run_parallel_tickers(tickers,
                         out_path=os.path.join(RUN_RESULTS_DIR, "summary.csv"),
                         max_workers=8):
    """Run process_ticker for every ticker on a thread pool and merge results.

    Args:
        tickers: Iterable of symbols.
        out_path: CSV path for the merged summary; written only when at least
            one result row exists.
        max_workers: Thread-pool size.

    Returns:
        The flat list of per-window summary dicts, in ticker order.
    """
    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        # pool.map yields in input order; empty per-ticker results are dropped.
        per_ticker = [rows for rows in pool.map(process_ticker, tickers) if rows]
    results = [row for rows in per_ticker for row in rows]

    if not results:
        logging.warning("No results produced; summary not written.")
    else:
        pd.DataFrame(results).to_csv(out_path, index=False)
        logging.info(f"Saved summary to {out_path}")

    logging.info("All tickers processed.")
    return results

def build_ppo_selector():
    """Aggregate all summary*.csv across runs and build the selector files.

    Steps:
      1. Read every ``summary*.csv`` under ``ppo_walkforward_results_*``.
      2. Keep one row per (Ticker, Window), preferring the file read last.
      3. Pick the highest-Sharpe row per ticker and write the flat table to
         SELECTOR_FULL_PATH.
      4. Apply the safety gates (Sharpe > 0, drawdown < 50 %, final
         portfolio > 80 000, at least 95 % of buy-and-hold) and write one
         JSON entry per surviving ticker to SELECTOR_JSON_PATH.

    Artifact prefixes come from the saved ``Prefix`` column when present,
    else from the saved ``WindowIdx``, else from a reconstructed index.

    Returns:
        None. Returns early with a warning when no summaries can be read or
        when a required column (Ticker, Window, Sharpe) is missing.
    """
    # Sorted so that, with timestamped run folders, later runs are read last
    # and win the "keep last" de-duplication below.
    summary_files = sorted(glob.glob(
        os.path.join(BASE_RESULTS_DIR, "ppo_walkforward_results_*", "summary*.csv")
    ))
    all_summaries = []

    for p in summary_files:
        try:
            tmp = pd.read_csv(p)
            tmp["RunFolder"] = os.path.dirname(p)
            all_summaries.append(tmp)
        except Exception as e:
            print(f"⚠️ Skipping {p} due to error: {e}")

    if not all_summaries:
        logging.warning("No PPO summaries found across walkforward results folders.")
        return

    combo = pd.concat(all_summaries, ignore_index=True)

    missing_cols = [c for c in ("Ticker", "Window", "Sharpe") if c not in combo.columns]
    if missing_cols:
        logging.warning(
            f"PPO summaries are missing required columns {missing_cols}; selector not built."
        )
        return

    combo["Sharpe"] = pd.to_numeric(combo["Sharpe"], errors="coerce")
    combo = combo.dropna(subset=["Sharpe", "Ticker"])

    # Ensure key columns exist for robust ratios
    if "BuyHold" not in combo.columns:
        combo["BuyHold"] = np.nan
    if "PPO_Portfolio" not in combo.columns:
        combo["PPO_Portfolio"] = np.nan

    # parse Window "start-end" to WindowStart
    def _parse_window_start(w):
        if pd.isna(w):
            return None
        if isinstance(w, (int, float)):
            return int(w)
        parts = str(w).split("-")
        try:
            return int(parts[0])
        except Exception:
            return None

    combo["WindowStart"] = combo["Window"].apply(_parse_window_start)

    # The same window can be reported by several runs. Collapse those rows
    # BEFORE numbering, otherwise the reconstructed index drifts away from
    # the index used in the artifact file names.
    combo = combo.drop_duplicates(subset=["Ticker", "Window"], keep="last")

    combo = combo.sort_values(["Ticker", "WindowStart"]).reset_index(drop=True)
    rebuilt_idx = combo.groupby("Ticker").cumcount() + 1
    if "WindowIdx" in combo.columns:
        # Trust the index saved at training time; reconstruct only where
        # older summaries lack it.
        saved_idx = pd.to_numeric(combo["WindowIdx"], errors="coerce")
        combo["WindowIdx"] = saved_idx.fillna(rebuilt_idx).astype(int)
    else:
        combo["WindowIdx"] = rebuilt_idx

    combo = combo.drop_duplicates(subset=["Ticker", "WindowIdx"], keep="last")

    # Best window per ticker. drop_duplicates keeps whole rows, whereas
    # GroupBy.first() takes the first non-null value of each column and can
    # combine values from different windows.
    best_by_symbol = (
        combo
        .sort_values("Sharpe", ascending=False)
        .drop_duplicates(subset="Ticker", keep="first")
        .sort_values("Ticker")
        .reset_index(drop=True)
    )

    # If Drawdown_% missing (older runs), create it so rename won't break
    if "Drawdown_%" not in best_by_symbol.columns:
        best_by_symbol["Drawdown_%"] = np.nan

    # Ensure precision cols exist
    for col in ["Precision_Long", "Precision_Short", "Precision_Trades"]:
        if col not in best_by_symbol.columns:
            best_by_symbol[col] = None  # or np.nan if you prefer

    # Rename columns so everything downstream uses consistent names
    best_by_symbol = best_by_symbol.rename(columns={
        "Drawdown_%": "Drawdown",
        "PPO_Portfolio": "Final_Portfolio",
    })

    # Ensure Accuracy / Trade_Count exist
    if "Accuracy" not in best_by_symbol.columns:
        best_by_symbol["Accuracy"] = 0.0
    if "Trade_Count" not in best_by_symbol.columns:
        best_by_symbol["Trade_Count"] = None

    best_by_symbol["Model"] = MODEL_NAME

    # PPO vs Buy & Hold ratio (safe division): NaN when BuyHold is missing or
    # zero. Vectorized so it also works on an empty table.
    buy_hold = pd.to_numeric(best_by_symbol["BuyHold"], errors="coerce")
    final_pf = pd.to_numeric(best_by_symbol["Final_Portfolio"], errors="coerce")
    best_by_symbol["Rel_vs_BH"] = final_pf / buy_hold.where(buy_hold != 0)

    # Save flat CSV for debugging
    best_by_symbol.to_csv(SELECTOR_FULL_PATH, index=False)
    print(f"Aggregated PPO selector saved to → {SELECTOR_FULL_PATH}")

    # Safety filters (tune as needed)
    df_sel = best_by_symbol.copy()
    gates = (
        (df_sel["Sharpe"].fillna(-999) > 0.0) &
        (df_sel["Drawdown"].fillna(999) < 50.0) &
        (df_sel["Final_Portfolio"].fillna(0) > 80_000) &
        (df_sel["Rel_vs_BH"].fillna(0) >= 0.95)   # PPO >= 95% of B&H; change to >1.0 to enforce beat
    )
    df_sel = df_sel[gates].copy()

    rebuilt_prefix = (
        "ppo_"
        + df_sel["Ticker"].astype(str)
        + "_window"
        + df_sel["WindowIdx"].astype(int).astype(str)
    )
    if "Prefix" in df_sel.columns:
        # The prefix saved at training time is authoritative.
        saved_prefix = df_sel["Prefix"]
        df_sel["prefix"] = saved_prefix.where(saved_prefix.notna(), rebuilt_prefix).astype(str)
    else:
        df_sel["prefix"] = rebuilt_prefix

    df_sel["artifact_path"] = df_sel["prefix"].apply(
        lambda p: os.path.join(FINAL_MODEL_DIR, f"{p}_model.zip")
    )
    df_sel["vecnorm_path"] = df_sel["prefix"].apply(
        lambda p: os.path.join(FINAL_MODEL_DIR, f"{p}_vecnorm.pkl")
    )

    EPS = 0.03  # 3% of top-sharpe for "close enough"
    selected_models = {}

    def _coerce(value, caster, default):
        """Cast *value* with *caster*; None, NaN or a failed cast give the default."""
        if value is None or (isinstance(value, float) and value != value):
            return caster(default)
        try:
            return caster(value)
        except (ValueError, TypeError, OverflowError):
            return caster(default)

    def safe_int(v, default=0):
        return _coerce(v, int, default)

    def safe_float(v, default=0.0):
        return _coerce(v, float, default)

    # NOTE(review): df_sel holds one row per ticker, so `second` is always
    # None here and every entry is written with mode "single".
    for ticker, group in df_sel.groupby("Ticker"):
        group_sorted = group.sort_values("Sharpe", ascending=False)
        top = group_sorted.iloc[0]
        second = group_sorted.iloc[1] if len(group_sorted) > 1 else None

        if (second is not None) and (
            abs(top["Sharpe"] - second["Sharpe"]) <= abs(top["Sharpe"]) * EPS
        ):
            mode = "ensemble"
            primary, secondary = top["Model"], second["Model"]
        else:
            mode = "single"
            primary, secondary = top["Model"], None

        selected_models[ticker] = {
            "model": MODEL_NAME,
            "score": round(safe_float(top["Sharpe"]), 4),
            "return": round(safe_float(top["Final_Portfolio"]), 2),
            "sharpe": round(safe_float(top["Sharpe"]), 3),
            "drawdown": round(safe_float(top["Drawdown"]), 2),
            "sortino": None,
            "turnover": None,
            "trade_count": safe_int(top.get("Trade_Count", 0)),
            "precision": {
                "long":   safe_float(top.get("Precision_Long", 0.0)),
                "short":  safe_float(top.get("Precision_Short", 0.0)),
                "trades": safe_float(top.get("Precision_Trades", 0.0)),
            },
            "stability": {},
            "regime": "unknown",
            "rl_profile": "fast",
            "artifact": {
                "path": top["artifact_path"],
                "vecnorm": top["vecnorm_path"],
                "features": None,
                "load_ms": 180,
                "mem_mb": 512,
                "exists": os.path.exists(top["artifact_path"]),
            },
            "selection": {
                "mode": mode,
                "primary": primary,
                "secondary": secondary,
            },
        }

    with open(SELECTOR_JSON_PATH, "w") as f:
        json.dump(selected_models, f, indent=2)

    print(f"Final enhanced PPO selector JSON saved to → {SELECTOR_JSON_PATH}")

# Script entry point: choose the training universe, run the walk-forward
# training (sequential test mode or the parallel full run), then rebuild the
# model selector from every summary CSV on disk.
if __name__ == "__main__":
    logging.info(f"RUN_RESULTS_DIR   = {RUN_RESULTS_DIR}")
    logging.info(f"FINAL_MODEL_DIR  = {FINAL_MODEL_DIR}")
    logging.info(f"BASE_RESULTS_DIR = {BASE_RESULTS_DIR}")

    # --- Filter 1: enough rows for at least one window ---
    min_rows = WINDOW_SIZE + 50  # small buffer so we have at least one window
    all_symbols = df["Symbol"].value_counts()
    candidate_symbols = []

    for sym, n in all_symbols.items():
        if n >= min_rows:
            candidate_symbols.append(sym)
        else:
            logging.warning(f"Skipping {sym}: only {n} rows (< {min_rows} required)")

    if not candidate_symbols:
        logging.error("No symbols have enough rows for the current WINDOW_SIZE. Nothing to train.")
    else:
        logging.info(f"Training candidate symbols: {candidate_symbols}")

    # --- Filter 2: required columns, depending on the enabled feature flags ---
    needed_cols = ["Close", "Datetime"]
    if ENABLE_WAVELET:
        needed_cols.append("Denoised_Close")
    if ENABLE_SENTIMENT:
        needed_cols.append("SentimentScore")

    valid_symbols = []
    for sym in candidate_symbols:
        # Only checks that the column exists in the frame, not that this
        # symbol has values in it.
        cols = set(df.loc[df["Symbol"] == sym].columns)
        missing = [c for c in needed_cols if c not in cols]
        if missing:
            logging.warning(f"Skipping {sym}: missing required cols {missing}")
        else:
            valid_symbols.append(sym)

    if not valid_symbols:
        logging.error("No symbols passed the feature/column checks. Nothing to train.")
    else:
        logging.info(f"Final training universe: {valid_symbols}")

    all_results = []

    if test_mode:
        # Optional: shrink timesteps and/or window size in test mode
        TIMESTEPS = 100_000   # lighter test
        # WINDOW_SIZE = 2000  # uncomment if you want faster test runs
        # STEP_SIZE   = 500

        test_stocks = ["AAPL", "NVDA", "MSFT"]
        present = [s for s in test_stocks if s in valid_symbols]
        if not present:
            logging.warning("Test mode: none of ['AAPL','NVDA','MSFT'] present after filters.")
        else:
            logging.info(f"Test mode: running on {present}")

        # Test mode trains the tickers one after another, not on the pool.
        for sym in present:
            logging.info(f">>> [TEST_MODE] Processing {sym}")
            res = process_ticker(sym)
            logging.info(f"{sym}: produced {len(res)} window summaries")
            if res:
                all_results.extend(res)

        summary_path = os.path.join(RUN_RESULTS_DIR, "summary_test_mode.csv")
        if all_results:
            pd.DataFrame(all_results).to_csv(summary_path, index=False)
            logging.info(f"Test-mode summary saved to {summary_path}")
        else:
            logging.warning("Test mode finished but no results were generated (no windows, or all skipped).")

    else:
        logging.info("Starting full parallel PPO walkforward run...")
        summary_results = run_parallel_tickers(valid_symbols)
        if not summary_results:
            logging.warning("No results generated in full run (check logs for skips/length issues).")
        else:
            # NOTE(review): run_parallel_tickers already writes summary.csv to
            # this same default path, so this is a second write of the same rows.
            summary_path = os.path.join(RUN_RESULTS_DIR, "summary.csv")
            pd.DataFrame(summary_results).to_csv(summary_path, index=False)
            logging.info(f"Summary saved to {summary_path}")

    # Rebuild the selector even when this run trained nothing; it aggregates
    # the summaries of all runs on disk.
    try:
        build_ppo_selector()
    except Exception as e:
        logging.error(f"build_ppo_selector failed: {e}")

Gym has been unmaintained since 2022 and does not support NumPy 2.0 amongst other critical functionality.
Please upgrade to Gymnasium, the maintained drop-in replacement of Gym, or contact the authors of your software and request that they upgrade.
See the migration guide at https://gymnasium.farama.org/introduction/migration_guide/ for additional information.
2025-12-26 00:44:30,958 - INFO - RUN_RESULTS_DIR   = /content/drive/MyDrive/Results_May_2025/ppo_walkforward_results_20251226_0044
2025-12-26 00:44:30,960 - INFO - FINAL_MODEL_DIR  = /content/drive/MyDrive/Results_May_2025/ppo_models_master
2025-12-26 00:44:30,961 - INFO - BASE_RESULTS_DIR = /content/drive/MyDrive/Results_May_2025
2025-12-26 00:44:31,078 - INFO - Training candidate symbols: ['AAPL', 'QCOM', 'COST', 'RTX', 'BRK-B', 'SBUX', 'TMO', 'BAC', 'TSLA', 'CRM', 'AVGO', 'TXN', 'UNH', 'ADBE', 'AMGN', 'UNP', 'AMD', 'ACN', 'AMZN', 'ABT', 'WMT', 'ABBV', 'XOM', 'PFE', 'PM', 'CSCO', 'PG', 'LLY', 'MA', 'MCD', 'META', 'MRK', 'JPM', 

Aggregated PPO selector saved to → /content/drive/MyDrive/Results_May_2025/ppo_model_selector_FULL.csv
Final enhanced PPO selector JSON saved to → /content/drive/MyDrive/Results_May_2025/ppo_model_selector_final.json


In [None]:
# PPO walkforward training + selector
import os, gc, time, json, logging, glob
import shutil
from threading import Lock
from datetime import datetime, timedelta

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt  # optional for ad-hoc plots

import torch
import gymnasium as gym
from gymnasium.spaces import Box as GBox

import yfinance as yf
from gym_anytrading.envs import StocksEnv

from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
from stable_baselines3.common.utils import set_random_seed

import warnings
warnings.filterwarnings("ignore", category=UserWarning, module="gymnasium")
# ---- Sharpe annualization helper (intraday heuristic: 6.5 hrs * 252) ----
def _annualization_factor(_df_like=None) -> float:
    """Return the multiplier used to annualize a per-bar Sharpe ratio.

    The value is the square root of an assumed bar count per year: 252
    trading days times 6.5 trading hours. ``_df_like`` is accepted so call
    sites can pass their data frame, but it is not inspected.
    """
    trading_days = 252
    session_hours = 6.5
    return np.sqrt(trading_days * session_hours)

# Silence two recurring notebook warnings: DeprecationWarnings raised from
# jupyter_client.session, and the "Gym has been unmaintained" notice.
warnings.filterwarnings("ignore", category=DeprecationWarning, module="jupyter_client.session")
warnings.filterwarnings("ignore", message=".*Gym has been unmaintained.*")

# Define a minimal stand-in only when no earlier cell has already provided
# compute_enhanced_features.
try:
    compute_enhanced_features  # type: ignore
except NameError:
    def compute_enhanced_features(df_in: pd.DataFrame) -> pd.DataFrame:
        """Fallback feature builder: copy, order by time, and validate.

        Works on a copy of ``df_in``. When a ``Datetime`` column is present
        it is parsed to datetimes and the rows are sorted by it with a fresh
        0..n-1 index. No extra feature columns are added.

        Raises:
            ValueError: if the frame has no ``Close`` column.
        """
        frame = df_in.copy()
        has_timestamps = "Datetime" in frame.columns
        if has_timestamps:
            frame["Datetime"] = pd.to_datetime(frame["Datetime"])
            frame = frame.sort_values("Datetime").reset_index(drop=True)
        if "Close" in frame.columns:
            return frame
        raise ValueError("compute_enhanced_features: missing required column 'Close'")

# Fix the stable-baselines3 random seed for this cell's training runs.
set_random_seed(42)

# Root output folder; every other path below is derived from it.
BASE_RESULTS_DIR = "/content/drive/MyDrive/Results_May_2025"
# Minute-resolution timestamp that names this run's result folder.
RUN_TAG = datetime.now().strftime("%Y%m%d_%H%M")

# RUN_RESULTS_DIR is per-run (tagged); FINAL_MODEL_DIR and QC_TOP_DIR are
# fixed names shared across runs.
RUN_RESULTS_DIR = os.path.join(BASE_RESULTS_DIR, f"ppo_walkforward_results_{RUN_TAG}")
FINAL_MODEL_DIR = os.path.join(BASE_RESULTS_DIR, "ppo_models_master")
QC_TOP_DIR      = os.path.join(BASE_RESULTS_DIR, "ppo_models_QC_TOP")

# Create the output folders up front (no error if they already exist).
os.makedirs(QC_TOP_DIR, exist_ok=True)
os.makedirs(RUN_RESULTS_DIR, exist_ok=True)
os.makedirs(FINAL_MODEL_DIR, exist_ok=True)

# Aggregated selector outputs
SELECTOR_FULL_PATH = os.path.join(BASE_RESULTS_DIR, "ppo_model_selector_FULL.csv")
SELECTOR_JSON_PATH = os.path.join(BASE_RESULTS_DIR, "ppo_model_selector_final.json")
MODEL_NAME = "PPO"

# Global skip aggregation (thread-safe)
# SKIP_LOCK serializes appends to the per-run skip CSV at SKIP_AGG_PATH.
SKIP_AGG_PATH = os.path.join(RUN_RESULTS_DIR, "skipped_windows_global.csv")
SKIP_LOCK = Lock()

# Logging Setup
# force=True replaces any handlers already installed on the root logger,
# so re-running this cell does not keep a stale configuration.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    force=True
)

# Flags
# Module-level switches read by the code below (ENABLE_SENTIMENT and
# ENABLE_WAVELET gate env/reward and data fallbacks; LIVE_MODE, BROKER and
# SIM_LATENCY_MS drive the live/paper helpers).
ENABLE_SENTIMENT = False
ENABLE_SLO       = True
ENABLE_WAVELET   = True
test_mode        = True            # set False for full universe
ENABLE_PLOTS     = False
LIVE_MODE        = False           # set True to run simple live/paper loop
SIM_LATENCY_MS   = 0               # broker latency simulation; 0 = off
BROKER           = "log"           # "log" = do not place orders, just log

# Global training settings
# WINDOW_SIZE / STEP_SIZE are in rows of the per-symbol frame.
WINDOW_SIZE = 3500
STEP_SIZE   = 500
TIMESTEPS   = 150_000  # overridden in test_mode block to smaller value


# The feature-engineered dataset is a hard requirement: fail immediately
# when the CSV is not in the working directory.
DATA_PATH = "multi_stock_feature_engineered_dataset.csv"
if not os.path.exists(DATA_PATH):
    raise FileNotFoundError("Required feature-engineered dataset not found!")

df = pd.read_csv(DATA_PATH)
df["Datetime"] = pd.to_datetime(df["Datetime"])

# Wavelet fallback
# When the denoised series is absent, the raw Close is copied in its place so
# code that reads 'Denoised_Close' still finds the column.
if ENABLE_WAVELET and "Denoised_Close" not in df.columns:
    logging.warning("ENABLE_WAVELET=True but 'Denoised_Close' missing; "
                    "falling back to Close->Denoised_Close.")
    df["Denoised_Close"] = df["Close"]


def record_skips_global(ticker: str, skipped_windows: list,
                        total_windows: int = None, fully_skipped: bool = False):
    """Append one ticker's skipped windows to the run-wide skip CSV.

    Nothing is written when there are no skipped windows and the ticker was
    not fully skipped. A fully skipped ticker produces a single ``"ALL"``
    row; otherwise one row is written per entry of ``skipped_windows``.
    The header row is emitted only when the file does not exist yet, and the
    whole append happens while holding ``SKIP_LOCK``.

    Args:
        ticker: Symbol the rows belong to.
        skipped_windows: Labels of the form ``"<ticker>_window<N>"``; a label
            that cannot be parsed is recorded with an empty window cell.
        total_windows: Total window count for the ticker, or None to leave
            that cell empty.
        fully_skipped: True when every window of the ticker was skipped.
    """
    import csv

    if not (skipped_windows or fully_skipped):
        return

    total_cell = "" if total_windows is None else total_windows

    def _window_number(label):
        # Extract N from "<ticker>_window<N>"; any failure yields "".
        try:
            _, tail = label.split("_window")
            return int(tail)
        except Exception:
            return ""

    with SKIP_LOCK:
        needs_header = not os.path.exists(SKIP_AGG_PATH)
        with open(SKIP_AGG_PATH, "a", newline="") as handle:
            out = csv.writer(handle)
            if needs_header:
                out.writerow(["Ticker", "Window", "FullySkipped", "TotalWindows"])
            if fully_skipped:
                out.writerow([ticker, "ALL", True, total_cell])
                return
            out.writerows(
                [ticker, _window_number(label), False, total_cell]
                for label in skipped_windows
            )


# Single source of truth for ContinuousPositionEnv construction arguments.
ENV_KWARGS = {
    # Look-back length forwarded to the parent StocksEnv.
    "window_size": 10,
    # Per-unit-of-position-change frictions; the env charges their sum.
    "cost_rate": 0.0002,
    "slip_rate": 0.0003,

    # Reward-shaping weights.
    "k_alpha": 0.0,
    "k_mom": 0.15,
    "k_sent": (0.01 if ENABLE_SENTIMENT else 0.0),
    # Momentum input: "macd" uses MACD_Line, anything else Denoised_Close.
    "mom_source": "denoised",
    "mom_lookback": 20,

    # Trade gating: minimum position change and minimum ticks between trades.
    "min_trade_delta": 0.08,
    "cooldown": 10,

    # Symmetric bound applied to the shaped reward.
    "reward_clip": 0.05,
    # Accepted and stored by the env; its step() does not read them.
    "k_vol": 0.00,
    "k_dd": 0.00,
}


class ContinuousPositionEnv(StocksEnv):
    """StocksEnv variant driven by a continuous target position in [-1, 1].

    The agent's single action is a target position. The env keeps its own
    NAV and position bookkeeping and computes its own shaped reward; the
    parent env is stepped only to advance the tick and produce observations
    (the parent's reward is discarded).
    """

    def __init__(self, df, frame_bound, **kwargs):
        """Build the env from a price frame and explicit keyword settings.

        Args:
            df: Price/feature frame; its index is reset before being handed
                to the parent. ``Close`` is read by the return computation;
                ``Denoised_Close``, ``MACD_Line`` and ``SentimentScore`` are
                read only when present.
            frame_bound: Passed through unchanged to the parent env.
            **kwargs: Must contain ``window_size``; the remaining recognised
                keys are popped below with fallback defaults.

        Raises:
            ValueError: if ``window_size`` is missing or an unrecognised
                keyword is supplied.
        """
        # Require window_size from ENV_KWARGS
        if "window_size" not in kwargs:
            raise ValueError("ContinuousPositionEnv requires window_size (pass via ENV_KWARGS).")

        window_size = int(kwargs.pop("window_size"))

        # Pull params (all defaults live in ENV_KWARGS; these are just safety fallbacks)
        # NOTE(review): the fallbacks for min_trade_delta (0.04) and cooldown (6)
        # differ from the ENV_KWARGS values (0.08 and 10).
        cost_rate       = float(kwargs.pop("cost_rate", 0.0002))
        slip_rate       = float(kwargs.pop("slip_rate", 0.0003))
        k_alpha         = float(kwargs.pop("k_alpha", 0.0))
        k_mom           = float(kwargs.pop("k_mom", 0.15))
        k_sent          = float(kwargs.pop("k_sent", 0.0))
        mom_source      = str(kwargs.pop("mom_source", "denoised"))
        mom_lookback    = int(kwargs.pop("mom_lookback", 20))
        min_trade_delta = float(kwargs.pop("min_trade_delta", 0.04))
        cooldown        = int(kwargs.pop("cooldown", 6))
        reward_clip     = float(kwargs.pop("reward_clip", 0.05))
        k_vol           = float(kwargs.pop("k_vol", 0.0))
        k_dd            = float(kwargs.pop("k_dd", 0.0))

        # Fail fast on unexpected env kwargs
        if kwargs:
            raise ValueError(f"Unexpected env kwargs: {list(kwargs.keys())}")

        super().__init__(
            df=df.reset_index(drop=True),
            frame_bound=frame_bound,
            window_size=window_size
        )

        # Rebuild the parent's Box observation space as a gymnasium Box with
        # the same bounds, shape and dtype.
        if isinstance(self.observation_space, gym.spaces.Box):
            self.observation_space = GBox(
                low=self.observation_space.low,
                high=self.observation_space.high,
                shape=self.observation_space.shape,
                dtype=self.observation_space.dtype,
            )

        # Stored for completeness; no method of this class reads k_vol / k_dd.
        self.k_vol = k_vol
        self.k_dd  = k_dd

        # Episode bookkeeping (re-initialised in reset()). ret_history is
        # created here but never appended to within this class.
        self.ret_history = []
        self.nav_history = []
        self.peak_nav    = 1.0
        self.trade_count = 0

        # One continuous action: the target position in [-1, 1].
        self.action_space = GBox(low=-1.0, high=1.0, shape=(1,), dtype=np.float32)


        self.cost_rate       = cost_rate
        self.slip_rate       = slip_rate
        self.k_alpha         = k_alpha
        self.k_mom           = k_mom
        self.k_sent          = k_sent
        self.mom_source      = mom_source
        self.mom_lookback    = mom_lookback
        self.min_trade_delta = min_trade_delta
        self.cooldown        = cooldown
        self.reward_clip     = reward_clip

        # NAV starts at 1.0 with a flat position. _last_trade_step is set one
        # full cooldown in the past so the cooldown gate is open at the start.
        self.nav = 1.0
        self.pos = 0.0
        self._last_trade_step = -self.cooldown

    def reset(self, **kwargs):
        """Reset the parent env and this class's bookkeeping.

        Accepts either a bare observation or an ``(obs, info)`` tuple from
        the parent and always returns ``(obs, info)``, with ``nav``, ``pos``
        and ``trade_count`` added to ``info``.
        """
        out = super().reset(**kwargs)
        if isinstance(out, tuple):
            obs, info = out
        else:
            obs, info = out, {}

        self.nav = 1.0
        self.pos = 0.0
        self._last_trade_step = -self.cooldown

        self.trade_count = 0
        self.ret_history = []
        self.nav_history = [self.nav]
        self.peak_nav    = self.nav

        info = info or {}
        info.update({
            "nav": self.nav,
            "pos": self.pos,
            "trade_count": int(self.trade_count),

        })
        return obs, info

    def _step_parent_hold(self):
        """Advance the parent env one step and drop its reward.

        The parent is always stepped with the fixed action value 2 and its
        own reward is ignored. Both the 5-tuple and the older 4-tuple step
        results are accepted; the return is always
        ``(obs, terminated, truncated, info)``.
        """
        # NOTE(review): presumably 2 matches neither of the parent's buy/sell
        # actions and so acts as "hold" — confirm against gym_anytrading.
        step_result = super().step(2)
        if len(step_result) == 5:
            obs, _env_rew, terminated, truncated, info = step_result
        else:
            obs, _env_rew, done, info = step_result
            terminated, truncated = bool(done), False
        return obs, terminated, truncated, info

    def _ret_t(self):
        """Simple return of Close from the previous tick to the current one.

        At tick 0 the previous tick is clamped to 0. Returns 0.0 when the
        previous close is not positive.
        """
        cur  = float(self.df.loc[self._current_tick, "Close"])
        prev = float(self.df.loc[max(self._current_tick - 1, 0), "Close"])
        return 0.0 if prev <= 0 else (cur - prev) / prev

    def _mom_signal(self):
        """Return a tanh-squashed momentum signal for the current tick.

        With ``mom_source == "macd"`` and a ``MACD_Line`` column, the current
        MACD value is divided by its standard deviation over (up to) the last
        201 rows. Otherwise, when ``Denoised_Close`` exists and a full
        look-back is available, the per-bar slope of the denoised close over
        ``mom_lookback`` bars is scaled by the previous close. In every other
        case the signal is 0.0.
        """
        if self.mom_source == "macd" and "MACD_Line" in self.df.columns:
            recent = self.df["MACD_Line"].iloc[max(self._current_tick - 200, 0):self._current_tick + 1]
            return float(np.tanh(
                float(self.df.loc[self._current_tick, "MACD_Line"]) /
                (1e-6 + float(recent.std()))
            ))

        if "Denoised_Close" in self.df.columns and self._current_tick - self.mom_lookback >= 0:
            now  = float(self.df.loc[self._current_tick, "Denoised_Close"])
            then = float(self.df.loc[self._current_tick - self.mom_lookback, "Denoised_Close"])
            base = float(self.df.loc[max(self._current_tick - 1, 0), "Close"])
            slope = (now - then) / max(self.mom_lookback, 1)
            # The factor 10 and the 1e-6 floor keep the tanh argument finite and scaled.
            return float(np.tanh(10.0 * (slope / max(abs(base), 1e-6))))

        return 0.0

    def step(self, action):
        """Apply a target position, book P&L and costs, and advance one tick.

        Order of operations:
          1. The return of the current tick is earned on the position held
             coming into this step.
          2. A trade to the (clipped) target happens only when the change is
             at least ``min_trade_delta`` and ``cooldown`` ticks have passed
             since the last trade; its cost is
             ``(cost_rate + slip_rate) * |delta_pos|``.
          3. The shaped reward (position return, alpha and momentum terms,
             optional sentiment, minus cost) is clipped to ``±reward_clip``.
          4. NAV is compounded by ``1 + base_ret - trade_cost``; the new
             position takes effect afterwards, so it is the position used by
             the following step.
          5. The parent env is stepped to advance the tick.

        Returns:
            ``(obs, reward, terminated, truncated, info)`` with the
            diagnostic fields listed below merged into ``info``.
        """
        a = float(np.array(action).squeeze())
        target_pos = float(np.clip(a, -1.0, 1.0))

        r_t = self._ret_t()
        base_ret = self.pos * r_t

        # Both gates must pass for the target to be executed.
        changed = (
            abs(target_pos - self.pos) >= self.min_trade_delta
        ) and (
            (self._current_tick - self._last_trade_step) >= self.cooldown
        )

        delta_pos = (target_pos - self.pos) if changed else 0.0
        trade_cost = (self.cost_rate + self.slip_rate) * abs(delta_pos)

        # Position return minus the raw bar return.
        rel_alpha = base_ret - r_t
        mom_term = self.pos * self._mom_signal()

        alpha_term = self.k_alpha * rel_alpha

        # Sentiment contributes only when the module-level flag is on and the
        # column exists.
        sent_term = 0.0
        if ENABLE_SENTIMENT and "SentimentScore" in self.df.columns:
            sent_term = self.k_sent * float(self.df.loc[self._current_tick, "SentimentScore"])

        shaped = base_ret + alpha_term + (self.k_mom * mom_term) + sent_term - trade_cost
        reward = float(np.clip(shaped, -self.reward_clip, self.reward_clip))


        # NAV tracks realised return and cost only (no shaping terms).
        self.nav *= (1.0 + base_ret - trade_cost)
        self.nav_history.append(self.nav)
        self.peak_nav = max(self.peak_nav, self.nav)

        executed_trade = False
        if changed:
            self.pos = target_pos
            self._last_trade_step = self._current_tick
            self.trade_count += 1
            executed_trade = True

        obs, terminated, truncated, info = self._step_parent_hold()
        info = info or {}
        info.update({
            "ret_t": r_t,
            "nav": self.nav,
            "pos": self.pos,
            "trade_cost": trade_cost,
            "base_ret": base_ret,
            "rel_alpha": rel_alpha,
            "mom": mom_term,
            "changed": bool(changed),
            "executed_trade": bool(executed_trade),
            "trade_count": int(self.trade_count),
            "delta_pos": float(delta_pos),
        })
        return obs, reward, terminated, truncated, info

def get_mu_sigma(model, obs):
    """Return the policy's Gaussian mean and std for ``obs`` as two floats.

    The observation is pushed through the policy's own pipeline
    (``obs_to_tensor`` -> ``extract_features`` -> ``mlp_extractor`` ->
    ``action_net``) without tracking gradients. The std is ``exp`` of the
    policy's ``log_std`` parameter. Both results must squeeze to a single
    scalar, i.e. one observation and a one-dimensional action.
    """
    policy = model.policy
    with torch.no_grad():
        obs_tensor, _vectorized = policy.obs_to_tensor(obs)
        actor_latent, _critic_latent = policy.mlp_extractor(
            policy.extract_features(obs_tensor)
        )
        mean_arr = policy.action_net(actor_latent).detach().cpu().numpy()
        std_arr = policy.log_std.exp().detach().cpu().numpy()
        mu = float(mean_arr.squeeze())
        sigma = float(std_arr.squeeze())
    return mu, sigma

def get_walk_forward_windows(df_in, window_size=3500, step_size=500, min_len=1200):
    """Return ``(start, end)`` row bounds for rolling walk-forward windows.

    Starts are generated every ``step_size`` rows from 0 up to (but not
    including) ``len(df_in) - min_len``. A start is kept only when the full
    window fits in the data. ``end`` is exclusive, so it is meant for
    ``df.iloc[start:end]``, and a window may end exactly at ``len(df_in)``.

    Args:
        df_in: Any sized object (only ``len()`` is used).
        window_size: Number of rows per window.
        step_size: Row offset between consecutive window starts.
        min_len: Starts closer than this to the end of the data are never
            considered.

    Returns:
        List of ``(start, end)`` tuples in ascending start order; empty when
        no full window fits.
    """
    n_rows = len(df_in)
    return [
        (start, start + window_size)
        for start in range(0, n_rows - min_len, step_size)
        # `<=` because `end` is exclusive: a window that ends on the final
        # row is complete. With `<`, a frame of exactly window_size rows
        # produced no windows at all.
        if start + window_size <= n_rows
    ]

def save_quantconnect_model(artifact, prefix, save_dir):
    """Save/copy QC-compatible artifacts for one model into ``save_dir``.

    Every step is best-effort: a failure is logged as a warning and the
    remaining files are still attempted. Files written (all named
    ``{prefix}_<suffix>``):

    * ``model.zip`` - saved from ``artifact["model"]`` when given (an
      existing destination is left untouched), otherwise copied from
      ``artifact["model_path"]``.
    * ``vecnorm.pkl`` - copied from ``artifact["vecnorm_path"]``.
    * ``features.json`` - ``{"features": artifact["features"]}``.
    * ``probability_config.json`` - the action threshold plus fixed
      inference flags.
    * ``model_info.json`` - summary fields taken from ``artifact["result"]``.

    Args:
        artifact: Dict with optional keys ``model``, ``model_path``,
            ``vecnorm_path``, ``features`` and ``result``.
        prefix: File-name prefix identifying the model/window.
        save_dir: Destination directory (created if missing).
    """
    import math
    import shutil

    default_thr = 0.2

    os.makedirs(save_dir, exist_ok=True)

    # --- Model zip: save or copy ---
    model_dst = os.path.join(save_dir, f"{prefix}_model.zip")

    model_obj = artifact.get("model", None)
    model_src = artifact.get("model_path", None)

    try:
        if model_obj is not None:
            # Save from in-memory SB3 model
            if not os.path.exists(model_dst):
                model_obj.save(model_dst)

        else:
            # Copy from an existing trained window model zip
            if model_src and os.path.exists(model_src):
                if os.path.abspath(model_src) != os.path.abspath(model_dst):
                    shutil.copyfile(model_src, model_dst)
            else:
                # If neither provided, warn loudly
                if not os.path.exists(model_dst):
                    logging.warning(f"[QC SAVE] Missing model for {prefix}: no model_obj and no valid model_path.")
    except Exception as e:
        logging.warning(f"[QC SAVE] Model handling issue for {prefix}: {e}")

    # --- VecNormalize: copy ---
    vecnorm_src = artifact.get("vecnorm_path")
    if vecnorm_src and os.path.exists(vecnorm_src):
        try:
            vecnorm_dst = os.path.join(save_dir, f"{prefix}_vecnorm.pkl")
            if os.path.abspath(vecnorm_src) != os.path.abspath(vecnorm_dst):
                shutil.copyfile(vecnorm_src, vecnorm_dst)
        except Exception as e:
            logging.warning(f"[QC SAVE] VecNormalize handling issue for {prefix}: {e}")
    else:
        logging.warning(f"[QC SAVE] VecNormalize missing for {prefix}: vecnorm_path not found.")

    # --- Features ---
    try:
        with open(os.path.join(save_dir, f"{prefix}_features.json"), "w") as f:
            json.dump({"features": artifact.get("features", [])}, f)
    except Exception as e:
        logging.warning(f"[QC SAVE] Could not write features.json for {prefix}: {e}")

    # --- Probability config ---
    try:
        try:
            thr = float(artifact.get("result", {}).get("Action_Threshold", default_thr))
        except (AttributeError, TypeError, ValueError):
            thr = default_thr
        # A result row taken from a pandas frame can carry NaN for a missing
        # threshold. NaN makes every `a > thr` / `a < -thr` comparison False
        # (so the model could never signal) and is not valid strict JSON.
        if not math.isfinite(thr):
            thr = default_thr

        with open(os.path.join(save_dir, f"{prefix}_probability_config.json"), "w") as f:
            json.dump(
                {"threshold": thr, "use_confidence": True, "inference_mode": "deterministic"},
                f
            )
    except Exception as e:
        logging.warning(f"[QC SAVE] Could not write probability_config.json for {prefix}: {e}")


    # --- Model info ---
    try:
        # `or {}` keeps a None result from failing after the file has already
        # been opened (which would leave an empty JSON file behind).
        r = artifact.get("result") or {}
        with open(os.path.join(save_dir, f"{prefix}_model_info.json"), "w") as f:
            json.dump({
                "model": "PPO",
                "ticker": r.get("Ticker"),
                "window": r.get("Window"),
                "date_trained": datetime.today().strftime("%Y-%m-%d"),
                "framework": "stable-baselines3",
                "input_features": artifact.get("features", []),
                "final_portfolio": r.get("PPO_Portfolio"),
                "buy_hold": r.get("BuyHold"),
                "sharpe": r.get("Sharpe"),
            }, f)
    except Exception as e:
        logging.warning(f"[QC SAVE] Could not write model_info.json for {prefix}: {e}")

    logging.info(f"[QC SAVE] Saved QC artifacts for {prefix}")

def load_model_and_env(prefix):
    """Load the saved PPO for ``prefix`` and return it with an env factory.

    The model is read from ``FINAL_MODEL_DIR`` onto the CPU. The returned
    ``make_env(df_window)`` builds a single-env ``DummyVecEnv`` around
    ``ContinuousPositionEnv`` configured with ``ENV_KWARGS``. When the
    matching VecNormalize file exists it is loaded on top, and the result is
    switched to evaluation mode (``training`` and ``norm_reward`` off).

    Returns:
        Tuple ``(model, make_env)``.
    """
    saved = {
        kind: os.path.join(FINAL_MODEL_DIR, f"{prefix}_{kind}")
        for kind in ("model.zip", "vecnorm.pkl")
    }
    model = PPO.load(saved["model.zip"], device="cpu")

    def make_env(df_window):
        bounds = (50, len(df_window) - 3)

        def _build():
            return ContinuousPositionEnv(df=df_window, frame_bound=bounds, **ENV_KWARGS)

        vec_env = DummyVecEnv([_build])
        if os.path.exists(saved["vecnorm.pkl"]):
            vec_env = VecNormalize.load(saved["vecnorm.pkl"], vec_env)
        # Evaluation mode: freeze running statistics, report raw rewards.
        vec_env.training = False
        vec_env.norm_reward = False
        return vec_env

    return model, make_env

def latest_df_for_symbol(symbol, horizon_days=5, interval="1m"):
    """Fetch recent bars for ``symbol`` and run them through the feature builder.

    Bars are requested from ``horizon_days`` ago up to the current moment.
    The bounds are passed to yfinance as timezone-aware datetimes because
    its ``end`` argument is exclusive: passing today's date as a
    ``YYYY-MM-DD`` string would drop every bar from today.

    Args:
        symbol: Ticker to download.
        horizon_days: How many days back from now to request.
        interval: yfinance bar interval.

    Returns:
        The feature frame (with a ``Symbol`` column and, when
        ``ENABLE_WAVELET`` is on, a ``Denoised_Close`` column), or None when
        the download returned nothing.
    """
    from datetime import timezone

    # Timezone-aware "now"; datetime.utcnow() is deprecated and returns a
    # naive value that downstream code may interpret in another timezone.
    end = datetime.now(timezone.utc)
    start = end - timedelta(days=horizon_days)
    df_live = yf.download(
        symbol,
        start=start,
        end=end,
        interval=interval,
        progress=False,
        auto_adjust=False,
    )
    if df_live is None or df_live.empty:
        return None
    # Newer yfinance releases can return (field, ticker) MultiIndex columns
    # even for one symbol; keep only the field level so "Close" etc. are
    # plain columns. No-op for a flat column index.
    if isinstance(df_live.columns, pd.MultiIndex):
        df_live.columns = df_live.columns.get_level_values(0)
    df_live = df_live.reset_index()
    df_live["Symbol"] = symbol
    df_live = compute_enhanced_features(df_live)
    if ENABLE_WAVELET and "Denoised_Close" not in df_live.columns:
        df_live["Denoised_Close"] = df_live["Close"]
    return df_live

def predict_latest(symbol, prefix):
    """Build the latest window, fast-forward the env, and return a signal.

    Steps:
      1. Load the per-model action threshold from
         ``{prefix}_probability_config.json`` (default 0.2 when the file is
         missing, unreadable, or holds a non-finite value).
      2. Load the model and fetch fresh bars; give up (return None) with
         fewer than 100 rows.
      3. Step the env with a zero target position until the episode ends, so
         the observation reflects the end of the window.
      4. Run a deterministic prediction and threshold it into
         BUY / SELL / HOLD.

    Returns:
        Dict with ``signal``, ``confidence``, ``action``, ``threshold``,
        ``ts``, ``price``, ``mu`` and ``sigma``; or None when there is not
        enough data.
    """
    default_thr = 0.2

    # --- load per-model threshold ---
    cfg_path = os.path.join(FINAL_MODEL_DIR, f"{prefix}_probability_config.json")
    thr = default_thr
    if os.path.exists(cfg_path):
        try:
            with open(cfg_path, "r") as f:
                thr = float(json.load(f).get("threshold", default_thr))
        except (OSError, ValueError, TypeError, AttributeError):
            thr = default_thr
    # A NaN/inf threshold would make both comparisons below False forever.
    if not np.isfinite(thr):
        thr = default_thr

    model, make_env = load_model_and_env(prefix)
    live_df = latest_df_for_symbol(symbol)
    if live_df is None or len(live_df) < 100:
        logging.warning("No fresh data yet for live inference.")
        return None

    df_window = live_df.iloc[-2500:].reset_index(drop=True) if len(live_df) > 2500 else live_df.copy()

    env = make_env(df_window)
    obs = env.reset()
    if isinstance(obs, tuple):
        obs, _ = obs

    # fast-forward with HOLD
    for _ in range(len(df_window) - 1):
        obs, _, dones, infos = env.step([np.array([0.0], dtype=np.float32)])
        if isinstance(dones, (np.ndarray, list, tuple)) and len(dones) and dones[0]:
            # SB3 VecEnvs reset automatically when an episode ends, so the
            # `obs` returned with done=True is already the FIRST observation
            # of a new episode. The true last observation is handed back in
            # info["terminal_observation"]; use it (with a batch axis) so the
            # prediction is made on the newest bars, not the oldest.
            last_info = infos[0] if len(infos) else {}
            terminal_obs = last_info.get("terminal_observation") if isinstance(last_info, dict) else None
            if terminal_obs is not None:
                obs = np.asarray(terminal_obs)[None, ...]
            break

    action, _ = model.predict(obs, deterministic=True)
    mu, sigma = get_mu_sigma(model, obs)

    a = float(np.array(action).squeeze())

    # --- thresholded signal using loaded thr ---
    if a > thr:
        signal = "BUY"
    elif a < -thr:
        signal = "SELL"
    else:
        signal = "HOLD"

    conf = abs(a)
    ts = df_window["Datetime"].iloc[-1] if "Datetime" in df_window.columns else None
    price = float(df_window["Close"].iloc[-1])

    return dict(
        signal=signal,
        confidence=conf,
        action=a,
        threshold=thr,
        ts=ts,
        price=price,
        mu=mu,
        sigma=sigma,
    )

def place_order(signal, qty=1):
    """Route an order to the configured broker stub.

    Sleeps for ``SIM_LATENCY_MS`` milliseconds first when that setting is
    positive. No real order is placed: with ``BROKER == "log"`` the order is
    logged as a paper trade, and any other broker value is logged as not
    implemented.
    """
    if SIM_LATENCY_MS > 0:
        time.sleep(SIM_LATENCY_MS / 1000.0)

    message = (
        f"[PAPER] {signal} x{qty}"
        if BROKER == "log"
        else f"[BROKER={BROKER}] {signal} x{qty} (not implemented)"
    )
    logging.info(message)

def live_loop(symbol, best_prefix):
    """Poll once a minute for a fresh prediction while ``LIVE_MODE`` is true.

    Each pass predicts with the given model prefix, logs the result and
    forwards the signal to ``place_order``. Any exception raised during a
    pass is logged and the loop carries on after the usual 60-second sleep.
    """

    def _poll_once():
        # One predict -> log -> order cycle; a falsy prediction is skipped.
        pred = predict_latest(symbol, best_prefix)
        if not pred:
            return
        logging.info(
            f"{symbol} {pred['ts']} | {pred['signal']} "
            f"@ {pred['price']:.2f} (conf {pred['confidence']:.2f})"
        )
        place_order(pred["signal"], qty=1)

    while LIVE_MODE:
        try:
            _poll_once()
        except Exception as e:
            logging.error(f"Live loop error: {e}")
        time.sleep(60)  # Poll each minute

# How many best-Sharpe windows per ticker are exported to QC_TOP_DIR.
TOP_N_WINDOWS = 3

# PPO hyper-parameter presets. The keys are the ones walkforward_ppo looks up
# in its overrides dict: lr, n_steps, batch, clip, ent.
FAST = dict(lr=8e-5, n_steps=3072, batch=512, clip=0.2, ent=0.01)
SLOW = dict(lr=3e-5, n_steps=3072, batch=512, clip=0.16, ent=0.005)

# Symbols that receive the FAST preset.
fast_names = set(
    "TSLA NVDA AMD AVGO AAPL MSFT AMZN GOOGL META ADBE CRM "
    "INTC QCOM TXN ORCL NEE GE XOM CVX LLY NKE SBUX".split()
)
# Symbols listed for the SLOW preset.
slow_names = set(
    "BRK-B JPM BAC JNJ UNH MRK PFE ABBV ABT AMGN PG PEP KO "
    "V MA WMT MCD TMO DHR ACN IBM LIN PM RTX UPS UNP COST HD LOW".split()
)

def pick_params(symbol: str):
    """Return the PPO preset for ``symbol``.

    Symbols in ``fast_names`` get ``FAST``; every other symbol, listed in
    ``slow_names`` or not, gets ``SLOW``.
    """
    if symbol in fast_names:
        return FAST
    return SLOW

def _existing_features_for(prefix: str) -> list:
    """Return a previously saved feature list for ``prefix``, or ``[]``.

    Looks for ``{prefix}_features.json`` in QC_TOP_DIR, then FINAL_MODEL_DIR,
    and returns the first non-empty ``"features"`` list found.
    """
    for folder in (QC_TOP_DIR, FINAL_MODEL_DIR):
        path = os.path.join(folder, f"{prefix}_features.json")
        try:
            with open(path, "r") as f:
                feats = json.load(f).get("features", [])
        except (OSError, ValueError, AttributeError):
            continue
        if feats:
            return list(feats)
    return []


def export_qc_top_from_existing(ticker: str, top_n: int = 3):
    """
    If a ticker is fully skipped (models already exist), still populate QC_TOP_DIR.
    Uses existing summary CSVs to pick top Sharpe windows, then copies artifacts from FINAL_MODEL_DIR.
    Prefers using 'Prefix' from summaries (robust). Falls back to WindowIdx reconstruction.

    Selection details:
      * All ``summary*.csv`` files under every ``ppo_walkforward_results_*``
        folder are combined, filtered to ``ticker`` and to numeric Sharpe.
      * Rows that cannot be mapped to a model prefix are ignored.
      * When the same model appears in several summaries only its
        best-Sharpe row is kept, so the top-N are N distinct models.
      * A model is exported only when both its ``model.zip`` and
        ``vecnorm.pkl`` exist in FINAL_MODEL_DIR.

    Args:
        ticker: Symbol whose models should be exported.
        top_n: Maximum number of distinct models to export.
    """
    summary_files = glob.glob(os.path.join(BASE_RESULTS_DIR, "ppo_walkforward_results_*", "summary*.csv"))
    if not summary_files:
        logging.warning(f"[QC_TOP] No summary files found; cannot export QC_TOP for {ticker}.")
        return

    frames = []
    for p in summary_files:
        try:
            frames.append(pd.read_csv(p))
        except Exception as e:
            logging.warning(f"[QC_TOP] Could not read {p}: {e}")

    if not frames:
        logging.warning(f"[QC_TOP] Could not read any summary files; cannot export QC_TOP for {ticker}.")
        return

    combo = pd.concat(frames, ignore_index=True)

    if "Ticker" not in combo.columns:
        logging.warning("[QC_TOP] Summary files missing 'Ticker' column; cannot export.")
        return

    combo = combo[combo["Ticker"] == ticker].copy()
    if combo.empty or "Sharpe" not in combo.columns:
        logging.warning(f"[QC_TOP] No rows for {ticker} in summaries (or missing Sharpe); cannot export QC_TOP.")
        return

    # Ensure Sharpe is numeric so sorting works reliably
    combo["Sharpe"] = pd.to_numeric(combo["Sharpe"], errors="coerce")
    combo = combo.dropna(subset=["Sharpe"])
    if combo.empty:
        logging.warning(f"[QC_TOP] All Sharpe values were non-numeric for {ticker}; cannot export.")
        return

    use_prefix = ("Prefix" in combo.columns) and combo["Prefix"].notna().any()

    if use_prefix:
        # Robust path: use saved Prefix directly. Rows without a Prefix
        # (e.g. from older summaries) would otherwise become the literal
        # prefix "nan" and waste a top-N slot.
        ranked = combo.dropna(subset=["Prefix"]).copy()
        ranked["__prefix__"] = ranked["Prefix"].astype(str)
    else:
        # Fallback: reconstruct WindowIdx (less robust)
        if "Window" not in combo.columns:
            logging.warning(f"[QC_TOP] Summaries for {ticker} have neither 'Prefix' nor 'Window'; cannot export.")
            return

        def _window_start(w):
            try:
                s = str(w)
                return int(s.split("-")[0]) if "-" in s else np.nan
            except Exception:
                return np.nan

        # The same window can be listed in several summary files; keep its
        # best row so the running index below counts each window once.
        combo = (
            combo.sort_values("Sharpe", ascending=False)
                 .drop_duplicates(subset="Window", keep="first")
                 .copy()
        )
        combo["WindowStart"] = combo["Window"].apply(_window_start)
        combo = combo.sort_values(["WindowStart"]).reset_index(drop=True)
        combo["WindowIdx"] = combo.groupby("Ticker").cumcount() + 1

        ranked = combo.copy()
        ranked["__prefix__"] = ranked["WindowIdx"].apply(lambda widx: f"ppo_{ticker}_window{int(widx)}")

    # Best row per model first, then the top-N distinct models.
    top = (
        ranked.sort_values("Sharpe", ascending=False)
              .drop_duplicates(subset="__prefix__", keep="first")
              .head(top_n)
              .copy()
    )

    exported = 0

    for _, r in top.iterrows():
        prefix = str(r["__prefix__"])

        model_path = os.path.join(FINAL_MODEL_DIR, f"{prefix}_model.zip")
        vec_path   = os.path.join(FINAL_MODEL_DIR, f"{prefix}_vecnorm.pkl")

        if not (os.path.exists(model_path) and os.path.exists(vec_path)):
            logging.warning(f"[QC_TOP] Missing model/vecnorm for {prefix}; cannot export.")
            continue

        artifact_for_save = {
            "model": None,
            "model_path": model_path,
            "vecnorm_path": vec_path,
            # Reuse a saved feature list when one exists so an earlier,
            # populated features.json is not overwritten with an empty list.
            "features": _existing_features_for(prefix),
            "result": r.to_dict(),  # includes Sharpe, Action_Threshold, etc if present
            "prefix": prefix,
        }
        save_quantconnect_model(artifact_for_save, prefix, QC_TOP_DIR)
        exported += 1

    logging.info(f"[QC_TOP] Exported {exported}/{len(top)} QC artifacts for {ticker}.")

def walkforward_ppo(df_sym, ticker,
                    window_size=3500, step_size=500,
                    timesteps=150_000, learning_rate=1e-4,
                    ppo_overrides=None):
    import heapq

    if ppo_overrides is None:
        ppo_overrides = {}

    if len(df_sym) < window_size:
        logging.warning(
            f"Skipping {ticker}: only {len(df_sym)} rows (min required: {window_size})"
        )
        return []

    results = []
    windows = get_walk_forward_windows(df_sym, window_size, step_size)
    top_heap = []
    skipped_windows = []

    # quick check: all windows already have model+vecnorm?
    all_done = True
    for idx in range(len(windows)):
        prefix = f"ppo_{ticker}_window{idx+1}"
        model_ok   = os.path.exists(os.path.join(FINAL_MODEL_DIR, f"{prefix}_model.zip"))
        vecnorm_ok = os.path.exists(os.path.join(FINAL_MODEL_DIR, f"{prefix}_vecnorm.pkl"))
        if not (model_ok and vecnorm_ok):
            all_done = False
            break

    if all_done:
        logging.info(f"Ticker {ticker} fully skipped (all {len(windows)} windows already complete).")
        record_skips_global(ticker, skipped_windows=[], total_windows=len(windows), fully_skipped=True)

        export_qc_top_from_existing(ticker, top_n=TOP_N_WINDOWS)
        return []



    for w_idx, (start, end) in enumerate(windows):
        window_start_time = time.time()
        gc.collect()

        prefix = f"ppo_{ticker}_window{w_idx+1}"
        model_path   = os.path.join(FINAL_MODEL_DIR, f"{prefix}_model.zip")
        vecnorm_path = os.path.join(FINAL_MODEL_DIR, f"{prefix}_vecnorm.pkl")

        if os.path.exists(model_path) and os.path.exists(vecnorm_path):
            logging.info(f"Skipping {ticker} | Window {w_idx+1}, already trained.")
            skipped_windows.append(f"{ticker}_window{w_idx+1}")
            continue

        missing = []
        if not os.path.exists(model_path):   missing.append("model.zip")
        if not os.path.exists(vecnorm_path): missing.append("vecnorm.pkl")
        logging.info(
            f"Will train {ticker} | Window {w_idx+1} because missing: {', '.join(missing)}"
        )

        df_window = df_sym.iloc[start:end].reset_index(drop=True)
        if len(df_window) <= 52 or len(df_window) % 2 != 0:
            df_window = df_window.iloc[:-1]

        frame_bound = (50, len(df_window) - 3)

        env = DummyVecEnv([lambda: ContinuousPositionEnv(
          df=df_window, frame_bound=frame_bound, **ENV_KWARGS
        )])

        env = VecNormalize(env, norm_obs=True, norm_reward=True, clip_obs=10.0)

        try:
            model = PPO(
                "MlpPolicy",
                env,
                verbose=0,
                device=("cuda" if torch.cuda.is_available() else "cpu"),
                learning_rate=ppo_overrides.get("lr", learning_rate),
                n_steps=ppo_overrides.get("n_steps", 256),
                batch_size=ppo_overrides.get("batch", 64),
                n_epochs=5,
                gamma=0.99,
                gae_lambda=0.95,
                clip_range=ppo_overrides.get("clip", 0.2),
                ent_coef=ppo_overrides.get("ent", 0.005),
                policy_kwargs=dict(net_arch=[64, 64]),
            )

            logging.info(f"Training {ticker} Window {w_idx+1}/{len(windows)}")
            model.learn(total_timesteps=timesteps)

            # Evaluation pass
            env.training = False
            env.norm_reward = False
            obs = env.reset()
            if isinstance(obs, tuple):
                obs, _ = obs

            nav_track = [1.0]
            bh_track  = [1.0]
            step_log  = []
            executed_trade_count = 0
            signal_trade_count   = 0
            signal_trade_count_dyn   = 0   # dynamic-threshold diagnostic

            DIAG_THR = 0.2
            for i in range(len(df_window) - 1):
                action, _ = model.predict(obs, deterministic=True)
                mu, sigma = get_mu_sigma(model, obs)

                obs, rew, dones, infos = env.step(action)
                # VecEnv returns list/tuple of infos; otherwise it may be a dict
                if isinstance(infos, (list, tuple)):
                    info = infos[0] if len(infos) else {}
                elif isinstance(infos, dict):
                    info = infos
                else:
                    info = {}


                nav_track.append(float(info.get("nav", nav_track[-1])))
                bh_track.append(
                    bh_track[-1] * (1.0 + float(info.get("ret_t", 0.0)))
                )

                a = float(np.array(action).squeeze())
                dt_val = df_window["Datetime"].iloc[i+1] if "Datetime" in df_window.columns else None
                px     = float(df_window["Close"].iloc[i+1]) if "Close" in df_window.columns else np.nan
                #“signal” trades (threshold-based) — diagnostic only
                if a > DIAG_THR or a < -DIAG_THR:
                    signal_trade_count += 1
                #real trades executed by env friction logic
                if bool(info.get("executed_trade", False)):
                    executed_trade_count += 1

                # next-bar return (to score BUY/SELL vs the *next* move)
                if i + 2 < len(df_window):
                    p0 = float(df_window["Close"].iloc[i+1])
                    p1 = float(df_window["Close"].iloc[i+2])
                    next_ret = 0.0 if p0 <= 0 else (p1 - p0) / p0
                else:
                    next_ret = 0.0

                # reward scalar (VecEnv returns arrays)
                rew_val = float(rew[0]) if isinstance(rew, (list, tuple, np.ndarray)) else float(rew)

                step_log.append({
                    "Index": i+1,
                    "Datetime": dt_val,
                    "Close": px,
                    "Action": a,
                    "mu": mu,
                    "sigma": sigma,
                    "nav": nav_track[-1],
                    "ret_t": float(info.get("ret_t", 0.0)),
                    "next_ret": float(next_ret),
                    "reward": rew_val,
                    "pos": float(info.get("pos", 0.0)),
                    "trade_cost": float(info.get("trade_cost", 0.0)),
                    "base_ret": float(info.get("base_ret", 0.0)),
                    "rel_alpha": float(info.get("rel_alpha", 0.0)),
                    "mom": float(info.get("mom", 0.0)),
                })

                # done handling (VecEnv)
                if isinstance(dones, (np.ndarray, list, tuple)):
                    if dones[0]:
                        break
                elif dones:
                    break


            # --- Metrics ---
            final_value = float(nav_track[-1]) * 100_000.0
            hold_value  = float(bh_track[-1])  * 100_000.0

            #dynamic action threshold for this window (prevents “no signals” windows)
            abs_actions = np.array([abs(float(r["Action"])) for r in step_log], dtype=float)
            if len(abs_actions) > 0:
                thr = float(np.quantile(abs_actions, 0.70))  # 70th percentile
                thr = float(np.clip(thr, 0.08, 0.30))
            else:
                thr = 0.2

            # Dynamic signal trade count (post-hoc diagnostic)
            signal_trade_count_dyn = int(np.sum(abs_actions > thr)) if len(abs_actions) > 0 else 0


            returns = pd.Series(nav_track).pct_change().fillna(0.0)
            sharpe  = float((returns.mean() / (returns.std() + 1e-9)) * _annualization_factor(df_window))
            drawdown = float(
                ((pd.Series(nav_track).cummax() - pd.Series(nav_track)) /
                pd.Series(nav_track).cummax()).max() * 100.0
            )

            # Classification stats (now using thr)
            correct = 0
            total   = 0
            tp_buy = fp_buy = 0
            tp_sell = fp_sell = 0

            for r in step_log:
                a = float(r["Action"])
                ret_t = float(r.get("next_ret", 0.0))

                if a > thr:
                    sig = "BUY"
                elif a < -thr:
                    sig = "SELL"
                else:
                    sig = "HOLD"

                if sig == "BUY":
                    if ret_t > 0:
                        tp_buy += 1; correct += 1
                    else:
                        fp_buy += 1
                    total += 1
                elif sig == "SELL":
                    if ret_t < 0:
                        tp_sell += 1; correct += 1
                    else:
                        fp_sell += 1
                    total += 1
                # HOLD not counted

            precision_long  = tp_buy  / (tp_buy  + fp_buy  + 1e-9)
            precision_short = tp_sell / (tp_sell + fp_sell + 1e-9)
            precision_trades = (tp_buy + tp_sell) / (
                (tp_buy + tp_sell) + (fp_buy + fp_sell) + 1e-9
            )
            step_accuracy = round(correct / total, 4) if total > 0 else 0.0
            #Trade_count reflect REAL executed trades (cooldown/min_trade_delta)
            trade_count = int(executed_trade_count)

            # Save VecNormalize
            try:
                env.save(vecnorm_path)
            except Exception as e:
                logging.warning(f"Could not save VecNormalize for {ticker} {start}-{end}: {e}")
                vecnorm_path = None

            # Save model
            model.save(model_path)

            # Save detailed predictions
            pred_path = os.path.join(RUN_RESULTS_DIR, f"{prefix}_predictions.csv")
            pd.DataFrame(step_log).to_csv(pred_path, index=False)
            logging.info(f"Saved predictions to {pred_path}")

            # Save compat predictions with same thresholds as metrics
            compat_rows = []
            for r in step_log:
                a = r["Action"]

                if a > thr:
                    signal = "BUY"
                elif a < -thr:
                    signal = "SELL"
                else:
                    signal = "HOLD"
                compat_rows.append({
                    "Index": r["Index"],
                    "Datetime": r["Datetime"],
                    "Close": r["Close"],
                    "Action": a,
                    "Signal": signal,
                    "PortfolioValue": r["nav"],
                    "Reward": r.get("reward", np.nan),
                })
            compat_path = os.path.join(RUN_RESULTS_DIR, f"{prefix}_predictions_compat.csv")
            pd.DataFrame(compat_rows).to_csv(compat_path, index=False)
            logging.info(f"Saved compatibility predictions to {compat_path}")

            # Summary row
            result_row = {
                "Ticker": ticker,
                "Window": f"{start}-{end}",
                "WindowIdx": int(w_idx + 1),
                "Prefix": prefix,
                "PPO_Portfolio": round(final_value, 2),
                "BuyHold": round(hold_value, 2),
                "Sharpe": round(sharpe, 3),
                "Drawdown_%": round(drawdown, 2),
                "Winner": "PPO" if final_value > hold_value else "Buy & Hold",
                "Action_Threshold": round(thr, 4),
                "Accuracy": step_accuracy,
                "Trade_Count": trade_count,
                "Signal_Trade_Count": int(signal_trade_count),
                "Signal_Trade_Count_Dyn": int(signal_trade_count_dyn),
                "Executed_Trade_Count": int(executed_trade_count),
                "Precision_Long": round(precision_long, 4),
                "Precision_Short": round(precision_short, 4),
                "Precision_Trades": round(precision_trades, 4),
            }

            results.append(result_row)

            meta = {
                "result": result_row,
                "features": df_window.columns.tolist(),
                "prefix": prefix,
                "model_path": model_path,
                "vecnorm_path": vecnorm_path,
            }

            item = (result_row["Sharpe"], prefix, meta)
            if len(top_heap) < TOP_N_WINDOWS:
                heapq.heappush(top_heap, item)
            else:
                if item[0] > top_heap[0][0]:
                    heapq.heapreplace(top_heap, item)

            logging.info(
                f"{ticker} | Window {w_idx+1} runtime: "
                f"{round(time.time() - window_start_time, 2)}s"
            )
        finally:
            try:
                env.close()
            except Exception:
                pass
            del env
            try:
                del model
            except Exception:
                pass
            gc.collect()
            try:
                torch.cuda.empty_cache()
            except Exception:
                pass

    if skipped_windows:
        logging.info(
            f"{ticker} skipped windows (already complete): {', '.join(skipped_windows)}"
        )
        record_skips_global(
            ticker,
            skipped_windows=skipped_windows,
            total_windows=len(windows),
            fully_skipped=False,
        )

    # Save top-N QC-compatible
    top_list = sorted(top_heap, key=lambda t: t[0], reverse=True)
    for _, _, meta in top_list:
        artifact_for_save = {
            "model": None,  # we're copying from disk, not re-saving an in-memory model
            "model_path": meta["model_path"],
            "vecnorm_path": meta["vecnorm_path"],
            "features": meta["features"],
            "result": meta["result"],
            "prefix": meta["prefix"],
        }
        save_quantconnect_model(artifact_for_save, meta["prefix"], QC_TOP_DIR)

    return results

def process_ticker(ticker):
    """Run the walk-forward PPO pipeline for a single ticker.

    Selects the rows of the module-level ``df`` whose ``Symbol`` equals
    *ticker*, looks up hyper-parameters with ``pick_params`` and delegates to
    ``walkforward_ppo`` using the module-level ``WINDOW_SIZE``, ``STEP_SIZE``
    and ``TIMESTEPS`` settings.

    Returns:
        Whatever ``walkforward_ppo`` returns, or an empty list when any
        exception is raised, so one failing ticker does not abort a batch.
    """
    try:
        hp = pick_params(ticker)
        return walkforward_ppo(
            df[df["Symbol"] == ticker].copy(),  # copy: the shared frame is not mutated
            ticker,
            window_size=WINDOW_SIZE,
            step_size=STEP_SIZE,
            timesteps=TIMESTEPS,
            learning_rate=hp["lr"],
            ppo_overrides=hp,
        )
    except Exception as e:
        # logging.exception logs at ERROR level like before, but also records
        # the traceback so the failing line can be found after a long run.
        logging.exception(f"{ticker}: training failed with {e}")
        return []


from concurrent.futures import ThreadPoolExecutor

def run_parallel_tickers(tickers,
                         out_path=os.path.join(RUN_RESULTS_DIR, "summary.csv"),
                         max_workers=8):
    """Process every ticker on a thread pool and write one combined summary CSV.

    Args:
        tickers: iterable of ticker symbols handed to ``process_ticker``.
        out_path: CSV destination for the combined rows. The default is
            computed once, when this function is defined.
        max_workers: size of the thread pool.

    Returns:
        The flat list of per-window result rows, in ticker order. The CSV is
        only written when at least one row was produced.
    """
    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        # map() yields in input order, so the summary rows stay grouped by ticker.
        per_ticker = list(pool.map(process_ticker, tickers))

    results = [row for rows in per_ticker if rows for row in rows]

    if not results:
        logging.warning("No results produced; summary not written.")
    else:
        pd.DataFrame(results).to_csv(out_path, index=False)
        logging.info(f"Saved summary to {out_path}")

    logging.info("All tickers processed.")
    return results

def build_ppo_selector():
    """Aggregate all summary*.csv across runs and build selector JSON.

    Reads every ``summary*.csv`` under
    ``BASE_RESULTS_DIR/ppo_walkforward_results_*``, keeps one row per
    (ticker, window), picks the highest-Sharpe window per ticker, writes that
    table to ``SELECTOR_FULL_PATH`` and writes the tickers that pass the
    safety gates to ``SELECTOR_JSON_PATH``.

    Returns ``None``. Nothing is written when no summary can be read or when
    the summaries lack the ``Ticker``/``Window``/``Sharpe`` columns.
    """
    # Sorted so the files are always read in the same order.
    summary_files = sorted(glob.glob(
        os.path.join(BASE_RESULTS_DIR, "ppo_walkforward_results_*", "summary*.csv")
    ))
    all_summaries = []

    for p in summary_files:
        try:
            tmp = pd.read_csv(p)
            tmp["RunFolder"] = os.path.dirname(p)
            all_summaries.append(tmp)
        except Exception as e:
            print(f"⚠️ Skipping {p} due to error: {e}")

    if not all_summaries:
        logging.warning("No PPO summaries found across walkforward results folders.")
        return

    combo = pd.concat(all_summaries, ignore_index=True)

    # Everything below keys on these columns; bail out instead of raising KeyError.
    missing_cols = [c for c in ("Ticker", "Window", "Sharpe") if c not in combo.columns]
    if missing_cols:
        logging.warning(
            f"PPO summaries are missing required columns {missing_cols}; selector not built."
        )
        return

    combo["Sharpe"] = pd.to_numeric(combo["Sharpe"], errors="coerce")
    combo = combo.dropna(subset=["Sharpe", "Ticker"])

    # Ensure key columns exist for robust ratios
    if "BuyHold" not in combo.columns:
        combo["BuyHold"] = np.nan
    if "PPO_Portfolio" not in combo.columns:
        combo["PPO_Portfolio"] = np.nan

    # parse Window "start-end" to WindowStart
    def _parse_window_start(w):
        if pd.isna(w):
            return None
        if isinstance(w, (int, float)):
            return int(w)
        parts = str(w).split("-")
        try:
            return int(parts[0])
        except Exception:
            return None

    # to_numeric turns the None results into NaN so the column sorts numerically.
    combo["WindowStart"] = pd.to_numeric(
        combo["Window"].apply(_parse_window_start), errors="coerce"
    )

    # RunFolder is the tie-breaker: for a window present in several runs the
    # row from the lexically greatest folder sorts last and wins the dedupe.
    combo = combo.sort_values(["Ticker", "WindowStart", "RunFolder"]).reset_index(drop=True)
    combo = combo.drop_duplicates(subset=["Ticker", "Window"], keep="last").reset_index(drop=True)

    # Keep the WindowIdx recorded in the summary, because the artifact prefix
    # below is rebuilt from it; renumbering rows would break that link.
    # Positional numbering is only a fallback for rows without a recorded index.
    positional_idx = combo.groupby("Ticker").cumcount() + 1
    if "WindowIdx" in combo.columns:
        recorded_idx = pd.to_numeric(combo["WindowIdx"], errors="coerce")
        combo["WindowIdx"] = recorded_idx.fillna(positional_idx).astype(int)
    else:
        combo["WindowIdx"] = positional_idx

    combo = combo.drop_duplicates(subset=["Ticker", "WindowIdx"], keep="last")

    # Take the whole best-Sharpe row per ticker. GroupBy.first() is avoided
    # because it returns the first non-null value per column and can therefore
    # combine values from different rows.
    best_by_symbol = (
        combo
        .sort_values("Sharpe", ascending=False, kind="mergesort")
        .drop_duplicates(subset="Ticker", keep="first")
        .sort_values("Ticker")
        .reset_index(drop=True)
    )

    # If Drawdown_% missing (older runs), create it so rename won't break
    if "Drawdown_%" not in best_by_symbol.columns:
        best_by_symbol["Drawdown_%"] = np.nan

    # Ensure precision cols exist
    for col in ["Precision_Long", "Precision_Short", "Precision_Trades"]:
        if col not in best_by_symbol.columns:
            best_by_symbol[col] = None

    # Rename columns so everything downstream uses consistent names
    best_by_symbol = best_by_symbol.rename(columns={
        "Drawdown_%": "Drawdown",
        "PPO_Portfolio": "Final_Portfolio",
    })

    # Ensure Accuracy / Trade_Count exist
    if "Accuracy" not in best_by_symbol.columns:
        best_by_symbol["Accuracy"] = 0.0
    if "Trade_Count" not in best_by_symbol.columns:
        best_by_symbol["Trade_Count"] = None

    best_by_symbol["Model"] = MODEL_NAME

    # PPO vs Buy & Hold ratio; a missing or zero BuyHold yields NaN.
    # Vectorised so it also works when the frame is empty.
    buy_hold = pd.to_numeric(best_by_symbol["BuyHold"], errors="coerce")
    final_value = pd.to_numeric(best_by_symbol["Final_Portfolio"], errors="coerce")
    best_by_symbol["Rel_vs_BH"] = final_value / buy_hold.where(buy_hold != 0)

    # Save flat CSV for debugging
    best_by_symbol.to_csv(SELECTOR_FULL_PATH, index=False)
    print(f"Aggregated PPO selector saved to → {SELECTOR_FULL_PATH}")

    # Safety filters (tune as needed)
    df_sel = best_by_symbol.copy()
    gates = (
        (df_sel["Sharpe"].fillna(-999) > 0.0) &
        (df_sel["Drawdown"].fillna(999) < 50.0) &
        (df_sel["Final_Portfolio"].fillna(0) > 80_000) &
        (df_sel["Rel_vs_BH"].fillna(0) >= 0.95)   # PPO ≥ 95% of B&H; change to >1.0 to enforce beat
    )
    df_sel = df_sel[gates].copy()

    df_sel["prefix"] = (
        "ppo_"
        + df_sel["Ticker"].astype(str)
        + "_window"
        + df_sel["WindowIdx"].astype(int).astype(str)
    )

    df_sel["artifact_path"] = df_sel["prefix"].apply(
        lambda p: os.path.join(FINAL_MODEL_DIR, f"{p}_model.zip")
    )
    df_sel["vecnorm_path"] = df_sel["prefix"].apply(
        lambda p: os.path.join(FINAL_MODEL_DIR, f"{p}_vecnorm.pkl")
    )

    EPS = 0.03  # 3% of top-sharpe for "close enough"
    selected_models = {}

    def _is_missing(v):
        # None or a float NaN (v != v is only true for NaN).
        return v is None or (isinstance(v, float) and v != v)

    def safe_int(v, default=0):
        """int(v), or ``default`` when v is missing or not convertible."""
        if _is_missing(v):
            return int(default)
        try:
            return int(v)
        except (ValueError, TypeError):
            return int(default)

    def safe_float(v, default=0.0):
        """float(v), or ``default`` when v is missing or not convertible."""
        if _is_missing(v):
            return float(default)
        try:
            return float(v)
        except (ValueError, TypeError):
            return float(default)

    # NOTE: df_sel holds one row per ticker at this point, so `second` is
    # always None and every entry is "single"; the ensemble branch only
    # becomes reachable if several rows per ticker are kept upstream.
    for ticker, group in df_sel.groupby("Ticker"):
        group_sorted = group.sort_values("Sharpe", ascending=False)
        top = group_sorted.iloc[0]
        second = group_sorted.iloc[1] if len(group_sorted) > 1 else None

        if (second is not None) and (
            abs(top["Sharpe"] - second["Sharpe"]) <= abs(top["Sharpe"]) * EPS
        ):
            mode = "ensemble"
            primary, secondary = top["Model"], second["Model"]
        else:
            mode = "single"
            primary, secondary = top["Model"], None

        selected_models[ticker] = {
            "model": MODEL_NAME,
            "score": round(safe_float(top["Sharpe"]), 4),
            "return": round(safe_float(top["Final_Portfolio"]), 2),
            "sharpe": round(safe_float(top["Sharpe"]), 3),
            "drawdown": round(safe_float(top["Drawdown"]), 2),
            "sortino": None,
            "turnover": None,
            "trade_count": safe_int(top.get("Trade_Count", 0)),
            "precision": {
                "long":   safe_float(top.get("Precision_Long", 0.0)),
                "short":  safe_float(top.get("Precision_Short", 0.0)),
                "trades": safe_float(top.get("Precision_Trades", 0.0)),
            },
            "stability": {},
            "regime": "unknown",
            "rl_profile": "fast",
            "artifact": {
                "path": top["artifact_path"],
                "vecnorm": top["vecnorm_path"],
                "features": None,
                "load_ms": 180,
                "mem_mb": 512,
                "exists": os.path.exists(top["artifact_path"]),
            },
            "selection": {
                "mode": mode,
                "primary": primary,
                "secondary": secondary,
            },
        }

    with open(SELECTOR_JSON_PATH, "w") as f:
        json.dump(selected_models, f, indent=2)

    print(f"Final enhanced PPO selector JSON saved to → {SELECTOR_JSON_PATH}")

if __name__ == "__main__":
    # Record the directories this run reads from and writes to.
    logging.info(f"RUN_RESULTS_DIR   = {RUN_RESULTS_DIR}")
    logging.info(f"FINAL_MODEL_DIR  = {FINAL_MODEL_DIR}")
    logging.info(f"BASE_RESULTS_DIR = {BASE_RESULTS_DIR}")

    # --- Stage 1: keep symbols with enough rows for at least one window ---
    all_symbols = df["Symbol"].value_counts()
    min_rows = WINDOW_SIZE + 50  # one full window plus a small buffer
    candidate_symbols = []

    for sym, n in all_symbols.items():
        if n < min_rows:
            logging.warning(f"Skipping {sym}: only {n} rows (< {min_rows} required)")
            continue
        candidate_symbols.append(sym)

    if candidate_symbols:
        logging.info(f"Training candidate symbols: {candidate_symbols}")
    else:
        logging.error("No symbols have enough rows for the current WINDOW_SIZE. Nothing to train.")

    # --- Stage 2: keep symbols whose frame has every required column ---
    needed_cols = ["Close", "Datetime"]
    if ENABLE_WAVELET:
        needed_cols += ["Denoised_Close"]
    if ENABLE_SENTIMENT:
        needed_cols += ["SentimentScore"]

    valid_symbols = []
    for sym in candidate_symbols:
        cols = set(df.loc[df["Symbol"] == sym].columns)
        missing = [c for c in needed_cols if c not in cols]
        if not missing:
            valid_symbols.append(sym)
            continue
        logging.warning(f"Skipping {sym}: missing required cols {missing}")

    if valid_symbols:
        logging.info(f"Final training universe: {valid_symbols}")
    else:
        logging.error("No symbols passed the feature/column checks. Nothing to train.")

    all_results = []

    # --- Stage 3: train, either the full universe in parallel or a short test list ---
    if not test_mode:
        logging.info("Starting full parallel PPO walkforward run...")
        summary_results = run_parallel_tickers(valid_symbols)
        if summary_results:
            summary_path = os.path.join(RUN_RESULTS_DIR, "summary.csv")
            pd.DataFrame(summary_results).to_csv(summary_path, index=False)
            logging.info(f"Summary saved to {summary_path}")
        else:
            logging.warning("No results generated in full run (check logs for skips/length issues).")
    else:
        # Optional: shrink timesteps and/or window size in test mode
        TIMESTEPS = 100_000   # lighter test
        # WINDOW_SIZE = 2000  # uncomment if you want faster test runs
        # STEP_SIZE   = 500

        test_stocks = ["AAPL", "NVDA", "MSFT"]
        present = [s for s in test_stocks if s in valid_symbols]
        if present:
            logging.info(f"Test mode: running on {present}")
        else:
            logging.warning("Test mode: none of ['AAPL','NVDA','MSFT'] present after filters.")

        # Test mode runs the tickers one after another rather than on the pool.
        for sym in present:
            logging.info(f">>> [TEST_MODE] Processing {sym}")
            res = process_ticker(sym)
            logging.info(f"{sym}: produced {len(res)} window summaries")
            all_results.extend(res)

        summary_path = os.path.join(RUN_RESULTS_DIR, "summary_test_mode.csv")
        if not all_results:
            logging.warning("Test mode finished but no results were generated (no windows, or all skipped).")
        else:
            pd.DataFrame(all_results).to_csv(summary_path, index=False)
            logging.info(f"Test-mode summary saved to {summary_path}")

    # --- Stage 4: rebuild the selector; a failure here is logged, not raised ---
    try:
        build_ppo_selector()
    except Exception as e:
        logging.error(f"build_ppo_selector failed: {e}")

Gym has been unmaintained since 2022 and does not support NumPy 2.0 amongst other critical functionality.
Please upgrade to Gymnasium, the maintained drop-in replacement of Gym, or contact the authors of your software and request that they upgrade.
See the migration guide at https://gymnasium.farama.org/introduction/migration_guide/ for additional information.
2025-12-26 00:44:30,958 - INFO - RUN_RESULTS_DIR   = /content/drive/MyDrive/Results_May_2025/ppo_walkforward_results_20251226_0044
2025-12-26 00:44:30,960 - INFO - FINAL_MODEL_DIR  = /content/drive/MyDrive/Results_May_2025/ppo_models_master
2025-12-26 00:44:30,961 - INFO - BASE_RESULTS_DIR = /content/drive/MyDrive/Results_May_2025
2025-12-26 00:44:31,078 - INFO - Training candidate symbols: ['AAPL', 'QCOM', 'COST', 'RTX', 'BRK-B', 'SBUX', 'TMO', 'BAC', 'TSLA', 'CRM', 'AVGO', 'TXN', 'UNH', 'ADBE', 'AMGN', 'UNP', 'AMD', 'ACN', 'AMZN', 'ABT', 'WMT', 'ABBV', 'XOM', 'PFE', 'PM', 'CSCO', 'PG', 'LLY', 'MA', 'MCD', 'META', 'MRK', 'JPM', 

Aggregated PPO selector saved to → /content/drive/MyDrive/Results_May_2025/ppo_model_selector_FULL.csv
Final enhanced PPO selector JSON saved to → /content/drive/MyDrive/Results_May_2025/ppo_model_selector_final.json


In [None]:
from pathlib import Path
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Resolve the directories to report on. Precedence for each: a notebook global
# of the same name, then the environment variable, then a default ("." for
# RESULTS_DIR; RESULTS_DIR itself for LATEST_DIR).
RESULTS_DIR = Path(globals().get("RESULTS_DIR", os.getenv("RESULTS_DIR", ".")))
LATEST_DIR  = Path(globals().get("LATEST_DIR",  os.getenv("LATEST_DIR",  str(RESULTS_DIR))))

# Candidate equity-log locations in priority order; the two globals may be
# undefined, in which case .get() yields None and the entry is skipped later.
eq_candidates = [
    globals().get("EQUITY_LOG_CSV"),
    globals().get("EQUITY_LOG_LATEST"),
    RESULTS_DIR / "equity_log.csv",
    LATEST_DIR / "equity_log.csv",
]

def _first_existing(paths):
    """Return the first entry of *paths* that is an existing regular file.

    Falsy entries (``None``, empty string) are ignored. The match is returned
    as a ``Path``; ``None`` is returned when nothing qualifies.
    """
    candidates = (Path(entry) for entry in paths if entry)
    return next(
        (cand for cand in candidates if cand.exists() and cand.is_file()),
        None,
    )

# Locate the equity log: explicit candidates first, otherwise the most
# recently modified equity_log*.csv found in either directory.
eq_path = _first_existing(eq_candidates)
if eq_path is None:
    all_eq = [*RESULTS_DIR.glob("equity_log*.csv"), *LATEST_DIR.glob("equity_log*.csv")]
    eq_path = max(all_eq, key=lambda p: p.stat().st_mtime, default=None)

if not (eq_path and eq_path.exists()):
    print("No equity_log*.csv found in RESULTS_DIR/LATEST_DIR.")
else:
    print(f"[equity source] {eq_path}")
    try:
        eq = pd.read_csv(eq_path, parse_dates=["datetime_utc"]).sort_values("datetime_utc")
        if eq.empty:
            print(f"No rows in equity log: {eq_path}")
        else:
            # Point-to-point returns of the equity curve.
            r = eq["equity"].pct_change().dropna()
            if len(r):
                # Scaled by sqrt(252 * 6.5); the epsilon guards a zero std.
                sharpe_h = (r.mean() / (r.std() + 1e-12)) * np.sqrt(252 * 6.5)
            else:
                sharpe_h = float("nan")
            print(
                f"\nEquity summary — last: ${eq['equity'].iloc[-1]:,.2f} | "
                f"n={len(eq)} pts | Sharpe(h): {sharpe_h:.2f} | src={eq_path}"
            )
    except Exception as e:
        print(f"Could not summarize equity ({eq_path}): {e}")

def _resolve_tickers():
    """Return the sorted, upper-cased list of tickers to report on.

    Sources, merged together:
      * the ``TICKERS`` global when it is a list/tuple/set; otherwise the
        ``TICKERS`` environment variable (comma separated), falling back to
        the global when that is a string;
      * symbols discovered from ``trade_log_*.csv`` files in ``RESULTS_DIR``
        and ``LATEST_DIR``.

    Falls back to ``["UNH", "GE"]`` when nothing is found.
    """
    import re

    g = globals().get("TICKERS", None)
    if isinstance(g, (list, tuple, set)):
        base = [str(x).upper() for x in g]
    else:
        env_val = os.getenv("TICKERS", (g if isinstance(g, str) else ""))
        base = [t.strip().upper() for t in str(env_val).split(",") if t.strip()]

    # Also include symbols with existing logs on disk.
    discovered = set()
    for p in list(RESULTS_DIR.glob("trade_log_*.csv")) + list(LATEST_DIR.glob("trade_log_*.csv")):
        # Strip a trailing " (N)" so a Drive duplicate such as
        # "trade_log_XYZ (1).csv" maps to XYZ rather than the ticker "XYZ (1)".
        stem = re.sub(r"\s*\(\d+\)$", "", p.stem)
        sym = stem.replace("trade_log_", "").strip().upper()
        # trade_log_master.csv is the combined export, not a symbol.
        if sym and sym != "MASTER":
            discovered.add(sym)

    ticks = sorted(set(base) | discovered)
    return ticks if ticks else ["UNH", "GE"]

import re

tickers_to_report = _resolve_tickers()
print("Tickers to report:", tickers_to_report)

# Per-ticker trade-log report: signal/action counts, a confidence histogram
# and basic statistics for the weight / raw_action columns when present.
print("\nTrade Summary:")
for ticker in tickers_to_report:
    trade_candidates = [
        RESULTS_DIR / f"trade_log_{ticker}.csv",
        LATEST_DIR / f"trade_log_{ticker}.csv",
    ]
    log_path = _first_existing(trade_candidates)
    if not log_path:
        # Tolerate Drive duplicates like "trade_log_XYZ (1).csv". The glob alone
        # would also match other tickers sharing the prefix (e.g. "A" matching
        # "AAPL"), so reject stems where the ticker is followed by another
        # ticker character.
        own_log = re.compile(rf"trade_log_{re.escape(ticker)}(?![A-Za-z0-9.\-]).*")
        any_logs = [
            p
            for p in list(RESULTS_DIR.glob(f"trade_log_{ticker}*.csv")) +
                     list(LATEST_DIR.glob(f"trade_log_{ticker}*.csv"))
            if own_log.fullmatch(p.stem)
        ]
        log_path = max(any_logs, key=lambda p: p.stat().st_mtime, default=None)

    if not log_path or not log_path.exists():
        print(f"{ticker}: no trades logged yet.")
        continue

    try:
        # Named trade_df, not df: the training cells read the notebook-global
        # `df`, and rebinding it here would break a later training re-run.
        trade_df = pd.read_csv(
            log_path,
            on_bad_lines="skip",
            engine="python",
            parse_dates=["log_time", "bar_time"],
        )
        key = "signal" if "signal" in trade_df.columns else ("action" if "action" in trade_df.columns else None)
        if key:
            counts = trade_df[key].value_counts(dropna=False).to_dict()
            print(f"{ticker}: {counts} | src={log_path.name}")
        else:
            print(f"{ticker}: log present but missing 'signal'/'action' columns. src={log_path.name}")

        if "confidence" in trade_df.columns and trade_df["confidence"].notna().any():
            plt.figure(figsize=(8, 3.5))
            trade_df["confidence"].dropna().plot(kind="hist", bins=10, edgecolor="black")
            plt.title(f"{ticker} - Confidence Distribution")
            plt.xlabel("confidence")
            plt.tight_layout()
            plt.show()

        for col in ["weight", "raw_action"]:
            if col in trade_df.columns and trade_df[col].notna().any():
                s = trade_df[col].dropna()
                print(
                    f"{ticker} {col}: mean={s.mean():.3f}, std={s.std():.3f}, "
                    f"min={s.min():.3f}, max={s.max():.3f}"
                )
    except Exception as e:
        print(f"{ticker}: could not summarize trades ({log_path}): {e}")

# Print every open position and the total market value. Any failure
# (missing credentials, network, unexpected payload) is reported, not raised.
try:
    # Reuse an `api` client left by an earlier cell; otherwise create one.
    if "api" not in globals():
        api = init_alpaca()
    positions = api.list_positions()
    total_market_value = 0.0
    print("\nPosition Summary:")
    for p in positions:
        # market_value / current_price are passed through float() before use.
        mv = float(p.market_value)
        total_market_value += mv
        print(f"  {p.symbol}: {p.qty} shares @ ${float(p.current_price):.2f} | Value: ${mv:,.2f}")
    print(f"\nTotal Market Value: ${total_market_value:,.2f}")
except Exception as e:
    print(f"Could not summarize positions: {e}")

from datetime import datetime, timedelta, timezone

def count_filled_orders_since(api, symbol: str, days: int = 14) -> int:
    """Count orders for *symbol* that were filled or partially filled recently.

    Args:
        api: client exposing ``list_orders`` (an Alpaca REST client in this
            notebook).
        symbol: ticker to count orders for.
        days: look-back window in days, measured back from now in UTC.

    Returns:
        Number of returned orders whose status is ``filled`` or
        ``partially_filled``.
    """
    after = (datetime.now(timezone.utc) - timedelta(days=days)).isoformat()
    orders = api.list_orders(
        status="all",
        after=after,
        nested=True,
        # Filter server-side and request the largest page; with the default
        # page size, orders for other symbols can crowd this symbol out and
        # the count comes back too low.
        symbols=[symbol],
        limit=500,
    )
    # The symbol check is kept in case the server-side filter is not applied.
    return sum(1 for o in orders if o.symbol == symbol and o.status in ("filled", "partially_filled"))

# Report, per ticker, how many orders were filled in the last 14 days.
# A failure (e.g. no client or network) is printed rather than raised.
try:
    # Reuse the notebook-global client when present, otherwise create one.
    api_chk = api if "api" in globals() else init_alpaca()
    for sym in tickers_to_report:
        n = count_filled_orders_since(api_chk, sym, days=14)
        print(f"{sym}: {n} filled trades in last 14 days")
except Exception as e:
    print(f"Could not fetch filled orders: {e}")


In [None]:
#--- Export locally & download to your computer (Colab) ---
from pathlib import Path
from datetime import datetime, timezone
from google.colab import files   #<-- NEW: for browser download
import shutil, time, pandas as pd

# Drive root that the results are read from.
ROOT = Path("/content/drive/MyDrive/AlpacaPaper")
# Date stamp (UTC) used to pick today's source folders and name the export.
TODAY = datetime.now(timezone.utc).strftime("%Y-%m-%d")

# Source folders on Drive; either may be absent.
SRC_RESULTS = ROOT / "results" / TODAY         # e.g., /.../results/2025-10-13
SRC_EXPORT  = ROOT / "results_export" / TODAY  # rescue export folder (if used)

# Staging folder on the local Colab VM (not on Drive); created if missing.
DEST = Path("/content") / "exports" / f"{TODAY}_export"
DEST.mkdir(parents=True, exist_ok=True)

def copy_all(src_dir, dest_dir):
    """Copy every regular file directly inside *src_dir* into *dest_dir*.

    Sub-directories are not descended into. Each copied file is announced on
    stdout; a missing *src_dir* is reported and otherwise ignored.
    """
    if not src_dir.exists():
        print("Missing source:", src_dir)
        return

    for entry in src_dir.glob("*"):
        if not entry.is_file():
            continue
        # copy2 also carries over file metadata such as the modification time.
        shutil.copy2(entry, dest_dir / entry.name)
        print("Copied:", entry.name, "from", src_dir.name)

# Copy from both possible sources into local /content/exports/<today>_export
copy_all(SRC_RESULTS, DEST)
copy_all(SRC_EXPORT, DEST)

# Build/refresh trade_log_master.csv from the per-symbol logs in DEST.
# The master file itself matches the trade_log_*.csv pattern, so it is
# excluded here; otherwise a re-run (or a master copied from Drive) would be
# folded into the new master and duplicate every row.
master_path = DEST / "trade_log_master.csv"
sym_logs = sorted(p for p in DEST.glob("trade_log_*.csv") if p.name != master_path.name)
if sym_logs:
    frames = []
    for p in sym_logs:
        try:
            # Named sym_df, not df, so the notebook-global `df` read by the
            # training cells is left untouched.
            sym_df = pd.read_csv(p)
            sym_df["symbol_file"] = p.stem.replace("trade_log_", "")
            frames.append(sym_df)
        except Exception as e:
            print("Skip", p.name, "->", e)
    if frames:
        master = pd.concat(frames, ignore_index=True, sort=False)
        master.to_csv(master_path, index=False)
        print("Wrote:", master_path)

# Zip locally under /content; the epoch suffix keeps archive names unique.
zip_base = Path("/content") / f"results_{TODAY}_{int(time.time())}"
archive_path = shutil.make_archive(str(zip_base), "zip", DEST)
archive_path = str(Path(archive_path))  # ensure string for files.download

print("ZIP ->", archive_path)

# OPTIONAL: also keep a copy in Drive (uncomment if wanted)
#shutil.copy2(archive_path, ROOT / "results" / Path(archive_path).name)

# Prompt a browser download of the archive.
files.download(archive_path)

# Show what's in the LOCAL export folder
print("\nLocal export now contains:")
for p in sorted(DEST.iterdir()):
    print(" -", p.name)
