<a href="https://colab.research.google.com/github/racoope70/exploratory_daytrading/blob/main/deployable_multi_stock_ppo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install "shimmy>=2.0.0"

Collecting shimmy>=2.0.0
  Downloading Shimmy-2.0.0-py3-none-any.whl.metadata (3.5 kB)
Downloading Shimmy-2.0.0-py3-none-any.whl (30 kB)
Installing collected packages: shimmy
Successfully installed shimmy-2.0.0


In [2]:
!pip -q install yfinance pywavelets transformers --upgrade

In [3]:
!apt-get remove --purge -y cuda* libcuda* nvidia* || echo "No conflicting CUDA packages"
!apt-get autoremove -y
!apt-get clean

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
Note, selecting 'cuda-toolkit-12-4-config-common' for glob 'cuda*'
Note, selecting 'cuda-cudart-dev-11-0' for glob 'cuda*'
Note, selecting 'cuda-cudart-dev-11-1' for glob 'cuda*'
Note, selecting 'cuda-cudart-dev-11-7' for glob 'cuda*'
Note, selecting 'cuda-cudart-dev-11-8' for glob 'cuda*'
Note, selecting 'cuda-toolkit-12-5-config-common' for glob 'cuda*'
Note, selecting 'cuda-cudart-dev-12-0' for glob 'cuda*'
Note, selecting 'cuda-cudart-dev-12-1' for glob 'cuda*'
Note, selecting 'cuda-cudart-dev-12-2' for glob 'cuda*'
Note, selecting 'cuda-cudart-dev-12-3' for glob 'cuda*'
Note, selecting 'cuda-cudart-dev-12-4' for glob 'cuda*'
Note, selecting 'cuda-cudart-dev-12-5' for glob 'cuda*'
Note, selecting 'cuda-cudart-dev-12-6' for glob 'cuda*'
Note, selecting 'cuda-cudart-dev-12-8' for glob 'cuda*'
Note, selecting 'cuda-cudart-dev-12-9' for glob 'cuda*'
Note, selecting 'cuda-cudart-dev-13-0' fo

In [None]:
!apt-get update -qq && apt-get install -y \
    libcusolver11 libcusparse11 libcurand10 libcufft10 libnppig10 libnppc10 libnppial10 \
    cuda-toolkit-12-4

In [None]:
!pip uninstall -y protobuf
!pip install protobuf==3.20.3


In [None]:
!pip install --extra-index-url=https://pypi.nvidia.com \
    cuml-cu12==25.2.0 cudf-cu12==25.2.0 cupy-cuda12x \
    dask-cuda==25.2.0 dask-cudf-cu12==25.2.0


In [None]:
!pip install numba==0.60.0


In [None]:
!pip install "stable-baselines3[extra]>=2.0.0" "gymnasium>=0.29" "shimmy>=2.0.0" \
  gym-anytrading yfinance pandas numpy scikit-learn xgboost joblib


In [None]:
#!pip install stable-baselines3[extra] gymnasium gym-anytrading yfinance xgboost joblib
#!pip install matplotlib scikit-learn pandas

In [None]:
!pip install tensorflow==2.18.0

In [None]:
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124


In [None]:
import tensorflow as tf

# Ask TensorFlow to grow GPU memory on demand for every visible GPU.
# Nothing is printed when no GPU is visible.
gpus = tf.config.list_physical_devices("GPU")
try:
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
except RuntimeError as e:
    # Reported instead of raised so the rest of the notebook keeps running.
    print(f"TensorFlow GPU memory config failed: {e}")
else:
    if gpus:
        print("TensorFlow GPU memory growth enabled")


In [None]:
import os

# Point CUDA tooling at the 12.4 toolkit.
# The search-path variables are read with a default because LD_LIBRARY_PATH is
# often unset, and `os.environ[...] += ...` raises KeyError in that case.
# The entry is only appended when missing, so re-running this cell is harmless.
_cuda_root = '/usr/local/cuda-12.4'
os.environ['CUDA_HOME'] = _cuda_root
for _var, _subdir in (('PATH', 'bin'), ('LD_LIBRARY_PATH', 'lib64')):
    _entry = f'{_cuda_root}/{_subdir}'
    _current = os.environ.get(_var, '')
    if _entry not in _current.split(':'):
        os.environ[_var] = f'{_current}:{_entry}' if _current else _entry


In [None]:
# Step 7: Authenticate with the Hugging Face Hub (optional).
# Authenticating gives better access and avoids rate limits when downloading public models/datasets.

# To authenticate, uncomment the call below
# (notebook_login is provided by the huggingface_hub package):
# notebook_login()

In [None]:
# Imports
import os, gc, time, json, pywt, logging, warnings
from datetime import datetime, timedelta
import numpy as np
import pandas as pd
import yfinance as yf

# Feature Toggles / Config
USE_SENTIMENT = False   # load FinBERT and add a SentimentScore column
USE_REGIME    = True    # add Vol20 / Ret20 / Regime4 columns via add_regime()
TEST_MODE     = False   # restrict the run to three symbols (see SYMBOLS below)

INTERVAL     = "1h"     # bar interval passed to yfinance
PERIOD_DAYS  = 720      # look-back window passed to yfinance as "<N>d"
FWD_HORIZON  = 10       # bars ahead used for the forward return label
UP_THR       = 0.02     # forward return above this -> Target = 1
DN_THR       = -0.02    # forward return below this -> Target = -1

# Train/Validation split (by time)
VAL_FRACTION = 0.20

# Output paths
LOCAL_OUT   = "multi_stock_feature_engineered_dataset.csv"
LOCAL_TRAIN = "train.csv"
LOCAL_VAL   = "val.csv"
PARQ_FULL   = "features_full.parquet"
PARQ_TRAIN  = "train.parquet"
PARQ_VAL    = "val.parquet"

# Mount Google Drive when running on Colab; anywhere else (or if mounting
# fails) fall back to the current working directory.
try:
    from google.colab import drive
    drive.mount('/content/drive')
    DRIVE_BASE = "/content/drive/MyDrive"
except Exception:
    DRIVE_BASE = os.getcwd()

DRIVE_DIR = os.path.join(DRIVE_BASE, "trading_data")
os.makedirs(DRIVE_DIR, exist_ok=True)

# Logging
# force=True replaces any handlers already attached to the root logger;
# without it basicConfig() is a silent no-op in hosted notebooks and none of
# the INFO progress messages below are shown. The PPO cell configures logging
# the same way.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
    force=True,
)
warnings.filterwarnings("ignore", category=FutureWarning)
logging.info(f"yfinance version: {getattr(yf, '__version__', 'unknown')}")
logging.info(f"pandas version: {pd.__version__}")

ticker_list = [
    'AAPL','TSLA','MSFT','GOOGL','AMZN','NVDA','META','BRK-B','JPM','JNJ',
    'XOM','V','PG','UNH','MA','HD','LLY','MRK','PEP','KO',
    'BAC','ABBV','AVGO','PFE','COST','CSCO','TMO','ABT','ACN','WMT',
    'MCD','ADBE','DHR','CRM','NKE','INTC','QCOM','NEE','AMD','TXN',
    'AMGN','UPS','LIN','PM','UNP','BMY','LOW','RTX','CVX','IBM',
    'GE','SBUX','ORCL'
]
SYMBOLS = ['AAPL', 'NVDA', 'MSFT'] if TEST_MODE else ticker_list

# Optional FinBERT pipeline. Any failure to initialise it turns sentiment off
# for the rest of the run rather than aborting the notebook.
if USE_SENTIMENT:
    try:
        import torch
        from transformers import pipeline
        device_id = 0 if torch.cuda.is_available() else -1
        sentiment_pipeline = pipeline("sentiment-analysis", model="ProsusAI/finbert", device=device_id)
        logging.info("FinBERT sentiment enabled.")
    except Exception as e:
        logging.warning(f"Could not init FinBERT; disabling sentiment. Err: {e}")
        USE_SENTIMENT = False
        sentiment_pipeline = None
else:
    sentiment_pipeline = None

# Helpers
def _force_datetime_column(df: pd.DataFrame) -> pd.DataFrame:
    """
    Ensure a tz-naive 'Datetime' column exists even if the index had no name.
    Deduplicate/sort by Datetime.
    """
    if isinstance(df.index, pd.DatetimeIndex):
        try:
            if df.index.tz is not None:
                df.index = df.index.tz_convert(None)
        except Exception:
            try:
                df.index = df.index.tz_localize(None)
            except Exception:
                pass
        df.index.name = 'Datetime'
        df = df.reset_index()
    else:
        df = df.reset_index()
        first = df.columns[0]
        if np.issubdtype(df[first].dtype, np.datetime64):
            df = df.rename(columns={first: 'Datetime'})
        elif 'Date' in df.columns:
            df['Datetime'] = pd.to_datetime(df['Date'])
        elif 'Datetime' not in df.columns:
            try:
                df['Datetime'] = pd.to_datetime(df[first], errors='coerce')
            except Exception:
                pass

    if 'Datetime' not in df.columns:
        raise KeyError("Failed to construct 'Datetime' column from yfinance output.")

    df['Datetime'] = pd.to_datetime(df['Datetime'])
    df = (df.drop_duplicates(subset=['Datetime'])
            .sort_values('Datetime')
            .reset_index(drop=True))
    return df

def _normalize_ohlcv(df_in: pd.DataFrame, ticker: str) -> pd.DataFrame:
    """
    Normalize column names from yfinance:
    - Flatten MultiIndex
    - Strip ticker at prefix/suffix (e.g., 'AAPL Open' or 'Open AAPL')
    - Map variants to canonical: Open, High, Low, Close, Adj Close, Volume
    """
    import re
    df = df_in.copy()

    if isinstance(df.columns, pd.MultiIndex):
        flat = []
        for col in df.columns:
            parts = [str(p) for p in col if p is not None and str(p) != ""]
            flat.append(" ".join(parts))
        df.columns = flat

    df.columns = [re.sub(r"\s+", " ", str(c)).strip() for c in df.columns]

    tkr = ticker.upper().replace("-", "[- ]?")
    cleaned = {}
    for c in df.columns:
        c_up = c.upper()
        c2 = re.sub(rf"^(?:{tkr})[\s/_-]+", "", c_up)   # Leading ticker
        c2 = re.sub(rf"[\s/_-]+(?:{tkr})$", "", c2)     # Trailing ticker
        c2 = c2.title()
        cleaned[c] = c2
    if any(cleaned[c] != c for c in df.columns):
        df = df.rename(columns=cleaned)

    cols_ci = {c.lower(): c for c in df.columns}
    wants = {
        "Open":      ["open"],
        "High":      ["high"],
        "Low":       ["low"],
        "Close":     ["close", "close*", "last"],
        "Adj Close": ["adj close", "adj_close", "adjclose", "adjusted close"],
        "Volume":    ["volume", "vol"]
    }
    rename_map = {}
    for desired, alts in wants.items():
        if desired.lower() in cols_ci:
            rename_map[cols_ci[desired.lower()]] = desired
            continue
        for a in alts:
            if a in cols_ci:
                rename_map[cols_ci[a]] = desired
                break
    if rename_map:
        df = df.rename(columns=rename_map)

    return df

def download_stock_data(ticker, interval="1h", period_days=720, max_retries=5, sleep_base=3):
    """
    Robust yfinance intraday downloader with schema normalization and history() fallback.
    Ensures: Open, High, Low, Close, Volume (Adj Close synthesized if missing) + Datetime + Symbol.

    Each attempt first calls yf.download(); if that call or the normalization of
    its result raises, the same attempt falls back to yf.Ticker(ticker).history().
    Only when both fail does the function sleep (sleep_base * attempt seconds)
    and move on to the next attempt.

    Args:
        ticker: symbol passed to yfinance and written to the 'Symbol' column.
        interval: bar interval forwarded to yfinance.
        period_days: look-back window; sent to yfinance as "<int(period_days)>d".
        max_retries: number of download()+history() attempts.
        sleep_base: base for the linear back-off between attempts, in seconds.

    Returns:
        The normalized DataFrame, or None if every attempt failed.
    """
    period_str = f"{int(period_days)}d"

    def _postprocess(df: pd.DataFrame) -> pd.DataFrame:
        """Normalize columns, build 'Datetime', and verify the OHLCV columns exist."""
        df = _normalize_ohlcv(df, ticker)
        df = _force_datetime_column(df)
        needed = {'Open', 'High', 'Low', 'Close', 'Volume'}
        missing = needed - set(df.columns)
        if missing:
            logging.debug(f"[{ticker}] columns received: {list(df.columns)}")
            raise ValueError(f"Missing OHLCV columns after normalize: {missing}")
        # Synthesize 'Adj Close' from 'Close' when the source did not supply it.
        if 'Adj Close' not in df.columns:
            df['Adj Close'] = df['Close']
        return df

    for attempt in range(1, max_retries + 1):
        try:
            logging.info(f" [{ticker}] Attempt {attempt}: download(period={period_str}, interval={interval})")
            df = yf.download(
                tickers=ticker,
                period=period_str,
                interval=interval,
                progress=False,
                auto_adjust=False,
                group_by='column',
                threads=False,
                prepost=False,
                repair=True
            )
            if df is None or df.empty:
                raise ValueError("Empty data from download()")
            if attempt == 1:
                logging.debug(f"[{ticker}] raw columns (download): {list(df.columns)}")

            df = _postprocess(df)
            df['Symbol'] = ticker
            logging.info(f" [{ticker}] rows: {len(df)} from {df['Datetime'].min()} to {df['Datetime'].max()}")
            return df

        except Exception as e1:
            # Covers both transport errors and schema errors raised by _postprocess.
            logging.warning(f" [{ticker}] download normalize error: {e1} | trying history() fallback")
            try:
                hist = yf.Ticker(ticker).history(
                    period=period_str,
                    interval=interval,
                    auto_adjust=False,
                    actions=False
                )
                if hist is None or hist.empty:
                    raise ValueError("Empty data from history()")
                if attempt == 1:
                    logging.debug(f"[{ticker}] raw columns (history): {list(hist.columns)}")

                df = _postprocess(hist)
                df['Symbol'] = ticker
                logging.info(f" [{ticker}] (fallback) rows: {len(df)} from {df['Datetime'].min()} to {df['Datetime'].max()}")
                return df

            except Exception as e2:
                # Linear back-off. NOTE(review): this also sleeps after the final
                # attempt, just before giving up.
                wait = sleep_base * attempt
                logging.warning(f" [{ticker}] history() error: {e2} | retrying in {wait}s")
                time.sleep(wait)

    logging.error(f" [{ticker}] Failed to download after {max_retries} attempts.")
    return None

def denoise_wavelet(series, wavelet='db1', level=2):
    """
    Smooth a 1-D series by zeroing every wavelet detail band.

    The input is cast to float and gaps are forward- then back-filled. It is
    decomposed with ``pywt.wavedec``, all detail coefficients are replaced by
    zeros, and the signal is rebuilt from the approximation band alone.

    Args:
        series: pandas Series or any array-like accepted by ``pd.Series``.
        wavelet: wavelet name forwarded to PyWavelets.
        level: decomposition level forwarded to PyWavelets.

    Returns:
        A Series of the same length, carrying the input's index (or a default
        RangeIndex for plain array-likes). If the transform fails for any
        reason, the gap-filled input is returned unchanged and a warning is
        logged.
    """
    # Take the index from the wrapped Series so lists and ndarrays work too,
    # including inside the fallback path below.
    source = series if isinstance(series, pd.Series) else pd.Series(series)
    idx = source.index
    s = source.astype(float).ffill().bfill().to_numpy()
    try:
        coeffs = pywt.wavedec(s, wavelet, mode='symmetric', level=level)
        # coeffs[0] is the approximation band; everything after it is detail.
        for i in range(1, len(coeffs)):
            coeffs[i] = np.zeros_like(coeffs[i])
        rec = pywt.waverec(coeffs, wavelet, mode='symmetric')
        # Trim to the input length: the reconstruction can be longer than s.
        return pd.Series(rec[:len(s)], index=idx)
    except Exception as e:
        logging.warning(f"Wavelet denoising failed ({e}); using raw Close.")
        return pd.Series(s, index=idx)

def score_sentiment(texts):
    """
    Score each text with the module-level FinBERT pipeline.

    Returns one float per input: +score for a 'positive' label, -score for a
    'negative' label, and 0.0 for any other label. When sentiment is disabled,
    the pipeline is missing, or scoring raises, a list of zeros of the same
    length is returned instead.
    """
    neutral = [0.0] * len(texts)
    if sentiment_pipeline is None or not USE_SENTIMENT:
        return neutral

    def _signed(result):
        # Map the classifier label to a signed confidence.
        label = result['label'].lower()
        if label == 'positive':
            return +float(result['score'])
        if label == 'negative':
            return -float(result['score'])
        return 0.0

    try:
        outputs = sentiment_pipeline(texts, truncation=True, max_length=256, batch_size=32)
        return [_signed(r) for r in outputs]
    except Exception as e:
        logging.error(f"Sentiment scoring error: {e}")
        return [0.0] * len(texts)

def add_regime(df: pd.DataFrame) -> pd.DataFrame:
    """
    Add 'Vol20', 'Ret20' and 'Regime4' columns to ``df`` in place and return it.

    Vol20 is the 20-bar rolling std of 1-bar Close returns; Ret20 is the
    20-bar Close return. Each is compared with its own median over the whole
    frame, and Regime4 = 2 * (Vol20 above median) + (|Ret20| above median).
    Rows where a measure is NaN compare as False and contribute 0.
    """
    close = df['Close']
    df['Vol20'] = close.pct_change().rolling(20).std()
    df['Ret20'] = close.pct_change(20)

    trend_strength = df['Ret20'].abs()
    high_vol = df['Vol20'].gt(df['Vol20'].median()).astype(int)
    strong_trend = trend_strength.gt(trend_strength.median()).astype(int)

    df['Regime4'] = high_vol * 2 + strong_trend
    return df

def compute_enhanced_features(df):
    """
    Build the technical-indicator feature set for one symbol.

    Works on a copy of ``df``, which must contain Open/High/Low/Close/Volume
    and 'Symbol'. Adds Bollinger bands, stochastic, ROC, OBV, CCI, EMAs, MACD,
    RSI, ATR, rolling volatility, a wavelet-denoised close, optional regime
    columns (USE_REGIME), a SentimentScore column, and Delta/Gamma. Rows with
    any NaN (mainly indicator warm-up) are dropped, and 'Symbol' is moved to
    the last column.

    RSI is set to 100 when the 14-bar average loss is exactly zero and the
    average gain is positive. Leaving it NaN would let the final dropna()
    silently delete bars that sit inside an uninterrupted run-up.
    """
    df = df.copy()
    if isinstance(df.columns, pd.MultiIndex):
        df.columns = df.columns.get_level_values(0)
    df = df.loc[:, ~df.columns.duplicated()]

    # Technicals
    df['SMA_20'] = df['Close'].rolling(20).mean()
    df['STD_20'] = df['Close'].rolling(20).std()
    df['Upper_Band'] = df['SMA_20'] + 2 * df['STD_20']
    df['Lower_Band'] = df['SMA_20'] - 2 * df['STD_20']

    df['Lowest_Low']   = df['Low'].rolling(14).min()
    df['Highest_High'] = df['High'].rolling(14).max()
    # A zero 14-bar range becomes NaN, so the affected rows are dropped at the end.
    denom = (df['Highest_High'] - df['Lowest_Low']).replace(0, np.nan)
    df['Stoch'] = ((df['Close'] - df['Lowest_Low']) / denom) * 100

    df['ROC'] = df['Close'].pct_change(10)
    df['OBV'] = (np.sign(df['Close'].diff()).fillna(0) * df['Volume'].fillna(0)).cumsum()

    tp = (df['High'] + df['Low'] + df['Close']) / 3
    sma_tp = tp.rolling(20).mean()
    md = (tp - sma_tp).abs().rolling(20).mean()
    df['CCI'] = (tp - sma_tp) / (0.015 * md)

    df['EMA_10'] = df['Close'].ewm(span=10, adjust=False).mean()
    df['EMA_50'] = df['Close'].ewm(span=50, adjust=False).mean()
    ema12 = df['Close'].ewm(span=12, adjust=False).mean()
    ema26 = df['Close'].ewm(span=26, adjust=False).mean()
    df['MACD_Line']   = ema12 - ema26
    df['MACD_Signal'] = df['MACD_Line'].ewm(span=9, adjust=False).mean()

    delta = df['Close'].diff()
    gain  = delta.clip(lower=0).rolling(14).mean()
    loss  = (-delta.clip(upper=0)).rolling(14).mean()
    rs = gain / (loss.replace(0, np.nan))
    df['RSI'] = 100 - (100 / (1 + rs))
    # No losses in the window: RS is unbounded, so RSI saturates at 100.
    # Windows that are completely flat (gain == loss == 0) stay NaN as before.
    df.loc[(loss == 0) & (gain > 0), 'RSI'] = 100.0

    tr = pd.concat([
        (df['High'] - df['Low']),
        (df['High'] - df['Close'].shift()).abs(),
        (df['Low'] - df['Close'].shift()).abs()
    ], axis=1).max(axis=1)
    df['ATR'] = tr.rolling(14).mean()

    df['Volatility'] = df['Close'].pct_change().rolling(20).std()

    # NOTE(review): the wavelet transform runs over the whole series, so each
    # value can depend on later bars. Confirm this is acceptable for training.
    df['Denoised_Close'] = denoise_wavelet(df['Close'].ffill())

    if USE_REGIME:
        df = add_regime(df)

    if USE_SENTIMENT and len(df):
        # A fixed template sentence is scored once and broadcast to every row.
        headline = f"{df['Symbol'].iloc[0]} is expected to perform well in the market."
        try:
            score = score_sentiment([headline])[0]
        except Exception as e:
            logging.warning(f"Sentiment scoring failed for {df['Symbol'].iloc[0]}: {e}")
            score = 0.0
        df['SentimentScore'] = float(score)
    else:
        df['SentimentScore'] = 0.0

    df['Delta'] = df['Close'].pct_change(1).fillna(0)
    df['Gamma'] = df['Delta'].diff().fillna(0)

    # Drops indicator warm-up rows (and any other row containing a NaN).
    df = df.dropna().reset_index(drop=True)

    cols = [c for c in df.columns if c not in ['Symbol']] + ['Symbol']
    return df[cols]

# Main Loop
# Download and feature-engineer each symbol independently. A failure for one
# ticker is logged and skipped so the rest of the universe still gets processed.
all_dfs = []
for i, ticker in enumerate(SYMBOLS, 1):
    logging.info(f"[{i}/{len(SYMBOLS)}] Processing {ticker}")
    raw = download_stock_data(
        ticker, interval=INTERVAL, period_days=PERIOD_DAYS, max_retries=5, sleep_base=3
    )
    if raw is None or raw.empty:
        logging.warning(f"No data for {ticker}")
        continue

    try:
        features = compute_enhanced_features(raw)
        if features is not None and not features.empty:
            logging.info(f" [{ticker}] feature rows: {len(features)}")
            all_dfs.append(features)
        else:
            logging.warning(f"Feature set empty for {ticker}")
    except Exception as e:
        logging.error(f"Feature engineering failed for {ticker}: {e}")
    finally:
        # Release per-ticker frames before the next download. `features` is
        # unbound when compute_enhanced_features raised, hence the NameError guard.
        del raw
        try:
            del features
        except NameError:
            pass
        gc.collect()
        # Short pause between tickers.
        time.sleep(0.5)

# Combine the per-ticker frames, keep regular trading hours, build the forward
# return labels, split by time, and persist the result.
if all_dfs:
    final_df = pd.concat(all_dfs, ignore_index=True)
    logging.info(f" Combined dataset shape: {final_df.shape}")

    # 'Repaired?' is dropped when present.
    # NOTE(review): presumably added by yfinance's repair=True - confirm.
    if 'Repaired?' in final_df.columns:
        final_df = final_df.drop(columns=['Repaired?'])
    # Timestamps are read as UTC and converted to New York time before the
    # regular-trading-hours filter (weekdays, 09:30 <= t < 16:00).
    final_df['Datetime'] = pd.to_datetime(final_df['Datetime'], utc=True).dt.tz_convert('America/New_York')
    dt = final_df['Datetime']
    rth_mask = (
        (dt.dt.weekday < 5) &
        (dt.dt.time >= pd.to_datetime("09:30").time()) &
        (dt.dt.time <  pd.to_datetime("16:00").time())
    )
    final_df = final_df[rth_mask].reset_index(drop=True)

    def relabel(df):
        """Add 'Return' (FWD_HORIZON-bar forward return) and the 3-class 'Target'."""
        df = df.copy()
        # The shift is NOT grouped by Symbol: it relies on the rows still being
        # in per-symbol blocks (concat order). The last FWD_HORIZON rows of each
        # symbol therefore look into the next symbol and are removed below.
        df['Return'] = (df['Close'].shift(-FWD_HORIZON) - df['Close']) / df['Close']
        df['Target'] = np.select(
            [df['Return'] > UP_THR, df['Return'] < DN_THR],
            [1, -1],
            default=0
        )
        return df

    final_df = relabel(final_df)
    final_df = final_df.sort_values(['Symbol', 'Datetime']).reset_index(drop=True)
    # Drop the trailing FWD_HORIZON rows of every symbol: their forward return
    # was computed across a symbol boundary (or is NaN for the last symbol).
    final_df['__i'] = final_df.groupby('Symbol').cumcount()
    final_df['__n'] = final_df.groupby('Symbol')['__i'].transform('max') + 1
    final_df = final_df[ final_df['__i'] < final_df['__n'] - FWD_HORIZON ].copy()
    final_df = final_df.drop(columns=['__i','__n']).reset_index(drop=True)
    feature_cols = [c for c in final_df.columns if c not in ['Target','Return','Symbol','Datetime']]
    final_df = final_df.dropna(subset=feature_cols).reset_index(drop=True)
    # Keep the label and identifier columns at the end of the frame.
    ORDER_LAST = ['Target', 'Return', 'Symbol']
    cols = [c for c in final_df.columns if c not in ORDER_LAST] + ORDER_LAST
    final_df = final_df[cols]

    # Sanity summary
    def summarize(df):
        """Print shape, date range, per-ticker counts, NaN columns and label balance."""
        print(" Combined dataset shape:", df.shape)
        print(" Range:", df['Datetime'].min(), "-", df['Datetime'].max())
        print(" Per-ticker counts:")
        print(df['Symbol'].value_counts().to_string())
        na_cols = df.columns[df.isna().any()]
        if len(na_cols):
            print("\n Columns with NaNs:")
            print(df[na_cols].isna().sum().sort_values(ascending=False).to_string())
        else:
            print("\n No NaNs detected.")
        print("\n Label counts:")
        print(df['Target'].value_counts().sort_index().to_string())
        print("\n Label ratios (%):")
        print((df['Target'].value_counts(normalize=True)*100).round(2).to_string())

    summarize(final_df)

    # Time-based Train/Validation split (80/20 by time)
    # The cutoff is the timestamp of the row at the (1 - VAL_FRACTION) position
    # after sorting by time; all rows sharing that timestamp go to validation.
    # NOTE(review): training rows within FWD_HORIZON bars of the cutoff carry
    # labels computed from validation-period prices - confirm this is acceptable.
    final_df = final_df.sort_values('Datetime').reset_index(drop=True)
    cutoff_idx = int((1.0 - VAL_FRACTION) * len(final_df))
    cutoff_time = final_df.loc[cutoff_idx, 'Datetime']
    train_df = final_df[ final_df['Datetime'] <  cutoff_time ].reset_index(drop=True)
    val_df   = final_df[ final_df['Datetime'] >= cutoff_time ].reset_index(drop=True)

    print(f"\n Time split cutoff @ {cutoff_time}")
    print(f"Train: {train_df.shape},  Val: {val_df.shape}")

    # Save (CSV)
    final_df.to_csv(LOCAL_OUT, index=False)
    train_df.to_csv(LOCAL_TRAIN, index=False)
    val_df.to_csv(LOCAL_VAL, index=False)
    logging.info(f"Saved CSVs: {LOCAL_OUT}, {LOCAL_TRAIN}, {LOCAL_VAL}")

    # Save (Parquet)
    final_df.to_parquet(PARQ_FULL, index=False)
    train_df.to_parquet(PARQ_TRAIN, index=False)
    val_df.to_parquet(PARQ_VAL, index=False)
    logging.info(f"Saved Parquets: {PARQ_FULL}, {PARQ_TRAIN}, {PARQ_VAL}")

    # Also save to Drive
    final_path = os.path.join(DRIVE_DIR, "multi_stock_feature_engineered_dataset.csv")
    train_path = os.path.join(DRIVE_DIR, "train.csv")
    val_path   = os.path.join(DRIVE_DIR, "val.csv")
    final_df.to_csv(final_path, index=False)
    train_df.to_csv(train_path, index=False)
    val_df.to_csv(val_path, index=False)
    logging.info(f"Saved to Google Drive:\n- {final_path}\n- {train_path}\n- {val_path}")

    # Cleanup
    del all_dfs, final_df, train_df, val_df
    gc.collect()
else:
    logging.warning("No usable data found for any ticker.")


In [None]:
# Quick look at the dataset written by the feature-engineering cell.
# Note that this binds the module-level name `df`.
df = pd.read_csv("multi_stock_feature_engineered_dataset.csv")
print(df.head())


In [None]:
# PPO Walkforward with Runtime Timing, Full Model Saving, and Logging
import os, gc, time, json, torch, logging
import pandas as pd, numpy as np, matplotlib.pyplot as plt, heapq
from datetime import datetime, timedelta
from concurrent.futures import ThreadPoolExecutor
import gymnasium as gym
import yfinance as yf
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
from stable_baselines3.common.utils import set_random_seed
from gym_anytrading.envs import StocksEnv
from gymnasium.spaces import Box as GBox
import warnings
warnings.filterwarnings("ignore", category=UserWarning, module="gymnasium")
warnings.filterwarnings("ignore", category=DeprecationWarning, module="jupyter_client.session")
warnings.filterwarnings("ignore", message=".*Gym has been unmaintained.*")

# IMPORTANT: reuse the compute_enhanced_features already defined above if present
try:
    compute_enhanced_features
except NameError:
    # FINAL fallback only if it truly doesn't exist
    # NOTE(review): this stand-in returns its input unchanged, so any caller
    # that reaches it receives raw bars without the engineered feature columns.
    def compute_enhanced_features(df_in: pd.DataFrame) -> pd.DataFrame:
        """Identity placeholder used only when the real feature builder was not defined."""
        return df_in

# Fixed seed passed to stable-baselines3's set_random_seed for reproducible runs.
set_random_seed(42)



# Configuration
# RUN_TAG stamps every run with its start time (minute resolution) so each
# run writes into its own results directory. The assignment had been fused
# into the comment line, which left RUN_TAG undefined and made the next line
# raise NameError.
RUN_TAG = datetime.now().strftime("%Y%m%d_%H%M")
RESULTS_DIR = f"/content/drive/MyDrive/Results_May_2025/ppo_walkforward_results_{RUN_TAG}"


def get_mu_sigma(model, obs):
    """
    Return (mean, std) of the policy's Gaussian action distribution for ``obs``.

    Runs the policy forward by hand under ``torch.no_grad()``:
    obs_to_tensor -> extract_features -> mlp_extractor (actor latent) ->
    action_net for the mean, and exp(policy.log_std) for the std. Both values
    are squeezed to Python floats, so this works for a single scalar action.
    """
    policy = model.policy

    def _as_float(tensor):
        # Detach, move to CPU and collapse to a plain Python float.
        return float(tensor.detach().cpu().numpy().squeeze())

    with torch.no_grad():
        obs_tensor, _ = policy.obs_to_tensor(obs)
        actor_latent, _ = policy.mlp_extractor(policy.extract_features(obs_tensor))
        mu = _as_float(policy.action_net(actor_latent))
        sigma = _as_float(policy.log_std.exp())
    return mu, sigma

# Shared directory for final models (not run-tagged); created together with
# the per-run RESULTS_DIR.
FINAL_MODEL_DIR = os.path.join("/content/drive/MyDrive/Results_May_2025", "ppo_models_master")
os.makedirs(RESULTS_DIR, exist_ok=True)
os.makedirs(FINAL_MODEL_DIR, exist_ok=True)

# Global skip aggregation (thread-safe)
# SKIP_LOCK serializes appends to SKIP_AGG_PATH in record_skips_global().
from threading import Lock
SKIP_AGG_PATH = os.path.join(RESULTS_DIR, "skipped_windows_global.csv")
SKIP_LOCK = Lock()

def record_skips_global(ticker: str, skipped_windows: list, total_windows: int = None, fully_skipped: bool = False):
    """
    Append skip records for ``ticker`` to the shared CSV at SKIP_AGG_PATH.

    Does nothing when there are no skipped windows and the ticker was not
    fully skipped. Otherwise, under SKIP_LOCK, a header is written if the file
    does not exist yet, followed by either one "ALL" row (``fully_skipped``)
    or one row per entry of ``skipped_windows``. Entries are expected to look
    like 'AAPL_window3'; the integer after '_window' is stored, or an empty
    cell when the name cannot be parsed. A missing ``total_windows`` is also
    stored as an empty cell.
    """
    if not (skipped_windows or fully_skipped):
        return
    import csv

    def _window_number(wname):
        # wname format expected: SYMBOL_windowX
        try:
            _, win_str = wname.split("_window")
            return int(win_str)
        except Exception:
            return ""

    with SKIP_LOCK:
        needs_header = not os.path.exists(SKIP_AGG_PATH)
        with open(SKIP_AGG_PATH, "a", newline="") as f:
            writer = csv.writer(f)
            if needs_header:
                writer.writerow(["Ticker", "Window", "FullySkipped", "TotalWindows"])
            if fully_skipped:
                # Record a single line indicating the ticker was completely skipped
                writer.writerow([ticker, "ALL", True, total_windows if total_windows is not None else ""])
                return
            for wname in skipped_windows:
                writer.writerow([
                    ticker,
                    _window_number(wname),
                    False,
                    total_windows if total_windows is not None else "",
                ])

# Logging Setup
# force=True replaces handlers that the notebook host may already have installed.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    force=True
)


# Flags
ENABLE_SENTIMENT = False
ENABLE_SLO       = True            # Non-zero cost_rate/slip_rate in load_model_and_env()
ENABLE_WAVELET   = True
test_mode        = False
ENABLE_PLOTS     = False
LIVE_MODE        = False           # Simple live/paper inference loop (set True to run)
SIM_LATENCY_MS   = 0             # Network/broker latency simulation; 0 = off
BROKER           = "log"           #"log" = do not place orders, just log

# Global training settings
WINDOW_SIZE = 3500
STEP_SIZE = 500
TIMESTEPS = 150_000

# Load Dataset
DATA_PATH = "multi_stock_feature_engineered_dataset.csv"
if not os.path.exists(DATA_PATH):
    raise FileNotFoundError("Required feature-engineered dataset not found!")
df = pd.read_csv(DATA_PATH)
# The CSV stores New York timestamps with their UTC offset, and that offset
# changes across DST (-04:00 / -05:00). Parsing such mixed offsets without
# utc=True yields an object column (or an error on newer pandas), so parse as
# UTC and convert back to the zone the file was written in.
df['Datetime'] = pd.to_datetime(df['Datetime'], utc=True).dt.tz_convert('America/New_York')

# NEW: ContinuousPositionEnv (continuous exposure + reward shaping)
class ContinuousPositionEnv(StocksEnv):
    """
    StocksEnv variant with a continuous action in [-1, 1] used as target exposure.

    The class keeps its own bookkeeping (``nav``, ``pos``, ``_last_trade_step``)
    and computes its own shaped reward; the parent environment is only stepped
    to advance the tick and produce observations (its reward is discarded).

    Constructor arguments beyond those forwarded to StocksEnv:
        cost_rate, slip_rate: per-unit-of-position-change charges; their sum
            times |delta position| is the trade cost.
        k_alpha, k_mom, k_sent: weights of the relative-alpha, momentum and
            sentiment terms in the shaped reward.
        mom_source, mom_lookback: momentum signal selection ("macd" uses the
            MACD_Line column; anything else uses Denoised_Close) and lookback.
        min_trade_delta: minimum |target - current position| needed to trade.
        cooldown: minimum number of ticks between position changes.
        reward_clip: the shaped reward is clipped to [-reward_clip, reward_clip].
    """
    def __init__(self, df, frame_bound, window_size,
                 cost_rate=0.0002, slip_rate=0.0003,
                 k_alpha=0.20, k_mom=0.05, k_sent=0.0,
                 mom_source="denoised", mom_lookback=20,
                 min_trade_delta=0.01, cooldown=5, reward_clip=1.0):
        # The index is reset so the .loc[tick, ...] lookups below are positional.
        super().__init__(df=df.reset_index(drop=True), frame_bound=frame_bound, window_size=window_size)
        # Rebuild the observation space as a gymnasium Box with the same bounds.
        if isinstance(self.observation_space, gym.spaces.Box):
            self.observation_space = GBox(
                low=self.observation_space.low,
                high=self.observation_space.high,
                shape=self.observation_space.shape,
                dtype=self.observation_space.dtype,
            )

        # Replace the parent's action space with a single continuous value.
        self.action_space = GBox(low=-1.0, high=1.0, shape=(1,), dtype=np.float32)
        self.cost_rate, self.slip_rate = float(cost_rate), float(slip_rate)
        self.k_alpha, self.k_mom = float(k_alpha), float(k_mom)
        self.k_sent = float(k_sent)
        self.mom_source, self.mom_lookback = str(mom_source), int(mom_lookback)
        self.min_trade_delta, self.cooldown = float(min_trade_delta), int(cooldown)
        self.reward_clip = float(reward_clip)
        # _last_trade_step starts at -cooldown so the first trade is not blocked
        # by the cooldown check once the tick is non-negative.
        self.nav, self.pos, self._last_trade_step = 1.0, 0.0, -self.cooldown

    def reset(self, **kwargs):
        """Reset the parent env and the local nav/pos state; always returns (obs, info)."""
        out = super().reset(**kwargs)
        # Accept both the (obs, info) tuple and a bare observation from the parent.
        if isinstance(out, tuple):
            obs, info = out
        else:
            obs, info = out, {}
        self.nav, self.pos, self._last_trade_step = 1.0, 0.0, -self.cooldown
        info = info or {}
        info.update({"nav": self.nav, "pos": self.pos})
        return obs, info

    def _step_parent_hold(self):
        """Advance the parent env by one tick and drop its reward."""
        # NOTE(review): action code 2 is presumably outside the parent's buy/sell
        # actions and therefore acts as "hold" - confirm against gym-anytrading.
        step_result = super().step(2)
        # Handle both the 5-tuple and the older 4-tuple step signatures.
        if len(step_result) == 5:
            obs, _env_rew, terminated, truncated, info = step_result
        else:
            obs, _env_rew, done, info = step_result
            terminated, truncated = bool(done), False
        return obs, terminated, truncated, info

    def _ret_t(self):
        """One-bar Close return at the current tick; 0.0 when the previous close is <= 0."""
        cur  = float(self.df.loc[self._current_tick, 'Close'])
        prev = float(self.df.loc[max(self._current_tick - 1, 0), 'Close'])
        return 0.0 if prev <= 0 else (cur - prev) / prev
    def _mom_signal(self):
        """Momentum signal squashed by tanh; 0.0 when no source is usable."""
        if self.mom_source == "macd" and "MACD_Line" in self.df.columns:
            # MACD scaled by its std over (up to) the last 201 ticks.
            recent = self.df["MACD_Line"].iloc[max(self._current_tick-200,0):self._current_tick+1]
            return float(np.tanh(float(self.df.loc[self._current_tick, "MACD_Line"]) / (1e-6 + recent.std())))
        if "Denoised_Close" in self.df.columns and self._current_tick - self.mom_lookback >= 0:
            # Per-bar slope of the denoised close, relative to the previous close.
            now  = float(self.df.loc[self._current_tick, "Denoised_Close"])
            then = float(self.df.loc[self._current_tick - self.mom_lookback, "Denoised_Close"])
            base = float(self.df.loc[max(self._current_tick - 1, 0), "Close"])
            slope = (now - then) / max(self.mom_lookback, 1)
            return float(np.tanh(10.0 * (slope / max(abs(base), 1e-6))))
        return 0.0

    def step(self, action):
        """
        Apply one continuous action and return (obs, reward, terminated, truncated, info).

        Order of operations: the reward and NAV update use the position held
        *before* this step; the position is then changed (if allowed by
        min_trade_delta and cooldown); finally the parent env is advanced.
        Any exception is logged, the env is reset, and a terminated transition
        with zero reward and empty info is returned.
        """
        try:
            a = float(np.array(action).squeeze())
            target_pos = float(np.clip(a, -1.0, 1.0))

            r_t = self._ret_t()
            base_ret = self.pos * r_t

            # A trade happens only if the move is large enough and the cooldown elapsed.
            changed = (abs(target_pos - self.pos) >= self.min_trade_delta) and \
                      ((self._current_tick - self._last_trade_step) >= self.cooldown)
            delta_pos = (target_pos - self.pos) if changed else 0.0
            trade_cost = (self.cost_rate + self.slip_rate) * abs(delta_pos)
            rel_alpha = base_ret - r_t                    # Outperform b&h
            mom_term  = self.pos * self._mom_signal()     # carry on trend
            # Optional sentiment shaping
            sent_term = 0.0
            if ENABLE_SENTIMENT and "SentimentScore" in self.df.columns:
                sent_term = self.k_sent * float(self.df.loc[self._current_tick, "SentimentScore"])
            shaped = base_ret + self.k_alpha*rel_alpha + self.k_mom*mom_term + sent_term - trade_cost
            reward = float(np.clip(shaped, -self.reward_clip, self.reward_clip))
            # NAV compounds only the realised return net of costs, not the shaping terms.
            self.nav *= (1.0 + base_ret - trade_cost)
            if changed:
                self.pos = target_pos
                self._last_trade_step = self._current_tick
            obs, terminated, truncated, info = self._step_parent_hold()
            info = info or {}
            # "mom" is re-evaluated here, i.e. after the parent step was taken.
            info.update({
                "ret_t": r_t, "nav": self.nav, "pos": self.pos,
                "trade_cost": trade_cost, "base_ret": base_ret,
                "rel_alpha": rel_alpha, "mom": self._mom_signal()
            })
            return obs, reward, terminated, truncated, info
        except Exception as e:
            logging.error(f"Step error: {e}")
            try:
                obs, _ = self.reset()
            except Exception:
                obs = super().reset() if hasattr(super(), "reset") else None
            return obs, 0.0, True, False, {}

# Walkforward windowing
def get_walk_forward_windows(df, window_size=3500, step_size=500, min_len=1200):
    """
    Return the (start, end) row bounds of the walk-forward windows over ``df``.

    Starts advance by ``step_size`` from 0 and stop before ``len(df) - min_len``.
    A window is kept only when its end (start + window_size) is strictly less
    than ``len(df)``.
    """
    n_rows = len(df)
    windows = []
    for begin in range(0, n_rows - min_len, step_size):
        finish = begin + window_size
        if finish < n_rows:
            windows.append((begin, finish))
    return windows
# Save QuantConnect-Compatible Artifacts
def _json_default(value):
    """json.dump fallback: turn NumPy scalars/arrays into plain Python values."""
    if isinstance(value, np.generic):
        return value.item()
    if isinstance(value, np.ndarray):
        return value.tolist()
    raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")


def save_quantconnect_model(artifact, prefix, save_dir):
    """
    Write the model and its JSON sidecar files for one (ticker, window) artifact.

    Files written into ``save_dir`` (all named ``<prefix>_...``):
      - ``_model.zip``: via ``artifact["model"].save`` when a model is present
        and the file does not exist yet;
      - ``_vecnorm.pkl``: copied from ``artifact["vecnorm_path"]`` when given and
        not already the destination (copy problems are logged, not raised);
      - ``_features.json``, ``_probability_config.json``, ``_model_info.json``.

    Args:
        artifact: dict with 'features' and 'result' (keys Ticker, Window,
            PPO_Portfolio, BuyHold, Sharpe); optionally 'model' and 'vecnorm_path'.
        prefix: file-name prefix.
        save_dir: existing output directory.

    NumPy scalars or arrays inside the metrics are converted to plain Python
    values. Without that, ``json.dump`` raises TypeError on e.g. ``np.float32``
    and leaves a truncated JSON file behind.
    """
    model_obj = artifact.get("model", None)
    model_path = os.path.join(save_dir, f"{prefix}_model.zip")
    # An existing model file is never overwritten.
    if model_obj is not None and not os.path.exists(model_path):
        model_obj.save(model_path)

    vecnorm_src = artifact.get("vecnorm_path")
    if vecnorm_src:
        try:
            vecnorm_dst = os.path.join(save_dir, f"{prefix}_vecnorm.pkl")
            # Skip the copy when source and destination are the same file.
            if os.path.abspath(vecnorm_src) != os.path.abspath(vecnorm_dst):
                import shutil
                shutil.copyfile(vecnorm_src, vecnorm_dst)
        except Exception as e:
            logging.warning(f"VecNormalize file handling issue for {prefix}: {e}")

    result = artifact['result']

    with open(os.path.join(save_dir, f"{prefix}_features.json"), "w") as f:
        json.dump({"features": artifact['features']}, f, default=_json_default)

    with open(os.path.join(save_dir, f"{prefix}_probability_config.json"), "w") as f:
        json.dump({"threshold": 0.05, "use_confidence": True, "inference_mode": "deterministic"}, f)

    with open(os.path.join(save_dir, f"{prefix}_model_info.json"), "w") as f:
        json.dump({
            "model": "PPO",
            "ticker": result['Ticker'],
            "window": result['Window'],
            "date_trained": datetime.today().strftime("%Y-%m-%d"),
            "framework": "stable-baselines3",
            "input_features": artifact['features'],
            "final_portfolio": result['PPO_Portfolio'],
            "buy_hold": result['BuyHold'],
            "sharpe": result['Sharpe']
        }, f, default=_json_default)

    logging.info(f"Saved QuantConnect-compatible model for {result['Ticker']} | {result['Window']}")

# Live inference helpers (uses saved PPO + VecNormalize)
def load_model_and_env(prefix):
    """Load the saved PPO for ``prefix`` and return it with an env factory.

    Returns:
        tuple: ``(model, make_env)`` where ``model`` is loaded on CPU from
        ``FINAL_MODEL_DIR/<prefix>_model.zip`` and ``make_env(df_window)``
        builds a one-env vectorised ``ContinuousPositionEnv`` over that frame,
        wrapped with the saved VecNormalize statistics when
        ``<prefix>_vecnorm.pkl`` exists.
    """
    archive_path = os.path.join(FINAL_MODEL_DIR, f"{prefix}_model.zip")
    stats_path = os.path.join(FINAL_MODEL_DIR, f"{prefix}_vecnorm.pkl")
    model = PPO.load(archive_path, device="cpu")

    def make_env(df_window):
        bounds = (50, len(df_window) - 3)

        def _single_env():
            # Same reward/cost settings as the training env, except that
            # cost and slippage are switched by ENABLE_SLO here.
            return ContinuousPositionEnv(
                df=df_window, frame_bound=bounds, window_size=10,
                cost_rate=(0.0002 if ENABLE_SLO else 0.0),
                slip_rate=(0.0003 if ENABLE_SLO else 0.0),
                k_alpha=0.20, k_mom=0.05, k_sent=(0.01 if ENABLE_SENTIMENT else 0.0),
                mom_source="denoised", mom_lookback=20,
                min_trade_delta=0.01, cooldown=5, reward_clip=1.0
            )

        vec_env = DummyVecEnv([_single_env])
        if os.path.exists(stats_path):
            vec_env = VecNormalize.load(stats_path, vec_env)
        # Inference only: freeze running statistics and leave rewards raw.
        vec_env.training = False
        vec_env.norm_reward = False
        return vec_env

    return model, make_env

def latest_df_for_symbol(symbol, horizon_days=5, interval="1m"):
    """Fetch recent bars for ``symbol`` and rebuild features like training.

    Args:
        symbol: Ticker passed to ``yf.download``.
        horizon_days: How many days back (from now, UTC) the request starts.
        interval: yfinance bar interval.

    Returns:
        The output of ``compute_enhanced_features`` on the downloaded frame
        (with a ``Symbol`` column added), or ``None`` when nothing came back.
    """
    # Local import: this cell's top-level imports are not visible here, and
    # datetime.utcnow() is deprecated in favour of an aware "now".
    from datetime import timezone

    end = datetime.now(timezone.utc)
    start = end - timedelta(days=horizon_days)
    df = yf.download(
        symbol,
        start=start.strftime("%Y-%m-%d"),
        # yfinance treats `end` as exclusive; passing today's date would drop
        # every bar from the current session, so ask up to tomorrow.
        end=(end + timedelta(days=1)).strftime("%Y-%m-%d"),
        interval=interval,
        progress=False,
        auto_adjust=False
    )
    if df is None or df.empty:
        return None
    # Newer yfinance releases can return (field, ticker) MultiIndex columns
    # even for a single symbol; flatten to plain field names.
    if isinstance(df.columns, pd.MultiIndex):
        df.columns = df.columns.get_level_values(0)
    df = df.reset_index()
    df['Symbol'] = symbol
    # IMPORTANT: reuse your same feature function
    return compute_enhanced_features(df)

def predict_latest(symbol, prefix):
    """Replay the most recent bars through the env and query the policy once.

    Steps: load model and env factory for ``prefix``, fetch fresh features for
    ``symbol``, keep at most the last 2500 rows, fast-forward the env with a
    zero action until the episode ends, then call ``model.predict``.

    Returns:
        dict with ``signal`` ("BUY"/"SELL"/"HOLD"), ``confidence``, ``action``,
        ``ts``, ``price``, ``p_long``, ``p_short``, ``mu``, ``sigma``; or
        ``None`` when fewer than 100 fresh rows are available.
    """
    model, make_env = load_model_and_env(prefix)
    live_df = latest_df_for_symbol(symbol)
    if live_df is None or len(live_df) < 100:
        logging.warning("No fresh data yet for live inference.")
        return None
    df_window = live_df.iloc[-2500:].reset_index(drop=True) if len(live_df) > 2500 else live_df.copy()
    env = make_env(df_window)
    obs = env.reset()
    if isinstance(obs, tuple):
        obs, _ = obs
    for _ in range(len(df_window) - 1):
        obs, _, dones, infos = env.step([np.array([0.0], dtype=np.float32)])
        if isinstance(dones, (np.ndarray, list)) and dones[0]:
            # Stable-Baselines3 vec envs reset automatically on `done`, so the
            # `obs` returned by this step is already the FIRST observation of a
            # new episode. The real last observation is handed back through
            # infos[0]["terminal_observation"]; without this the prediction
            # would be made on the oldest bars of the window.
            step_info = infos[0] if isinstance(infos, (list, tuple)) else infos
            terminal_obs = step_info.get("terminal_observation") if isinstance(step_info, dict) else None
            if terminal_obs is not None:
                obs = np.asarray(terminal_obs)[None, ...]  # restore the batch axis
            break
    action, _ = model.predict(obs, deterministic=True)
    mu, sigma = get_mu_sigma(model, obs)
    from math import erf, sqrt
    def Phi(x):  # Standard normal CDF
        return 0.5 * (1.0 + erf(x / sqrt(2.0)))
    # Approximate P(a>0) using Gaussian in action space (pre-squash)
    p_long  = 1.0 - Phi((0.0 - mu) / max(sigma, 1e-6))
    p_short = 1.0 - p_long
    a = float(np.array(action).squeeze())
    # Same (asymmetric) thresholds as the training-time compat CSV.
    signal = "BUY" if a > 0.1 else ("SELL" if a < -0.3 else "HOLD")
    conf = abs(a)
    ts = df_window['Datetime'].iloc[-1] if 'Datetime' in df_window.columns else None
    price = float(df_window['Close'].iloc[-1])
    return dict(signal=signal, confidence=conf, action=a, ts=ts, price=price,
                p_long=p_long, p_short=p_short, mu=mu, sigma=sigma)
def place_order(signal, qty=1):
    """Route an order signal; currently a logging-only stub.

    Sleeps ``SIM_LATENCY_MS`` milliseconds first when that setting is
    positive, then logs the order. ``BROKER == "log"`` is the paper path; any
    other value logs a "not implemented" line.
    """
    latency_ms = SIM_LATENCY_MS
    if latency_ms > 0:
        time.sleep(SIM_LATENCY_MS / 1000.0)

    if BROKER != "log":
        # Hook up real broker SDK here (Alpaca, IB, etc.)
        logging.info(f"[BROKER={BROKER}] {signal} x{qty} (not implemented)")
        return
    logging.info(f"[PAPER] {signal} x{qty}")

def live_loop(symbol, best_prefix):
    """Poll ``predict_latest`` once a minute while ``LIVE_MODE`` is truthy.

    Each cycle logs the prediction and forwards its signal to ``place_order``
    with ``qty=1``. Any exception inside a cycle is logged and the loop keeps
    running.

    Args:
        symbol: Ticker to predict for.
        best_prefix: Artifact prefix of the model to use.
    """
    while LIVE_MODE:
        try:
            pred = predict_latest(symbol, best_prefix)
            if pred and "error" in pred:
                # The patched predict_latest (later cell) reports "no data" as
                # {"error": ...} instead of None. That dict is truthy, so
                # without this branch pred['ts'] raised KeyError every cycle.
                logging.warning(f"{symbol}: no prediction this cycle ({pred['error']})")
            elif pred:
                logging.info(
                    f"{symbol} {pred['ts']} | {pred['signal']} @ {pred['price']:.2f} (conf {pred['confidence']:.2f})"
                )
                place_order(pred['signal'], qty=1)
        except Exception as e:
            logging.error(f"Live loop error: {e}")
        time.sleep(60)  # Poll each minute
# How many of the best-Sharpe windows per ticker get the extra
# QuantConnect-style JSON artifacts at the end of walkforward_ppo.
TOP_N_WINDOWS = 3
# PPO Walkforward Function
def walkforward_ppo(df, ticker, window_size=3500, step_size=500,
                    timesteps=150_000, learning_rate=1e-4, ppo_overrides=None):
    """Train and evaluate one PPO agent per rolling window of ``df``.

    For each ``(start, end)`` pair from ``get_walk_forward_windows`` this:
      1. skips the window when both ``<prefix>_model.zip`` and
         ``<prefix>_vecnorm.pkl`` already exist in ``FINAL_MODEL_DIR``
         (``prefix = ppo_<ticker>_window<k>``, k starting at 1);
      2. otherwise trains PPO ("MlpPolicy") on a VecNormalize-wrapped
         ``ContinuousPositionEnv`` built from that window;
      3. rolls the trained policy deterministically through the same env and
         records a per-step log;
      4. saves the VecNormalize stats and model to ``FINAL_MODEL_DIR`` and two
         prediction CSVs to ``RESULTS_DIR``;
      5. keeps the ``TOP_N_WINDOWS`` best windows by Sharpe and, after the
         loop, writes their JSON side-cars via ``save_quantconnect_model``.

    Args:
        df: Feature frame for a single ticker.
        ticker: Symbol used in log messages and artifact names.
        window_size: Rows per window.
        step_size: Offset between window starts.
        timesteps: ``total_timesteps`` passed to ``model.learn``.
        learning_rate: PPO learning rate unless ``ppo_overrides["lr"]`` is set.
        ppo_overrides: Optional dict; keys read are ``lr``, ``n_steps``,
            ``batch``, ``clip`` and ``ent``.

    Returns:
        list[dict]: One summary row per window trained in this call. Skipped
        windows contribute nothing; the list is empty when ``df`` is shorter
        than ``window_size`` or every window already has both artifacts.

    NOTE(review): the evaluation rollout reuses the env/window the model was
    just trained on, so the reported metrics are in-sample.
    """
    if ppo_overrides is None:
        ppo_overrides = {}
    if len(df) < window_size:
        logging.warning(f"Skipping {ticker}: only {len(df)} rows (min required: {window_size})")
        return []
    results = []
    windows = get_walk_forward_windows(df, window_size, step_size)
    top_heap = []          # min-heap of (Sharpe, prefix, meta); smallest Sharpe sits at index 0
    skipped_windows = []
    # Pre-scan: if every window already has both artifacts, bail out early.
    # NOTE(review): an empty `windows` list also leaves all_done True, so the
    # ticker is then reported as "fully skipped" — confirm that is intended.
    all_done = True
    for idx in range(len(windows)):
        prefix = f"ppo_{ticker}_window{idx+1}"
        model_ok   = os.path.exists(os.path.join(FINAL_MODEL_DIR, f"{prefix}_model.zip"))
        vecnorm_ok = os.path.exists(os.path.join(FINAL_MODEL_DIR, f"{prefix}_vecnorm.pkl"))
        if not (model_ok and vecnorm_ok):
            all_done = False
            break

    if all_done:
        logging.info(f"Ticker {ticker} fully skipped (all {len(windows)} windows already complete).")
        record_skips_global(ticker, skipped_windows=[], total_windows=len(windows), fully_skipped=True)
        return []
    # Main window loop
    for w_idx, (start, end) in enumerate(windows):
        window_start_time = time.time()
        gc.collect()
        prefix = f"ppo_{ticker}_window{w_idx+1}"
        model_path   = os.path.join(FINAL_MODEL_DIR, f"{prefix}_model.zip")
        vecnorm_path = os.path.join(FINAL_MODEL_DIR, f"{prefix}_vecnorm.pkl")
        # Resume support: a window counts as done only when BOTH files exist.
        if os.path.exists(model_path) and os.path.exists(vecnorm_path):
            logging.info(f"Skipping {ticker} | Window {w_idx+1}, already trained.")
            skipped_windows.append(f"{ticker}_window{w_idx+1}")
            continue
        missing = []
        if not os.path.exists(model_path):   missing.append("model.zip")
        if not os.path.exists(vecnorm_path): missing.append("vecnorm.pkl")
        logging.info(f"Will train {ticker} | Window {w_idx+1} because missing: {', '.join(missing)}")
        df_window = df.iloc[start:end].reset_index(drop=True)
        # Drops the last row when the window is very short or has odd length.
        # NOTE(review): the reason for the even-length requirement is not
        # visible here — confirm against ContinuousPositionEnv.
        if len(df_window) <= 52 or len(df_window) % 2 != 0:
            df_window = df_window.iloc[:-1]
        frame_bound = (50, len(df_window) - 3)
        env = DummyVecEnv([lambda: ContinuousPositionEnv(
            df=df_window, frame_bound=frame_bound, window_size=10,
            cost_rate=0.0002, slip_rate=0.0003,
            k_alpha=0.20, k_mom=0.05,
            k_sent=(0.01 if ENABLE_SENTIMENT else 0.0),
            mom_source="denoised", mom_lookback=20,
            min_trade_delta=0.01, cooldown=5, reward_clip=1.0
        )])
        env = VecNormalize(env, norm_obs=True, norm_reward=True, clip_obs=10.)

        try:
            # Train
            model = PPO(
                "MlpPolicy",
                env,
                verbose=0,
                device=("cuda" if torch.cuda.is_available() else "cpu"),
                learning_rate=ppo_overrides.get("lr", learning_rate),
                n_steps=ppo_overrides.get("n_steps", 256),
                batch_size=ppo_overrides.get("batch", 64),
                n_epochs=5,
                gamma=0.99,
                gae_lambda=0.95,
                clip_range=ppo_overrides.get("clip", 0.2),
                ent_coef=ppo_overrides.get("ent", 0.005),
                policy_kwargs=dict(net_arch=[64, 64]),
            )

            logging.info(f"Training {ticker} Window {w_idx+1}/{len(windows)}")
            model.learn(total_timesteps=timesteps)
            # Evaluation: freeze normalisation statistics and use raw rewards.
            env.training = False
            env.norm_reward = False
            obs = env.reset()
            if isinstance(obs, tuple):
                obs, _ = obs
            # Both tracks start at 1.0 and are scaled to a 100,000 notional below.
            nav_track, bh_track = [1.0], [1.0]
            step_log = []  # Per-step rich logs
            for i in range(len(df_window) - 1):
                action, _ = model.predict(obs, deterministic=True)
                mu, sigma = get_mu_sigma(model, obs)
                obs, _, dones, infos = env.step(action)
                info = infos[0] if isinstance(infos, (list, tuple)) else infos

                nav_track.append(float(info.get("nav", nav_track[-1])))
                bh_track.append(bh_track[-1] * (1.0 + float(info.get("ret_t", 0.0))))

                a = float(np.array(action).squeeze())
                # NOTE(review): rows are looked up at i+1 while frame_bound
                # starts at 50; if the env's first tick is not row 0 these
                # Datetime/Close values are offset from the step they label —
                # confirm against ContinuousPositionEnv.
                dt_val = df_window["Datetime"].iloc[i+1] if "Datetime" in df_window.columns else None
                px     = float(df_window["Close"].iloc[i+1]) if "Close" in df_window.columns else np.nan
                step_log.append({
                    "Index": i+1, "Datetime": dt_val, "Close": px, "Action": a,
                    "mu": mu, "sigma": sigma, "nav": nav_track[-1],
                    "ret_t": float(info.get("ret_t", 0.0)),
                    "pos": float(info.get("pos", 0.0)),
                    "trade_cost": float(info.get("trade_cost", 0.0)),
                    "base_ret": float(info.get("base_ret", 0.0)),
                    "rel_alpha": float(info.get("rel_alpha", 0.0)),
                    "mom": float(info.get("mom", 0.0)),
                })

                # Stop at the end of the episode (vectorised or scalar `dones`).
                if isinstance(dones, (np.ndarray, list, tuple)):
                    if dones[0]:
                        break
                elif dones:
                    break
            # Metrics
            final_value = float(nav_track[-1]) * 100000.0
            hold_value  = float(bh_track[-1])  * 100000.0
            returns = pd.Series(nav_track).pct_change().fillna(0.0)
            # NOTE(review): sqrt(252) annualises per-step returns as if each
            # step were one trading day — confirm against the bar interval.
            sharpe  = float((returns.mean() / (returns.std() + 1e-9)) * np.sqrt(252))
            # Maximum drawdown of the NAV track, in percent.
            drawdown = float(((pd.Series(nav_track).cummax() - pd.Series(nav_track)) /
                              pd.Series(nav_track).cummax()).max() * 100)

            # Save model + VecNormalize
            vecnorm_path = os.path.join(FINAL_MODEL_DIR, f"{prefix}_vecnorm.pkl")
            try:
                env.save(vecnorm_path)
            except Exception as e:
                logging.warning(f"Could not save VecNormalize for {ticker} {start}-{end}: {e}")
                vecnorm_path = None  # recorded as None in `meta` below

            model_path = os.path.join(FINAL_MODEL_DIR, f"{prefix}_model.zip")
            model.save(model_path)

            # Save predictions (both schemas)
            pred_path = os.path.join(RESULTS_DIR, f"{prefix}_predictions.csv")
            pd.DataFrame(step_log).to_csv(pred_path, index=False)
            logging.info(f"Saved predictions to {pred_path}")

            # Second schema: discrete Signal plus PortfolioValue/Reward columns.
            compat_rows = []
            for r in step_log:
                a = r["Action"]
                signal = "BUY" if a > 0.1 else ("SELL" if a < -0.3 else "HOLD")
                compat_rows.append({
                    "Index": r["Index"], "Datetime": r["Datetime"], "Close": r["Close"],
                    "Action": a, "Signal": signal, "PortfolioValue": r["nav"], "Reward": np.nan,
                })
            compat_path = os.path.join(RESULTS_DIR, f"{prefix}_predictions_compat.csv")
            pd.DataFrame(compat_rows).to_csv(compat_path, index=False)
            logging.info(f"Saved compatibility predictions to {compat_path}")

            # Summary row
            result_row = {
                "Ticker": ticker,
                "Window": f"{start}-{end}",
                "PPO_Portfolio": round(final_value, 2),
                "BuyHold": round(hold_value, 2),
                "Sharpe": round(sharpe, 3),
                "Drawdown_%": round(drawdown, 2),
                "Winner": "PPO" if final_value > hold_value else "Buy & Hold",
            }
            results.append(result_row)

            meta = {
                "result": result_row,
                "features": df_window.columns.tolist(),
                "prefix": prefix,
                "model_path": model_path,
                "vecnorm_path": vecnorm_path,
            }

            # Keep only the TOP_N_WINDOWS best Sharpe values. `prefix` is the
            # tuple's second element, so ties on Sharpe compare prefixes and
            # never reach the `meta` dict.
            item = (result_row["Sharpe"], prefix, meta)
            if len(top_heap) < TOP_N_WINDOWS:
                heapq.heappush(top_heap, item)
            else:
                if item[0] > top_heap[0][0]:
                    heapq.heapreplace(top_heap, item)
            logging.info(f"{ticker} | Window {w_idx+1} runtime: {round(time.time()-window_start_time, 2)}s")
        finally:
            # Release env, model and cached GPU memory before the next window;
            # every step is best effort.
            try: env.close()
            except Exception: pass
            del env
            try: del model
            except Exception: pass
            gc.collect()
            try: torch.cuda.empty_cache()
            except Exception: pass
    if skipped_windows:
        logging.info(f"{ticker} skipped windows (already complete): {', '.join(skipped_windows)}")
        record_skips_global(ticker, skipped_windows=skipped_windows, total_windows=len(windows), fully_skipped=False)


    # Save top-N (artifact wrapper reads from paths)
    # "model" is None because the weights were already written in the loop, so
    # save_quantconnect_model only handles the vecnorm copy and JSON files.
    top_list = sorted(top_heap, key=lambda t: t[0], reverse=True)
    for _, _, meta in top_list:
        artifact_for_save = {
            "model": None,
            "vecnorm_path": meta["vecnorm_path"],
            "features": meta["features"],
            "result": meta["result"],
            "prefix": meta["prefix"]
        }
        save_quantconnect_model(artifact_for_save, meta["prefix"], FINAL_MODEL_DIR)

    return results

# Per-bucket PPO hyper-parameter presets. Keys match what walkforward_ppo
# reads from `ppo_overrides`: lr, n_steps, batch, clip, ent.
FAST = dict(lr=5e-5, n_steps=1024, batch=128, clip=0.25, ent=0.015)
SLOW = dict(lr=1.5e-5, n_steps=2048, batch=64, clip=0.16, ent=0.0075)

fast_names = set(
    "TSLA NVDA AMD AVGO AAPL MSFT AMZN GOOGL META ADBE CRM "
    "INTC QCOM TXN ORCL NEE GE XOM CVX LLY NKE SBUX".split()
)
# Kept for reference; pick_params only consults fast_names.
slow_names = set(
    "BRK-B JPM BAC JNJ UNH MRK PFE ABBV ABT AMGN PG PEP KO "
    "V MA WMT MCD TMO DHR ACN IBM LIN PM RTX UPS UNP COST HD LOW".split()
)


def pick_params(symbol: str):
    """Return the preset for ``symbol``: ``FAST`` when it is listed in
    ``fast_names``, otherwise ``SLOW`` (including symbols in neither set)."""
    if symbol in fast_names:
        return FAST
    return SLOW

def process_ticker(ticker):
    """Run the walk-forward PPO pipeline for one ticker.

    Selects the hyper-parameter preset with ``pick_params``, slices the global
    ``df`` down to this ticker's rows and hands a copy to ``walkforward_ppo``.

    Returns:
        list: The rows returned by ``walkforward_ppo``, or ``[]`` when
        anything raises (the error is logged, not propagated).
    """
    try:
        preset = pick_params(ticker)
        ticker_rows = df[df['Symbol'] == ticker].copy()
        return walkforward_ppo(
            ticker_rows,
            ticker,
            window_size=WINDOW_SIZE,
            timesteps=TIMESTEPS,
            learning_rate=preset["lr"],
            ppo_overrides=preset,
        )
    except Exception as e:
        logging.error(f"{ticker}: training failed with {e}")
        return []



def run_parallel_tickers(tickers, out_path=os.path.join(RESULTS_DIR, "summary.csv"), max_workers=8):
    """Process ``tickers`` on a thread pool and checkpoint the summary CSV.

    Results are consumed in the order of ``tickers``; each time a ticker
    yields rows, the accumulated summary is rewritten to ``out_path``.

    Args:
        tickers: Iterable of symbols passed to ``process_ticker``.
        out_path: CSV path for the running summary.
        max_workers: Thread-pool size.

    Returns:
        list: All result rows gathered across tickers.
    """
    collected = []
    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        for ticker_rows in pool.map(process_ticker, tickers):
            if not ticker_rows:
                continue
            collected.extend(ticker_rows)
            # Rewrite the file after every productive ticker so an interrupted
            # run still leaves a usable summary behind.
            pd.DataFrame(collected).to_csv(out_path, index=False)
    logging.info("All tickers processed.")
    return collected

# Execution Block
if __name__ == "__main__":
    # 1) Keep only symbols with enough rows for at least one training window.
    min_rows = WINDOW_SIZE + 50  # Use global constant
    symbols = []
    for s, n in df['Symbol'].value_counts().items():
        if n >= min_rows:
            symbols.append(s)
        else:
            logging.warning(f"Skipping {s}: only {n} rows (< {min_rows} required)")

    # 2) Require the columns the enabled feature flags depend on.
    need_cols = ["Close", "Datetime"]
    if ENABLE_WAVELET:
        need_cols.append("Denoised_Close")
    if ENABLE_SENTIMENT:
        need_cols.append("SentimentScore")

    valid_symbols = []
    for s in symbols:
        cols = set(df.loc[df.Symbol == s].columns)
        missing = [c for c in need_cols if c not in cols]
        if missing:
            logging.warning(f"Skipping {s}: missing {missing}")
        else:
            valid_symbols.append(s)
    CONFIG = {'symbols': valid_symbols}
    all_results = []

    # 3) Train: a fixed three-ticker smoke run, or every valid symbol in parallel.
    if test_mode:
        test_stocks = ['AAPL', 'NVDA', 'MSFT']
        for stock in test_stocks:
            logging.info(f">>> Running test_mode on {stock}")
            results = process_ticker(stock)
            all_results.extend(results)

        test_summary_path = os.path.join(RESULTS_DIR, "summary_test_mode.csv")
        pd.DataFrame(all_results).to_csv(test_summary_path, index=False)
        logging.info(f"Test summary saved to {test_summary_path}")
    else:
        summary_results = run_parallel_tickers(CONFIG['symbols'])
        if not summary_results:
            # Nothing to write. Previously a second "Summary saved" log line
            # sat outside this if/else and raised NameError on `path` here,
            # and the selector step then failed on an empty frame.
            logging.warning("No results generated.")
        else:
            path = os.path.join(RESULTS_DIR, "summary.csv")
            pd.DataFrame(summary_results).to_csv(path, index=False)
            logging.info(f"Summary saved to {path}")

            # === Save model selector CSV for later pipeline ===
            try:
                best_by_symbol = pd.DataFrame(summary_results).groupby("Ticker").agg({
                    "Sharpe": "mean",
                    "Drawdown_%": "mean",
                    "PPO_Portfolio": "mean"
                }).reset_index().rename(columns={"Drawdown_%": "MaxDD"})
                best_by_symbol["HitRatio"] = 0.0  # Placeholder unless available elsewhere

                selector_df = (
                    best_by_symbol[["Ticker", "Sharpe", "HitRatio", "MaxDD", "PPO_Portfolio"]]
                    .rename(columns={
                        "HitRatio": "Accuracy",
                        "MaxDD": "Drawdown",
                        "PPO_Portfolio": "Final_Portfolio"
                    })
                )
                selector_df["Model"] = "PPO"
                selector_out = os.path.join(RESULTS_DIR, "ppo_model_selector.csv")
                selector_df.to_csv(selector_out, index=False)
                print(f"✅ Saved model selector file → {selector_out}")
            except Exception as e:
                print(f"[selector_writer] Failed to save model selector CSV: {e}")

    # 4) Recap of windows/tickers skipped because artifacts already existed.
    try:
        if os.path.exists(SKIP_AGG_PATH):
            recap = pd.read_csv(SKIP_AGG_PATH)
            # The flag is normalised from its string form after the CSV round-trip.
            fs = recap["FullySkipped"].astype(str).str.lower().isin(["true", "1", "yes"])
            recap["FullySkipped"] = fs
            fully = recap[recap["FullySkipped"]]["Ticker"].dropna().unique().tolist()
            if fully:
                logging.info(f"Fully skipped tickers (all windows done): {', '.join(fully)}")
            part = recap[~recap["FullySkipped"]]
            if not part.empty:
                counts = part.groupby("Ticker")["Window"].count().sort_values(ascending=False)
                logging.info("Partially skipped window counts per ticker:")
                for t, c in counts.items():
                    logging.info(f"   - {t}: {c} window(s) already complete")
            logging.info(f"Global skip log: {SKIP_AGG_PATH}")
    except Exception as e:
        logging.warning(f"Could not produce global skip recap: {e}")

    # 5) Optional live loop. best_prefix is never assigned a model prefix
    # here, so the loop cannot start until one is chosen, even with LIVE_MODE on.
    best_prefix = None
    if LIVE_MODE and best_prefix is not None:
        live_loop("AAPL", best_prefix)  # a stray trailing "d" here was a SyntaxError

In [None]:
import os, glob, logging
from google.colab import drive

# Step 1: attach Google Drive unless it is already mounted.
if not os.path.ismount("/content/drive"):
    drive.mount("/content/drive")

FINAL_MODEL_DIR = "/content/drive/MyDrive/Results_May_2025/ppo_models_master"

# Step 2: sanity check, count every .zip and .pkl file in the master folder.
zips, pkl = (
    glob.glob(os.path.join(FINAL_MODEL_DIR, pattern))
    for pattern in ("*.zip", "*.pkl")
)
print(f"Master folder: {FINAL_MODEL_DIR}")
print(f"Found {len(zips)} model.zip and {len(pkl)} vecnorm.pkl")

#3) Show a few missing pairs for known tickers/windows (non-destructive)
def check_prefix(prefix):
    """Return ``(model_exists, vecnorm_exists)`` for ``prefix``.

    Each flag tells whether ``<prefix>_model.zip`` / ``<prefix>_vecnorm.pkl``
    is present in ``FINAL_MODEL_DIR``. Nothing is created or modified.
    """
    return tuple(
        os.path.exists(os.path.join(FINAL_MODEL_DIR, f"{prefix}{suffix}"))
        for suffix in ("_model.zip", "_vecnorm.pkl")
    )

# Spot-check the first window of a few known tickers (read-only).
sample = [f"ppo_{ticker}_window1" for ticker in ("AAPL", "QCOM", "BRK-B", "SBUX")]
for pref in sample:
    m, v = check_prefix(pref)
    print(f"{pref}: model={'OK' if m else 'MISSING'}, vecnorm={'OK' if v else 'MISSING'}")


In [None]:
import glob
# Merge every run's summary CSV into one de-duplicated table.
all_summaries = []
# sorted(): glob returns paths in arbitrary order, which made the
# keep="last" de-duplication below depend on the filesystem. With sorted
# paths the row from the lexicographically last file wins.
# NOTE(review): that matches "newest run wins" only if run-folder names sort
# chronologically — confirm.
for p in sorted(glob.glob(os.path.join(BASE, "ppo_walkforward_results_*", "summary*.csv"))):
    try:
        tmp = pd.read_csv(p)
    except Exception as e:
        # Still best effort, but say which file was dropped instead of
        # silently ignoring it.
        print(f"Skipping unreadable summary {p}: {e}")
        continue
    tmp["RunFolder"] = os.path.dirname(p)
    all_summaries.append(tmp)

if all_summaries:
    combo = pd.concat(all_summaries, ignore_index=True)
    combo = combo.drop_duplicates(subset=["Ticker","Window"], keep="last")
    out = os.path.join(FINAL, "all_runs_summary.csv")
    combo.to_csv(out, index=False)
    print(" Saved:", out, "rows:", len(combo))
else:
    print("No summary CSVs found under", BASE)


In [None]:
#= Patch: align live features to training features & fix UTC now =
import os, json, numpy as np, pandas as pd
from datetime import datetime, timedelta, timezone

def align_to_training_features(df: pd.DataFrame, prefix: str, master_dir=None) -> pd.DataFrame:
    """
    Make df match the exact feature set & order used at training for this prefix.

    Reads ``<master_dir>/<prefix>_features.json`` and returns a new frame with
    exactly those columns, in that order. Missing columns are filled as follows:
      * ``Return``: percentage change of ``Close`` (first row 0.0), when
        ``Close`` is available;
      * ``Target``: sign of the next row's ``Return`` (last row 0.0), or 0.0
        when there is no ``Return`` column;
      * anything else: 0.0.

    Args:
        df: Live feature frame. It is not modified.
        prefix: Artifact prefix whose feature list should be matched.
        master_dir: Folder holding the artifacts. ``None`` (the default) means
            the current value of ``FINAL_MODEL_DIR``, looked up at call time.

    Returns:
        The aligned frame, or ``df`` itself (unchanged) when no features file
        exists for ``prefix``.
    """
    if master_dir is None:
        master_dir = FINAL_MODEL_DIR
    feats_path = os.path.join(master_dir, f"{prefix}_features.json")
    if not os.path.exists(feats_path):
        return df
    with open(feats_path) as fh:  # close the handle deterministically
        feat_list = json.load(fh)["features"]

    df = df.copy()  # never add columns to the caller's frame
    need = [c for c in feat_list if c not in df.columns]
    if "Return" in need and "Close" in df.columns:
        df["Return"] = df["Close"].pct_change().fillna(0.0).astype(float)
    if "Target" in need:
        if "Return" in df.columns:
            df["Target"] = np.sign(df["Return"].shift(-1)).fillna(0.0).astype(float)
        else:
            df["Target"] = 0.0
    for c in need:
        if c not in df.columns:
            df[c] = 0.0
    return df[feat_list]

# Replace utcnow() with timezone-aware now()
def latest_df_for_symbol(symbol, horizon_days=5, interval="1m"):
    """Fetch recent bars for ``symbol`` and rebuild features like training.

    Uses a timezone-aware UTC "now" in place of ``datetime.utcnow()``.

    Args:
        symbol: Ticker passed to ``yf.download``.
        horizon_days: How many days back (from now, UTC) the request starts.
        interval: yfinance bar interval.

    Returns:
        The output of ``compute_enhanced_features`` on the downloaded frame
        (with a ``Symbol`` column added), or ``None`` when nothing came back.
    """
    end = datetime.now(timezone.utc)
    start = end - timedelta(days=horizon_days)
    df = yf.download(
        symbol,
        start=start.strftime("%Y-%m-%d"),
        # yfinance treats `end` as exclusive; passing today's date would drop
        # every bar from the current session, so ask up to tomorrow.
        end=(end + timedelta(days=1)).strftime("%Y-%m-%d"),
        interval=interval,
        progress=False,
        auto_adjust=False
    )
    if df is None or df.empty:
        return None
    # Newer yfinance releases can return (field, ticker) MultiIndex columns
    # even for a single symbol; flatten to plain field names.
    if isinstance(df.columns, pd.MultiIndex):
        df.columns = df.columns.get_level_values(0)
    df = df.reset_index()
    df["Symbol"] = symbol
    return compute_enhanced_features(df)

# Ensure predict_latest aligns to training features before building the env
def predict_latest(symbol, prefix):
    """Predict once on the newest bars, with features aligned to training.

    Steps: load model and env factory for ``prefix``, fetch fresh features for
    ``symbol``, align them to the prefix's training feature list, keep at most
    the last 2500 rows, fast-forward the env with a zero action until the
    episode ends, then call ``model.predict``.

    Returns:
        dict with ``signal`` ("BUY"/"SELL"/"HOLD"), ``confidence``, ``action``,
        ``ts``, ``price``, ``p_long``, ``p_short``, ``mu``, ``sigma``; or
        ``{"error": "no fresh data"}`` when fewer than 100 rows are available.
    """
    model, make_env = load_model_and_env(prefix)
    live_df = latest_df_for_symbol(symbol)
    if live_df is None or len(live_df) < 100:
        return {"error": "no fresh data"}

    # Align features to training for THIS prefix
    live_df = align_to_training_features(live_df, prefix)

    # Use the last ~2500 rows like you did before
    df_window = live_df.iloc[-2500:].reset_index(drop=True) if len(live_df) > 2500 else live_df.copy()

    env = make_env(df_window)
    obs = env.reset()
    if isinstance(obs, tuple):
        obs, _ = obs

    # Fast-forward with no-op action
    for _ in range(len(df_window) - 1):
        obs, _, dones, infos = env.step([np.array([0.0], dtype=np.float32)])
        if isinstance(dones, (np.ndarray, list)) and dones[0]:
            # Stable-Baselines3 vec envs reset automatically on `done`, so the
            # `obs` returned by this step is already the FIRST observation of a
            # new episode. The real last observation is handed back through
            # infos[0]["terminal_observation"]; without this the prediction
            # would be made on the oldest bars of the window.
            step_info = infos[0] if isinstance(infos, (list, tuple)) else infos
            terminal_obs = step_info.get("terminal_observation") if isinstance(step_info, dict) else None
            if terminal_obs is not None:
                obs = np.asarray(terminal_obs)[None, ...]  # restore the batch axis
            break

    action, _ = model.predict(obs, deterministic=True)

    # Diagnostics: approximate P(action > 0) from the policy's Gaussian
    mu, sigma = get_mu_sigma(model, obs)
    from math import erf, sqrt

    def Phi(x):  # Standard normal CDF
        return 0.5 * (1.0 + erf(x / sqrt(2.0)))

    p_long  = 1.0 - Phi((0.0 - mu) / max(sigma, 1e-6))
    a = float(np.array(action).squeeze())
    # Same (asymmetric) thresholds as the training-time compat CSV.
    signal = "BUY" if a > 0.1 else ("SELL" if a < -0.3 else "HOLD")
    ts = df_window["Datetime"].iloc[-1] if "Datetime" in df_window.columns else None
    price = float(df_window["Close"].iloc[-1])
    return dict(signal=signal, confidence=abs(a), action=a, ts=ts, price=price,
                p_long=p_long, p_short=1.0-p_long, mu=mu, sigma=sigma)


In [None]:
import os, json

MASTER_DIR = "/content/drive/MyDrive/Results_May_2025/ppo_models_master"
targets = ["ppo_UNH_window3", "ppo_TSLA_window2", "ppo_TMO_window3"]
# File suffixes that together make up one complete artifact bundle.
need = ["_model.zip","_vecnorm.pkl","_features.json","_probability_config.json","_model_info.json"]

for p in targets:
    # Map each expected suffix to whether that file exists for this prefix.
    status = {s: os.path.exists(os.path.join(MASTER_DIR, p+s)) for s in need}
    print(p, status)
    # Quick features sanity
    fpath = os.path.join(MASTER_DIR, p+"_features.json")
    if status["_features.json"]:
        with open(fpath) as fh:  # was json.load(open(...)), which left the handle open
            feats = json.load(fh).get("features", [])
        print(f"  features: {len(feats)} cols; first few: {feats[:8]}")


In [None]:
# One-shot smoke test: (symbol, artifact prefix) pairs to run through
# predict_latest, which downloads a small recent window and predicts once.
pairs = [
    ("UNH", "ppo_UNH_window3"),
    ("TSLA", "ppo_TSLA_window2"),
    ("TMO", "ppo_TMO_window3"),
]
for sym, pref in pairs:
    out = predict_latest(sym, pref)
    print(sym, pref, "->", out)
