<a href="https://colab.research.google.com/github/racoope70/daytrading-with-ml/blob/main/multi_stock_ppo_live_inference.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Purge any CUDA / NVIDIA apt packages already on the machine, then remove
# orphaned dependencies and clear the apt cache. The `|| echo` keeps the cell
# from failing when the purge command returns a non-zero status.
# NOTE(review): the glob patterns are unquoted, so the shell may expand them
# against files in the working directory — confirm this is acceptable.
!apt-get remove --purge -y cuda* libcuda* nvidia* || echo "No conflicting CUDA packages"
!apt-get autoremove -y
!apt-get clean

In [None]:

# Refresh the apt index and install the CUDA 12.4 toolkit plus individual
# CUDA math/NPP runtime libraries.
# NOTE(review): the individually listed library packages carry 10/11 version
# suffixes while the toolkit is 12.4 — confirm the mix is intentional.
!apt-get update -qq && apt-get install -y \
    libcusolver11 libcusparse11 libcurand10 libcufft10 libnppig10 libnppc10 libnppial10 \
    cuda-toolkit-12-4


In [None]:
# Replace whatever protobuf is installed with the pinned 3.20.3 release.
!pip uninstall -y protobuf
!pip install protobuf==3.20.3


In [None]:
# Install the RAPIDS stack (cuML, cuDF, Dask-CUDA, Dask-cuDF) at 25.2.0 for
# CUDA 12, pulling wheels from NVIDIA's package index. cupy-cuda12x is left
# unpinned.
!pip install --extra-index-url=https://pypi.nvidia.com \
    cuml-cu12==25.2.0 cudf-cu12==25.2.0 cupy-cuda12x \
    dask-cuda==25.2.0 dask-cudf-cu12==25.2.0


In [None]:
# Pin numba to 0.60.0.
# NOTE(review): presumably required for compatibility with the RAPIDS 25.2.0
# wheels installed in the previous cell — confirm.
!pip install numba==0.60.0


In [None]:
# Install the RL, market-data and general ML/plotting libraries used by the
# cells below. None of these are version-pinned, so a fresh runtime may pick
# up newer releases than the ones this notebook was developed against.
!pip install stable-baselines3[extra] gymnasium gym-anytrading yfinance xgboost joblib
!pip install matplotlib scikit-learn pandas


In [None]:
# Install TensorFlow pinned to 2.18.0 (imported below only to configure GPU
# memory growth).
!pip install tensorflow==2.18.0


In [None]:
# Install PyTorch, torchvision and torchaudio from the CUDA 12.4 wheel index
# (versions unpinned).
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124


In [None]:
import tensorflow as tf

# Ask TensorFlow to allocate GPU memory on demand instead of reserving it all
# up front. Nothing is printed when no GPU is visible.
gpus = tf.config.list_physical_devices("GPU")
try:
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
    if gpus:
        print("TensorFlow GPU memory growth enabled")
except RuntimeError as e:
    # Raised by TensorFlow when the setting cannot be applied.
    print(f"TensorFlow GPU memory config failed: {e}")


In [None]:
import os
# Point the CUDA environment variables at the 12.4 toolkit installed above.
# The search-path variables are read with a default so a missing
# LD_LIBRARY_PATH does not raise KeyError, and the entry is only appended when
# absent so re-running this cell does not keep growing the variables.
_cuda_home = '/usr/local/cuda-12.4'
os.environ['CUDA_HOME'] = _cuda_home
for _var, _extra in (('PATH', f'{_cuda_home}/bin'), ('LD_LIBRARY_PATH', f'{_cuda_home}/lib64')):
    _current = os.environ.get(_var, '')
    if _extra not in _current.split(':'):
        os.environ[_var] = f'{_current}:{_extra}' if _current else _extra


In [None]:
# Remove the /content/drive path before Google Drive is mounted further down.
# NOTE(review): if Drive is still mounted when this runs, `rm -rf` would act on
# the mounted contents rather than an empty mount point — verify before re-running.
!rm -rf /content/drive

In [None]:
# Step 7: Authenticate with the Hugging Face Hub (optional).
# Logging in gives more reliable access and helps avoid rate limits when downloading public models/datasets.

# Authenticate with Hugging Face Hub
#notebook_login()

In [None]:
# === Import Libraries ===
import os, gc, time, json, pywt, logging
from datetime import datetime, timedelta
import numpy as np
import pandas as pd
import yfinance as yf
from transformers import pipeline
from google.colab import drive

# === Logging Setup ===
# Configure the root logger once for the notebook (INFO level, timestamped).
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")

# === Mount Google Drive ===
# The engineered dataset is later copied under /content/drive/MyDrive.
drive.mount('/content/drive')

# === Configuration ===
# Full ticker universe; only used when test_mode is False.
ticker_list = ['AAPL', 'TSLA', 'MSFT', 'GOOGL', 'NVDA']
strategy_name = "ppo_walkforward_v1"
# NOTE(review): within this cell only CONFIG['symbols'] is read; the other keys
# are not referenced by the visible code.
CONFIG = {
    'symbols': ticker_list,
    'period': '720d',
    'interval': '1h',
    'target': 'Target',
    'sharpe_threshold': 1.5,
    'return_threshold': 1.25,
    'strategy_name': strategy_name
}
# test_mode restricts processing to a fixed three-ticker subset.
test_mode = True
symbols = ['AAPL', 'NVDA', 'MSFT'] if test_mode else CONFIG['symbols']
# Download window: the 729 days ending today.
# NOTE(review): presumably chosen to stay inside the provider's history limit
# for hourly bars — confirm.
end_date = datetime.today()
start_date = end_date - timedelta(days=729)

# === Sentiment Pipeline ===
# Loads the FinBERT sentiment model once; score_sentiment() below reuses it.
sentiment_pipeline = pipeline("sentiment-analysis", model="ProsusAI/finbert")

# === Download Stock Data ===
def download_stock_data(ticker, start_date=None, end_date=None, interval="1h", max_retries=5, window_days=730):
    """Download OHLCV bars for one ticker via yfinance, retrying on failure.

    Args:
        ticker: Symbol to download.
        start_date: First day of the range (datetime). When omitted it is
            derived as ``end_date - window_days``.
        end_date: Last day of the range (datetime). Defaults to today.
        interval: Bar interval passed through to ``yf.download``.
        max_retries: Maximum number of download attempts.
        window_days: Look-back length used when ``start_date`` is omitted.

    Returns:
        A DataFrame with the index reset, a ``Symbol`` column and a parsed
        ``Datetime`` column, or ``None`` when every attempt failed.
    """
    # Fill in only the bound that is missing so a caller-supplied bound is
    # never silently discarded.
    if end_date is None:
        end_date = datetime.today()
    if start_date is None:
        start_date = end_date - timedelta(days=window_days)
    start_str = start_date.strftime('%Y-%m-%d')
    end_str = end_date.strftime('%Y-%m-%d')

    for attempt in range(1, max_retries + 1):
        try:
            logging.info(f"📥 Attempt {attempt}: Downloading {ticker} from {start_str} to {end_str}...")
            df = yf.download(
                ticker,
                start=start_str,
                end=end_str,
                interval=interval,
                progress=False,
                auto_adjust=False
            )
            if df is None or df.empty:
                raise ValueError("Empty data")
            df = df.reset_index()
            df['Symbol'] = ticker
            # The timestamp column is named 'Datetime' or 'Date' after the reset.
            time_col = 'Datetime' if 'Datetime' in df.columns else 'Date'
            df['Datetime'] = pd.to_datetime(df[time_col])
            return df
        except Exception as e:
            if attempt == max_retries:
                # Nothing left to retry: do not sleep or promise another attempt.
                logging.error(f"⚠️ Error downloading {ticker}: {e}. No retries left.")
                break
            delay = attempt * 5  # linear back-off: 5s, 10s, 15s, ...
            logging.error(f"⚠️ Error downloading {ticker}: {e}. Retrying in {delay} sec...")
            time.sleep(delay)
    logging.error(f"Failed to download {ticker}")
    return None

# === Wavelet Denoising ===
def denoise_wavelet(series, wavelet='db1', level=2):
    """Smooth a 1-D series by discarding its wavelet detail coefficients.

    The series is decomposed to ``level`` levels, every detail band is zeroed
    and the signal is rebuilt from the approximation band alone.

    Args:
        series: 1-D sequence of values.
        wavelet: Wavelet name passed to PyWavelets.
        level: Decomposition depth. It is now forwarded to ``pywt.wavedec``;
            previously it was ignored, so the decomposition ran to the maximum
            depth and the output was smoothed far more than requested.

    Returns:
        The reconstructed signal, truncated to ``len(series)``.
    """
    coeffs = pywt.wavedec(series, wavelet, mode='smooth', level=level)
    # Keep the approximation (index 0) and zero out every detail band.
    coeffs[1:] = [np.zeros_like(c) for c in coeffs[1:]]
    # Reconstruction can be one sample longer than the input; trim it.
    return pywt.waverec(coeffs, wavelet, mode='smooth')[:len(series)]

# === Sentiment Scoring ===
def score_sentiment(texts):
    """Convert texts into signed sentiment scores using ``sentiment_pipeline``.

    Each distinct text is sent to the model once and its score is reused for
    every repeat, so a list of identical headlines costs one inference.

    Args:
        texts: Sequence of strings.

    Returns:
        A list aligned with ``texts``: ``+score`` for a positive label,
        ``-score`` for a negative label and ``0`` for any other label. If
        scoring raises, a list of zeros of the same length is returned.
    """
    if len(texts) == 0:
        return []
    try:
        # dict.fromkeys de-duplicates while preserving first-seen order.
        unique_texts = list(dict.fromkeys(texts))
        outputs = sentiment_pipeline(unique_texts, truncation=True, max_length=256)
        score_by_text = {}
        for text, r in zip(unique_texts, outputs):
            label = r['label'].lower()
            if label == 'positive':
                score_by_text[text] = r['score']
            elif label == 'negative':
                score_by_text[text] = -r['score']
            else:
                score_by_text[text] = 0
        return [score_by_text[text] for text in texts]
    except Exception as e:
        # Best effort: a failure degrades to neutral scores.
        logging.error(f"Sentiment scoring error: {e}")
        return [0] * len(texts)

# === Feature Engineering ===
def compute_enhanced_features(df):
    """Add technical indicators, labels and auxiliary columns to one ticker's bars.

    Args:
        df: Bars for a single symbol with at least ``Close``, ``High``,
            ``Low``, ``Volume`` and ``Symbol`` columns. MultiIndex columns are
            flattened to their first level and duplicate names are dropped.

    Returns:
        A new DataFrame containing the indicator columns, with every row that
        has a NaN removed and ``Target``, ``Return`` and ``Symbol`` moved to
        the end. The input frame is not modified.

    Notes:
        * ``Return`` is the 10-bar forward return and ``Target`` is derived
          from it, so both look ahead and are labels, not inputs.
        * ``CCI`` uses the rolling mean and standard deviation of ``Close``.
        * ``Delta`` and ``Gamma`` are the first and second differences of the
          close-to-close percentage change.
    """
    df = df.copy()
    if isinstance(df.columns, pd.MultiIndex):
        df.columns = df.columns.get_level_values(0)
    # Explicit copy so the column assignments below never write into a slice.
    df = df.loc[:, ~df.columns.duplicated()].copy()

    close = df['Close']

    df['SMA_20'] = close.rolling(20).mean()
    df['STD_20'] = close.rolling(20).std()
    df['Upper_Band'] = df['SMA_20'] + 2 * df['STD_20']
    df['Lower_Band'] = df['SMA_20'] - 2 * df['STD_20']
    df['Lowest_Low'] = df['Low'].rolling(14).min()
    df['Highest_High'] = df['High'].rolling(14).max()
    # A flat 14-bar range would divide by zero; map it to NaN instead.
    denom = (df['Highest_High'] - df['Lowest_Low']).replace(0, np.nan)
    df['Stoch'] = ((close - df['Lowest_Low']) / denom) * 100
    df['ROC'] = close.pct_change(10)
    df['OBV'] = (np.sign(close.diff()) * df['Volume']).cumsum()
    df['CCI'] = ((df['High'] + df['Low'] + close) / 3 - close.rolling(20).mean()) / (0.015 * close.rolling(20).std())
    df['EMA_10'] = close.ewm(span=10).mean()
    df['EMA_50'] = close.ewm(span=50).mean()
    df['MACD_Line'] = close.ewm(span=12).mean() - close.ewm(span=26).mean()
    df['MACD_Signal'] = df['MACD_Line'].ewm(span=9).mean()

    # RSI from 14-bar average gain and average loss. `where` keeps the
    # comparison vectorised and maps the leading NaN difference to 0.
    delta = close.diff()
    avg_gain = delta.where(delta > 0, 0.0).rolling(14).mean()
    avg_loss = -delta.where(delta < 0, 0.0).rolling(14).mean()
    df['RSI'] = 100 - (100 / (1 + avg_gain / avg_loss))

    prev_close = close.shift()
    tr = pd.concat([
        df['High'] - df['Low'],
        (df['High'] - prev_close).abs(),
        (df['Low'] - prev_close).abs()
    ], axis=1).max(axis=1)
    df['ATR'] = tr.rolling(14).mean()
    df['Volatility'] = close.pct_change().rolling(20).std()
    df['Return'] = (close.shift(-10) - close) / close
    df['Target'] = np.select([df['Return'] > 0.02, df['Return'] < -0.02], [1, -1], default=0)

    # === Denoise ===
    try:
        df['Denoised_Close'] = denoise_wavelet(close.ffill())
        logging.info(df[['Close', 'Denoised_Close']].head())
    except Exception as e:
        logging.warning(f"Wavelet denoising failed: {e}")
        df['Denoised_Close'] = df['Close']

    # === Sentiment ===
    # Every row carries the same placeholder headline, so it is scored once
    # and the result is broadcast to the whole column.
    symbol = df['Symbol'].iloc[0]
    headline = f"{symbol} is expected to perform well in the market."
    df['Mock_Headline'] = headline
    try:
        df['SentimentScore'] = score_sentiment([headline])[0]
    except Exception as e:
        logging.warning(f"Sentiment scoring failed for {symbol}: {e}")
        df['SentimentScore'] = 0

    # === Greeks ===
    df['Delta'] = close.pct_change(1).fillna(0)
    df['Gamma'] = df['Delta'].diff().fillna(0)
    # Drops indicator warm-up rows and the final rows with no forward return.
    df = df.dropna()

    # === Reorder Columns ===
    trailing = ['Target', 'Return', 'Symbol']
    cols = [col for col in df.columns if col not in trailing] + trailing
    return df[cols]

# === Process All Symbols ===
# Download and feature-engineer each symbol in turn, collecting the
# per-symbol frames in all_dfs. A symbol that fails is logged and skipped.
all_dfs = []
for i, ticker in enumerate(symbols, 1):
    logging.info(f"[{i}/{len(symbols)}] Processing {ticker}")
    df_single = download_stock_data(ticker, start_date, end_date, interval="1h")

    if df_single is None or df_single.empty:
        logging.warning(f"No data for {ticker}")
        continue

    try:
        df_features = compute_enhanced_features(df_single)
        all_dfs.append(df_features)
    except Exception as e:
        logging.error(f"Feature engineering failed for {ticker}: {e}")
    finally:
        # Drop this iteration's names before the next download. df_features
        # is unbound when feature engineering raised.
        del df_single
        try:
            del df_features
        except NameError:
            pass
        gc.collect()

# === Save Final Dataset ===
# Combine the per-symbol frames and write the dataset to the working
# directory and to Google Drive, then free the in-memory copies.
if not all_dfs:
    logging.warning("No usable data found for any ticker.")
else:
    df = pd.concat(all_dfs, ignore_index=True)
    logging.info(f"Combined dataset created with shape: {df.shape}")

    # Local copy: read back by the following cells.
    df.to_csv("multi_stock_feature_engineered_dataset.csv", index=False)

    # Drive copy.
    drive_path = "/content/drive/MyDrive/trading_data/"
    os.makedirs(drive_path, exist_ok=True)
    df.to_csv(os.path.join(drive_path, "multi_stock_feature_engineered_dataset.csv"), index=False)
    logging.info(f"Dataset saved to Google Drive at: {drive_path}")

    del all_dfs, df
    gc.collect()


In [None]:
# Reload the dataset written by the previous cell (which deleted its in-memory
# copy) and print the first rows as a sanity check.
df = pd.read_csv("multi_stock_feature_engineered_dataset.csv")
print(df.head())


In [None]:
# === PPO Walkforward with Runtime Timing, Full Model Saving, and Logging ===
import os, gc, time, json, torch, logging, joblib
import pandas as pd, numpy as np, matplotlib.pyplot as plt
from datetime import datetime
from concurrent.futures import ThreadPoolExecutor
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
from gym_anytrading.envs import StocksEnv
import gymnasium as gym
from gymnasium.spaces import Box

# === Configuration ===
# Output locations on Google Drive: results and plots under RESULTS_DIR,
# saved model artifacts under FINAL_MODEL_DIR.
RESULTS_DIR = "/content/drive/MyDrive/Results_May_2025/ppo_walkforward_results"
FINAL_MODEL_DIR = os.path.join(RESULTS_DIR, "models")
os.makedirs(RESULTS_DIR, exist_ok=True)
os.makedirs(FINAL_MODEL_DIR, exist_ok=True)

# === Logging Setup ===
# NOTE(review): logging.basicConfig does nothing when the root logger already
# has handlers, so if the earlier data cell ran in this kernel the DEBUG level
# requested here is not applied — confirm which level is intended.
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')

# === Flags ===
# ENABLE_SENTIMENT gates the sentiment reward term in ContinuousTradingEnv;
# test_mode and ENABLE_PLOTS are read further down in this cell.
# NOTE(review): ENABLE_SLO and ENABLE_WAVELET are not read by the visible code.
ENABLE_SENTIMENT = True
ENABLE_SLO = True
ENABLE_WAVELET = True
test_mode = True
ENABLE_PLOTS = True

# === Load Dataset ===
# Read the CSV from the working directory and fail fast when it is missing.
if not os.path.exists("multi_stock_feature_engineered_dataset.csv"):
    raise FileNotFoundError("Required feature-engineered dataset not found!")
df = pd.read_csv("multi_stock_feature_engineered_dataset.csv")
df['Datetime'] = pd.to_datetime(df['Datetime'])

# === PPO-Compatible Environment ===
class ContinuousTradingEnv(StocksEnv):
    """StocksEnv variant exposing a 1-D continuous action in [-1, 1] for PPO.

    The continuous action is bucketed into the discrete action passed to the
    parent environment: below -0.3 maps to 0, above 0.1 maps to 1 and anything
    in between maps to 2, which this class treats as "hold".

    On top of the parent's reward the step adds:
      * for action 1 only, the difference between a slippage-adjusted fill
        price and the previous close;
      * a small bonus proportional to the action magnitude;
      * a small term proportional to ``SentimentScore`` when ENABLE_SENTIMENT
        is set and the column exists.
    The total is clipped to [-1, 1].
    """

    def __init__(self, df, frame_bound, window_size):
        """Build the parent env on a 0-based copy of ``df`` and swap in the Box action space."""
        super().__init__(df=df.reset_index(drop=True), frame_bound=frame_bound, window_size=window_size)
        self.action_space = Box(low=-1.0, high=1.0, shape=(1,), dtype=np.float32)

    def step(self, action):
        """Advance one tick.

        Args:
            action: Scalar or length-1 array in [-1, 1].

        Returns:
            ``(obs, reward, terminated, truncated, info)``. If anything raises,
            the error is logged and a terminal transition with zero reward and
            a freshly reset observation is returned.
        """
        try:
            # Collapse the (1,)-shaped policy output to a plain float once.
            action_scalar = float(np.asarray(action).squeeze())
            if action_scalar < -0.3:
                discrete_action = 0
            elif action_scalar > 0.1:
                discrete_action = 1
            else:
                discrete_action = 2

            step_result = super().step(discrete_action)
            if len(step_result) == 5:
                obs, reward, terminated, truncated, info = step_result
            else:
                # Older 4-tuple step API.
                obs, reward, terminated, info = step_result
                truncated = False

            # NOTE(review): _current_tick is used directly as a row label of
            # self.df; confirm it is not relative to frame_bound in the parent.
            current_price = self.df.loc[self._current_tick, 'Close']
            slippage_pct = 0.002
            if discrete_action == 1:
                # Only action 1 has ever carried a price term (action 0 was
                # weighted by zero). Action 2 used to get a fill price of 0 and
                # a weight of 2, a large negative reward on every hold step;
                # it now contributes nothing.
                # NOTE(review): slippage raises the fill price and therefore
                # this reward term — confirm that sign is intended.
                fill_price = current_price * (1 + slippage_pct)
                reward += fill_price - self.df.loc[self._current_tick - 1, 'Close']

            reward += abs(action_scalar) * 0.01

            if ENABLE_SENTIMENT and 'SentimentScore' in self.df.columns:
                sentiment = self.df.loc[self._current_tick, 'SentimentScore']
                reward += sentiment * 0.01

            return obs, np.clip(reward, -1.0, 1.0), terminated, truncated, info
        except Exception as e:
            logging.error(f"Step error: {e}")
            # reset() may return (obs, info); hand back only the observation
            # so the transition tuple keeps its shape.
            reset_out = self.reset()
            obs = reset_out[0] if isinstance(reset_out, tuple) else reset_out
            return obs, 0.0, True, True, {}

# === Utilities ===
def plot_performance(ticker, portfolio, hold_value):
    """Save a PNG comparing the PPO equity curve with a buy-and-hold level.

    The file is written to ``RESULTS_DIR`` as ``<ticker>_performance.png``.
    The name depends only on the ticker, so a later call for the same ticker
    replaces the earlier image.

    Args:
        ticker: Symbol used in the title and file name.
        portfolio: Sequence of portfolio values, one per step.
        hold_value: Final buy-and-hold value, drawn as a horizontal line.
    """
    out_file = os.path.join(RESULTS_DIR, f"{ticker}_performance.png")

    fig = plt.figure(figsize=(10, 4))
    ax = fig.gca()
    ax.plot(portfolio, label="PPO Portfolio")
    ax.axhline(y=hold_value, color="r", linestyle="--", label="Buy & Hold")
    ax.set_title(f"{ticker} - PPO vs Buy & Hold")
    ax.legend()
    ax.grid(True, linestyle="--", alpha=0.5)
    fig.tight_layout()
    fig.savefig(out_file)
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)

def get_walk_forward_windows(df, window_size=3000, step_size=500, min_len=1000):
    """Enumerate ``(start, end)`` row ranges for rolling windows over ``df``.

    Args:
        df: Any sized object; only ``len(df)`` is used.
        window_size: Number of rows per window.
        step_size: Offset between consecutive window starts.
        min_len: Starts are generated only below ``len(df) - min_len``.

    Returns:
        List of ``(start, start + window_size)`` tuples, with ``end``
        exclusive as in ``df.iloc[start:end]``. A window whose end equals
        ``len(df)`` is included; it used to be dropped, so a series of exactly
        ``window_size`` rows produced no window at all.
    """
    total_rows = len(df)
    return [
        (start, start + window_size)
        for start in range(0, total_rows - min_len, step_size)
        if start + window_size <= total_rows
    ]

# === Save QuantConnect-Compatible Artifacts ===
def save_quantconnect_model(artifact, prefix, save_dir):
    """Write one trained window's model and metadata files into ``save_dir``.

    Files written, each named ``<prefix>_<suffix>``:
      * ``model.zip`` via ``artifact['model'].save``
      * ``vecnorm.pkl`` via ``artifact['vecnorm'].save`` (skipped when None)
      * ``features.json``, ``probability_config.json``, ``model_info.json``

    Args:
        artifact: Dict with ``model``, ``vecnorm``, ``features`` and a
            ``result`` dict holding ``Ticker``, ``Window``, ``PPO_Portfolio``,
            ``BuyHold`` and ``Sharpe``.
        prefix: File-name prefix.
        save_dir: Target directory; created when missing.
    """
    os.makedirs(save_dir, exist_ok=True)
    result = artifact['result']

    def _dump_json(suffix, payload):
        # A context manager guarantees the file is flushed and closed.
        with open(os.path.join(save_dir, f"{prefix}_{suffix}.json"), "w") as fh:
            json.dump(payload, fh)

    artifact['model'].save(os.path.join(save_dir, f"{prefix}_model.zip"))
    if artifact['vecnorm'] is not None:
        artifact['vecnorm'].save(os.path.join(save_dir, f"{prefix}_vecnorm.pkl"))

    _dump_json("features", {"features": artifact['features']})
    _dump_json("probability_config", {"threshold": 0.05, "use_confidence": True, "inference_mode": "deterministic"})
    _dump_json("model_info", {
        "model": "PPO",
        "ticker": result['Ticker'],
        "window": result['Window'],
        "date_trained": datetime.today().strftime("%Y-%m-%d"),
        "framework": "stable-baselines3",
        "input_features": artifact['features'],
        "final_portfolio": result['PPO_Portfolio'],
        "buy_hold": result['BuyHold'],
        "sharpe": result['Sharpe']
    })
    logging.info(f"Saved QuantConnect-compatible model for {result['Ticker']} | {result['Window']}")

# === PPO Walkforward Function ===
def _replay_policy(model, env, df_window, start_tick, initial_balance=100000):
    """Run the trained policy through one episode and simulate all-in/all-out trading.

    Args:
        model: Trained policy exposing ``predict(obs, deterministic=True)``.
        env: The VecNormalize-wrapped vector env the model was trained on.
        df_window: Window frame; ``Close`` supplies the trade prices.
        start_tick: Row of ``df_window`` matching the env's first observation.
            # NOTE(review): the caller passes frame_bound[0]; confirm this is
            # the env's first tradable row.
        initial_balance: Starting cash.

    Returns:
        ``(portfolio, trade_log, confidence_buckets)``: per-step portfolio
        value, per-step event labels, and counts of steps by action magnitude.
    """
    stop_loss_pct = 0.04
    take_profit_pct = 0.12
    # Minimum number of steps between two policy-driven trades.
    cooldown_steps = int(0.3 * len(df_window) / 100)
    closes = df_window['Close']

    # Evaluation must not keep updating the running normalisation statistics.
    env.training = False
    env.norm_reward = False

    obs = env.reset()
    position, balance, entry_price = 0, initial_balance, None
    last_trade_step = -10
    portfolio, trade_log = [], []
    confidence_buckets = {'Low': 0, 'Mid': 0, 'High': 0}

    for step in range(len(df_window) - start_tick):
        action, _ = model.predict(obs, deterministic=True)
        action_val = float(np.array(action).squeeze())
        # Price on the row this observation corresponds to.
        price = closes.iloc[start_tick + step]

        magnitude = abs(action_val)
        if magnitude < 0.2:
            confidence_buckets['Low'] += 1
        elif magnitude < 0.5:
            confidence_buckets['Mid'] += 1
        else:
            confidence_buckets['High'] += 1

        # Risk management: forced exits take priority over the policy.
        if position > 0 and entry_price:
            change = (price - entry_price) / entry_price
            if change < -stop_loss_pct:
                balance = position * price
                position = 0
                trade_log.append("STOP-LOSS")
                entry_price = None
            elif change > take_profit_pct:
                balance = position * price
                position = 0
                trade_log.append("TAKE-PROFIT")
                entry_price = None

        if magnitude > 0.02 and (step - last_trade_step) > cooldown_steps:
            if action_val > 0 and position == 0:
                position = balance / price
                balance = 0
                entry_price = price
                trade_log.append("BUY")
                last_trade_step = step
            elif action_val < 0 and position > 0:
                balance = position * price
                position = 0
                trade_log.append("SELL")
                last_trade_step = step
            else:
                trade_log.append("HOLD")
        else:
            trade_log.append("HOLD")

        portfolio.append(balance if balance > 0 else position * price)

        obs, _, done, _ = env.step(action)
        # A VecEnv resets itself on done; stop here rather than replay the
        # start of the episode against later prices.
        if np.asarray(done).any():
            break

    return portfolio, trade_log, confidence_buckets


def walkforward_ppo(df, ticker, window_size=3000, step_size=500, timesteps=1_500_000, learning_rate=2e-5):
    """Train and evaluate one PPO agent per rolling window of a ticker's data.

    For each window from ``get_walk_forward_windows`` a fresh PPO model is
    trained on a ContinuousTradingEnv built from that window, then replayed
    deterministically over the same window to get a portfolio curve. The
    three windows with the highest Sharpe are saved to ``FINAL_MODEL_DIR``.

    Args:
        df: Rows for a single ticker; must include ``Close``.
        ticker: Symbol, used for logging and file names.
        window_size: Rows per window; shorter inputs are skipped.
        step_size: Offset between window starts.
        timesteps: PPO training timesteps per window.
        learning_rate: PPO learning rate.

    Returns:
        One result dict per evaluated window with keys ``Ticker``, ``Window``,
        ``PPO_Portfolio``, ``BuyHold``, ``Sharpe``, ``Drawdown_%``, ``Winner``.

    Notes:
        * Each model is evaluated on the window it was trained on, so the
          metrics are in-sample.
        * Buy-and-hold is measured over the same rows as the replay.
        * NOTE(review): Sharpe is annualised with sqrt(252) although the
          dataset is built from hourly bars — confirm the intended factor.
    """
    if len(df) < window_size:
        logging.warning(f"Skipping {ticker}: only {len(df)} rows (min required: {window_size})")
        return []
    artifacts, results = [], []
    windows = get_walk_forward_windows(df, window_size, step_size)
    device = "cpu"

    for w_idx, (start, end) in enumerate(windows):
        window_start_time = time.time()
        gc.collect()
        df_window = df.iloc[start:end].reset_index(drop=True)
        if len(df_window) <= 52 or len(df_window) % 2 != 0:
            df_window = df_window.iloc[:-1]

        frame_bound = (50, len(df_window) - 3)
        # Bind the current window as defaults so the factory does not depend
        # on loop variables that change or are deleted later.
        env = DummyVecEnv([
            lambda data=df_window, bounds=frame_bound: ContinuousTradingEnv(df=data, frame_bound=bounds, window_size=10)
        ])
        env = VecNormalize(env, norm_obs=True, norm_reward=True, clip_obs=10.)

        model = PPO("MlpPolicy", env, verbose=0, device=device,
                    learning_rate=learning_rate, n_steps=2048,
                    batch_size=64, n_epochs=10, gamma=0.99,
                    gae_lambda=0.95, clip_range=0.2, ent_coef=0.01,
                    policy_kwargs=dict(net_arch=[64, 64]))

        logging.info(f"Training {ticker} Window {w_idx+1}/{len(windows)}")
        model.learn(total_timesteps=timesteps)

        start_tick = frame_bound[0]
        portfolio, trade_log, confidence_buckets = _replay_policy(model, env, df_window, start_tick)

        if not portfolio:
            logging.warning(f"Empty portfolio for {ticker} | Window {start}-{end}")
            continue

        final_value = portfolio[-1]
        first_price = df_window['Close'].iloc[start_tick]
        last_price = df_window['Close'].iloc[start_tick + len(portfolio) - 1]
        hold_value = (100000 / first_price) * last_price
        equity = pd.Series(portfolio)
        returns = equity.pct_change().fillna(0)
        sharpe = (returns.mean() / (returns.std() + 1e-6)) * np.sqrt(252)
        running_peak = equity.cummax()
        drawdown = ((running_peak - equity) / running_peak).max() * 100
        prefix = f"ppo_{ticker}_window{w_idx+1}"

        artifacts.append({
            "model": model,
            "vecnorm": env,
            "features": df_window.columns.tolist(),
            "result": {
                "Ticker": ticker,
                "Window": f"{start}-{end}",
                "PPO_Portfolio": round(final_value, 2),
                "BuyHold": round(hold_value, 2),
                "Sharpe": round(sharpe, 3),
                "Drawdown_%": round(drawdown, 2),
                "Winner": "PPO" if final_value > hold_value else "Buy & Hold"
            },
            "prefix": prefix,
            "portfolio": portfolio.copy()
        })

        results.append(artifacts[-1]["result"])
        if ENABLE_PLOTS:
            plot_performance(ticker, portfolio, hold_value)

        # Hit Ratio: share of closed trades that ended in a take-profit.
        logging.info(f"Confidence Buckets for {ticker} | Window {start}-{end}: {confidence_buckets}")
        successful_trades = sum(1 for t in trade_log if t == 'TAKE-PROFIT')
        total_trades = sum(1 for t in trade_log if t in ['SELL', 'STOP-LOSS', 'TAKE-PROFIT'])
        hit_ratio = successful_trades / (total_trades + 1e-6)
        logging.info(f"Hit Ratio: {hit_ratio:.2%}")

        # Trade Summary
        logging.info(f"{ticker} | Trade Summary (Window {start}-{end})")
        logging.info(f"{pd.Series(trade_log).value_counts().to_dict()}")

        logging.info(f"{ticker} | Window {w_idx+1} runtime: {round(time.time() - window_start_time, 2)} sec")
        # The artifact entry above still references model and env; these dels
        # only drop the loop-local names.
        del env, model, df_window, portfolio, trade_log
        gc.collect()
        torch.cuda.empty_cache()

    # Save top-N windows by Sharpe (once).
    TOP_N_WINDOWS = 3
    top_artifacts = sorted(artifacts, key=lambda x: x['result']['Sharpe'], reverse=True)[:TOP_N_WINDOWS]
    for artifact in top_artifacts:
        save_quantconnect_model(artifact, artifact['prefix'], FINAL_MODEL_DIR)
    artifacts.clear()
    return results

# === Ticker Runner ===
def process_ticker(ticker):
    """Run the PPO walk-forward for one symbol taken from the module-level ``df``.

    Args:
        ticker: Symbol to select from ``df['Symbol']``.

    Returns:
        The list returned by ``walkforward_ppo``, or an empty list when the
        dataset has no rows for ``ticker``.
    """
    logging.info(f"Starting PPO Walkforward for {ticker}")
    ticker_rows = df.loc[df['Symbol'] == ticker].copy()
    if len(ticker_rows.index) > 0:
        return walkforward_ppo(ticker_rows, ticker)
    logging.warning(f"No data found for {ticker}")
    return []

# === Parallel Execution ===
def run_parallel_tickers(tickers, out_path=os.path.join(RESULTS_DIR, "summary.csv")):
    """Run ``process_ticker`` for several symbols on a pool of 8 threads.

    Results are consumed in the order of ``tickers``. After each ticker that
    produced rows, the accumulated summary is rewritten to ``out_path``.

    Args:
        tickers: Iterable of symbols.
        out_path: CSV path for the running summary.

    Returns:
        Flat list of result dicts from all tickers.
    """
    collected = []
    with ThreadPoolExecutor(max_workers=8) as pool:
        for ticker_rows in pool.map(process_ticker, tickers):
            if not ticker_rows:
                continue
            collected.extend(ticker_rows)
            # Rewrite the summary after every productive ticker.
            pd.DataFrame(collected).to_csv(out_path, index=False)
    logging.info("All tickers processed in parallel.")
    return collected

# === Execution Block ===
# CONFIG and all_results are bound unconditionally: the branches below use
# them whether or not this code runs as __main__.
CONFIG = {'symbols': df['Symbol'].unique().tolist()}
all_results = []

if test_mode:
    # Test mode: run a fixed subset sequentially and write its own summary.
    test_stocks = ['AAPL', 'NVDA', 'MSFT']
    for stock in test_stocks:
        df_stock = df[df['Symbol'] == stock].copy()
        results = walkforward_ppo(df_stock, stock)
        all_results.extend(results)
    pd.DataFrame(all_results).to_csv(os.path.join(RESULTS_DIR, "summary_test_mode.csv"), index=False)
else:
    # Full mode: every symbol in the dataset, through the thread pool.
    summary_df = run_parallel_tickers(CONFIG['symbols'])
    if not summary_df:
        logging.warning("No results generated.")
    else:
        path = os.path.join(RESULTS_DIR, "summary.csv")
        pd.DataFrame(summary_df).to_csv(path, index=False)
        logging.info(f"Summary saved to {path}")

logging.info("Script finished execution.")
gc.collect()
torch.cuda.empty_cache()


In [None]:
import os
import json
import logging
import joblib
import pandas as pd
from stable_baselines3 import PPO

# === Enable Logging Format ===
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")

# === Paths ===
RESULTS_DIR = "/content/drive/MyDrive/Results_May_2025/ppo_walkforward_results"
MODEL_DIR = os.path.join(RESULTS_DIR, "models")
TICKER = "AAPL"
WINDOW = "window1"

# === Filenames ===
model_path = os.path.join(MODEL_DIR, f"ppo_{TICKER}_{WINDOW}_model.zip")
vecnorm_path = os.path.join(MODEL_DIR, f"ppo_{TICKER}_{WINDOW}_vecnorm.pkl")
# The training cell writes "summary_test_mode.csv" or "summary.csv";
# "summary_results.csv" is kept as a legacy name. Use the newest that exists.
summary_candidates = [
    os.path.join(RESULTS_DIR, name)
    for name in ("summary_results.csv", "summary.csv", "summary_test_mode.csv")
]
existing_summaries = [p for p in summary_candidates if os.path.exists(p)]
summary_path = max(existing_summaries, key=os.path.getmtime) if existing_summaries else summary_candidates[0]

# === Load PPO Model ===
if os.path.exists(model_path):
    model = PPO.load(model_path)
    logging.info(f"Loaded PPO model from: {model_path}")
else:
    logging.warning(f"Model file not found at: {model_path}")
    model = None

# === Load VecNormalize ===
# NOTE(review): the file is written by the object's own save() method and is
# unpickled here with joblib; only load files you produced yourself, since
# unpickling can execute arbitrary code.
if os.path.exists(vecnorm_path):
    vec_env = joblib.load(vecnorm_path)
    logging.info(f"Loaded VecNormalize from: {vecnorm_path}")
else:
    logging.warning(f"VecNormalize file not found at: {vecnorm_path}")
    vec_env = None

# === PPO Model Architecture ===
if model:
    logging.info("PPO Model Architecture:")
    logging.info(str(model.policy))

# === VecNormalize Diagnostics ===
if vec_env:
    try:
        logging.info("VecNormalize Statistics:")
        logging.info(f"Mean: {vec_env.obs_rms.mean}")
        logging.info(f"Var: {vec_env.obs_rms.var}")
        logging.info(f"Clip Range: {vec_env.clip_obs}")
    except Exception as e:
        logging.warning(f"Could not extract VecNormalize stats: {e}")

# === Summary Results (Top 3 by Sharpe) ===
if os.path.exists(summary_path):
    try:
        summary_df = pd.read_csv(summary_path)
        top3 = summary_df.sort_values(by="Sharpe", ascending=False).head(3)
        logging.info(f"Top 3 Results by Sharpe Ratio (from {summary_path}):")
        for _, row in top3.iterrows():
            logging.info(f"{row['Ticker']} | Window: {row['Window']} | Sharpe: {row['Sharpe']:.3f} | Portfolio: {row['PPO_Portfolio']:.2f} | Drawdown: {row['Drawdown_%']:.2f}%")
    except Exception as e:
        logging.warning(f"Error reading summary file: {e}")
else:
    logging.warning(f"No summary CSV found in {RESULTS_DIR}. Skipping top-3 summary.")


In [None]:
import json

# Source notebook (replace with your current notebook path) and the name of
# the cleaned copy to write.
old_file = "multi_stock_ppo_live_inference_v5_1.ipynb"
new_file = "multi_stock_ppo_live_inference_enhanced_v1.ipynb"

# Read the notebook as plain JSON.
with open(old_file, "r", encoding="utf-8") as f:
    nb = json.load(f)

# Drop the top-level "widgets" metadata entry when present; everything else
# in the notebook is left untouched.
nb.get("metadata", {}).pop("widgets", None)

# Write the cleaned notebook next to the original.
with open(new_file, "w", encoding="utf-8") as f:
    json.dump(nb, f, indent=1, ensure_ascii=False)

print(f"✅ Cleaned notebook saved as {new_file}")
