<a href="https://colab.research.google.com/github/racoope70/daytrading-with-ml/blob/main/aapl_ml_model_eval_v1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#Protocol Buffer Fix (for TensorFlow)
!pip install --upgrade protobuf
!pip install protobuf==3.20.3

Collecting protobuf
  Downloading protobuf-6.30.2-cp39-abi3-manylinux2014_x86_64.whl.metadata (593 bytes)
Downloading protobuf-6.30.2-cp39-abi3-manylinux2014_x86_64.whl (316 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m316.2/316.2 kB[0m [31m17.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: protobuf
  Attempting uninstall: protobuf
    Found existing installation: protobuf 5.29.4
    Uninstalling protobuf-5.29.4:
      Successfully uninstalled protobuf-5.29.4
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
grpcio-status 1.71.0 requires protobuf<6.0dev,>=5.26.1, but you have protobuf 6.30.2 which is incompatible.
google-cloud-aiplatform 1.84.0 requires protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<6.0.0dev,>=3.20.2, but you have protobuf 6.30.2 which is incompatible.
tensorflow 2.18.0 requires pro

In [2]:
#Install TensorFlow (latest stable GPU-compatible version)
!pip install tensorflow

#Install Stable Baselines3 and Trading Libraries
!pip install stable-baselines3[extra] gymnasium gym-anytrading yfinance xgboost joblib

#Reinstall RAPIDS dependencies if needed (optional reset)
!pip install --upgrade --force-reinstall \
    dask==2024.11.2 \
    rapids-dask-dependency==24.12.0 \
    cudf-cu12==24.12.0 \
    cuml-cu12==24.12.0 \
    pylibraft-cu12==24.12.0 \
    pylibcudf-cu12==24.12.0 \
    numba==0.61.0

Collecting gym-anytrading
  Downloading gym_anytrading-2.0.0-py3-none-any.whl.metadata (292 bytes)
Collecting stable-baselines3[extra]
  Downloading stable_baselines3-2.6.0-py3-none-any.whl.metadata (4.8 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch<3.0,>=2.3->stable-baselines3[extra])
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch<3.0,>=2.3->stable-baselines3[extra])
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch<3.0,>=2.3->stable-baselines3[extra])
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch<3.0,>=2.3->stable-baselines3[extra])
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.

In [43]:
import torch
import cudf
import cuml
import dask
import pandas as pd
import numpy as np
import scipy
import lightgbm as lgb
import gymnasium as gym
import stable_baselines3

#Version Checks
print("Library Versions")
print("--------------------")
print("PyTorch:", torch.__version__)
print("CUDA:", torch.version.cuda)
print("cuDF:", cudf.__version__)
print("cuML:", cuml.__version__)
print("Dask:", dask.__version__)
print("Pandas:", pd.__version__)
print("NumPy:", np.__version__)
print("SciPy:", scipy.__version__)
print("LightGBM:", lgb.__version__)
print("Gymnasium:", gym.__version__)
print("Stable Baselines3:", stable_baselines3.__version__)

# =========================
#GPU Check (Torch + NVIDIA)
# =========================
print("\nGPU Availability")
print("--------------------")
print("PyTorch GPU Available:", torch.cuda.is_available())
print("GPU Count:", torch.cuda.device_count())
if torch.cuda.is_available():
    print("GPU Name:", torch.cuda.get_device_name(0))


Library Versions
--------------------
PyTorch: 2.6.0+cu124
CUDA: 12.4
cuDF: 24.12.00
cuML: 24.12.00
Dask: 2024.11.2
Pandas: 2.2.3
NumPy: 2.1.3
SciPy: 1.15.2
LightGBM: 4.5.0
Gymnasium: 1.1.1
Stable Baselines3: 2.6.0

GPU Availability
--------------------
PyTorch GPU Available: True
GPU Count: 1
GPU Name: Tesla T4


In [44]:
#Core Libraries
import gc
import json
import os
import pickle
import sys
import time
from collections import defaultdict
from datetime import datetime

#Data Science Essentials
import matplotlib.pyplot as plt
import numba
import numpy as np
import pandas as pd

#Machine Learning & Data Processing
import joblib
import lightgbm as lgb
import xgboost as xgb
from imblearn.over_sampling import SMOTE
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    confusion_matrix,
    f1_score,
)
from sklearn.model_selection import TimeSeriesSplit, train_test_split
from sklearn.preprocessing import MinMaxScaler

#Deep Learning
import tensorflow as tf
from tensorflow.keras import mixed_precision
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.layers import Dense, Dropout, LSTM
from tensorflow.keras.models import Sequential, load_model

#RAPIDS Libraries (cuDF & cuML for GPU acceleration)
import cupy as cp

#Reinforcement Learning (Stable Baselines3)
import stable_baselines3
from stable_baselines3 import A2C, DDPG, DQN, PPO, SAC, TD3
from stable_baselines3.common.callbacks import BaseCallback
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.logger import configure
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.noise import NormalActionNoise
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize

#Gym & Trading Environments
import gym
import gymnasium as gym
import gym_anytrading
from gym.spaces import Box
from gymnasium.spaces import Box as GymBox, Discrete
from gymnasium.wrappers import TimeLimit
from gym_anytrading.envs import StocksEnv

#Financial & Visualization Libraries
import yfinance as yf
import IPython.display as display


In [3]:
#Set CUDA Paths (Ensuring GPU Utilization)
os.environ['CUDA_HOME'] = '/usr/local/cuda-11.8'
os.environ['PATH'] += ':/usr/local/cuda-11.8/bin'
os.environ['LD_LIBRARY_PATH'] += ':/usr/local/cuda-11.8/lib64'

In [4]:
!nvidia-smi

Sat Mar 29 14:49:12 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   41C    P8              9W /   70W |       2MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [61]:
#Utility Functions

def fix_dataframe_index(df):
    """
    Flattens MultiIndex columns and removes duplicated columns.
    """
    if isinstance(df.columns, pd.MultiIndex):
        df.columns = df.columns.get_level_values(0)
    return df.loc[:, ~df.columns.duplicated()]


def calculate_rsi(series, period=14):
    """
    Calculates the Relative Strength Index (RSI) for a given price series.
    """
    delta = series.diff()
    gain = (delta.where(delta > 0, 0)).rolling(window=period).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=period).mean()
    rs = gain / (loss + 1e-6)  # Avoid division by zero
    return 100 - (100 / (1 + rs))


def compute_technical_indicators(df):
    """
    Applies technical indicators including Bollinger Bands, Stochastic Oscillator,
    volatility, momentum, trend, and volume indicators.
    """
    df = df.copy()
    df = fix_dataframe_index(df)

    #Bollinger Bands
    df['SMA_20'] = df['Close'].rolling(window=20).mean()
    df['STD_20'] = df['Close'].rolling(window=20).std()
    df['Upper_Band'] = df['SMA_20'] + 2 * df['STD_20']
    df['Lower_Band'] = df['SMA_20'] - 2 * df['STD_20']

    #Stochastic Oscillator
    df['Lowest_Low'] = df['Low'].rolling(window=14).min()
    df['Highest_High'] = df['High'].rolling(window=14).max()
    denominator = (df['Highest_High'] - df['Lowest_Low']).replace(0, np.nan)
    df['Stoch'] = ((df['Close'] - df['Lowest_Low']) / denominator) * 100

    #Volatility and Momentum
    df['volatility'] = df['Close'].pct_change().rolling(20).std()
    df['ROC'] = df['Close'].pct_change(periods=10)
    df['OBV'] = (np.sign(df['Close'].diff()) * df['Volume']).fillna(0).cumsum()

    #Commodity Channel Index (CCI)
    typical_price = (df['High'] + df['Low'] + df['Close']) / 3
    df['CCI'] = (typical_price - typical_price.rolling(20).mean()) / (
        0.015 * typical_price.rolling(20).std()
    )

    #Price Rate of Change (PROC)
    df['PROC'] = ((df['Close'] - df['Close'].shift(12)) / df['Close'].shift(12)) * 100

    #Rolling and Expanding Means
    df['Rolling_Mean_50'] = df['Close'].rolling(window=50).mean()
    df['Expanding_Mean'] = df['Close'].expanding(min_periods=1).mean()

    df.dropna(inplace=True)
    return df


def generate_trade_labels(df, lookahead=10, threshold_factor=2):
    """
    Generates binary and dynamic trade labels based on future returns
    and volatility-adjusted thresholds.
    """
    df = df.copy()
    df = fix_dataframe_index(df)

    if 'Close' not in df.columns:
        raise KeyError("'Close' column is missing. Cannot generate trade labels.")

    #Calculate future price change for labeling
    df['Future_Close'] = df['Close'].shift(-lookahead)
    df['Price_Change'] = (df['Future_Close'] - df['Close']) / df['Close']

    #Binary trade labels (Target = 1 for gains > 3%, otherwise 0)
    df['Target'] = np.where(df['Price_Change'] > 0.03, 1, 0)

    #Dynamic trade labels with volatility-adjusted thresholds
    buy_threshold = df['volatility'] * threshold_factor
    sell_threshold = -df['volatility'] * threshold_factor
    df['Dynamic_Label'] = np.where(
        df['Price_Change'] > buy_threshold, 1,
        np.where(df['Price_Change'] < sell_threshold, -1, 0)
    )

    df.dropna(inplace=True)
    return df


def drop_low_importance_features(df, feature_importance_df, threshold=1.0):
    """
    Drops low-importance features based on a given threshold.
    """
    low_importance_features = feature_importance_df[
        feature_importance_df['importance'] < threshold
    ]['feature'].tolist()

    if low_importance_features:
        df.drop(columns=low_importance_features, inplace=True)
        print(f"Dropped low-importance features: {low_importance_features}")
    else:
        print("No low-importance features found to drop.")

    return df


In [62]:
def download_stock_data(ticker, period="720d", interval="1h", max_retries=5):
    for attempt in range(1, max_retries + 1):
        try:
            print(f"Attempt {attempt}: Downloading {ticker} stock data...")
            df_live = yf.download(ticker, period=period, interval=interval)
            if not df_live.empty:
                print("Successfully downloaded stock data!")
                df_live.reset_index(inplace=True)
                return df_live
            raise ValueError("Downloaded data is empty. Retrying...")
        except Exception as e:
            print(f"Error: {e}. Retrying in {attempt * 5} seconds...")
            time.sleep(attempt * 5)
    print("Failed to download stock data after multiple attempts.")
    return None

df_live = download_stock_data("aapl")
if df_live is None:
    print("Using previously saved dataset instead.")
    file_path = '/content/drive/My Drive/aaplfeature_engineered_dataset.csv'
    df_live = pd.read_csv(file_path)

df = df_live.copy()

Attempt 1: Downloading aapl stock data...


[*********************100%***********************]  1 of 1 completed

Successfully downloaded stock data!





In [77]:
#Utility Functions
def fix_dataframe_index(df):
    """Flatten MultiIndex columns and remove duplicates."""
    if isinstance(df.columns, pd.MultiIndex):
        df.columns = df.columns.get_level_values(0)
    return df.loc[:, ~df.columns.duplicated()]

def compute_technical_indicators(df):
    """Compute various technical indicators."""
    df = fix_dataframe_index(df)

    df['SMA_20'] = df['Close'].rolling(window=20).mean()
    df['STD_20'] = df['Close'].rolling(window=20).std()
    df['Upper_Band'] = df['SMA_20'] + 2 * df['STD_20']
    df['Lower_Band'] = df['SMA_20'] - df['STD_20']
    df['Stoch'] = ((df['Close'] - df['Low'].rolling(14).min()) /
                   (df['High'].rolling(14).max() - df['Low'].rolling(14).min())) * 100
    df['volatility'] = df['Close'].pct_change().rolling(20).std()
    df['ROC'] = df['Close'].pct_change(periods=10)
    df['OBV'] = (np.sign(df['Close'].diff()) * df['Volume']).fillna(0).cumsum()
    df['CCI'] = (df['Close'] - df['Close'].rolling(20).mean()) / (0.015 * df['Close'].rolling(20).std())
    df['PROC'] = ((df['Close'] - df['Close'].shift(12)) / df['Close'].shift(12)) * 100
    df['Rolling_Mean_50'] = df['Close'].rolling(window=50).mean()
    df['Expanding_Mean'] = df['Close'].expanding(min_periods=1).mean()

    df.dropna(inplace=True)
    return df

def generate_trade_labels(df, lookahead=10, threshold_factor=2):
    """Generate trade labels for future price changes."""
    df['Future_Close'] = df['Close'].shift(-lookahead)
    df['Price_Change'] = (df['Future_Close'] - df['Close']) / df['Close']
    df['Target'] = np.where(df['Price_Change'] > 0.03, 1, 0)
    df.dropna(inplace=True)
    return df

def download_stock_data(ticker, period="720d", interval="1h"):
    """Download stock data from Yahoo Finance."""
    df = yf.download(ticker, period=period, interval=interval)
    df.reset_index(inplace=True)
    return df

#Dynamic SMOTE Resampling
def apply_smote(X_train, y_train, min_samples=600):
    """Apply SMOTE dynamically with minimum samples."""
    majority_class_size = sum(y_train == 0)
    minority_class_size = sum(y_train == 1)

    if minority_class_size == 0 or majority_class_size == 0:
        print("Skipping SMOTE: No minority/majority class found.")
        return X_train, y_train

    #Skip SMOTE if class ratio is already balanced
    if minority_class_size / majority_class_size >= 0.5:
        print("Class already balanced. Skipping SMOTE.")
        return X_train, y_train

    #Set dynamic ratio for SMOTE
    sampling_ratio = min(0.5, min_samples / minority_class_size)
    if sampling_ratio <= 0 or sampling_ratio > 1:
        print(f"Skipping SMOTE due to invalid ratio: {sampling_ratio:.2f}")
        return X_train, y_train

    print(f"Applying SMOTE with ratio: {sampling_ratio:.2f}")
    smote = SMOTE(sampling_strategy=sampling_ratio, random_state=42)
    X_resampled, y_resampled = smote.fit_resample(X_train, y_train)

    print(f"New Class Distribution: {np.bincount(y_resampled)}")
    return X_resampled, y_resampled

# ================================
#Random Forest Training
# ================================
def train_walk_forward(df, features, label='Target', model_path="rf_aapl.pkl"):
    """Train Random Forest with walk-forward validation."""
    tscv = TimeSeriesSplit(n_splits=5)
    accuracy_scores = []

    for train_idx, test_idx in tscv.split(df):
        train, test = df.iloc[train_idx], df.iloc[test_idx]
        X_train, y_train = train[features], train[label]
        X_test, y_test = test[features], test[label]

        #Apply SMOTE with dynamic ratio
        X_train_resampled, y_train_resampled = apply_smote(X_train, y_train)

        #Train Random Forest
        model = RandomForestClassifier(n_estimators=100, random_state=42)
        model.fit(X_train_resampled, y_train_resampled)
        joblib.dump(model, model_path)

        #Evaluate model
        probs = model.predict_proba(X_test)[:, 1]
        preds = (probs > 0.5).astype(int)
        acc = accuracy_score(y_test, preds)
        accuracy_scores.append(acc)

    print(f"\nAvg Accuracy Across Time Splits: {np.mean(accuracy_scores):.4f}")
    print(f"Model saved to: {model_path}")
    return model

# ================================
#Evaluate Model with Portfolio Simulation
# ================================
def evaluate_rf_model(df, features, label='Target', model_path="rf_aapl.pkl"):
    """Evaluate trained Random Forest model and simulate portfolio."""
    model = joblib.load(model_path)
    X_test, y_test = df[features], df[label]
    probs = model.predict_proba(X_test)[:, 1]

    #Tune Threshold for Best F1-Score
    best_threshold = 0.5
    best_f1 = 0

    for threshold in np.arange(0.4, 0.6, 0.05):
        preds = (probs > threshold).astype(int)
        f1 = f1_score(y_test, preds, zero_division=0)
        if f1 > best_f1:
            best_f1 = f1
            best_threshold = threshold

    print(f"Optimal Threshold: {best_threshold:.2f} with F1-Score: {best_f1:.4f}")
    preds = (probs > best_threshold).astype(int)

    #Simulate Portfolio with Constraints
    portfolio_value, shares_held = 100000, 0
    MAX_POSITION_SIZE = 200
    TRANSACTION_COST = 0.001
    MAX_PORTFOLIO_LIMIT = 500000
    values = []

    for i in range(len(df)):
        price = df["Close"].iloc[i]

        #Buy Signal
        if preds[i] == 1 and shares_held == 0 and portfolio_value > price:
            shares_held = min(portfolio_value / price, MAX_POSITION_SIZE)
            portfolio_value -= shares_held * price
            portfolio_value -= portfolio_value * TRANSACTION_COST
        elif preds[i] == 0 and shares_held > 0:
            #Sell Signal
            portfolio_value += shares_held * price
            portfolio_value -= portfolio_value * TRANSACTION_COST
            shares_held = 0

        #Apply Portfolio Cap
        portfolio_value = min(portfolio_value + (shares_held * price), MAX_PORTFOLIO_LIMIT)
        values.append(portfolio_value)

    final_value = values[-1]
    profit_loss = final_value - 100000

    print(f"\Final Portfolio Value: ${final_value:,.2f} | Profit/Loss: ${profit_loss:,.2f}")
    print(classification_report(y_test, preds, zero_division=0))

    #Save Results
    with open("rf_aapl_results.json", "w") as f:
        json.dump({"final_value": final_value, "profit_loss": profit_loss}, f, indent=2)

    print(f"Results saved to: rf_aapl_results.json")
    return final_value, profit_loss

#Main Pipeline
#Download and Prepare Data
df = download_stock_data("aapl")
df = compute_technical_indicators(df)
df = generate_trade_labels(df)

#Inspect Label Distribution
print("appl Target label distribution:")
print(df['Target'].value_counts())

#Define Features for Model
features = [
    'SMA_20', 'STD_20', 'Upper_Band', 'Lower_Band', 'Stoch', 'volatility',
    'OBV', 'CCI', 'ROC', 'PROC', 'Rolling_Mean_50', 'Expanding_Mean'
]

#Train and Evaluate Model
model_path = "rf_aapl.pkl"
rf_model = train_walk_forward(df, features, label='Target', model_path=model_path)
final_value, profit_loss = evaluate_rf_model(df, features, label='Target', model_path=model_path)
#Save results clearly in structured dictionary
rf_results = {
    "model_name": "Random Forest",
    "final_value": final_value,
    "profit_loss": profit_loss
}

#Write results to JSON file
with open("rf_results.json", "w") as f:
    json.dump(rf_results, f, indent=2)

print("LightGBM results saved successfully!")

[*********************100%***********************]  1 of 1 completed


appl Target label distribution:
Target
0    4679
1     278
Name: count, dtype: int64
Applying SMOTE with ratio: 0.50
New Class Distribution: [723 361]
Applying SMOTE with ratio: 0.50
New Class Distribution: [1478  739]
Applying SMOTE with ratio: 0.50
New Class Distribution: [2296 1148]
Applying SMOTE with ratio: 0.50
New Class Distribution: [3097 1548]
Applying SMOTE with ratio: 0.50
New Class Distribution: [3879 1939]

Avg Accuracy Across Time Splits: 0.9215
Model saved to: rf_aapl.pkl
Optimal Threshold: 0.45 with F1-Score: 0.9509

Final Portfolio Value: $500,000.00 | 📈 Profit/Loss: $400,000.00
              precision    recall  f1-score   support

           0       0.99      1.00      1.00      4679
           1       1.00      0.91      0.95       278

    accuracy                           0.99      4957
   macro avg       1.00      0.95      0.97      4957
weighted avg       0.99      0.99      0.99      4957

Results saved to: rf_aapl_results.json
LightGBM results saved successf

In [64]:
# ✅ Core Libraries and Imports
import os
import json
import joblib
import numpy as np
import pandas as pd
from datetime import datetime
from imblearn.over_sampling import ADASYN
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import f1_score, precision_recall_curve
import lightgbm as lgb
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
import matplotlib.pyplot as plt
#Utility Functions

def fix_dataframe_index(df):
    """
    Flattens MultiIndex columns and removes duplicated columns.
    """
    if isinstance(df.columns, pd.MultiIndex):
        df.columns = df.columns.get_level_values(0)
    return df.loc[:, ~df.columns.duplicated()]


def calculate_rsi(series, period=14):
    """
    Calculates the Relative Strength Index (RSI) for a given price series.
    """
    delta = series.diff()
    gain = (delta.where(delta > 0, 0)).rolling(window=period).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=period).mean()
    rs = gain / (loss + 1e-6)
    return 100 - (100 / (1 + rs))


def compute_technical_indicators(df):
    """
    Applies technical indicators including Bollinger Bands, RSI, MACD,
    volatility, trend, and volume indicators.
    """
    df = df.copy()
    df = fix_dataframe_index(df)

    df['SMA_20'] = df['Close'].rolling(window=20).mean()
    df['STD_20'] = df['Close'].rolling(window=20).std()
    df['Upper_Band'] = df['SMA_20'] + 2 * df['STD_20']
    df['Lower_Band'] = df['SMA_20'] - 2 * df['STD_20']

    df['RSI_14'] = calculate_rsi(df['Close'], period=14)

    #MACD Indicator
    df['MACD'] = df['Close'].ewm(span=12, adjust=False).mean() - df['Close'].ewm(span=26, adjust=False).mean()

    df['volatility'] = df['Close'].pct_change().rolling(20).std()
    df['ROC'] = df['Close'].pct_change(periods=10)
    df['OBV'] = (np.sign(df['Close'].diff()) * df['Volume']).fillna(0).cumsum()

    typical_price = (df['High'] + df['Low'] + df['Close']) / 3
    df['CCI'] = (typical_price - typical_price.rolling(20).mean()) / (
        0.015 * typical_price.rolling(20).std()
    )

    df['PROC'] = ((df['Close'] - df['Close'].shift(12)) / df['Close'].shift(12)) * 100
    df['Rolling_Mean_50'] = df['Close'].rolling(window=50).mean()
    df['Expanding_Mean'] = df['Close'].expanding(min_periods=1).mean()

    df.dropna(inplace=True)
    return df


def generate_trade_labels(df, lookahead=10, threshold_factor=2):
    """
    Generates binary and dynamic trade labels based on future returns
    and volatility-adjusted thresholds.
    """
    df = df.copy()
    df = fix_dataframe_index(df)

    if 'Close' not in df.columns:
        raise KeyError("'Close' column is missing. Cannot generate trade labels.")

    df['Future_Close'] = df['Close'].shift(-lookahead)
    df['Price_Change'] = (df['Future_Close'] - df['Close']) / df['Close']
    df['Target'] = np.where(df['Price_Change'] > 0.03, 1, 0)

    buy_threshold = df['volatility'] * threshold_factor
    sell_threshold = -df['volatility'] * threshold_factor

    df['Dynamic_Label'] = np.where(df['Price_Change'] > buy_threshold, 1,
                            np.where(df['Price_Change'] < sell_threshold, -1, 0))

    df.dropna(inplace=True)
    return df


In [65]:
#Hybrid Trainer with ADASYN

def train_lgbm_hybrid(df, features, label='Target', model_path="lgbm_trading_model.pkl"):
    """
    Trains a LightGBM model with ADASYN for imbalance handling and saves the model.
    """
    tscv = TimeSeriesSplit(n_splits=5)
    f1_scores = []

    for train_idx, test_idx in tscv.split(df):
        train, test = df.iloc[train_idx], df.iloc[test_idx]
        X_train, y_train = train[features], train[label]
        X_test, y_test = test[features], test[label]

        #Apply ADASYN instead of SMOTE
        adasyn = ADASYN(sampling_strategy=0.9, random_state=42)
        X_resampled, y_resampled = adasyn.fit_resample(X_train, y_train)

        #Updated LightGBM model with better hyperparameters
        model = lgb.LGBMClassifier(
            n_estimators=3000,
            learning_rate=0.005,
            num_leaves=64,
            max_depth=8,
            min_data_in_leaf=50,
            max_bin=1024,
            colsample_bytree=0.8,
            subsample=0.8,
            reg_alpha=0.1,
            reg_lambda=0.1,
            force_col_wise=True,
            random_state=42
        )

        #Train with early stopping and patience of 200 rounds
        model.fit(
            X_resampled, y_resampled,
            eval_set=[(X_test, y_test)],
            callbacks=[lgb.early_stopping(200, verbose=100)]
        )

        preds = model.predict(X_test)
        f1 = f1_score(y_test, preds, zero_division=0)
        f1_scores.append(f1)

    print(f"\nAvg F1 Across Splits: {np.mean(f1_scores):.4f}")
    joblib.dump(model, model_path)
    print(f"Model saved: {model_path}")
    return model


In [66]:
#Evaluation and Portfolio Simulation
def evaluate_lgbm(model, df, features, initial_cash=100000):
    """
    Evaluates a LightGBM model and simulates a trading portfolio.
    """
    X = df[features]
    y_pred_proba = model.predict_proba(X)[:, 1]

    best_f1, best_threshold = 0, 0.5
    for thresh in np.arange(0.3, 0.7, 0.02):
        preds = (y_pred_proba > thresh).astype(int)
        f1 = f1_score(df['Target'], preds, zero_division=0)
        if f1 > best_f1:
            best_f1, best_threshold = f1, thresh

    print(f"Optimal Threshold: {best_threshold:.2f} (F1={best_f1:.4f})")
    preds = (y_pred_proba > best_threshold).astype(int)

    portfolio_value, shares_held = initial_cash, 0
    TRANSACTION_COST, MAX_POSITION_SIZE = 0.001, 200
    values = []

    for price, pred in zip(df['Close'], preds):
        if pred == 1 and shares_held == 0:
            shares_held = min(portfolio_value / price, MAX_POSITION_SIZE)
            portfolio_value -= shares_held * price
            portfolio_value -= portfolio_value * TRANSACTION_COST
        elif pred == 0 and shares_held > 0:
            portfolio_value += shares_held * price
            portfolio_value -= portfolio_value * TRANSACTION_COST
            shares_held = 0
        values.append(portfolio_value + shares_held * price)

    final_value = values[-1]
    profit_loss = final_value - initial_cash

    print(f"\nFinal Portfolio: ${final_value:,.2f} | Profit: ${profit_loss:,.2f}")
    with open("lgbm_realistic_results.json", "w") as f:
        json.dump({"final_value": final_value, "profit_loss": profit_loss}, f, indent=2)

    print("Results saved: lgbm_realistic_results.json")
    return final_value, profit_loss


In [67]:
# Precision-Recall Curve Threshold
def get_optimal_threshold(y_test, y_pred_proba):
    """
    Finds the best threshold using Precision-Recall Curve.
    """
    precision, recall, thresholds = precision_recall_curve(y_test, y_pred_proba)
    optimal_idx = np.argmax(precision * recall)
    optimal_threshold = thresholds[optimal_idx]
    print(f"Optimal Threshold for Precision-Recall: {optimal_threshold:.2f}")
    return optimal_threshold

In [52]:
#Ensemble Model Training (Optional)
def train_ensemble(X_train, y_train):
    """
    Trains an ensemble of LightGBM, Random Forest, and XGBoost models.
    """
    lgb_model = lgb.LGBMClassifier(n_estimators=3000, learning_rate=0.005, random_state=42)
    rf_model = RandomForestClassifier(n_estimators=200, max_depth=10, random_state=42)
    xgb_model = XGBClassifier(n_estimators=300, learning_rate=0.01, random_state=42)

    lgb_model.fit(X_train, y_train)
    rf_model.fit(X_train, y_train)
    xgb_model.fit(X_train, y_train)

    return lgb_model, rf_model, xgb_model


In [53]:
#Evaluation and Portfolio Simulation
def evaluate_lgbm(model, df, features, initial_cash=100000):
    """
    Evaluates a LightGBM model and simulates a trading portfolio.
    """
    X = df[features]
    y_pred_proba = model.predict_proba(X)[:, 1]

    optimal_threshold = get_optimal_threshold(df['Target'], y_pred_proba)
    preds = (y_pred_proba > optimal_threshold).astype(int)

    portfolio_value, shares_held = initial_cash, 0
    TRANSACTION_COST, MAX_POSITION_SIZE = 0.001, 200
    values = []

    for price, pred in zip(df['Close'], preds):
        if pred == 1 and shares_held == 0:
            shares_held = min(portfolio_value / price, MAX_POSITION_SIZE)
            portfolio_value -= shares_held * price
            portfolio_value -= portfolio_value * TRANSACTION_COST
        elif pred == 0 and shares_held > 0:
            portfolio_value += shares_held * price
            portfolio_value -= portfolio_value * TRANSACTION_COST
            shares_held = 0
        values.append(portfolio_value + shares_held * price)

    final_value = values[-1]
    profit_loss = final_value - initial_cash

    print(f"\nFinal Portfolio: ${final_value:,.2f} | Profit: ${profit_loss:,.2f}")
    with open("lgbm_realistic_results.json", "w") as f:
        json.dump({"final_value": final_value, "profit_loss": profit_loss}, f, indent=2)

    print("Results saved: lgbm_realistic_results.json")
    return final_value, profit_loss


In [76]:
#Main Execution

#Load and prepare data
df = compute_technical_indicators(df)
df = generate_trade_labels(df)

features = [
    'SMA_20', 'STD_20', 'Upper_Band', 'Lower_Band', 'Stoch', 'volatility',
    'OBV', 'CCI', 'ROC', 'PROC', 'Rolling_Mean_50', 'Expanding_Mean',
    'RSI_14', 'MACD'  #Added New Features
]

#Train and Evaluate LightGBM Model with ADASYN
model_path = "lgbm_trading_model.pkl"
model = train_lgbm_hybrid(df, features, label='Target', model_path=model_path)

#Get feature importance and drop low-importance features
importances = model.feature_importances_
feature_importance_df = pd.DataFrame({'feature': features, 'importance': importances})
feature_importance_df = feature_importance_df.sort_values(by='importance', ascending=False)
print("Feature Importance Summary:")
print(feature_importance_df)

#Drop low-importance features
low_importance_features = feature_importance_df[feature_importance_df['importance'] < 1.0]['feature'].tolist()
if low_importance_features:
    df.drop(columns=low_importance_features, inplace=True)
    print(f"Dropped low-importance features: {low_importance_features}")
else:
    print("No low-importance features found to drop.")

#Evaluate with Portfolio Simulation
#Evaluate with Portfolio Simulation and Save Results
final_value, profit_loss = evaluate_lgbm(model, df, features)

#Save results clearly in structured dictionary
lgbm_results = {
    "model_name": "LightGBM",
    "final_value": final_value,
    "profit_loss": profit_loss
}

#Write results to JSON file
with open("lgbm_results.json", "w") as f:
    json.dump(lgbm_results, f, indent=2)

print("LightGBM results saved successfully!")




[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Early stopping, best iteration is:
[97]	valid_0's binary_logloss: 0.60421
[LightGBM] [Info] Number of positive: 1283, number of negative: 1449
[LightGBM] [Info] Total Bins 12753
[LightGBM] [Info] Number of data points in the train set: 2732, number of used features: 14
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.469619 -> initscore=-0.121673
[LightGBM] [Info] Start training from score -0.121673
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[1259]	valid_0's binary_logloss: 0.0677796
[LightGBM] [Info] Number of positive: 1998, number of negative: 2246
[LightGBM] [Info] Total Bins 14336
[LightGBM] [Info] Number of data points in the train set: 4244, number of used features: 14
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.470782 -> initscore=-0.117004
[LightGBM] [Info] Start training from score -0.117004
Training until validation scores don't improve for 200 rounds

In [69]:
gc.collect()
torch.cuda.empty_cache()

In [70]:
#Function to calculate RSI
def calculate_rsi(series, period=14):
    delta = series.diff()
    gain = (delta.where(delta > 0, 0)).rolling(window=period).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=period).mean()
    rs = gain / (loss + 1e-6)
    return 100 - (100 / (1 + rs))

class DiscreteTradingEnv(gym.Env):
    def __init__(self, df, frame_bound=(10, 100), window_size=10, verbose=False):
        super().__init__()
        self.df = df.reset_index(drop=True)
        self.frame_bound = frame_bound
        self.window_size = window_size
        self.current_step = self.frame_bound[0]
        self.done = False
        self.verbose = verbose

        #Portfolio Variables
        self.initial_balance = 100000
        self.portfolio_value = self.initial_balance
        self.shares_held = 0
        self.last_trade_price = 0

        #Add RSI and Moving Average
        self.df['RSI'] = calculate_rsi(self.df['Close'])
        self.df['SMA'] = self.df['Close'].rolling(window=window_size).mean()

        #Define Action and Observation Space
        self.action_space = Discrete(3)  # 0 = SELL, 1 = HOLD, 2 = BUY
        self.observation_space = Box(low=-np.inf, high=np.inf, shape=(window_size + 4,), dtype=np.float32)

    def reset(self, seed=None, options=None):
        super().reset(seed=seed)
        self.current_step = self.frame_bound[0]
        self.done = False

    def reset(self, seed=None, options=None):
        super().reset(seed=seed)
        self.current_step = self.frame_bound[0]
        self.done = False
        self.portfolio_value = self.initial_balance
        self.shares_held = 0
        self.last_trade_price = 0


        #Reset Portfolio
        self.portfolio_value = self.initial_balance
        self.shares_held = 0
        self.last_trade_price = 0

        return self._next_observation(), {}

    def step(self, action):
        if self.current_step >= len(self.df) - 1:
            self.done = True
            return self._next_observation(), 0, self.done, False, {}

        self.current_step += 1
        new_price = self.df['Close'].iloc[self.current_step]

        reward = self._adjusted_reward_function(action, new_price)

        return self._next_observation(), reward, self.done, False, {}

    def _next_observation(self):
        """Returns stock prices, RSI, SMA, and portfolio status."""
        stock_prices = np.array(self.df['Close'].iloc[self.current_step - self.window_size:self.current_step], dtype=np.float32)
        rsi = self.df['RSI'].iloc[self.current_step] if not np.isnan(self.df['RSI'].iloc[self.current_step]) else 50
        sma = self.df['SMA'].iloc[self.current_step] if not np.isnan(self.df['SMA'].iloc[self.current_step]) else self.df['Close'].iloc[self.current_step]

        return np.concatenate(([self.portfolio_value], stock_prices, [self.shares_held, rsi, sma]))

    def _adjusted_reward_function(self, action, new_price):
        """Improves reward logic for better DQN trading decisions."""
        reward = 0

        #BUY ACTION: Only Buy on Dip
        if action == 2 and self.portfolio_value > 5000:
            allocated_funds = min(self.portfolio_value * 0.3, max(self.portfolio_value * 0.1, 5000))
            if self.portfolio_value >= allocated_funds:
                shares_bought = allocated_funds / new_price
                self.shares_held += shares_bought
                self.portfolio_value -= shares_bought * new_price
                self.last_trade_price = new_price

                price_change = (self.last_trade_price - new_price) / max(new_price, 1e-6)
                if price_change > 0.005:
                    reward += price_change * 100
                else:
                    reward -= 3

        #SELL ACTION: Encourages Profitable Selling
        elif action == 0 and self.shares_held > 0:
            sell_value = self.shares_held * new_price
            profit_percent = (new_price - self.last_trade_price) / max(self.last_trade_price, 1e-6)

            if profit_percent > 0.02:
                reward = profit_percent * 300
            elif profit_percent > 0.005:
                reward = profit_percent * 50
            else:
                reward = -10

            self.portfolio_value += sell_value
            self.shares_held = 0
            self.last_trade_price = 0

        #HOLD ACTION: Penalize Holding Too Long
        elif action == 1:
            unrealized_profit = (new_price - self.last_trade_price) * self.shares_held
            reward = np.tanh(unrealized_profit / self.initial_balance) * 10
            if self.shares_held > 0 and unrealized_profit < 0:
                reward -= 5

        return reward


In [71]:
class ContinuousTradingEnv(gym.Env):
    def __init__(self, df, frame_bound=(10, 100), window_size=10, verbose=False):
        super().__init__()
        self.df = df.reset_index(drop=True)
        self.frame_bound = frame_bound
        self.window_size = window_size
        self.verbose = verbose

        self.current_step = self.frame_bound[0]
        self.max_steps = frame_bound[1] - frame_bound[0]

        #Features
        self.features = [
            'Close', 'SMA_20', 'STD_20', 'Upper_Band', 'Lower_Band', 'Stoch', 'volatility',
            'OBV', 'CCI', 'ROC', 'PROC', 'Rolling_Mean_50', 'Expanding_Mean'
        ]

        #Spaces
        self.action_space = Box(low=-1.0, high=1.0, shape=(1,), dtype=np.float32)
        self.observation_space = Box(low=0, high=1, shape=(len(self.features) + 2,), dtype=np.float32)

        #Portfolio
        self.initial_balance = 100000
        self.reset_portfolio()

    def reset_portfolio(self):
        self.portfolio_value = self.initial_balance
        self.shares_held = 0
        self.total_reward = 0

    def reset(self, seed=None, options=None):
        super().reset(seed=seed)
        self.current_step = self.frame_bound[0]
        self.reset_portfolio()
        return self._next_observation(), {}

    def _next_observation(self):
        current_data = self.df[self.features].iloc[self.current_step]
        max_vals = self.df[self.features].max()
        norm_features = current_data / max_vals.replace(0, 1)

        balance_norm = self.portfolio_value / self.initial_balance
        shares_norm = self.shares_held / 1000  # normalize

        obs = np.concatenate(([balance_norm, shares_norm], norm_features.values)).astype(np.float32)
        return obs

    def step(self, action):
        terminated = self.current_step >= len(self.df) - 1
        truncated = (self.current_step - self.frame_bound[0]) >= self.max_steps
        done = terminated or truncated

        if done:
            return self._next_observation(), 0.0, terminated, truncated, {}

        self.current_step += 1
        price = self.df['Close'].iloc[self.current_step]
        prev_price = self.df['Close'].iloc[self.current_step - 1]

        if isinstance(action, (np.ndarray, list)):
          action = float(np.clip(action[0], -1, 1))
        else:
          action = float(np.clip(action, -1, 1))

        trade_size = action * self.portfolio_value * 0.1
        shares = abs(trade_size) / max(price, 1e-3)
        reward = 0

        #Buy
        if action > 0 and self.portfolio_value >= shares * price:
            self.shares_held += shares
            self.portfolio_value -= shares * price

        #Sell
        elif action < 0 and self.shares_held > 0:
            shares_sold = min(shares, self.shares_held)
            self.portfolio_value += shares_sold * price
            self.shares_held -= shares_sold

            profit = (price - prev_price) * shares_sold
            reward = profit / self.initial_balance * 100

        #Penalty
        reward -= 0.001 * self.shares_held
        self.total_reward += reward

        return self._next_observation(), reward, terminated, truncated, {
            "portfolio_value": self.portfolio_value,
            "step": self.current_step,
            "shares_held": self.shares_held,
            "price": price
        }

    def render(self):
        print(f"Step: {self.current_step} | Portfolio: ${self.portfolio_value:.2f} | Shares Held: {self.shares_held:.2f}")


In [79]:
import numpy as np
import pandas as pd
import gc
import xgboost as xgb
from sklearn.metrics import accuracy_score

#Choose Dataset for Training
df = df_live.copy()

#Fix MultiIndex Issues (if applicable)
if isinstance(df.columns, pd.MultiIndex):
    df.columns = df.columns.get_level_values(0)

#Remove Named Index (Fixes KeyError Issues)
df.columns.name = None  # Remove 'Price' as the column index name

#Ensure EMA_10 and EMA_50 exist before XGBoost training
if 'EMA_10' not in df.columns or 'EMA_50' not in df.columns:
    print("'EMA_10' or 'EMA_50' missing. Recomputing EMAs...")
    df['EMA_10'] = df['Close'].ewm(span=10, adjust=False).mean()
    df['EMA_50'] = df['Close'].ewm(span=50, adjust=False).mean()
    df.dropna(subset=['EMA_10', 'EMA_50'], inplace=True)  # Drop NaNs from EMA calculations
    print("EMAs computed successfully.")

#Generate Trade Signals (BUY = 1, SELL = 0)
df['Trade_Signal'] = 0  # Default to SELL (0)

#Identify BUY signals (when EMA_10 crosses above EMA_50)
df.loc[df['EMA_10'] > df['EMA_50'], 'Trade_Signal'] = 1

#Ensure Trade Signal Exists
if 'Trade_Signal' not in df.columns or df['Trade_Signal'].isnull().all():
    raise ValueError("Trade_Signal column is still missing or empty. Check feature calculations!")

print("Trade signals generated successfully!")

#Define Feature Columns (Exclude Target Column)
feature_columns = ['Close', 'EMA_10', 'EMA_50']
target_column = 'Trade_Signal'

#Drop NaN Values (if any)
df = df.dropna(subset=feature_columns + [target_column])

#Split Data into Train & Test Sets
train_size = int(0.8 * len(df))  # 80% Training, 20% Testing
X_train_xgb, y_train_xgb = df[feature_columns][:train_size], df[target_column][:train_size]
X_test_xgb, y_test_xgb = df[feature_columns][train_size:], df[target_column][train_size:]

#Train XGBoost Model
GPU_AVAILABLE = True  # Set to False if no GPU is available
xgb_model = xgb.XGBClassifier(
    n_estimators=50,
    learning_rate=0.1,
    tree_method='hist' if GPU_AVAILABLE else 'exact',
    random_state=42
)
xgb_model.fit(X_train_xgb, y_train_xgb)

#Compute Accuracy
xgb_accuracy = accuracy_score(y_test_xgb, xgb_model.predict(X_test_xgb))
print(f"XGBoost Accuracy: {xgb_accuracy:.4f}")

#Free Memory
del X_train_xgb, y_train_xgb, X_test_xgb, y_test_xgb
gc.collect()

'EMA_10' or 'EMA_50' missing. Recomputing EMAs...
EMAs computed successfully.
Trade signals generated successfully!
XGBoost Accuracy: 0.7122


79

In [81]:
#Generate Trade Predictions for Portfolio Simulation
df['Trade_Signal_XGB'] = xgb_model.predict(df[feature_columns])

#Portfolio Simulation for XGBoost Model
portfolio_values_xgb = []
capital_xgb = 100000
shares_xgb = 0
buy_price_xgb = None
max_portfolio_value_xgb = capital_xgb

for i, trade in enumerate(df['Trade_Signal_XGB']):
    price = df['Close'].iloc[i]

    if trade == 1 and capital_xgb >= price and buy_price_xgb is None:
        shares_xgb = capital_xgb // price
        buy_price_xgb = price
        capital_xgb -= shares_xgb * price
    elif trade == 0 and shares_xgb > 0:
        capital_xgb += shares_xgb * price
        shares_xgb = 0
        buy_price_xgb = None

    #Update Portfolio Value
    portfolio_value_xgb = capital_xgb + (shares_xgb * price)
    portfolio_values_xgb.append(portfolio_value_xgb)

    #Track Max Drawdown
    max_portfolio_value_xgb = max(max_portfolio_value_xgb, portfolio_value_xgb)

#Convert Portfolio Values to DataFrame
results_df_xgb = pd.DataFrame({'Date': df.index, 'Portfolio Value': portfolio_values_xgb})

#Compute Performance Metrics for XGBoost
results_df_xgb['Daily Return'] = results_df_xgb['Portfolio Value'].pct_change().fillna(0)
xgb_cumulative_return = ((results_df_xgb['Portfolio Value'].iloc[-1] / 100000) - 1) * 100
daily_return_mean_xgb = results_df_xgb['Daily Return'].mean()
daily_return_std_xgb = results_df_xgb['Daily Return'].std()
xgb_sharpe_ratio = (daily_return_mean_xgb / daily_return_std_xgb) * np.sqrt(252) if daily_return_std_xgb != 0 else 0
drawdown_xgb = (results_df_xgb['Portfolio Value'].cummax() - results_df_xgb['Portfolio Value']) / results_df_xgb['Portfolio Value'].cummax()
xgb_max_drawdown = drawdown_xgb.max() * 100

#Print Performance Metrics
print("\n **XGBoost Model Performance**")
print(f"XGBoost Final Portfolio Value: ${results_df_xgb['Portfolio Value'].iloc[-1]:,.2f}")
print(f"XGBoost Cumulative Return: {xgb_cumulative_return:.2f}%")
print(f"XGBoost Sharpe Ratio: {xgb_sharpe_ratio:.2f}")
print(f"XGBoost Max Drawdown: {xgb_max_drawdown:.2f}%")

#Save XGBoost results clearly in structured dictionary
xgb_results = {
    "model_name": "XGBoost",
    "final_value": results_df_xgb['Portfolio Value'].iloc[-1],
    "profit_loss": results_df_xgb['Portfolio Value'].iloc[-1] - 100000,
    "cumulative_return_pct": xgb_cumulative_return,
    "sharpe_ratio": xgb_sharpe_ratio,
    "max_drawdown_pct": xgb_max_drawdown
}

#Write results to JSON file
with open("xgb_results.json", "w") as f:
    json.dump(xgb_results, f, indent=2)

print("XGBoost results saved successfully!")


**XGBoost Model Performance**
XGBoost Final Portfolio Value: $103,687.49
XGBoost Cumulative Return: 3.69%
XGBoost Sharpe Ratio: 0.06
XGBoost Max Drawdown: 28.11%
XGBoost results saved successfully!


In [82]:
gc.collect()
torch.cuda.empty_cache()

In [74]:
#STEP 1: Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')
#STEP 2: Create directory if it doesn't exist
import os
os.makedirs("/content/drive/MyDrive", exist_ok=True)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [83]:
import json
import pandas as pd

#STEP 1: Collect ML model results
ml_results = {}
if 'rf_results' in locals(): ml_results["RandomForest"] = rf_results["final_value"]
if 'lgbm_results' in locals(): ml_results["LightGBM"] = lgbm_results["final_value"]
if 'xgb_results' in locals(): ml_results["XGBoost"] = xgb_results["final_value"]

#STEP 2: Save ML results to JSON
with open("ml_model_final_values.json", "w") as f:
    json.dump(ml_results, f, indent=2)

#STEP 3: Reload results
with open("ml_model_final_values.json") as f:
    ml_results_loaded = json.load(f)

#STEP 4: Determine best ML model
best_ml_model = max(ml_results_loaded, key=ml_results_loaded.get) if ml_results_loaded else "N/A"

#STEP 5: Print best ML model
if best_ml_model != "N/A":
    print(f"Best Machine Learning Model: {best_ml_model} (${ml_results_loaded[best_ml_model]:,.2f})")
else:
    print("No ML models available.")

#STEP 6: Save ML results to CSV
df_ml_results = pd.DataFrame(list(ml_results_loaded.items()), columns=["Model", "Final Portfolio Value"])
csv_path = "/content/drive/MyDrive/ml_model_final_values.csv"
df_ml_results.to_csv(csv_path, index=False)
print(f"ML Results saved as CSV: {csv_path}")


Best Machine Learning Model: RandomForest ($500,000.00)
ML Results saved as CSV: /content/drive/MyDrive/ml_model_final_values.csv
