In [1]:
import kagglehub
import pandas as pd
import os
import ta
import shutil
import os



import numpy as np
import matplotlib.pyplot as plt

import gymnasium as gym
from gymnasium import spaces

from sklearn.preprocessing import MinMaxScaler

from stable_baselines3 import PPO, A2C, DDPG, SAC, TD3
from stable_baselines3.common.vec_env import DummyVecEnv


In [3]:
# Download latest version
# path = kagglehub.dataset_download("debashis74017/algo-trading-data-nifty-100-data-with-indicators")
path_sp500 = kagglehub.dataset_download(
    "gratefuldata/intraday-stock-data-1-min-sp-500-200821"
)


print("Path to dataset files:", path_sp500)

print("\nFiles in S&P 500 dataset directory:")
try:
    files_sp500 = os.listdir(path_sp500)
    print(files_sp500)
except FileNotFoundError:
    print(f"Error: Directory not found at {path_sp500}")
except Exception as e:
    print(f"An error occurred: {e}")

local_dir = "./datasets/sp500"
os.makedirs(local_dir, exist_ok=True)  # Create the directory if it doesn't exist

for fname in files_sp500:
    src = os.path.join(path_sp500, fname)
    dst = os.path.join(local_dir, fname)
    shutil.copy2(src, dst)  # Use copy2 to preserve metadata

print(f"Files saved to {local_dir}")


full_csv_path = os.path.join(path_sp500, files_sp500[0])

df = pd.read_csv(full_csv_path)
df.head()

Path to dataset files: C:\Users\john\.cache\kagglehub\datasets\gratefuldata\intraday-stock-data-1-min-sp-500-200821\versions\1

Files in S&P 500 dataset directory:
['1_min_SPY_2008-2021.csv']
Files saved to ./datasets/sp500


Unnamed: 0.1,Unnamed: 0,date,open,high,low,close,volume,barCount,average
0,0,20090522 07:30:00,89.45,89.46,89.37,89.37,7872,2102,89.424
1,1,20090522 07:31:00,89.38,89.53,89.37,89.5,5336,1938,89.468
2,2,20090522 07:32:00,89.51,89.54,89.48,89.49,3349,1184,89.516
3,3,20090522 07:33:00,89.49,89.49,89.31,89.34,3495,1240,89.386
4,4,20090522 07:34:00,89.33,89.46,89.33,89.39,9731,2637,89.379


Get data from csv

In [3]:
output_path = "sp500_with_ta_features.csv"
if os.path.exists(output_path):
    df = pd.read_csv(output_path)
    # Ensure datetime column is correctly parsed when loading from CSV
    df["datetime"] = pd.to_datetime(df["datetime"])
else:
    df["datetime"] = pd.to_datetime(df["date"])

    start_date = pd.Timestamp("2018-01-01")
    end_date = pd.Timestamp("2021-12-31 23:59:59")
    df = df[(df["datetime"] >= start_date) & (df["datetime"] <= end_date)]
    df = df.sort_values("datetime")

    df = ta.add_all_ta_features(
        df,
        open="open",
        high="high",
        low="low",
        close="close",
        volume="volume",
        fillna=True,
    )

    df.to_csv(output_path, index=False)

Apply filter

In [4]:
# Create a filter for Regular Trading Hours (9:30 AM - 4:00 PM ET)
df["hour"] = df["datetime"].dt.hour
df["minute"] = df["datetime"].dt.minute

rth_filter = (
    # 9:30 AM to 3:59 PM (handle 9:30 properly)
    ((df["hour"] == 9) & (df["minute"] >= 30))  # 9:30-9:59
    | ((df["hour"] > 9) & (df["hour"] < 16))  # 10:00-15:59
    | ((df["hour"] == 16) & (df["minute"] == 0))  # 4:00 PM exactly
)


df = df[rth_filter]

# Normalize the volume features you want to use
volume_features = ["volume", "volume_obv", "volume_vwap"]
for feature in volume_features:
    scaler = MinMaxScaler()
    df[f"{feature}_norm"] = scaler.fit_transform(df[[feature]])
df[volume_features + [f"{feat}_norm" for feat in volume_features]].head()


print(f"Full dataset date range:")
print(f"Earliest date: {df['datetime'].min()}")
print(f"Latest date: {df['datetime'].max()}")
print(f"Total unique dates: {df['datetime'].dt.date.nunique()}")
yearly_counts = df["datetime"].dt.year.value_counts().sort_index()
print("\nData points per year:")
print(yearly_counts)
print(df.head())

Full dataset date range:
Earliest date: 2018-01-02 09:30:00
Latest date: 2021-05-06 13:59:00
Total unique dates: 842

Data points per year:
datetime
2018     97290
2019     97020
2020    133809
2021     37566
Name: count, dtype: int64
     Unnamed: 0                date    open    high     low   close  volume  \
120     1605521  20180102  09:30:00  268.17  268.18  268.15  268.18     369   
121     1605522  20180102  09:31:00  268.17  268.18  268.12  268.13     306   
122     1605523  20180102  09:32:00  268.13  268.14  268.11  268.11     881   
123     1605524  20180102  09:33:00  268.11  268.12  268.10  268.10    1070   
124     1605525  20180102  09:34:00  268.10  268.14  268.10  268.14     745   

     barCount  average            datetime  ...  momentum_pvo_hist  \
120       192  268.162 2018-01-02 09:30:00  ...          -2.351737   
121       148  268.148 2018-01-02 09:31:00  ...          -5.726195   
122       345  268.121 2018-01-02 09:32:00  ...          -3.018600   
123       

Split training data

In [87]:
# Define time ranges for data splits
training_time_range = (pd.Timestamp("2018-01-01"), pd.Timestamp("2018-12-31 23:59:59"))
validation_time_range = (
    pd.Timestamp("2019-01-01"),
    pd.Timestamp("2019-12-31 23:59:59"),
)
test_time_range = (pd.Timestamp("2020-01-01"), pd.Timestamp("2020-12-31 23:59:59"))

# Make sure datetime column is in datetime format
if not pd.api.types.is_datetime64_any_dtype(df["datetime"]):
    df["datetime"] = pd.to_datetime(df["datetime"])

# Split the DataFrame using boolean filtering
training_df = df[
    (df["datetime"] >= training_time_range[0])
    & (df["datetime"] <= training_time_range[1])
].copy()

validation_df = df[
    (df["datetime"] >= validation_time_range[0])
    & (df["datetime"] <= validation_time_range[1])
].copy()

test_df = df[
    (df["datetime"] >= test_time_range[0]) & (df["datetime"] <= test_time_range[1])
].copy()

# Display the shapes of the resulting DataFrames
print(f"Original data: {df.shape}")
print(f"Training data (2018): {training_df.shape}")
print(f"Validation data (2019): {validation_df.shape}")
print(f"Test data (2020): {test_df.shape}")
print(
    "New training data index range:",
    training_df.index.min(),
    "to",
    training_df.index.max(),
)
print(
    "New validation data index range:",
    validation_df.index.min(),
    "to",
    validation_df.index.max(),
)
print("New test data index range:", test_df.index.min(), "to", test_df.index.max())

Original data: (365685, 100)
Training data (2018): (97290, 100)
Validation data (2019): (97020, 100)
Test data (2020): (133809, 100)
New training data index range: 3825 to 2068868
New validation data index range: 4605 to 2064773
New test data index range: 3195 to 2067189


trend_macd - This is the main MACD line

trend_macd_signal - This is the signal line

trend_macd_diff - This is the histogram

trend_macd_diff = trend_macd - trend_macd_signal


In [None]:
class StockTradingEnv(gym.Env):
    metadata = {"render_modes": ["human"]}

    def __init__(self, stock_data, transaction_cost_percent=0.005):
        super(StockTradingEnv, self).__init__()

        # Remove any empty DataFrames
        self.stock_data = {
            ticker: df for ticker, df in stock_data.items() if not df.empty
        }
        self.tickers = list(self.stock_data.keys())

        if not self.tickers:
            raise ValueError("All provided stock data is empty")

        # Calculate the size of one stock's data
        sample_df = next(iter(self.stock_data.values()))
        self.n_features = len(sample_df.columns)

        # Define action and observation space
        self.action_space = spaces.Box(
            low=-1, high=1, shape=(len(self.tickers),), dtype=np.float32
        )

        # Observation space: price data for each stock + balance + shares held + net worth + max net worth + current step
        self.obs_shape = self.n_features * len(self.tickers) + 2 + len(self.tickers) + 2
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=(self.obs_shape,), dtype=np.float32
        )

        # Initialize account balance 1M$
        self.initial_balance = 1000000
        self.balance = self.initial_balance
        self.net_worth = self.initial_balance
        self.max_net_worth = self.initial_balance
        self.shares_held = {ticker: 0 for ticker in self.tickers}
        self.total_shares_sold = {ticker: 0 for ticker in self.tickers}
        self.total_sales_value = {ticker: 0 for ticker in self.tickers}

        # Set the current step
        self.current_step = 0

        # Calculate the minimum length of data across all stocks
        self.max_steps = max(0, min(len(df) for df in self.stock_data.values()) - 1)

        # Transaction cost
        self.transaction_cost_percent = transaction_cost_percent

    def reset(self, seed=None, options=None):
        super().reset(seed=seed)
        self.balance = self.initial_balance
        self.net_worth = self.initial_balance
        self.max_net_worth = self.initial_balance
        self.shares_held = {ticker: 0 for ticker in self.tickers}
        self.total_shares_sold = {ticker: 0 for ticker in self.tickers}
        self.total_sales_value = {ticker: 0 for ticker in self.tickers}
        self.current_step = 0
        return self._next_observation(), {}

    def _next_observation(self):
        # initialize the frame
        frame = np.zeros(self.obs_shape)

        # Add stock data for each ticker
        idx = 0
        # Loop through each ticker
        for ticker in self.tickers:
            # Get the DataFrame for the current ticker
            df = self.stock_data[ticker]
            # If the current step is less than the length of the DataFrame, add the price data for the current step
            if self.current_step < len(df):
                frame[idx : idx + self.n_features] = df.iloc[self.current_step].values
            # Otherwise, add the last price data available
            elif len(df) > 0:
                frame[idx : idx + self.n_features] = df.iloc[-1].values
            # Move the index to the next ticker
            idx += self.n_features

        # Add balance, shares held, net worth, max net worth, and current step
        frame[-4 - len(self.tickers)] = self.balance  # Balance
        frame[-3 - len(self.tickers) : -3] = [
            self.shares_held[ticker] for ticker in self.tickers
        ]  # Shares held
        frame[-3] = self.net_worth  # Net worth
        frame[-2] = self.max_net_worth  # Max net worth
        frame[-1] = self.current_step  # Current step

        return frame

    def step(self, actions):
        # update the current step
        self.current_step += 1

        # check if we have reached the maximum number of steps
        if self.current_step > self.max_steps:
            return self._next_observation(), 0, True, False, {}

        current_prices = {}
        # Loop through each ticker and perform the action
        for i, ticker in enumerate(self.tickers):
            # Get the current price of the stock
            current_prices[ticker] = self.stock_data[ticker].iloc[self.current_step][
                "Close"
            ]
            # get the action for the current ticker
            action = actions[i]

            if action > 0:  # Buy
                # Calculate the number of shares to buy
                shares_to_buy = int(self.balance * action / current_prices[ticker])
                # Calculate the cost of the shares
                cost = shares_to_buy * current_prices[ticker]
                # Transaction cost
                transaction_cost = cost * self.transaction_cost_percent
                # Update the balance and shares held
                self.balance -= cost + transaction_cost
                # Update the total shares sold
                self.shares_held[ticker] += shares_to_buy

            elif action < 0:  # Sell
                # Calculate the number of shares to sell
                shares_to_sell = int(self.shares_held[ticker] * abs(action))
                # Calculate the sale value
                sale = shares_to_sell * current_prices[ticker]
                # Transaction cost, fixed fees...
                transaction_cost = sale * self.transaction_cost_percent
                # Update the balance and shares held
                self.balance += sale - transaction_cost
                # Update the total shares sold
                self.shares_held[ticker] -= shares_to_sell
                # Update the shares sold
                self.total_shares_sold[ticker] += shares_to_sell
                # Update the total sales value
                self.total_sales_value[ticker] += sale

        # Calculate the net worth
        self.net_worth = self.balance + sum(
            self.shares_held[ticker] * current_prices[ticker] for ticker in self.tickers
        )
        # Update the max net worth
        self.max_net_worth = max(self.net_worth, self.max_net_worth)
        # Calculate the reward
        reward = self.net_worth - self.initial_balance
        # Check if the episode is done
        done = self.net_worth <= 0 or self.current_step >= self.max_steps

        obs = self._next_observation()
        return obs, reward, done, False, {}

    def render(self, mode="human"):
        # Print the current step, balance, shares held, net worth, and profit
        profit = self.net_worth - self.initial_balance
        print(f"Step: {self.current_step}")
        print(f"Balance: {self.balance:.2f}")
        for ticker in self.tickers:
            print(f"{ticker} Shares held: {self.shares_held[ticker]}")
        print(f"Net worth: {self.net_worth:.2f}")
        print(f"Profit: {profit:.2f}")

    def close(self):
        pass


def update_stock_data(self, new_stock_data, transaction_cost_percent=None):
    """
    Update the environment with new stock data.

    Parameters:
    new_stock_data (dict): Dictionary containing new stock data,
                           with keys as stock tickers and values as DataFrames.
    """
    # Remove empty DataFrames
    self.stock_data = {
        ticker: df for ticker, df in new_stock_data.items() if not df.empty
    }
    self.tickers = list(self.stock_data.keys())

    if not self.tickers:
        raise ValueError("All new stock data are empty")

    # Update the number of features if needed
    sample_df = next(iter(self.stock_data.values()))
    self.n_features = len(sample_df.columns)

    # Update observation space
    self.obs_shape = self.n_features * len(self.tickers) + 2 + len(self.tickers) + 2
    self.observation_space = spaces.Box(
        low=-np.inf, high=np.inf, shape=(self.obs_shape,), dtype=np.float32
    )

    # Update maximum steps
    self.max_steps = max(0, min(len(df) for df in self.stock_data.values()) - 1)

    # Update transaction cost if provided
    if transaction_cost_percent is not None:
        self.transaction_cost_percent = transaction_cost_percent

    # Reset the environment
    self.reset()

    print(f"The environment has been updated with {len(self.tickers)} new stocks.")


# Define SAC Agent
class SACAgent:
    def __init__(self, env, total_timesteps):
        self.model = SAC("MlpPolicy", env, verbose=1)

    def train(self, total_timesteps):
        self.model.learn(total_timesteps=total_timesteps)

    def predict(self, obs):
        action, _ = self.model.predict(obs)
        return action


def test_agent(env, agent, stock_data, n_tests=1000, visualize=False):
    """
    Test a single agent and track performance metrics, with an option to visualize the results.

    Parameters:
    - env: The trading environment.
    - agent: The agent to be tested.
    - stock_data: Data for the stocks in the environment.
    - n_tests: Number of tests to run (default: 1000).
    - visualize: Boolean flag to enable or disable visualization (default: False).

    Returns:
    - A dictionary containing steps, balances, net worths, and shares held.
    """
    # Initialize metrics tracking
    metrics = {
        "steps": [],
        "balances": [],
        "net_worths": [],
        "shares_held": {ticker: [] for ticker in stock_data.keys()},
    }

    # Reset the environment before starting the tests
    obs = env.reset()

    # If you have 3 tickers, action will be something like [0.2, -0.5, 0.0]
    for i in range(n_tests):
        metrics["steps"].append(i)
        action = agent.predict(obs)
        obs, rewards, dones, infos = env.step(action)
        if visualize:
            env.render()

        # Track metrics
        metrics["balances"].append(env.get_attr("balance")[0])
        metrics["net_worths"].append(env.get_attr("net_worth")[0])
        env_shares_held = env.get_attr("shares_held")[0]

        # Update shares held for each ticker
        for ticker in stock_data.keys():
            if ticker in env_shares_held:
                metrics["shares_held"][ticker].append(env_shares_held[ticker])
            else:
                metrics["shares_held"][ticker].append(
                    0
                )  # Append 0 if ticker is not found

        if dones:
            obs = env.reset()

    return metrics


# function to visualize the multiple portfolio net worths ( same chart )
def visualize_multiple_portfolio_net_worth(steps, net_worths_list, labels):
    plt.figure(figsize=(12, 6))
    for i, net_worths in enumerate(net_worths_list):
        plt.plot(steps, net_worths, label=labels[i])
    plt.title("Net Worth Over Time")
    plt.xlabel("Steps")
    plt.ylabel("Net Worth")
    plt.legend()
    plt.show()


def test_and_visualize_agents(env, agents, training_data, n_tests=1000):
    metrics = {}
    for agent_name, agent in agents.items():
        print(f"Testing {agent_name}...")
        metrics[agent_name] = test_agent(
            env, agent, training_data, n_tests=n_tests, visualize=True
        )
        print(f"Done testing {agent_name}!")

    print("-" * 50)
    print("All agents tested!")
    print("-" * 50)

    # Extract net worths for visualization
    net_worths = [metrics[agent_name]["net_worths"] for agent_name in agents.keys()]
    steps = next(iter(metrics.values()))[
        "steps"
    ]  # Assuming all agents have the same step count for simplicity

    # Visualize the performance metrics of multiple agents
    visualize_multiple_portfolio_net_worth(steps, net_worths, list(agents.keys()))

In [11]:
# Train SAC Agent
stock_data = {"spy": df}
total_timesteps = 10000
env = DummyVecEnv([lambda: StockTradingEnv(stock_data)])
sac_agent = SACAgent(env, total_timesteps)
sac_agent.train(total_timesteps)

# Test the agent
metrics = test_agent(env, sac_agent, stock_data, n_tests=1000, visualize=True)

# Visualize the performance metrics of the agent
visualize_multiple_portfolio_net_worth(
    metrics["steps"], [metrics["net_worths"]], ["SAC Agent"]
)

Using cuda device


ValueError: could not convert string to float: '20180102  09:30:00'