In [None]:
import gymnasium as gym
import numpy as np
import yfinance as yf
from stable_baselines3 import DQN
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.monitor import Monitor

# Download historical stock data
symbol = 'TSLA'
raw_data = yf.download(symbol, start='2020-01-01', end='2025-01-01')

# yfinance reports a failed download by returning an empty frame; stop here
# with a clear message instead of failing later inside the environment.
if raw_data is None or raw_data.empty:
    raise RuntimeError(f"No price data downloaded for {symbol}")

# Recent yfinance releases return two-level (field, ticker) columns even for a
# single ticker. Drop the ticker level so data['Close'] is a plain column.
# NOTE(review): assumes the default column layout where level 0 is the field
# name and level 1 is the ticker -- confirm if group_by is ever changed.
if raw_data.columns.nlevels > 1:
    raw_data = raw_data.droplevel(1, axis=1)

# Build `data` as a new frame (no in-place mutation) so re-running the cell
# always starts from the freshly downloaded rows.
data = raw_data.dropna()

In [None]:

# Custom Trading Environment compatible with Gymnasium
class TradingEnv(gym.Env):
    """Single-asset trading environment that steps through price rows.

    Actions (``Discrete(3)``): 0 = buy one unit, 1 = sell one unit (only when
    a position is held), 2 = hold.

    Observation: the ``Open``, ``High``, ``Low`` and ``Close`` values of the
    current row as a ``float32`` vector of shape ``(4,)``.

    Reward: non-zero only on a successful sell, where it equals the current
    close minus the previous row's close.

    NOTE(review): buying is not limited by the available balance, so
    ``balance`` can go negative; ``balance`` is tracked but is part of neither
    the observation nor the reward.

    Args:
        data: pandas DataFrame with ``Open``/``High``/``Low``/``Close``
            columns and at least two rows.
        initial_balance: cash balance assigned at construction and on every
            ``reset``.

    Raises:
        ValueError: if ``data`` has fewer than two rows.
    """

    def __init__(self, data, initial_balance=10000):
        super().__init__()
        # With fewer than two rows the first step would index past the end.
        if len(data) < 2:
            raise ValueError("TradingEnv requires at least two rows of price data")
        self.data = data.reset_index(drop=True)
        self.initial_balance = initial_balance
        self.current_step = 0
        self.balance = initial_balance
        self.position = 0
        self.action_space = gym.spaces.Discrete(3)  # Buy, Sell, Hold
        self.observation_space = gym.spaces.Box(
            low=-np.inf, high=np.inf, shape=(4,), dtype=np.float32
        )

    def reset(self, seed=None, options=None):
        """Restart the episode at the first row.

        Returns:
            ``(observation, info)`` where ``info`` is an empty dict.
        """
        super().reset(seed=seed)
        self.current_step = 0
        self.balance = self.initial_balance
        self.position = 0
        obs = self._next_observation()
        info = {}
        return obs, info

    def step(self, action):
        """Apply ``action`` at the current row and advance one row.

        Returns:
            ``(observation, reward, terminated, truncated, info)``.
            ``terminated`` becomes True once the last row is reached;
            ``truncated`` is always False and ``info`` is always empty.
        """
        price = self._close_at(self.current_step)
        reward = 0.0

        if action == 0:  # Buy
            self.position += 1
            self.balance -= price
        elif action == 1 and self.position > 0:  # Sell
            self.position -= 1
            self.balance += price
            # Clamp so that a sell at step 0 (only possible if `position` was
            # set externally) cannot wrap around to the last row via iloc[-1].
            previous_step = max(self.current_step - 1, 0)
            reward = price - self._close_at(previous_step)

        self.current_step += 1
        done = self.current_step >= len(self.data) - 1
        obs = self._next_observation()
        info = {}

        return obs, reward, done, False, info  # done, truncated=False, info

    def _close_at(self, step):
        """Return the ``Close`` value of row ``step`` as a plain float.

        The lookup yields a scalar for flat columns but a one-element Series
        when the frame has two-level columns; both are reduced to a float so
        that ``balance`` and ``reward`` stay scalar.
        """
        value = self.data['Close'].iloc[step]
        return float(np.asarray(value, dtype=np.float64).reshape(-1)[0])

    def _next_observation(self):
        """Build the observation vector for the current row."""
        frame = self.data.iloc[self.current_step]
        return np.array([
            frame['Open'],
            frame['High'],
            frame['Low'],
            frame['Close']
        ], dtype=np.float32).flatten()


In [None]:
from stable_baselines3 import DQN
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.monitor import Monitor

# Training configuration. A fixed seed makes runs repeatable.
# NOTE(review): exact reproducibility can still vary across hardware/backends.
SEED = 42
TOTAL_TIMESTEPS = 50000

# Wrap the custom environment with Monitor and DummyVecEnv
env = DummyVecEnv([lambda: Monitor(TradingEnv(data))])

# Initialize and train the DQN model
model = DQN('MlpPolicy', env, verbose=1, seed=SEED)
model.learn(total_timesteps=TOTAL_TIMESTEPS)

# Save the trained model
model.save("dqn_trading_bot")


In [None]:
# Evaluate the saved policy for one episode on the vectorized environment.
model = DQN.load("dqn_trading_bot")
obs = env.reset()
total_rewards = 0.0

while True:
    # deterministic=True turns off epsilon-greedy exploration, so the score
    # reflects the learned policy rather than occasional random actions.
    action, _states = model.predict(obs, deterministic=True)
    # A vectorized env returns batched arrays (one entry per sub-environment).
    # NOTE(review): index 0 assumes `env` wraps exactly one sub-environment.
    obs, reward, done, info = env.step(action)
    total_rewards += float(reward[0])
    if done[0]:
        break

print(f"Total Reward: {total_rewards}")