## RL

In [1]:
import numpy as np
import pandas as pd
import gym
from gym import spaces
import torch
import torch.nn as nn
import torch.optim as optim
import random
from collections import deque
import backtrader as bt
from typing import Optional, List
from trading.utils import *
from trading.traditional_strategies import *
from trading.trader import *



In [2]:
class TradingEnv(gym.Env):
    """
    Custom Trading Environment for Reinforcement Learning.
    """

    def __init__(self, data, initial_balance=1000000):
        super(TradingEnv, self).__init__()
        self.data = data
        self.initial_balance = initial_balance
        self.current_step = 0
        self.balance = initial_balance
        self.positions = 0  # Number of stocks held
        self.total_value = initial_balance
        self.max_steps = len(data)

        # Action space: 0 (Hold), 1 (Buy), 2 (Sell)
        self.action_space = spaces.Discrete(3)

        # Observation space: OHLCV + balance + positions
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=(len(data.columns) + 2,), dtype=np.float32
        )

    def reset(self):
        self.current_step = 0
        self.balance = self.initial_balance
        self.positions = 0
        self.total_value = self.initial_balance
        return self._get_observation()

    def _get_observation(self):
        return np.concatenate(
            [self.data.iloc[self.current_step].values, [self.balance, self.positions]]
        )

    def step(self, action):
        current_price = self.data.iloc[self.current_step]["Close"]
        transaction_cost = 0.001  # Example: 0.1% transaction cost per trade

        if action == 1:  # Buy
            self.positions += 1
            self.balance -= current_price * (1 + transaction_cost)

        elif action == 2:  # Sell
            if self.positions > 0:
                self.positions -= 1
                self.balance += current_price * (1 - transaction_cost)

        self.current_step += 1
        done = self.current_step >= self.max_steps - 1

        # Update portfolio value
        self.total_value = self.balance + self.positions * current_price

        # Reward: Portfolio value change
        reward = self.total_value - self.initial_balance

        return self._get_observation(), reward, done, {}

    def render(self):
        print(f"Step: {self.current_step}, Balance: {self.balance}, Positions: {self.positions}, Total Value: {self.total_value}")


In [3]:
class DQN(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(DQN, self).__init__()
        self.fc1 = nn.Linear(input_dim, 128)
        self.fc2 = nn.Linear(128, 128)
        self.fc3 = nn.Linear(128, output_dim)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        return self.fc3(x)

class DQNAgent:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)
        self.gamma = 0.95  # Discount factor
        self.epsilon = 1.0  # Exploration rate
        self.epsilon_decay = 0.995
        self.epsilon_min = 0.01
        self.learning_rate = 0.001

        self.model = DQN(state_size, action_size).float()
        self.optimizer = optim.Adam(self.model.parameters(), lr=self.learning_rate)
        self.criterion = nn.MSELoss()

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        state = torch.tensor(state, dtype=torch.float32).unsqueeze(0)
        act_values = self.model(state)
        return torch.argmax(act_values).item()

    def replay(self, batch_size):
        if len(self.memory) < batch_size:
            return

        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                next_state = torch.tensor(next_state, dtype=torch.float32).unsqueeze(0)
                target = reward + self.gamma * torch.max(self.model(next_state)).item()

            state = torch.tensor(state, dtype=torch.float32).unsqueeze(0)
            target_f = self.model(state).detach().clone()
            target_f[0][action] = target

            output = self.model(state)
            loss = self.criterion(output, target_f)

            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay


In [4]:
# Load stock data
data = pd.read_csv("data/us_stock/all_GOOG.csv")
data['Date'] = pd.to_datetime(data['Date'])
data.set_index('Date', inplace=True)

# Initialize trading environment
env = TradingEnv(data)
state_size = env.observation_space.shape[0]
action_size = env.action_space.n

# Initialize DQN agent
agent = DQNAgent(state_size, action_size)

# Train the agent
episodes = 100
batch_size = 32

for e in range(episodes):
    state = env.reset()
    total_reward = 0

    for time in range(env.max_steps):
        action = agent.act(state)
        next_state, reward, done, _ = env.step(action)
        agent.remember(state, action, reward, next_state, done)
        state = next_state
        total_reward += reward

        if done:
            print(f"Episode {e+1}/{episodes}, Total Reward: {total_reward}")
            break

    agent.replay(batch_size)

# Save the trained model
torch.save(agent.model.state_dict(), "dqn_model.pth")


Episode 1/100, Total Reward: 3253514.6636207
Episode 2/100, Total Reward: 983647.4999504752
Episode 3/100, Total Reward: 6897053.402270409
Episode 4/100, Total Reward: 3563464.378861721
Episode 5/100, Total Reward: 2154069.0641862913
Episode 6/100, Total Reward: 4150575.5277947495
Episode 7/100, Total Reward: 2097797.299933062
Episode 8/100, Total Reward: 6071427.0740653295
Episode 9/100, Total Reward: 872488.578317061
Episode 10/100, Total Reward: 277395.74302264233
Episode 11/100, Total Reward: 627159.8950152298
Episode 12/100, Total Reward: 765489.940488163
Episode 13/100, Total Reward: 1043757.4643282554
Episode 14/100, Total Reward: 2879713.7080451734
Episode 15/100, Total Reward: 1296479.4358431906
Episode 16/100, Total Reward: 12883099.897899956
Episode 17/100, Total Reward: 496193.66805954196
Episode 18/100, Total Reward: 556461.5888445964
Episode 19/100, Total Reward: 1297064.0362189366
Episode 20/100, Total Reward: 238975.10428285843
Episode 21/100, Total Reward: 338110.15627

In [5]:
class DQNStrategy(BaseStrategy):
    params = (("model", None),)

    def __init__(self):
        self.model = self.params.model
        self.dataclose = self.data.close

    def next(self):
        state = np.array([
            self.data.open[0],
            self.data.high[0],
            self.data.low[0],
            self.data.close[0],
            self.data.volume[0],
        ])

        # Get the action from the DQN model
        action = self.model.act(state)

        # Execute the action
        if action == 1:  # Buy
            if not self.position:
                self.buy(size=100)
        elif action == 2:  # Sell
            if self.position:
                self.sell(size=100)

# Load the trained model
agent = DQNAgent(state_size, action_size)
agent.model.load_state_dict(torch.load("dqn_model.pth"))

# Run the backtest
trader = AITrader(start_date="2024-01-01", end_date="2024-10-01")
trader.add_strategy(DQNStrategy, model=agent)
trader.run(1, stock_ticker="TSLA")




--- AITrader initialization ---
Strategy 'DQNStrategy' added with model.
Data loaded.
Starting Value: 1000000
Sizer set to 95%.
Analyzers added.

--- Backtesting ---
2024-01-04, [BUY] EXECUTED at Price: 239.25     | Total Cost: 23925.00   | Commision: 34.09     
2024-01-12, [SELL] EXECUTED at Price: 220.08     | Total Cost: 23925.00   | Commision: 31.36     
2024-01-12, [OPERATION PROFIT] Gross: -1917.00   | Net: -1982.45  
2024-01-17, [BUY] EXECUTED at Price: 214.86     | Total Cost: 21486.00   | Commision: 30.62     
2024-01-23, [SELL] EXECUTED at Price: 211.30     | Total Cost: 21486.00   | Commision: 30.11     
2024-01-23, [OPERATION PROFIT] Gross: -356.00    | Net: -416.73   
2024-01-25, [BUY] EXECUTED at Price: 189.70     | Total Cost: 18970.00   | Commision: 27.03     
2024-01-29, [SELL] EXECUTED at Price: 185.63     | Total Cost: 18970.00   | Commision: 26.45     
2024-01-29, [OPERATION PROFIT] Gross: -407.00    | Net: -460.48   
2024-01-30, [BUY] EXECUTED at Price: 195.33     