## RL

In [None]:
import numpy as np
import pandas as pd
import gym
from gym import spaces
import torch
import torch.nn as nn
import torch.optim as optim
import random
from collections import deque
import backtrader as bt
from typing import Optional, List
from trading.utils import *
from trading.traditional_strategies import *
from trading.trader import *



In [6]:
class TradingEnv(gym.Env):
    """
    Custom Trading Environment for Reinforcement Learning.
    """

    def __init__(self, data, initial_balance=1000000):
        super(TradingEnv, self).__init__()
        self.data = data
        self.initial_balance = initial_balance
        self.current_step = 0
        self.balance = initial_balance
        self.positions = 0  # Number of stocks held
        self.total_value = initial_balance
        self.max_steps = len(data)

        # Action space: 0 (Hold), 1 (Buy), 2 (Sell)
        self.action_space = spaces.Discrete(3)

        # Observation space: OHLCV + balance + positions
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=(len(data.columns) + 2,), dtype=np.float32
        )

    def reset(self):
        self.current_step = 0
        self.balance = self.initial_balance
        self.positions = 0
        self.total_value = self.initial_balance
        return self._get_observation()

    def _get_observation(self):
        return np.concatenate(
            [self.data.iloc[self.current_step].values, [self.balance, self.positions]]
        )

    def step(self, action):
        current_price = self.data.iloc[self.current_step]["Close"]
        transaction_cost = 0.001  # Example: 0.1% transaction cost per trade

        if action == 1:  # Buy
            self.positions += 1
            self.balance -= current_price * (1 + transaction_cost)

        elif action == 2:  # Sell
            if self.positions > 0:
                self.positions -= 1
                self.balance += current_price * (1 - transaction_cost)

        self.current_step += 1
        done = self.current_step >= self.max_steps - 1

        # Update portfolio value
        self.total_value = self.balance + self.positions * current_price

        # Reward: Portfolio value change
        reward = self.total_value - self.initial_balance

        return self._get_observation(), reward, done, {}

    def render(self):
        print(f"Step: {self.current_step}, Balance: {self.balance}, Positions: {self.positions}, Total Value: {self.total_value}")


In [7]:
class DQN(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(DQN, self).__init__()
        self.fc1 = nn.Linear(input_dim, 128)
        self.fc2 = nn.Linear(128, 128)
        self.fc3 = nn.Linear(128, output_dim)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        return self.fc3(x)

class DQNAgent:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)
        self.gamma = 0.95  # Discount factor
        self.epsilon = 1.0  # Exploration rate
        self.epsilon_decay = 0.995
        self.epsilon_min = 0.01
        self.learning_rate = 0.001

        self.model = DQN(state_size, action_size).float()
        self.optimizer = optim.Adam(self.model.parameters(), lr=self.learning_rate)
        self.criterion = nn.MSELoss()

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        state = torch.tensor(state, dtype=torch.float32).unsqueeze(0)
        act_values = self.model(state)
        return torch.argmax(act_values).item()

    def replay(self, batch_size):
        if len(self.memory) < batch_size:
            return

        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                next_state = torch.tensor(next_state, dtype=torch.float32).unsqueeze(0)
                target = reward + self.gamma * torch.max(self.model(next_state)).item()

            state = torch.tensor(state, dtype=torch.float32).unsqueeze(0)
            target_f = self.model(state).detach().clone()
            target_f[0][action] = target

            output = self.model(state)
            loss = self.criterion(output, target_f)

            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay


In [8]:
# Load stock data
data = pd.read_csv("data/us_stock/all_GOOG.csv")
data['Date'] = pd.to_datetime(data['Date'])
data.set_index('Date', inplace=True)

# Initialize trading environment
env = TradingEnv(data)
state_size = env.observation_space.shape[0]
action_size = env.action_space.n

# Initialize DQN agent
agent = DQNAgent(state_size, action_size)

# Train the agent
episodes = 100
batch_size = 32

for e in range(episodes):
    state = env.reset()
    total_reward = 0

    for time in range(env.max_steps):
        action = agent.act(state)
        next_state, reward, done, _ = env.step(action)
        agent.remember(state, action, reward, next_state, done)
        state = next_state
        total_reward += reward

        if done:
            print(f"Episode {e+1}/{episodes}, Total Reward: {total_reward}")
            break

    agent.replay(batch_size)

# Save the trained model
torch.save(agent.model.state_dict(), "dqn_model.pth")


Episode 1/100, Total Reward: 1040983.9397915911
Episode 2/100, Total Reward: 4955595.933395075
Episode 3/100, Total Reward: 2868851.2344522085
Episode 4/100, Total Reward: 55646.06922918046
Episode 5/100, Total Reward: 6866159.283799852
Episode 6/100, Total Reward: 13025886.08984898
Episode 7/100, Total Reward: 10994483.216600321
Episode 8/100, Total Reward: 952530.3968652207
Episode 9/100, Total Reward: 7311527.3672916265
Episode 10/100, Total Reward: 9047011.778327402
Episode 11/100, Total Reward: 1596101.4916831078
Episode 12/100, Total Reward: 7479052.166760173
Episode 13/100, Total Reward: 1397649.1313585532
Episode 14/100, Total Reward: 595831.6301641741
Episode 15/100, Total Reward: 9153804.680873396
Episode 16/100, Total Reward: 228793.423683709
Episode 17/100, Total Reward: 2400307.3460532506
Episode 18/100, Total Reward: 195142.57748587395
Episode 19/100, Total Reward: 272098.77036317834
Episode 20/100, Total Reward: 17127192.018083483
Episode 21/100, Total Reward: 258428.401

In [9]:
class DQNStrategy(BaseStrategy):
    params = (("model", None),)

    def __init__(self):
        self.model = self.params.model
        self.dataclose = self.data.close

    def next(self):
        state = np.array([
            self.data.open[0],
            self.data.high[0],
            self.data.low[0],
            self.data.close[0],
            self.data.volume[0],
        ])

        # Get the action from the DQN model
        action = self.model.act(state)

        # Execute the action
        if action == 1:  # Buy
            if not self.position:
                self.buy(size=100)
        elif action == 2:  # Sell
            if self.position:
                self.sell(size=100)

# Load the trained model
agent = DQNAgent(state_size, action_size)
agent.model.load_state_dict(torch.load("dqn_model.pth"))

# Run the backtest
trader = AITrader(start_date="2024-01-01", end_date="2024-10-01")
trader.add_strategy(DQNStrategy, model=agent)
trader.run(1, stock_ticker="TSLA")




--- AITrader initialization ---
Strategy 'DQNStrategy' added with model.
Data loaded.
Starting Value: 1000000
Sizer set to 95%.
Analyzers added.

--- Backtesting ---
2024-01-04, [BUY] EXECUTED at Price: 239.25     | Total Cost: 23925.00   | Commision: 34.09     
2024-01-08, [SELL] EXECUTED at Price: 236.14     | Total Cost: 23925.00   | Commision: 33.65     
2024-01-08, [OPERATION PROFIT] Gross: -311.00    | Net: -378.74   
2024-01-12, [BUY] EXECUTED at Price: 220.08     | Total Cost: 22008.00   | Commision: 31.36     
2024-01-16, [SELL] EXECUTED at Price: 215.10     | Total Cost: 22008.00   | Commision: 30.65     
2024-01-16, [OPERATION PROFIT] Gross: -498.00    | Net: -560.01   
2024-01-18, [BUY] EXECUTED at Price: 216.88     | Total Cost: 21688.00   | Commision: 30.91     
2024-01-19, [SELL] EXECUTED at Price: 209.99     | Total Cost: 21688.00   | Commision: 29.92     
2024-01-19, [OPERATION PROFIT] Gross: -689.00    | Net: -749.83   
2024-01-25, [BUY] EXECUTED at Price: 189.70     