## Reinforcement Learning: DQN training and backtest

In [18]:
from trading.utils import *
from trading.traditional_strategies import *
from trading.ai_strategies import *
from trading.rl_module import *
from trading.trader import *


In [19]:
# Load stock data

def train_dqn(stock_ticker, episodes=200, batch_size=32,
              data_dir="./data/us_stock", model_dir="./model"):
    """Train a DQN agent on one ticker's price history and save its weights.

    The CSV ``{data_dir}/all_{stock_ticker}.csv`` is read, indexed by its
    ``Date`` column, and wrapped in a ``TradingEnv``. A ``DQNAgent`` sized from
    the environment's observation and action spaces then plays ``episodes``
    episodes; every transition is pushed into the agent's memory and
    ``agent.replay(batch_size)`` is called once at the end of each episode.

    Args:
        stock_ticker: Ticker symbol used to build the input CSV name and the
            output checkpoint name.
        episodes: Number of training episodes to run.
        batch_size: Batch size handed to ``agent.replay``.
        data_dir: Directory that holds ``all_{stock_ticker}.csv``.
        model_dir: Directory the checkpoint is written to; created if missing.

    Returns:
        None. The model's ``state_dict`` is written to
        ``{model_dir}/{stock_ticker}_DQN_model.pth``.
    """
    # Local import: the notebook's import cell only pulls in project modules.
    import os

    data = pd.read_csv(f"{data_dir}/all_{stock_ticker}.csv")
    data['Date'] = pd.to_datetime(data['Date'])
    data = data.set_index('Date')

    # Initialize trading environment
    env = TradingEnv(data)
    state_size = env.observation_space.shape[0]
    action_size = env.action_space.n
    print(state_size, action_size)

    # Initialize DQN agent
    agent = DQNAgent(state_size, action_size)

    for e in range(episodes):
        state = env.reset()
        total_reward = 0

        # The step index itself is not needed; the loop only caps episode length.
        for _ in range(env.max_steps):
            action = agent.act(state)
            next_state, reward, done, _info = env.step(action)
            agent.remember(state, action, reward, next_state, done)
            state = next_state
            total_reward += reward

            if done:
                break

        # Report every episode, including ones cut off by max_steps without `done`.
        print(f"Episode {e+1}/{episodes}, Total Reward: {total_reward}")

        # One learning pass per episode over the stored transitions.
        agent.replay(batch_size)

    # Save the trained model (make sure the target directory exists first)
    os.makedirs(model_dir, exist_ok=True)
    torch.save(agent.model.state_dict(), f"{model_dir}/{stock_ticker}_DQN_model.pth")


In [20]:
# Train the DQN agent on AAPL; train_dqn saves the weights to ./model/AAPL_DQN_model.pth.
train_dqn("AAPL")

11 3
Episode 1/200, Total Reward: 3473785.055738877
Episode 2/200, Total Reward: 1632238.0197277498
Episode 3/200, Total Reward: 1776251.7906156273
Episode 4/200, Total Reward: 13951320.816774132
Episode 5/200, Total Reward: 10026006.732125659
Episode 6/200, Total Reward: 2278006.0901650074
Episode 7/200, Total Reward: 1928245.7375511643
Episode 8/200, Total Reward: 6171869.959744681
Episode 9/200, Total Reward: 10408294.34958614
Episode 10/200, Total Reward: 16584514.30884199
Episode 11/200, Total Reward: 789605.7317098099
Episode 12/200, Total Reward: 6978964.546436392
Episode 13/200, Total Reward: 1090100.1308889785
Episode 14/200, Total Reward: 12953894.584217003
Episode 15/200, Total Reward: 1397828.6918861303
Episode 16/200, Total Reward: 14247688.354321793
Episode 17/200, Total Reward: 48061.52465718659
Episode 18/200, Total Reward: 349109.7386976901
Episode 19/200, Total Reward: 2811525.7082724264
Episode 20/200, Total Reward: 1918349.6721018674
Episode 21/200, Total Reward: 23

In [21]:

# Load the trained model
# The network dimensions must match the ones used at training time
# (train_dqn printed "11 3" for this ticker in the run above).
TICKER = "AAPL"
STATE_SIZE, ACTION_SIZE = 11, 3

agent = DQNAgent(STATE_SIZE, ACTION_SIZE)
# map_location lets a checkpoint saved on a GPU machine load on a CPU-only one.
agent.model.load_state_dict(
    torch.load(f"./model/{TICKER}_DQN_model.pth", map_location="cpu")
)
# Inference only from here on: put the network in evaluation mode
# (matters if it contains dropout or batch-norm layers).
agent.model.eval()
# NOTE(review): a freshly constructed DQNAgent may still start with a high
# exploration rate; confirm DQNStrategy acts greedily, otherwise the backtest
# is measuring mostly random actions.

# Run the backtest
trader = AITrader(start_date="2023-01-01", end_date="2024-10-01")
trader.add_strategy(DQNStrategy, {"model": agent})
trader.run(1, stock_ticker=TICKER)


--- AITrader initialization ---
Strategy 'DQNStrategy' added with model and parameters: {'model': <trading.rl_module.DQNAgent object at 0x176111b50>}
Data loaded.
Starting Value: 1000000
Sizer set to 95%.
Analyzers added.

--- Backtesting ---
2023-03-27, [BUY] EXECUTED at Price: 159.94     | Total Cost: 15994.00   | Commision: 22.79     
2023-03-28, [SELL] EXECUTED at Price: 157.97     | Total Cost: 15994.00   | Commision: 22.51     
2023-03-28, [OPERATION PROFIT] Gross: -197.00    | Net: -242.30   
2023-03-29, [BUY] EXECUTED at Price: 159.37     | Total Cost: 15937.00   | Commision: 22.71     
2023-03-31, [SELL] EXECUTED at Price: 162.44     | Total Cost: 15937.00   | Commision: 23.15     
2023-03-31, [OPERATION PROFIT] Gross: 307.00     | Net: 261.14    
2023-04-05, [BUY] EXECUTED at Price: 164.74     | Total Cost: 16474.00   | Commision: 23.48     
2023-04-11, [SELL] EXECUTED at Price: 162.35     | Total Cost: 16474.00   | Commision: 23.13     
2023-04-11, [OPERATION PROFIT] Gross: 

{'Ending Value': 994747,
 'Total Returns (%)': -0.53,
 'Annualized Returns (%)': -0.3,
 'Sharpe Ratio': -9.285,
 'Max Drawdown (%)': 0.7}