## Deep Reinforcement Learning for Stock Market Trading


---

In [1]:
# Import libraries

import math
import random
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import pandas_datareader as data_reader

from tqdm import tqdm_notebook, tqdm
from collections import deque

In [2]:
# Building the AI Trader network

class AI_Trader():
  
  def __init__(self, state_size, action_space=3, model_name="AITrader"): #Stay, Buy, Sell
    
    self.state_size = state_size
    self.action_space = action_space
    self.memory = deque(maxlen=2000)
    self.inventory = []
    self.model_name = model_name
    
    self.gamma = 0.95
    self.epsilon = 1.0
    self.epsilon_final = 0.01
    self.epsilon_decay = 0.995
    
    self.model = self.model_builder()
    
  def model_builder(self):
    
    model = tf.keras.models.Sequential()
    
    model.add(tf.keras.layers.Dense(units=32, activation='relu', input_dim=self.state_size))
    
    model.add(tf.keras.layers.Dense(units=64, activation='relu'))
    
    model.add(tf.keras.layers.Dense(units=128, activation='relu'))
    
    model.add(tf.keras.layers.Dense(units=self.action_space, activation='linear'))
    
    model.compile(loss='mse', optimizer=tf.keras.optimizers.Adam(lr=0.001))
    
    return model
  
  def trade(self, state):
    
    if random.random() <= self.epsilon:
      return random.randrange(self.action_space)
    
    actions = self.model.predict(state)
    return np.argmax(actions[0])
  
  
  def batch_train(self, batch_size):
    
    batch = []
    for i in range(len(self.memory) - batch_size + 1, len(self.memory)):
      batch.append(self.memory[i])
      
    for state, action, reward, next_state, done in batch:
      reward = reward
      if not done:
        reward = reward + self.gamma * np.amax(self.model.predict(next_state)[0])
        
      target = self.model.predict(state)
      target[0][action] = reward
      
      self.model.fit(state, target, epochs=1, verbose=0)
      
    if self.epsilon > self.epsilon_final:
      self.epsilon *= self.epsilon_decay

In [3]:
# Dataset preprocessing

# Defining helper functions

# Sigmoid

def sigmoid(x):
  return 1 / (1 + math.exp(-x))

In [4]:
# Price format function

def stocks_price_format(n):
  if n < 0:
    return "- $ {0:2f}".format(abs(n))
  else:
    return "$ {0:2f}".format(abs(n))

In [5]:
# Dataset loader


def dataset_loader(stock_name):
  
  #Complete the dataset loader function
  dataset = data_reader.DataReader(stock_name, data_source="yahoo")
  
  start_date = str(dataset.index[0]).split()[0]
  end_date = str(dataset.index[-1]).split()[0]
  
  close = dataset['Close']

  return close

In [6]:
# State creator

def state_creator(data, timestep, window_size):
  
  starting_id = timestep - window_size + 1
  
  if starting_id >= 0:
    windowed_data = data[starting_id:timestep+1]
  else:
    windowed_data = - starting_id * [data[0]] + list(data[0:timestep+1])
    
  state = []
  for i in range(window_size - 1):
    state.append(sigmoid(windowed_data[i+1] - windowed_data[i]))
    
  return np.array([state])

In [7]:
# Loading a dataset

stock_name = "HCAT"
data = dataset_loader(stock_name)
data

Date
2019-07-25    39.169998
2019-07-26    38.299999
2019-07-29    41.419998
2019-07-30    43.369999
2019-07-31    44.250000
                ...    
2020-11-13    34.500000
2020-11-16    33.810001
2020-11-17    34.009998
2020-11-18    33.220001
2020-11-19    33.150002
Name: Close, Length: 336, dtype: float64

In [8]:
# Training the AI Trader

# Setting hyper parameters

window_size = 5
episodes = 1

batch_size = 32
data_samples = len(data) - 1

In [9]:
# Defining the Trader model

trader = AI_Trader(window_size)
trader.model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 32)                192       
_________________________________________________________________
dense_1 (Dense)              (None, 64)                2112      
_________________________________________________________________
dense_2 (Dense)              (None, 128)               8320      
_________________________________________________________________
dense_3 (Dense)              (None, 3)                 387       
Total params: 11,011
Trainable params: 11,011
Non-trainable params: 0
_________________________________________________________________


In [10]:
# Training loop

for episode in range(1, episodes + 1):
  
  print("Episode: {}/{}".format(episode, episodes))
  
  state = state_creator(data, 0, window_size + 1)
  
  total_profit = 0
  trader.inventory = []
  
  for t in tqdm(range(data_samples)):
    
    action = trader.trade(state)
    
    next_state = state_creator(data, t+1, window_size + 1)
    reward = 0
    
    if action == 1: #Buying
      trader.inventory.append(data[t])
      print("AI Trader bought: ", stocks_price_format(data[t]))
      
    elif action == 2 and len(trader.inventory) > 0: #Selling
      buy_price = trader.inventory.pop(0)
      
      reward = max(data[t] - buy_price, 0)
      total_profit += data[t] - buy_price
      print("AI Trader sold: ", stocks_price_format(data[t]), " Profit: " + stocks_price_format(data[t] - buy_price) )
      
    if t == data_samples - 1:
      done = True
    else:
      done = False
      
    trader.memory.append((state, action, reward, next_state, done))
    
    state = next_state
    
    if done:
      print("########################")
      print("TOTAL PROFIT: {}".format(total_profit))
      print("########################")
    
    if len(trader.memory) > batch_size:
      trader.batch_train(batch_size)
      
  if episode % 10 == 0:
    trader.model.save("ai_trader_{}.h5".format(episode))
    

  0%|          | 0/335 [00:00<?, ?it/s]

Episode: 1/1
AI Trader bought:  $ 45.910000
AI Trader sold:  $ 48.630001  Profit: $ 2.720001
AI Trader bought:  $ 48.470001
AI Trader sold:  $ 43.400002  Profit: - $ 5.070000
AI Trader bought:  $ 40.000000
AI Trader sold:  $ 39.860001  Profit: - $ 0.139999
AI Trader bought:  $ 39.220001
AI Trader sold:  $ 36.369999  Profit: - $ 2.850002
AI Trader bought:  $ 34.700001
AI Trader bought:  $ 33.099998


 10%|█         | 34/335 [00:08<05:45,  1.15s/it]

AI Trader sold:  $ 34.040001  Profit: - $ 0.660000


 10%|█         | 35/335 [00:11<08:59,  1.80s/it]

AI Trader bought:  $ 35.330002


 11%|█         | 36/335 [00:15<10:56,  2.20s/it]

AI Trader sold:  $ 37.020000  Profit: $ 3.920002


 11%|█         | 37/335 [00:18<12:19,  2.48s/it]

AI Trader bought:  $ 38.189999


 12%|█▏        | 40/335 [00:27<14:33,  2.96s/it]

AI Trader bought:  $ 37.950001


 12%|█▏        | 41/335 [00:31<14:57,  3.05s/it]

AI Trader sold:  $ 37.990002  Profit: $ 2.660000


 13%|█▎        | 44/335 [00:40<14:51,  3.07s/it]

AI Trader bought:  $ 31.650000


 13%|█▎        | 45/335 [00:43<14:52,  3.08s/it]

AI Trader bought:  $ 30.309999


 14%|█▎        | 46/335 [00:46<15:04,  3.13s/it]

AI Trader bought:  $ 31.639999


 14%|█▍        | 47/335 [00:49<15:00,  3.13s/it]

AI Trader bought:  $ 29.150000


 14%|█▍        | 48/335 [00:52<14:57,  3.13s/it]

AI Trader sold:  $ 28.150000  Profit: - $ 10.039999


 15%|█▍        | 50/335 [01:00<16:11,  3.41s/it]

AI Trader sold:  $ 27.200001  Profit: - $ 10.750000


 15%|█▌        | 51/335 [01:03<16:44,  3.54s/it]

AI Trader bought:  $ 28.430000


 16%|█▌        | 52/335 [01:07<16:20,  3.46s/it]

AI Trader bought:  $ 27.139999


 16%|█▌        | 53/335 [01:10<15:46,  3.36s/it]

AI Trader sold:  $ 26.440001  Profit: - $ 5.209999


 16%|█▌        | 54/335 [01:13<15:21,  3.28s/it]

AI Trader sold:  $ 26.950001  Profit: - $ 3.359999


 16%|█▋        | 55/335 [01:16<15:10,  3.25s/it]

AI Trader sold:  $ 27.820000  Profit: - $ 3.820000


 17%|█▋        | 56/335 [01:19<15:01,  3.23s/it]

AI Trader sold:  $ 28.600000  Profit: - $ 0.549999


 17%|█▋        | 58/335 [01:26<14:48,  3.21s/it]

AI Trader sold:  $ 30.100000  Profit: $ 1.670000


 18%|█▊        | 59/335 [01:29<14:36,  3.18s/it]

AI Trader sold:  $ 28.420000  Profit: $ 1.280001


 19%|█▉        | 63/335 [01:42<14:31,  3.21s/it]

AI Trader bought:  $ 27.750000


 19%|█▉        | 64/335 [01:45<14:18,  3.17s/it]

AI Trader bought:  $ 28.879999


 19%|█▉        | 65/335 [01:48<14:09,  3.15s/it]

AI Trader sold:  $ 29.770000  Profit: $ 2.020000


 20%|██        | 67/335 [01:54<13:58,  3.13s/it]

AI Trader bought:  $ 29.840000


 20%|██        | 68/335 [01:58<14:06,  3.17s/it]

AI Trader sold:  $ 32.119999  Profit: $ 3.240000


 21%|██        | 69/335 [02:01<14:02,  3.17s/it]

AI Trader sold:  $ 32.160000  Profit: $ 2.320000


 21%|██        | 71/335 [02:07<13:52,  3.15s/it]

AI Trader bought:  $ 33.270000


 21%|██▏       | 72/335 [02:10<13:53,  3.17s/it]

AI Trader sold:  $ 33.610001  Profit: $ 0.340000


 24%|██▍       | 80/335 [02:36<13:16,  3.12s/it]

AI Trader bought:  $ 34.369999


 25%|██▍       | 83/335 [02:45<12:56,  3.08s/it]

AI Trader sold:  $ 37.279999  Profit: $ 2.910000


 26%|██▌       | 87/335 [02:57<12:59,  3.14s/it]

AI Trader bought:  $ 38.049999


 27%|██▋       | 90/335 [03:08<13:26,  3.29s/it]

AI Trader sold:  $ 37.990002  Profit: - $ 0.059998


 27%|██▋       | 91/335 [03:11<13:27,  3.31s/it]

AI Trader bought:  $ 39.410000


 27%|██▋       | 92/335 [03:14<13:12,  3.26s/it]

AI Trader sold:  $ 39.490002  Profit: $ 0.080002


 28%|██▊       | 94/335 [03:20<12:48,  3.19s/it]

AI Trader bought:  $ 40.630001


 28%|██▊       | 95/335 [03:24<12:40,  3.17s/it]

AI Trader sold:  $ 40.500000  Profit: - $ 0.130001


 29%|██▉       | 97/335 [03:30<12:31,  3.16s/it]

AI Trader bought:  $ 35.590000


 29%|██▉       | 98/335 [03:33<12:26,  3.15s/it]

AI Trader sold:  $ 34.259998  Profit: - $ 1.330002


 30%|██▉       | 99/335 [03:36<12:22,  3.15s/it]

AI Trader bought:  $ 33.439999


 30%|███       | 101/335 [03:42<12:14,  3.14s/it]

AI Trader sold:  $ 31.370001  Profit: - $ 2.069998


 32%|███▏      | 108/335 [04:05<11:54,  3.15s/it]

AI Trader bought:  $ 34.139999


 33%|███▎      | 110/335 [04:11<11:44,  3.13s/it]

AI Trader sold:  $ 34.700001  Profit: $ 0.560001


 36%|███▋      | 122/335 [04:50<12:05,  3.40s/it]

AI Trader bought:  $ 34.119999


 37%|███▋      | 123/335 [04:53<11:44,  3.32s/it]

AI Trader sold:  $ 37.000000  Profit: $ 2.880001


 38%|███▊      | 126/335 [05:03<11:19,  3.25s/it]

AI Trader bought:  $ 34.970001


 38%|███▊      | 128/335 [05:09<10:58,  3.18s/it]

AI Trader sold:  $ 33.880001  Profit: - $ 1.090000


 43%|████▎     | 145/335 [06:06<10:16,  3.25s/it]

AI Trader bought:  $ 30.110001


 44%|████▎     | 146/335 [06:09<10:07,  3.21s/it]

AI Trader sold:  $ 30.110001  Profit: $ 0.000000


 45%|████▍     | 150/335 [06:21<09:42,  3.15s/it]

AI Trader bought:  $ 30.379999


 45%|████▌     | 152/335 [06:28<09:43,  3.19s/it]

AI Trader bought:  $ 28.250000


 46%|████▌     | 153/335 [06:31<09:36,  3.17s/it]

AI Trader sold:  $ 30.160000  Profit: - $ 0.219999


 47%|████▋     | 156/335 [06:40<09:19,  3.13s/it]

AI Trader bought:  $ 26.100000


 47%|████▋     | 159/335 [06:50<09:14,  3.15s/it]

AI Trader bought:  $ 24.490000


 49%|████▉     | 164/335 [07:06<09:06,  3.19s/it]

AI Trader bought:  $ 22.490000


 49%|████▉     | 165/335 [07:09<09:00,  3.18s/it]

AI Trader sold:  $ 23.450001  Profit: - $ 4.799999


 50%|████▉     | 166/335 [07:12<08:56,  3.17s/it]

AI Trader sold:  $ 23.790001  Profit: - $ 2.309999


 51%|█████▏    | 172/335 [07:31<08:30,  3.13s/it]

AI Trader sold:  $ 26.150000  Profit: $ 1.660000


 53%|█████▎    | 177/335 [07:46<08:14,  3.13s/it]

AI Trader sold:  $ 26.790001  Profit: $ 4.300001


 57%|█████▋    | 191/335 [08:31<07:36,  3.17s/it]

AI Trader bought:  $ 25.400000


 58%|█████▊    | 193/335 [08:37<07:24,  3.13s/it]

AI Trader sold:  $ 26.670000  Profit: $ 1.270000


 59%|█████▊    | 196/335 [08:46<07:18,  3.15s/it]

AI Trader bought:  $ 25.000000


 60%|█████▉    | 200/335 [08:59<07:10,  3.19s/it]

AI Trader sold:  $ 28.809999  Profit: $ 3.809999


 61%|██████    | 204/335 [09:12<06:51,  3.14s/it]

AI Trader bought:  $ 28.670000


 61%|██████    | 205/335 [09:15<06:46,  3.13s/it]

AI Trader sold:  $ 28.360001  Profit: - $ 0.309999


 61%|██████▏   | 206/335 [09:18<06:42,  3.12s/it]

AI Trader bought:  $ 28.670000


 62%|██████▏   | 207/335 [09:21<06:41,  3.14s/it]

AI Trader bought:  $ 28.799999


 62%|██████▏   | 209/335 [09:27<06:39,  3.17s/it]

AI Trader sold:  $ 28.360001  Profit: - $ 0.309999


 63%|██████▎   | 210/335 [09:30<06:32,  3.14s/it]

AI Trader sold:  $ 27.430000  Profit: - $ 1.369999


 63%|██████▎   | 211/335 [09:34<06:26,  3.12s/it]

AI Trader bought:  $ 27.500000


 63%|██████▎   | 212/335 [09:37<06:23,  3.12s/it]

AI Trader sold:  $ 27.049999  Profit: - $ 0.450001


 64%|██████▍   | 214/335 [09:43<06:29,  3.22s/it]

AI Trader bought:  $ 26.809999


 64%|██████▍   | 215/335 [09:46<06:20,  3.17s/it]

AI Trader sold:  $ 27.370001  Profit: $ 0.560001


 65%|██████▌   | 218/335 [09:56<06:05,  3.13s/it]

AI Trader bought:  $ 28.820000


 65%|██████▌   | 219/335 [09:59<06:02,  3.12s/it]

AI Trader sold:  $ 29.660000  Profit: $ 0.840000


 67%|██████▋   | 223/335 [10:11<05:50,  3.13s/it]

AI Trader bought:  $ 27.260000


 67%|██████▋   | 224/335 [10:15<05:50,  3.15s/it]

AI Trader sold:  $ 29.209999  Profit: $ 1.949999


 67%|██████▋   | 225/335 [10:18<05:47,  3.16s/it]

AI Trader bought:  $ 29.650000


 68%|██████▊   | 227/335 [10:24<05:41,  3.16s/it]

AI Trader bought:  $ 31.605000


 68%|██████▊   | 228/335 [10:27<05:36,  3.15s/it]

AI Trader sold:  $ 31.629999  Profit: $ 1.980000


 69%|██████▉   | 231/335 [10:37<05:28,  3.16s/it]

AI Trader bought:  $ 31.080000


 69%|██████▉   | 232/335 [10:40<05:23,  3.14s/it]

AI Trader bought:  $ 31.059999


 70%|██████▉   | 233/335 [10:43<05:20,  3.15s/it]

AI Trader bought:  $ 28.610001


 70%|██████▉   | 234/335 [10:46<05:16,  3.13s/it]

AI Trader bought:  $ 27.600000


 70%|███████   | 235/335 [10:49<05:13,  3.14s/it]

AI Trader sold:  $ 29.170000  Profit: - $ 2.434999


 70%|███████   | 236/335 [10:54<05:47,  3.51s/it]

AI Trader sold:  $ 28.549999  Profit: - $ 2.530001


 71%|███████   | 237/335 [10:58<06:01,  3.69s/it]

AI Trader sold:  $ 30.000000  Profit: - $ 1.059999


 71%|███████   | 238/335 [11:01<05:44,  3.55s/it]

AI Trader sold:  $ 31.510000  Profit: $ 2.900000


 71%|███████▏  | 239/335 [11:04<05:30,  3.44s/it]

AI Trader sold:  $ 30.469999  Profit: $ 2.869999


 72%|███████▏  | 240/335 [11:07<05:19,  3.36s/it]

AI Trader bought:  $ 31.200001


 72%|███████▏  | 241/335 [11:10<05:11,  3.31s/it]

AI Trader sold:  $ 32.430000  Profit: $ 1.230000


 73%|███████▎  | 244/335 [11:20<04:51,  3.20s/it]

AI Trader bought:  $ 33.990002


 73%|███████▎  | 245/335 [11:23<04:47,  3.19s/it]

AI Trader bought:  $ 36.410000


 73%|███████▎  | 246/335 [11:26<04:42,  3.17s/it]

AI Trader sold:  $ 35.660000  Profit: $ 1.669998


 74%|███████▎  | 247/335 [11:30<04:43,  3.23s/it]

AI Trader bought:  $ 35.889999


 74%|███████▍  | 248/335 [11:33<04:36,  3.18s/it]

AI Trader sold:  $ 35.650002  Profit: - $ 0.759998


 74%|███████▍  | 249/335 [11:36<04:31,  3.16s/it]

AI Trader sold:  $ 36.330002  Profit: $ 0.440002


 75%|███████▍  | 251/335 [11:42<04:23,  3.13s/it]

AI Trader bought:  $ 36.290001


 75%|███████▌  | 252/335 [11:45<04:19,  3.13s/it]

AI Trader sold:  $ 35.980000  Profit: - $ 0.310001


 76%|███████▌  | 253/335 [11:48<04:20,  3.17s/it]

AI Trader bought:  $ 36.910000


 76%|███████▌  | 254/335 [11:52<04:16,  3.16s/it]

AI Trader sold:  $ 35.660000  Profit: - $ 1.250000


 76%|███████▌  | 255/335 [11:55<04:12,  3.15s/it]

AI Trader bought:  $ 35.849998


 76%|███████▋  | 256/335 [11:58<04:07,  3.13s/it]

AI Trader sold:  $ 35.740002  Profit: - $ 0.109997


 77%|███████▋  | 258/335 [12:04<04:00,  3.12s/it]

AI Trader bought:  $ 35.900002


 77%|███████▋  | 259/335 [12:07<04:02,  3.18s/it]

AI Trader sold:  $ 36.259998  Profit: $ 0.359997


 78%|███████▊  | 260/335 [12:10<03:58,  3.17s/it]

AI Trader bought:  $ 36.400002


 78%|███████▊  | 261/335 [12:14<03:53,  3.16s/it]

AI Trader bought:  $ 36.110001


 78%|███████▊  | 262/335 [12:17<03:49,  3.15s/it]

AI Trader sold:  $ 35.869999  Profit: - $ 0.530003


 79%|███████▊  | 263/335 [12:20<03:47,  3.16s/it]

AI Trader sold:  $ 35.150002  Profit: - $ 0.959999


 79%|███████▉  | 266/335 [12:29<03:37,  3.16s/it]

AI Trader bought:  $ 32.509998


 80%|████████  | 269/335 [12:39<03:26,  3.12s/it]

AI Trader sold:  $ 32.590000  Profit: $ 0.080002


 84%|████████▍ | 282/335 [13:20<02:50,  3.21s/it]

AI Trader bought:  $ 32.130001


 84%|████████▍ | 283/335 [13:23<02:45,  3.19s/it]

AI Trader bought:  $ 32.540001


 85%|████████▍ | 284/335 [13:27<02:41,  3.17s/it]

AI Trader sold:  $ 32.820000  Profit: $ 0.689999


 87%|████████▋ | 291/335 [13:48<02:17,  3.12s/it]

AI Trader sold:  $ 36.020000  Profit: $ 3.480000


 87%|████████▋ | 292/335 [13:52<02:15,  3.15s/it]

AI Trader bought:  $ 34.340000


 87%|████████▋ | 293/335 [13:55<02:14,  3.21s/it]

AI Trader sold:  $ 34.349998  Profit: $ 0.009998


 94%|█████████▎| 314/335 [15:07<01:09,  3.33s/it]

AI Trader bought:  $ 31.900000


 95%|█████████▌| 319/335 [15:23<00:50,  3.17s/it]

AI Trader sold:  $ 35.000000  Profit: $ 3.100000


100%|█████████▉| 334/335 [16:10<00:03,  3.12s/it]

########################
TOTAL PROFIT: -7.044986724853516
########################


100%|██████████| 335/335 [16:13<00:00,  2.91s/it]
