<a href="https://colab.research.google.com/github/pratjz/Tensorflow/blob/master/6_Reinforcement_Learning_for_Stock_Market_Trading.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Stage 1: Installing dependencies and environment setup


In [1]:
#!pip install tensorflow-gpu==2.0.0.alpha0

In [2]:
!pip install pandas-datareader



## Stage 2: Importing project dependencies

In [3]:
import math
import random
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import pandas_datareader as data_reader

from tqdm import tqdm_notebook, tqdm
from collections import deque

In [4]:
tf.__version__

'2.3.0'

## Stage 3: Building the AI Trader network

In [5]:
class AI_Trader():
  
  def __init__(self, state_size, action_space=3, model_name="AITrader"): #Stay, Buy, Sell
    
    self.state_size = state_size
    self.action_space = action_space
    self.memory = deque(maxlen=2000)
    self.inventory = []
    self.model_name = model_name
    
    self.gamma = 0.95
    self.epsilon = 1.0
    self.epsilon_final = 0.01
    self.epsilon_decay = 0.995
    
    self.model = self.model_builder()
    
  def model_builder(self):
    
    model = tf.keras.models.Sequential()    
    model.add(tf.keras.layers.Dense(units=32, activation='relu', input_dim=self.state_size))    
    model.add(tf.keras.layers.Dense(units=64, activation='relu'))    
    model.add(tf.keras.layers.Dense(units=128, activation='relu'))    
    model.add(tf.keras.layers.Dense(units=self.action_space, activation='linear'))    
    model.compile(loss='mse', optimizer=tf.keras.optimizers.Adam(lr=0.001))
        
    return model
  
  def trade(self, state):
    
    if random.random() <= self.epsilon:
      return random.randrange(self.action_space)
    
    actions = self.model.predict(state)
    return np.argmax(actions[0])
  
  
  def batch_train(self, batch_size):
    
    batch = []
    for i in range(len(self.memory) - batch_size + 1, len(self.memory)):
      batch.append(self.memory[i])
      
    for state, action, reward, next_state, done in batch:
      reward = reward
      if not done:
        reward = reward + self.gamma * np.amax(self.model.predict(next_state)[0])
        
      target = self.model.predict(state)
      target[0][action] = reward
      
      self.model.fit(state, target, epochs=1, verbose=0)
      
    if self.epsilon > self.epsilon_final:
      self.epsilon *= self.epsilon_decay

## Stage 4: Dataset preprocessing

### Defining helper functions

#### Sigmoid

In [6]:
def sigmoid(x):
  return 1 / (1 + math.exp(-x))

#### Price format function

In [7]:
def stocks_price_format(n):
  if n < 0:
    return "- $ {0:2f}".format(abs(n))
  else:
    return "$ {0:2f}".format(abs(n))

#### Dataset loader

In [8]:
def dataset_loader(stock_name):
  
  #Complete the dataset loader function
  dataset = data_reader.DataReader(stock_name, data_source="yahoo")
  
  start_date = str(dataset.index[0]).split()[0]
  end_date = str(dataset.index[-1]).split()[0]
  
  close = dataset['Close']

  return close

### State creator

In [9]:
def state_creator(data, timestep, window_size):
  
  starting_id = timestep - window_size + 1
  
  if starting_id >= 0:
    windowed_data = data[starting_id:timestep+1]
  else:
    windowed_data = - starting_id * [data[0]] + list(data[0:timestep+1])
    
  state = []
  for i in range(window_size - 1):
    state.append(sigmoid(windowed_data[i+1] - windowed_data[i]))
    
  return np.array([state])

### Loading a dataset

In [10]:
stock_name = "AAPL"
data = dataset_loader(stock_name)

## Stage 5: Training the AI Trader

### Setting hyper parameters

In [11]:
window_size = 10
episodes = 1000

batch_size = 32
data_samples = len(data) - 1

### Defining the Trader model

In [12]:
trader = AI_Trader(window_size)

In [13]:
trader.model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 32)                352       
_________________________________________________________________
dense_1 (Dense)              (None, 64)                2112      
_________________________________________________________________
dense_2 (Dense)              (None, 128)               8320      
_________________________________________________________________
dense_3 (Dense)              (None, 3)                 387       
Total params: 11,171
Trainable params: 11,171
Non-trainable params: 0
_________________________________________________________________


### Training loop

In [None]:
for episode in range(1, episodes + 1):
  
  print("Episode: {}/{}".format(episode, episodes))
  
  state = state_creator(data, 0, window_size + 1)
  
  total_profit = 0
  trader.inventory = []
  
  for t in tqdm(range(data_samples)):
    
    action = trader.trade(state)
    
    next_state = state_creator(data, t+1, window_size + 1)
    reward = 0
    
    if action == 1: #Buying
      trader.inventory.append(data[t])
      print("AI Trader bought: ", stocks_price_format(data[t]))
      
    elif action == 2 and len(trader.inventory) > 0: #Selling
      buy_price = trader.inventory.pop(0)
      
      reward = max(data[t] - buy_price, 0)
      total_profit += data[t] - buy_price
      print("AI Trader sold: ", stocks_price_format(data[t]), " Profit: " + stocks_price_format(data[t] - buy_price) )
      
    if t == data_samples - 1:
      done = True
    else:
      done = False
      
    trader.memory.append((state, action, reward, next_state, done))
    
    state = next_state
    
    if done:
      print("########################")
      print("TOTAL PROFIT: {}".format(total_profit))
      print("########################")
    
    if len(trader.memory) > batch_size:
      trader.batch_train(batch_size)
      
  if episode % 10 == 0:
    trader.model.save("ai_trader_{}.h5".format(episode))
    

  0%|          | 0/1256 [00:00<?, ?it/s]

Episode: 1/1000
AI Trader bought:  $ 29.875000
AI Trader sold:  $ 30.642500  Profit: $ 0.767500
AI Trader bought:  $ 30.500000
AI Trader bought:  $ 30.264999
AI Trader bought:  $ 30.142500
AI Trader sold:  $ 29.192499  Profit: - $ 1.307501
AI Trader sold:  $ 28.545000  Profit: - $ 1.719999
AI Trader sold:  $ 29.322500  Profit: - $ 0.820000
AI Trader bought:  $ 29.437500
AI Trader sold:  $ 29.719999  Profit: $ 0.282499
AI Trader bought:  $ 29.452499
AI Trader sold:  $ 29.575001  Profit: $ 0.122501
AI Trader bought:  $ 29.334999
AI Trader sold:  $ 29.070000  Profit: - $ 0.264999
AI Trader bought:  $ 29.757500
AI Trader sold:  $ 28.905001  Profit: - $ 0.852499


  3%|▎         | 35/1256 [00:09<28:34,  1.40s/it]

AI Trader bought:  $ 27.245001


  3%|▎         | 36/1256 [00:12<36:12,  1.78s/it]

AI Trader sold:  $ 26.507500  Profit: - $ 0.737501


  3%|▎         | 40/1256 [00:23<50:13,  2.48s/it]

AI Trader bought:  $ 27.007500


  3%|▎         | 41/1256 [00:26<51:43,  2.55s/it]

AI Trader sold:  $ 26.705000  Profit: - $ 0.302500


  3%|▎         | 43/1256 [00:31<53:52,  2.67s/it]

AI Trader bought:  $ 26.830000


  4%|▎         | 44/1256 [00:34<55:11,  2.73s/it]

AI Trader bought:  $ 26.315001


  4%|▎         | 45/1256 [00:37<55:25,  2.75s/it]

AI Trader sold:  $ 26.337500  Profit: - $ 0.492500


  4%|▎         | 46/1256 [00:40<56:01,  2.78s/it]

AI Trader sold:  $ 25.677500  Profit: - $ 0.637501


  4%|▍         | 52/1256 [00:56<56:01,  2.79s/it]

AI Trader bought:  $ 24.347500


  4%|▍         | 54/1256 [01:02<55:52,  2.79s/it]

AI Trader sold:  $ 24.282499  Profit: - $ 0.065001


  4%|▍         | 55/1256 [01:05<56:02,  2.80s/it]

AI Trader bought:  $ 24.165001


  4%|▍         | 56/1256 [01:08<56:14,  2.81s/it]

AI Trader sold:  $ 24.197500  Profit: $ 0.032499


  5%|▍         | 57/1256 [01:10<55:53,  2.80s/it]

AI Trader bought:  $ 24.075001


  5%|▍         | 58/1256 [01:13<55:47,  2.79s/it]

AI Trader sold:  $ 25.355000  Profit: $ 1.279999


  5%|▍         | 60/1256 [01:19<56:19,  2.83s/it]

AI Trader bought:  $ 24.997499


  5%|▍         | 61/1256 [01:22<57:17,  2.88s/it]

AI Trader sold:  $ 23.355000  Profit: - $ 1.642500


  5%|▌         | 65/1256 [01:33<55:36,  2.80s/it]

AI Trader bought:  $ 23.620001


  5%|▌         | 66/1256 [01:36<56:03,  2.83s/it]

AI Trader bought:  $ 24.087500


  5%|▌         | 67/1256 [01:39<55:49,  2.82s/it]

AI Trader sold:  $ 24.150000  Profit: $ 0.529999


  6%|▌         | 70/1256 [01:47<54:31,  2.76s/it]

AI Trader sold:  $ 23.747499  Profit: - $ 0.340000


  6%|▋         | 79/1256 [02:12<54:25,  2.77s/it]

AI Trader bought:  $ 23.672501


  6%|▋         | 80/1256 [02:15<54:07,  2.76s/it]

AI Trader sold:  $ 24.025000  Profit: $ 0.352499


  6%|▋         | 81/1256 [02:18<54:06,  2.76s/it]

AI Trader bought:  $ 24.190001


  7%|▋         | 82/1256 [02:21<54:54,  2.81s/it]

AI Trader bought:  $ 24.227501


  7%|▋         | 83/1256 [02:24<55:05,  2.82s/it]

AI Trader bought:  $ 24.172501


  7%|▋         | 84/1256 [02:26<54:31,  2.79s/it]

AI Trader sold:  $ 25.132500  Profit: $ 0.942499


  7%|▋         | 85/1256 [02:29<54:36,  2.80s/it]

AI Trader bought:  $ 25.187500


  7%|▋         | 86/1256 [02:32<54:34,  2.80s/it]

AI Trader bought:  $ 25.375000


  7%|▋         | 87/1256 [02:35<54:07,  2.78s/it]

AI Trader sold:  $ 25.752501  Profit: $ 1.525000


  7%|▋         | 88/1256 [02:37<54:37,  2.81s/it]

AI Trader bought:  $ 25.467501


  7%|▋         | 90/1256 [02:43<54:38,  2.81s/it]

AI Trader sold:  $ 25.280001  Profit: $ 1.107500


  7%|▋         | 91/1256 [02:46<54:21,  2.80s/it]

AI Trader sold:  $ 25.292500  Profit: $ 0.105000


  7%|▋         | 92/1256 [02:49<54:01,  2.79s/it]

AI Trader sold:  $ 25.565001  Profit: $ 0.190001


  7%|▋         | 94/1256 [02:54<54:10,  2.80s/it]

AI Trader bought:  $ 26.145000


  8%|▊         | 95/1256 [02:57<53:49,  2.78s/it]

AI Trader sold:  $ 26.492500  Profit: $ 1.025000


  8%|▊         | 96/1256 [03:00<53:41,  2.78s/it]

AI Trader sold:  $ 26.450001  Profit: $ 0.305000


  8%|▊         | 99/1256 [03:08<54:11,  2.81s/it]

AI Trader bought:  $ 26.680000


  8%|▊         | 100/1256 [03:11<53:53,  2.80s/it]

AI Trader bought:  $ 26.532499


  8%|▊         | 101/1256 [03:14<53:39,  2.79s/it]

AI Trader bought:  $ 26.417500


  8%|▊         | 102/1256 [03:17<53:43,  2.79s/it]

AI Trader bought:  $ 26.297501


  8%|▊         | 103/1256 [03:19<54:13,  2.82s/it]

AI Trader bought:  $ 26.920000


  8%|▊         | 104/1256 [03:22<54:29,  2.84s/it]

AI Trader sold:  $ 27.389999  Profit: $ 0.709999


  8%|▊         | 105/1256 [03:25<53:56,  2.81s/it]

AI Trader sold:  $ 27.247499  Profit: $ 0.715000


  8%|▊         | 106/1256 [03:28<53:50,  2.81s/it]

AI Trader sold:  $ 27.497499  Profit: $ 1.080000


  9%|▊         | 108/1256 [03:34<53:49,  2.81s/it]

AI Trader sold:  $ 27.452499  Profit: $ 1.154999


  9%|▊         | 109/1256 [03:37<55:12,  2.89s/it]

AI Trader sold:  $ 27.740000  Profit: $ 0.820000


  9%|▉         | 111/1256 [03:42<54:14,  2.84s/it]

AI Trader bought:  $ 27.165001


  9%|▉         | 112/1256 [03:45<53:46,  2.82s/it]

AI Trader sold:  $ 27.254999  Profit: $ 0.089998


  9%|▉         | 114/1256 [03:51<54:00,  2.84s/it]

AI Trader bought:  $ 28.010000


  9%|▉         | 115/1256 [03:54<54:46,  2.88s/it]

AI Trader sold:  $ 28.025000  Profit: $ 0.014999


  9%|▉         | 117/1256 [03:59<53:40,  2.83s/it]

AI Trader bought:  $ 26.870001


  9%|▉         | 119/1256 [04:05<52:44,  2.78s/it]

AI Trader sold:  $ 26.782499  Profit: - $ 0.087502


 10%|█         | 131/1256 [04:38<52:26,  2.80s/it]

AI Trader bought:  $ 23.180000


 11%|█         | 133/1256 [04:44<52:44,  2.82s/it]

AI Trader sold:  $ 23.355000  Profit: $ 0.174999


 11%|█         | 140/1256 [05:03<51:21,  2.76s/it]

AI Trader bought:  $ 23.549999


 11%|█▏        | 142/1256 [05:09<51:25,  2.77s/it]

AI Trader sold:  $ 24.107500  Profit: $ 0.557501


 12%|█▏        | 151/1256 [05:34<50:19,  2.73s/it]

AI Trader bought:  $ 24.657499


 12%|█▏        | 152/1256 [05:36<50:26,  2.74s/it]

AI Trader bought:  $ 24.757500


 12%|█▏        | 154/1256 [05:42<50:59,  2.78s/it]

AI Trader bought:  $ 24.912500


 12%|█▏        | 155/1256 [05:45<50:48,  2.77s/it]

AI Trader bought:  $ 24.707500


 12%|█▏        | 156/1256 [05:48<50:52,  2.78s/it]

AI Trader bought:  $ 24.334999


 12%|█▎        | 157/1256 [05:50<50:50,  2.78s/it]

AI Trader sold:  $ 24.365000  Profit: - $ 0.292500


 13%|█▎        | 160/1256 [05:59<51:39,  2.83s/it]

AI Trader sold:  $ 23.832500  Profit: - $ 0.924999


 13%|█▎        | 162/1256 [06:05<50:48,  2.79s/it]

AI Trader bought:  $ 23.977501


 13%|█▎        | 163/1256 [06:07<50:17,  2.76s/it]

AI Trader bought:  $ 23.887501


 13%|█▎        | 164/1256 [06:10<51:50,  2.85s/it]

AI Trader bought:  $ 24.025000


 13%|█▎        | 168/1256 [06:21<50:44,  2.80s/it]

AI Trader bought:  $ 23.600000


 14%|█▎        | 170/1256 [06:27<51:22,  2.84s/it]

AI Trader bought:  $ 23.972500


 14%|█▎        | 172/1256 [06:33<52:07,  2.89s/it]

AI Trader bought:  $ 23.882500


 14%|█▍        | 173/1256 [06:36<51:46,  2.87s/it]

AI Trader sold:  $ 23.985001  Profit: - $ 0.927500


 14%|█▍        | 174/1256 [06:39<52:09,  2.89s/it]