In [14]:
import numpy as np 
import pandas as pd 
import random 
import matplotlib.pyplot as plt 
import torch 
import tensorflow as tf
import math  
from tqdm import tqdm_notebook , tqdm
from collections import deque

# DataLoading

In [15]:
df=pd.read_csv('2011-2019pricedata.csv')#loading Dataset
df=df.dropna() #dropping Nan values
print(df.head())

         Date         Open         High          Low        Close  \
0  2011-01-03  6177.450195  6178.549805  6147.200195  6157.600098   
1  2011-01-04  6172.750000  6181.049805  6124.399902  6146.350098   
2  2011-01-05  6141.350098  6141.350098  6062.350098  6079.799805   
3  2011-01-06  6107.000000  6116.149902  6022.299805  6048.250000   
4  2011-01-07  6030.899902  6051.200195  5883.600098  5904.600098   

     Adj Close  Volume  
0  6157.600098     0.0  
1  6146.350098     0.0  
2  6079.799805     0.0  
3  6048.250000     0.0  
4  5904.600098     0.0  


In [16]:
# opening price would represent the price at a particular day
dataset=list(df['Open'])
len(dataset)

2052

In [24]:
def stock_price_format(n):
    if n < 0 :
        return '-Rs {:2f}'.format(n)
    else :
        return '-Rs {:2f}'.format(n)

# Define State

State would basically include features  like current day price and would also include historical prices which would be used to represent current market conditions

In [25]:
def sigmoid(x):
    return 1/(1+math.exp(-x))

In [26]:
def state_creator(data, timestep, window_size):
    starting_id = timestep - window_size + 1
    if starting_id >= 0:
        windowed_data = list(data[starting_id:timestep+1])
    else:
        windowed_data = - starting_id * [data[0]] + list(data[0:timestep+1])
    state = []
    for i in range(window_size - 1):
        #state would basically consider the increase in prices for the past few days 
        #state would also use sigmoid function to recduce the skewness in the increase of price and ouputs between 0 and 1
        state.append(sigmoid(windowed_data[i+1] - windowed_data[i]))
    return np.array([state])

# Building the Trader and Model

In [27]:
class ModelandTrade:
    
    def __init__(self, state_size, action_space=3, model_name="ModelandTrade"):
        
        self.state_size = state_size
        self.action_space = action_space
        self.model_name = model_name
        self.inventory = []
        self.memory = deque(maxlen=2000)
        #exploration parameters
        self.gamma = 0.95
        self.epsilon = 1.0
        self.epsilon_final = 0.01
        self.epsilon_decay = 0.995
        #model creation
        self.model = self.model_builder()

    def model_builder(self):
        model = tf.keras.models.Sequential()
        model.add(tf.keras.layers.Dense(units=32, activation='relu', input_dim=self.state_size))
        model.add(tf.keras.layers.Dense(units=64, activation='relu'))
        model.add(tf.keras.layers.Dense(units=128, activation='relu'))
        model.add(tf.keras.layers.Dense(units=self.action_space, activation='linear'))
        #mean Square error 
        model.compile(loss='mse', optimizer=tf.keras.optimizers.Adam(lr=0.001))
    
        return model
    
    def trade(self, state):
        #exploration
        if random.random() <= self.epsilon:
            return random.randrange(self.action_space)
        #exploitation
        
        Q_value=self.model.predict(state,verbose=0)
        return np.argmax(Q_value[0])
    
    def batch_train(self,batch_size):
        batch = []
        for i in range(len(self.memory) - batch_size + 1, len(self.memory)):
            batch.append(self.memory[i])
      
        for state, action, reward, next_state, done in batch:
            Q_target=reward
            if not done:
                #q_target found using belman equation
                Q_target = reward + self.gamma * np.amax(self.model.predict(next_state,verbose=0)[0])
            target = self.model.predict(state,verbose=0)
            target[0][action] = Q_target
            #fitting the model
            self.model.fit(state, target, epochs=1, verbose=0)
            #Mean Square Error Loss

        if self.epsilon > self.epsilon_final:
            self.epsilon *= self.epsilon_decay

## Hyperparmeters

In [28]:
window_size = 10 
episodes = 1000

batch_size = 32
data_samples = len(dataset) - 1

In [29]:
trader=ModelandTrade(window_size)



# Reward function

In [39]:
def RewardFunction(action, dataset, datasamples, t, total_profit):
        reward=0
        if action == 1: #Buying
            trader.inventory.append(dataset[t])
            print("AI Trader bought: ", stock_price_format(dataset[t]))
      
        elif action == 2 and len(trader.inventory) > 0: #Selling
            buy_price = trader.inventory.pop(0)
      
            reward = max(dataset[t] - buy_price, 0)
            total_profit += dataset[t] - buy_price
            print("AI Trader sold: ", stock_price_format(dataset[t]), " Profit: " + stock_price_format(dataset[t] - buy_price) )
      
        if t == data_samples - 1:
            done = True
        else:
            done = False
            
        return reward, done, total_profit
    

# Training the Dataset

In [40]:
for episode in range(1, episodes + 1):
    state = state_creator(dataset, 0, window_size + 1)
    total_profit = 0
    trader.inventory = []
    for t in tqdm(range(data_samples)):
        action = trader.trade(state)
        next_state = state_creator(dataset, t+1, window_size + 1)
        reward, done, total_profit = RewardFunction(action, dataset, data_samples, t, total_profit)
        trader.memory.append((state, action, reward, next_state, done))
        state = next_state
        if done:
            print("TOTAL PROFIT: {}".format(total_profit))
        if len(trader.memory) > batch_size:
            trader.batch_train(batch_size)
    if episode % 100 == 0:
        trader.model.save("ai_trader_{}.h5".format(episode))
        

  0%|                                                                                         | 0/2051 [00:00<?, ?it/s]

AI Trader bought:  -Rs 6177.450195
AI Trader bought:  -Rs 6172.750000
AI Trader sold:  -Rs 6107.000000  Profit: -Rs -70.450195
AI Trader sold:  -Rs 5901.299805  Profit: -Rs -271.450195
AI Trader bought:  -Rs 5800.049805
AI Trader bought:  -Rs 5850.750000
AI Trader sold:  -Rs 5648.799805  Profit: -Rs -151.250000
AI Trader bought:  -Rs 5682.549805
AI Trader sold:  -Rs 5737.350098  Profit: -Rs -113.399902
AI Trader sold:  -Rs 5656.000000  Profit: -Rs -26.549805
AI Trader bought:  -Rs 5717.100098
AI Trader sold:  -Rs 5452.549805  Profit: -Rs -264.550293
AI Trader bought:  -Rs 5469.549805
AI Trader sold:  -Rs 5430.450195  Profit: -Rs -39.099610
AI Trader bought:  -Rs 5219.649902


  1%|█▏                                                                              | 30/2051 [00:05<06:19,  5.32it/s]

AI Trader bought:  -Rs 5467.750000


  2%|█▏                                                                              | 31/2051 [00:10<12:59,  2.59it/s]

AI Trader bought:  -Rs 5467.600098


  2%|█▎                                                                              | 33/2051 [00:19<31:53,  1.05it/s]

AI Trader bought:  -Rs 5557.549805


  2%|█▍                                                                            | 37/2051 [00:37<1:28:49,  2.65s/it]

AI Trader sold:  -Rs 5408.750000  Profit: -Rs 189.100098


  2%|█▍                                                                            | 38/2051 [00:42<1:42:23,  3.05s/it]

AI Trader bought:  -Rs 5321.049805


  2%|█▍                                                                            | 39/2051 [00:47<1:53:38,  3.39s/it]

AI Trader sold:  -Rs 5330.149902  Profit: -Rs -137.600098


  2%|█▌                                                                            | 40/2051 [00:51<2:02:56,  3.67s/it]

AI Trader sold:  -Rs 5382.000000  Profit: -Rs -85.600098


  2%|█▌                                                                            | 41/2051 [00:56<2:10:18,  3.89s/it]

AI Trader sold:  -Rs 5478.450195  Profit: -Rs -79.099610


  2%|█▌                                                                            | 42/2051 [01:00<2:16:33,  4.08s/it]

AI Trader bought:  -Rs 5586.200195


  2%|█▋                                                                            | 43/2051 [01:05<2:21:08,  4.22s/it]

AI Trader sold:  -Rs 5490.049805  Profit: -Rs 169.000000


  2%|█▋                                                                            | 44/2051 [01:09<2:25:18,  4.34s/it]

AI Trader bought:  -Rs 5466.100098


  2%|█▋                                                                            | 45/2051 [01:14<2:29:19,  4.47s/it]

AI Trader bought:  -Rs 5542.399902


  2%|█▋                                                                            | 46/2051 [01:19<2:32:21,  4.56s/it]

AI Trader sold:  -Rs 5516.100098  Profit: -Rs -70.100097


  2%|█▊                                                                            | 47/2051 [01:24<2:35:23,  4.65s/it]

AI Trader sold:  -Rs 5456.149902  Profit: -Rs -9.950196


  2%|█▊                                                                            | 48/2051 [01:29<2:36:18,  4.68s/it]

AI Trader sold:  -Rs 5436.500000  Profit: -Rs -105.899902


  2%|█▉                                                                            | 50/2051 [01:38<2:39:51,  4.79s/it]

AI Trader bought:  -Rs 5475.950195


  2%|█▉                                                                            | 51/2051 [01:43<2:37:58,  4.74s/it]

AI Trader sold:  -Rs 5455.399902  Profit: -Rs -20.550293


  3%|█▉                                                                            | 52/2051 [01:50<1:10:52,  2.13s/it]


KeyboardInterrupt: 