## Import Functions

In [1]:
import keras
from keras.models import Sequential
from keras.models import load_model
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout
from keras.models import load_model
from keras.callbacks import ModelCheckpoint
from keras.optimizers import Adam
from keras.utils import multi_gpu_model
import numpy as np
import math
import random
import sys
from collections import deque

Using TensorFlow backend.


## Defining The Agent

In [2]:
class Agent:
    def __init__(self, state_size, is_eval=False, model_name=""):
        self.state_size = state_size # normalized previous days
        self.action_size = 3 # sit, buy, sell
        self.memory = deque(maxlen=1000)
        self.inventory = []
        self.model_name = model_name
        self.is_eval = is_eval
        self.gamma = 0.95
        self.epsilon = 1.0
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.model = self._model()
        #self.model = load_model("/Users/maharshichattopadhyay/Documents/Study/Major_Project/Model/" + model_name) if is_eval else self._model()
    def _model(self):
        model = Sequential()
        model.add(Dense(units=64, input_dim=self.state_size, activation="relu"))
        model.add(Dense(units=32, activation="relu"))
        model.add(Dense(units=8, activation="relu"))
        model.add(Dense(self.action_size, activation="linear"))
        model.compile(loss="mse", optimizer=Adam(lr=0.001))
        return model
    def act(self, state):
        if not self.is_eval and random.random() <= self.epsilon:
            return random.randrange(self.action_size)
        options = self.model.predict(state)
        return np.argmax(options[0])
    def expReplay(self, batch_size):
        mini_batch = []
        l = len(self.memory)
        for i in range(l - batch_size + 1, l):
            mini_batch.append(self.memory[i])
        for state, action, reward, next_state, done in mini_batch:
            target = reward
            if not done:
                target = reward + self.gamma * np.amax(self.model.predict(next_state)[0])
            target_f = self.model.predict(state)
            target_f[0][action] = target
            self.model.fit(state, target_f, epochs=1, verbose=0)
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay
            
    #def load_checkpoint(self, path):
        #self.model.load_weights(path)

## Math-Functions
#### Intuition: This Function Defines How The Input Will Be Processed and The Output Will Be Shown

In [3]:
def formatPrice(n):
    return ("-$" if n < 0 else "$") + "{0:.2f}".format(abs(n))
def getStockDataVec(CSV):
    vec = []
    lines = open(CSV, "r").read().splitlines()
    for line in lines[1:]:
        vec.append(float(line.split(",")[8]))# Selecting The Close Price And Multiplying it By 1000
    return vec
def sigmoid(x):
    return 1 / (1 + math.exp(-x))
def getState(data, t, n):
    d = t - n + 1
    block = data[d:t + 1] if d >= 0 else -d * [data[0]] + data[0:t + 1] # pad with t0
    res = []
    for i in range(n - 1):
        res.append(sigmoid(block[i + 1] - block[i]))
    return np.array([res])

## Importing From GPU To CPU

In [33]:
def make_parallel(model, gpu_count):
    def get_slice(data, idx, parts):
        shape = tf.shape(data)
        size = tf.concat([ shape[:1] // parts, shape[1:] ],axis=0)
        stride = tf.concat([ shape[:1] // parts, shape[1:]*0 ],axis=0)
        start = stride * idx
        return tf.slice(data, start, size)
    outputs_all = []
    for i in range(len(model.outputs)):
        outputs_all.append([])

    #Place a copy of the model on each GPU, each getting a slice of the batch
    for i in range(gpu_count):
        with tf.device('/gpu:%d' % i):
            with tf.name_scope('tower_%d' % i) as scope:

                inputs = []
                #Slice each input into a piece for processing on this GPU
                for x in model.inputs:
                    input_shape = tuple(x.get_shape().as_list())[1:]
                    slice_n = Lambda(get_slice, output_shape=input_shape, arguments={'idx':i,'parts':gpu_count})(x)
                    inputs.append(slice_n)                

                outputs = model(inputs)

                if not isinstance(outputs, list):
                    outputs = [outputs]

                #Save all the outputs for merging back together later
                for l in range(len(outputs)):
                    outputs_all[l].append(outputs[l])
     # merge outputs on CPU
    with tf.device('/cpu:0'):
        merged = []
        for outputs in outputs_all:
            merged.append(merge(outputs, mode='concat', concat_axis=0))

        return Model(input=model.inputs, output=merged)

## Training The Model

In [4]:
stock_name="/Users/maharshichattopadhyay/Documents/Study/Major_Project/DataSet/Final_Dataset/Final_Data_MCB_Train.csv"
window_size=10
episode_count = 1000
agent = Agent(window_size)
data = getStockDataVec(stock_name)
l = len(data) - 1
batch_size = 32
#path = '/Users/maharshichattopadhyay/Desktop/Model/No_Of_Episodes_380.h5'
#agent.load_checkpoint(path)
for e in range(episode_count + 1):
    print("Episode " + str(e) + "/" + str(episode_count))
    state = getState(data, 0, window_size + 1)
    total_profit = 0
    agent.inventory = []
    for t in range(l):
        action = agent.act(state)
        # sit
        next_state = getState(data, t + 1, window_size + 1)
        reward = 0
        if action == 1: # buy
            agent.inventory.append(data[t])
            print("Buy: " + formatPrice(data[t]))
        elif action == 2 and len(agent.inventory) > 0: # sell
            bought_price = agent.inventory.pop(0)
            reward = max(data[t] - bought_price, 0)
            total_profit += data[t] - bought_price
            print("Sell: " + formatPrice(data[t]) + " | Profit: " + formatPrice(data[t] - bought_price))
        done = True if t == l - 1 else False
        agent.memory.append((state, action, reward, next_state, done))
        state = next_state
        if done:
            print("--------------------------------")
            print("Total Profit: " + formatPrice(total_profit))
            print("--------------------------------")
        if len(agent.memory) > batch_size:
            agent.expReplay(batch_size)
        if e % 10 == 0:
            filepath="/Users/maharshichattopadhyay/Documents/Study/Major_Project/Model/No_Of_Episodes_" + str(e)+".hdf5"
            agent.model.save(filepath)

Instructions for updating:
Colocations handled automatically by placer.
Episode 0/1000
Buy: $224.20
Buy: $223.80
Sell: $223.50 | Profit: -$0.70
Buy: $242.10
Sell: $239.55 | Profit: $15.75
Buy: $238.00
Sell: $238.05 | Profit: -$4.05
Buy: $231.20
Sell: $229.10 | Profit: -$8.90
Buy: $228.25
Buy: $229.76
Buy: $232.50
Sell: $231.40 | Profit: $0.20
Buy: $225.00
Buy: $222.00
Sell: $228.00 | Profit: -$0.25
Buy: $224.75
Sell: $222.04 | Profit: -$7.72
Buy: $213.50
Sell: $207.10 | Profit: -$25.40
Sell: $210.10 | Profit: -$14.90
Sell: $213.55 | Profit: -$8.45
Instructions for updating:
Use tf.cast instead.
Buy: $213.25
Sell: $211.33 | Profit: -$13.42
Sell: $206.00 | Profit: -$7.50
Buy: $205.55
Buy: $195.89
Sell: $195.55 | Profit: -$17.70
Buy: $203.50
Sell: $210.60 | Profit: $5.05
Sell: $210.90 | Profit: $15.01
Sell: $216.50 | Profit: $13.00
Buy: $224.00
Buy: $226.10
Sell: $223.61 | Profit: -$0.39
Sell: $225.17 | Profit: -$0.93
Buy: $214.01
Sell: $192.20 | Profit: -$21.81


KeyboardInterrupt: 

## Testing The Model

In [7]:
stock_name="/Users/maharshichattopadhyay/Documents/Study/Major_Project/DataSet/Final_Dataset/Final_Data_MCB_Test.csv"
model_name = "/Users/maharshichattopadhyay/Documents/Study/Major_Project/Model/No_Of_Episodes_100.hdf5"
model = agent.model.load_weights(model_name)
#model = agent.model.load_weights(model_name)[-4]
window_size = agent.model.layers[0].input.shape.as_list()[1]
agent = Agent(window_size, True, model_name)
data = getStockDataVec(stock_name)
l = len(data) - 1
batch_size = 32
state = getState(data, 0, window_size + 1)
total_profit = 0
agent.inventory = []
for t in range(l):
    action = agent.act(state)
    # sit
    next_state = getState(data, t + 1, window_size + 1)
    reward = 0
    if action == 1: # buy
        agent.inventory.append(data[t])
        print("Buy: " + formatPrice(data[t]))
    elif action == 2 and len(agent.inventory) > 0: # sell
        bought_price = agent.inventory.pop(0)
        reward = max(data[t] - bought_price, 0)
        total_profit += data[t] - bought_price
        print("Sell: " + formatPrice(data[t]) + " | Profit: " + formatPrice(data[t] - bought_price))
    done = True if t == l - 1 else False
    agent.memory.append((state, action, reward, next_state, done))
    state = next_state
    if done:
        print("--------------------------------")
        print(stock_name + " Total Profit: " + formatPrice(total_profit))
        print("--------------------------------")

Buy: $232.35
Buy: $235.51
Buy: $237.90
Buy: $240.00
Sell: $237.00 | Profit: $4.65
Sell: $236.60 | Profit: $1.09
Buy: $254.00
Buy: $244.51
Buy: $242.10
Buy: $242.70
Buy: $241.50
Buy: $241.00
Buy: $240.00
Buy: $237.00
Buy: $229.75
Buy: $225.10
Buy: $224.00
Buy: $223.30
Buy: $226.00
Buy: $226.70
Sell: $228.00 | Profit: -$9.90
Buy: $228.00
Buy: $236.00
Buy: $237.02
Sell: $235.00 | Profit: -$5.00
Buy: $227.00
Buy: $231.99
Buy: $227.62
Buy: $227.58
Buy: $226.80
Buy: $218.00
Buy: $218.00
Buy: $219.91
Sell: $218.80 | Profit: -$35.20
Buy: $216.00
Buy: $213.97
Buy: $216.00
Buy: $216.55
Sell: $212.00 | Profit: -$32.51
Buy: $228.75
Buy: $248.25
Buy: $252.50
Buy: $248.49
Buy: $244.10
Buy: $241.00
Sell: $238.00 | Profit: -$4.10
Buy: $214.00
Buy: $215.80
Sell: $214.60 | Profit: -$28.10
Sell: $206.76 | Profit: -$34.74
Buy: $208.01
Buy: $197.00
Buy: $205.00
Buy: $209.98
Sell: $198.25 | Profit: -$42.75
Buy: $200.10
Buy: $202.10
Buy: $197.55
Buy: $205.00
Buy: $207.50
Buy: $214.70
Buy: $205.25
Buy: $211.0