In [1]:
import warnings
warnings.filterwarnings("ignore")

#Imports
import gym
import numpy as np
from collections import deque
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import load_model
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
import random

#Create Gym
from gym import wrappers

  'nearest': pil_image.NEAREST,
  'bilinear': pil_image.BILINEAR,
  'bicubic': pil_image.BICUBIC,
  if hasattr(pil_image, 'HAMMING'):
  if hasattr(pil_image, 'BOX'):
  if hasattr(pil_image, 'LANCZOS'):


In [2]:
import math

# prints formatted price
def formatPrice(n):
	return ("-$" if n < 0 else "$") + "{0:.2f}".format(abs(n))

# returns the vector containing stock data from a fixed file
def getStockDataVec(key):
	vec = []
	lines = open("" + key + ".csv", "r").read().splitlines()

	for line in lines[1:]:
		vec.append(float(line.split(",")[4]))

	return vec

# returns the sigmoid
def sigmoid(x):
	return 1 / (1 + math.exp(-x))

# returns an an n-day state representation ending at time t
def getState(data, t, n):
	d = t - n + 1
	block = data[d:t + 1] if d >= 0 else -d * [data[0]] + data[0:t + 1] # pad with t0
	res = []
	for i in range(n - 1):
		res.append(sigmoid(block[i + 1] - block[i]))

	return np.array([res])


In [3]:
class Agent():
    def __init__(self, window_size, is_eval=False ,model_name=""):
        self.nS = window_size
        self.nA = 3
        self.memory = deque([], maxlen=1000)
        self.alpha = 0.001
        self.gamma = 0.95
        #Explore/Exploit
        self.epsilon = 1
        self.epsilon_min = 0.05
        self.epsilon_decay = 0.995
        self.model = self.build_model()
        self.loss = []
        
    def build_model(self):
        model =keras.Sequential() 
        model.add(keras.layers.Dense(24, input_dim=self.nS, activation='relu')) #[Input] -> Layer 1
        #   Dense: Densely connected layer https://keras.io/layers/core/
        #   24: Number of neurons
        #   input_dim: Number of input variables
        #   activation: Rectified Linear Unit (relu) ranges >= 0
        model.add(keras.layers.Dense(42, activation='relu')) #Layer 2 -> 3
        model.add(keras.layers.Dense(42, activation='relu')) #Layer 2 -> 3
        model.add(keras.layers.Dense(42, activation='relu')) #Layer 3 -> 4
        model.add(keras.layers.Dense(42, activation='relu')) #layer 4 -> 5
        model.add(keras.layers.Dense(self.nA, activation='linear')) #Layer 5 -> [output]
        #   Size has to match the output (different actions)
        #   Linear activation on the last layer
        model.compile(loss='mean_squared_error', #Loss function: Mean Squared Error
                      optimizer=keras.optimizers.Adam(lr=self.alpha)) #Optimaizer: Adam (Feel free to check other options)
        return model

    def act(self, state):#act
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.nA) #Explore
        action_vals = self.model.predict(state) #Exploit: Use the NN to predict the correct action from this state
        return np.argmax(action_vals[0])

    def test_action(self, state): #Exploit
        action_vals = self.model.predict(state)
        return np.argmax(action_vals[0])

    def memory(self, state, action, reward, nstate, done):
        #Store the experience in memory
        self.memory.append( (state, action, reward, nstate, done) )

    def expReplay(self, batch_size):
        #Execute the experience replay
        minibatch = random.sample( self.memory, batch_size ) #Randomly sample from memory

        #Convert to numpy for speed by vectorization
        x = []
        y = []
        np_array = np.array(minibatch)
        st = np.zeros((0,self.nS)) #States
        nst = np.zeros( (0,self.nS) )#Next States
        for i in range(len(np_array)): #Creating the state and next state np arrays
            st = np.append( st, np_array[i,0], axis=0)
            nst = np.append( nst, np_array[i,3], axis=0)
        st_predict = self.model.predict(st) #Here is the speedup! I can predict on the ENTIRE batch
        nst_predict = self.model.predict(nst)
        index = 0
        for state, action, reward, nstate, done in minibatch:
            x.append(state)
            #Predict from state
            nst_action_predict_model = nst_predict[index]
            if done == True: #Terminal: Just assign reward much like {* (not done) - QB[state][action]}
                target = reward
            else:   #Non terminal
                target = reward + self.gamma * np.amax(nst_action_predict_model)
            target_f = st_predict[index]
            target_f[action] = target
            y.append(target_f)
            index += 1
        #Reshape for Keras Fit
        x_reshape = np.array(x).reshape(batch_size,self.nS)
        y_reshape = np.array(y)
        epoch_count = 1 #Epochs is the number or iterations
        hist = self.model.fit(x_reshape, y_reshape, epochs=epoch_count, verbose=0)
        #Graph Losses
        for i in range(epoch_count):
            self.loss.append( hist.history['loss'][i] )
        #Decay Epsilon
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

In [4]:
import sys

if len(sys.argv) != 4:
	print ("Usage: python train.py [stock] [window] [episodes]")
	exit()


stock_name = input("Enter stock_name, window_size, Episode_count")
#Fill the given information when prompted: 
#Enter stock_name = GSPC_Training_Dataset
#window_size = 10GSPC_Training_Dataset
#Episode_count = 100 or it can be 10 or 20 or 30 and so on.

window_size = input()
episode_count = input()
stock_name = str(stock_name)
window_size = int(window_size)
episode_count = int(episode_count)

agent = Agent(window_size)
data = getStockDataVec(stock_name)
l = len(data) - 1
batch_size = 32 

for e in range(episode_count + 1):
	print ("Episode " + str(e) + "/" + str(episode_count))
	state = getState(data, 0, window_size + 1)
	total_profit = 0
	agent.inventory = []

	for t in range(l):
		action = agent.act(state)

		# sit
		next_state = getState(data, t + 1, window_size + 1)
		reward = 0

		if action == 1: # buy
			agent.inventory.append(data[t])
#			print ("Buy: " + formatPrice(data[t]))

		elif action == 2 and len(agent.inventory) > 0: # sell
			bought_price = agent.inventory.pop(0)
			reward = max(data[t] - bought_price, 0)
			total_profit += data[t] - bought_price       
#			print ("Sell: " + formatPrice(data[t]) + " | Profit: " + formatPrice(data[t] - bought_price))

		done = True if t == l - 1 else False
		agent.memory.append((state, action, reward, next_state, done))
		state = next_state

		if done:
			print ("--------------------------------")
			print ("Total Profit: " + formatPrice(total_profit))
            

		if len(agent.memory) > batch_size:
			agent.expReplay(batch_size)

	if e % 1 == 0:
		agent.model.save("model_ep" + str(e))

Usage: python train.py [stock] [window] [episodes]
Enter stock_name, window_size, Episode_countGSPC_Training_Dataset
10
100
Episode 0/100
--------------------------------
Total Profit: $3297.80
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
INFO:tensorflow:Assets written to: model_ep0\assets
Episode 1/100
--------------------------------
Total Profit: $125.12
INFO:tensorflow:Assets written to: model_ep1\assets
Episode 2/100
--------------------------------
Total Profit: -$645.32
INFO:tensorflow:Assets written to: model_ep2\assets
Episode 3/100
--------------------------------
Total Profit: -$2813.53
INFO:tensorflow:Assets written to: model_ep3\assets
Episode 4/100
--------------------------------
Total Profit: $1360.02
INFO:tensorflow:Assets written to: model_ep4\assets
Episode 5/100
------------------

KeyboardInterrupt: 

In [5]:
import sys
from tensorflow import keras
from tensorflow.keras.models import load_model


if len(sys.argv) != 3:
    print ("Usage: python evaluate.py [stock] [model]")
    exit()


stock_name = input("Enter Stock_name, Model_name")
model_name = input()
#Note: 
#Fill the given information when prompted: 
#Enter stock_name = GSPC_Evaluation_Dataset
#Model_name = respective model name

model = load_model("" + model_name)
window_size = model.layers[0].input.shape.as_list()[1]

agent = Agent(window_size, True, model_name)
data = getStockDataVec(stock_name)
l = len(data) - 1
batch_size = 32

state = getState(data, 0, window_size + 1)
total_profit = 0
agent.inventory = []

for t in range(l):
    action = agent.act(state)

    # sit
    next_state = getState(data, t + 1, window_size + 1)
    reward = 0

    if action == 1: # buy
        agent.inventory.append(data[t])
        print ("Buy: " + formatPrice(data[t]))

    elif action == 2 and len(agent.inventory) > 0: # sell
        bought_price = agent.inventory.pop(0)
        reward = max(data[t] - bought_price, 0)
        total_profit += data[t] - bought_price
        print ("Sell: " + formatPrice(data[t]) + " | Profit: " + formatPrice(data[t] - bought_price))

    done = True if t == l - 1 else False
    agent.memory.append((state, action, reward, next_state, done))
    state = next_state

    if done:
        print ("--------------------------------")
        print (stock_name + " Total Profit: " + formatPrice(total_profit))



Enter Stock_name, Model_nameGSPC_Evaluation_Dataset
model_ep0
Buy: $1273.85
Buy: $1271.50
Sell: $1274.48 | Profit: $0.63
Buy: $1285.96
Sell: $1283.76 | Profit: $12.26
Buy: $1293.24
Buy: $1295.02
Buy: $1280.26
Buy: $1290.84
Buy: $1291.18
Sell: $1307.59 | Profit: $21.63
Sell: $1304.03 | Profit: $10.79
Sell: $1307.10 | Profit: $12.08
Sell: $1319.05 | Profit: $38.79
Sell: $1324.57 | Profit: $33.73
Buy: $1320.88
Buy: $1321.87
Buy: $1329.15
Buy: $1332.32
Buy: $1328.01
Sell: $1336.32 | Profit: $45.14
Sell: $1340.43 | Profit: $19.55
Sell: $1343.01 | Profit: $21.14
Buy: $1315.44
Sell: $1307.40 | Profit: -$21.75
Buy: $1306.10
Buy: $1319.88
Buy: $1306.33
Buy: $1308.44
Buy: $1310.13
Buy: $1321.82
Sell: $1295.11 | Profit: -$37.21
Sell: $1304.28 | Profit: -$23.73
Buy: $1296.39
Sell: $1281.87 | Profit: -$33.57
Buy: $1256.88
Sell: $1273.72 | Profit: -$32.38
Buy: $1279.21
Sell: $1298.38 | Profit: -$21.50
Sell: $1293.77 | Profit: -$12.56
Sell: $1297.54 | Profit: -$10.90
Sell: $1310.19 | Profit: $0.06
Bu