<a href="https://colab.research.google.com/github/neltoms/Reinforcement-Bot/blob/mo_bot/ms_trading_bot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
%tensorflow_version 1.x

TensorFlow 1.x selected.


In [0]:
# from google.colab import drive
# drive.mount('/content/drive')

In [2]:
import keras
from keras.models import Sequential
from keras.models import load_model
from keras.layers import Dense
from keras.optimizers import Adam

import numpy as np
import random
from collections import deque
import sys

Using TensorFlow backend.


In [0]:
class Agent:
    """Deep Q-learning agent that picks one of three actions: sit, buy or sell.

    The agent keeps a bounded replay memory of
    ``(state, action, reward, next_state, done)`` tuples and a Keras model
    that maps a state to one Q-value per action.
    """

    def __init__(self, state_size, is_eval=False, model_name="",
                 model_dir="/content/drive/My Drive/TradingBot/models/"):
        """Create a fresh agent, or load a saved one when ``is_eval`` is true.

        Args:
            state_size: Width of the network input layer.
            is_eval: When true, load ``model_name`` from ``model_dir`` and
                disable random exploration in ``act``.
            model_name: File name of the saved model (only read when
                ``is_eval`` is true).
            model_dir: Path prefix that ``model_name`` is appended to. It is
                concatenated as-is, so it must end with a path separator.
        """
        self.state_size = state_size  # normalized previous days
        self.action_size = 3  # sit, buy, sell
        self.memory = deque(maxlen=1000)  # oldest transitions drop off first
        self.inventory = []
        self.model_name = model_name
        self.is_eval = is_eval

        self.gamma = 0.95  # discount applied to the best next-state Q-value
        self.epsilon = 1.0  # probability of taking a random action
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995

        if is_eval:
            self.model = load_model(model_dir + model_name)
        else:
            self.model = self._model()

    def _model(self):
        """Build and compile the Q-network (64-32-8 ReLU layers, linear output)."""
        model = Sequential()
        model.add(Dense(units=64, input_dim=self.state_size, activation="relu"))
        model.add(Dense(units=32, activation="relu"))
        model.add(Dense(units=8, activation="relu"))
        model.add(Dense(self.action_size, activation="linear"))
        model.compile(loss="mse", optimizer=Adam(lr=0.001))

        return model

    def act(self, state):
        """Return the index of the action to take for ``state``.

        Outside evaluation mode a uniformly random action is returned with
        probability ``epsilon``; otherwise the action with the highest
        predicted Q-value is returned.
        """
        if not self.is_eval and random.random() <= self.epsilon:
            return random.randrange(self.action_size)

        options = self.model.predict(state)
        return np.argmax(options[0])

    def expReplay(self, batch_size):
        """Fit the model on the ``batch_size`` most recent transitions.

        If fewer than ``batch_size`` transitions are stored, all of them are
        used. After the pass, ``epsilon`` is multiplied by ``epsilon_decay``
        once if it is still above ``epsilon_min``.
        """
        # Take exactly the last `batch_size` entries. The start index is
        # clamped at 0 so a short memory never produces a negative index.
        first = max(len(self.memory) - batch_size, 0)
        mini_batch = list(self.memory)[first:]

        for state, action, reward, next_state, done in mini_batch:
            target = reward
            if not done:
                # Bootstrapped target: reward plus discounted best next Q-value.
                target = reward + self.gamma * np.amax(self.model.predict(next_state)[0])

            # Only the Q-value of the action actually taken is moved towards
            # the target; the other outputs keep their current predictions.
            target_f = self.model.predict(state)
            target_f[0][action] = target
            self.model.fit(state, target_f, epochs=1, verbose=0)

        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay


In [0]:
import numpy as np
import math

In [0]:
def formatPrice(n):
    """Return ``n`` as a dollar string with two decimals, e.g. ``-$3.46``.

    The sign goes in front of the dollar symbol; nothing is printed.
    """
    prefix = "$"
    if n < 0:
        prefix = "-$"
    return prefix + format(abs(n), ".2f")

def getStockDataVec(key, data_dir="/content/drive/My Drive/TradingBot/data/"):
    """Read ``<data_dir>/<key>.csv`` and return one column as a list of floats.

    The first line of the file is treated as a header and skipped. From every
    remaining non-blank line, the fifth comma-separated field (index 4) is
    parsed as a float.
    NOTE(review): index 4 is presumably the closing price - confirm against
    the CSV header.

    Args:
        key: File name without the ``.csv`` extension.
        data_dir: Directory containing the CSV file.

    Returns:
        List of floats, one per data row, in file order.
    """
    import os

    path = os.path.join(data_dir, key + ".csv")
    # Use a context manager so the file handle is closed even if parsing fails.
    with open(path, "r") as handle:
        lines = handle.read().splitlines()

    vec = []
    for line in lines[1:]:
        if not line.strip():
            continue  # tolerate blank lines, e.g. a trailing newline block
        vec.append(float(line.split(",")[4]))

    return vec

def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + e**-x)`` of ``x``.

    The two branches are algebraically identical. Each one only ever calls
    ``math.exp`` with a non-positive argument, so large-magnitude inputs
    saturate to 0.0 or 1.0 instead of raising ``OverflowError``.
    """
    if x >= 0:
        return 1 / (1 + math.exp(-x))
    exp_x = math.exp(x)
    return exp_x / (1 + exp_x)

def getState(data, t, n):
    """Return an n-day state representation ending at time ``t``.

    A window of ``n`` values ending at ``data[t]`` is taken; when the window
    would start before index 0 it is left-padded with copies of ``data[0]``.
    The state is the sigmoid of each of the ``n - 1`` consecutive differences,
    wrapped in an outer list so the result is a 2-D array with a single row.
    """
    start = t - n + 1
    if start >= 0:
        window = data[start:t + 1]
    else:
        # pad with t0
        window = -start * [data[0]] + data[0:t + 1]

    deltas = [sigmoid(window[k + 1] - window[k]) for k in range(n - 1)]
    return np.array([deltas])

In [0]:
# from agent.agent import Agent
# from functions import *


In [12]:
# Training run. The settings are fixed here in place of the command-line
# arguments "[stock] [window] [episodes]" that the script version read from
# sys.argv.
stock_name, window_size, episode_count = "^GSPC", 10, 1

agent = Agent(window_size)
data = getStockDataVec(stock_name)
l = len(data) - 1
batch_size = 32

for e in range(episode_count + 1):
    print("Episode " + str(e) + "/" + str(episode_count))
    # getState(n) yields n - 1 features, hence window_size + 1.
    state = getState(data, 0, window_size + 1)

    total_profit = 0
    agent.inventory = []

    for t in range(l):
        action = agent.act(state)

        # Defaults for "sit" (and for a sell request with an empty inventory).
        next_state = getState(data, t + 1, window_size + 1)
        reward = 0
        price = data[t]

        if action == 1:  # buy
            agent.inventory.append(price)
            print("Buy: " + formatPrice(price))

        elif action == 2 and agent.inventory:  # sell the oldest holding
            bought_price = agent.inventory.pop(0)
            profit = price - bought_price
            reward = max(profit, 0)  # losses are clipped to a zero reward
            total_profit += profit
            print("Sell: " + formatPrice(price) + " | Profit: " + formatPrice(profit))

        done = (t == l - 1)
        agent.memory.append((state, action, reward, next_state, done))
        state = next_state

        if done:
            print("--------------------------------")
            print("Total Profit: " + formatPrice(total_profit))
            print("--------------------------------")

        # Replay runs on every step once memory holds more than one batch.
        if len(agent.memory) > batch_size:
            agent.expReplay(batch_size)

    # Checkpoint on every tenth episode (including episode 0).
    if e % 10 == 0:
        agent.model.save("/content/drive/My Drive/TradingBot/models/model_ep" + str(e))


Episode 0/1
Buy: $1333.34
Sell: $1298.35 | Profit: -$34.99
Buy: $1300.80
Sell: $1313.27 | Profit: $12.47
Buy: $1326.82
Sell: $1318.55 | Profit: -$8.27
Buy: $1364.30
Sell: $1357.51 | Profit: -$6.79
Buy: $1373.73
Sell: $1373.47 | Profit: -$0.26
Buy: $1314.76
Buy: $1330.31
Sell: $1318.80 | Profit: $4.04
Sell: $1315.92 | Profit: -$14.39
Buy: $1326.61
Sell: $1301.53 | Profit: -$25.08
Buy: $1278.94
Buy: $1255.27
Sell: $1252.82 | Profit: -$26.12
Buy: $1245.86
Buy: $1267.65
Sell: $1257.94 | Profit: $2.67
Sell: $1239.94 | Profit: -$5.92
Buy: $1241.23
Buy: $1261.89
Sell: $1264.74 | Profit: -$2.91
Sell: $1180.16 | Profit: -$61.07
Buy: $1197.66
Buy: $1166.71
Sell: $1150.53 | Profit: -$111.36
Sell: $1170.81 | Profit: -$26.85
Buy: $1142.62
Buy: $1122.14
Buy: $1117.58
Buy: $1139.83
Buy: $1152.69
Buy: $1182.17
Sell: $1147.95 | Profit: -$18.76
Sell: $1160.33 | Profit: $17.71
Buy: $1145.87
Buy: $1128.43
Sell: $1183.50 | Profit: $61.36
Sell: $1253.69 | Profit: $136.11
Buy: $1242.98
Sell: $1224.36 | Profi

In [0]:
import keras
from keras.models import load_model

#from agent.agent import Agent
#from functions import *
import sys

In [11]:
# Evaluation run. The settings are fixed here in place of the command-line
# arguments "[stock] [model]" that the script version read from sys.argv,
# e.g. ^GSPC_2011 model_ep1000.
stock_name, model_name = "^GSPC_2011", "model_ep20"

# The saved model is loaded here only to read the width of its input layer;
# Agent(..., True, model_name) below loads the same file again.
model = load_model("/content/drive/My Drive/TradingBot/models/" + model_name)
window_size = model.layers[0].input.shape.as_list()[1]

agent = Agent(window_size, True, model_name)
data = getStockDataVec(stock_name)
l = len(data) - 1
batch_size = 32

state = getState(data, 0, window_size + 1)
total_profit = 0
agent.inventory = []

for t in range(l):
    action = agent.act(state)

    # Defaults for "sit" (and for a sell request with an empty inventory).
    next_state = getState(data, t + 1, window_size + 1)
    reward = 0
    price = data[t]

    if action == 1:  # buy
        agent.inventory.append(price)
        print("Buy: " + formatPrice(price))

    elif action == 2 and agent.inventory:  # sell the oldest holding
        bought_price = agent.inventory.pop(0)
        profit = price - bought_price
        reward = max(profit, 0)
        total_profit += profit
        print("Sell: " + formatPrice(price) + " | Profit: " + formatPrice(profit))

    done = (t == l - 1)
    agent.memory.append((state, action, reward, next_state, done))
    state = next_state

    if done:
        print("--------------------------------")
        print(stock_name + " Total Profit: " + formatPrice(total_profit))
        print("--------------------------------")

--------------------------------
^GSPC_2011 Total Profit: $0.00
--------------------------------
