In [0]:
import pandas as pd
import keras
from keras.models import Sequential
from keras.models import load_model
from keras.layers import Dense
from keras.optimizers import Adam

import numpy as np
import random
from collections import deque

class Agent:
	def __init__(self, state_size, is_eval=False, model_name=""):
		self.state_size = state_size # normalized previous days
		self.action_size = 3 # sit, buy, sell
		self.memory = deque(maxlen=1000)
		self.inventory = []
		self.model_name = model_name
		self.is_eval = is_eval

		self.gamma = 0.95
		self.epsilon = 1.0
		self.epsilon_min = 0.01
		self.epsilon_decay = 0.995

		self.model = load_model("models/" + model_name) if is_eval else self._model()

	def _model(self):
		model = Sequential()
		model.add(Dense(units=64, input_dim=self.state_size, activation="relu"))
		model.add(Dense(units=32, activation="relu"))
		model.add(Dense(units=8, activation="relu"))
		model.add(Dense(self.action_size, activation="linear"))
		model.compile(loss="mse", optimizer=Adam(lr=0.001))

		return model

	def act(self, state):
		if not self.is_eval and random.random() <= self.epsilon:
			return random.randrange(self.action_size)

		options = self.model.predict(state)
		return np.argmax(options[0])

	def expReplay(self, batch_size):
		mini_batch = []
		l = len(self.memory)
		for i in range(l - batch_size + 1, l):
			mini_batch.append(self.memory[i])

		for state, action, reward, next_state, done in mini_batch:
			target = reward
			if not done:
				target = reward + self.gamma * np.amax(self.model.predict(next_state)[0])

			target_f = self.model.predict(state)
			target_f[0][action] = target
			self.model.fit(state, target_f, epochs=1, verbose=0)

		if self.epsilon > self.epsilon_min:
			self.epsilon *= self.epsilon_decay 

Using TensorFlow backend.


In [0]:
import pandas as pd
data=pd.read_csv('gdrive/My Drive/TSLA.csv')

In [0]:
import numpy as np
import math
def formatPrice(n):
	return ("-$" if n < 0 else "$") + "{0:.2f}".format(abs(n))




In [0]:
def getStockDataVec( key):
	vec = []
	lines = open("data/" + key + ".csv", "r").read().splitlines()

	for line in lines[1:]:
		vec.append(float(line.split(",")[4]))

	return vec



UFuncTypeError: ignored

In [0]:
def sigmoid(x):
	return 1 / (1 + math.exp(-x))



In [0]:
# returns an an n-day state representation ending at time t
def getState(data, t, n):
	d = t - n + 1
	block = data[d:t + 1] if d >= 0 else -d * [data[0]] + data[0:t + 1] # pad with t0
	res = []
	for i in xrange(n - 1):
		res.append(sigmoid(block[i + 1] - block[i]))

	return np.array([res])

In [0]:
import pandas as pd
data=pd.read_csv('gdrive/My Drive/TSLA.csv', index_col='Date', parse_dates=['Date'])
getState(data,10,21)

NameError: ignored

In [0]:
import sys
if len(sys.argv) != 4:
	print("Usage: python train.py [stock] [window] [episodes]")
	exit()
stock_name, window_size, episode_count = sys.argv[0], 10, 200



Usage: python train.py [stock] [window] [episodes]


In [0]:
agent = Agent(window_size)
import pandas as pd
data=pd.read_csv('gdrive/My Drive/TSLA.csv', index_col='Date', parse_dates=['Date'])
l = len(data) - 1
batch_size = 32



In [0]:
for e in range(episode_count + 1):
	print("Episode " + str(e) + "/" + str(episode_count))
	state = getState(data, 0, window_size + 1)

	total_profit = 0
	agent.inventory = []

	for t in range(l):
		action = agent.act(state)

		# sit
		next_state = getState(data, t + 1, 21)
		reward = 0

		if action == 1: # buy
			agent.inventory.append(data[t])
			print("Buy: " + formatPrice(data[t]))

		elif action == 2 and len(agent.inventory) > 0: # sell
			bought_price = agent.inventory.pop(0)
			reward = max(data[t] - bought_price, 0)
			total_profit += data[t] - bought_price
			print("Sell: " + formatPrice(data[t]) + " | Profit: " + formatPrice(data[t] - bought_price))

		done = True if t == l - 1 else False
		agent.memory.append((state, action, reward, next_state, done))
		state = next_state

		if done:
			print("--------------------------------")
			print("Total Profit: " + formatPrice(total_profit))
			print("--------------------------------")

		if len(agent.memory) > batch_size:
			agent.expReplay(batch_size)


In [0]:
if e % 10 == 0:
  agent.model.save("models/model_ep" + str(e))

Saving TSLA.csv to TSLA.csv


{'TSLA.csv': b'Date,Open,High,Low,Close,Adj Close,Volume\n2017-11-27,313.250000,317.339996,309.510010,316.809998,316.809998,4555900\n2017-11-28,316.359985,320.000000,313.920013,317.549988,317.549988,4949500\n2017-11-29,317.299988,318.000000,301.230011,307.540009,307.540009,8767400\n2017-11-30,308.559998,310.700012,304.540009,308.850006,308.850006,4351600\n2017-12-01,305.440002,310.320007,305.049988,306.529999,306.529999,4292900\n2017-12-04,306.500000,308.269989,300.609985,305.200012,305.200012,5835100\n2017-12-05,302.000000,308.000000,301.000000,303.700012,303.700012,4646500\n2017-12-06,300.100006,313.390015,300.000000,313.260010,313.260010,7195300\n2017-12-07,312.000000,318.630005,311.049988,311.239990,311.239990,4780600\n2017-12-08,314.600006,316.980011,311.260010,315.130005,315.130005,3468500\n2017-12-11,314.630005,329.010010,313.750000,328.910004,328.910004,7938000\n2017-12-12,330.450012,341.440002,330.029999,341.029999,341.029999,8733200\n2017-12-13,340.929993,344.220001,336.50000

In [0]:
from google.colab import drive
drive.mount('/content/gdrive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/gdrive


In [0]:
data.head()
data.iloc[0][1]

313.25