In [1]:
import keras
from keras.models import Sequential
from keras.models import load_model
from keras.layers import Dense
from keras.optimizers import Adam

import math
import numpy as np
import pandas as pd
import random
from collections import deque
import matplotlib.pyplot as plt
from datetime import datetime

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


## Network Architecture

In [2]:
class Agent:
    def __init__(self, state_size, is_eval=False, model_name=""):
        self.state_size = state_size
        self.action_size = 3           # stay, buy, sell
        self.memory = deque(maxlen=1000)
        self.inventory = []
        self.model_name = model_name
        self.is_eval = is_eval
        
        self.gamma = 0.95
        self.epsilon = 1.0
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        
        self.model = load_model("models/" + model_name) if is_eval else self._model()
    
    def _model(self):
        model = Sequential()
        model.add(Dense(units=64, input_dim=self.state_size, activation='relu'))
        model.add(Dense(units=32, activation='relu'))
        model.add(Dense(units=8 , activation='relu'))
        model.add(Dense(self.action_size, activation='linear'))
        model.compile(loss='mse', optimizer=Adam(lr=0.001))
        
        return model

    def act(self, state):
        if not self.is_eval and np.random.rand() <= self.epsilon:
            # randomize an integer in [0, self.action_size)
            return random.randrange(self.action_size)
        options = self.model.predict(state)
        return np.argmax(options[0])
    
    def expReplay(self, batch_size):
        mini_batch = []
        l = len(self.memory)
        for i in range(l - batch_size + 1, l):
            mini_batch.append(self.memory[i])
        for state, action, reward, next_state, done in mini_batch:
            target = reward
            if not done:
                target = reward + self.gamma * np.amax(self.model.predict(next_state)[0])
            target_f = self.model.predict(state)
            target_f[0][action] = target
            self.model.fit(state, target_f, epochs=1, verbose=False)
        
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

In [3]:
def getStockDataVec(key):
    df = pd.read_csv(key + ".csv")
    return df['price_close'].values

In [4]:
def getStockDateVec(key):
    df = pd.read_csv(key + ".csv")
    return df['time_period_start'].values

In [5]:
def sigmoid(x):
    """
    Return the sigmoid function of x
    """
    return 1.0 / (1.0 + math.exp(-x))
    
def getState(data, t, n):
    """
    Return an n-day state representation ending at time t
    """
    d = t - n + 1
    block = data[d: t+1] if d >= 0 else np.append(-d * [data[0]], data[0: t + 1]) # pad with t0
    res = []
    for i in range(n - 1):
        res.append(sigmoid(block[i+1] - block[i]))
    return np.array([res])

## Training

In [None]:
STOCK_NAME = "^GSPC"
window_size = 10
episode_count = 0

In [None]:
agent = Agent(window_size)
data = getStockDataVec(STOCK_NAME)
l = len(data) - 1
batch_size = 32

In [None]:
for e in range(episode_count + 1):
    print("Episode {:}/{:}".format(e, episode_count))
    state = getState(data, 0, window_size+1)
    total_profit = 0
    agent.inventory = []
    
    for t in range(l):
        action = agent.act(state)
        
        # Sit
        next_state = getState(data, t+1, window_size+1)
        reward = 0

        # Buy action
        if action == 1:
            agent.inventory.append(data[t])
            print("Buy: {:}".format(data[t]))

        # Sell action
        elif action == 2 and len(agent.inventory) > 0:
            bought_price = agent.inventory.pop(0)
            reward = max(data[t] - bought_price, 0)
            total_profit += data[t] - bought_price
            print("Sell: {:} | Profit: {:}".format(data[t], data[t] - bought_price))

        done = True if t == l-1 else False
        agent.memory.append((state, action, reward, next_state, done))
        state = next_state

        if done:
            print("------------------------------------")
            print("Total Profit: {:}".format(total_profit))
            print("------------------------------------")
    
        if len(agent.memory) > batch_size:
            agent.expReplay(batch_size)
    
    if e % 10 == 0:
        agent.model.save("models/model_ep" + str(e))

## Evaluation

In [6]:
STOCK_NAME = "BITSTAMP_SPOT_BTC_USD-2018-04-25"
model_name = 'model_ep0'

In [7]:
model = load_model("models/" + model_name)
window_size = model.layers[0].input.shape.as_list()[1]

In [8]:
agent = Agent(window_size, True, model_name)
data = getStockDataVec(STOCK_NAME)
dates_temp = getStockDateVec(STOCK_NAME)
dates = []
l = len(data) - 1
batch_size = 32

In [9]:
state = getState(data, 0, window_size + 1)
total_profit = 0
agent.inventory = []

for date in dates_temp:
    dates.append(datetime.strptime(date[:10], '%Y-%m-%d'))

In [10]:
buy_dates = []
sell_dates = []
buy_prices = []
sell_prices = []
for t in range(l):
    action = agent.act(state)
    # Sit 
    next_state = getState(data, t+1, window_size+1)
    reward = 0
    
    # Buy action
    if action == 1:
        agent.inventory.append(data[t])
        print("Buy: {:}".format(data[t]))
        buy_dates.append(dates[t])
        buy_prices.append(data[t])

    # Sell action
    elif action == 2 and len(agent.inventory) > 0:
        sell_dates.append(dates[t])
        sell_prices.append(data[t])
        bought_price = agent.inventory.pop(0)
        reward = max(data[t] - bought_price, 0)
        total_profit += data[t] - bought_price
        print("Sell: {:} | Profit: {:}".format(data[t], data[t] - bought_price))

    done = True if t == l-1 else False
    agent.memory.append((state, action, reward, next_state, done))
    state = next_state

    if done:
        print("------------------------------------")
        print("Total Profit: {:}".format(total_profit))
        print("------------------------------------")

Buy: 3987.52
Sell: 4085.0 | Profit: 97.48000000000002
Buy: 3865.34
Buy: 3227.79
Buy: 3700.01
Buy: 3678.93
Buy: 3662.99
Buy: 4101.6
Buy: 3888.8
Buy: 3874.46
Buy: 3617.05
Buy: 3612.18
Sell: 3779.17 | Profit: -86.17000000000007
Buy: 3664.22
Sell: 3918.0 | Profit: 690.21
Buy: 3888.03
Sell: 4199.29 | Profit: 499.27999999999975
Sell: 4184.84 | Profit: 505.9100000000003
Sell: 4164.82 | Profit: 501.8299999999999
Sell: 4326.09 | Profit: 224.48999999999978
Sell: 4377.22 | Profit: 488.4200000000001
Sell: 4391.48 | Profit: 517.0199999999995
Sell: 4315.83 | Profit: 698.7799999999997
Sell: 4219.53 | Profit: 607.3499999999999
Sell: 4301.09 | Profit: 636.8700000000003
Sell: 4362.95 | Profit: 474.9199999999996


OverflowError: math range error

In [None]:
plt.xticks(rotation=70)
plt.plot(dates, data, label='prices')
plt.plot(buy_dates, buy_prices, '^', markersize=5, color='g', label='buy')
plt.plot(sell_dates, sell_prices, 'v', markersize=5, color='r', label='sell')
plt.legend(loc='upper left')
plt.savefig('BITSTAMP_SPOT_BTC_USD-2017-01-01_results')

In [None]:
buy_dates