In [1]:
import math
import os
import random
from collections import deque

import keras
import numpy as np
import pandas as pd
from keras.layers import Dense
from keras.models import Sequential
from keras.models import load_model
from keras.optimizers import Adam

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


## Network Architecture

In [2]:
class Agent:
    """Q-learning trading agent backed by a small dense Keras network.

    The agent chooses between three discrete actions (0 = stay, 1 = buy,
    2 = sell).  In training mode it explores with an epsilon-greedy policy and
    learns from the most recent transitions stored in ``memory``; in
    evaluation mode it loads a saved model and always acts greedily.
    """

    def __init__(self, state_size, is_eval=False, model_name=""):
        """Create the agent.

        Args:
            state_size: Width of the state vector fed to the network.
            is_eval: When True, load ``models/<model_name>`` instead of
                building a fresh network, and disable random exploration.
            model_name: File name (under ``models/``) of the saved model;
                only read when ``is_eval`` is True.
        """
        self.state_size = state_size
        self.action_size = 3           # stay, buy, sell
        self.memory = deque(maxlen=1000)   # oldest transitions are dropped first
        self.inventory = []
        self.model_name = model_name
        self.is_eval = is_eval

        self.gamma = 0.95            # discount applied to the next state's best Q-value
        self.epsilon = 1.0           # current probability of taking a random action
        self.epsilon_min = 0.01      # epsilon stops decaying once it is at or below this
        self.epsilon_decay = 0.995   # multiplicative decay applied per expReplay call

        self.model = load_model("models/" + model_name) if is_eval else self._model()

    def _model(self):
        """Build and compile the Q-network (64-32-8 ReLU layers, linear output)."""
        model = Sequential()
        model.add(Dense(units=64, input_dim=self.state_size, activation='relu'))
        model.add(Dense(units=32, activation='relu'))
        model.add(Dense(units=8 , activation='relu'))
        # One linear output per action.
        model.add(Dense(self.action_size, activation='linear'))
        model.compile(loss='mse', optimizer=Adam(lr=0.001))

        return model

    def act(self, state):
        """Return the index of the action to take for ``state``.

        In training mode a uniformly random action is returned with
        probability ``epsilon``; otherwise the action with the highest
        predicted value is returned.
        """
        if not self.is_eval and np.random.rand() <= self.epsilon:
            # randomize an integer in [0, self.action_size)
            return random.randrange(self.action_size)
        options = self.model.predict(state)
        return np.argmax(options[0])

    def expReplay(self, batch_size):
        """Fit the network on the ``batch_size`` most recent transitions.

        Each stored transition is ``(state, action, reward, next_state, done)``.
        The target for the taken action is ``reward`` for terminal transitions
        and ``reward + gamma * max(Q(next_state))`` otherwise; the other
        action outputs keep their currently predicted values.  After the
        batch, ``epsilon`` is decayed once if it is still above
        ``epsilon_min``.

        Args:
            batch_size: Number of most recent transitions to replay.  If the
                memory holds fewer, all stored transitions are replayed.
        """
        l = len(self.memory)
        # Start exactly batch_size items from the end.  The previous start of
        # l - batch_size + 1 replayed only batch_size - 1 transitions, and a
        # negative start wrapped around to the end of the memory; clamp at 0.
        start = max(l - batch_size, 0)
        mini_batch = [self.memory[i] for i in range(start, l)]

        for state, action, reward, next_state, done in mini_batch:
            target = reward
            if not done:
                target = reward + self.gamma * np.amax(self.model.predict(next_state)[0])
            # Only the taken action's output is moved towards the target.
            target_f = self.model.predict(state)
            target_f[0][action] = target
            self.model.fit(state, target_f, epochs=1, verbose=False)

        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

In [3]:
def getStockDataVec(key):
    """Read ``<key>.csv`` and return the values of its 'Close' column."""
    csv_path = key + ".csv"
    prices = pd.read_csv(csv_path)
    close_column = prices['Close']
    return close_column.values

In [4]:
def sigmoid(x):
    """
    Return the sigmoid function of x, i.e. 1 / (1 + e**-x).

    The result is computed in a numerically stable way: math.exp is only ever
    called with a non-positive argument, so large negative inputs return 0.0
    instead of raising OverflowError.
    """
    if x >= 0:
        return 1.0 / (1.0 + math.exp(-x))
    # For negative x use the algebraically equivalent form e**x / (1 + e**x),
    # whose exponent cannot overflow.
    e = math.exp(x)
    return e / (1.0 + e)
    
def getState(data, t, n):
    """
    Build the state for time t from the last n values of data.

    The window covers data[t-n+1 .. t]; when it would start before index 0 it
    is left-padded with data[0].  The result is a 1 x (n-1) array holding the
    sigmoid of each consecutive difference inside the window.
    """
    start = t - n + 1
    if start >= 0:
        window = data[start: t + 1]
    else:
        # Not enough history yet: repeat the first value -start times.
        window = np.append(-start * [data[0]], data[0: t + 1])
    deltas = [sigmoid(window[k + 1] - window[k]) for k in range(n - 1)]
    return np.array([deltas])

## Training

In [5]:
# Base name of the price CSV; getStockDataVec appends ".csv" to it.
STOCK_NAME = "^GSPC"
# Passed to Agent as state_size; states are built from window_size + 1 prices.
window_size = 10
# The training loop iterates over range(episode_count + 1).
episode_count = 100

In [None]:
# Training-mode agent (is_eval defaults to False) with input width window_size.
agent = Agent(window_size)
# Values of the 'Close' column read from "<STOCK_NAME>.csv".
data = getStockDataVec(STOCK_NAME)
# Steps per episode: one fewer than the number of prices, because each step
# also builds the state at t + 1.
l = len(data) - 1
# Replay starts once the agent's memory holds more than this many transitions.
batch_size = 32

In [None]:
# The checkpoints below are written under "models/"; create the folder up
# front so a missing directory cannot abort the run after a full episode.
os.makedirs("models", exist_ok=True)

for e in range(episode_count + 1):
    print("Episode {:}/{:}".format(e, episode_count))
    # Each episode restarts from the first price with an empty inventory; the
    # agent's replay memory and epsilon are not reset here.
    state = getState(data, 0, window_size+1)
    total_profit = 0
    agent.inventory = []
    
    for t in range(l):
        action = agent.act(state)
        
        # Sit (also the outcome of a sell with an empty inventory): no reward
        next_state = getState(data, t+1, window_size+1)
        reward = 0

        # Buy action: remember the purchase price
        if action == 1:
            agent.inventory.append(data[t])
            print("Buy: {:}".format(data[t]))

        # Sell action: close the oldest open position (FIFO)
        elif action == 2 and len(agent.inventory) > 0:
            bought_price = agent.inventory.pop(0)
            # Losses are clipped to a reward of 0 but still count in total_profit.
            reward = max(data[t] - bought_price, 0)
            total_profit += data[t] - bought_price
            print("Sell: {:} | Profit: {:}".format(data[t], data[t] - bought_price))

        # The last step of the price series ends the episode.
        done = True if t == l-1 else False
        agent.memory.append((state, action, reward, next_state, done))
        state = next_state

        if done:
            print("------------------------------------")
            print("Total Profit: {:}".format(total_profit))
            print("------------------------------------")
    
        # Learn after every step once enough transitions have been collected.
        if len(agent.memory) > batch_size:
            agent.expReplay(batch_size)
    
    # Checkpoint every 10th episode (including episode 0).
    if e % 10 == 0:
        agent.model.save("models/model_ep" + str(e))

Episode 0/100
Buy: 1300.800049
Sell: 1313.27002 | Profit: 12.469970999999987
Buy: 1329.469971
Buy: 1347.969971
Sell: 1342.540039 | Profit: 13.070067999999992
Buy: 1342.900024
Sell: 1364.300049 | Profit: 16.330077999999958
Sell: 1357.51001 | Profit: 14.609985999999935
Buy: 1332.530029
Buy: 1330.310059
Buy: 1318.800049
Buy: 1315.920044
Sell: 1301.530029 | Profit: -31.0
Buy: 1278.939941
Buy: 1255.27002
Buy: 1245.859985
Buy: 1267.650024
Buy: 1257.939941
Buy: 1241.22998
Sell: 1241.410034 | Profit: -88.90002499999991
Buy: 1253.800049
Buy: 1261.890015
Buy: 1264.73999
Buy: 1197.660034
Buy: 1173.560059
Buy: 1150.530029
Sell: 1170.810059 | Profit: -147.98999000000003
Sell: 1142.619995 | Profit: -173.30004899999994
Buy: 1122.140015
Buy: 1117.579956
Buy: 1139.829956
Buy: 1152.689941
Sell: 1182.170044 | Profit: -96.76989700000013
Sell: 1145.869995 | Profit: -109.40002499999991
Sell: 1151.439941 | Profit: -94.42004399999996
Buy: 1165.890015
Sell: 1238.160034 | Profit: -29.489990000000034
Sell: 1209.

## Evaluation

In [None]:
# Base name of the price CSV used for evaluation; ".csv" is appended on load.
STOCK_NAME = "^GSPC"
# File name, under "models/", of the saved network to evaluate.
model_name = 'model_ep0'

In [None]:
# Load the saved network to read its input width. The Agent created for
# evaluation loads the same file again on its own.
model = load_model("models/" + model_name)
# Index 1 of the first layer's input shape is used as the state width
# (index 0 is presumably the batch axis -- TODO confirm).
window_size = model.layers[0].input.shape.as_list()[1]

In [None]:
# Evaluation-mode agent: loads "models/<model_name>" and never takes random actions.
agent = Agent(window_size, True, model_name)
# Values of the 'Close' column read from "<STOCK_NAME>.csv".
data = getStockDataVec(STOCK_NAME)
# Number of evaluation steps: one fewer than the number of prices, because
# each step also builds the state at t + 1.
l = len(data) - 1
# NOTE(review): batch_size is not read by the evaluation cells visible here.
batch_size = 32

In [None]:
# Initial state at t = 0; getState left-pads the window with the first price.
state = getState(data, 0, window_size + 1)
# Running sum of realised sell gains and losses.
total_profit = 0
# Purchase prices of currently open positions; sells pop the oldest first.
agent.inventory = []

In [None]:
# Single pass over the price series with the loaded model choosing each action.
for t in range(l):
    action = agent.act(state)
    next_state = getState(data, t + 1, window_size + 1)

    price = data[t]
    reward = 0   # stays 0 for sit, buy, and a sell with nothing to sell

    if action == 1:
        # Buy: record the purchase price.
        agent.inventory.append(price)
        print("Buy: {:}".format(price))
    elif action == 2 and agent.inventory:
        # Sell: close the oldest open position.
        bought_price = agent.inventory.pop(0)
        gain = price - bought_price
        reward = max(gain, 0)
        total_profit += gain
        print("Sell: {:} | Profit: {:}".format(price, gain))

    done = (t == l - 1)
    agent.memory.append((state, action, reward, next_state, done))
    state = next_state

    if done:
        banner = "------------------------------------"
        print(banner)
        print("Total Profit: {:}".format(total_profit))
        print(banner)