In [1]:
import math
import os
import random
from collections import deque

import keras
import numpy as np
import pandas as pd
from keras.layers import Dense
from keras.models import Sequential
from keras.models import load_model
from keras.optimizers import Adam

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


## Network Architecture

In [2]:
class Agent:
    """Q-learning agent that picks one of three actions from a state vector.

    Actions are encoded as integers: 0 = stay, 1 = buy, 2 = sell.
    Q-values are approximated by a small fully connected Keras network.
    """

    def __init__(self, state_size, is_eval=False, model_name=""):
        """Create the agent and its Q-network.

        Args:
            state_size: Number of features in one state row (network input_dim).
            is_eval: If True, load a saved model from ``models/<model_name>``
                and never take random exploratory actions.
            model_name: File name under ``models/``; only read when
                ``is_eval`` is True.
        """
        self.state_size = state_size
        self.action_size = 3           # stay, buy, sell
        # Replay buffer of (state, action, reward, next_state, done) tuples;
        # the deque silently drops the oldest entry beyond 1000.
        self.memory = deque(maxlen=1000)
        # Open positions; callers append/pop entries here.
        self.inventory = []
        self.model_name = model_name
        self.is_eval = is_eval

        self.gamma = 0.95              # discount factor for future rewards
        self.epsilon = 1.0             # current exploration probability
        self.epsilon_min = 0.01        # decay stops once epsilon is at/below this
        self.epsilon_decay = 0.995     # multiplicative decay per expReplay call

        self.model = load_model("models/" + model_name) if is_eval else self._model()

    def _model(self):
        """Build and compile the Q-network.

        Returns:
            A compiled ``Sequential`` model: Dense 64 -> 32 -> 8 (ReLU) followed
            by a linear layer with one output per action, MSE loss, Adam(0.001).
        """
        model = Sequential()
        model.add(Dense(units=64, input_dim=self.state_size, activation='relu'))
        model.add(Dense(units=32, activation='relu'))
        model.add(Dense(units=8 , activation='relu'))
        model.add(Dense(self.action_size, activation='linear'))
        model.compile(loss='mse', optimizer=Adam(lr=0.001))

        return model

    def act(self, state):
        """Choose an action for ``state`` (epsilon-greedy while training).

        Args:
            state: Input accepted by ``self.model.predict``; the first row of
                the prediction is used.

        Returns:
            An action index in ``[0, self.action_size)``.
        """
        if not self.is_eval and np.random.rand() <= self.epsilon:
            # randomize an integer in [0, self.action_size)
            return random.randrange(self.action_size)
        options = self.model.predict(state)
        return np.argmax(options[0])

    def expReplay(self, batch_size):
        """Fit the network on the most recent ``batch_size`` transitions.

        For each transition the target for the taken action is ``reward`` when
        the episode is done, otherwise
        ``reward + gamma * max_a Q(next_state, a)``; the other actions keep
        their current predictions. Afterwards epsilon is decayed once.

        Args:
            batch_size: Number of newest transitions to replay. If the buffer
                holds fewer, all of them are used; non-positive values replay
                nothing.
        """
        # Clamp the start so that exactly the newest `batch_size` entries are
        # taken and a short buffer never produces negative (wrapping) indices.
        start = max(len(self.memory) - batch_size, 0)
        mini_batch = list(self.memory)[start:]

        for state, action, reward, next_state, done in mini_batch:
            target = reward
            if not done:
                target = reward + self.gamma * np.amax(self.model.predict(next_state)[0])
            target_f = self.model.predict(state)
            target_f[0][action] = target
            self.model.fit(state, target_f, epochs=1, verbose=False)

        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

In [3]:
def getStockDataVec(key):
    """
    Load "<key>.csv" and return its 'Close' column as an array of values
    """
    csv_path = key + ".csv"
    close_column = pd.read_csv(csv_path)['Close']
    return close_column.values

In [4]:
def sigmoid(x):
    """
    Return the sigmoid function of x, i.e. 1 / (1 + e^-x).

    Evaluated in a numerically stable way: for negative x the equivalent form
    e^x / (1 + e^x) is used so that math.exp never receives a large positive
    argument (which would raise OverflowError).
    """
    if x >= 0:
        return 1.0 / (1.0 + math.exp(-x))
    # exp(x) underflows harmlessly to 0.0 for very negative x.
    z = math.exp(x)
    return z / (1.0 + z)
    
def getState(data, t, n):
    """
    Build the state for time t: the sigmoid of each of the n - 1 consecutive
    differences inside the n-element window of data that ends at index t.
    Returned as a 2-D array with a single row.
    """
    start = t - n + 1
    if start >= 0:
        block = data[start: t + 1]
    else:
        # window reaches before index 0: pad on the left with data[0]
        block = np.append([data[0]] * (-start), data[0: t + 1])
    deltas = [sigmoid(block[k + 1] - block[k]) for k in range(n - 1)]
    return np.array([deltas])

## Training

In [5]:
# Base name of the price CSV; ".csv" is appended when the file is loaded.
STOCK_NAME = "^GSPC"
# The training loop iterates over range(episode_count + 1).
episode_count = 0
# Passed to Agent as the state size.
window_size = 10

In [6]:
# Training-mode agent (is_eval defaults to False, so a new network is built).
agent = Agent(state_size=window_size)
# Closing prices for the configured ticker.
data = getStockDataVec(key=STOCK_NAME)
# Replay starts once the agent's memory holds more than this many entries.
batch_size = 32
# Number of steps per episode: every step t also needs data[t + 1].
l = len(data) - 1

In [7]:
# Checkpoints are written to "models/" at the end of an episode; create the
# directory up front so a long run cannot fail at save time because it is missing.
os.makedirs("models", exist_ok=True)

for e in range(episode_count + 1):
    print("Episode {:}/{:}".format(e, episode_count))
    # getState returns n - 1 features, so window_size + 1 prices produce the
    # window_size inputs the agent's network was built with.
    state = getState(data, 0, window_size+1)
    total_profit = 0
    agent.inventory = []

    for t in range(l):
        action = agent.act(state)

        # Sit (default): no trade, zero reward
        next_state = getState(data, t+1, window_size+1)
        reward = 0

        # Buy action
        if action == 1:
            agent.inventory.append(data[t])
            print("Buy: {:}".format(data[t]))

        # Sell action (ignored when nothing is held); oldest purchase is sold first
        elif action == 2 and len(agent.inventory) > 0:
            bought_price = agent.inventory.pop(0)
            profit = data[t] - bought_price
            # Losses are clipped to a reward of 0, but still count in total_profit.
            reward = max(profit, 0)
            total_profit += profit
            print("Sell: {:} | Profit: {:}".format(data[t], profit))

        done = t == l-1
        agent.memory.append((state, action, reward, next_state, done))
        state = next_state

        if done:
            print("------------------------------------")
            print("Total Profit: {:}".format(total_profit))
            print("------------------------------------")

        # Train once enough transitions have been collected.
        if len(agent.memory) > batch_size:
            agent.expReplay(batch_size)

    # Checkpoint every 10th episode (including episode 0).
    if e % 10 == 0:
        agent.model.save("models/model_ep" + str(e))

Episode 0/1
Buy: 1347.560059
Buy: 1333.339966
Buy: 1298.349976
Sell: 1295.859985 | Profit: -51.70007399999986
Buy: 1300.800049
Sell: 1313.27002 | Profit: -20.069946000000073
Sell: 1326.650024 | Profit: 28.30004800000006
Buy: 1347.969971
Sell: 1342.540039 | Profit: 41.739990000000034
Buy: 1342.900024
Buy: 1360.400024
Buy: 1364.300049
Buy: 1357.51001
Buy: 1354.949951
Sell: 1364.170044 | Profit: 16.200072999999975
Sell: 1373.72998 | Profit: 30.82995600000004
Buy: 1366.01001
Sell: 1373.469971 | Profit: 13.069946999999956
Buy: 1349.469971
Buy: 1354.310059
Buy: 1332.530029
Buy: 1314.76001
Buy: 1330.310059
Sell: 1318.800049 | Profit: -45.5
Sell: 1315.920044 | Profit: -41.589966000000004
Sell: 1301.530029 | Profit: -53.41992200000004
Sell: 1278.939941 | Profit: -87.07006899999988
Buy: 1255.27002
Sell: 1257.939941 | Profit: -91.5300299999999
Sell: 1239.939941 | Profit: -114.37011799999982
Sell: 1234.180054 | Profit: -98.34997500000009
Buy: 1241.410034
Buy: 1264.73999
Sell: 1233.420044 | Profit:

Buy: 914.150024
Buy: 933.76001
Sell: 930.5499880000001 | Profit: -206.21002199999987
Sell: 932.8699949999999 | Profit: -192.53002900000013
Sell: 913.3099980000001 | Profit: -213.02996799999994
Sell: 938.8699949999999 | Profit: -183.85998500000017
Sell: 936.3099980000001 | Profit: -188.98004099999991
Sell: 934.5300289999999 | Profit: -183.27002000000005
Sell: 920.75 | Profit: -209.719971
Sell: 917.580017 | Profit: -186.1099240000001
Sell: 906.5499880000001 | Profit: -204.46002199999987
Sell: 912.2299800000001 | Profit: -190.32006899999988
Sell: 889.4799800000001 | Profit: -236.589966
Sell: 902.98999 | Profit: -221.47998099999995
Sell: 891.1199949999999 | Profit: -216.70996100000013
Buy: 822.099976
Sell: 876.4500119999999 | Profit: -224.50994900000012
Sell: 978.7999880000001 | Profit: -112.67999199999997
Sell: 990.3099980000001 | Profit: -86.00994800000001
Sell: 1000.2999880000001 | Profit: -65.14996299999996
Sell: 1003.2700199999999 | Profit: -73.65002400000003
Sell: 993.7100220000001 |

Buy: 1003.349976
Buy: 998.01001
Buy: 907.840027
Sell: 946.429993 | Profit: -49.7999870000001
Sell: 940.5499880000001 | Profit: -44.39001399999984
Sell: 985.400024 | Profit: 75.48004100000003
Sell: 955.0499880000001 | Profit: 55.830017
Sell: 896.7800289999999 | Profit: -106.56994700000007
Sell: 908.1099849999999 | Profit: -89.90002500000003
Sell: 876.7700199999999 | Profit: -31.070007000000032
Buy: 865.2999880000001
Buy: 869.599976
Sell: 832.3900150000001 | Profit: -32.909973000000036
Sell: 850.080017 | Profit: -19.51995899999997
Buy: 866.2299800000001
Sell: 857.51001 | Profit: -8.719970000000103
Buy: 877.5200199999999
Buy: 907.23999
Buy: 903.7999880000001
Buy: 919.5300289999999
Sell: 907.3900150000001 | Profit: 29.86999500000013
Buy: 929.2299800000001
Buy: 909.23999
Buy: 908.349976
Buy: 883.919983
Buy: 893.070007
Buy: 882.8800050000001
Buy: 909.7100220000001
Buy: 908.1300050000001
Buy: 903.4699710000001
Buy: 888.330017
Buy: 887.0
Buy: 910.330017
Buy: 893.0599980000001
Buy: 906.830017
S

Sell: 1109.550049 | Profit: 39.300048999999944
Sell: 1121.900024 | Profit: 43.94006300000001
Sell: 1121.099976 | Profit: 42.34997599999997
Sell: 1125.069946 | Profit: 29.72998000000007
Sell: 1124.660034 | Profit: -1.1999510000000555
Sell: 1125.589966 | Profit: 5.130004999999983
Sell: 1142.709961 | Profit: 15.469970999999987
Sell: 1139.780029 | Profit: 13.969970000000103
Sell: 1134.280029 | Profit: 12.640014000000065
Sell: 1124.829956 | Profit: -2.9600829999999405
Sell: 1148.670044 | Profit: 27.60998500000005
Sell: 1142.160034 | Profit: 52.69006300000001
Sell: 1147.699951 | Profit: 64.089966
Sell: 1144.72998 | Profit: 65.47998000000007
Sell: 1141.199951 | Profit: 61.81994600000007
Sell: 1146.23999 | Profit: 53.699951000000056
Buy: 1182.449951
Buy: 1183.780029
Sell: 1183.26001 | Profit: 0.8100589999999102
Sell: 1184.380005 | Profit: 0.5999759999999696
------------------------------------
Total Profit: 16415.110717
------------------------------------
Episode 1/1
Buy: 1314.76001
Buy: 1330

KeyboardInterrupt: 

## Evaluation

In [8]:
# File name of the saved model, looked up under "models/".
model_name = 'model_ep0'
# Base name of the CSV used for evaluation.
STOCK_NAME = "^GSPC_2011"

In [9]:
# Load the saved network only to inspect its input layer.
model = load_model(
    "models/" + model_name
)
# Second entry of the first layer's input shape is taken as the window size.
window_size = (
    model.layers[0]
    .input.shape.as_list()[1]
)

In [None]:
# NOTE(review): this cell has no execution count in the saved notebook. If it is
# skipped, the cells below reuse whatever `agent` and `data` are already defined.
# Evaluation-mode agent: loads models/<model_name> and never explores randomly.
agent = Agent(window_size, is_eval=True, model_name=model_name)
data = getStockDataVec(key=STOCK_NAME)
batch_size = 32
# Number of evaluation steps: every step t also needs data[t + 1].
l = len(data) - 1

In [10]:
# Reset the bookkeeping, then build the initial state for index 0.
agent.inventory = []
total_profit = 0
state = getState(data, 0, window_size + 1)

In [11]:
for t in range(l):
    action = agent.act(state)

    # Default outcome (sit): zero reward, just advance to the next window
    reward = 0
    next_state = getState(data, t+1, window_size+1)

    if action == 1:
        # Buy: remember the purchase price
        print("Buy: {:}".format(data[t]))
        agent.inventory.append(data[t])
    elif action == 2 and len(agent.inventory) > 0:
        # Sell: close the oldest open position
        bought_price = agent.inventory.pop(0)
        profit = data[t] - bought_price
        total_profit += profit
        reward = max(profit, 0)
        print("Sell: {:} | Profit: {:}".format(data[t], profit))

    done = (t == l-1)
    agent.memory.append((state, action, reward, next_state, done))
    state = next_state

    if done:
        separator = "------------------------------------"
        print(separator)
        print("Total Profit: {:}".format(total_profit))
        print(separator)

Buy: 1278.939941
Sell: 1255.27002 | Profit: -23.66992100000016
Buy: 1100.640015
Sell: 1113.569946 | Profit: 12.929931000000124
Buy: 919.7299800000001
Sell: 923.419983 | Profit: 3.6900029999999333
Buy: 989.2800289999999
Sell: 987.48999 | Profit: -1.7900389999998652
Buy: 1216.890015
Sell: 1227.680054 | Profit: 10.790038999999979
Buy: 1546.630005
Sell: 1539.589966 | Profit: -7.040038999999979
Buy: 1325.189941
Sell: 1310.5 | Profit: -14.68994100000009
Buy: 1395.420044
Sell: 1380.819946 | Profit: -14.600097999999889
Buy: 1196.47998
Sell: 1197.300049 | Profit: 0.8200689999998758
------------------------------------
Total Profit: -33.55999600000007
------------------------------------
