# Q Function
* estimates the expected cumulative (discounted) future reward, not just the immediate reward
* Q(s,a) is the expected future value of taking action **a** in state **s**
* in DQN, we use a **neural network to approximate the Q function**

# Classes
* Environment
* Agent
* Runner

## Environment

In [1]:
class Action:
    """A single trading decision.

    Attributes:
        act: The kind of decision (the caller-supplied identifier).
        days: Number of data rows the decision spans.
        percentage: Percentage value attached to the decision.
    """

    def __init__(self, act, days, percentage):
        self.act = act
        self.days = days
        self.percentage = percentage

    def __repr__(self):
        # Readable form so printed/logged actions show their content
        # instead of the default object address.
        return 'Action(act={!r}, days={!r}, percentage={!r})'.format(
            self.act, self.days, self.percentage)

In [2]:
import numpy as np
import pandas as pd
import pandas_datareader as pdr
import datetime

# Identifiers for the three kinds of decision an action can carry.
BUY = 'buy'
SELL = 'sell'
SKIP = 'skip'


class Environment:
    """Trading environment that replays a table of historical prices.

    The observation is the last ``window`` percentage changes of the
    'Close' column. Each action combines a decision (buy / sell / skip)
    with a holding period; taking it advances the cursor by that many rows.
    """

    min_days_to_hold = 5
    max_days_to_hold = 15
    max_drawdowns = 3
    # Percentage attached to every action in the action space.
    position_percentage = 3

    def __init__(self,
                 ticker,
                 initial_deposit=100000,
                 from_date=datetime.datetime(2007, 1, 1),
                 to_date=datetime.datetime(2017, 1, 1),
                 window=50,
                 data=None):
        """Load the price history and build the action space.

        Args:
            ticker: Symbol to download when ``data`` is not supplied.
            initial_deposit: Deposit every episode starts with.
            from_date: First date of the downloaded range.
            to_date: Last date of the downloaded range.
            window: Number of rows that make up one state.
            data: Optional price DataFrame with a 'Close' column. When
                given, nothing is downloaded and ``ticker``, ``from_date``
                and ``to_date`` are ignored.
        """
        self.initial_deposit = initial_deposit
        self.window = window
        if data is None:
            # NOTE(review): the Google Finance reader appears to have been
            # deprecated/removed in newer pandas-datareader releases --
            # confirm the installed version still provides it, or pass
            # ``data`` explicitly.
            data = pdr.get_data_google(ticker, from_date, to_date)
        self.data = data
        self.pct_data = self.data.pct_change().fillna(0)
        self.data_length = len(self.data)

        # Holding periods min, min + 2, ... up to and including max.
        days_to_holds = range(Environment.min_days_to_hold,
                              Environment.max_days_to_hold + 1,
                              2)

        self.action_space = [
            Action(act, days, Environment.position_percentage)
            for act in (BUY, SELL, SKIP)
            for days in days_to_holds
        ]
        self.reset()

    def reset(self):
        """Start a new episode and return its first state."""
        self.deposit = self.initial_deposit
        # The first ``window`` rows are consumed by the initial state.
        self.current_index = self.window
        # Maps the date an action was taken on to (action, reward).
        self.actions = {}
        self.drawdowns = 0

        return self.state()

    def score(self):
        """Return the current deposit."""
        return self.deposit

    def enough_data_provided(self):
        """Return True while the longest holding period still fits in the data."""
        return self.current_index + Environment.max_days_to_hold <= self.data_length

    def state(self):
        """Return the 'Close' percentage changes of the ``window`` rows
        immediately before the current index."""
        return self.pct_data.iloc[self.current_index - self.window:self.current_index]['Close']

    def state_size(self):
        """Return the length of one state (the window size)."""
        return self.window

    def action_size(self):
        """Return the number of available actions."""
        return len(self.action_space)

    def step(self, action_idx: int):
        """Apply the action at ``action_idx`` and advance the cursor.

        Args:
            action_idx: Index into ``self.action_space``.

        Returns:
            Tuple ``(next_state, reward, done, None)``. ``reward`` is the
            difference between the last and first 'Close' of the holding
            period (sign flipped for SELL, zero for SKIP). ``done`` becomes
            True once more than ``max_drawdowns`` consecutive steps had a
            negative reward.

        Raises:
            IndexError: If no rows are left at the current index.
            ValueError: If the selected action has an unknown kind.
        """
        action = self.action_space[action_idx]

        df = self.data.iloc[self.current_index: self.current_index + action.days]
        if df.empty:
            raise IndexError(
                'no price data left at index {}; check enough_data_provided() '
                'before calling step()'.format(self.current_index))
        on_date = df.index[0]
        first_day_price = df.iloc[0]['Close']
        last_day_price = df.iloc[-1]['Close']

        if action.act == BUY:
            reward = last_day_price - first_day_price
        elif action.act == SELL:
            reward = first_day_price - last_day_price
        elif action.act == SKIP:
            reward = 0
        else:
            raise ValueError('unknown action kind: {!r}'.format(action.act))

        self.actions[on_date] = (action, reward)

        self.current_index += action.days
        # NOTE(review): ``reward`` is an absolute price difference, so this
        # multiplies a price move by a deposit fraction rather than applying
        # a relative return -- confirm this is the intended P&L model.
        self.deposit += reward * (self.deposit*action.percentage/100)

        # Count consecutive losing steps; any non-negative reward resets it.
        if reward < 0:
            self.drawdowns += 1
        else:
            self.drawdowns = 0

        next_state = self.state()
        done = self.drawdowns > Environment.max_drawdowns
        return next_state, reward, done, None

## Agent

In [3]:
import random
from collections import deque
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
from keras.losses import mean_squared_error

class Agent:
    """Deep Q-learning agent with an epsilon-greedy policy and replay memory."""

    def __init__(self, state_size, action_size):
        """Create the agent and its network.

        Args:
            state_size: Input dimension of the network.
            action_size: Number of actions, i.e. network outputs.
        """
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)
        self.gamma = 0.95    # discount rate
        self.epsilon = 1.0  # exploration rate
        self.epsilon_min = 0.05
        self.epsilon_decay = 0.999
        self.learning_rate = 0.001
        self.model = self._build_model()

    def _build_model(self):
        """Build and compile the fully connected network that predicts one
        value per action."""
        # Neural Net for Deep-Q learning Model
        model = Sequential()
        model.add(Dense(48, input_dim=self.state_size, activation='relu'))
        model.add(Dense(36, activation='relu'))
        model.add(Dense(24, activation='relu'))
        model.add(Dense(self.action_size, activation='linear'))
        # NOTE(review): newer Keras releases appear to name this argument
        # ``learning_rate`` instead of ``lr`` -- confirm against the
        # installed version.
        model.compile(loss=mean_squared_error,
                      optimizer=Adam(lr=self.learning_rate))
        return model

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the replay memory."""
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        """Choose an action index for ``state``.

        With probability ``epsilon`` a random index is returned; otherwise
        the index with the highest predicted value.
        """
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        act_values = self.model.predict(state)
        # Plain int so callers get the same type on both branches.
        return int(np.argmax(act_values[0]))

    def replay(self, batch_size):
        """Fit the network on ``batch_size`` randomly sampled transitions.

        Does nothing while the memory holds fewer than ``batch_size``
        transitions. After fitting, ``epsilon`` is decayed once, never
        dropping below ``epsilon_min``.
        """
        if len(self.memory) < batch_size:
            # random.sample would raise ValueError on an undersized memory.
            return
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                # Bootstrap from the best predicted value of the next state.
                target = reward + self.gamma * np.amax(self.model.predict(next_state)[0])
            # Only the taken action's output is moved towards the target.
            target_f = self.model.predict(state)
            target_f[0][action] = target
            self.model.fit(state, target_f, epochs=1, verbose=0)
        if self.epsilon > self.epsilon_min:
            # Clamp so the decay step cannot undershoot the floor.
            self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)

    def load(self, name):
        """Load network weights from ``name``."""
        self.model.load_weights(name)

    def save(self, name):
        """Save network weights to ``name``."""
        self.model.save_weights(name)

Using TensorFlow backend.


## Runner

In [4]:
# Build the AAPL environment and read the dimensions the agent needs.
env = Environment('AAPL')
state_size, action_size = env.state_size(), env.action_size()
print('Action size: {}, state size: {}'.format(action_size, state_size))

Action size: 18, state size: 50


In [None]:
# Number of training episodes and the replay batch size.
EPISODES, batch_size = 50, 32
# Agent sized to the environment's state and action dimensions.
agent = Agent(state_size, action_size)

In [None]:
# Best episode so far as a (final score, action log) pair.
max_reward = None
for e in range(EPISODES):
    print('==> EPISODE {}'.format(e))
    # The network takes a single-row batch of shape (1, state_size).
    state = env.reset().values.reshape([1, state_size])
    while env.enough_data_provided():
        action_idx = agent.act(state)
        # TODO: build these parameters into the NN model
        next_state, reward, done, _ = env.step(action_idx)
        next_state = next_state.values.reshape([1, state_size])
        agent.remember(state, action_idx, reward, next_state, done)
        state = next_state

        if done:
            break
        # Train only once the memory holds more than one batch.
        if len(agent.memory) > batch_size:
            agent.replay(batch_size)
            print("episode: {}/{}, score: {}, e: {:.2}".format(e, EPISODES, env.score(), agent.epsilon))

    final_score = env.score()
    if max_reward is None or final_score > max_reward[0]:
        max_reward = (final_score, env.actions)

==> EPISODE 0
episode: 0/50, score: 100592.29045986259, e: 1.0
episode: 0/50, score: 95160.30677503001, e: 1.0
episode: 0/50, score: 92876.45941242929, e: 1.0
episode: 0/50, score: 97752.47353158183, e: 1.0
episode: 0/50, score: 115875.7821243371, e: 1.0
episode: 0/50, score: 115736.7311857879, e: 0.99
episode: 0/50, score: 115736.7311857879, e: 0.99
episode: 0/50, score: 107993.94386945869, e: 0.99
episode: 0/50, score: 107993.94386945869, e: 0.99
episode: 0/50, score: 109387.06574537471, e: 0.99
episode: 0/50, score: 108140.05319587744, e: 0.99
episode: 0/50, score: 108140.05319587744, e: 0.99
episode: 0/50, score: 112876.58752585687, e: 0.99
episode: 0/50, score: 118836.47134722212, e: 0.99
episode: 0/50, score: 123684.99937818879, e: 0.99
episode: 0/50, score: 129213.71885039382, e: 0.98
episode: 0/50, score: 126422.7025232253, e: 0.98
episode: 0/50, score: 128356.96987183065, e: 0.98
episode: 0/50, score: 125622.96641356066, e: 0.98
episode: 0/50, score: 127205.81579037153, e: 0.9

episode: 1/50, score: 112888.82877379819, e: 0.85
episode: 1/50, score: 112888.82877379819, e: 0.84
episode: 1/50, score: 112888.82877379819, e: 0.84
episode: 1/50, score: 114717.62779993372, e: 0.84
episode: 1/50, score: 114717.62779993372, e: 0.84
episode: 1/50, score: 107765.73955525774, e: 0.84
episode: 1/50, score: 107765.73955525774, e: 0.84
episode: 1/50, score: 107765.73955525774, e: 0.84
episode: 1/50, score: 107765.73955525774, e: 0.84
episode: 1/50, score: 108509.32315818901, e: 0.84
episode: 1/50, score: 103886.82599165017, e: 0.84
episode: 1/50, score: 103824.49389605518, e: 0.84
episode: 1/50, score: 105008.09312647021, e: 0.84
episode: 1/50, score: 96848.96429054347, e: 0.83
episode: 1/50, score: 100044.98011213141, e: 0.83
episode: 1/50, score: 100285.08806440052, e: 0.83
episode: 1/50, score: 100375.34464365848, e: 0.83
episode: 1/50, score: 100375.34464365848, e: 0.83
episode: 1/50, score: 100375.34464365848, e: 0.83
episode: 1/50, score: 108505.7475597948, e: 0.83
ep

episode: 1/50, score: 229987.82322088233, e: 0.72
episode: 1/50, score: 206322.0762114535, e: 0.72
episode: 1/50, score: 218515.7109155504, e: 0.71
episode: 1/50, score: 218515.7109155504, e: 0.71
episode: 1/50, score: 200291.50062519347, e: 0.71
episode: 1/50, score: 156427.66198827612, e: 0.71
episode: 1/50, score: 162716.05400020484, e: 0.71
episode: 1/50, score: 158273.90572599927, e: 0.71
episode: 1/50, score: 158273.90572599927, e: 0.71
episode: 1/50, score: 158273.90572599927, e: 0.71
episode: 1/50, score: 117581.68456384489, e: 0.71
episode: 1/50, score: 122625.9388316338, e: 0.71
episode: 1/50, score: 122625.9388316338, e: 0.71
episode: 1/50, score: 122625.9388316338, e: 0.71
episode: 1/50, score: 117365.28605575673, e: 0.71
episode: 1/50, score: 118421.57363025853, e: 0.71
episode: 1/50, score: 96537.26682338672, e: 0.7
episode: 1/50, score: 107542.5152412528, e: 0.7
episode: 1/50, score: 107542.5152412528, e: 0.7
episode: 1/50, score: 123157.68845428272, e: 0.7
==> EPISODE 2

episode: 3/50, score: 101560.0, e: 0.61
episode: 3/50, score: 100067.068, e: 0.61
episode: 3/50, score: 107061.7560532, e: 0.6
episode: 3/50, score: 107382.94132135961, e: 0.6
episode: 3/50, score: 112505.10762238846, e: 0.6
episode: 3/50, score: 113517.65359098995, e: 0.6
episode: 3/50, score: 110861.34049696078, e: 0.6
episode: 3/50, score: 115850.10081932401, e: 0.6
episode: 3/50, score: 121480.41571914316, e: 0.6
episode: 3/50, score: 121480.41571914316, e: 0.6
episode: 3/50, score: 121480.41571914316, e: 0.6
episode: 3/50, score: 119913.31835636622, e: 0.6
episode: 3/50, score: 117503.06065740326, e: 0.6
episode: 3/50, score: 127902.08152558345, e: 0.6
episode: 3/50, score: 127902.08152558345, e: 0.6
episode: 3/50, score: 122069.74660801684, e: 0.6
episode: 3/50, score: 118297.79143782913, e: 0.6
episode: 3/50, score: 118297.79143782913, e: 0.6
episode: 3/50, score: 115990.98450479146, e: 0.59
episode: 3/50, score: 95634.56672420056, e: 0.59
episode: 3/50, score: 98159.31928571947

episode: 4/50, score: 151033.66775617228, e: 0.51
episode: 4/50, score: 151033.66775617228, e: 0.51
episode: 4/50, score: 151033.66775617228, e: 0.51
episode: 4/50, score: 165215.72915847687, e: 0.51
episode: 4/50, score: 189205.05303228772, e: 0.51
episode: 4/50, score: 189205.05303228772, e: 0.51
episode: 4/50, score: 189205.05303228772, e: 0.51
episode: 4/50, score: 206914.64599610984, e: 0.51
episode: 4/50, score: 206914.64599610984, e: 0.51
episode: 4/50, score: 192823.7586037748, e: 0.51
episode: 4/50, score: 192823.7586037748, e: 0.51
episode: 4/50, score: 192823.7586037748, e: 0.51
episode: 4/50, score: 238175.90662738262, e: 0.51
episode: 4/50, score: 282119.3614001346, e: 0.51
episode: 4/50, score: 215680.25179040298, e: 0.5
episode: 4/50, score: 204809.96710016672, e: 0.5
episode: 4/50, score: 170770.55056811904, e: 0.5
==> EPISODE 5
episode: 5/50, score: 101410.0, e: 0.5
episode: 5/50, score: 101410.0, e: 0.5
episode: 5/50, score: 103843.84, e: 0.5
episode: 5/50, score: 107

episode: 7/50, score: 95630.48988760001, e: 0.43
episode: 7/50, score: 93364.04727726389, e: 0.43
episode: 7/50, score: 96305.0147664977, e: 0.43
episode: 7/50, score: 98674.11812975354, e: 0.43
episode: 7/50, score: 101693.54614452399, e: 0.43
episode: 7/50, score: 101693.54614452399, e: 0.43
episode: 7/50, score: 111242.5701274948, e: 0.43
episode: 7/50, score: 110641.86024880632, e: 0.43
episode: 7/50, score: 102775.22398511619, e: 0.43
episode: 7/50, score: 95529.5706941655, e: 0.43
episode: 7/50, score: 95529.5706941655, e: 0.43
episode: 7/50, score: 95529.5706941655, e: 0.43
episode: 7/50, score: 106047.3764275931, e: 0.43
episode: 7/50, score: 104170.3378648247, e: 0.43
episode: 7/50, score: 97420.09997118406, e: 0.43
episode: 7/50, score: 111390.14230705184, e: 0.43
episode: 7/50, score: 127797.91026888057, e: 0.43
episode: 7/50, score: 127797.91026888057, e: 0.43
episode: 7/50, score: 127797.91026888057, e: 0.43
episode: 7/50, score: 138762.97096995052, e: 0.43
episode: 7/50, 

episode: 9/50, score: 112045.51870988178, e: 0.37
episode: 9/50, score: 96448.78250546624, e: 0.37
episode: 9/50, score: 92831.95316151125, e: 0.37
episode: 9/50, score: 92831.95316151125, e: 0.37
episode: 9/50, score: 87707.62934699582, e: 0.37
episode: 9/50, score: 80234.93932663178, e: 0.36
episode: 9/50, score: 87672.71820221054, e: 0.36
episode: 9/50, score: 96930.95724436398, e: 0.36
episode: 9/50, score: 96930.95724436398, e: 0.36
episode: 9/50, score: 96930.95724436398, e: 0.36
episode: 9/50, score: 109347.81286736699, e: 0.36
episode: 9/50, score: 109347.81286736699, e: 0.36
episode: 9/50, score: 114202.85575867807, e: 0.36
episode: 9/50, score: 114202.85575867807, e: 0.36
episode: 9/50, score: 130476.76270428966, e: 0.36
episode: 9/50, score: 130476.76270428966, e: 0.36
episode: 9/50, score: 159403.46099583068, e: 0.36
episode: 9/50, score: 113017.05384604394, e: 0.36
episode: 9/50, score: 113017.05384604394, e: 0.36
episode: 9/50, score: 113017.05384604394, e: 0.36
episode: 

episode: 10/50, score: 93222.87449941541, e: 0.31
episode: 10/50, score: 91964.3656936733, e: 0.31
episode: 10/50, score: 91964.3656936733, e: 0.31
episode: 10/50, score: 91964.3656936733, e: 0.31
episode: 10/50, score: 88791.59507724155, e: 0.31
episode: 10/50, score: 88791.59507724155, e: 0.31
episode: 10/50, score: 87752.73341483783, e: 0.31
episode: 10/50, score: 87752.73341483783, e: 0.31
episode: 10/50, score: 90227.36049713625, e: 0.31
episode: 10/50, score: 90227.36049713625, e: 0.31
episode: 10/50, score: 90227.36049713625, e: 0.31
episode: 10/50, score: 102029.09925016167, e: 0.31
episode: 10/50, score: 125199.9076898734, e: 0.31
episode: 10/50, score: 125199.9076898734, e: 0.31
episode: 10/50, score: 165764.67778139238, e: 0.31
episode: 10/50, score: 181678.08684840606, e: 0.31
episode: 10/50, score: 191815.72409454713, e: 0.31
episode: 10/50, score: 168222.39003091777, e: 0.31
episode: 10/50, score: 149347.83786944882, e: 0.31
episode: 10/50, score: 149347.83786944882, e: 0

episode: 13/50, score: 162965.66113098618, e: 0.26
episode: 13/50, score: 149325.43529432264, e: 0.26
episode: 13/50, score: 157568.19932256924, e: 0.26
episode: 13/50, score: 162909.76127960434, e: 0.26
episode: 13/50, score: 153037.4297460603, e: 0.26
episode: 13/50, score: 153037.4297460603, e: 0.26
episode: 13/50, score: 144130.6513348396, e: 0.26
episode: 13/50, score: 141147.14685220842, e: 0.26
episode: 13/50, score: 144831.08738505107, e: 0.26
episode: 13/50, score: 143744.85422966318, e: 0.26
episode: 13/50, score: 143744.85422966318, e: 0.26
episode: 13/50, score: 142623.6443666718, e: 0.26
episode: 13/50, score: 142623.6443666718, e: 0.26
episode: 13/50, score: 145875.4634582319, e: 0.26
episode: 13/50, score: 149945.38888871658, e: 0.26
episode: 13/50, score: 144637.322122056, e: 0.26
episode: 13/50, score: 133051.87262007932, e: 0.26
episode: 13/50, score: 133051.87262007932, e: 0.26
episode: 13/50, score: 132892.21037293522, e: 0.26
episode: 13/50, score: 126034.972317691

episode: 14/50, score: 34011.60765266212, e: 0.22
episode: 14/50, score: 26348.79244851734, e: 0.22
episode: 14/50, score: 25360.71273169794, e: 0.22
episode: 14/50, score: 22248.95327951861, e: 0.22
episode: 14/50, score: 22248.95327951861, e: 0.22
episode: 14/50, score: 22248.95327951861, e: 0.22
episode: 14/50, score: 25279.26071618905, e: 0.22
episode: 14/50, score: 26204.481658401568, e: 0.22
episode: 14/50, score: 26204.481658401568, e: 0.22
episode: 14/50, score: 27352.237955039553, e: 0.22
episode: 14/50, score: 23126.317190985945, e: 0.22
episode: 14/50, score: 22037.067651290505, e: 0.22
episode: 14/50, score: 25659.961573162665, e: 0.22
episode: 14/50, score: 19686.322518930403, e: 0.22
episode: 14/50, score: 19349.686403856696, e: 0.22
episode: 14/50, score: 23227.36355918958, e: 0.22
episode: 14/50, score: 23227.36355918958, e: 0.22
episode: 14/50, score: 23227.36355918958, e: 0.22
episode: 14/50, score: 21736.16681868961, e: 0.22
episode: 14/50, score: 21736.16681868961, 

episode: 15/50, score: 143444.2121490921, e: 0.19
episode: 15/50, score: 143444.2121490921, e: 0.19
episode: 15/50, score: 143444.2121490921, e: 0.19
episode: 15/50, score: 143444.2121490921, e: 0.19
episode: 15/50, score: 143444.2121490921, e: 0.19
episode: 15/50, score: 143444.2121490921, e: 0.19
episode: 15/50, score: 155106.2265968133, e: 0.19
episode: 15/50, score: 157991.20241151404, e: 0.19
episode: 15/50, score: 154483.79771797842, e: 0.19
episode: 15/50, score: 155688.77134017865, e: 0.19
episode: 15/50, score: 153867.21271549855, e: 0.19
episode: 15/50, score: 144035.09782297822, e: 0.19
episode: 15/50, score: 134312.7287199272, e: 0.19
episode: 15/50, score: 149866.14270569477, e: 0.19
episode: 15/50, score: 149866.14270569477, e: 0.19
episode: 15/50, score: 143886.48361173752, e: 0.19
episode: 15/50, score: 140864.86745589104, e: 0.19
episode: 15/50, score: 140864.86745589104, e: 0.19
episode: 15/50, score: 135666.95384676868, e: 0.19
episode: 15/50, score: 135666.953846768

episode: 16/50, score: 350834.56306969683, e: 0.16
episode: 16/50, score: 379041.6619405004, e: 0.16
episode: 16/50, score: 468078.5483303239, e: 0.16
episode: 16/50, score: 457968.0516863889, e: 0.16
episode: 16/50, score: 454670.681714247, e: 0.16
episode: 16/50, score: 381832.4385036246, e: 0.16
==> EPISODE 17
episode: 17/50, score: 102670.0, e: 0.16
episode: 17/50, score: 101407.159, e: 0.16
episode: 17/50, score: 104297.2630315, e: 0.16
episode: 17/50, score: 104297.2630315, e: 0.16
episode: 17/50, score: 104297.2630315, e: 0.16
episode: 17/50, score: 104297.2630315, e: 0.16
episode: 17/50, score: 103202.14176966925, e: 0.16
episode: 17/50, score: 103883.27590534906, e: 0.16
episode: 17/50, score: 103883.27590534906, e: 0.16
episode: 17/50, score: 106438.80449262065, e: 0.16
episode: 17/50, score: 106438.80449262065, e: 0.16
episode: 17/50, score: 115251.93750460964, e: 0.16
episode: 17/50, score: 121579.2688736127, e: 0.16
episode: 17/50, score: 128035.12805080153, e: 0.16
episod

episode: 17/50, score: 537102.1188862732, e: 0.14
episode: 17/50, score: 586730.3546713649, e: 0.14
episode: 17/50, score: 550998.4760718787, e: 0.14
episode: 17/50, score: 474630.08728831646, e: 0.14
episode: 17/50, score: 527456.416003506, e: 0.14
episode: 17/50, score: 559103.8009637165, e: 0.14
episode: 17/50, score: 571180.4430645327, e: 0.14
episode: 17/50, score: 527485.1391700958, e: 0.14
episode: 17/50, score: 586510.7262432296, e: 0.14
episode: 17/50, score: 733607.6163850316, e: 0.14
episode: 17/50, score: 704776.8370610998, e: 0.14
episode: 17/50, score: 823602.2117896014, e: 0.14
episode: 17/50, score: 830273.3897050974, e: 0.14
episode: 17/50, score: 990184.0445622993, e: 0.14
episode: 17/50, score: 993451.6519093548, e: 0.13
episode: 17/50, score: 979443.9836174325, e: 0.13
episode: 17/50, score: 937132.0035251596, e: 0.13
episode: 17/50, score: 971149.8952531227, e: 0.13
episode: 17/50, score: 1338244.5556588029, e: 0.13
episode: 17/50, score: 1751762.1233573728, e: 0.1

episode: 20/50, score: 96196.38982592708, e: 0.12
episode: 20/50, score: 98447.38534785376, e: 0.12
episode: 20/50, score: 96114.18231510963, e: 0.12
episode: 20/50, score: 91529.53581867891, e: 0.12
episode: 20/50, score: 86367.26999850542, e: 0.12
==> EPISODE 21
episode: 21/50, score: 102190.0, e: 0.12
episode: 21/50, score: 104918.473, e: 0.12
episode: 21/50, score: 109513.9021174, e: 0.12
episode: 21/50, score: 109513.9021174, e: 0.12
episode: 21/50, score: 108364.0061451673, e: 0.11
episode: 21/50, score: 102869.95103360731, e: 0.11
episode: 21/50, score: 106141.21547647602, e: 0.11
episode: 21/50, score: 106141.21547647602, e: 0.11
episode: 21/50, score: 106141.21547647602, e: 0.11
episode: 21/50, score: 101683.28442646403, e: 0.11
episode: 21/50, score: 111231.344834109, e: 0.11
episode: 21/50, score: 119039.78524146345, e: 0.11
episode: 21/50, score: 119039.78524146345, e: 0.11
episode: 21/50, score: 123646.6249303081, e: 0.11
episode: 21/50, score: 130323.54267654473, e: 0.11


episode: 21/50, score: 284382.36987232615, e: 0.098
episode: 21/50, score: 285576.7758257899, e: 0.098
episode: 21/50, score: 294229.75213331135, e: 0.098
episode: 21/50, score: 268720.03262335324, e: 0.098
episode: 21/50, score: 312978.2219964195, e: 0.098
episode: 21/50, score: 312978.2219964195, e: 0.098
episode: 21/50, score: 309785.844132056, e: 0.098
episode: 21/50, score: 344265.0085839538, e: 0.098
episode: 21/50, score: 355006.0768517732, e: 0.098
episode: 21/50, score: 433071.9131514781, e: 0.098
episode: 21/50, score: 481142.8955112922, e: 0.097
episode: 21/50, score: 481142.8955112922, e: 0.097
episode: 21/50, score: 478400.38100687787, e: 0.097
episode: 21/50, score: 539252.9094709527, e: 0.097
episode: 21/50, score: 643113.0198350583, e: 0.097
episode: 21/50, score: 572692.1441631193, e: 0.097
episode: 21/50, score: 495378.70470109815, e: 0.097
episode: 21/50, score: 495378.70470109815, e: 0.097
episode: 21/50, score: 426124.76178388466, e: 0.097
episode: 21/50, score: 47

episode: 23/50, score: 147664.34690099326, e: 0.084
episode: 23/50, score: 144740.5928323536, e: 0.084
episode: 23/50, score: 148518.32230527804, e: 0.084
episode: 23/50, score: 150389.65316632454, e: 0.084
episode: 23/50, score: 152284.5627962202, e: 0.083
episode: 23/50, score: 152284.5627962202, e: 0.083
episode: 23/50, score: 152284.5627962202, e: 0.083
episode: 23/50, score: 154340.4043939692, e: 0.083
episode: 23/50, score: 160683.79501456133, e: 0.083
episode: 23/50, score: 157887.89698130795, e: 0.083
episode: 23/50, score: 170961.01485136026, e: 0.083
episode: 23/50, score: 164755.1300122559, e: 0.083
episode: 23/50, score: 159466.49033886247, e: 0.083
episode: 23/50, score: 158653.21123813424, e: 0.083
episode: 23/50, score: 158653.21123813424, e: 0.083
episode: 23/50, score: 158653.21123813424, e: 0.083
episode: 23/50, score: 166934.90886476482, e: 0.082
episode: 23/50, score: 166934.90886476482, e: 0.082
episode: 23/50, score: 161826.70065350304, e: 0.082
episode: 23/50, sc

episode: 25/50, score: 294798.3302539911, e: 0.071
episode: 25/50, score: 294798.3302539911, e: 0.071
episode: 25/50, score: 294798.3302539911, e: 0.071
episode: 25/50, score: 278790.78092119936, e: 0.071
episode: 25/50, score: 275110.7426130395, e: 0.071
episode: 25/50, score: 278907.2708610995, e: 0.071
episode: 25/50, score: 268866.6091100999, e: 0.071
episode: 25/50, score: 269511.8889719642, e: 0.071
episode: 25/50, score: 267652.2569380576, e: 0.071
episode: 25/50, score: 244446.806261528, e: 0.071
episode: 25/50, score: 258086.93805092125, e: 0.071
episode: 25/50, score: 306323.3867726385, e: 0.071
episode: 25/50, score: 261569.539965156, e: 0.071
episode: 25/50, score: 261569.539965156, e: 0.07
episode: 25/50, score: 261569.539965156, e: 0.07
episode: 25/50, score: 261569.539965156, e: 0.07
episode: 25/50, score: 261569.539965156, e: 0.07
episode: 25/50, score: 261569.539965156, e: 0.07
episode: 25/50, score: 261569.539965156, e: 0.07
episode: 25/50, score: 261569.539965156, e:

episode: 26/50, score: 165894.28588550977, e: 0.061
episode: 26/50, score: 165894.28588550977, e: 0.061
episode: 26/50, score: 158130.43330606792, e: 0.061
episode: 26/50, score: 155853.35506646053, e: 0.061
episode: 26/50, score: 159921.12763369514, e: 0.061
episode: 26/50, score: 159921.12763369514, e: 0.061
episode: 26/50, score: 174985.69785678922, e: 0.06
episode: 26/50, score: 174985.69785678922, e: 0.06
episode: 26/50, score: 195301.53737796246, e: 0.06
episode: 26/50, score: 196180.3942961633, e: 0.06
episode: 26/50, score: 196180.3942961633, e: 0.06
episode: 26/50, score: 200417.8908129604, e: 0.06
episode: 26/50, score: 200417.8908129604, e: 0.06
episode: 26/50, score: 191399.08572637718, e: 0.06
episode: 26/50, score: 199380.42760116712, e: 0.06
episode: 26/50, score: 226775.29835356746, e: 0.06
episode: 26/50, score: 184186.8973227675, e: 0.06
episode: 26/50, score: 185402.53084509776, e: 0.06
episode: 26/50, score: 189907.81234463362, e: 0.06
episode: 26/50, score: 169796.

episode: 27/50, score: 214228.93338496218, e: 0.052
episode: 27/50, score: 211979.52958442006, e: 0.052
episode: 27/50, score: 211979.52958442006, e: 0.052
episode: 27/50, score: 223935.17505298133, e: 0.052
episode: 27/50, score: 223935.17505298133, e: 0.052
episode: 27/50, score: 198003.4817818461, e: 0.052
episode: 27/50, score: 198003.4817818461, e: 0.052
episode: 27/50, score: 198003.4817818461, e: 0.051
episode: 27/50, score: 198003.4817818461, e: 0.051
episode: 27/50, score: 184816.44989517512, e: 0.051
episode: 27/50, score: 187311.47196876, e: 0.051
episode: 27/50, score: 197875.83898779805, e: 0.051
episode: 27/50, score: 193601.72086566163, e: 0.051
episode: 27/50, score: 169265.98455284798, e: 0.051
episode: 27/50, score: 168351.9482362626, e: 0.051
==> EPISODE 28
episode: 28/50, score: 98440.0, e: 0.051
episode: 28/50, score: 98912.512, e: 0.051
episode: 28/50, score: 98912.512, e: 0.051
episode: 28/50, score: 98912.512, e: 0.051
episode: 28/50, score: 93838.30013440001, e

episode: 30/50, score: 318269.69582263, e: 0.05
episode: 30/50, score: 388734.60647776024, e: 0.05
episode: 30/50, score: 356080.8995336284, e: 0.05
episode: 30/50, score: 324674.56419476244, e: 0.05
episode: 30/50, score: 340064.13853759423, e: 0.05
episode: 30/50, score: 340064.13853759423, e: 0.05
episode: 30/50, score: 383014.2392348924, e: 0.05
episode: 30/50, score: 383014.2392348924, e: 0.05
episode: 30/50, score: 370834.3864272228, e: 0.05
episode: 30/50, score: 370834.3864272228, e: 0.05
episode: 30/50, score: 370834.3864272228, e: 0.05
episode: 30/50, score: 370834.3864272228, e: 0.05
episode: 30/50, score: 370834.3864272228, e: 0.05
episode: 30/50, score: 394196.9527721379, e: 0.05
episode: 30/50, score: 460540.29992368864, e: 0.05
episode: 30/50, score: 460540.29992368864, e: 0.05
episode: 30/50, score: 387176.230145845, e: 0.05
episode: 30/50, score: 464069.42945280985, e: 0.05
episode: 30/50, score: 464069.42945280985, e: 0.05
episode: 30/50, score: 427732.79312665487, e:

episode: 31/50, score: 83819.11520028199, e: 0.05
episode: 31/50, score: 90155.8403094233, e: 0.05
episode: 31/50, score: 79391.23297647816, e: 0.05
episode: 31/50, score: 64672.0983826391, e: 0.05
episode: 31/50, score: 64672.0983826391, e: 0.05
episode: 31/50, score: 64672.0983826391, e: 0.05
episode: 31/50, score: 63139.369650970555, e: 0.05
episode: 31/50, score: 63139.369650970555, e: 0.05
episode: 31/50, score: 63139.369650970555, e: 0.05
episode: 31/50, score: 63139.369650970555, e: 0.05
episode: 31/50, score: 63139.369650970555, e: 0.05
episode: 31/50, score: 63271.96232723759, e: 0.05
episode: 31/50, score: 61089.079626947896, e: 0.05
episode: 31/50, score: 64442.870098467334, e: 0.05
episode: 31/50, score: 68425.43947055262, e: 0.05
episode: 31/50, score: 68199.6355202998, e: 0.05
episode: 31/50, score: 71432.298243962, e: 0.05
episode: 31/50, score: 69932.21998083878, e: 0.05
episode: 31/50, score: 69932.21998083878, e: 0.05
episode: 31/50, score: 69932.21998083878, e: 0.05


episode: 32/50, score: 268618.75144509703, e: 0.05
episode: 32/50, score: 284897.04778267, e: 0.05


In [None]:
# Empty table with zero rows and three columns, to be filled row by row.
signals = np.empty((0, 3))
signals.shape

In [None]:
# Score (final deposit) of the best episode recorded by the training loop.
max_reward[0]

In [None]:
# Print every action of the best episode and collect one
# (date, decision, closing price) row per action.
best_rows = []
for on_date, (taken, gained) in max_reward[1].items():
    print('Date: {} => Action: {} => Reward: {}'.format(on_date, taken.act, gained))
    best_rows.append(np.array([[on_date, taken.act, env.data.loc[on_date]['Close']]]))
# Concatenate once instead of re-copying the growing array on each iteration.
if best_rows:
    signals = np.concatenate([signals] + best_rows, axis=0)

In [None]:
# Re-check the dimensions of the signals table.
signals.shape

In [None]:
# Wrap the signals array in a DataFrame; columns get default integer labels.
signals_df = pd.DataFrame(signals)

In [None]:
# Make column 0 (the action dates) the index, modifying signals_df in place.
signals_df.set_index(keys = [0], inplace=True)

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [None]:
# Two stacked panels: the raw price on top, price plus signals below.
fig = plt.figure(figsize=(20,20))

ax1 = fig.add_subplot(211,  ylabel='Price in $')
ax2 = fig.add_subplot(212,  ylabel='Signals')

# Draw the closing price on both panels.
for axis in (ax1, ax2):
    env.data['Close'].plot(ax=axis, color='r', lw=2.)

# One marker style per decision kind, drawn on the lower panel.
marker_styles = (
    ('skip', 'o', 'b'),
    ('sell', 'v', 'k'),
    ('buy', '^', 'm'),
)
for decision, marker, colour in marker_styles:
    chosen = signals_df.loc[signals_df[1] == decision]
    ax2.plot(chosen.index, chosen[2], marker, markersize=10, color=colour)

# Show the plot
plt.show()

In [None]:
# Persist the agent's network weights (Agent.save calls the model's save_weights).
agent.save('best_model.h5')