In [1]:
import os
import json
import io
from data_generator import DataGenerator
from trade_env import TraderEnv

In [2]:
# Create the project's data generator. It is handed to TraderEnv in the next
# cell, and its max_steps() value sets the number of training steps further down.
dg = DataGenerator()

In [3]:
# Build the trading environment around the data generator. This object is later
# aliased as `env` and passed to the DQN agent's test/fit calls.
trade = TraderEnv(dg)

In [4]:
import numpy as np
import gym

from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten, Dropout
from keras.optimizers import Adam

from rl.agents.dqn import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory

# Label used only to build the weights filename when the agent is saved later.
# NOTE(review): 'CartPole-v0' looks like a leftover from the keras-rl example this
# cell was adapted from; the environment actually used is the TraderEnv instance.
# It is left unchanged here because renaming it would change the saved file's name.
ENV_NAME = 'CartPole-v0'

# The agent interacts with the trading environment created in an earlier cell.
env = trade

# Seed NumPy's global RNG for repeatability. The environment itself is not
# seeded (the env.seed call below was left disabled).
np.random.seed(123)
#env.seed(123)
nb_actions = env.action_space.n

# Build a plain feed-forward Q-network. The dueling architecture is not built
# here: when enable_dueling_network=True, DQNAgent derives the dueling head from
# this model automatically. (Alternatively, build the dueling network by hand
# and turn the option off.)
model = Sequential()
# window_length=1 in the memory below -> the leading (1,) axis of the input.
model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
model.add(Dense(env.observation_space.shape[0]*20))
model.add(Activation('tanh'))
model.add(Dropout(0.2))
model.add(Dense(env.observation_space.shape[0]*10))
model.add(Activation('relu'))
model.add(Dense(env.observation_space.shape[0]))
model.add(Dense(30))
model.add(Dropout(0.2))
model.add(Dense(20))
model.add(Activation('relu'))
# Q-values are unbounded return estimates, so the output must be linear.
# A softmax here would squash them into probabilities in [0, 1] that sum to 1.
# Kept as a single final layer so the layer preceding it (which keras-rl's
# dueling mode is understood to attach its own head to) stays the same.
model.add(Dense(nb_actions, activation='linear'))
#print(model.summary())

# Configure and compile the agent. Any built-in Keras optimizer and metrics
# can be used.
memory = SequentialMemory(limit=50000, window_length=1)
policy = BoltzmannQPolicy()
# Enable the dueling network; dueling_type may be one of {'avg', 'max', 'naive'}.
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=100,
               enable_dueling_network=True, dueling_type='avg', target_model_update=1e-2, policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])



Using TensorFlow backend.


In [None]:
# Run the agent for two episodes with visualization enabled. In notebook order
# this happens before the fit cell, i.e. before any training in this notebook.
dqn.test(
    env,
    nb_episodes=2,
    visualize=True,
)

Testing for 2 episodes ...
#######################
Profite: 92.21500000000015 instant_pnl: 8689.036225000027 current reward 1
#######################
#######################
Profite: 3.5450000000018917 instant_pnl: 20.657025000017196 current reward 1
#######################
#######################
Profite: 0.37999999999738066 instant_pnl: 1.9043999999927705 current reward 1
#######################
#######################
Profite: 20.2400000000016 instant_pnl: 451.137600000068 current reward 1
#######################
#######################
Profite: 131.82500000000437 instant_pnl: 17642.48062500116 current reward 1
#######################
#######################
Profite: 53.12999999999738 instant_pnl: 2930.0568999997163 current reward 1
#######################
#######################
Profite: 8.5 instant_pnl: 90.25 current reward 1
#######################
#######################
Profite: 10.864999999997963 instant_pnl: 140.77822499995165 current reward 1
#######################
########

#######################
Profite: 7.06000000000131 instant_pnl: 64.96360000002112 current reward 1
#######################
#######################
Profite: 24.474999999998545 instant_pnl: 648.9756249999259 current reward 1
#######################
#######################
Profite: 8.319999999999709 instant_pnl: 86.86239999999458 current reward 1
#######################
#######################
Profite: 28.624999999996362 instant_pnl: 877.6406249997844 current reward 1
#######################
#######################
Profite: 47.674999999995634 instant_pnl: 2369.255624999575 current reward 1
#######################
#######################
Profite: 490.75 instant_pnl: 241818.0625 current reward 1
#######################
#######################
Profite: 89.36000000000058 instant_pnl: 8164.929600000105 current reward 1
#######################
Episode 2: reward: -370842.804, steps: 9554


<keras.callbacks.History at 0x121be8ef0>

In [None]:
# Train the agent for as many steps as the data generator reports via
# max_steps(), with visualization enabled and verbose level 2.
total_steps = dg.max_steps()
dqn.fit(env, nb_steps=total_steps, visualize=True, verbose=2)

Training for 9555 steps ...
#######################
Profite: 3.3649999999979627 instant_pnl: 19.053224999982216 current reward 1
#######################
#######################
Profite: 54.95999999999913 instant_pnl: 3131.5215999999023 current reward 1
#######################


In [None]:
# Persist the trained weights; the filename embeds ENV_NAME and any existing
# file of that name is overwritten.
weights_path = 'duel_dqn_{}_weights.h5f'.format(ENV_NAME)
dqn.save_weights(weights_path, overwrite=True)

# Then run a five-episode evaluation of the agent with visualization enabled.
dqn.test(
    env,
    nb_episodes=5,
    visualize=True,
)

In [None]:
# Scratch cell: the original call pow(2) raised TypeError (pow() requires at
# least two arguments: base and exponent), which broke "Restart & Run All".
# The failing call has been removed; this cell is intentionally a no-op.