### Imports


In [None]:
import numpy as np
import gym
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib import interactive
interactive(True)

from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten, LSTM
from keras.optimizers import Adam
from keras.callbacks import History

### Import Env

In [None]:
# NOTE(review): presumably importing TradingGym registers the environment id
# below with gym as an import side effect — confirm against the package.
import TradingGym
# Environment id handed to gym.make() by the cells further down.
ENV_NAME = 'trading-v0'

### Split Trading Days into Train-Val-Test

In [None]:
# HDF5 store holding the market data; every key in the store is counted as
# one trading day.
hdf_path = '../../Data/Si-3.18/hdf5/Si-3_18.h5'

# Open read-only: the default mode ('a') needs write access and would create
# an empty store if the path were wrong, silently yielding zero trading days.
with pd.HDFStore(hdf_path, mode='r') as store:
    keys = list(store.keys())
trading_days = len(keys)
print("Trading days: %d" % trading_days)


# 40% of the days go to training, 30% to validation, and whatever is left
# (including rounding remainders) to the test split.
n_train = int(0.4 * trading_days)
n_val = int(0.3 * trading_days)
sz = {
    'train' : n_train,
    'val' : n_val,
    'test' : trading_days - (n_train + n_val),
}

# Fixed seed so the day-to-split assignment is reproducible between runs.
np.random.seed(123)
indexes = np.random.permutation(trading_days)
ids = {
    'train' : indexes[:n_train],
    'val' : indexes[n_train:n_train + n_val],
    'test' : indexes[n_train + n_val:],
}
# Sanity check: the slices above must have exactly the planned sizes.
for split_name in ('train', 'val', 'test'):
    assert sz[split_name] == len(ids[split_name])
print("Train size: %d" % sz['train'])
print("Validation size: %d" % sz['val'])
print("Test size: %d" % sz['test'])

# Store keys belonging to each split, in permuted order.
splits = {
    split_name : [keys[i] for i in ids[split_name]]
    for split_name in ('train', 'val', 'test')
}

def sample(split):
    """Return one store key drawn at random from the given split.

    `split` is one of the keys of `splits` / `sz` ('train', 'val', 'test').
    """
    candidates = splits[split]
    position = np.random.randint(0, sz[split])
    return candidates[position]

### Make Env

In [None]:
# Same value as the ENV_NAME assigned in the "Import Env" cell; presumably
# repeated so this cell can be run on its own.
ENV_NAME = 'trading-v0'
def makeRandomEnv(split):
    """Create an environment initialised on a randomly sampled day.

    The day key is drawn from `split` via sample(), echoed to stdout together
    with the split name, and passed to the environment's init().
    Returns the initialised environment.
    """
    random_env = gym.make(ENV_NAME)
    day_key = sample(split)
    print("%s: %s" % (split, day_key))
    random_env.init(hdf_path, day_key)
    return random_env

def iterateEnv(split):
    """Yield an environment initialised on each day of `split`, in order.

    A single environment object is created up front and re-initialised for
    every key, so each yielded value is the same object: finish using it
    before advancing the generator.
    """
    shared_env = gym.make(ENV_NAME)
    for day_key in splits[split]:
        shared_env.init(hdf_path, day_key)
        yield shared_env

### Define NN

In [None]:
# Environment instance used here to read the number of actions and the
# observation shape that size the network.
model_env = gym.make(ENV_NAME)
nb_actions = model_env.ACTION_SPACE

# Feed-forward network: flattened observation -> 3 x (Dense(64) + ELU)
# -> one softmax output per action.
# NOTE(review): the softmax squashes every output into (0, 1); this same
# network is later used as the Q-function of the DQN/SARSA agents — confirm
# that a bounded output is intended there.
model = Sequential()
# The leading 1 is the number of stacked observations fed per step.
model.add(Flatten(input_shape=(1,) + model_env.observation_space.shape))
for _ in range(3):
    model.add(Dense(64))
    model.add(Activation('elu'))
model.add(Dense(nb_actions))
model.add(Activation('softmax'))
# summary() prints the table itself and returns None, so wrapping it in
# print() only added a stray "None" line to the output.
model.summary()

### Define Agents

In [None]:
from rl.policy import GreedyQPolicy
from rl.agents.dqn import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory
from rl.agents.cem import CEMAgent
from rl.memory import EpisodeParameterMemory
from rl.agents import SARSAAgent
from rl.policy import BoltzmannQPolicy
from rl.policy import EpsGreedyQPolicy

# Hyper-parameters consumed by CompileAgent() below.

# Passed as nb_steps_warmup to the agents.
NB_STEPS_WARMUP = 1000
# `limit` of the replay / episode-parameter memories.
# NOTE(review): 10 entries is far smaller than NB_STEPS_WARMUP — confirm this
# is intentional.
MEM_LIMIT = 10
# Learning rate given to the Adam optimizer.
LR = 1e-4
# Passed as target_model_update to the DQN agents.
TARGET_MODEL_UPDATE = 1e-3
# `window_length` of the memories.
# NOTE(review): the network's input shape hard-codes a leading 1; presumably
# the two must stay in sync — confirm.
WINDOW_LENGTH = 1

def CompileAgent(algo):
    """Build and compile a keras-rl agent for one of the supported algorithms.

    Every call wraps its own copy of the module-level ``model`` (same
    architecture, same starting weights), so agents built for different
    algorithms never train each other's network.

    Args:
        algo: 'DQN', 'CEM', 'DNDQN' (DQN with the dueling network enabled)
            or 'SARSA'.

    Returns:
        The compiled agent (a DQNAgent, CEMAgent or SARSAAgent).

    Raises:
        NameError: if ``algo`` is not one of the names listed above.
    """
    if algo not in ('DQN', 'CEM', 'DNDQN', 'SARSA'):
        raise NameError('Unknown RL algorithm')

    # Imported locally so this function carries its own dependency.
    from keras.models import clone_model

    # clone_model() instantiates fresh layers; copying the weights afterwards
    # gives every algorithm the same starting point while keeping their
    # training independent. Handing the shared global model to every agent
    # would let weights learned by one algorithm leak into the next.
    agent_model = clone_model(model)
    agent_model.set_weights(model.get_weights())

    if algo == 'CEM':
        memory = EpisodeParameterMemory(limit=MEM_LIMIT, window_length=WINDOW_LENGTH)
        cem = CEMAgent(model=agent_model, nb_actions=nb_actions, memory=memory,
                       batch_size=10, nb_steps_warmup=NB_STEPS_WARMUP, train_interval=10,
                       elite_frac=0.20, noise_decay_const=0.1, noise_ampl=1.0)
        cem.compile()
        return cem

    # The remaining agents explore with a Boltzmann policy during training
    # and act greedily when tested.
    policy = BoltzmannQPolicy()
    test_policy = GreedyQPolicy()

    if algo == 'SARSA':
        # SARSA does not require a memory.
        # Warm-up uses NB_STEPS_WARMUP like the other agents; WINDOW_LENGTH is
        # an observation-window size, not a step count.
        sarsa = SARSAAgent(model=agent_model, nb_actions=nb_actions,
                           nb_steps_warmup=NB_STEPS_WARMUP,
                           policy=policy, test_policy=test_policy)
        sarsa.compile(Adam(lr=LR), metrics=['mae'])
        return sarsa

    # 'DQN' and 'DNDQN' differ only in whether the dueling network is enabled.
    dueling_kwargs = {}
    if algo == 'DNDQN':
        # dueling_type can be one of {'avg', 'max', 'naive'}
        dueling_kwargs = {'enable_dueling_network': True, 'dueling_type': 'avg'}

    memory = SequentialMemory(limit=MEM_LIMIT, window_length=WINDOW_LENGTH)
    dqn = DQNAgent(model=agent_model, nb_actions=nb_actions, memory=memory,
                   nb_steps_warmup=NB_STEPS_WARMUP,
                   target_model_update=TARGET_MODEL_UPDATE,
                   policy=policy, test_policy=test_policy, **dueling_kwargs)
    dqn.compile(Adam(lr=LR), metrics=['mae'])
    return dqn

### Train-Val Loop

In [None]:
def run(trainEpisodes, testEpisodes):
    """Train every algorithm on the train days and score it on the val days.

    Args:
        trainEpisodes: multiplier applied to env.EPISODE to get the number of
            fit() steps spent on each training day.
        testEpisodes: number of test episodes run on each validation day.

    Returns:
        A DataFrame with one column per algorithm holding the episode rewards
        collected over all validation days.
    """
    results = pd.DataFrame()

    for algo in ('DQN', 'CEM', 'DNDQN', 'SARSA'):
        print('Training algo {}'.format(algo))
        agent = CompileAgent(algo)
        for train_env in iterateEnv('train'):
            total_steps = train_env.EPISODE*trainEpisodes
            agent.fit(train_env, nb_steps=total_steps, visualize=False, verbose=False)

        print('Validating algo {}'.format(algo))
        agent.training = False
        rewards = []
        history = History()
        for val_env in iterateEnv('val'):
            agent.test(val_env, nb_episodes=testEpisodes, callbacks=[history], verbose=False)
            # Collect the episode rewards recorded by the History callback.
            rewards.extend(history.history['episode_reward'])
        results[algo] = rewards

        # Summary statistics: mean, standard deviation and their ratio.
        reward_values = np.array(rewards)
        mean = np.mean(reward_values)
        std = np.std(reward_values)
        print("mu: %.1f, sigma: %.1f, z: %.1f" % (mean, std, mean / std))

    return results

In [None]:
# Run the full experiment with trainEpisodes=10 and testEpisodes=10; `df`
# gets one column of validation episode rewards per algorithm.
df = run(10, 10)

### Plot

In [None]:
# Violin plot of the per-algorithm episode rewards collected in `df`.
fig = plt.figure()
ax = plt.axes()
# NOTE(review): the values are scaled by 1e-3 before plotting while the
# y-label below says plain Rubles — confirm the intended unit.
sns.violinplot(data=df*1e-3, ax=ax)
ax.set_xlabel('Algorithm')
ax.set_ylabel('Return in Rubles')
plt.show()
# The relative path means the image lands in the current working directory.
fig.savefig('violin.png')