In [1]:
import warnings
warnings.filterwarnings(action='ignore',
                        category=DeprecationWarning,
                        module='stable_baselines')
warnings.filterwarnings(action='ignore',
                        category=UserWarning,
                        module='stable_baselines')
warnings.filterwarnings("ignore", category=FutureWarning, module='tensorflow')
warnings.filterwarnings("ignore", category=FutureWarning, module='tensorboard')
warnings.filterwarnings("ignore", category=UserWarning, module='gym')

import gym
import PortfolioAllocationGym
import numpy as np
from stable_baselines import A2C
from stable_baselines.common.policies import MlpLnLstmPolicy #, MlpPolicy, MlpLstmPolicy
from stable_baselines.common.evaluation import evaluate_policy
from stable_baselines.common.env_checker import check_env
from stable_baselines.bench import Monitor
from tensorflow import nn as nn

import pandas as pd
pd.options.mode.chained_assignment = None  # default='warn'

from datetime import datetime

In [2]:
reward_fn = 'benchmark'
sample_size=100
observations = ['daily_returns', 'ema_50', 'ema_200', 'bb_bbm', 'bb_bbh', 'bb_bbl','bb_bbhi', 'bb_bbli', 'stoch', 'stoch_signal', 'macd','macd_signal', 'obv']
env_kwargs = {'filename':'sp500.csv',
    'date_from':'2008-01-01',
    'date_to':'2017-12-31',
    'investment':1000000,
    'risk_free_rate': 0.5, # approx US Treasury Note return
    'sample_size':sample_size,
    'random_sample':True,
    'observations' : observations,
    'save_info' : True,
    #'report_point' : 252,
    'reward_function':reward_fn}

train_env = gym.make('PortfolioAllocation-v0', **env_kwargs)
train_env = Monitor(train_env, 'monitor')

In [3]:
check_env(train_env)

In [4]:
venv, obs = train_env.get_sb_env()

In [5]:
#{'gamma': 0.9999, 'n_steps': 1, 'lr_schedule': 'constant', 'lr': 0.001, 'ent_coef': 0.1, 'vf_coef': 0, 'max_grad_norm': 5, 'n_lstm': 128, 'activation_fn': 'tanh', 'net_arch': 'medium'}.
model_kwargs =   {
    'gamma': 0.9999,
    'n_steps': 1,
    'lr_schedule': 'linear',
    'learning_rate': 0.001,
    'ent_coef': 0.1,
    'vf_coef': 0,
    'max_grad_norm': 5,
    'full_tensorboard_log': True,
    'policy_kwargs' : dict (
        n_lstm=128,
        act_fun=nn.tanh,
        net_arch=[64, 'lstm', dict(pi=[256, 256], vf=[256, 256])]
        )
    }

a2c_model = A2C(policy = MlpLnLstmPolicy, tensorboard_log="tensorboard",env = venv, **model_kwargs)





Instructions for updating:
Use keras.layers.flatten instead.


Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor



In [6]:
ts_factor = 40
total_timesteps = ts_factor* (len(venv.venv.envs[0].data.date.unique()))

In [7]:
trained_a2c_model= a2c_model.learn(total_timesteps=total_timesteps,
                                   tb_log_name='A2C_'+str(sample_size)+'_'+reward_fn+'_'+datetime.now().strftime("%H-%M"))


day: 2516                 sharpe: 9.002                 index : 18.340                 excess mean: 0.433                 cum. rtns: 122.542                 portf val: 2,225,424.88
day: 2516                 sharpe: 12.440                 index : 18.340                 excess mean: 1.825                 cum. rtns: 216.213                 portf val: 3,162,130.13
day: 2516                 sharpe: 9.765                 index : 18.340                 excess mean: 0.767                 cum. rtns: 138.078                 portf val: 2,380,784.30
day: 2516                 sharpe: 11.826                 index : 18.340                 excess mean: 1.245                 cum. rtns: 179.282                 portf val: 2,792,823.34
day: 2516                 sharpe: 11.979                 index : 18.340                 excess mean: 1.105                 cum. rtns: 166.133                 portf val: 2,661,332.58
day: 2516                 sharpe: 10.637                 index : 18.340                 exc

In [8]:
trained_a2c_model.save('ac2_mlplnltsm_'+str(sample_size)+'_'+str(ts_factor)+'_'+reward_fn+'.zip')

In [19]:
eval_kwargs = {'filename':'sp500.csv',
    'date_from':'2018-01-01',
    'date_to':'2020-12-31',
    'investment':1000000,
    'risk_free_rate': 0.5, # approx US Treasury Note return
    'sample_size':sample_size,
    'random_sample':False,
    'observations' : observations,
    'save_info' : True,
    #'report_point' : 252,
    'reward_function':reward_fn}

eval_env =  gym.make('PortfolioAllocation-v0', **eval_kwargs)

In [20]:
eval_venv, obs = eval_env.get_sb_env()

In [21]:
# Random Agent, before training
mean_reward, std_reward = evaluate_policy(a2c_model, eval_venv, n_eval_episodes=10)
print(f"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}")

day: 653                 sharpe: 2.113                 index : 64.277                 excess mean: 0.994                 cum. rtns: 30.603                 portf val: 1,306,031.22
day: 653                 sharpe: 2.233                 index : 64.277                 excess mean: 1.277                 cum. rtns: 32.975                 portf val: 1,329,752.52
day: 653                 sharpe: 2.105                 index : 64.277                 excess mean: 0.975                 cum. rtns: 30.393                 portf val: 1,303,926.48
day: 653                 sharpe: 2.121                 index : 64.277                 excess mean: 1.014                 cum. rtns: 30.748                 portf val: 1,307,481.55
day: 653                 sharpe: 2.303                 index : 64.277                 excess mean: 1.442                 cum. rtns: 34.484                 portf val: 1,344,841.85
day: 653                 sharpe: 2.363                 index : 64.277                 excess mean: 1.584 

In [22]:
obs = eval_venv.reset()
mean_reward, std_reward = evaluate_policy(trained_a2c_model, eval_venv, n_eval_episodes=10)
print(f"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}")

day: 653                 sharpe: 2.394                 index : 64.277                 excess mean: 1.657                 cum. rtns: 36.428                 portf val: 1,364,283.82
day: 653                 sharpe: 2.395                 index : 64.277                 excess mean: 1.659                 cum. rtns: 36.447                 portf val: 1,364,469.36
day: 653                 sharpe: 2.392                 index : 64.277                 excess mean: 1.651                 cum. rtns: 36.372                 portf val: 1,363,718.29
day: 653                 sharpe: 2.385                 index : 64.277                 excess mean: 1.634                 cum. rtns: 36.227                 portf val: 1,362,271.62
day: 653                 sharpe: 2.364                 index : 64.277                 excess mean: 1.586                 cum. rtns: 35.800                 portf val: 1,358,004.98
day: 653                 sharpe: 2.358                 index : 64.277                 excess mean: 1.571 