In [1]:
import warnings
warnings.filterwarnings(action='ignore',
                        category=DeprecationWarning,
                        module='stable_baselines')
warnings.filterwarnings(action='ignore',
                        category=UserWarning,
                        module='stable_baselines')
warnings.filterwarnings("ignore", category=FutureWarning, module='tensorflow')
warnings.filterwarnings("ignore", category=FutureWarning, module='tensorboard')
warnings.filterwarnings("ignore", category=UserWarning, module='gym')

import gym
import PortfolioAllocationGym
import numpy as np
from stable_baselines import A2C
from stable_baselines.common.policies import MlpLnLstmPolicy #, MlpPolicy, MlpLstmPolicy
from stable_baselines.common.evaluation import evaluate_policy
from stable_baselines.common.env_checker import check_env
from stable_baselines.bench import Monitor
from tensorflow import nn as nn

import pandas as pd
pd.options.mode.chained_assignment = None  # default='warn'

from datetime import datetime

In [2]:
reward_fn = 'benchmark'
sample_size=500
observations = ['daily_returns', 'ema_50', 'ema_200', 'bb_bbm', 'bb_bbh', 'bb_bbl','bb_bbhi', 'bb_bbli', 'stoch', 'stoch_signal', 'macd','macd_signal', 'obv']
env_kwargs = {'filename':'sp500.csv',
    'date_from':'2008-01-01',
    'date_to':'2017-12-31',
    'investment':1000000,
    'risk_free_rate': 0.5, # approx US Treasury Note return
    'sample_size':sample_size,
    'random_sample':True,
    'observations' : observations,
    'save_info' : True,
    #'report_point' : 252,
    'reward_function':reward_fn}

train_env = gym.make('PortfolioAllocation-v0', **env_kwargs)
train_env = Monitor(train_env, 'monitor')

In [3]:
check_env(train_env)

In [4]:
venv, obs = train_env.get_sb_env()

In [5]:
#{'gamma': 0.9999, 'n_steps': 1, 'lr_schedule': 'constant', 'lr': 0.001, 'ent_coef': 0.1, 'vf_coef': 0, 'max_grad_norm': 5, 'n_lstm': 128, 'activation_fn': 'tanh', 'net_arch': 'medium'}.
model_kwargs =   {
    'gamma': 0.9999,
    'n_steps': 1,
    'lr_schedule': 'linear',
    'learning_rate': 0.001,
    'ent_coef': 0.1,
    'vf_coef': 0,
    'max_grad_norm': 5,
    'full_tensorboard_log': True,
    'policy_kwargs' : dict (
        n_lstm=128,
        act_fun=nn.tanh,
        net_arch=[64, 'lstm', dict(pi=[256, 256], vf=[256, 256])]
        )
    }

a2c_model = A2C(policy = MlpLnLstmPolicy, tensorboard_log="tensorboard",env = venv, **model_kwargs)





Instructions for updating:
Use keras.layers.flatten instead.


Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor



In [6]:
ts_factor = 40
total_timesteps = ts_factor* (len(venv.venv.envs[0].data.date.unique()))

In [7]:
trained_a2c_model= a2c_model.learn(total_timesteps=total_timesteps,
                                   tb_log_name='A2C_'+str(sample_size)+'_'+reward_fn+'_'+datetime.now().strftime("%H-%M"))


day: 2516                 sharpe: 11.701                 index : 18.340                 excess mean: 1.442                 cum. rtns: 187.913                 portf val: 2,879,132.36
day: 2516                 sharpe: 10.747                 index : 18.340                 excess mean: 0.795                 cum. rtns: 147.935                 portf val: 2,479,348.30
day: 2516                 sharpe: 12.077                 index : 18.340                 excess mean: 1.063                 cum. rtns: 162.795                 portf val: 2,627,954.22
day: 2516                 sharpe: 12.165                 index : 18.340                 excess mean: 1.058                 cum. rtns: 163.439                 portf val: 2,634,390.72
day: 2516                 sharpe: 10.976                 index : 18.340                 excess mean: 1.204                 cum. rtns: 168.571                 portf val: 2,685,705.84
day: 2516                 sharpe: 12.632                 index : 18.340                 e

In [8]:
trained_a2c_model.save('ac2_mlplnltsm_'+str(sample_size)+'_'+str(ts_factor)+'_'+reward_fn+'.zip')

In [9]:
eval_kwargs = {'filename':'sp500.csv',
    'date_from':'2018-01-01',
    'date_to':'2020-12-31',
    'investment':1000000,
    'risk_free_rate': 0.5, # approx US Treasury Note return
    'sample_size':sample_size,
    'random_sample':False,
    'observations' : observations,
    'save_info' : True,
    #'report_point' : 252,
    'reward_function':reward_fn}

eval_env =  gym.make('PortfolioAllocation-v0', **eval_kwargs)

In [10]:
eval_venv, obs = eval_env.get_sb_env()

In [11]:
# Random Agent, before training
mean_reward, std_reward = evaluate_policy(a2c_model, eval_venv, n_eval_episodes=10)
print(f"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}")

day: 653                 sharpe: 1.174                 index : 64.277                 excess mean: -1.355                 cum. rtns: 13.391                 portf val: 1,133,907.09
day: 653                 sharpe: 1.146                 index : 64.277                 excess mean: -1.412                 cum. rtns: 12.926                 portf val: 1,129,257.38
day: 653                 sharpe: 1.161                 index : 64.277                 excess mean: -1.378                 cum. rtns: 13.171                 portf val: 1,131,709.46
day: 653                 sharpe: 1.139                 index : 64.277                 excess mean: -1.426                 cum. rtns: 12.807                 portf val: 1,128,071.74
day: 653                 sharpe: 1.126                 index : 64.277                 excess mean: -1.453                 cum. rtns: 12.607                 portf val: 1,126,067.72
day: 653                 sharpe: 1.136                 index : 64.277                 excess mean: -

In [12]:
obs = eval_venv.reset()
mean_reward, std_reward = evaluate_policy(trained_a2c_model, eval_venv, n_eval_episodes=10)
print(f"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}")

day: 653                 sharpe: 1.183                 index : 64.277                 excess mean: -1.324                 cum. rtns: 13.556                 portf val: 1,135,563.34
day: 653                 sharpe: 1.186                 index : 64.277                 excess mean: -1.319                 cum. rtns: 13.597                 portf val: 1,135,969.46
day: 653                 sharpe: 1.188                 index : 64.277                 excess mean: -1.313                 cum. rtns: 13.637                 portf val: 1,136,365.90
day: 653                 sharpe: 1.190                 index : 64.277                 excess mean: -1.309                 cum. rtns: 13.665                 portf val: 1,136,648.69
day: 653                 sharpe: 1.192                 index : 64.277                 excess mean: -1.305                 cum. rtns: 13.700                 portf val: 1,137,000.42
day: 653                 sharpe: 1.194                 index : 64.277                 excess mean: -