In [1]:
import gym
import PortfolioAllocationGym
import numpy as np
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.env_checker import check_env

In [27]:
env_kwargs = {'filename':'sp500.csv',
    'date_from':'2008-01-01',
    'date_to':'2017-12-31',
    'investment':1000000,
    'risk_free_rate': 0.5, # approx US Treasury Note return
    'sample_size':100,
    'random_sample':True,
    'reward_function':'daily_returns'}

train_env = gym.make('PortfolioAllocation-v0', **env_kwargs)

In [28]:
check_env(train_env)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._set_item(key, value)


In [29]:
venv, obs = train_env.get_sb_env()

In [30]:
from stable_baselines3 import A2C
from stable_baselines3.a2c import MlpPolicy

In [31]:
import torch

model_kwargs =   {'gamma': 0.9999,
    'normalize_advantage': False,
    'max_grad_norm': 0.7,
    'use_rms_prop': False,
    'gae_lambda': 0.92,
    'n_steps': 10,
    'learning_rate': 0.0038610316815332825,
    'ent_coef': 0.012292116134058367,
    'vf_coef': 0.7960524189522955,
    'policy_kwargs': dict(
        log_std_init=-3.353286611055509,
        ortho_init= False,
        activation_fn=torch.nn.modules.activation.ReLU,
        net_arch=[dict(pi=[64, 64, 64], vf=[64, 64, 64])])
    }
a2c_model = A2C(policy = MlpPolicy,
                env = venv,
                **model_kwargs)


In [32]:
from PortfolioAllocationGym.callbacks import TensorBoardCallback as tbc
from datetime import datetime

In [34]:
# Random Agent, before training
mean_reward, std_reward = evaluate_policy(a2c_model, venv, n_eval_episodes=5)
print(f"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}")

day: 2516                 reward: 30.821                 sharpe: 0.282                  psr: 0.000                  cum. rtns: 113.917                 portf val: 2,139,165.56
day: 2516                 reward: 23.657                 sharpe: 0.282                  psr: 0.000                  cum. rtns: 127.399                 portf val: 2,273,989.57
day: 2516                 reward: 31.044                 sharpe: 0.190                  psr: 0.000                  cum. rtns: 142.567                 portf val: 2,425,673.07
day: 2516                 reward: 19.775                 sharpe: 0.391                  psr: 0.000                  cum. rtns: 234.174                 portf val: 3,341,744.99
mean_reward:14.92 +/- 5.43
day: 2516                 reward: 20.070                 sharpe: 0.270                  psr: 0.000                  cum. rtns: 203.444                 portf val: 3,034,441.53
day: 2516                 reward: 13.728                 sharpe: 0.192                  psr: 0.000

In [35]:
#total_timesteps = 2 * (len(train_env.venv.venv.envs[0].data.date.unique())-1)
total_timesteps = 200 * (len(train_env.data.date.unique())-1)
trained_a2c_model= a2c_model.learn(total_timesteps=total_timesteps,
                                   tb_log_name='A2C'+datetime.now().strftime("%H-%M"))

day: 2516                 reward: 30.884                 sharpe: 0.379                  psr: 0.000                  cum. rtns: 185.604                 portf val: 2,856,043.11
day: 2516                 reward: 24.204                 sharpe: 0.363                  psr: 0.000                  cum. rtns: 179.938                 portf val: 2,799,375.63
day: 2516                 reward: 24.051                 sharpe: 0.095                  psr: 0.000                  cum. rtns: 157.827                 portf val: 2,578,268.11
day: 2516                 reward: 11.469                 sharpe: 0.365                  psr: 0.000                  cum. rtns: 127.689                 portf val: 2,276,893.38
day: 2516                 reward: 21.310                 sharpe: 0.372                  psr: 0.000                  cum. rtns: 139.462                 portf val: 2,394,623.82
day: 2516                 reward: 25.373                 sharpe: 0.220                  psr: 0.000                  cum. rtns

In [36]:
mean_reward, std_reward = evaluate_policy(trained_a2c_model, venv, n_eval_episodes=5)
print(f"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}")

'''

eval_kwargs = {'filename':'sp500.csv',
    'date_from':'2018-01-01',
    'date_to':'2020-12-31',
    'investment':1000000,
    'risk_free_rate': 0.5,
    'reward_function':'daily_returns'}

eval_env =  Monitor(gym.make('PortfolioAllocation-v0', **eval_kwargs))

mean_reward, std_reward = evaluate_policy(trained_a2c_model, eval_env, n_eval_episodes=10)
print(f"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}")
'''

day: 2516                 reward: 13.764                 sharpe: 0.315                  psr: 0.000                  cum. rtns: 103.846                 portf val: 2,038,460.38
day: 2516                 reward: 18.366                 sharpe: 0.274                  psr: 0.000                  cum. rtns: 214.370                 portf val: 3,143,700.78
day: 2516                 reward: 18.488                 sharpe: 0.373                  psr: 0.000                  cum. rtns: 195.657                 portf val: 2,956,566.45
day: 2516                 reward: 12.679                 sharpe: 0.269                  psr: 0.000                  cum. rtns: 115.795                 portf val: 2,157,948.12
day: 2516                 reward: 29.830                 sharpe: 0.245                  psr: 0.000                  cum. rtns: 223.343                 portf val: 3,233,430.63
mean_reward:18.10 +/- 3.62


'\n\neval_kwargs = {\'filename\':\'sp500.csv\',\n    \'date_from\':\'2018-01-01\',\n    \'date_to\':\'2020-12-31\',\n    \'investment\':1000000,\n    \'risk_free_rate\': 0.5,\n    \'reward_function\':\'daily_returns\'}\n\neval_env =  Monitor(gym.make(\'PortfolioAllocation-v0\', **eval_kwargs))\n\nmean_reward, std_reward = evaluate_policy(trained_a2c_model, eval_env, n_eval_episodes=10)\nprint(f"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}")\n'

In [39]:
trained_a2c_model.save('sp500_08_17_opt_49')



