In [10]:
from tensorforce.environments import Environment
from tensorforce.agents import Agent
from tensorforce.execution import Runner
import pandas as pd
pd.options.mode.chained_assignment = None  # default='warn'


In [11]:
train_env_kwargs = {'filename':'sp500.csv',
    'date_from':'2008-01-01',
    'date_to':'2017-12-31',
    'investment':1000000,
    'risk_free_rate': 0.5, # approx US Treasury Note return
    'sample_size':100,
    #'report_point':252,
    'random_sample':True,
    'reward_function':'portfolio_value'}

train_portfolio = Environment.create(
    environment='PortfolioTF',
    **train_env_kwargs
)
_ = train_portfolio.reset()
total_timesteps = 1 * (len(train_portfolio._environment.data.date.unique())-1)


In [12]:
total_timesteps = 10 * (len(train_portfolio._environment.data.date.unique())-1)
a2cagent = Agent.create(
    agent='a2c',
    environment=train_portfolio,
    max_episode_timesteps=total_timesteps,
    network=dict(type='auto',
                 rnn=10  ),
    critic_optimizer=dict(optimizer='adam'),
    batch_size=5
)



In [13]:
a2crunner = Runner(
    agent=a2cagent,
    environment=train_portfolio
)

In [14]:
print(a2crunner.agent.get_architecture())

Policy:
    Network:  
        Conv2d(name=conv2d0, size=64, window=3, padding=same, bias=True, activation=relu)
        Conv2d(name=conv2d1, size=64, window=3, padding=same, bias=True, activation=relu)
        Pooling(name=pooling, reduction=max)
        Lstm(name=lstm, cell=lstm, size=64, horizon=10, bias=False)
    Action-distribution:
        Mean:  Linear(name=mean, size=100, bias=True, initialization_scale=0.01)
        Stddev:  Linear(name=stddev, size=100, bias=True, initialization_scale=0.01)
Baseline:
    Network:  
        Conv2d(name=conv2d0, size=64, window=3, padding=same, bias=True, activation=relu)
        Conv2d(name=conv2d1, size=64, window=3, padding=same, bias=True, activation=relu)
        Pooling(name=pooling, reduction=max)
    State-value:  Linear(name=value, size=0, bias=True)


In [16]:
a2crunner.agent.get_specification()

{'agent': 'a2c',
 'states': {'type': 'float', 'shape': (100, 1, 13)},
 'actions': {'type': 'float', 'shape': (100,), 'min_value': 0, 'max_value': 1},
 'batch_size': 5,
 'max_episode_timesteps': 25170,
 'network': {'type': 'auto', 'rnn': 10},
 'use_beta_distribution': False,
 'memory': 'minimum',
 'update_frequency': 1.0,
 'learning_rate': 0.001,
 'horizon': 1,
 'discount': 0.99,
 'return_processing': None,
 'advantage_processing': None,
 'predict_terminal_values': False,
 'critic': 'auto',
 'critic_optimizer': {'optimizer': 'adam'},
 'state_preprocessing': 'linear_normalization',
 'reward_preprocessing': None,
 'exploration': 0.0,
 'variable_noise': 0.0,
 'l2_regularization': 0.0,
 'entropy_regularization': 0.0,
 'parallel_interactions': 1,
 'config': None,
 'saver': None,
 'summarizer': None,
 'tracking': None,
 'recorder': None}

In [17]:
a2crunner.run(num_episodes=10)

Episodes:   0%|          | 0/10 [00:00, return=0.00, ts/ep=0, sec/ep=0.00, ms/ts=0.0, agent=0.0%]



day: 2516                 reward: 2435212.329                 sharpe: 0.313                  cum. rtns: 143.521                 portf val: 2,435,212.33
day: 2516                 reward: 2463270.868                 sharpe: 0.272                  cum. rtns: 146.327                 portf val: 2,463,270.87
day: 2516                 reward: 2576439.598                 sharpe: 0.432                  cum. rtns: 157.644                 portf val: 2,576,439.60
day: 2516                 reward: 2200693.001                 sharpe: 0.342                  cum. rtns: 120.069                 portf val: 2,200,693.00
day: 2516                 reward: 3125848.207                 sharpe: 0.441                  cum. rtns: 212.585                 portf val: 3,125,848.21
day: 2516                 reward: 2502562.767                 sharpe: 0.399                  cum. rtns: 150.256                 portf val: 2,502,562.77
day: 2516                 reward: 2992904.410                 sharpe: 0.417             

In [19]:
a2crunner.run(num_episodes=10, evaluation=True, save_best_agent=('model'))

##%

Episodes:   0%|          | 0/10 [00:00, return=0.00, ts/ep=0, sec/ep=0.00, ms/ts=0.0, agent=0.0%]

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return self._update_inplace(result)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


day: 2516                 reward: 2716839.626                 sharpe: 0.374                  cum. rtns: 171.684                 portf val: 2,716,839.63
day: 2516                 reward: 2747959.742                 sharpe: 0.456                  cum. rtns: 174.796                 portf val: 2,747,959.74
day: 2516                 reward: 2326394.347                 sharpe: 0.268                  cum. rtns: 132.639                 portf val: 2,326,394.35
day: 2516                 reward: 2823562.330                 sharpe: 0.448                  cum. rtns: 182.356                 portf val: 2,823,562.33
day: 2516                 reward: 2819091.869                 sharpe: 0.431                  cum. rtns: 181.909                 portf val: 2,819,091.87
day: 2516                 reward: 2380090.695                 sharpe: 0.335                  cum. rtns: 138.009                 portf val: 2,380,090.69
day: 2516                 reward: 2863271.313                 sharpe: 0.318             

In [20]:
test_env_kwargs = {'filename':'sp500.csv',
    'date_from':'2017-01-01',
    'date_to':'2019-12-31',
    'investment':1000000,
    'risk_free_rate': 0.5, # approx US Treasury Note return
    'sample_size':100,
    #'report_point':252,
    'random_sample':False,
    'reward_function':'portfolio_value'}

test_portfolio = Environment.create(
    environment='PortfolioTF',
    **test_env_kwargs
)
_ = test_portfolio.reset()
test_timesteps = 1 * (len(test_portfolio._environment.data.date.unique())-1)

In [21]:
testa2c=Agent.load(directory='model',filename='best-model',environment=test_portfolio)




In [22]:
test2crunner = Runner(
    agent=testa2c,
    environment=test_portfolio
)

In [23]:
print(test2crunner.agent.get_architecture())
test2crunner.agent.get_specification()

Policy:
    Network:  
        Conv2d(name=conv2d0, size=64, window=3, padding=same, bias=True, activation=relu)
        Conv2d(name=conv2d1, size=64, window=3, padding=same, bias=True, activation=relu)
        Pooling(name=pooling, reduction=max)
        Lstm(name=lstm, cell=lstm, size=64, horizon=252, bias=False)
    Action-distribution:
        Mean:  Linear(name=mean, size=100, bias=True, initialization_scale=0.01)
        Stddev:  Linear(name=stddev, size=100, bias=True, initialization_scale=0.01)
Baseline:
    Network:  
        Conv2d(name=conv2d0, size=64, window=3, padding=same, bias=True, activation=relu)
        Conv2d(name=conv2d1, size=64, window=3, padding=same, bias=True, activation=relu)
        Pooling(name=pooling, reduction=max)
    State-value:  Linear(name=value, size=0, bias=True)


{'agent': 'a2c',
 'states': {'type': 'float', 'shape': [100, 1, 13]},
 'actions': {'type': 'float', 'shape': [100], 'min_value': 0, 'max_value': 1},
 'batch_size': 5,
 'max_episode_timesteps': 25170,
 'network': {'type': 'auto', 'rnn': 252},
 'use_beta_distribution': False,
 'memory': 'minimum',
 'update_frequency': 1.0,
 'learning_rate': 0.001,
 'horizon': 1,
 'discount': 0.99,
 'return_processing': None,
 'advantage_processing': None,
 'predict_terminal_values': False,
 'critic': 'auto',
 'critic_optimizer': {'optimizer': 'adam'},
 'state_preprocessing': 'linear_normalization',
 'reward_preprocessing': None,
 'exploration': 0.0,
 'variable_noise': 0.0,
 'l2_regularization': 0.0,
 'entropy_regularization': 0.0,
 'parallel_interactions': 1,
 'config': None,
 'saver': None,
 'summarizer': None,
 'tracking': None,
 'recorder': None}

In [24]:
test2crunner.run(num_episodes=10)

Episodes:   0%|          | 0/10 [00:00, return=0.00, ts/ep=0, sec/ep=0.00, ms/ts=0.0, agent=0.0%]



day: 751                 reward: 1467151.144                 sharpe: 0.351                  cum. rtns: 46.715                 portf val: 1,467,151.14
day: 751                 reward: 1479934.122                 sharpe: 0.345                  cum. rtns: 47.993                 portf val: 1,479,934.12
day: 751                 reward: 1476597.791                 sharpe: 0.359                  cum. rtns: 47.660                 portf val: 1,476,597.79
day: 751                 reward: 1472070.959                 sharpe: 0.354                  cum. rtns: 47.207                 portf val: 1,472,070.96
day: 751                 reward: 1473712.938                 sharpe: 0.360                  cum. rtns: 47.371                 portf val: 1,473,712.94
day: 751                 reward: 1474768.664                 sharpe: 0.355                  cum. rtns: 47.477                 portf val: 1,474,768.66
day: 751                 reward: 1485059.913                 sharpe: 0.375                  cum. rtn

In [25]:
timesteps = len(test_portfolio._environment.data.date.unique())-1
random = Agent.create(
    agent='random',
    environment=test_portfolio,
    max_episode_timesteps=timesteps,
)



In [26]:
randomrunner = Runner(
    agent=random,
    environment=test_portfolio
)

In [27]:
randomrunner.run(num_episodes=10)

Episodes:   0%|          | 0/10 [00:00, return=0.00, ts/ep=0, sec/ep=0.00, ms/ts=0.0, agent=0.0%]

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return self._update_inplace(result)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


day: 751                 reward: 1425852.642                 sharpe: 0.334                  cum. rtns: 42.585                 portf val: 1,425,852.64
day: 751                 reward: 1436983.676                 sharpe: 0.343                  cum. rtns: 43.698                 portf val: 1,436,983.68
day: 751                 reward: 1541072.161                 sharpe: 0.378                  cum. rtns: 54.107                 portf val: 1,541,072.16
day: 751                 reward: 1475162.084                 sharpe: 0.380                  cum. rtns: 47.516                 portf val: 1,475,162.08
day: 751                 reward: 1434898.877                 sharpe: 0.346                  cum. rtns: 43.490                 portf val: 1,434,898.88
day: 751                 reward: 1481293.404                 sharpe: 0.366                  cum. rtns: 48.129                 portf val: 1,481,293.40
day: 751                 reward: 1484133.162                 sharpe: 0.345                  cum. rtn