In [1]:
import numpy as np
import pandas as pd
import collections
import datetime
import pprint
import gym
import gym_anytrading
import matplotlib.pyplot as plt
from lib import data, environ
from helpers import validation, environ
from typing import List, Optional, Tuple, Any
from tensorforce import Agent, Environment
from tensorforce.agents import ConstantAgent
from tensorforce.core.networks import AutoNetwork
from tensorforce.execution import Runner
from tensorforce.core.layers import Dense, Gru
%load_ext blackcellmagic




# Prepare environments

## Load prices

## Create environments

In [15]:
# Two TradingEnv instances with the same window size and commission but
# disjoint date ranges: 2018-2019 for training, 2020 for validation.
# NOTE(review): random_ofs_on_reset presumably randomises the starting offset
# on reset (enabled for training only) — confirm against TradingEnv.
train_env_kwargs = dict(
    window_size=10,
    commission_perc=0.001,
    random_ofs_on_reset=True,
    date_range=('2018-01-01', '2019-12-31'),
)
val_env_kwargs = dict(
    window_size=10,
    commission_perc=0.001,
    random_ofs_on_reset=False,
    date_range=('2020-01-01', '2020-12-31'),
)
env = environ.TradingEnv(**train_env_kwargs)
env_val = environ.TradingEnv(**val_env_kwargs)

# Smoke test on the training environment: reset, take one step with action 0,
# and show everything the step returns.
obs = env.reset()
obs, reward, done, info = env.step(0)
for label, value in (
    ('Observation', obs),
    ('Reward', reward),
    ('Done', done),
    ('Info', info),
):
    print(f'{label}: {value}')
print(f'Number of trading days in data: {len(env.prices)}')

Observation: [-0.02325273  0.04247996  0.01807203 -0.03536022  0.0107492   0.01565327
  0.13853578 -0.00494537  0.01483379  0.04487532]
Reward: -0.04692015786483642
Done: False
Info: {'total_reward': -0.04692015786483642, 'total_profit': 1.0437865768315533, 'position': 0, 'offset': 94}
Number of trading days in data: 730


## Create tensorforce environments

In [16]:
# Wrap both environments for Tensorforce; episodes are capped at 100 timesteps
# for training and 1000 for validation.
environment = Environment.create(environment=env, max_episode_timesteps=100)
environment_val = Environment.create(environment=env_val, max_episode_timesteps=1000)

# Each probe is invoked immediately before its line is printed, in this order.
probes = (
    ('Action space', environment.actions),
    ('State space', environment.states),
    ('Initial state', environment.reset),
    ('Initial state (validation)', environment_val.reset),
)
for label, probe in probes:
    print(f'{label}: {probe()}')

Action space: {'type': 'int', 'shape': (), 'num_values': 3}
State space: {'type': 'float', 'shape': (10,), 'min_value': None, 'max_value': None}
Initial state: [ 0.04664661 -0.03726715  0.04602445  0.00670416 -0.0180561  -0.01840975
  0.01854566  0.05479704 -0.00421551  0.01492491]
Initial state (validation): [ 0.         -0.03233423  0.05398306  0.00458934 -0.00058291  0.05243811
  0.05111418 -0.01178317 -0.02694923  0.04395338]


# Create agent

In [6]:
# Alternative agent configuration (dueling DQN), currently disabled: the
# `if False:` guard means nothing below is ever executed and `agent_` is never
# bound. The commented-out keyword arguments are options that are not passed.
# NOTE(review): max_episode_timesteps=1000 here differs from the 100 used when
# `environment` is created — reconcile before re-enabling.
# NOTE(review): start_updating and target_sync_frequency are float literals
# (1e4, 1e3) — confirm Tensorforce accepts floats for these before re-enabling.
if False:
    agent_ = Agent.create(
        agent="dueling_dqn",
        max_episode_timesteps=1000,
        environment=environment,
        memory=100000,
        # update=dict(unit="timesteps", batch_size=32),
        batch_size=32,
        # optimizer=dict(type="adam", learning_rate=3e-4),
        # policy=dict(network="auto"),
        # objective="policy_gradient",
        start_updating=1e4,
        # network=dict(network=[Gru(size=5, horizon=5, name='GRU_1'), Dense(size=3, name='Dense_1')]),
        # network=dict(network=[dict(type='dense', size=32), dict(type='dense', size=3)]),
        network='auto',
        # reward_estimation=dict(horizon=4, discount=0.99),
        discount=0.99,
        target_sync_frequency=1e3,
        config=dict(name="agent_007"),
        summarizer=dict(
            directory="runs/summaries",
            # list of labels, or 'all'
            summaries=["entropy", "kl-divergence", "loss", "reward", "update-norm"],
        ),
    )

In [17]:
# Instantiate the A2C agent. The policy network and the critic each start with
# a GRU layer using identical settings, followed by a tanh dense layer
# (16 units for the policy, 32 for the critic).
#
# Alternatives left commented out in the original cell, for both networks:
#   - dropout=0.1 on the GRU layer
#   - an LSTM layer between the GRU and the dense layer:
#     dict(type="lstm", size=64, activation="tanh", horizon=1,
#          dropout=0.1, l2_regularization=0.01)
recurrent_layer = dict(
    type="gru",
    size=64,
    activation="tanh",
    horizon=1,
    l2_regularization=0.01,
)
# Each network gets its own copy of the GRU spec so the two lists share no dict.
policy_layers = [
    dict(recurrent_layer),
    dict(type="dense", size=16, activation="tanh"),
]
critic_layers = [
    dict(recurrent_layer),
    dict(type="dense", size=32, activation="tanh"),
]

agent = Agent.create(
    agent="a2c",
    environment=environment,
    network=policy_layers,
    critic=critic_layers,
    batch_size=32,
    config=dict(name="agent_001", device="gpu"),
    # summaries accepts a list of labels, or 'all'
    # summaries=["entropy", "kl-divergence", "loss", "reward", "update-norm"],
    summarizer=dict(directory="runs/summaries", summaries="all"),
    # Options that are not passed (left commented out):
    # update=dict(unit="timesteps", batch_size=32),
    # objective="policy_gradient",
    # reward_estimation=dict(horizon=5),
    # optimizer=dict(optimizer="rmsprop", learning_rate=1e-3),
    # memory=10000,  # Replay memory capacity
)

# pprint.PrettyPrinter(indent=2).pprint(agent.get_specification())



# Run Training and Validation

In [19]:
# Pair the agent with the training environment in a Tensorforce Runner.
runner = Runner(agent=agent, environment=environment)

## Run training

In [13]:
# Label for this run: "save_" plus the current local time at second precision,
# with ":" replaced by "_".
_run_timestamp = datetime.datetime.now().isoformat(timespec="seconds").replace(":", "_")
latest_run_name = f'save_{_run_timestamp}'
print(latest_run_name)

save_2022-03-22T16_11_32


In [20]:
# Train for 400 episodes, then write a checkpoint of the agent under ./saves
# using the current run name.
# Runner.run(evaluation=True) puts Tensorforce in evaluation mode, where the
# agent only acts and no training updates are performed; a training run needs
# evaluation=False (the default), otherwise the saved checkpoint is untrained.
runner.run(num_episodes=400, evaluation=False)
agent.save('./saves', filename=latest_run_name, format='checkpoint')
runner.close()
print(latest_run_name)
print('Finished')

Episodes:   0%|          | 0/400 [00:00, return=0.00, ts/ep=0, sec/ep=0.00, ms/ts=0.0, agent=0.0%]

save_2022-03-22T16_11_32
Finished


In [None]:
# Restore a saved agent from ./saves and bind it to the validation environment.
# NOTE(review): this literal overrides any run name set earlier in the session,
# so the checkpoint loaded is the one named here — confirm it is the intended run.
latest_run_name = 'save_2022-03-22T15_37_15'
agent = Agent.load(
    './saves',
    filename=latest_run_name,
    environment=environment_val,
)

## Run validation

In [None]:
# Run the current agent against the validation environment for one episode and
# keep whatever the validator returns in `res`.
validator = validation.Validator(
    env=environment_val,
    agent=agent,
    commission=None,
    num_episodes=1,
)
res = validator.run()

In [None]:
# Print every entry of the validation result as "name: value", one per line.
for metric_name, metric_value in res.items():
    print(f'{metric_name}: {metric_value}')

In [None]:
# Call render_all() on the object held in the `.environment` attribute of the
# Tensorforce validation environment.
# NOTE(review): presumably this plots the whole validation episode — confirm
# against the TradingEnv implementation.
environment_val.environment.render_all()