## Example "dumb" trading agent

In [None]:
import os
import sys
import warnings

def warn(*args, **kwargs):
    """Accept any positional/keyword arguments and do nothing.

    Used in this notebook as a silent replacement for ``warnings.warn``.
    """
    return None

# Swap warnings.warn for the no-op defined above, so any later call made
# through ``warnings.warn(...)`` emits nothing.
warnings.warn = warn
# Additionally register a standard filter that ignores FutureWarning.
warnings.simplefilter(action='ignore', category=FutureWarning)

# os.path.abspath('') is the current working directory; append its parent to
# the import path (presumably so the tensortrade imports below resolve to the
# local checkout -- confirm against the repository layout).
sys.path.append(os.path.dirname(os.path.abspath('')))

from tensortrade.environments import TradingEnvironment
from tensortrade.exchanges.simulated import FBMExchange
from tensortrade.actions import DiscreteActionStrategy
from tensortrade.rewards import SimpleProfitStrategy

# Components of the trading environment, each built with its default settings.
exchange = FBMExchange()
action_strategy = DiscreteActionStrategy()
reward_strategy = SimpleProfitStrategy()

# Wire the exchange and the two strategies into a single environment.
env = TradingEnvironment(
    exchange=exchange,
    action_strategy=action_strategy,
    reward_strategy=reward_strategy,
)

# Rule-based ("dumb") agent: enter a position, then exit when the close price
# moves 2% above (take-profit) or 2% below (stop-loss) the entry price.
#
# NOTE(review): the action indices 0 / 16 / 18 / 19 are presumably
# hold / buy / take-profit sell / stop-loss sell under DiscreteActionStrategy's
# default settings -- confirm against its action mapping.
obs = env.reset()

# Placeholder thresholds; both are overwritten on the first iteration, which
# always takes the entry branch below.
sell_price = 1e9
stop_price = -1

print('Initial portfolio: ', exchange.portfolio)

# Bind the portfolio before the loop so the entry condition never depends on a
# name that is only assigned at the end of the first iteration.
portfolio = exchange.portfolio

for i in range(1000):
    # Action 0 while the close is below the take-profit level, 18 otherwise;
    # 19 takes precedence once the close drops below the stop level.
    action = 0 if obs['close'] < sell_price else 18
    action = 19 if obs['close'] < stop_price else action

    # On the first step, or whenever no BTC is held, issue action 16 and
    # re-centre both thresholds at +/- 2% (close / 50) of the current close.
    if i == 0 or portfolio['BTC'] == 0:
        action = 16
        sell_price = obs['close'] + (obs['close'] / 50)
        stop_price = obs['close'] - (obs['close'] / 50)

    obs, reward, done, info = env.step(action)
    executed_trade = info['executed_trade']
    filled_trade = info['filled_trade']
    portfolio = exchange.portfolio

    print('Obs: ', obs)
    print('Reward: ', reward)
    print('Portfolio: ', portfolio)
    print('Trade executed: ', executed_trade.trade_type, executed_trade.price, executed_trade.amount)
    print('Trade filled: ', filled_trade.trade_type, filled_trade.price, filled_trade.amount)

    # Stop once the environment reports the episode is over; stepping a
    # finished episode without a reset is not meaningful.
    if done:
        break

In [4]:
import sys
import os
import warnings

def warn(*args, **kwargs):
    """Accept any positional/keyword arguments and do nothing.

    Used in this notebook as a silent replacement for ``warnings.warn``.
    """
    return None

# Swap warnings.warn for the no-op defined above, so any later call made
# through ``warnings.warn(...)`` emits nothing.
warnings.warn = warn
# Additionally register a standard filter that ignores FutureWarning.
warnings.simplefilter(action='ignore', category=FutureWarning)

import gym
import numpy as np

from tensorforce.agents import Agent
from tensorforce.execution import Runner
from tensorforce.contrib.openai_gym import OpenAIGym

# os.path.abspath('') is the current working directory; append its parent to
# the import path (presumably so the tensortrade imports below resolve to the
# local checkout -- confirm against the repository layout).
sys.path.append(os.path.dirname(os.path.abspath('')))

from tensortrade.environments import TradingEnvironment
from tensortrade.exchanges.simulated import FBMExchange
from tensortrade.actions import DiscreteActionStrategy
from tensortrade.rewards import SimpleProfitStrategy

# Simulated exchange configured with times_to_generate=100000; the action and
# reward strategies use their default settings.
exchange = FBMExchange(times_to_generate=100000)
action_strategy = DiscreteActionStrategy()
reward_strategy = SimpleProfitStrategy()

# Wire the exchange and the two strategies into a single environment.
env = TradingEnvironment(
    exchange=exchange,
    action_strategy=action_strategy,
    reward_strategy=reward_strategy,
)

# Specification handed to ``Agent.from_spec`` below. Written with ``dict(...)``
# calls; the resulting mapping has the same keys, values and key order as a
# plain nested dict literal.
agent_config = dict(
    type="dqn_agent",

    # Update schedule, expressed in timesteps.
    update_mode=dict(unit="timesteps", batch_size=64, frequency=4),

    # Replay memory settings.
    memory=dict(type="replay", capacity=10000, include_next_states=True),

    # Adam optimizer wrapped in a "clipped_step" optimizer.
    optimizer=dict(
        type="clipped_step",
        clipping_value=0.1,
        optimizer=dict(type="adam", learning_rate=1e-3),
    ),

    discount=0.999,
    entropy_regularization=None,
    double_q_model=True,

    target_sync_frequency=1000,
    target_update_weight=1.0,

    # Epsilon annealed from 0.5 to 0 over 100000 timesteps.
    actions_exploration=dict(
        type="epsilon_anneal",
        initial_epsilon=0.5,
        final_epsilon=0.,
        timesteps=100000,
    ),

    # No directory is configured for the saver or the summarizer.
    saver=dict(directory=None, seconds=600),
    summarizer=dict(directory=None, labels=["graph", "total-loss"]),

    execution=dict(type="single", session_config=None, distributed_spec=None),
)

# Network specification: two 'dense' entries with sizes 64 and 32, in order.
network_spec = [{"type": "dense", "size": units} for units in (64, 32)]

# Build the agent from the spec, supplying the environment's state and action
# descriptions together with the network specification.
agent = Agent.from_spec(
    spec=agent_config,
    kwargs={
        "states": env.states,
        "actions": env.actions,
        "network": network_spec,
    },
)

# Runner that pairs the agent with the trading environment
runner = Runner(agent=agent, environment=env)


# Callback function printing episode statistics
def episode_finished(r):
    """Print a one-line summary of the episode that just ended.

    Args:
        r: Object exposing ``episode``, ``episode_timestep`` and
            ``episode_rewards``; the last entry of ``episode_rewards`` is
            reported as the episode's reward.

    Returns:
        Always ``True``.
        NOTE(review): presumably tells the runner to keep going -- confirm
        against the Runner's ``episode_finished`` callback contract.
    """
    template = "Finished episode {ep} after {ts} timesteps (reward: {reward})"
    fields = dict(
        ep=r.episode,
        ts=r.episode_timestep,
        reward=r.episode_rewards[-1],
    )
    print(template.format(**fields))
    return True


# Start learning
# Train for up to 300 episodes of at most 10000 timesteps each, reporting
# after every episode. The runner is closed even if training raises.
try:
    runner.run(episodes=300, max_episode_timesteps=10000, episode_finished=episode_finished)
finally:
    runner.close()

# Print statistics
# Average only the most recent 100 episodes so the figure matches the message;
# the slice also copes with runs that produced fewer than 100 episodes.
print("Learning finished. Total episodes: {ep}. Average reward of last 100 episodes: {ar}.".format(
    ep=runner.episode,
    ar=np.mean(runner.episode_rewards[-100:]))
)

Finished episode 1 after 1665 timesteps (reward: -1737.4344118113577)
Finished episode 2 after 1665 timesteps (reward: -1920.42328116457)
Finished episode 3 after 1665 timesteps (reward: -1906.2373192354394)
Finished episode 4 after 1665 timesteps (reward: -1509.2974570114163)
Finished episode 5 after 1665 timesteps (reward: -1390.3538400672226)
Finished episode 6 after 1665 timesteps (reward: -1063.5511660013083)
Finished episode 7 after 1665 timesteps (reward: -1168.964049598298)
Finished episode 8 after 1665 timesteps (reward: -2170.342073537597)
Finished episode 9 after 1665 timesteps (reward: -1200.728702516103)
Finished episode 10 after 1665 timesteps (reward: -1543.8439047372585)
Finished episode 11 after 1665 timesteps (reward: -1633.7554258849916)
Finished episode 12 after 1665 timesteps (reward: -1062.8677323670868)
Finished episode 13 after 1665 timesteps (reward: -951.1228009892161)
Finished episode 14 after 1665 timesteps (reward: -1202.7116799412731)
Finished episode 15 a

Finished episode 131 after 1665 timesteps (reward: -1665)
Finished episode 132 after 1665 timesteps (reward: -1665)
Finished episode 133 after 1665 timesteps (reward: -1665)
Finished episode 134 after 1665 timesteps (reward: -1665)
Finished episode 135 after 1665 timesteps (reward: -1665)
Finished episode 136 after 1665 timesteps (reward: -1665)
Finished episode 137 after 1665 timesteps (reward: -1665)
Finished episode 138 after 1665 timesteps (reward: -1665)
Finished episode 139 after 1665 timesteps (reward: -1665)
Finished episode 140 after 1665 timesteps (reward: -1665)
Finished episode 141 after 1665 timesteps (reward: -1665)
Finished episode 142 after 1665 timesteps (reward: -1665)
Finished episode 143 after 1665 timesteps (reward: -1665)
Finished episode 144 after 1665 timesteps (reward: -1665)
Finished episode 145 after 1665 timesteps (reward: -1665)
Finished episode 146 after 1665 timesteps (reward: -1665)
Finished episode 147 after 1665 timesteps (reward: -1665)
Finished episo

Finished episode 273 after 1665 timesteps (reward: -1665)
Finished episode 274 after 1665 timesteps (reward: -1665)
Finished episode 275 after 1665 timesteps (reward: -1665)
Finished episode 276 after 1665 timesteps (reward: -1665)
Finished episode 277 after 1665 timesteps (reward: -1665)
Finished episode 278 after 1665 timesteps (reward: -1665)
Finished episode 279 after 1665 timesteps (reward: -1665)
Finished episode 280 after 1665 timesteps (reward: -1665)
Finished episode 281 after 1665 timesteps (reward: -1665)
Finished episode 282 after 1665 timesteps (reward: -1665)
Finished episode 283 after 1665 timesteps (reward: -1665)
Finished episode 284 after 1665 timesteps (reward: -1665)
Finished episode 285 after 1665 timesteps (reward: -1665)
Finished episode 286 after 1665 timesteps (reward: -1665)
Finished episode 287 after 1665 timesteps (reward: -1665)
Finished episode 288 after 1665 timesteps (reward: -1665)
Finished episode 289 after 1665 timesteps (reward: -1665)
Finished episo