In [1]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Load IPython's autoreload extension; it must be loaded before the
# %autoreload magic below can be used.
%load_ext autoreload

# Mode 2: re-import modules before each cell executes, so edits to local
# source files are picked up without restarting the kernel.
%autoreload 2

In [2]:
import os
import sys
import warnings
import numpy

def warn(*args, **kwargs):
    """No-op replacement for ``warnings.warn``.

    Accepts any positional and keyword arguments and returns ``None``
    without emitting anything.
    """
    pass


# Globally silence warnings for this session. Rebinding ``warnings.warn``
# means any code that looks up ``warnings.warn`` after this point calls the
# no-op above. NOTE(review): this is a process-wide monkeypatch and also hides
# warnings that may be relevant when debugging.
warnings.warn = warn
warnings.simplefilter(action='ignore', category=FutureWarning)

# Tell numpy not to report floating-point divide-by-zero.
numpy.seterr(divide='ignore')

# Make the parent of the current working directory importable. The membership
# check keeps the cell idempotent: re-running it no longer appends a duplicate
# entry to sys.path each time.
_parent_dir = os.path.dirname(os.path.abspath(''))
if _parent_dir not in sys.path:
    sys.path.append(_parent_dir)

In [4]:
from tensortrade.rewards import SimpleProfitStrategy
from tensortrade.actions import DiscreteActionStrategy
from tensortrade.exchanges.simulated import FBMExchange
from tensortrade.features.stationarity import FractionalDifference
from tensortrade.features.scalers import MinMaxNormalizer
from tensortrade.features import FeaturePipeline

# Feature pipeline: the normalizer runs first, then fractional differencing
# with order 0.6. Both steps are constructed with inplace=True.
normalize = MinMaxNormalizer(inplace=True)
difference = FractionalDifference(
    difference_order=0.6,
    inplace=True,
)
feature_pipeline = FeaturePipeline(steps=[normalize, difference])

# Reward and action strategies; the action strategy is configured with
# 20 discrete actions on the 'ETH/BTC' instrument symbol.
reward_strategy = SimpleProfitStrategy()
action_strategy = DiscreteActionStrategy(
    n_actions=20,
    instrument_symbol='ETH/BTC',
)

# Simulated exchange (imported from tensortrade.exchanges.simulated) using
# 'BTC' as the base instrument and a '1h' timeframe.
exchange = FBMExchange(
    base_instrument='BTC',
    timeframe='1h',
    should_pretransform_obs=True,
)

In [18]:
from tensortrade.environments import TradingEnvironment
from tensortrade.strategies import TensorforceTradingStrategy

# Three dense tanh layers of decreasing width: 128 -> 64 -> 32.
network_spec = [
    dict(type='dense', size=units, activation="tanh")
    for units in (128, 64, 32)
]

# Agent configuration handed to TensorforceTradingStrategy.
# NOTE(review): "update_frequency" is set to "never"; in Tensorforce this
# appears to disable automatic policy updates, which would mean the agent
# does not learn during strategy.run() -- confirm this is intended.
agent_spec = dict(
    type="ppo",
    learning_rate=1e-4,
    discount=0.99,
    likelihood_ratio_clipping=0.2,
    estimate_terminal=False,
    max_episode_timesteps=2000,
    network=network_spec,
    batch_size=10,
    update_frequency="never",
)

# Wire the exchange, strategies and feature pipeline into one environment.
environment = TradingEnvironment(
    exchange=exchange,
    action_strategy=action_strategy,
    reward_strategy=reward_strategy,
    feature_pipeline=feature_pipeline,
)

strategy = TensorforceTradingStrategy(
    environment=environment,
    agent_spec=agent_spec,
)

In [20]:
# Run the strategy for 10 episodes with evaluation switched off and keep the
# returned performance record.
performance = strategy.run(evaluation=False, episodes=10)

# Display the last five rows of the performance record.
performance.tail(5)



Episodes:  10%|█         | 1/10 [00:00, reward=0.00, ts/ep=0, sec/ep=0.00, ms/ts=0.0, agent=0.0%]

Episodes:  20%|██        | 2/10 [00:10, reward=-2584.70, ts/ep=1666, sec/ep=10.33, ms/ts=6.2, agent=17.1%]

Episodes:  30%|███       | 3/10 [00:20, reward=-2567.92, ts/ep=1666, sec/ep=10.40, ms/ts=6.2, agent=16.2%]

Episodes:  40%|████      | 4/10 [00:31, reward=-2620.64, ts/ep=1666, sec/ep=10.49, ms/ts=6.3, agent=15.8%]

Episodes:  50%|█████     | 5/10 [00:42, reward=-2634.35, ts/ep=1666, sec/ep=10.52, ms/ts=6.3, agent=15.6%]

Episodes:  60%|██████    | 6/10 [00:52, reward=-2627.85, ts/ep=1666, sec/ep=10.43, ms/ts=6.3, agent=15.5%]

Episodes:  70%|███████   | 7/10 [01:02, reward=-2614.50, ts/ep=1666, sec/ep=10.41, ms/ts=6.2, agent=15.3%]

Episodes:  80%|████████  | 8/10 [01:12, reward=-2582.35, ts/ep=1666, sec/ep=10.36, ms/ts=6.2, agent=15.3%]

Episodes:  90%|█████████ | 9/10 [01:22, reward=-2593.84, ts/ep=1666, sec/ep=10.34, ms/ts=6.2, a

Finished running strategy.
Total episodes: 9 (14994 timesteps).
Average reward: -2593.1446321459885.





Unnamed: 0,balance,net_worth
1054,49.022928,3208.919632
1055,0.91294,3208.861868
1056,2402.866715,3204.853915
1057,3003.677181,3204.549881
1058,50.646196,3195.967906


In [None]:
# Plot the 'balance' column of the performance record. A title and axis
# labels let the figure stand on its own, and the trailing semicolon
# suppresses the textual repr of the returned value in the cell output.
# NOTE(review): the x axis is the record's index, presumably one row per
# step -- confirm and adjust the label if not.
ax = performance['balance'].plot(title='Account balance over the run')
ax.set(xlabel='Step (performance index)', ylabel='Balance');

In [None]:
# Save the agent through the strategy into the relative 'agents' directory.
# NOTE(review): whether the directory must already exist is not visible
# here -- confirm against save_agent.
agent_dir = 'agents'
strategy.save_agent(directory=agent_dir)