In [None]:
# Render matplotlib figures inline in the cell output.
%matplotlib inline
# Load IPython's autoreload extension.
%load_ext autoreload

# Mode 2: re-import edited modules before each cell executes, so changes to
# local .py files are picked up without restarting the kernel.
%autoreload 2

In [2]:
import os
import sys
import warnings
import numpy

def warn(*args, **kwargs):
    """No-op stand-in for ``warnings.warn``.

    Accepts any positional/keyword arguments and returns ``None`` without
    emitting anything.
    """
    return None


# Swap the stdlib entry point for the no-op above so that code calling
# ``warnings.warn`` through the module attribute is silenced. This is a blunt,
# process-wide override.
warnings.warn = warn
warnings.simplefilter(action='ignore', category=FutureWarning)
# Do not report floating-point divide-by-zero in NumPy operations.
numpy.seterr(divide='ignore')

# Put the parent of the current working directory on the import path
# (presumably so the local project checkout is importable - confirm).
# Guarded so that re-running this cell does not append duplicate entries.
_parent_dir = os.path.dirname(os.path.abspath(''))
if _parent_dir not in sys.path:
    sys.path.append(_parent_dir)

In [3]:
import ccxt
import pandas as pd

from tensorforce.agents import Agent
from tensorforce.environments import Environment

from tensortrade.environments import TradingEnvironment
from tensortrade.exchanges.simulated import FBMExchange, SimulatedExchange
from tensortrade.features.scalers import MinMaxNormalizer
from tensortrade.features.stationarity import FractionalDifference
from tensortrade.features import FeaturePipeline
from tensortrade.rewards import SimpleProfitStrategy, RunningProfitStrategy
from tensortrade.actions import DiscreteActionStrategy
from tensortrade.strategies import TensorforceTradingStrategy

In [4]:
# Feature transforms and the pipeline that holds them.
normalize = MinMaxNormalizer(inplace=True)
difference = FractionalDifference(
    difference_order=0.6,
    inplace=True,
)
feature_pipeline = FeaturePipeline(steps=[normalize, difference])

# Reward and action schemes handed to the trading environment.
reward_strategy = RunningProfitStrategy()
action_strategy = DiscreteActionStrategy(
    n_actions=20,
    instrument_symbol='ETH/BTC',
)

# Simulated exchange the first environment is built on.
exchange = FBMExchange(
    base_instrument='BTC',
    timeframe='1h',
    should_pretransform_obs=True,
    window_size=20,
)

In [5]:
# Three dense tanh layers of decreasing width: 128 -> 64 -> 32.
network_spec = [
    dict(type='dense', size=layer_size, activation="tanh")
    for layer_size in (128, 64, 32)
]

# Agent configuration passed to TensorforceTradingStrategy.
# NOTE(review): update_frequency="never" looks like it disables optimizer
# updates entirely - confirm this is intended for a training run.
agent_spec = dict(
    type="ppo",
    learning_rate=1e-4,
    discount=0.99,
    likelihood_ratio_clipping=0.2,
    estimate_terminal=False,
    max_episode_timesteps=2000,
    network=network_spec,
    batch_size=10,
    update_frequency="never",
)

# Combine the exchange, action/reward schemes and feature pipeline into one
# trading environment.
environment = TradingEnvironment(
    exchange=exchange,
    action_strategy=action_strategy,
    reward_strategy=reward_strategy,
    feature_pipeline=feature_pipeline,
)

# Strategy built from that environment and the agent spec.
strategy = TensorforceTradingStrategy(
    environment=environment,
    agent_spec=agent_spec,
)

In [None]:
# Run 10 episodes with evaluation switched off and keep the returned record.
performance = strategy.run(episodes=10, evaluation=False)

# Show the last five rows (assumes `performance` is a pandas DataFrame, as the
# later `.net_worth.plot()` usage suggests).
performance.tail(5)

In [None]:
# Plot the net-worth column of the training performance record.
performance['net_worth'].plot()

In [None]:
# Persist the strategy's agent under the relative 'agents' directory.
strategy.save_agent(directory='agents')

In [None]:
# Coinbase BTC/USD 1h CSV (per the file name) from CryptoDataDownload.
data_url = "http://www.cryptodatadownload.com/cdd/Coinbase_BTCUSD_1h.csv"

# The first line of the file is skipped; the second is used as the header.
data = pd.read_csv(data_url, skiprows=1)
# Select by list indexing rather than DataFrame.get(): get() silently returns
# None when a column is missing, which would only fail later with an unrelated
# AttributeError. Indexing raises KeyError right here instead.
data = data[['Open', 'High', 'Low', 'Close', 'Volume BTC']]
data = (
    data
    .rename(columns={'Volume BTC': 'volume'})
    .rename(columns=str.lower)  # open/high/low/close/volume
    .tail(1000)                 # keep only the last 1000 rows
)

In [None]:
# Renumber rows from 0 and discard the old index. Rebinding instead of
# mutating in place keeps this cell idempotent and chain-friendly.
data = data.reset_index(drop=True)

In [None]:
# Build a SimulatedExchange from the downloaded frame with USD as the base
# instrument. This rebinds `exchange`, replacing the FBMExchange created
# earlier in the notebook.
exchange = SimulatedExchange(
    data_frame=data,
    base_instrument='USD',
)

In [None]:
# Rebuild the environment around the current `exchange`, reusing the same
# action/reward schemes and feature pipeline objects as before.
# NOTE(review): action_strategy was created with instrument_symbol='ETH/BTC'
# while this exchange is fed BTC/USD data - confirm that is intended.
environment = TradingEnvironment(
    exchange=exchange,
    action_strategy=action_strategy,
    reward_strategy=reward_strategy,
    feature_pipeline=feature_pipeline,
)

# Fresh strategy over the new environment, using the same agent spec.
strategy = TensorforceTradingStrategy(
    environment=environment,
    agent_spec=agent_spec,
)

In [None]:
# Load a previously saved agent into the new strategy.
# NOTE(review): the agent was saved with directory='agents' earlier in this
# notebook but is restored from 'agents/ppo' - confirm the paths line up.
strategy.restore_agent(directory='agents/ppo')

# Single episode with evaluation enabled.
performance = strategy.run(episodes=1, evaluation=True)

# Show the last five rows (assumes a pandas DataFrame).
performance.tail(5)

In [None]:
# Plot up to the last 1500 net-worth values from the evaluation run.
performance['net_worth'].tail(1500).plot()

In [None]:
import numpy as np

In [None]:
# First (row-index) plane of a 5x5 integer mesh grid: out[i, j] == i.
out = np.mgrid[:5, :5][0]

In [None]:
# Display the current value of `out`.
out

In [None]:
# View `out` as a single row of 25 values (result is displayed, not stored).
out.reshape(1, 25)

In [6]:
# Inspect the environment object held by the strategy (private attribute).
# NOTE(review): looks like leftover debugging - consider removing.
strategy._environment

<tensorforce.environments.openai_gym.OpenAIGym at 0x7f65260daf50>