In [1]:
import tensorflow as tf
import pandas as pd

from stable_baselines.common.policies import MlpLnLstmPolicy
from stable_baselines import PPO2

from tensortrade.strategies import StableBaselinesTradingStrategy
from tensortrade.environments import TradingEnvironment
from tensortrade.rewards import RiskAdjustedReturns
from tensortrade.actions import PairCriteriaSizeActions
from tensortrade.instruments import Quantity, TradingPair, BTC, USD
from tensortrade.wallets import Wallet, Portfolio
from tensortrade.exchanges.simulated import SimulatedExchange
from tensortrade.features.stationarity import FractionalDifference
from tensortrade.features.scalers import MinMaxNormalizer
from tensortrade.features import FeaturePipeline

# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
WINDOW_SIZE = 20        # passed to TradingEnvironment as window_size
PRICE_COLUMN = 'close'  # column the simulated exchange uses as the price

# ---------------------------------------------------------------------------
# Feature pipeline: min-max normalisation followed by fractional
# differencing (order 0.6); both steps are configured with inplace=True.
# ---------------------------------------------------------------------------
normalize = MinMaxNormalizer(inplace=True)
difference = FractionalDifference(difference_order=0.6, inplace=True)
feature_pipeline = FeaturePipeline(
    steps=[normalize, difference],
)

# ---------------------------------------------------------------------------
# Action and reward schemes
# ---------------------------------------------------------------------------
# `USD / BTC` is presumably the trading pair the agent acts on -- confirm
# against the tensortrade instruments API.
action_scheme = PairCriteriaSizeActions(USD / BTC)
reward_scheme = RiskAdjustedReturns(return_algorithm="sharpe")

# ---------------------------------------------------------------------------
# Market data: hourly Coinbase BTC/USD candles, fetched via Keras' download
# helper.
# ---------------------------------------------------------------------------
csv_file = tf.keras.utils.get_file(
    fname='Coinbase_BTCUSD_1h.csv',
    origin='https://www.cryptodatadownload.com/cdd/Coinbase_BTCUSD_1h.csv',
)

# The first line of the file is skipped (skiprows=1) so that the following
# line is used as the header; "Date" becomes the index.
ohlcv_data = pd.read_csv(csv_file, skiprows=1, index_col="Date")

# Lower-case every column name, then expose the BTC-denominated volume under
# the plain name 'volume'.
ohlcv_data.columns = [column.lower() for column in ohlcv_data.columns]
ohlcv_data = ohlcv_data.rename(columns={'volume btc': 'volume'})

# To experiment with a local dataset instead, load it at this point, e.g.
# from './data/updateOHLCdata_master.csv'.

# Keep only the first 10000 rows.
# NOTE(review): rows are used in file order -- confirm that the CSV is in
# chronological order, otherwise the simulation steps through time backwards.
ohlcv_data = ohlcv_data.head(10000)

# ---------------------------------------------------------------------------
# Simulated exchange and starting portfolio (10000 USD, 0 BTC)
# ---------------------------------------------------------------------------
exchange = SimulatedExchange(
    data_frame=ohlcv_data,
    price_column=PRICE_COLUMN,
)

wallets = [
    (exchange, USD, 10000),
    (exchange, BTC, 0),
]

portfolio = Portfolio(base_instrument=USD, wallet_tuples=wallets)

print('Transformed Data:')
print(exchange.observation_columns)

# ---------------------------------------------------------------------------
# Agent: PPO2 with a layer-normalised LSTM policy
# ---------------------------------------------------------------------------
model = PPO2
policy = MlpLnLstmPolicy
params = dict(
    learning_rate=1e-5,
    nminibatches=1,
    verbose=2,
)

# ---------------------------------------------------------------------------
# Environment, strategy and run
# ---------------------------------------------------------------------------
environment = TradingEnvironment(
    exchange=exchange,
    portfolio=portfolio,
    action_scheme=action_scheme,
    reward_scheme=reward_scheme,
    feature_pipeline=feature_pipeline,
    window_size=WINDOW_SIZE,
)

strategy = StableBaselinesTradingStrategy(
    environment=environment,
    model=model,
    policy=policy,
    model_kwargs=params,
)

# Run the strategy for 10000 steps and keep whatever `run` returns.
performance = strategy.run(steps=10000)

W1208 14:07:19.020293 4537671104 deprecation_wrapper.py:119] From /usr/local/lib/python3.7/site-packages/tensorforce/core/module.py:31: The name tf.enable_resource_variables is deprecated. Please use tf.compat.v1.enable_resource_variables instead.

W1208 14:07:19.034725 4537671104 deprecation_wrapper.py:119] From /usr/local/lib/python3.7/site-packages/tensorforce/core/optimizers/tf_optimizer.py:46: The name tf.train.AdadeltaOptimizer is deprecated. Please use tf.compat.v1.train.AdadeltaOptimizer instead.

W1208 14:07:19.035535 4537671104 deprecation_wrapper.py:119] From /usr/local/lib/python3.7/site-packages/tensorforce/core/optimizers/tf_optimizer.py:47: The name tf.train.AdagradOptimizer is deprecated. Please use tf.compat.v1.train.AdagradOptimizer instead.

W1208 14:07:19.036586 4537671104 deprecation_wrapper.py:119] From /usr/local/lib/python3.7/site-packages/tensorforce/core/optimizers/tf_optimizer.py:48: The name tf.train.AdamOptimizer is deprecated. Please use tf.compat.v1.train.AdamOptimizer instead.

Transformed Data:
Index(['open', 'high', 'low', 'close', 'volume', 'volume usd'], dtype='object')


  out_arr[res_indexer] = arr[res_indexer] - arr[lag_indexer]


Finished running strategy.
Total episodes: 1 (10000 timesteps).
Average reward: 0.01658393008112988.
