## Install TensorTrade

In [1]:
if not "cdDone" in globals():
    %cd -q ..
    cdDone = True

In [2]:
# !python3 -m pip install git+https://github.com/nsarang/tensortrade.git --force

## Setup

In [3]:
# Put these at the top of every notebook, to get automatic reloading and inline plotting
%reload_ext autoreload
%autoreload 2
%matplotlib inline


import re
import sys
import time
import pandas as pd
pd.options.mode.use_inf_as_na = True

import numpy as np
from datetime import datetime, timedelta, timezone
from tenacity import retry, retry_if_exception_type, stop_after_attempt
import pytz


In [4]:
import asyncio
import os

import ccxt

# import ccxt.async_support as ccxt

# SECURITY: API credentials are read from environment variables so that no
# secret is ever stored in the notebook. The keys that used to be hard-coded
# in this cell have been exposed and must be revoked on the exchanges.
# A missing variable yields an empty string.
creds = {
    "binance": {
        "apiKey": os.environ.get("BINANCE_API_KEY", ""),
        "secret": os.environ.get("BINANCE_API_SECRET", ""),
    },
    "ftx": {
        "apiKey": os.environ.get("FTX_API_KEY", ""),
        "secret": os.environ.get("FTX_API_SECRET", ""),
    },
}


# Binance client used by the data utilities below.
exchange = ccxt.binance(
    {
        **creds["binance"],
        "enableRateLimit": True,
        # 'options': {
        #     'defaultType': 'spot',  # spot, future, margin
        # },
    }
)

## Data Util

In [5]:
def _timeframe_to_timedelta(timeframe):
    """Translate a ccxt timeframe string such as ``5m`` or ``1d`` into a timedelta."""
    # timedelta has no month or year unit, so those two are approximated as
    # 30 and 365 days. The result is only used to pick a default start date.
    unit_lengths = {
        "s": timedelta(seconds=1),
        "m": timedelta(minutes=1),
        "h": timedelta(hours=1),
        "d": timedelta(days=1),
        "w": timedelta(weeks=1),
        "M": timedelta(days=30),
        "y": timedelta(days=365),
    }
    parsed = re.match(r"([0-9]+)([a-zA-Z])", timeframe)
    if parsed is None or parsed.group(2) not in unit_lengths:
        raise ValueError("Unable to parse timeframe {!r}".format(timeframe))
    return int(parsed.group(1)) * unit_lengths[parsed.group(2)]


@retry(retry=retry_if_exception_type(ccxt.NetworkError), stop=stop_after_attempt(3))
def get_historical_data(
    symbol,
    exchange,
    timeframe,
    start_date=None,
    limit=500,
    max_per_page=500,
    backup_fp=None,
):
    """Get historical OHLCV candles for a symbol pair.

    Decorators:
        retry: re-runs the whole download (up to 3 attempts) when a
          ``ccxt.NetworkError`` propagates out of this function.

    Args:
        symbol (str): The symbol pair to operate on, e.g. BURST/BTC.
        exchange: Exchange object exposing ``timeframes`` and ``fetch_ohlcv``
          (a ccxt exchange instance in this notebook).
        timeframe (str): The ccxt time unit, e.g. 5m or 1d.
        start_date (int, optional): Timestamp in milliseconds of the first
          candle. Defaults to ``limit`` timeframes before the current time.
        limit (int, optional): Defaults to 500. Maximum number of candles to
          return. A falsy value keeps paging until the exchange returns no
          more rows; ``start_date`` is then required.
        max_per_page (int, optional): Defaults to 500. Maximum number of
          candles requested from the exchange in a single call.
        backup_fp (str, optional): CSV path that receives the candles fetched
          so far when a download is interrupted by an error.

    Returns:
        list: A list of lists [timestamp, open, high, low, close, volume],
        sorted by timestamp in ascending order.

    Raises:
        AttributeError: The exchange object does not expose timeframes.
        ValueError: The timeframe is unsupported or unparsable, no start date
          can be derived, or the exchange returned no data.
    """
    import warnings

    supported_timeframes = getattr(exchange, "timeframes", None)
    if supported_timeframes is None:
        raise AttributeError(
            "{} interface does not support timeframe queries! "
            "We are unable to fetch data!".format(exchange)
        )
    if timeframe not in supported_timeframes:
        raise ValueError(
            "{} does not support {} timeframe for OHLCV data. Possible values are: {}".format(
                exchange, timeframe, list(supported_timeframes)
            )
        )

    if not start_date:
        if not limit:
            raise ValueError("start_date is required when limit is not set.")
        start_datetime = datetime.now() - limit * _timeframe_to_timedelta(timeframe)
        start_date = int(start_datetime.timestamp() * 1000)

    historical_data = []
    cursor = int(start_date)
    try:
        while True:
            # Never request more than one page, nor more than is still missing.
            page_size = max_per_page
            if limit:
                remaining = limit - len(historical_data)
                page_size = min(max_per_page, remaining) if max_per_page else remaining

            ohlcv = exchange.fetch_ohlcv(
                symbol, timeframe=timeframe, since=cursor, limit=page_size
            )
            if not ohlcv:
                break
            historical_data += ohlcv
            if limit and len(historical_data) >= limit:
                historical_data = historical_data[:limit]
                break
            # Resume one millisecond after the last candle received.
            cursor = ohlcv[-1][0] + 1
    except Exception as err:
        if historical_data and backup_fp:
            convert_to_dataframe(historical_data).to_csv(backup_fp)
        if not historical_data:
            # Nothing to salvage: let the error propagate so that the retry
            # decorator can act on network failures.
            raise
        # Best effort: keep what was downloaded, but make the gap visible.
        warnings.warn(
            "Download of {} interrupted after {} candles: {!r}".format(
                symbol, len(historical_data), err
            )
        )

    if not historical_data:
        raise ValueError("No historical data provided returned by exchange.")

    # Sort by timestamp in ascending order
    historical_data.sort(key=lambda candle: candle[0])

    return historical_data

In [6]:
def timestamp_to_datetime(
    timestamp, timezone=pytz.timezone("America/Montreal"), to_str=False
):
    """Convert a POSIX timestamp (seconds) to a datetime in ``timezone``.

    Args:
        timestamp (float): Seconds since the epoch.
        timezone (tzinfo): Target timezone. Defaults to America/Montreal.
        to_str (bool): When True, return a formatted string instead.

    Returns:
        datetime or str: The timezone-aware datetime, or its
        ``%Y-%m-%dT%H:%M:%S`` rendering with millisecond precision followed by
        a literal "Z" (appended whatever the timezone is).
    """
    localized = datetime.fromtimestamp(timestamp, timezone)
    if not to_str:
        return localized
    # %f yields six digits; cutting the last three leaves milliseconds.
    return localized.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z"


def convert_to_dataframe(historical_data):
    """Converts historical data matrix to a pandas dataframe.

    Args:
        historical_data (list): A matrix of historical OHLCV data, one row per
          candle as [timestamp, open, high, low, close, volume] with the
          timestamp in milliseconds. An empty list gives an empty dataframe.

    Returns:
        pandas.DataFrame: The open/high/low/close/volume columns, indexed by
        the candle datetime produced by ``timestamp_to_datetime``.
    """
    # Naming the columns in the constructor also covers empty input, where a
    # later ``columns = [...]`` assignment would fail with a length mismatch.
    dataframe = pd.DataFrame(
        historical_data,
        columns=["timestamp", "open", "high", "low", "close", "volume"],
    )
    # Exchange timestamps are in milliseconds; the helper expects seconds.
    dataframe["datetime"] = dataframe["timestamp"].apply(
        lambda millis: timestamp_to_datetime(millis / 1000)
    )

    return dataframe.set_index("datetime").drop(columns="timestamp")

**Notes**

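- `.replace(tzinfo=timezone.utc)` won't change the time! It only attaches the UTC tzinfo; the clock values are left untouched.
- `datetime.timestamp()` automatically converts to UTC (a naive datetime is interpreted as local time before the conversion).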

In [7]:
# Download window start: local midnight on 2020-10-01, as epoch milliseconds.
since = datetime(2020, 10, 1).timestamp() * 1000

In [8]:
# symbol = "ETH/USDT"
# filename = symbol.replace("/", "-") + ".csv"

# df = convert_to_dataframe(
#     get_historical_data(
#         symbol,
#         exchange,
#         timeframe="5m",
#         start_date=since,
#         limit=100,
# #         backup_fp=filename,
#     )
# )
# # df.to_csv(filename)

## Indicator Util

In [9]:
def shift(values: np.ndarray, periods: int, axis, fill_value) -> np.ndarray:
    """Return a copy of ``values`` moved ``periods`` places along ``axis``.

    Positive ``periods`` move elements towards higher indices, negative ones
    towards lower indices. The vacated positions are set to ``fill_value``.
    The input array is not modified.
    """
    if periods == 0 or values.size == 0:
        return values.copy()

    # np.roll is fed a C-contiguous array: Fortran-ordered input is viewed
    # through its transpose, which mirrors the axis number.
    transposed = values.flags.f_contiguous
    rolled = values.T if transposed else values
    if transposed:
        axis = rolled.ndim - axis - 1

    if rolled.size:
        rolled = np.roll(rolled, periods, axis=axis)

    # Overwrite the entries that wrapped around during the roll.
    selector = [slice(None)] * values.ndim
    selector[axis] = slice(None, periods) if periods > 0 else slice(periods, None)
    rolled[tuple(selector)] = fill_value

    # Hand the result back in the caller's original orientation.
    return rolled.T if transposed else rolled


def crossing(a, b):
    """Mark where ``a`` and ``b`` swap order between consecutive elements.

    Returns an array holding 1 where ``a <= b`` now and ``a >= b`` one step
    earlier, -1 for the opposite case, and 0 elsewhere. The first element is
    compared against a previous value of 0 for both inputs.
    """
    prev_a = shift(a, 1, axis=0, fill_value=0)
    prev_b = shift(b, 1, axis=0, fill_value=0)

    now_below = (a <= b) & (prev_a >= prev_b)
    now_above = (a >= b) & (prev_a <= prev_b)
    # now_below takes precedence when both conditions hold.
    return np.where(now_below, 1, np.where(now_above, -1, 0))


def smooth_range(series, period, mult):
    """Return the doubly EMA-smoothed absolute step of ``series`` times ``mult``.

    The absolute one-step change (the first element is compared against 0) is
    averaged with an EMA of length ``period``, then again with an EMA of
    length ``2 * period - 1``, and finally scaled by ``mult``. The result is
    a Series on the index of ``series``.
    """
    step_size = (series - series.shift(1, fill_value=0)).abs()
    long_span = period * 2 - 1
    scaled = ta.EMA(ta.EMA(step_size, period), long_span) * mult
    return pd.Series(scaled, index=series.index)


def filter_range(series, smoothrng):
    """Build a filtered copy of ``series`` from the previous value and a band.

    The result starts as ``series`` shifted by one position (0 in front).
    At every later position where the current value lies outside
    ``previous ± band``, the entry is replaced by the current value pulled
    back towards the previous one by the band width.
    """
    filtered = series.shift(1, fill_value=0)
    for pos, (price, band) in enumerate(zip(series, smoothrng)):
        if pos == 0:
            continue
        anchor = filtered.iloc[pos]
        if anchor - band <= price <= anchor + band:
            # Still inside the band: keep the previous value.
            continue
        filtered.iloc[pos] = price - band if price > anchor + band else price + band
    return filtered

In [10]:
def SWING_CALLS(df):
    """Derive swing-trading entry and exit signals from OHLC data.

    Args:
        df: Frame exposing ``open``, ``high`` and ``close`` columns.

    Returns:
        tuple: ``(buyexit, sellexit, sellcall, buycall)`` boolean signals.
          - buyexit: RSI(14) above 80.
          - sellexit: RSI(14) below 30.
          - sellcall: ``crossing(sma, ema)`` is positive on a bar that closes
            below its open.
          - buycall: ``crossing(sma, ema)`` is negative on a bar whose high
            is above the SMA(50).
    """
    # NOTE(review): ta.EMA / ta.SMA / ta.RSI look like the TA-Lib function
    # API, while the only import visible in this notebook is `import ta` -
    # confirm which package `ta` is bound to when this runs.
    ema = ta.EMA(df.close, 5)
    sma = ta.SMA(df.close, 50)
    rsi = ta.RSI(df.close, 14)

    buyexit = rsi > 80
    sellexit = rsi < 30

    # Computed once and reused for both calls.
    cross = crossing(sma, ema)
    sellcall = (cross > 0) & (df.open > df.close)
    buycall = (cross < 0) & (df.high > sma)

    return buyexit, sellexit, sellcall, buycall


def Range_Filter_Buy_Sell(df, period=100, range_multiplier=3):
    """Compute range-filter buy and sell signals from ``df.close``.

    A buy is flagged where the close is above the filter while both the close
    and the filter are higher than one bar earlier; a sell is the mirror
    image. Returns ``(buycall, sellcall)``.
    """
    close = df.close

    # Smoothed average range, then the range filter built from it.
    band = smooth_range(close, period, range_multiplier)
    filt = filter_range(close, band)

    prev_close = close.shift(1)
    prev_filt = filt.shift(1)

    buycall = (close > filt) & (close > prev_close) & (filt > prev_filt)
    sellcall = (close < filt) & (close < prev_close) & (filt < prev_filt)
    return buycall, sellcall


def calculate_profit(ohlvc, buycall, sellcall, start_from=100, trade_fee=0.1):
    """Simulate all-in / all-out trading on the close price.

    Starts with 1 unit of money. On a buy signal the whole balance is
    converted to the asset at the bar's close; on a sell signal, or on the
    final bar, the whole position is converted back. No buy is made on the
    final bar.

    Args:
        ohlvc: Frame with a ``close`` column, one row per bar.
        buycall: Iterable of per-bar buy flags.
        sellcall: Iterable of per-bar sell flags.
        start_from (int): Bars before this position are ignored.
        trade_fee (float): Fee per trade, in percent of the traded amount.

    Returns:
        tuple: ``(money, trade_cost)`` - the final balance and the total fees
        paid. ``(1, 0)`` when no bar at or after ``start_from`` trades,
        including frames shorter than ``start_from``.
    """
    # One positional array instead of a row lookup on every bar.
    closes = ohlvc["close"].to_numpy()
    last_bar = len(closes) - 1
    fee_rate = trade_fee / 100

    money = 1
    asset = 0
    trade_cost = 0
    for bar, (buy, sell) in enumerate(zip(buycall, sellcall)):
        if bar < start_from:
            continue

        if buy and money and bar != last_bar:
            trade_cost += money * fee_rate
            money *= 1 - fee_rate
            asset = money / closes[bar]
            money = 0

        elif (sell or bar == last_bar) and asset:
            money = asset * closes[bar]
            trade_cost += money * fee_rate
            money *= 1 - fee_rate
            asset = 0

    return money, trade_cost

## Data

### Load

In [13]:
import pandas as pd
import tensortrade as tt
import tensortrade.env.default as default

from tensortrade.data.cdd import CryptoDataDownload
from tensortrade.feed.core import Stream, DataFeed
from tensortrade.oms.exchanges import Exchange, ExchangeOptions
from tensortrade.oms.services.execution.simulated import execute_order
from tensortrade.oms.instruments import USD, BTC, ETH
from tensortrade.oms.wallets import Wallet, Portfolio
from tensortrade.agents import DQNAgent, A2CAgent

In [14]:
# cdd = CryptoDataDownload()
# data = cdd.fetch("Coinbase", "USD", "BTC", "1h")


df = pd.read_csv("data/BTC-USDT.csv")
# Cut everything after the last "-" (the UTC offset) and after the first "."
# (fractional seconds), i.e. the .%f%z tail, then parse what remains.
date = pd.to_datetime(
    df["datetime"].map(lambda raw: raw.rsplit("-", 1)[0].split(".")[0])
)
df["date"] = date
df = df.sort_values("date")
df.head()

Unnamed: 0,datetime,open,high,low,close,volume,date
0,2017-08-17 00:00:00-04:00,4261.48,4280.56,4261.48,4261.48,2.189061,2017-08-17 00:00:00
1,2017-08-17 00:05:00-04:00,4261.48,4261.48,4261.48,4261.48,0.0,2017-08-17 00:05:00
2,2017-08-17 00:10:00-04:00,4261.48,4261.48,4261.48,4261.48,0.0,2017-08-17 00:10:00
3,2017-08-17 00:15:00-04:00,4261.48,4264.88,4261.48,4261.48,0.484666,2017-08-17 00:15:00
4,2017-08-17 00:20:00-04:00,4264.88,4266.29,4264.88,4266.29,2.32857,2017-08-17 00:20:00


In [18]:
df.isnull().mean()

datetime         0.000000
open             0.000000
high             0.000000
low              0.000000
close            0.000000
                   ...   
momentum_kama    0.997527
momentum_roc     0.000037
others_dr        0.000000
others_dlr       0.000003
others_cr        0.000000
Length: 79, dtype: float64

In [15]:
import ta

# Add the ta library's full indicator set, telling it that every OHLCV role
# is stored in the column of the same name.
data = ta.add_all_ta_features(
    df,
    **{role: role for role in ("open", "high", "low", "close", "volume")},
)

  dip[i] = 100 * (self._dip[i]/self._trs[i])
  din[i] = 100 * (self._din[i]/self._trs[i])


In [16]:
data.columns

Index(['datetime', 'open', 'high', 'low', 'close', 'volume', 'date',
       'volume_adi', 'volume_obv', 'volume_cmf', 'volume_fi', 'momentum_mfi',
       'volume_em', 'volume_sma_em', 'volume_vpt', 'volume_nvi', 'volume_vwap',
       'volatility_atr', 'volatility_bbm', 'volatility_bbh', 'volatility_bbl',
       'volatility_bbw', 'volatility_bbp', 'volatility_bbhi',
       'volatility_bbli', 'volatility_kcc', 'volatility_kch', 'volatility_kcl',
       'volatility_kcw', 'volatility_kcp', 'volatility_kchi',
       'volatility_kcli', 'volatility_dcl', 'volatility_dch', 'trend_macd',
       'trend_macd_signal', 'trend_macd_diff', 'trend_sma_fast',
       'trend_sma_slow', 'trend_ema_fast', 'trend_ema_slow', 'trend_adx',
       'trend_adx_pos', 'trend_adx_neg', 'trend_vortex_ind_pos',
       'trend_vortex_ind_neg', 'trend_vortex_ind_diff', 'trend_trix',
       'trend_mass_index', 'trend_cci', 'trend_dpo', 'trend_kst',
       'trend_kst_sig', 'trend_kst_diff', 'trend_ichimoku_conv',
       't

### Create features with the feed module

In [19]:
def rsi(price: Stream[float], period: float) -> Stream[float]:
    """Relative strength index of a price stream.

    Gains and losses are the positive and negative parts of the one-step
    difference; each is averaged with an exponentially weighted mean using
    ``alpha = 1 / period``. The result is ``100 * (1 - 1 / (1 + rs))`` where
    ``rs`` is the gain average divided by the loss average.
    """
    change = price.diff()
    gains = change.clamp_min(0).abs()
    losses = change.clamp_max(0).abs()
    smoothing = 1 / period
    rs = gains.ewm(alpha=smoothing).mean() / losses.ewm(alpha=smoothing).mean()
    return 100 * (1 - (1 + rs) ** -1)


def macd(price: Stream[float], fast: float, slow: float, signal: float) -> Stream[float]:
    """Difference between the MACD line and its smoothed signal line.

    The MACD line is the ``fast``-span EMA minus the ``slow``-span EMA of
    ``price``; the value returned is that line minus its own ``signal``-span
    EMA. All EMAs use ``adjust=False``.
    """
    fast_ema = price.ewm(span=fast, adjust=False).mean()
    slow_ema = price.ewm(span=slow, adjust=False).mean()
    macd_line = fast_ema - slow_ema
    signal_line = macd_line.ewm(span=signal, adjust=False).mean()
    return macd_line - signal_line

In [20]:
# features = [
#     Stream.source(list(data[c]), dtype="float").rename(data[c].name)
#     for c in data.columns[2:]
# ]

# close = Stream.select(features, lambda s: s.name == "close")

In [21]:
# class Listener:
#     def on_next(self, value):
#         print(value)

# close.attach(Listener())

In [22]:
# from tensortrade.feed.core import Stream
# ss = Stream.source([1, 2, 3, 4, 5], dtype="float")

# ff = DataFeed([ss.rolling(2).mean()])
# ff.next()

In [23]:
# features = [
#     close.ewm(span=14).mean().rename("ema"),
#     close.ewm(alpha=1).mean().rename("sma"),
#     close.log().diff().rename("lr"),
#     rsi(close, period=20).rename("rsi"),
#     macd(close, fast=10, slow=50, signal=5).rename("macd")
# ]

# feed = DataFeed(features)
# feed.compile()

In [24]:
# import json

# for i in range(5):
#     obsv = feed.next()
#     print(json.dumps(obsv, indent=4))

## Env

In [25]:
# Model inputs: bar-to-bar relative change of every column whose name does not
# start with "date" (this drops "datetime" and "date"); missing values become 0.
features = (
    data.loc[:, [col for col in data.columns if not col.startswith("date")]]
    .pct_change()
    .fillna(0)
)

In [102]:
commission = 0.005
window_size = 200

# Observation feed: one float stream per feature column, named after it.
feed = DataFeed(
    [
        Stream.source(list(column), dtype="float").rename(column.name)
        for _, column in features.items()
    ]
)

# Renderer feed: the date stream followed by the raw OHLCV float streams.
renderer_feed = DataFeed(
    [Stream.source(list(data["date"])).rename("date")]
    + [
        Stream.source(list(data[field]), dtype="float").rename(field)
        for field in ("open", "high", "low", "close", "volume")
    ]
)

# Simulated exchange quoting USD/BTC at the close price.
exchange_opts = ExchangeOptions(commission=commission)
price_stream = Stream.source(list(data["close"]), dtype="float").rename("USD/BTC")
coinbase = Exchange("coinbase", service=execute_order, options=exchange_opts)(
    price_stream
)
del price_stream

# Portfolio: 10000 USD in cash and an empty BTC wallet, valued in USD.
cash = Wallet(coinbase, 10000 * USD)
asset = Wallet(coinbase, 0 * BTC)
portfolio = Portfolio(USD, [cash, asset])

reward_scheme = default.rewards.SimpleProfit()
action_scheme = default.actions.SimpleOrders()

env = default.create(
    feed=feed,
    renderer_feed=renderer_feed,
    renderer=default.renderers.PlotlyTradingChart(display=False, save_format="html"),
    portfolio=portfolio,
    action_scheme=action_scheme,
    reward_scheme=reward_scheme,
    window_size=window_size,
    min_periods=window_size,
    #         max_allowed_loss=0.5,
)


[33mWARN: Box bound precision lowered by casting to float32[0m



In [103]:
env.action_space

Discrete(21)

In [104]:
env.observer.observe(env)

array([[ 0.0000000e+00, -4.4573606e-03,  0.0000000e+00, ...,
        -1.0000000e+00,  0.0000000e+00,  0.0000000e+00],
       [ 0.0000000e+00,  0.0000000e+00,  0.0000000e+00, ...,
         0.0000000e+00,  0.0000000e+00,  0.0000000e+00],
       [ 0.0000000e+00,  7.9784490e-04,  0.0000000e+00, ...,
         0.0000000e+00,  0.0000000e+00,  0.0000000e+00],
       ...,
       [-6.8888320e-03,  3.4654043e-03, -3.4166751e-03, ...,
        -2.0069737e+00, -2.0000000e+00,  2.5513699e+00],
       [ 1.0426059e-02,  5.1127598e-05,  9.3223999e-04, ...,
        -9.9266857e-01, -9.9264324e-01,  5.3037610e-03],
       [ 1.8661573e-03,  5.1264414e-03,  9.3770605e-03, ...,
         7.2723045e+00,  7.2707667e+00,  4.3645084e-02]], dtype=float32)

In [105]:
# # for i in range(1000):
# #     env.observer.observe(env)
 
# env.observer.observe(env)

In [175]:
from tensortrade.agents import VPGAgent
import tensortrade.agents.vpg.core as vcore

# The factory hands back the one environment built above on every call;
# no fresh copy is created.
env_fn = lambda: env
agent = VPGAgent(
    env_fn,
    exp_name="test",
    actor_critic=vcore.CNNActorCritic,
    ac_kwargs={"device": "cuda:0"},
    steps_per_epoch=200,
    train_v_iters=50,
    epochs=20,
)
# r is indexed per entry further down (r[0]["ach_sch"], r[2]["perf"], ...).
r = agent.train(render_interval=3, save_path="agents/")

P tensor(-0.4405, device='cuda:0', grad_fn=<NegBackward>) {'kl': -1.5497207073167374e-08, 'ent': 1.999746322631836}
V tensor(0.1400, device='cuda:0', grad_fn=<MeanBackward0>)
V tensor(0.1400, device='cuda:0', grad_fn=<MeanBackward0>)
V tensor(0.1400, device='cuda:0', grad_fn=<MeanBackward0>)
V tensor(0.1400, device='cuda:0', grad_fn=<MeanBackward0>)
V tensor(0.1400, device='cuda:0', grad_fn=<MeanBackward0>)
V tensor(0.1400, device='cuda:0', grad_fn=<MeanBackward0>)
V tensor(0.1400, device='cuda:0', grad_fn=<MeanBackward0>)
V tensor(0.1400, device='cuda:0', grad_fn=<MeanBackward0>)
V tensor(0.1400, device='cuda:0', grad_fn=<MeanBackward0>)
V tensor(0.1400, device='cuda:0', grad_fn=<MeanBackward0>)
V tensor(0.1400, device='cuda:0', grad_fn=<MeanBackward0>)
V tensor(0.1400, device='cuda:0', grad_fn=<MeanBackward0>)
V tensor(0.1400, device='cuda:0', grad_fn=<MeanBackward0>)
V tensor(0.1400, device='cuda:0', grad_fn=<MeanBackward0>)
V tensor(0.1400, device='cuda:0', grad_fn=<MeanBackward0>)

KeyboardInterrupt: 

In [170]:
len(r[0]["ach_sch"].actions)

21

In [169]:
print({idx:x for idx,x in enumerate(r[0]["ach_sch"].actions)}, sep="\n")

{0: None, 1: (coinbase:USD/BTC, (None, 0.1, None, <TradeSide.BUY: 'buy'>)), 2: (coinbase:USD/BTC, (None, 0.1, None, <TradeSide.SELL: 'sell'>)), 3: (coinbase:USD/BTC, (None, 0.2, None, <TradeSide.BUY: 'buy'>)), 4: (coinbase:USD/BTC, (None, 0.2, None, <TradeSide.SELL: 'sell'>)), 5: (coinbase:USD/BTC, (None, 0.3, None, <TradeSide.BUY: 'buy'>)), 6: (coinbase:USD/BTC, (None, 0.3, None, <TradeSide.SELL: 'sell'>)), 7: (coinbase:USD/BTC, (None, 0.4, None, <TradeSide.BUY: 'buy'>)), 8: (coinbase:USD/BTC, (None, 0.4, None, <TradeSide.SELL: 'sell'>)), 9: (coinbase:USD/BTC, (None, 0.5, None, <TradeSide.BUY: 'buy'>)), 10: (coinbase:USD/BTC, (None, 0.5, None, <TradeSide.SELL: 'sell'>)), 11: (coinbase:USD/BTC, (None, 0.6, None, <TradeSide.BUY: 'buy'>)), 12: (coinbase:USD/BTC, (None, 0.6, None, <TradeSide.SELL: 'sell'>)), 13: (coinbase:USD/BTC, (None, 0.7, None, <TradeSide.BUY: 'buy'>)), 14: (coinbase:USD/BTC, (None, 0.7, None, <TradeSide.SELL: 'sell'>)), 15: (coinbase:USD/BTC, (None, 0.8, None, <Trade

In [161]:
r[0]["ach_sch"].broker.trades

OrderedDict([('a6b7420a-649e-4b96-8b7f-eda9ca82c02d',
              [<Trade: id=42fe9078-f5db-4fc9-b515-24c5f9691784, order_id=a6b7420a-649e-4b96-8b7f-eda9ca82c02d, step=5, exchange_pair=coinbase:USD/BTC, base_symbol=USD, quote_symbol=BTC, side=buy, type=market, size=9950.00, quantity=9950.00 USD, price=4693.16, commission=50.00 USD, created_at=2020-11-09 16:00:29.514395>]),
             ('5a0f5e27-f9b1-4cf7-9b23-a8349276e9f7',
              [<Trade: id=1690f424-53a4-485c-9058-743982824602, order_id=5a0f5e27-f9b1-4cf7-9b23-a8349276e9f7, step=10, exchange_pair=coinbase:USD/BTC, base_symbol=USD, quote_symbol=BTC, side=sell, type=market, size=1.26570371, quantity=1.26570371 BTC, price=4673.12, commission=0.00636032 BTC, created_at=2020-11-09 16:00:29.527327>]),
             ('b017c27f-a6f6-438e-92e8-163b72464ae0',
              [<Trade: id=e8feea8d-9b20-40f9-afab-c00dc56ee074, order_id=b017c27f-a6f6-438e-92e8-163b72464ae0, step=11, exchange_pair=coinbase:USD/BTC, base_symbol=USD, quote_sy

In [158]:
import json
print(json.dumps({idx:i["actions"] for idx, i in enumerate(r)}, indent=4))

{
    "0": [
        8,
        20,
        0,
        16,
        19,
        9,
        15,
        13,
        5,
        12,
        18,
        13,
        13,
        7,
        0,
        13,
        12,
        8,
        2,
        13,
        6,
        7,
        9,
        9,
        4,
        2,
        19,
        8,
        14,
        0,
        10,
        13,
        14,
        20,
        9,
        20,
        12,
        0,
        16,
        7,
        4,
        18,
        2,
        13,
        5,
        16,
        0,
        10,
        13,
        0,
        2,
        2,
        0,
        18,
        10,
        6,
        7,
        5,
        11,
        5,
        8,
        0,
        16,
        0,
        3,
        20,
        5,
        12,
        10,
        14,
        2,
        20,
        11,
        6,
        6,
        2,
        13,
        10,
        14,
        3,
        13,
        4,
        10,
        14,
        5,
        10

In [173]:
import json
print(json.dumps({idx:i["rewards"] for idx, i in enumerate(r)}, indent=4))

{
    "0": [
        0.0,
        0.0,
        0.0,
        0.0,
        -0.007529788006809923,
        -0.006527533879509995,
        -0.032694835680751044,
        -0.010354986507212338,
        0.07213006216206264,
        -0.0019686088849932837,
        -0.002222110870434779,
        -0.0011288828094575543,
        -0.005601160200313315,
        0.0012399320004685688,
        -0.0011999938369684493,
        0.00027165160975917146,
        -0.006436444014118137,
        -0.0010101880517355832,
        0.0030917511411385767,
        -0.0033571156405529656,
        -0.005055636807432706,
        0.00402282757310557,
        -0.0021402191473955945,
        -0.0016917839748965546,
        -0.0005865202454987672,
        0.00015579074066529586,
        -0.0025568738218216813,
        0.004388535418668793,
        0.005493361295176635,
        -0.0011572540020462485,
        -0.004429525396166034,
        -0.0022478521674195173,
        -0.0023604652018945904,
        -0.00126281580487586

In [174]:
import json
print(json.dumps({idx:i["value_est"] for idx, i in enumerate(r)}, indent=4))

{
    "0": [
        0.14495544135570526,
        0.14501413702964783,
        0.14529435336589813,
        0.14528584480285645,
        0.14579561352729797,
        0.14590740203857422,
        0.1460210084915161,
        0.14666053652763367,
        0.14797382056713104,
        0.1491314321756363,
        0.1484973430633545,
        0.1481049507856369,
        0.14739233255386353,
        0.14871293306350708,
        0.14948280155658722,
        0.1504388004541397,
        0.1520061492919922,
        0.152657613158226,
        0.15228840708732605,
        0.1522900015115738,
        0.15222546458244324,
        0.15259571373462677,
        0.15776480734348297,
        0.1589532196521759,
        0.16134525835514069,
        0.21223902702331543,
        0.21487784385681152,
        0.21149781346321106,
        0.1697559654712677,
        0.21833333373069763,
        0.2194734662771225,
        0.25280678272247314,
        0.21699249744415283,
        0.26944807171821594,
        0.263

In [140]:
def get_rew_from_perf(perf):
    """Return the most recent step return of a performance history.

    ``perf`` is a sequence of records carrying a ``net_worth`` entry. The
    result is an array with the relative change between the last two net
    worths, or an empty array when fewer than two records are given.
    """
    worths = [record["net_worth"] for record in perf]
    step_returns = [
        (after - before) / before for before, after in zip(worths, worths[1:])
    ]
    # Only the final return is kept, so the cumulative product spans one value.
    growth = np.array([ret + 1 for ret in step_returns[-1:]])
    return growth.cumprod() - 1

In [138]:
r[2]["rewards"][20:30]

[0.24986083805561066,
 0.2724217176437378,
 0.2759019434452057,
 2465596.75,
 3215659.75,
 2656564.75,
 2277011.25,
 2006236.375,
 2294707.0,
 3475422.0]

In [143]:
perf_vals = list(gon.values())

In [149]:
get_rew_from_perf(perf_vals[:35])

array([0.])

In [150]:
gon = r[2]["perf"]

In [151]:
gon

OrderedDict([(0,
              {'coinbase:/USD/BTC': 4295.75,
               'coinbase:/USD:/free': 10000.0,
               'coinbase:/USD:/locked': 0.0,
               'coinbase:/USD:/total': 10000.0,
               'coinbase:/BTC:/free': 0.0,
               'coinbase:/BTC:/locked': 0.0,
               'coinbase:/BTC:/total': 0.0,
               'coinbase:/BTC:/worth': 0.0,
               'net_worth': 10000.0,
               'base_symbol': 'USD'}),
             (1,
              {'coinbase:/USD/BTC': 4295.75,
               'coinbase:/USD:/free': 0.0,
               'coinbase:/USD:/locked': 0.0,
               'coinbase:/USD:/total': 0.0,
               'coinbase:/BTC:/free': 2.3162428,
               'coinbase:/BTC:/locked': 0.0,
               'coinbase:/BTC:/total': 2.3162428,
               'coinbase:/BTC:/worth': 9950.0000081,
               'net_worth': 9950.0000081,
               'base_symbol': 'USD'}),
             (2,
              {'coinbase:/USD/BTC': 4280.81,
            

## Ray

In [32]:
import ray
import numpy as np

from ray import tune
from ray.tune.registry import register_env

import tensortrade.env.default as default
from tensortrade.feed.core import DataFeed, Stream
from tensortrade.oms.instruments import Instrument
from tensortrade.oms.exchanges import Exchange, ExchangeOptions
from tensortrade.oms.services.execution.simulated import execute_order
from tensortrade.oms.wallets import Wallet, Portfolio


def create_env(config):
    """Build a TensorTrade environment from an ``env_config`` dictionary.

    Args:
        config (dict): Must provide
          - "features": frame whose columns become the observation streams,
          - "data": frame with date/open/high/low/close/volume columns,
          - "commission": commission passed to the simulated exchange,
          - "window_size": used for the observation window, the minimum
            number of periods and the reward window.

    Returns:
        The environment created by ``default.create``. ``USD`` and ``BTC``
        are taken from the enclosing module namespace.
    """
    # Work on copies so the frames held in the config are left untouched.
    features = config["features"].copy()
    data = config["data"].copy()
    window = config["window_size"]

    feed = DataFeed(
        [
            Stream.source(list(column), dtype="float").rename(column.name)
            for _, column in features.items()
        ]
    )

    # Kept for the renderer options that are currently commented out below.
    renderer_feed = DataFeed(
        [Stream.source(list(data["date"])).rename("date")]
        + [
            Stream.source(list(data[field]), dtype="float").rename(field)
            for field in ("open", "high", "low", "close", "volume")
        ]
    )

    # Simulated exchange quoting USD/BTC at the close price.
    exchange_opts = ExchangeOptions(commission=config["commission"])
    close_prices = Stream.source(list(data["close"]), dtype="float").rename("USD/BTC")
    coinbase = Exchange("coinbase", service=execute_order, options=exchange_opts)(
        close_prices
    )

    # Start with 10000 USD and no BTC.
    wallets = [Wallet(coinbase, 10000 * USD), Wallet(coinbase, 0 * BTC)]
    portfolio = Portfolio(USD, wallets)

    reward_scheme = default.rewards.SimpleProfit(window_size=window)
    action_scheme = default.actions.SimpleOrders()

    return default.create(
        feed=feed,
        #         renderer_feed=renderer_feed,
        #         renderer=default.renderers.PlotlyTradingChart(display=False, save_format="html"),
        portfolio=portfolio,
        action_scheme=action_scheme,
        reward_scheme=reward_scheme,
        window_size=window,
        min_periods=window,
        #         max_allowed_loss=0.5,
    )

In [33]:
# data_norm = data.copy()

# z_score = lambda x: (x - x.mean()) / x.std(ddof=0)
# abs_max = lambda x: x / x.abs().quantile(0.9)
# data_norm[data_norm.columns[2:]] = data_norm[data_norm.columns[2:]].apply(abs_max)
# data_norm = data_norm.fillna(0)

In [34]:
env_config = {
    "data": data,
    "features": features,
    "commission": 0.005,
    "window_size": 50,
}
# environment = create_env(env_config)

In [35]:
# from stable_baselines.common.policies import MlpLnLstmPolicy
# from stable_baselines import PPO2

# policy = MlpLnLstmPolicy
# # params = { "learning_rate": 1e-5 }

# agent = PPO2(policy, environment, nminibatches=1)

In [36]:
# agent.learn(total_timesteps=10000)

In [37]:
ray.init()
register_env("TradingEnv", create_env)

2020-11-08 20:32:59,825	INFO services.py:1164 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m


In [39]:
from ray.rllib import agents

In [41]:
# config = agents.ppo.DEFAULT_CONFIG.copy()
# config.update(
#     {
#         "env": "TradingEnv",
#         "env_config": env_config,
#         "log_level": "WARN",
#         "framework": "torch",
#         #         "ignore_worker_failures": True,
#         "num_workers": 4,
#         "num_gpus": 1,
#     }
# )

# agent = agents.ppo.PPOTrainer(config=config, env="TradingEnv")

[2m[36m(pid=77043)[0m Instructions for updating:
[2m[36m(pid=77043)[0m non-resource variables are not supported in the long term
[2m[36m(pid=77039)[0m Instructions for updating:
[2m[36m(pid=77039)[0m non-resource variables are not supported in the long term
[2m[36m(pid=77037)[0m Instructions for updating:
[2m[36m(pid=77037)[0m non-resource variables are not supported in the long term
[2m[36m(pid=77029)[0m Instructions for updating:
[2m[36m(pid=77029)[0m non-resource variables are not supported in the long term
[2m[36m(pid=77043)[0m   tensor = torch.from_numpy(np.asarray(item))
[2m[36m(pid=77039)[0m   tensor = torch.from_numpy(np.asarray(item))
[2m[36m(pid=77037)[0m   tensor = torch.from_numpy(np.asarray(item))
[2m[36m(pid=77029)[0m   tensor = torch.from_numpy(np.asarray(item))


In [None]:
# import random
# from ray.tune.schedulers import PopulationBasedTraining

# # Postprocess the perturbed config to ensure it's still valid
# def explore(config):
#     # ensure we collect enough timesteps to do sgd
#     if config["train_batch_size"] < config["sgd_minibatch_size"] * 2:
#         config["train_batch_size"] = config["sgd_minibatch_size"] * 2
#     # ensure we run at least one sgd iter
#     if config["num_sgd_iter"] < 1:
#         config["num_sgd_iter"] = 1
#     return config


# pbt = PopulationBasedTraining(
#     time_attr="time_total_s",
#     metric="episode_reward_mean",
#     mode="max",
#     perturbation_interval=100,
#     resample_probability=0.25,
#     # Specifies the mutations of these hyperparams
#     hyperparam_mutations={
#         "lambda": lambda: random.uniform(0.9, 1.0),
#         "clip_param": lambda: random.uniform(0.01, 0.5),
#         "lr": [1e-3, 5e-4, 1e-4, 5e-5, 1e-5],
#         "num_sgd_iter": lambda: random.randint(1, 30),
#         "sgd_minibatch_size": lambda: random.randint(32, 4096),
#         "train_batch_size": lambda: random.randint(2000, 16000),
#     },
#     custom_explore_fn=explore,
# )

# tune.run(
#     "PPO",
#     checkpoint_freq=10,
#     name="show_me_the_money_test",
#     #     scheduler=pbt,
#     #     num_samples=8,
#     config={
#         "env": "TradingEnv",
#         "env_config": env_config,
#         "framework": "tf",
# #         "log_level": "DEBUG",
#         "kl_coeff": 1.0,
# #         "num_workers": 8,
#         "num_gpus": 1,
#         #         "model": {
#         #             "free_log_std": True
#         #         },
#         # These params are tuned from a fixed starting value.
#         "lambda": 0.95,
#         "clip_param": 0.2,
#         "lr": 1e-4,
#         # These params start off randomly drawn from a set.
#         "num_sgd_iter": tune.sample_from(lambda spec: random.choice([10, 20, 30])),
#         "sgd_minibatch_size": tune.sample_from(
#             lambda spec: random.choice([32, 128, 512, 2048])
#         ),
#         "train_batch_size": tune.sample_from(
#             lambda spec: random.choice([1000, 2000, 4000])
#         ),
#     },
# )

In [None]:
# Train PPO through Ray Tune on the "TradingEnv" environment registered above,
# passing env_config to create_env. The run stops once the mean episode reward
# reaches 100, and a checkpoint is written when it ends.
# Requests 4 rollout workers and 2 GPUs with the TensorFlow framework.
# NOTE(review): global_checkpoint_period=np.inf presumably turns off the
# periodic experiment-level checkpoints - confirm against the installed Ray
# version.
# The commented-out entries are alternative settings kept for reference.
analysis = tune.run(
    "PPO",
    stop={"episode_reward_mean": 100},
    config={
        "env": "TradingEnv",
        "env_config": env_config,
        "log_level": "WARN",
        "framework": "tf",
#         "ignore_worker_failures": True,
        "num_workers": 4,
        "num_gpus": 2,
#         "clip_rewards": True,
        "lr": 1e-4,
#         "lr_schedule": [
# #             [0, 1e-1],
# #             [int(1e2), 1e-2],
# #             [int(1e3), 1e-3],
#             [int(1e4), 1e-4],
#             [int(1e5), 1e-5],
#             [int(1e6), 1e-6],
#             [int(1e7), 1e-7],
#         ],
#         "gamma": 0,
#         "observation_filter": "MeanStdFilter",
#         "lambda": 0.72,
#         "vf_loss_coeff": 0.5,
#         "entropy_coeff": 0.01,
    },
#     local_dir="./ray",
#     resources_per_trial={"cpu": 4, "gpu": 1},
    reuse_actors=True,
    checkpoint_at_end=True,
    global_checkpoint_period=np.inf
)