## Install TensorTrade

In [1]:
# !python3 -m pip install git+https://github.com/nsarang/tensortrade.git --force

## Setup

In [2]:
# Put these at the top of every notebook, to get automatic reloading and inline plotting
%reload_ext autoreload
%autoreload 2
%matplotlib inline


import re
import sys
import time
import pandas as pd
pd.options.mode.use_inf_as_na = True

import numpy as np
from datetime import datetime, timedelta, timezone
from tenacity import retry, retry_if_exception_type, stop_after_attempt
import pytz


In [3]:
import asyncio
import ccxt
# import ccxt.async_support as ccxt

import os

# Credentials are read from the environment so that secrets never live in the
# notebook or its version history. When the variables are unset, empty strings
# are passed instead.
# NOTE(review): a key/secret pair used to be hardcoded in this cell; treat that
# pair as leaked and revoke it on the exchange.
apiKey = os.environ.get("BINANCE_API_KEY", "")
secret = os.environ.get("BINANCE_API_SECRET", "")

exchange = ccxt.binance(
    {
        "apiKey": apiKey,
        "secret": secret,
        "enableRateLimit": True,
        # "options": {
        #     "defaultType": "spot",  # spot, future, margin
        # },
    }
)

In [4]:
@retry(retry=retry_if_exception_type(ccxt.NetworkError), stop=stop_after_attempt(3))
def get_historical_data(
    symbol, exchange, timeframe, start_date=None, limit=500, max_per_page=500
):
    """Fetch historical OHLCV candles for a symbol, one page at a time.

    Decorators:
        retry: the call is retried (up to 3 attempts) on ``ccxt.NetworkError``.

    Args:
        symbol (str): Symbol pair to operate on, e.g. BURST/BTC.
        exchange: Exchange object to fetch from. It must expose ``timeframes``
            and ``fetch_ohlcv``.
        timeframe (str): ccxt time unit, e.g. 5m or 1d.
        start_date (int, optional): Start timestamp in milliseconds. When
            omitted, the window starts ``limit`` candles before now.
        limit (int, optional): Maximum number of candles to fetch. Required when
            ``start_date`` is omitted; when falsy together with ``start_date``,
            every candle from ``start_date`` up to now is requested.
        max_per_page (int, optional): Maximum number of candles requested per
            ``fetch_ohlcv`` call.

    Returns:
        list: A list of lists containing timestamp, open, high, low, close,
        volume, sorted by ascending timestamp.

    Raises:
        AttributeError: If the exchange does not expose supported timeframes.
        ValueError: If the timeframe is unsupported or cannot be parsed, if
            neither ``start_date`` nor ``limit`` is given, if the exchange
            returns no data, or if the number of candles returned differs from
            the number requested.
    """
    supported_timeframes = getattr(exchange, "timeframes", None)
    if supported_timeframes is None:
        raise AttributeError(
            "{} interface does not support timeframe queries! "
            "We are unable to fetch data!".format(exchange)
        )

    if timeframe not in supported_timeframes:
        raise ValueError(
            "{} does not support {} timeframe for OHLCV data. Possible values are: {}".format(
                exchange, timeframe, list(supported_timeframes)
            )
        )

    timeframe_match = re.match("([0-9]+)([a-zA-Z])", timeframe)
    if timeframe_match is None:
        raise ValueError("Unable to parse timeframe {!r}.".format(timeframe))
    time_quantity = int(timeframe_match.group(1))
    time_period = timeframe_match.group(2)

    unit_lengths = {
        "s": timedelta(seconds=1),
        "m": timedelta(minutes=1),
        "h": timedelta(hours=1),
        "d": timedelta(days=1),
        "w": timedelta(weeks=1),
        # timedelta has no month/year units, so fixed-length approximations
        # are used for these two.
        "M": timedelta(days=30),
        "y": timedelta(days=365),
    }
    if time_period not in unit_lengths:
        raise ValueError("Unsupported timeframe unit {!r}.".format(time_period))

    # Work in integer milliseconds throughout, the unit of `start_date`.
    frame_ms = int((time_quantity * unit_lengths[time_period]).total_seconds() * 1000)
    now_ms = int(datetime.now().timestamp() * 1000)

    if not start_date:
        if not limit:
            raise ValueError("Either start_date or limit must be provided.")
        total = int(limit)
        start_date = now_ms - total * frame_ms
    else:
        start_date = int(start_date)
        # Number of whole frames between start_date and now (never negative).
        total = max((now_ms - start_date) // frame_ms, 0)
        if limit:
            total = min(int(limit), total)

    historical_data = []
    for cursor in range(0, total, max_per_page):
        page_start = start_date + cursor * frame_ms
        # A separate name keeps the `limit` argument intact across pages.
        page_limit = min(total - cursor, max_per_page)
        historical_data += exchange.fetch_ohlcv(
            symbol, timeframe=timeframe, since=page_start, limit=page_limit
        )

    if not historical_data:
        raise ValueError("No historical data returned by exchange.")

    if len(historical_data) != total:
        raise ValueError("Gaps detected in historical data.")

    # Sort by timestamp in ascending order
    historical_data.sort(key=lambda candle: candle[0])

    return historical_data


def timestamp_to_datetime(
    timestamp, timezone=pytz.timezone("America/Montreal"), to_str=False
):
    """Convert a timestamp into a datetime in the given timezone.

    Args:
        timestamp: Value passed to ``datetime.fromtimestamp``.
        timezone: Timezone object passed to ``datetime.fromtimestamp``;
            defaults to America/Montreal.
        to_str (bool): When True, return a formatted string instead of a
            datetime object.

    Returns:
        The datetime, or its string form truncated to millisecond precision
        when ``to_str`` is True.
    """
    moment = datetime.fromtimestamp(timestamp, timezone)
    if not to_str:
        return moment

    # Drop the last three microsecond digits to keep milliseconds only.
    # NOTE(review): the trailing "Z" usually denotes UTC, but the value is
    # rendered in `timezone` - confirm this is intended.
    return moment.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z"


def convert_to_dataframe(historical_data):
    """Converts historical data matrix to a pandas dataframe.

    Args:
        historical_data (list): A matrix of historical OHLCV data, one row per
            candle in the order timestamp, open, high, low, close, volume.
            The timestamp is divided by 1000 before conversion, i.e. it is
            treated as milliseconds.

    Returns:
        pandas.DataFrame: The open/high/low/close/volume columns indexed by a
        ``datetime`` index built with ``timestamp_to_datetime``. An empty input
        yields an empty frame with the same columns.
    """
    # Naming the columns at construction also makes an empty input valid.
    dataframe = pd.DataFrame(
        historical_data,
        columns=["timestamp", "open", "high", "low", "close", "volume"],
    )
    dataframe["datetime"] = dataframe["timestamp"].apply(
        lambda millis: timestamp_to_datetime(millis / 1000)
    )

    return dataframe.set_index("datetime").drop(columns="timestamp")

In [5]:
def shift(values: np.ndarray, periods: int, axis, fill_value) -> np.ndarray:
    """Shift an array along ``axis`` by ``periods``, filling vacated slots.

    Args:
        values: Array to shift. It is not modified.
        periods: Number of positions to shift; positive moves elements towards
            higher indices, negative towards lower indices.
        axis: Axis along which to shift.
        fill_value: Value written into the positions vacated by the shift.

    Returns:
        A new array with the same shape as ``values``.
    """
    if periods == 0 or values.size == 0:
        return values.copy()

    # F-contiguous input is rolled through its transpose, so the axis number
    # has to be mirrored as well.
    via_transpose = values.flags.f_contiguous
    if via_transpose:
        work = values.T
        axis = work.ndim - axis - 1
    else:
        work = values

    # np.roll returns a fresh array, so the fill below never touches `values`.
    work = np.roll(work, periods, axis=axis)

    # Elements that wrapped around occupy the leading (positive shift) or
    # trailing (negative shift) slots; overwrite them.
    vacated = slice(None, periods) if periods > 0 else slice(periods, None)
    selector = [slice(None)] * values.ndim
    selector[axis] = vacated
    work[tuple(selector)] = fill_value

    # Undo the transpose so the result has the caller's orientation.
    return work.T if via_transpose else work


def crossing(a, b):
    """Flag samples where ``a`` and ``b`` swap order relative to the previous sample.

    Previous-sample values come from ``shift`` with a fill value of 0, so the
    first sample is compared against 0 for both inputs.

    Returns:
        Array holding 1 where ``a <= b`` now and ``a >= b`` on the previous
        sample, -1 where ``a >= b`` now and ``a <= b`` on the previous sample,
        and 0 elsewhere. When both conditions hold, 1 takes precedence.
    """
    prev_a = shift(a, 1, axis=0, fill_value=0)
    prev_b = shift(b, 1, axis=0, fill_value=0)

    moved_below = (a <= b) & (prev_a >= prev_b)
    moved_above = (a >= b) & (prev_a <= prev_b)

    # Resolve the lower-priority case first, then let `moved_below` override it.
    fallback = np.where(moved_above, -1, 0)
    return np.where(moved_below, 1, fallback)


def SWING_CALLS(df):
    """Compute swing entry and exit signals from EMA/SMA crossings and RSI.

    NOTE(review): ``ta.EMA`` / ``ta.SMA`` / ``ta.RSI`` are called with a series
    and a period; confirm which module is bound to ``ta`` when this runs.

    Args:
        df: Frame with ``open``, ``high`` and ``close`` attributes.

    Returns:
        tuple: ``(buyexit, sellexit, sellcall, buycall)`` where
            buyexit: RSI(14) above 80.
            sellexit: RSI(14) below 30.
            sellcall: ``crossing(sma, ema) > 0`` on a bar that closed below its open.
            buycall: ``crossing(sma, ema) < 0`` on a bar whose high is above SMA(50).
    """
    ema = ta.EMA(df.close, 5)
    sma = ta.SMA(df.close, 50)
    rsi = ta.RSI(df.close, 14)

    buyexit = rsi > 80
    sellexit = rsi < 30

    # Evaluate the crossing once and reuse it for both directions.
    cross = crossing(sma, ema)
    sellcall = (cross > 0) & (df.open > df.close)
    buycall = (cross < 0) & (df.high > sma)

    return buyexit, sellexit, sellcall, buycall


def smooth_range(series, period, mult):
    """Compute a smoothed average range of ``series``, scaled by ``mult``.

    The absolute sample-to-sample change is averaged with an EMA of length
    ``period``, then again with an EMA of length ``period * 2 - 1``.

    Args:
        series (pandas.Series): Input values.
        period: Length of the first EMA.
        mult: Multiplier applied to the twice-smoothed range.

    Returns:
        pandas.Series: Smoothed range indexed like ``series``.
    """
    # NOTE(review): the shift fills with 0, so the first "change" equals the
    # first value itself - confirm this start-up behaviour is intended.
    step_change = (series - series.shift(1, fill_value=0)).abs()
    first_pass = ta.EMA(step_change, period)
    second_pass = ta.EMA(first_pass, period * 2 - 1)
    return pd.Series(second_pass * mult, index=series.index)


def filter_range(series, smoothrng):
    """Apply a range filter to ``series`` using a per-sample band width.

    The filter keeps its previous value while the input stays inside
    ``[previous_filter - width, previous_filter + width]``. When the input
    leaves that band, the filter moves to ``close - width`` (above the band)
    or ``close + width`` (below it).

    Args:
        series (pandas.Series): Input values (e.g. closing prices).
        smoothrng: Band widths aligned element-wise with ``series``.

    Returns:
        pandas.Series: Filtered values indexed like ``series``. The first
        element is 0 because there is no earlier filter value to start from.
    """
    # Seed with the shifted input so that element 0, and any positions beyond
    # the end of `smoothrng`, keep the values they had before the loop.
    filtered = series.shift(1, fill_value=0).astype("float64").tolist()

    for step, (close, width) in enumerate(zip(series, smoothrng)):
        if step == 0:
            continue

        # Compare with the previous *filtered* value, not the previous input,
        # so the filter holds its level while the input stays inside the band.
        prev = filtered[step - 1]
        if prev - width <= close <= prev + width:
            filtered[step] = prev
        elif close > prev + width:
            filtered[step] = close - width
        else:
            # Also taken when a comparison involves NaN, which propagates a
            # NaN width into the result.
            filtered[step] = close + width

    return pd.Series(filtered, index=series.index, name=series.name, dtype="float64")


def Range_Filter_Buy_Sell(df, period=100, range_multiplier=3):
    """Derive buy and sell signals from a range filter over the close price.

    Args:
        df: Frame exposing a ``close`` series.
        period: Period passed to ``smooth_range``.
        range_multiplier: Multiplier passed to ``smooth_range``.

    Returns:
        tuple: ``(buycall, sellcall)`` boolean series. A buy requires the close
        to be above the filter while both close and filter rose versus the
        previous sample; a sell is the mirror image.
    """
    close = df.close

    # Smoothed average range, then the range filter built on top of it.
    band = smooth_range(close, period, range_multiplier)
    filt = filter_range(close, band)

    prev_close = close.shift(1)
    prev_filt = filt.shift(1)

    buycall = (close > filt) & (close > prev_close) & (filt > prev_filt)
    sellcall = (close < filt) & (close < prev_close) & (filt < prev_filt)
    return buycall, sellcall


def calculate_profit(ohlvc, buycall, sellcall, start_from=100, trade_fee=0.1):
    """Simulate an all-in / all-out strategy and report final cash and fees.

    The simulation starts with 1 unit of cash. On a buy signal, all cash is
    converted to the asset at the bar's close. On a sell signal, or on the last
    bar of ``ohlvc``, the whole position is converted back to cash. No buy is
    made on the last bar. The fee is charged on every conversion.

    Args:
        ohlvc (pandas.DataFrame): Frame with a ``close`` column.
        buycall: Iterable of buy signals, one per bar.
        sellcall: Iterable of sell signals, one per bar.
        start_from (int): Bars before this position are ignored.
        trade_fee (float): Fee per trade, in percent.

    Returns:
        tuple: ``(money, trade_cost)``, the final cash and the fees paid.
        When no bar at or after ``start_from`` exists, this is ``(1, 0)``.
    """
    fee_rate = trade_fee / 100
    closes = ohlvc["close"]
    last_step = len(ohlvc) - 1

    money = 1
    asset = 0
    trade_cost = 0
    for step, (buy, sell) in enumerate(zip(buycall, sellcall)):
        if step < start_from:
            continue

        is_last = step == last_step
        if buy and money and not is_last:
            # The fee is taken from cash before converting to the asset.
            trade_cost += money * fee_rate
            money *= 1 - fee_rate
            asset = money / closes.iloc[step]
            money = 0
        elif (sell or is_last) and asset:
            # The fee is taken from the proceeds of the sale.
            money = asset * closes.iloc[step]
            trade_cost += money * fee_rate
            money *= 1 - fee_rate
            asset = 0

    return money, trade_cost

## Data

### Load

In [6]:
import pandas as pd
import tensortrade as tt
import tensortrade.env.default as default

from tensortrade.data.cdd import CryptoDataDownload
from tensortrade.feed.core import Stream, DataFeed
from tensortrade.oms.exchanges import Exchange, ExchangeOptions
from tensortrade.oms.services.execution.simulated import execute_order
from tensortrade.oms.instruments import USD, BTC, ETH
from tensortrade.oms.wallets import Wallet, Portfolio
from tensortrade.agents import DQNAgent, A2CAgent

In [7]:
# cdd = CryptoDataDownload()
# data = cdd.fetch("Coinbase", "USD", "BTC", "1h")


# Load the hourly candles, parse the `date` column and order rows oldest-first.
data = (
    pd.read_csv("data/Coinbase_BTCUSD_1h.csv", skiprows=1)
    .assign(date=lambda frame: pd.to_datetime(frame["date"], format="%Y-%m-%d %I-%p"))
    .sort_values("date")
)
data.head()

Unnamed: 0,date,symbol,open,high,low,close,volume_btc,volume
20110,2017-07-01 11:00:00,BTCUSD,2505.56,2513.38,2495.12,2509.17,114.6,287000.32
20109,2017-07-01 12:00:00,BTCUSD,2509.17,2512.87,2484.99,2488.43,157.36,393142.5
20108,2017-07-01 13:00:00,BTCUSD,2488.43,2488.43,2454.4,2454.43,280.28,693254.01
20107,2017-07-01 14:00:00,BTCUSD,2454.43,2473.93,2450.83,2459.35,289.42,712864.8
20106,2017-07-01 15:00:00,BTCUSD,2459.35,2475.0,2450.0,2467.83,276.82,682105.41


In [8]:
import ta

# Add the `ta` package's indicator columns (volume_*, volatility_*, trend_*,
# momentum_*, ...) to `data`, using the `volume_btc` column as the volume input.
data = ta.add_all_ta_features(
    data, open="open", high="high", low="low", close="close", volume="volume_btc"
)


invalid value encountered in double_scalars


invalid value encountered in double_scalars



In [9]:
data.columns

Index(['date', 'symbol', 'open', 'high', 'low', 'close', 'volume_btc',
       'volume', 'volume_adi', 'volume_obv', 'volume_cmf', 'volume_fi',
       'momentum_mfi', 'volume_em', 'volume_sma_em', 'volume_vpt',
       'volume_nvi', 'volume_vwap', 'volatility_atr', 'volatility_bbm',
       'volatility_bbh', 'volatility_bbl', 'volatility_bbw', 'volatility_bbp',
       'volatility_bbhi', 'volatility_bbli', 'volatility_kcc',
       'volatility_kch', 'volatility_kcl', 'volatility_kcw', 'volatility_kcp',
       'volatility_kchi', 'volatility_kcli', 'volatility_dcl',
       'volatility_dch', 'trend_macd', 'trend_macd_signal', 'trend_macd_diff',
       'trend_sma_fast', 'trend_sma_slow', 'trend_ema_fast', 'trend_ema_slow',
       'trend_adx', 'trend_adx_pos', 'trend_adx_neg', 'trend_vortex_ind_pos',
       'trend_vortex_ind_neg', 'trend_vortex_ind_diff', 'trend_trix',
       'trend_mass_index', 'trend_cci', 'trend_dpo', 'trend_kst',
       'trend_kst_sig', 'trend_kst_diff', 'trend_ichimoku_conv

### Create features with the feed module

In [10]:
def rsi(price: Stream[float], period: float) -> Stream[float]:
    """Build a relative-strength-index stream from a price stream.

    Args:
        price: Price stream.
        period: Smoothing period; each exponential average uses ``alpha = 1 / period``.

    Returns:
        Stream computing ``100 * (1 - 1 / (1 + rs))``, where ``rs`` is the
        ratio of the smoothed upward moves to the smoothed downward moves.
    """
    change = price.diff()
    gains = change.clamp_min(0).abs()
    losses = change.clamp_max(0).abs()

    avg_gain = gains.ewm(alpha=1 / period).mean()
    avg_loss = losses.ewm(alpha=1 / period).mean()
    rs = avg_gain / avg_loss

    return 100*(1 - (1 + rs) ** -1)


def macd(price: Stream[float], fast: float, slow: float, signal: float) -> Stream[float]:
    """Build a MACD-minus-signal stream from a price stream.

    Args:
        price: Price stream.
        fast: Span of the fast exponential average.
        slow: Span of the slow exponential average.
        signal: Span of the exponential average applied to the MACD line.

    Returns:
        Stream of the MACD line (fast average minus slow average) minus its
        own ``signal``-span exponential average.
    """
    fast_avg = price.ewm(span=fast, adjust=False).mean()
    slow_avg = price.ewm(span=slow, adjust=False).mean()
    macd_line = fast_avg - slow_avg
    signal_line = macd_line.ewm(span=signal, adjust=False).mean()
    return macd_line - signal_line

In [11]:
# One float stream per column from the third column onward (the first two,
# `date` and `symbol`, are skipped), each named after its source column.
features = [
    Stream.source(list(data[c]), dtype="float").rename(data[c].name)
    for c in data.columns[2:]
]

In [12]:
# Pick the stream named "close" out of the feature streams.
close = Stream.select(features, lambda s: s.name == "close")

In [13]:
# class Listener:
#     def on_next(self, value):
#         print(value)

# close.attach(Listener())

In [14]:
from tensortrade.feed.core import Stream
ss = Stream.source([1, 2, 3, 4, 5], dtype="float")

In [15]:
ff = DataFeed([ss.rolling(2).mean()])

In [16]:
ff.next()

{'stream:/78': 1.0}

In [17]:
# features = [
#     close.ewm(span=14).mean().rename("ema"),
#     close.ewm(alpha=1).mean().rename("sma"),
#     close.log().diff().rename("lr"),
#     rsi(close, period=20).rename("rsi"),
#     macd(close, fast=10, slow=50, signal=5).rename("macd")
# ]

# Build the observation feed from every feature stream created earlier.
feed = DataFeed(features)
feed.compile()

In [18]:
import json

# Pull the next five observations from the feed and pretty-print each as JSON.
for i in range(5):
    obsv = feed.next()
    print(json.dumps(obsv, indent=4))

{
    "open": 2505.56,
    "high": 2513.38,
    "low": 2495.12,
    "close": 2509.17,
    "volume_btc": 114.6,
    "volume": 287000.32,
    "volume_adi": 61.75596933187312,
    "volume_obv": 114.6,
    "volume_cmf": NaN,
    "volume_fi": NaN,
    "momentum_mfi": NaN,
    "volume_em": NaN,
    "volume_sma_em": NaN,
    "volume_vpt": -74.53917175731418,
    "volume_nvi": 1000.0,
    "volume_vwap": NaN,
    "volatility_atr": 0.0,
    "volatility_bbm": NaN,
    "volatility_bbh": NaN,
    "volatility_bbl": NaN,
    "volatility_bbw": NaN,
    "volatility_bbp": NaN,
    "volatility_bbhi": 0.0,
    "volatility_bbli": 0.0,
    "volatility_kcc": NaN,
    "volatility_kch": 2524.15,
    "volatility_kcl": 2487.6299999999997,
    "volatility_kcw": NaN,
    "volatility_kcp": 0.5898138006571786,
    "volatility_kchi": 0.0,
    "volatility_kcli": 0.0,
    "volatility_dcl": NaN,
    "volatility_dch": NaN,
    "trend_macd": NaN,
    "trend_macd_signal": NaN,
    "trend_macd_diff": NaN,
    "trend_sma_fas

## Setup Trading Environment

In [19]:
# Simulated exchange priced off the `close` column.
coinbase = Exchange(
    "coinbase",
    service=execute_order,
    options=ExchangeOptions(commission=0.005),
)(Stream.source(list(data["close"]), dtype="float").rename("USD-BTC"))

# Start with 10000 USD and no BTC, both held on the simulated exchange.
portfolio = Portfolio(
    USD,
    [Wallet(coinbase, 10000 * USD), Wallet(coinbase, 0 * BTC)],
)

# Streams handed to the renderer: the date plus the raw OHLCV columns.
renderer_feed = DataFeed(
    [Stream.source(list(data["date"])).rename("date")]
    + [
        Stream.source(list(data[column]), dtype="float").rename(column)
        for column in ("open", "high", "low", "close", "volume")
    ]
)

env = default.create(
    feed=feed,
    portfolio=portfolio,
    action_scheme="simple",  # alternative tried earlier: "managed-risk"
    reward_scheme="risk-adjusted",
    renderer_feed=renderer_feed,
    renderer=default.renderers.PlotlyTradingChart(display=False, save_format="html"),
    window_size=50,
)

In [20]:
env.observer.feed.next()

{'internal': {'coinbase:/USD-BTC': 2509.17,
  'coinbase:/USD:/free': 10000.0,
  'coinbase:/USD:/locked': 0.0,
  'coinbase:/USD:/total': 10000.0,
  'coinbase:/BTC:/free': 0.0,
  'coinbase:/BTC:/locked': 0.0,
  'coinbase:/BTC:/total': 0.0,
  'coinbase:/BTC:/worth': 0.0,
  'net_worth': 10000.0},
 'external': {'open': 2505.56,
  'high': 2513.38,
  'low': 2495.12,
  'close': 2509.17,
  'volume_btc': 114.6,
  'volume': 287000.32,
  'volume_adi': 61.75596933187312,
  'volume_obv': 114.6,
  'volume_cmf': nan,
  'volume_fi': nan,
  'momentum_mfi': nan,
  'volume_em': nan,
  'volume_sma_em': nan,
  'volume_vpt': -74.53917175731418,
  'volume_nvi': 1000.0,
  'volume_vwap': nan,
  'volatility_atr': 0.0,
  'volatility_bbm': nan,
  'volatility_bbh': nan,
  'volatility_bbl': nan,
  'volatility_bbw': nan,
  'volatility_bbp': nan,
  'volatility_bbhi': 0.0,
  'volatility_bbli': 0.0,
  'volatility_kcc': nan,
  'volatility_kch': 2524.15,
  'volatility_kcl': 2487.6299999999997,
  'volatility_kcw': nan,
  '

## Setup and Train

In [21]:
# agent = DQNAgent(env)
# agent.train(n_steps=200, n_episodes=100, render_interval=100, save_path="agents/")

In [22]:
import ray
import numpy as np

from ray import tune
from ray.tune.registry import register_env

import tensortrade.env.default as default
from tensortrade.feed.core import DataFeed, Stream
from tensortrade.oms.instruments import Instrument
from tensortrade.oms.exchanges import Exchange, ExchangeOptions
from tensortrade.oms.services.execution.simulated import execute_order
from tensortrade.oms.wallets import Wallet, Portfolio


def create_env(config):
    """Build a trading environment from an ``env_config`` mapping.

    Args:
        config: Mapping with the keys
            data: DataFrame whose columns from the third onward become float
                observation streams. It must contain a ``close`` column, and
                also ``date``/``open``/``high``/``low``/``volume`` when
                rendering is enabled.
            commission: Commission passed to ``ExchangeOptions``.
            window_size: Window size for the reward scheme and the environment.
            render (optional): When truthy, attach a renderer feed and a
                Plotly chart renderer. Defaults to off.

    Returns:
        The environment returned by ``default.create``.
    """
    data = config["data"].copy()

    feed = DataFeed(
        [
            Stream.source(list(data[c]), dtype="float").rename(data[c].name)
            for c in data.columns[2:]
        ]
    )

    exchange_opts = ExchangeOptions(commission=config["commission"])
    coinbase = Exchange("coinbase", service=execute_order, options=exchange_opts)(
        Stream.source(list(data["close"]), dtype="float").rename("USD/BTC")
    )

    cash = Wallet(coinbase, 10000 * USD)
    asset = Wallet(coinbase, 0 * BTC)
    portfolio = Portfolio(USD, [cash, asset])

    env_kwargs = dict(
        feed=feed,
        portfolio=portfolio,
        action_scheme=default.actions.SimpleOrders(),
        reward_scheme=default.rewards.SimpleProfit(window_size=config["window_size"]),
        window_size=config["window_size"],
        # max_allowed_loss=0.5,
    )

    # The renderer streams are only built on request, so environments created
    # without rendering do not pay for six unused column copies.
    if config.get("render", False):
        env_kwargs["renderer_feed"] = DataFeed(
            [Stream.source(list(data["date"])).rename("date")]
            + [
                Stream.source(list(data[c]), dtype="float").rename(c)
                for c in ("open", "high", "low", "close", "volume")
            ]
        )
        env_kwargs["renderer"] = default.renderers.PlotlyTradingChart(
            display=False, save_format="html"
        )

    return default.create(**env_kwargs)

In [23]:
data_norm = data.copy()


def z_score(x):
    """Centre on the mean and divide by the population standard deviation."""
    return (x - x.mean()) / x.std(ddof=0)


def abs_max(x):
    """Divide by the 0.9 quantile of the absolute values."""
    return x / x.abs().quantile(0.9)


# Scale every column from the third onward, then replace missing values with 0.
data_norm[data_norm.columns[2:]] = data_norm[data_norm.columns[2:]].apply(abs_max)
data_norm = data_norm.fillna(0)

In [24]:
# Settings read by `create_env`: the normalised data frame, the commission
# and the observation window size.
env_config = {"data": data_norm, "commission": 0.005, "window_size": 50}
# environment = create_env(env_config)

In [25]:
# from stable_baselines.common.policies import MlpLnLstmPolicy
# from stable_baselines import PPO2

# policy = MlpLnLstmPolicy
# # params = { "learning_rate": 1e-5 }

# agent = PPO2(policy, environment, nminibatches=1)

In [26]:
# agent.learn(total_timesteps=10000)

In [27]:
# ray.init()
# Register `create_env` under the name "TradingEnv" so that the training
# config can refer to the environment by name.
register_env("TradingEnv", create_env)

In [28]:
# ray.cluster_resources()

In [29]:
!free -m

              total        used        free      shared  buff/cache   available
Mem:         128539        2192       90549        9738       35798      115454
Swap:         10239           0       10239


In [30]:
!nvidia-smi

Sun Sep 20 17:27:32 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 450.51.06    Driver Version: 450.51.06    CUDA Version: 11.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  On   | 00000000:04:00.0 Off |                    0 |
| N/A   46C    P0    25W / 250W |      0MiB / 12198MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  Tesla P100-PCIE...  On   | 00000000:83:00.0 Off |                    0 |
| N/A   43C    P0    27W / 250W |      0MiB / 12198MiB |      0%      Defaul

In [34]:
import random
from ray.tune.schedulers import PopulationBasedTraining

# Postprocess the perturbed config to ensure it's still valid
def explore(config):
    """Repair a perturbed config in place so that it stays valid.

    Args:
        config (dict): Config with ``train_batch_size``, ``sgd_minibatch_size``
            and ``num_sgd_iter`` entries.

    Returns:
        dict: The same ``config`` object, with ``train_batch_size`` raised to
        at least twice ``sgd_minibatch_size`` and ``num_sgd_iter`` raised to at
        least 1.
    """
    # ensure we collect enough timesteps to do sgd
    min_batch = config["sgd_minibatch_size"] * 2
    config["train_batch_size"] = max(config["train_batch_size"], min_batch)

    # ensure we run at least one sgd iter
    config["num_sgd_iter"] = max(config["num_sgd_iter"], 1)
    return config


# Population Based Training scheduler that maximises `episode_reward_mean`.
# Each mutation entry is either a sampler callable or a list of candidate
# values; `explore` post-processes every perturbed config.
# NOTE(review): the `tune.run` call in this cell has `scheduler=pbt` commented
# out, so this scheduler is currently constructed but not used.
pbt = PopulationBasedTraining(
    time_attr="time_total_s",
    metric="episode_reward_mean",
    mode="max",
    perturbation_interval=100,
    resample_probability=0.25,
    # Specifies the mutations of these hyperparams
    hyperparam_mutations={
        "lambda": lambda: random.uniform(0.9, 1.0),
        "clip_param": lambda: random.uniform(0.01, 0.5),
        "lr": [1e-3, 5e-4, 1e-4, 5e-5, 1e-5],
        "num_sgd_iter": lambda: random.randint(1, 30),
        "sgd_minibatch_size": lambda: random.randint(32, 4096),
        "train_batch_size": lambda: random.randint(2000, 160000),
    },
    custom_explore_fn=explore,
)

# Launch a PPO run on the registered "TradingEnv" environment.
# `scheduler` and `num_samples` are commented out, so a single trial runs
# without the PBT scheduler.
# NOTE(review): the output recorded for this cell ends with a RayActorError
# followed by a TuneError; "log_level": "DEBUG" also produces very large logs.
# The `random.choice` samplers below are unseeded, so the sampled values
# differ between runs.
tune.run(
    "PPO",
    name="show_me_the_money_test",
    #     scheduler=pbt,
    #     num_samples=8,
    config={
        "env": "TradingEnv",
        "env_config": env_config,
        "framework": "tf",
        "log_level": "DEBUG",
        "kl_coeff": 1.0,
#         "num_workers": 8,
        "num_gpus": 1,
        #         "model": {
        #             "free_log_std": True
        #         },
        # These params are tuned from a fixed starting value.
        "lambda": 0.95,
        "clip_param": 0.2,
        "lr": 1e-4,
        # These params start off randomly drawn from a set.
        "num_sgd_iter": tune.sample_from(lambda spec: random.choice([10, 20, 30])),
        "sgd_minibatch_size": tune.sample_from(
            lambda spec: random.choice([32, 128, 512, 2048])
        ),
        "train_batch_size": tune.sample_from(
            lambda spec: random.choice([10000, 20000, 40000])
        ),
    },
)

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size
PPO_TradingEnv_6a284_00000,RUNNING,,20,128,20000


[2m[36m(pid=31676)[0m Instructions for updating:
[2m[36m(pid=31676)[0m non-resource variables are not supported in the long term
[2m[36m(pid=31676)[0m 2020-09-20 18:27:37,396	INFO trainer.py:605 -- Tip: set framework=tfe or the --eager flag to enable TensorFlow eager execution
[2m[36m(pid=31676)[0m 2020-09-20 18:27:37,724	DEBUG worker_set.py:204 -- Creating TF session {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}
[2m[36m(pid=31676)[0m 2020-09-20 18:27:38,803	DEBUG rollout_worker.py:957 -- Creating policy for default_policy
[2m[36m(pid=31676)[0m 2020-09-20 18:27:38,812	DEBUG catalog.py:471 -- Created preprocessor <ray.rllib.models.preprocessors.NoPreprocessor object at 0x2b0cf8c239b0>: Box(50, 78) -> (50, 78)
[2m[36m(pid=31676)[0m 2020-09-20 18:27:39,530	DEBUG dynamic_tf_policy.py:433 -- Initializing loss function w

[2m[36m(pid=31690)[0m 2020-09-20 18:27:50,841	DEBUG tf_policy.py:293 -- These tensors were used in the loss_fn:
[2m[36m(pid=31690)[0m 
[2m[36m(pid=31690)[0m { 'action_dist_inputs': <tf.Tensor 'default_policy/action_dist_inputs:0' shape=(?, 21) dtype=float32>,
[2m[36m(pid=31690)[0m   'action_logp': <tf.Tensor 'default_policy/action_logp:0' shape=(?,) dtype=float32>,
[2m[36m(pid=31690)[0m   'actions': <tf.Tensor 'default_policy/actions:0' shape=(?,) dtype=int64>,
[2m[36m(pid=31690)[0m   'advantages': <tf.Tensor 'default_policy/advantages:0' shape=(?,) dtype=float32>,
[2m[36m(pid=31690)[0m   'obs': <tf.Tensor 'default_policy/observation:0' shape=(?, 50, 78) dtype=float32>,
[2m[36m(pid=31690)[0m   'prev_actions': <tf.Tensor 'default_policy/prev_action:0' shape=(?,) dtype=int64>,
[2m[36m(pid=31690)[0m   'prev_rewards': <tf.Tensor 'default_policy/prev_reward:0' shape=(?,) dtype=float32>,
[2m[36m(pid=31690)[0m   'value_targets': <tf.Tensor 'default_policy/value_t

[2m[36m(pid=31676)[0m 2020-09-20 18:28:24,749	INFO tf_policy.py:620 -- Optimizing variable <tf.Variable 'default_policy/fc_1/kernel:0' shape=(3900, 256) dtype=float32>
[2m[36m(pid=31676)[0m 2020-09-20 18:28:24,750	INFO tf_policy.py:620 -- Optimizing variable <tf.Variable 'default_policy/fc_1/bias:0' shape=(256,) dtype=float32>
[2m[36m(pid=31676)[0m 2020-09-20 18:28:24,750	INFO tf_policy.py:620 -- Optimizing variable <tf.Variable 'default_policy/fc_value_1/kernel:0' shape=(3900, 256) dtype=float32>
[2m[36m(pid=31676)[0m 2020-09-20 18:28:24,750	INFO tf_policy.py:620 -- Optimizing variable <tf.Variable 'default_policy/fc_value_1/bias:0' shape=(256,) dtype=float32>
[2m[36m(pid=31676)[0m 2020-09-20 18:28:24,750	INFO tf_policy.py:620 -- Optimizing variable <tf.Variable 'default_policy/fc_2/kernel:0' shape=(256, 256) dtype=float32>
[2m[36m(pid=31676)[0m 2020-09-20 18:28:24,750	INFO tf_policy.py:620 -- Optimizing variable <tf.Variable 'default_policy/fc_2/bias:0' shape=(256,)

[2m[36m(pid=31676)[0m 2020-09-20 18:28:31,819	DEBUG train_ops.py:217 -- 10 {'cur_kl_coeff': 1.0, 'cur_lr': 9.999999747378752e-05, 'total_loss': 0.2618594, 'policy_loss': -0.029811183, 'vf_loss': 0.28508425, 'vf_explained_var': 0.504959, 'kl': 0.0065863323, 'entropy': 3.0379488, 'entropy_coeff': 0.0, 'model': {}}
[2m[36m(pid=31676)[0m 2020-09-20 18:28:32,350	DEBUG train_ops.py:217 -- 11 {'cur_kl_coeff': 1.0, 'cur_lr': 9.999999747378752e-05, 'total_loss': 0.25923395, 'policy_loss': -0.032398365, 'vf_loss': 0.28483748, 'vf_explained_var': 0.50458056, 'kl': 0.006794865, 'entropy': 3.037741, 'entropy_coeff': 0.0, 'model': {}}
[2m[36m(pid=31676)[0m 2020-09-20 18:28:32,854	DEBUG train_ops.py:217 -- 12 {'cur_kl_coeff': 1.0, 'cur_lr': 9.999999747378752e-05, 'total_loss': 0.25675836, 'policy_loss': -0.035238173, 'vf_loss': 0.28475758, 'vf_explained_var': 0.50595915, 'kl': 0.0072389427, 'entropy': 3.0373023, 'entropy_coeff': 0.0, 'model': {}}
[2m[36m(pid=31676)[0m 2020-09-20 18:28:33,

Result for PPO_TradingEnv_6a284_00000:
  custom_metrics: {}
  date: 2020-09-20_18-28-36
  done: false
  episode_len_mean: 565.1764705882352
  episode_reward_max: -26.5983676564183
  episode_reward_mean: -29.913038810569887
  episode_reward_min: -32.092969764102
  episodes_this_iter: 34
  episodes_total: 34
  experiment_id: 762e7d6db6a54bacbf6f63b359153b6f
  experiment_tag: 0_num_sgd_iter=20,sgd_minibatch_size=128,train_batch_size=20000
  hostname: gra856
  info:
    learner:
      default_policy:
        cur_kl_coeff: 1.0
        cur_lr: 9.999999747378752e-05
        entropy: 3.0351245403289795
        entropy_coeff: 0.0
        kl: 0.009435994550585747
        model: {}
        policy_loss: -0.04855655878782272
        total_loss: 0.24444951117038727
        vf_explained_var: 0.5069094896316528
        vf_loss: 0.2835700809955597
    num_steps_sampled: 20000
    num_steps_trained: 20000
  iterations_since_restore: 1
  node_ip: 10.29.85.30
  num_healthy_workers: 2
  off_policy_estimato

Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward
PPO_TradingEnv_6a284_00000,RUNNING,10.29.85.30:31676,20,128,20000,1,52.7598,20000,-29.913


[2m[36m(pid=31690)[0m 2020-09-20 18:28:51,942	INFO sampler.py:714 -- Preprocessed obs: np.ndarray((50, 78), dtype=float32, min=-1.419, max=1.189, mean=0.181)
[2m[36m(pid=31690)[0m 2020-09-20 18:28:51,942	INFO sampler.py:719 -- Filtered obs: np.ndarray((50, 78), dtype=float32, min=-1.419, max=1.189, mean=0.181)
[2m[36m(pid=31690)[0m 2020-09-20 18:28:51,943	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=31690)[0m 
[2m[36m(pid=31690)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=31690)[0m                                   'env_id': 0,
[2m[36m(pid=31690)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b83fad4f9b0>,
[2m[36m(pid=31690)[0m                                             'net_worth': 8265.20537242706,
[2m[36m(pid=31690)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b83fad4f6d8>,
[2m

[2m[33m(pid=raylet)[0m E0920 18:29:19.679894 31607 31607 node_manager.cc:3671] Failed to send get core worker stats request: IOError: 14: Connection reset by peer
2020-09-20 18:29:20,027	ERROR trial_runner.py:523 -- Trial PPO_TradingEnv_6a284_00000: Error processing event.
Traceback (most recent call last):
  File "/project/6007383/nimas/env-python3.6/lib/python3.6/site-packages/ray/tune/trial_runner.py", line 471, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/project/6007383/nimas/env-python3.6/lib/python3.6/site-packages/ray/tune/ray_trial_executor.py", line 430, in fetch_result
    result = ray.get(trial_future[0], DEFAULT_GET_TIMEOUT)
  File "/project/6007383/nimas/env-python3.6/lib/python3.6/site-packages/ray/worker.py", line 1540, in get
    raise value
ray.exceptions.RayActorError: The actor died unexpectedly before finishing this task.


Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward
PPO_TradingEnv_6a284_00000,ERROR,,20,128,20000,1,52.7598,20000,-29.913

Trial name,# failures,error file
PPO_TradingEnv_6a284_00000,1,"/home/nimas/ray_results/show_me_the_money_test/PPO_TradingEnv_0_num_sgd_iter=20,sgd_minibatch_size=128,train_batch_size=20000_2020-09-20_18-27-06y63054ba/error.txt"


Trial name,status,loc,num_sgd_iter,sgd_minibatch_size,train_batch_size,iter,total time (s),ts,reward
PPO_TradingEnv_6a284_00000,ERROR,,20,128,20000,1,52.7598,20000,-29.913

Trial name,# failures,error file
PPO_TradingEnv_6a284_00000,1,"/home/nimas/ray_results/show_me_the_money_test/PPO_TradingEnv_0_num_sgd_iter=20,sgd_minibatch_size=128,train_batch_size=20000_2020-09-20_18-27-06y63054ba/error.txt"


TuneError: ('Trials did not complete', [PPO_TradingEnv_6a284_00000])

# PPO run on "TradingEnv" with a stop criterion of 50000 on
# `episode_reward_mean`; a checkpoint is requested at the end of the run and
# the return value of `tune.run` is kept in `analysis`.
# The commented-out entries are settings that are currently disabled.
analysis = tune.run(
    "PPO",
    stop={"episode_reward_mean": 50000},
    config={
        "env": "TradingEnv",
        "env_config": env_config,
        "log_level": "DEBUG",
        "framework": "tf",
#         "ignore_worker_failures": True,
#         "num_workers": 4,
        "num_gpus": 2,
#         "clip_rewards": True,
#         "lr": 1e-6,
#         "lr_schedule": [
# #             [0, 1e-1],
# #             [int(1e2), 1e-2],
# #             [int(1e3), 1e-3],
#             [int(1e4), 1e-4],
#             [int(1e5), 1e-5],
#             [int(1e6), 1e-6],
#             [int(1e7), 1e-7],
#         ],
#         "gamma": 0,
#         "observation_filter": "MeanStdFilter",
#         "lambda": 0.72,
#         "vf_loss_coeff": 0.5,
#         "entropy_coeff": 0.01,
    },
#     local_dir="./ray",
#     resources_per_trial={"cpu": 4, "gpu": 1},
    checkpoint_at_end=True,
)