## Install TensorTrade

In [1]:
# !python3 -m pip install git+https://github.com/nsarang/tensortrade.git --force

## Setup

In [2]:
# Put these at the top of every notebook, to get automatic reloading and inline plotting
%reload_ext autoreload
%autoreload 2
%matplotlib inline


import re
import sys
import time
import pandas as pd
pd.options.mode.use_inf_as_na = True

import numpy as np
from datetime import datetime, timedelta, timezone
from tenacity import retry, retry_if_exception_type, stop_after_attempt
import pytz


In [3]:
import asyncio
import ccxt
# import ccxt.async_support as ccxt

import os

# SECURITY: API credentials must never be stored in a notebook. They are read
# from the environment instead; any key pair that was previously hard-coded
# here should be considered leaked and revoked on the exchange.
# Missing variables fall back to empty strings, i.e. an unauthenticated client.
apiKey = os.environ.get("BINANCE_API_KEY", "")
secret = os.environ.get("BINANCE_API_SECRET", "")

exchange = ccxt.binance(
    {
        "apiKey": apiKey,
        "secret": secret,
        "enableRateLimit": True,
        # 'options': {
        #     'defaultType': 'spot',  # spot, future, margin
        # },
    }
)

In [4]:
@retry(retry=retry_if_exception_type(ccxt.NetworkError), stop=stop_after_attempt(3))
def get_historical_data(
    symbol, exchange, timeframe, start_date=None, limit=500, max_per_page=500
):
    """Fetch historical OHLCV candles for a symbol pair, one page at a time.

    Decorators:
        retry: the whole call is retried (3 attempts in total) when a
          ``ccxt.NetworkError`` is raised.

    Args:
        symbol (str): Contains the symbol pair to operate on i.e. BURST/BTC
        exchange: Exchange client exposing a ``timeframes`` collection and a
          ``fetch_ohlcv(symbol, timeframe=..., since=..., limit=...)`` method.
        timeframe (str): A string specifying the ccxt time unit i.e. 5m or 1d.
        start_date (int, optional): Timestamp in milliseconds of the first
          candle. When omitted, the window is the last ``limit`` candles
          counted back from now.
        limit (int, optional): Defaults to 500. Maximum number of candles to
          fetch. May be falsy only when ``start_date`` is given, in which case
          everything from ``start_date`` until now is requested.
        max_per_page (int, optional): Defaults to 500. Maximum number of
          candles requested per ``fetch_ohlcv`` call.

    Returns:
        list: Contains a list of lists which contain timestamp, open, high,
          low, close, volume, sorted by timestamp in ascending order.

    Raises:
        AttributeError: If the exchange exposes no ``timeframes``.
        ValueError: If the timeframe is unsupported or cannot be parsed, if
          neither ``start_date`` nor ``limit`` is given, if the exchange
          returns nothing, or if the number of candles returned differs from
          the number requested.
    """

    supported_timeframes = getattr(exchange, "timeframes", None)
    if supported_timeframes is None:
        # There is no logger (and no ``self``) in this module-level function,
        # so the diagnostic is carried by the exception itself.
        raise AttributeError(
            "{} interface does not support timeframe queries! "
            "We are unable to fetch data!".format(exchange)
        )
    if timeframe not in supported_timeframes:
        raise ValueError(
            "{} does not support {} timeframe for OHLCV data. Possible values are: {}".format(
                exchange, timeframe, list(supported_timeframes)
            )
        )

    timeframe_match = re.match("([0-9]+)([a-zA-Z])", timeframe)
    if timeframe_match is None:
        raise ValueError("Unable to parse timeframe {!r}.".format(timeframe))
    time_quantity = int(timeframe_match.group(1))
    time_period = timeframe_match.group(2)

    # Length of one unit of each supported period. ``timedelta`` has no notion
    # of months or years, so those are approximated as 30 and 365 days.
    unit_lengths = {
        "m": timedelta(minutes=1),
        "h": timedelta(hours=1),
        "d": timedelta(days=1),
        "w": timedelta(weeks=1),
        "M": timedelta(days=30),
        "y": timedelta(days=365),
    }
    if time_period not in unit_lengths:
        raise ValueError("Unknown timeframe unit {!r} in {!r}.".format(time_period, timeframe))

    single_frame = time_quantity * unit_lengths[time_period]
    frame_ms = int(single_frame.total_seconds() * 1000)

    now = datetime.now()
    if not start_date:
        if limit is None:
            raise ValueError("Either start_date or limit must be provided.")
        start_date = int((now - limit * single_frame).timestamp() * 1000)
        total = limit
    else:
        # Number of whole candles between start_date (milliseconds) and now.
        elapsed_ms = int(now.timestamp() * 1000) - start_date
        total = int(max(elapsed_ms // frame_ms, 0))
        if limit:
            total = min(limit, total)

    historical_data = []
    for cursor in range(0, total, max_per_page):
        # A separate name keeps the caller's ``limit`` argument intact.
        page_limit = min(total - cursor, max_per_page)
        historical_data += exchange.fetch_ohlcv(
            symbol,
            timeframe=timeframe,
            since=start_date + cursor * frame_ms,
            limit=page_limit,
        )

    if not historical_data:
        raise ValueError("No historical data returned by exchange.")

    if len(historical_data) != total:
        raise ValueError("Gaps detected in historical data.")

    # Sort by timestamp in ascending order
    historical_data.sort(key=lambda d: d[0])

    return historical_data


def timestamp_to_datetime(
    timestamp, timezone=pytz.timezone("America/Montreal"), to_str=False
):
    """Convert a POSIX timestamp in seconds to a timezone-aware datetime.

    Args:
        timestamp (float): Seconds since the epoch.
        timezone (tzinfo, optional): Zone the result is expressed in.
          Defaults to America/Montreal.
        to_str (bool, optional): When true, return a string with millisecond
          precision and a trailing "Z" instead of a datetime object.

    Returns:
        datetime.datetime | str: The converted moment.
    """
    # NOTE(review): "Z" conventionally denotes UTC, yet the string is rendered
    # in `timezone` - confirm this is what consumers expect.
    moment = datetime.fromtimestamp(timestamp, timezone)
    if not to_str:
        return moment
    # %f yields microseconds; dropping the last three digits leaves milliseconds.
    return moment.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z"


def convert_to_dataframe(historical_data):
    """Converts historical data matrix to a pandas dataframe.

    Args:
        historical_data (list): A matrix of historical OHLCV data; each row is
          [timestamp in milliseconds, open, high, low, close, volume]. An
          empty list yields an empty frame with the expected columns.

    Returns:
        pandas.DataFrame: Contains the historical data in a pandas dataframe,
          indexed by "datetime" with columns open, high, low, close, volume.
    """

    columns = ["timestamp", "open", "high", "low", "close", "volume"]
    # Naming the columns at construction also works for empty input, where
    # assigning six names to a zero-column frame would raise.
    dataframe = pd.DataFrame(historical_data, columns=columns)

    # Timestamps arrive in milliseconds; the converter expects seconds.
    dataframe["datetime"] = dataframe["timestamp"].apply(
        lambda x: timestamp_to_datetime(x / 1000)
    )

    return dataframe.set_index("datetime", drop=True).drop("timestamp", axis=1)

In [5]:
def shift(values: np.ndarray, periods: int, axis, fill_value) -> np.ndarray:
    """Shift an array along one axis, padding the vacated slots.

    Args:
        values (np.ndarray): Array to shift. It is never modified.
        periods (int): Number of positions to shift by. Positive values move
          data towards higher indices, negative values towards lower indices.
        axis (int): Axis to shift along. Negative values count from the last
          axis.
        fill_value: Value written into the positions vacated by the shift.

    Returns:
        np.ndarray: A new array of the same shape as ``values``.
    """
    if periods == 0 or values.size == 0:
        return values.copy()

    # Normalise a negative axis up front; the transposition below computes
    # ``ndim - axis - 1``, which is only valid for a non-negative axis.
    if axis < 0:
        axis += values.ndim

    # make sure array sent to np.roll is c_contiguous
    f_ordered = values.flags.f_contiguous
    new_values = values
    if f_ordered:
        new_values = new_values.T
        axis = new_values.ndim - axis - 1

    # np.roll returns a fresh array, so the caller's data stays untouched.
    new_values = np.roll(new_values, periods, axis=axis)

    # Overwrite the elements that wrapped around with the fill value.
    axis_indexer = [slice(None)] * values.ndim
    if periods > 0:
        axis_indexer[axis] = slice(None, periods)
    else:
        axis_indexer[axis] = slice(periods, None)
    new_values[tuple(axis_indexer)] = fill_value

    # restore original order
    if f_ordered:
        new_values = new_values.T

    return new_values


def crossing(a, b):
    """Flag the samples at which series ``a`` and ``b`` cross each other.

    Each sample is compared with the previous one (the first sample is
    compared against a previous value of 0 for both inputs).

    Returns:
        np.ndarray: 1 where ``a`` was at or above ``b`` and is now at or below
          it, -1 where ``a`` was at or below ``b`` and is now at or above it,
          0 elsewhere. When both conditions hold, 1 takes precedence.
    """
    prev_a = shift(a, 1, axis=0, fill_value=0)
    prev_b = shift(b, 1, axis=0, fill_value=0)

    a_moved_below = (a <= b) & (prev_a >= prev_b)
    a_moved_above = (a >= b) & (prev_a <= prev_b)

    upward_or_none = np.where(a_moved_above, -1, 0)
    return np.where(a_moved_below, 1, upward_or_none)


def SWING_CALLS(df):
    """Derive swing-trade entry and exit signals from EMA, SMA and RSI.

    Args:
        df (pandas.DataFrame): Frame with ``open``, ``high`` and ``close``
          columns.

    Returns:
        tuple: ``(buyexit, sellexit, sellcall, buycall)`` boolean masks:
          - buyexit: RSI(14) above 80.
          - sellexit: RSI(14) below 30.
          - sellcall: ``crossing(sma, ema)`` is positive on a bar that closed
            below its open.
          - buycall: ``crossing(sma, ema)`` is negative on a bar whose high is
            above the SMA(50).
    """
    # NOTE(review): this relies on `ta` exposing EMA/SMA/RSI functions
    # (TA-Lib style); the `ta` package imported later in the notebook does not
    # appear to provide them - confirm which library is intended.
    ema = ta.EMA(df.close, 5)
    sma = ta.SMA(df.close, 50)
    rsi = ta.RSI(df.close, 14)

    buyexit = rsi > 80
    sellexit = rsi < 30

    # Evaluate the crossover once and reuse it for both directions.
    cross = crossing(sma, ema)
    sellcall = (cross > 0) & (df.open > df.close)
    buycall = (cross < 0) & (df.high > sma)

    return buyexit, sellexit, sellcall, buycall


def smooth_range(series, period, mult):
    """Compute a doubly smoothed average of absolute bar-to-bar changes.

    Args:
        series (pandas.Series): Input values.
        period (int): Period of the first EMA; the second EMA uses
          ``period * 2 - 1``.
        mult (float): Factor applied to the smoothed result.

    Returns:
        pandas.Series: Scaled smoothed range, indexed like ``series``.
    """
    # The first element is differenced against 0, so it equals the first value.
    step_size = (series - series.shift(1, fill_value=0)).abs()
    first_pass = ta.EMA(step_size, period)
    second_pass = ta.EMA(first_pass, period * 2 - 1)
    return pd.Series(second_pass * mult, index=series.index)


def filter_range(series, smoothrng):
    """Clamp each value to within a band of the previous value of ``series``.

    The result starts as ``series`` shifted by one step (first element 0).
    For every later position, if the current value lies within
    ``previous +/- band`` the shifted value is kept; otherwise it is replaced
    by the current value pulled back by one band width towards the previous.

    Args:
        series (pandas.Series): Input values.
        smoothrng (iterable): Band width for each position of ``series``.

    Returns:
        pandas.Series: The filtered series, indexed like ``series``.
    """
    # NOTE(review): the reference point is the previous *input* value, not the
    # previous filtered value - confirm that a non-recursive filter is intended.
    filtered = series.shift(1, fill_value=0)
    for position, (value, band) in enumerate(zip(series, smoothrng)):
        if position == 0:
            continue

        reference = filtered.iloc[position]
        lower, upper = reference - band, reference + band
        if lower <= value <= upper:
            continue

        filtered.iloc[position] = value - band if value > upper else value + band
    return filtered


def Range_Filter_Buy_Sell(df, period=100, range_multiplier=3):
    """Produce buy and sell masks from a range filter on the close price.

    Args:
        df (pandas.DataFrame): Frame with a ``close`` column.
        period (int, optional): Smoothing period passed to ``smooth_range``.
        range_multiplier (float, optional): Band multiplier passed to
          ``smooth_range``.

    Returns:
        tuple: ``(buycall, sellcall)`` boolean Series. A buy requires the close
          to be above the filter while both close and filter rose versus the
          previous bar; a sell is the mirror image.
    """
    close = df.close

    # Smooth Average Range, then the Range Filter built on top of it
    band = smooth_range(close, period, range_multiplier)
    filtered = filter_range(close, band)

    previous_close = close.shift(1)
    previous_filtered = filtered.shift(1)

    buycall = (close > filtered) & (close > previous_close) & (filtered > previous_filtered)
    sellcall = (close < filtered) & (close < previous_close) & (filtered < previous_filtered)
    return buycall, sellcall


def calculate_profit(ohlvc, buycall, sellcall, start_from=100, trade_fee=0.1):
    """Back-test buy/sell signals with an all-in, all-out strategy.

    The simulation starts with 1 unit of money and no asset. On a buy signal
    all money is converted to the asset at that bar's close; on a sell signal
    all of the asset is converted back. Any open position is closed on the
    final bar, and no new position is opened there.

    Args:
        ohlvc (pandas.DataFrame): Frame with a ``close`` column.
        buycall (iterable of bool): Buy signal per bar.
        sellcall (iterable of bool): Sell signal per bar.
        start_from (int, optional): Bars before this position are ignored.
          Frames shorter than this simply produce no trades.
        trade_fee (float, optional): Fee per trade, in percent of the traded
          amount.

    Returns:
        tuple: ``(money, trade_cost)`` - the final amount of money and the
          total fees paid.
    """
    # One array lookup instead of a row lookup per bar.
    closes = ohlvc["close"].to_numpy()
    last_bar = len(ohlvc) - 1
    trade_fee /= 100  # percent -> fraction

    money = 1
    asset = 0
    trade_cost = 0
    for time, (buy, sell) in enumerate(zip(buycall, sellcall)):
        if time < start_from:
            continue

        if buy and money and time != last_bar:
            # The fee is taken from the money before it is converted.
            trade_cost += money * trade_fee
            money *= (1 - trade_fee)
            asset = money / closes[time]
            money = 0

        elif (sell or time == last_bar) and asset:
            # The fee is taken from the proceeds of the sale.
            money = asset * closes[time]
            trade_cost += money * trade_fee
            money *= (1 - trade_fee)
            asset = 0

    return money, trade_cost

## Data

### Load

In [6]:
import pandas as pd
import tensortrade as tt
import tensortrade.env.default as default

from tensortrade.data.cdd import CryptoDataDownload
from tensortrade.feed.core import Stream, DataFeed
from tensortrade.oms.exchanges import Exchange, ExchangeOptions
from tensortrade.oms.services.execution.simulated import execute_order
from tensortrade.oms.instruments import USD, BTC, ETH
from tensortrade.oms.wallets import Wallet, Portfolio
# from tensortrade.agents import DQNAgent, A2CAgent

In [7]:
# cdd = CryptoDataDownload()
# data = cdd.fetch("Coinbase", "USD", "BTC", "1h")


# Load the hourly Coinbase BTC/USD candles (the first line of the file is
# skipped), parse the 12-hour "date" strings and order rows oldest first.
data = (
    pd.read_csv("data/Coinbase_BTCUSD_1h.csv", skiprows=1)
    .assign(date=lambda frame: pd.to_datetime(frame["date"], format="%Y-%m-%d %I-%p"))
    .sort_values("date")
)
data.head()

Unnamed: 0,date,symbol,open,high,low,close,volume_btc,volume
20110,2017-07-01 11:00:00,BTCUSD,2505.56,2513.38,2495.12,2509.17,114.6,287000.32
20109,2017-07-01 12:00:00,BTCUSD,2509.17,2512.87,2484.99,2488.43,157.36,393142.5
20108,2017-07-01 13:00:00,BTCUSD,2488.43,2488.43,2454.4,2454.43,280.28,693254.01
20107,2017-07-01 14:00:00,BTCUSD,2454.43,2473.93,2450.83,2459.35,289.42,712864.8
20106,2017-07-01 15:00:00,BTCUSD,2459.35,2475.0,2450.0,2467.83,276.82,682105.41


In [8]:
import ta

# Add the `ta` library's indicator columns to the price frame (the resulting
# columns are listed in the next cell). The "volume_btc" column is the one
# supplied as the volume input.
data = ta.add_all_ta_features(
    data, open="open", high="high", low="low", close="close", volume="volume_btc"
)


invalid value encountered in double_scalars


invalid value encountered in double_scalars



In [9]:
data.columns

Index(['date', 'symbol', 'open', 'high', 'low', 'close', 'volume_btc',
       'volume', 'volume_adi', 'volume_obv', 'volume_cmf', 'volume_fi',
       'momentum_mfi', 'volume_em', 'volume_sma_em', 'volume_vpt',
       'volume_nvi', 'volume_vwap', 'volatility_atr', 'volatility_bbm',
       'volatility_bbh', 'volatility_bbl', 'volatility_bbw', 'volatility_bbp',
       'volatility_bbhi', 'volatility_bbli', 'volatility_kcc',
       'volatility_kch', 'volatility_kcl', 'volatility_kcw', 'volatility_kcp',
       'volatility_kchi', 'volatility_kcli', 'volatility_dcl',
       'volatility_dch', 'trend_macd', 'trend_macd_signal', 'trend_macd_diff',
       'trend_sma_fast', 'trend_sma_slow', 'trend_ema_fast', 'trend_ema_slow',
       'trend_adx', 'trend_adx_pos', 'trend_adx_neg', 'trend_vortex_ind_pos',
       'trend_vortex_ind_neg', 'trend_vortex_ind_diff', 'trend_trix',
       'trend_mass_index', 'trend_cci', 'trend_dpo', 'trend_kst',
       'trend_kst_sig', 'trend_kst_diff', 'trend_ichimoku_conv

### Create features with the feed module

In [10]:
def rsi(price: Stream[float], period: float) -> Stream[float]:
    """Build a relative-strength-index stream from a price stream.

    Args:
        price: Stream of prices.
        period: Smoothing period; both averages use ``alpha = 1 / period``.

    Returns:
        A stream computing ``100 * (1 - 1 / (1 + rs))``, where ``rs`` is the
        ratio of the smoothed upward moves to the smoothed downward moves.
    """
    change = price.diff()
    gains = change.clamp_min(0).abs()
    losses = change.clamp_max(0).abs()

    smoothing = 1 / period
    avg_gain = gains.ewm(alpha=smoothing).mean()
    avg_loss = losses.ewm(alpha=smoothing).mean()
    rs = avg_gain / avg_loss
    return 100*(1 - (1 + rs) ** -1)


def macd(price: Stream[float], fast: float, slow: float, signal: float) -> Stream[float]:
    """Build a stream of the MACD line minus its smoothed signal line.

    Args:
        price: Stream of prices.
        fast: Span of the fast exponential moving average.
        slow: Span of the slow exponential moving average.
        signal: Span of the moving average applied to the MACD line.

    Returns:
        A stream of ``(fast EMA - slow EMA)`` minus its own EMA over ``signal``.
    """
    fast_ema = price.ewm(span=fast, adjust=False).mean()
    slow_ema = price.ewm(span=slow, adjust=False).mean()
    macd_line = fast_ema - slow_ema
    # A separate name keeps the `signal` span parameter intact.
    signal_line = macd_line.ewm(span=signal, adjust=False).mean()
    return macd_line - signal_line

In [11]:
# One float Stream per column of `data` from the third column onwards (the
# column listing above shows the first two are "date" and "symbol"), each
# renamed to its source column.
features = [
    Stream.source(list(data[c]), dtype="float").rename(data[c].name)
    for c in data.columns[2:]
]

In [12]:
close = Stream.select(features, lambda s: s.name == "close")

In [13]:
# class Listener:
#     def on_next(self, value):
#         print(value)

# close.attach(Listener())

In [14]:
from tensortrade.feed.core import Stream
ss = Stream.source([1, 2, 3, 4, 5], dtype="float")

In [15]:
ff = DataFeed([ss.rolling(2).mean()])

In [16]:
ff.next()

{'stream:/78': 1.0}

In [17]:
# features = [
#     close.ewm(span=14).mean().rename("ema"),
#     close.ewm(alpha=1).mean().rename("sma"),
#     close.log().diff().rename("lr"),
#     rsi(close, period=20).rename("rsi"),
#     macd(close, fast=10, slow=50, signal=5).rename("macd")
# ]

# Bundle every feature stream into a single feed; as the output of the next
# cell shows, each `feed.next()` call returns a dict keyed by stream name.
feed = DataFeed(features)
feed.compile()

In [18]:
import json

# Pull five observations from the feed and pretty-print each as JSON.
for i in range(5):
    obsv = feed.next()
    print(json.dumps(obsv, indent=4))

{
    "open": 2505.56,
    "high": 2513.38,
    "low": 2495.12,
    "close": 2509.17,
    "volume_btc": 114.6,
    "volume": 287000.32,
    "volume_adi": 61.75596933187312,
    "volume_obv": 114.6,
    "volume_cmf": NaN,
    "volume_fi": NaN,
    "momentum_mfi": NaN,
    "volume_em": NaN,
    "volume_sma_em": NaN,
    "volume_vpt": -74.53917175731418,
    "volume_nvi": 1000.0,
    "volume_vwap": NaN,
    "volatility_atr": 0.0,
    "volatility_bbm": NaN,
    "volatility_bbh": NaN,
    "volatility_bbl": NaN,
    "volatility_bbw": NaN,
    "volatility_bbp": NaN,
    "volatility_bbhi": 0.0,
    "volatility_bbli": 0.0,
    "volatility_kcc": NaN,
    "volatility_kch": 2524.15,
    "volatility_kcl": 2487.6299999999997,
    "volatility_kcw": NaN,
    "volatility_kcp": 0.5898138006571786,
    "volatility_kchi": 0.0,
    "volatility_kcli": 0.0,
    "volatility_dcl": NaN,
    "volatility_dch": NaN,
    "trend_macd": NaN,
    "trend_macd_signal": NaN,
    "trend_macd_diff": NaN,
    "trend_sma_fas

## Setup Trading Environment

In [19]:
# Simulated exchange that quotes the close price and charges 0.5% commission.
coinbase = Exchange(
    "coinbase",
    service=execute_order,
    options=ExchangeOptions(commission=0.005),
)(Stream.source(list(data["close"]), dtype="float").rename("USD-BTC"))

# Start with 10,000 USD and no BTC, both held on the simulated exchange.
portfolio = Portfolio(
    USD,
    [
        Wallet(coinbase, 10000 * USD),
        Wallet(coinbase, 0 * BTC),
    ],
)

# Streams handed to the chart renderer: the date plus the raw OHLCV columns.
renderer_feed = DataFeed(
    [Stream.source(list(data["date"])).rename("date")]
    + [
        Stream.source(list(data[column]), dtype="float").rename(column)
        for column in ("open", "high", "low", "close", "volume")
    ]
)

env = default.create(
    portfolio=portfolio,
    # action_scheme="managed-risk",
    action_scheme="simple",
    reward_scheme="risk-adjusted",
    feed=feed,
    renderer_feed=renderer_feed,
    renderer=default.renderers.PlotlyTradingChart(display=False, save_format="html"),
    window_size=50,
)

In [20]:
env.observer.feed.next()

{'internal': {'coinbase:/USD-BTC': 2509.17,
  'coinbase:/USD:/free': 10000.0,
  'coinbase:/USD:/locked': 0.0,
  'coinbase:/USD:/total': 10000.0,
  'coinbase:/BTC:/free': 0.0,
  'coinbase:/BTC:/locked': 0.0,
  'coinbase:/BTC:/total': 0.0,
  'coinbase:/BTC:/worth': 0.0,
  'net_worth': 10000.0},
 'external': {'open': 2505.56,
  'high': 2513.38,
  'low': 2495.12,
  'close': 2509.17,
  'volume_btc': 114.6,
  'volume': 287000.32,
  'volume_adi': 61.75596933187312,
  'volume_obv': 114.6,
  'volume_cmf': nan,
  'volume_fi': nan,
  'momentum_mfi': nan,
  'volume_em': nan,
  'volume_sma_em': nan,
  'volume_vpt': -74.53917175731418,
  'volume_nvi': 1000.0,
  'volume_vwap': nan,
  'volatility_atr': 0.0,
  'volatility_bbm': nan,
  'volatility_bbh': nan,
  'volatility_bbl': nan,
  'volatility_bbw': nan,
  'volatility_bbp': nan,
  'volatility_bbhi': 0.0,
  'volatility_bbli': 0.0,
  'volatility_kcc': nan,
  'volatility_kch': 2524.15,
  'volatility_kcl': 2487.6299999999997,
  'volatility_kcw': nan,
  '

## Setup and Train

In [21]:
# agent = DQNAgent(env)
# agent.train(n_steps=200, n_episodes=100, render_interval=100, save_path="agents/")

In [22]:
import ray
import numpy as np

from ray import tune
from ray.tune.registry import register_env

import tensortrade.env.default as default
from tensortrade.feed.core import DataFeed, Stream
from tensortrade.oms.instruments import Instrument
from tensortrade.oms.exchanges import Exchange, ExchangeOptions
from tensortrade.oms.services.execution.simulated import execute_order
from tensortrade.oms.wallets import Wallet, Portfolio


def create_env(config):
    """Build a trading environment from a configuration dict.

    Args:
        config (dict): Must provide
          - "data": DataFrame whose columns from the third onwards are
            numeric features and which has "date", "open", "high", "low",
            "close" and "volume" columns (the frame is copied, not mutated);
          - "commission": commission passed to the exchange options;
          - "window_size": used for both the observation window and the
            reward scheme.

    Returns:
        The environment created by ``default.create``.
    """
    data = config["data"].copy()

    def float_stream(column, name):
        # Wrap one column of the frame as a named float stream.
        return Stream.source(list(data[column]), dtype="float").rename(name)

    # Observation feed: one stream per feature column.
    feed = DataFeed([float_stream(c, data[c].name) for c in data.columns[2:]])

    # Only consumed if the renderer arguments further down are re-enabled.
    renderer_feed = DataFeed(
        [Stream.source(list(data["date"])).rename("date")]
        + [float_stream(c, c) for c in ("open", "high", "low", "close", "volume")]
    )

    # Simulated exchange quoting the close price.
    coinbase = Exchange(
        "coinbase",
        service=execute_order,
        options=ExchangeOptions(commission=config["commission"]),
    )(float_stream("close", "USD/BTC"))

    # Start with 10,000 USD and no BTC.
    portfolio = Portfolio(
        USD,
        [Wallet(coinbase, 10000 * USD), Wallet(coinbase, 0 * BTC)],
    )

    window_size = config["window_size"]
    reward_scheme = default.rewards.SimpleProfit(window_size=window_size)
    action_scheme = default.actions.SimpleOrders()

    return default.create(
        feed=feed,
        #         renderer_feed=renderer_feed,
        #         renderer=default.renderers.PlotlyTradingChart(display=False, save_format="html"),
        portfolio=portfolio,
        action_scheme=action_scheme,
        reward_scheme=reward_scheme,
        window_size=window_size,
        #         max_allowed_loss=0.5,
    )

In [23]:
data_norm = data.copy()


def z_score(x):
    """Centre on the mean and scale by the population standard deviation."""
    return (x - x.mean()) / x.std(ddof=0)


def abs_max(x):
    """Scale by the 90th percentile of the absolute values."""
    return x / x.abs().quantile(0.9)


# Only `abs_max` is applied; `z_score` is kept as an alternative scaler.
# Every column from the third onwards is rescaled, then gaps are zero-filled.
data_norm[data_norm.columns[2:]] = data_norm[data_norm.columns[2:]].apply(abs_max)
data_norm = data_norm.fillna(0)

In [24]:
# Settings read by `create_env`: the normalised frame, the exchange commission
# and the window size (used for both observations and the reward scheme).
env_config = {"data": data_norm, "commission": 0.005, "window_size": 50}
# environment = create_env(env_config)

In [25]:
# from stable_baselines.common.policies import MlpLnLstmPolicy
# from stable_baselines import PPO2

# policy = MlpLnLstmPolicy
# # params = { "learning_rate": 1e-5 }

# agent = PPO2(policy, environment, nminibatches=1)

In [26]:
# agent.learn(total_timesteps=10000)

In [27]:
# ray.init()
# Register `create_env` under the name "TradingEnv", the name the trainer
# configuration uses to refer to the environment.
register_env("TradingEnv", create_env)

In [28]:
# ray.cluster_resources()

In [None]:
import ray
import ray.rllib.agents.ppo as ppo
from ray.tune.logger import pretty_print

# ray.init()
# Training schedule.
N_ITERATIONS = 1000
CHECKPOINT_EVERY = 100

config = {
    "env": "TradingEnv",
    "env_config": env_config,
    "framework": "torch",
    # "DEBUG" floods the notebook with per-SGD-step and per-worker logs;
    # switch back to it only while diagnosing a problem.
    "log_level": "WARN",
    "num_workers": 5,
    # NOTE(review): assumes a machine with two GPUs - adjust to the hardware.
    "num_gpus": 2,
    "num_sgd_iter": 30,
    "sgd_minibatch_size": 64,
    "train_batch_size": 1000,
    "lambda": 0.95,
    "clip_param": 0.3,
    "kl_coeff": 0.3,
    "kl_target": 0.01,
    "entropy_coeff": 0.005,
    "lr": 1e-5,
}
trainer = ppo.PPOTrainer(config=config, env="TradingEnv")

# Can optionally call trainer.restore(path) to load a checkpoint.

for i in range(N_ITERATIONS):
    # Perform one iteration of training the policy with PPO
    result = trainer.train()
    print(pretty_print(result))

    # Save after every CHECKPOINT_EVERY completed iterations, so the final
    # iteration is checkpointed and nothing is saved after just one iteration.
    if (i + 1) % CHECKPOINT_EVERY == 0:
        checkpoint = trainer.save()
        print("checkpoint saved at", checkpoint)

2020-09-21 15:12:51,108	DEBUG rollout_worker.py:957 -- Creating policy for default_policy
2020-09-21 15:12:51,115	DEBUG catalog.py:471 -- Created preprocessor <ray.rllib.models.preprocessors.NoPreprocessor object at 0x2af165b36048>: Box(50, 78) -> (50, 78)
2020-09-21 15:12:51,354	INFO rollout_worker.py:990 -- Built policy map: {'default_policy': <ray.rllib.policy.torch_policy_template.PPOTorchPolicy object at 0x2ae81751aba8>}
2020-09-21 15:12:51,357	INFO rollout_worker.py:991 -- Built preprocessor map: {'default_policy': <ray.rllib.models.preprocessors.NoPreprocessor object at 0x2af165b36048>}
2020-09-21 15:12:51,359	DEBUG rollout_worker.py:415 -- Creating policy evaluation worker 0 on CPU (please ignore any CUDA init errors)
2020-09-21 15:12:51,361	INFO rollout_worker.py:446 -- Built filter map: {'default_policy': <ray.rllib.utils.filter.NoFilter object at 0x2af20bc60978>}
2020-09-21 15:12:51,362	DEBUG rollout_worker.py:540 -- Created rollout worker with env <ray.rllib.env.base_env._V

[2m[36m(pid=24699)[0m Instructions for updating:
[2m[36m(pid=24699)[0m non-resource variables are not supported in the long term
[2m[36m(pid=24677)[0m 2020-09-21 15:13:00,131	DEBUG rollout_worker.py:415 -- Creating policy evaluation worker 2 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=24677)[0m 2020-09-21 15:13:00,137	DEBUG rollout_worker.py:540 -- Created rollout worker with env <ray.rllib.env.base_env._VectorEnvToBaseEnv object at 0x2acdaad1f470> (<TradingEnv instance>), policies {'default_policy': <ray.rllib.policy.tf_policy_template.PPOTFPolicy object at 0x2acdaa0d8ba8>}
[2m[36m(pid=24670)[0m 2020-09-21 15:13:00,163	DEBUG rollout_worker.py:415 -- Creating policy evaluation worker 3 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=24670)[0m 2020-09-21 15:13:00,174	DEBUG rollout_worker.py:540 -- Created rollout worker with env <ray.rllib.env.base_env._VectorEnvToBaseEnv object at 0x2b551885b2b0> (<TradingEnv instance>), policies {'default_policy':

[2m[36m(pid=24686)[0m 2020-09-21 15:13:08,225	DEBUG rollout_worker.py:957 -- Creating policy for default_policy
[2m[36m(pid=24686)[0m 2020-09-21 15:13:08,235	DEBUG catalog.py:471 -- Created preprocessor <ray.rllib.models.preprocessors.NoPreprocessor object at 0x2b65d487e438>: Box(50, 78) -> (50, 78)
[2m[36m(pid=24686)[0m 2020-09-21 15:13:08,309	DEBUG rollout_worker.py:415 -- Creating policy evaluation worker 2 on CPU (please ignore any CUDA init errors)
[2m[36m(pid=24686)[0m 2020-09-21 15:13:08,312	DEBUG rollout_worker.py:540 -- Created rollout worker with env <ray.rllib.env.base_env._VectorEnvToBaseEnv object at 0x2b65dcc65e10> (<TradingEnv instance>), policies {'default_policy': <ray.rllib.policy.torch_policy_template.PPOTorchPolicy object at 0x2b65dcc65550>}
[2m[36m(pid=24686)[0m 
[2m[36m(pid=24686)[0m 
[2m[36m(pid=24697)[0m 2020-09-21 15:13:08,538	DEBUG rollout_worker.py:957 -- Creating policy for default_policy
[2m[36m(pid=24697)[0m 2020-09-21 15:13:08,547	

[2m[36m(pid=24699)[0m 2020-09-21 15:13:10,313	INFO sample_batch_builder.py:204 -- Trajectory fragment after postprocess_trajectory():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'agent0': { 'data': { 'action_dist_inputs': np.ndarray((200, 21), dtype=float32, min=-0.015, max=0.017, mean=-0.0),
[2m[36m(pid=24699)[0m                         'action_logp': np.ndarray((200,), dtype=float32, min=-3.054, max=-3.035, mean=-3.044),
[2m[36m(pid=24699)[0m                         'action_prob': np.ndarray((200,), dtype=float32, min=0.047, max=0.048, mean=0.048),
[2m[36m(pid=24699)[0m                         'actions': np.ndarray((200,), dtype=int64, min=0.0, max=20.0, mean=8.785),
[2m[36m(pid=24699)[0m                         'advantages': np.ndarray((200,), dtype=float32, min=-1.795, max=0.889, mean=-0.804),
[2m[36m(pid=24699)[0m                         'agent_index': np.ndarray((200,), dtype=int64, min=0.0, max=0.0, mean=0.0),
[2m[36m(pid=24699)[0m                  

2020-09-21 15:13:10,921	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.74215735681355, 'policy_loss': 0.0049689438892528415, 'vf_loss': 0.7524105031043291, 'vf_explained_var': -0.055546276, 'kl': 1.4840966358342644e-06, 'entropy': 3.0445145815610886, 'entropy_coeff': 0.005}
2020-09-21 15:13:11,402	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.4072019543964416, 'policy_loss': -0.00019139843061566353, 'vf_loss': 0.4226140882819891, 'vf_explained_var': 0.014846619, 'kl': 5.996636417648915e-06, 'entropy': 3.044510066509247, 'entropy_coeff': 0.005}
2020-09-21 15:13:11,848	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.32706704130396247, 'policy_loss': -0.007329353829845786, 'vf_loss': 0.3496142402291298, 'vf_explained_var': 0.22108528, 'kl': 1.553400903731017e-05, 'entropy': 3.0445005297660828, 'entropy_coeff': 0.005}
2020-0

2020-09-21 15:13:22,732	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.15497729158960283, 'policy_loss': -0.031132487813010812, 'vf_loss': 0.19888629019260406, 'vf_explained_var': 0.5366366, 'kl': 0.008021197514608502, 'entropy': 3.0365739166736603, 'entropy_coeff': 0.005}
2020-09-21 15:13:23,185	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.1513091285014525, 'policy_loss': -0.03412269667023793, 'vf_loss': 0.19795575086027384, 'vf_explained_var': 0.53470504, 'kl': 0.008849467820255086, 'entropy': 3.0357529670000076, 'entropy_coeff': 0.005}
2020-09-21 15:13:23,665	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.14754583756439388, 'policy_loss': -0.03741527674719691, 'vf_loss': 0.19729734398424625, 'vf_explained_var': 0.53313345, 'kl': 0.009464869159273803, 'entropy': 3.0351396650075912, 'entropy_coeff': 0.005}
2020-09-

custom_metrics: {}
date: 2020-09-21_15-13-24
done: false
episode_len_mean: .nan
episode_reward_max: .nan
episode_reward_mean: .nan
episode_reward_min: .nan
episodes_this_iter: 0
episodes_total: 0
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 3.034578040242195
      entropy_coeff: 0.005
      kl: 0.010023656825069338
      policy_loss: -0.03918753028847277
      total_loss: 0.14416752418037504
      vf_explained_var: 0.5387840867042542
      vf_loss: 0.1955208396539092
  num_steps_sampled: 1000
  num_steps_trained: 1000
iterations_since_restore: 1
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 8.85952380952381
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 10.714285714285714
  vram_util_percent0: 0.9449909821282179
  vram_util_percent1: 0.03025086079685194
pid: 24278
policy

2020-09-21 15:13:26,755	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.7253275122493505, 'policy_loss': -0.0008961539715528488, 'vf_loss': 0.7413937505334616, 'vf_explained_var': 0.34614238, 'kl': 2.4377285421373074e-06, 'entropy': 3.0341580659151077, 'entropy_coeff': 0.005}
2020-09-21 15:13:27,245	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.3244570791721344, 'policy_loss': 0.0061579515459015965, 'vf_loss': 0.33345364034175873, 'vf_explained_var': 0.5451143, 'kl': 4.536377798558533e-05, 'entropy': 3.033623531460762, 'entropy_coeff': 0.005}
2020-09-21 15:13:27,699	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.2742005274631083, 'policy_loss': -0.002429720014333725, 'vf_loss': 0.291753095574677, 'vf_explained_var': 0.5433365, 'kl': 0.00014508600088447565, 'entropy': 3.0332770198583603, 'entropy_coeff': 0.005}
2020-09-21

2020-09-21 15:13:38,601	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.16560704971197993, 'policy_loss': -0.024903249694034457, 'vf_loss': 0.20361760491505265, 'vf_explained_var': 0.6812929, 'kl': 0.006805595563491806, 'entropy': 3.029797464609146, 'entropy_coeff': 0.005}
2020-09-21 15:13:39,056	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.1534872418269515, 'policy_loss': -0.037310892483219504, 'vf_loss': 0.20383224822580814, 'vf_explained_var': 0.69185424, 'kl': 0.0070505571202374995, 'entropy': 3.0298562943935394, 'entropy_coeff': 0.005}
2020-09-21 15:13:39,539	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.15697968704625964, 'policy_loss': -0.03345680329948664, 'vf_loss': 0.20340579003095627, 'vf_explained_var': 0.676563, 'kl': 0.007270206668181345, 'entropy': 3.030071869492531, 'entropy_coeff': 0.005}
2020-09-21

custom_metrics: {}
date: 2020-09-21_15-13-40
done: false
episode_len_mean: 344.0
episode_reward_max: -29.50523196013276
episode_reward_mean: -29.633807364730348
episode_reward_min: -29.762382769327935
episodes_this_iter: 2
episodes_total: 2
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 3.0302843153476715
      entropy_coeff: 0.005
      kl: 0.007646060170372948
      policy_loss: -0.034981953096576035
      total_loss: 0.1556775646749884
      vf_explained_var: 0.6822574734687805
      vf_loss: 0.20351712219417095
  num_steps_sampled: 2000
  num_steps_trained: 2000
iterations_since_restore: 2
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.057142857142857
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.39047619047619
  vram_util_percent0: 0.9449909821282179
  vram_util_

2020-09-21 15:13:42,529	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.40909063536673784, 'policy_loss': 0.0012504226760938764, 'vf_loss': 0.42301369458436966, 'vf_explained_var': 0.5601331, 'kl': 3.3093277947982003e-06, 'entropy': 3.0348943769931793, 'entropy_coeff': 0.005}
2020-09-21 15:13:42,981	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.24380928510800004, 'policy_loss': -0.006296465173363686, 'vf_loss': 0.26526109874248505, 'vf_explained_var': 0.7041556, 'kl': 5.314511952292378e-05, 'entropy': 3.034259155392647, 'entropy_coeff': 0.005}
2020-09-21 15:13:43,463	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.2095429904293269, 'policy_loss': -0.006450325716286898, 'vf_loss': 0.23104672599583864, 'vf_explained_var': 0.73666596, 'kl': 0.00036588378043234115, 'entropy': 3.032633990049362, 'entropy_coeff': 0.005}
2020-09

2020-09-21 15:13:54,275	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.11791367386467755, 'policy_loss': -0.034842533990740776, 'vf_loss': 0.16504077147692442, 'vf_explained_var': 0.80793655, 'kl': 0.009397605201229453, 'entropy': 3.020769402384758, 'entropy_coeff': 0.005}
2020-09-21 15:13:54,757	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.11035476345568895, 'policy_loss': -0.04593653278425336, 'vf_loss': 0.16841223184019327, 'vf_explained_var': 0.8127768, 'kl': 0.00993076927261427, 'entropy': 3.0200323313474655, 'entropy_coeff': 0.005}
2020-09-21 15:13:55,209	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.1197656566509977, 'policy_loss': -0.03419464419130236, 'vf_loss': 0.16597072035074234, 'vf_explained_var': 0.79295176, 'kl': 0.010295602900441736, 'entropy': 3.019819959998131, 'entropy_coeff': 0.005}
2020-09-21 

custom_metrics: {}
date: 2020-09-21_15-13-56
done: false
episode_len_mean: 402.5
episode_reward_max: -29.50523196013276
episode_reward_mean: -30.351647627290397
episode_reward_min: -31.429962008865026
episodes_this_iter: 2
episodes_total: 4
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 3.0190131068229675
      entropy_coeff: 0.005
      kl: 0.011043091595638543
      policy_loss: -0.04286684957332909
      total_loss: 0.1114863627590239
      vf_explained_var: 0.807428240776062
      vf_loss: 0.16613535583019257
  num_steps_sampled: 3000
  num_steps_trained: 3000
iterations_since_restore: 3
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.161904761904762
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.400000000000002
  vram_util_percent0: 0.9449909821282179
  vram_util_p

2020-09-21 15:13:58,550	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.47911103814840317, 'policy_loss': -0.004111881600692868, 'vf_loss': 0.4982970505952835, 'vf_explained_var': 0.39556876, 'kl': 7.235991507370532e-06, 'entropy': 3.0152586698532104, 'entropy_coeff': 0.005}
2020-09-21 15:13:59,011	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.3186361468397081, 'policy_loss': 0.0011570588685572147, 'vf_loss': 0.3325287438929081, 'vf_explained_var': 0.4680634, 'kl': 0.00012728000785955373, 'entropy': 3.0175683349370956, 'entropy_coeff': 0.005}
2020-09-21 15:13:59,493	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.27776406705379486, 'policy_loss': -0.00512288510799408, 'vf_loss': 0.297854358330369, 'vf_explained_var': 0.5092962, 'kl': 0.00044339165651763324, 'entropy': 3.020084783434868, 'entropy_coeff': 0.005}
2020-09-21 

2020-09-21 15:14:10,381	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.12985958997160196, 'policy_loss': -0.0362103704828769, 'vf_loss': 0.17877829214558005, 'vf_explained_var': 0.7125299, 'kl': 0.008014981664018705, 'entropy': 3.022565260529518, 'entropy_coeff': 0.005}
2020-09-21 15:14:10,513	INFO rollout_worker.py:736 -- Training on concatenated sample batches:

{ 'count': 64,
  'policy_batches': { 'default_policy': { 'data': { 'action_dist_inputs': np.ndarray((64, 21), dtype=float32, min=-0.526, max=0.624, mean=-0.026),
                                                    'action_logp': np.ndarray((64,), dtype=float32, min=-3.528, max=-2.495, mean=-3.042),
                                                    'action_prob': np.ndarray((64,), dtype=float32, min=0.029, max=0.082, mean=0.049),
                                                    'actions': np.ndarray((64,), dtype=int64, min=0.0, max=20.0, mean=9.234),
               

custom_metrics: {}
date: 2020-09-21_15-14-12
done: false
episode_len_mean: 392.0
episode_reward_max: -26.684516892237284
episode_reward_mean: -29.82489374287135
episode_reward_min: -31.429962008865026
episodes_this_iter: 2
episodes_total: 6
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 3.0194168388843536
      entropy_coeff: 0.005
      kl: 0.008751091896556318
      policy_loss: -0.04189324239268899
      total_loss: 0.11835866526234895
      vf_explained_var: 0.7135152220726013
      vf_loss: 0.1727236625738442
  num_steps_sampled: 4000
  num_steps_trained: 4000
iterations_since_restore: 4
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 4.999999999999999
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.400000000000002
  vram_util_percent0: 0.9449909821282179
  vram_util_

[2m[36m(pid=24699)[0m 2020-09-21 15:14:12,593	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 5167.227025046796,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 800},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-1.664, max=1.714, mean=0.211),
[2m[36m(pid=24699)[0m                                   'prev_action': 7,
[2m[36m(pid=24699)[0

2020-09-21 15:14:14,427	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.5151088368147612, 'policy_loss': 0.000852187629789114, 'vf_loss': 0.5293716434389353, 'vf_explained_var': 0.7032739, 'kl': 8.143711196506587e-06, 'entropy': 3.0234904885292053, 'entropy_coeff': 0.005}
2020-09-21 15:14:14,883	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.43780282977968454, 'policy_loss': 0.002165095997042954, 'vf_loss': 0.45073208399116993, 'vf_explained_var': 0.7381097, 'kl': 7.855167712023103e-05, 'entropy': 3.0235839784145355, 'entropy_coeff': 0.005}
2020-09-21 15:14:15,368	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.4016416920349002, 'policy_loss': -0.0035920856753364205, 'vf_loss': 0.4202777035534382, 'vf_explained_var': 0.7509671, 'kl': 0.0002461751068949525, 'entropy': 3.023558020591736, 'entropy_coeff': 0.005}
2020-09-21 15

2020-09-21 15:14:26,293	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.2505369409918785, 'policy_loss': -0.040920310537330806, 'vf_loss': 0.3042274219915271, 'vf_explained_var': 0.82039416, 'kl': 0.007688537705689669, 'entropy': 3.0153471529483795, 'entropy_coeff': 0.005}
2020-09-21 15:14:26,782	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.25307613774202764, 'policy_loss': -0.037562136771157384, 'vf_loss': 0.3033697521314025, 'vf_explained_var': 0.82075495, 'kl': 0.00781912516686134, 'entropy': 3.015445202589035, 'entropy_coeff': 0.005}
2020-09-21 15:14:27,272	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.2511769199045375, 'policy_loss': -0.04103410698007792, 'vf_loss': 0.30484093353152275, 'vf_explained_var': 0.8244495, 'kl': 0.00814326637191698, 'entropy': 3.0145758986473083, 'entropy_coeff': 0.005}
2020-09-21 15

custom_metrics: {}
date: 2020-09-21_15-14-28
done: false
episode_len_mean: 452.2857142857143
episode_reward_max: -26.684516892237284
episode_reward_mean: -29.92847334411669
episode_reward_min: -31.429962008865026
episodes_this_iter: 1
episodes_total: 7
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 3.0146709978580475
      entropy_coeff: 0.005
      kl: 0.008417920442298055
      policy_loss: -0.04263411811552942
      total_loss: 0.2446187175810337
      vf_explained_var: 0.8279019594192505
      vf_loss: 0.2998008197173476
  num_steps_sampled: 5000
  num_steps_trained: 5000
iterations_since_restore: 5
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.123809523809523
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.400000000000002
  vram_util_percent0: 0.9449909821282179
 

[2m[36m(pid=24699)[0m 2020-09-21 15:14:28,449	INFO rollout_worker.py:561 -- Generating sample batch of size 200
2020-09-21 15:14:30,159	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.5522604025900364, 'policy_loss': 0.004388323053717613, 'vf_loss': 0.5629226844757795, 'vf_explained_var': 0.77281725, 'kl': 9.989410945487887e-06, 'entropy': 3.010724723339081, 'entropy_coeff': 0.005}
2020-09-21 15:14:30,647	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.41258617118000984, 'policy_loss': -0.0016686615417711437, 'vf_loss': 0.429275318980217, 'vf_explained_var': 0.8219372, 'kl': 0.00010182767869082454, 'entropy': 3.010206028819084, 'entropy_coeff': 0.005}
2020-09-21 15:14:31,136	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.360874948091805, 'policy_loss': -0.0062040010234341025, 'vf_loss': 0.3820707928389311, 'vf_explained

2020-09-21 15:14:42,027	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.20650074677541852, 'policy_loss': -0.05072426365222782, 'vf_loss': 0.26971684116870165, 'vf_explained_var': 0.8883637, 'kl': 0.008508443977916613, 'entropy': 3.00887294113636, 'entropy_coeff': 0.005}
2020-09-21 15:14:42,515	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.20816917018964887, 'policy_loss': -0.04360469325911254, 'vf_loss': 0.2641662135720253, 'vf_explained_var': 0.8891427, 'kl': 0.008835945627652109, 'entropy': 3.008625864982605, 'entropy_coeff': 0.005}
2020-09-21 15:14:42,973	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.19666486082132906, 'policy_loss': -0.059336451813578606, 'vf_loss': 0.268319140188396, 'vf_explained_var': 0.88551587, 'kl': 0.00908009271370247, 'entropy': 3.008371204137802, 'entropy_coeff': 0.005}
2020-09-21 15:14

custom_metrics: {}
date: 2020-09-21_15-14-43
done: false
episode_len_mean: 488.6666666666667
episode_reward_max: -26.684516892237284
episode_reward_mean: -29.924304911653437
episode_reward_min: -31.429962008865026
episodes_this_iter: 2
episodes_total: 9
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 3.0070034712553024
      entropy_coeff: 0.005
      kl: 0.00932422187179327
      policy_loss: -0.05627769383136183
      total_loss: 0.19963559741154313
      vf_explained_var: 0.8882610201835632
      vf_loss: 0.2681510429829359
  num_steps_sampled: 6000
  num_steps_trained: 6000
iterations_since_restore: 6
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.17
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.400000000000002
  vram_util_percent0: 0.9449909821282179
  vram_util_p

2020-09-21 15:14:45,911	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.49031268060207367, 'policy_loss': -0.0011791368015110493, 'vf_loss': 0.5065454114228487, 'vf_explained_var': 0.793297, 'kl': 7.0266427588117075e-06, 'entropy': 3.0111404061317444, 'entropy_coeff': 0.005}
2020-09-21 15:14:46,367	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.37607317604124546, 'policy_loss': -0.0026616324903443456, 'vf_loss': 0.3937744041904807, 'vf_explained_var': 0.84068584, 'kl': 4.499792987644469e-05, 'entropy': 3.0106161385774612, 'entropy_coeff': 0.005}
2020-09-21 15:14:46,863	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.337442509829998, 'policy_loss': -0.007733745384030044, 'vf_loss': 0.3601783514022827, 'vf_explained_var': 0.85511136, 'kl': 0.00015713194761701743, 'entropy': 3.009845867753029, 'entropy_coeff': 0.005}
2020-09-

2020-09-21 15:14:57,772	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.18982568033970892, 'policy_loss': -0.03818469727411866, 'vf_loss': 0.24062622524797916, 'vf_explained_var': 0.89761615, 'kl': 0.007829061622032896, 'entropy': 2.992914095520973, 'entropy_coeff': 0.005}
2020-09-21 15:14:58,261	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.1847810943145305, 'policy_loss': -0.037445218418724835, 'vf_loss': 0.23471865989267826, 'vf_explained_var': 0.9052566, 'kl': 0.008235661429353058, 'entropy': 2.992608055472374, 'entropy_coeff': 0.005}
2020-09-21 15:14:58,744	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.1799248184543103, 'policy_loss': -0.0382100633578375, 'vf_loss': 0.23051774268969893, 'vf_explained_var': 0.903746, 'kl': 0.008591955876909196, 'entropy': 2.992093086242676, 'entropy_coeff': 0.005}
2020-09-21 15:1

custom_metrics: {}
date: 2020-09-21_15-14-59
done: false
episode_len_mean: 514.9090909090909
episode_reward_max: -26.684516892237284
episode_reward_mean: -29.896916174101555
episode_reward_min: -31.429962008865026
episodes_this_iter: 2
episodes_total: 11
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.9918481409549713
      entropy_coeff: 0.005
      kl: 0.008906436793040484
      policy_loss: -0.04114439041586593
      total_loss: 0.17831503180786967
      vf_explained_var: 0.9049184322357178
      vf_loss: 0.23174673691391945
  num_steps_sampled: 7000
  num_steps_trained: 7000
iterations_since_restore: 7
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.0095238095238095
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.400000000000002
  vram_util_percent0: 0.9449909821282

2020-09-21 15:15:01,719	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.6710840631276369, 'policy_loss': -0.0010273871012032032, 'vf_loss': 0.6870696991682053, 'vf_explained_var': 0.8192839, 'kl': 1.051217413872152e-05, 'entropy': 2.9922736138105392, 'entropy_coeff': 0.005}
2020-09-21 15:15:02,181	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.5235028341412544, 'policy_loss': 0.0005850710440427065, 'vf_loss': 0.5378532037138939, 'vf_explained_var': 0.85332257, 'kl': 9.541091537812463e-05, 'entropy': 2.9928147345781326, 'entropy_coeff': 0.005}
2020-09-21 15:15:02,685	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.4542090483009815, 'policy_loss': -0.0075575466034933925, 'vf_loss': 0.4766570841893554, 'vf_explained_var': 0.8702401, 'kl': 0.0002503304631318315, 'entropy': 2.9931209087371826, 'entropy_coeff': 0.005}
2020-09-21

2020-09-21 15:15:10,592	DEBUG rollout_worker.py:760 -- Training out:

{ 'default_policy': { 'learner_stats': { 'allreduce_latency': 0.0,
                                         'cur_kl_coeff': 0.3,
                                         'cur_lr': 1e-05,
                                         'entropy': 2.987664222717285,
                                         'entropy_coeff': 0.005,
                                         'kl': 0.006382048595696688,
                                         'policy_loss': -0.14404445886611938,
                                         'total_loss': -0.016676634550094604,
                                         'vf_explained_var': np.ndarray((1,), dtype=float32, min=0.96, max=0.96, mean=0.96),
                                         'vf_loss': 0.1403915137052536}}}

2020-09-21 15:15:10,732	DEBUG sgd.py:120 -- 19 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.18937004473991692, 'policy_loss': -0.03495435530203394

custom_metrics: {}
date: 2020-09-21_15-15-15
done: false
episode_len_mean: 539.6923076923077
episode_reward_max: -26.684516892237284
episode_reward_mean: -30.08808733409095
episode_reward_min: -31.429962008865026
episodes_this_iter: 2
episodes_total: 13
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.9804300367832184
      entropy_coeff: 0.005
      kl: 0.010814019828103483
      policy_loss: -0.04094390297541395
      total_loss: 0.1374239643337205
      vf_explained_var: 0.9483164548873901
      vf_loss: 0.1900258124805987
  num_steps_sampled: 8000
  num_steps_trained: 8000
iterations_since_restore: 8
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.1952380952380945
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.400000000000002
  vram_util_percent0: 0.9449909821282179

2020-09-21 15:15:17,803	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.3331521840300411, 'policy_loss': 0.0033753698226064444, 'vf_loss': 0.34468797501176596, 'vf_explained_var': 0.91925585, 'kl': 9.079285587909602e-06, 'entropy': 2.982779800891876, 'entropy_coeff': 0.005}
2020-09-21 15:15:18,258	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.22276877472177148, 'policy_loss': -0.001688948948867619, 'vf_loss': 0.2393571538850665, 'vf_explained_var': 0.9430043, 'kl': 3.936175676244602e-05, 'entropy': 2.982247978448868, 'entropy_coeff': 0.005}
2020-09-21 15:15:18,740	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.20098936487920582, 'policy_loss': 0.0003914309199899435, 'vf_loss': 0.2154733561910689, 'vf_explained_var': 0.9480715, 'kl': 0.00010489279338798951, 'entropy': 2.981377452611923, 'entropy_coeff': 0.005}
2020-09-21 

2020-09-21 15:15:29,587	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.1028193929232657, 'policy_loss': -0.05309114954434335, 'vf_loss': 0.1684148320928216, 'vf_explained_var': 0.9590622, 'kl': 0.007775315374601632, 'entropy': 2.96737639605999, 'entropy_coeff': 0.005}
2020-09-21 15:15:30,077	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.10233407584019005, 'policy_loss': -0.05718577734660357, 'vf_loss': 0.1719081625342369, 'vf_explained_var': 0.9602489, 'kl': 0.008153858769219369, 'entropy': 2.9668923765420914, 'entropy_coeff': 0.005}
2020-09-21 15:15:30,527	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.10693825612543151, 'policy_loss': -0.053446088219061494, 'vf_loss': 0.1726451190188527, 'vf_explained_var': 0.9591032, 'kl': 0.008572323247790337, 'entropy': 2.9664950370788574, 'entropy_coeff': 0.005}
2020-09-21 15:1

custom_metrics: {}
date: 2020-09-21_15-15-31
done: false
episode_len_mean: 537.1428571428571
episode_reward_max: -26.684516892237284
episode_reward_mean: -30.00809191438118
episode_reward_min: -31.429962008865026
episodes_this_iter: 1
episodes_total: 14
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.964755639433861
      entropy_coeff: 0.005
      kl: 0.009455559775233269
      policy_loss: -0.056913122767582536
      total_loss: 0.09950156765989959
      vf_explained_var: 0.9589958786964417
      vf_loss: 0.16840180521830916
  num_steps_sampled: 9000
  num_steps_trained: 9000
iterations_since_restore: 9
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.038095238095238
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.400000000000002
  vram_util_percent0: 0.944990982128217

[2m[36m(pid=24699)[0m 2020-09-21 15:15:31,746	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 7967.7053421684695,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 200},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-1.209, max=1.189, mean=0.22),
[2m[36m(pid=24699)[0m                                   'prev_action': 16,
[2m[36m(pid=24699)[

2020-09-21 15:15:33,637	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.3811308965086937, 'policy_loss': 0.0018968064105138183, 'vf_loss': 0.39396730437874794, 'vf_explained_var': 0.9215917, 'kl': 1.4979667779191352e-05, 'entropy': 2.9475420713424683, 'entropy_coeff': 0.005}
2020-09-21 15:15:34,119	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.2380869199987501, 'policy_loss': 0.0018697070190683007, 'vf_loss': 0.2509184367954731, 'vf_explained_var': 0.94417775, 'kl': 0.00012903789001939003, 'entropy': 2.947987422347069, 'entropy_coeff': 0.005}
2020-09-21 15:15:34,604	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.17272338806651533, 'policy_loss': -0.003341848263517022, 'vf_loss': 0.1907069617882371, 'vf_explained_var': 0.9608139, 'kl': 0.0003321170597700984, 'entropy': 2.9482723623514175, 'entropy_coeff': 0.005}
2020-09-2

2020-09-21 15:15:45,493	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.04395803465740755, 'policy_loss': -0.046126605942845345, 'vf_loss': 0.10192290460690856, 'vf_explained_var': 0.9780741, 'kl': 0.00953146000392735, 'entropy': 2.9395398795604706, 'entropy_coeff': 0.005}
2020-09-21 15:15:45,975	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.0455119835678488, 'policy_loss': -0.043989952420815825, 'vf_loss': 0.10115334670990705, 'vf_explained_var': 0.9787027, 'kl': 0.01015167630976066, 'entropy': 2.9393850713968277, 'entropy_coeff': 0.005}
2020-09-21 15:15:46,428	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.048106530448421836, 'policy_loss': -0.043481891276314855, 'vf_loss': 0.10319994948804379, 'vf_explained_var': 0.9783218, 'kl': 0.010260456067044288, 'entropy': 2.9379320442676544, 'entropy_coeff': 0.005}
2020-09-2

custom_metrics: {}
date: 2020-09-21_15-15-47
done: false
episode_len_mean: 537.1428571428571
episode_reward_max: -26.684516892237284
episode_reward_mean: -30.00809191438118
episode_reward_min: -31.429962008865026
episodes_this_iter: 0
episodes_total: 14
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.937513843178749
      entropy_coeff: 0.005
      kl: 0.01077236223500222
      policy_loss: -0.04321013973094523
      total_loss: 0.04739706008695066
      vf_explained_var: 0.978017270565033
      vf_loss: 0.10206305654719472
  num_steps_sampled: 10000
  num_steps_trained: 10000
iterations_since_restore: 10
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.2
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.400000000000002
  vram_util_percent0: 0.9449909821282179
  vram_util_

[2m[36m(pid=24699)[0m 2020-09-21 15:15:47,630	INFO rollout_worker.py:561 -- Generating sample batch of size 200
2020-09-21 15:15:49,494	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.7786347176879644, 'policy_loss': -0.0049350791377946734, 'vf_loss': 0.7983323633670807, 'vf_explained_var': 0.8198077, 'kl': 0.00014800364715061853, 'entropy': 2.9613910019397736, 'entropy_coeff': 0.005}
2020-09-21 15:15:49,976	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.651186415925622, 'policy_loss': -0.008471584995277226, 'vf_loss': 0.6741790547966957, 'vf_explained_var': 0.85149294, 'kl': 0.0009821097955864388, 'entropy': 2.963138073682785, 'entropy_coeff': 0.005}
2020-09-21 15:15:50,421	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.5546403210610151, 'policy_loss': -0.018200313672423363, 'vf_loss': 0.5867780558764935, 'vf_explaine

2020-09-21 15:16:01,324	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.280650831758976, 'policy_loss': -0.05510517489165068, 'vf_loss': 0.3474175641313195, 'vf_explained_var': 0.9166676, 'kl': 0.01019826193805784, 'entropy': 2.944206863641739, 'entropy_coeff': 0.005}
2020-09-21 15:16:01,777	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.2770382799208164, 'policy_loss': -0.05197056906763464, 'vf_loss': 0.3405172051861882, 'vf_explained_var': 0.91821885, 'kl': 0.010692928393837065, 'entropy': 2.9432471245527267, 'entropy_coeff': 0.005}
2020-09-21 15:16:02,270	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.2717637214809656, 'policy_loss': -0.06288289051735774, 'vf_loss': 0.3460825840011239, 'vf_explained_var': 0.920231, 'kl': 0.010901540576014668, 'entropy': 2.9412866681814194, 'entropy_coeff': 0.005}
2020-09-21 15:16:02

custom_metrics: {}
date: 2020-09-21_15-16-03
done: false
episode_len_mean: 548.9375
episode_reward_max: -26.684516892237284
episode_reward_mean: -29.980654619512794
episode_reward_min: -31.429962008865026
episodes_this_iter: 2
episodes_total: 16
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.941627860069275
      entropy_coeff: 0.005
      kl: 0.011242008535191417
      policy_loss: -0.056538538774475455
      total_loss: 0.26975684566423297
      vf_explained_var: 0.9208196401596069
      vf_loss: 0.3376309173181653
  num_steps_sampled: 11000
  num_steps_trained: 11000
iterations_since_restore: 11
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.233333333333334
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.400000000000002
  vram_util_percent0: 0.9449909821282179
  vr

2020-09-21 15:16:05,555	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.5975210815668106, 'policy_loss': 0.0025965217500925064, 'vf_loss': 0.6096124351024628, 'vf_explained_var': 0.73539543, 'kl': 9.550294834001494e-05, 'entropy': 2.9433069080114365, 'entropy_coeff': 0.005}
2020-09-21 15:16:06,005	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.4220433449372649, 'policy_loss': -0.005949153215624392, 'vf_loss': 0.44254764914512634, 'vf_explained_var': 0.7937242, 'kl': 0.0005123656574141933, 'entropy': 2.9417723417282104, 'entropy_coeff': 0.005}
2020-09-21 15:16:06,490	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.34130513621494174, 'policy_loss': -0.010395467281341553, 'vf_loss': 0.3658888675272465, 'vf_explained_var': 0.8301783, 'kl': 0.001717242572340183, 'entropy': 2.940688267350197, 'entropy_coeff': 0.005}
2020-09-21 1

2020-09-21 15:16:12,200	DEBUG sgd.py:120 -- 14 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.15774033684283495, 'policy_loss': -0.04727935581468046, 'vf_loss': 0.21772110369056463, 'vf_explained_var': 0.8963785, 'kl': 0.00651139224646613, 'entropy': 2.930966541171074, 'entropy_coeff': 0.005}
2020-09-21 15:16:12,654	DEBUG sgd.py:120 -- 15 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.1633721946272999, 'policy_loss': -0.03928098769392818, 'vf_loss': 0.21520843729376793, 'vf_explained_var': 0.8982411, 'kl': 0.006999398174230009, 'entropy': 2.931015208363533, 'entropy_coeff': 0.005}
2020-09-21 15:16:13,140	DEBUG sgd.py:120 -- 16 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.15078260854352266, 'policy_loss': -0.04972813290078193, 'vf_loss': 0.21303291525691748, 'vf_explained_var': 0.897038, 'kl': 0.007092675718013197, 'entropy': 2.9299962520599365, 'entropy_coeff': 0.005}
2020-09-21 15:1

custom_metrics: {}
date: 2020-09-21_15-16-19
done: false
episode_len_mean: 580.9473684210526
episode_reward_max: -26.684516892237284
episode_reward_mean: -30.087207160803676
episode_reward_min: -31.67115871160356
episodes_this_iter: 3
episodes_total: 19
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.9173905700445175
      entropy_coeff: 0.005
      kl: 0.011366330552846193
      policy_loss: -0.0688830164144747
      total_loss: 0.10566038335673511
      vf_explained_var: 0.9135808944702148
      vf_loss: 0.18572045117616653
  num_steps_sampled: 12000
  num_steps_trained: 12000
iterations_since_restore: 12
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.057142857142858
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.400000000000002
  vram_util_percent0: 0.9449909821282

2020-09-21 15:16:21,274	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.5313525032252073, 'policy_loss': -0.003458544611930847, 'vf_loss': 0.5492439577355981, 'vf_explained_var': 0.739956, 'kl': 5.896595688836825e-06, 'entropy': 2.8869362473487854, 'entropy_coeff': 0.005}
2020-09-21 15:16:21,734	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.4506206391379237, 'policy_loss': -1.4875316992402077e-05, 'vf_loss': 0.4650465967133641, 'vf_explained_var': 0.76433265, 'kl': 6.76114688076268e-05, 'entropy': 2.8862753361463547, 'entropy_coeff': 0.005}
2020-09-21 15:16:22,228	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.3977136514149606, 'policy_loss': -0.004670500056818128, 'vf_loss': 0.4167331922799349, 'vf_explained_var': 0.7653737, 'kl': 0.0002442040304231341, 'entropy': 2.8844595551490784, 'entropy_coeff': 0.005}
2020-09-21 1

2020-09-21 15:16:33,161	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.2193511058576405, 'policy_loss': -0.03852114430628717, 'vf_loss': 0.26902527222409844, 'vf_explained_var': 0.8598866, 'kl': 0.01080465957056731, 'entropy': 2.878884419798851, 'entropy_coeff': 0.005}
2020-09-21 15:16:33,644	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.21869355300441384, 'policy_loss': -0.04431224288418889, 'vf_loss': 0.27404981199651957, 'vf_explained_var': 0.8568763, 'kl': 0.011152375838719308, 'entropy': 2.8779462724924088, 'entropy_coeff': 0.005}
2020-09-21 15:16:34,097	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.2193413395434618, 'policy_loss': -0.04292618407635018, 'vf_loss': 0.27319670002907515, 'vf_explained_var': 0.8636036, 'kl': 0.011498511768877506, 'entropy': 2.8757468163967133, 'entropy_coeff': 0.005}
2020-09-21 15:

custom_metrics: {}
date: 2020-09-21_15-16-35
done: false
episode_len_mean: 557.8571428571429
episode_reward_max: -26.684516892237284
episode_reward_mean: -29.904699473381946
episode_reward_min: -31.67115871160356
episodes_this_iter: 2
episodes_total: 21
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.8734046816825867
      entropy_coeff: 0.005
      kl: 0.012073034420609474
      policy_loss: -0.05349628860130906
      total_loss: 0.205654451623559
      vf_explained_var: 0.8640261888504028
      vf_loss: 0.269895835313946
  num_steps_sampled: 13000
  num_steps_trained: 13000
iterations_since_restore: 13
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.161904761904761
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.400000000000002
  vram_util_percent0: 0.9449909821282179

2020-09-21 15:16:37,018	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.5001662131398916, 'policy_loss': -0.006451391789596528, 'vf_loss': 0.5210972633212805, 'vf_explained_var': 0.82174015, 'kl': 1.6495853045750408e-05, 'entropy': 2.896921142935753, 'entropy_coeff': 0.005}
2020-09-21 15:16:37,473	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.404498646967113, 'policy_loss': -0.004875930724665523, 'vf_loss': 0.4238153249025345, 'vf_explained_var': 0.85993165, 'kl': 0.0001908833191919257, 'entropy': 2.8996020704507828, 'entropy_coeff': 0.005}
2020-09-21 15:16:37,959	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.3658545631915331, 'policy_loss': -0.003904767334461212, 'vf_loss': 0.3840806791558862, 'vf_explained_var': 0.86992306, 'kl': 0.000649783785775071, 'entropy': 2.903256207704544, 'entropy_coeff': 0.005}
2020-09-21 15

2020-09-21 15:16:48,842	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.21440977090969682, 'policy_loss': -0.05359460809268057, 'vf_loss': 0.27930972538888454, 'vf_explained_var': 0.906026, 'kl': 0.010644775058608502, 'entropy': 2.8997558504343033, 'entropy_coeff': 0.005}
2020-09-21 15:16:49,327	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.21210900624282658, 'policy_loss': -0.04809897439554334, 'vf_loss': 0.27150088734924793, 'vf_explained_var': 0.9050859, 'kl': 0.010655849298927933, 'entropy': 2.897933155298233, 'entropy_coeff': 0.005}
2020-09-21 15:16:49,814	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.21414332510903478, 'policy_loss': -0.04793125297874212, 'vf_loss': 0.2733025290071964, 'vf_explained_var': 0.9064994, 'kl': 0.010861540969926864, 'entropy': 2.897282525897026, 'entropy_coeff': 0.005}
2020-09-21 15:

custom_metrics: {}
date: 2020-09-21_15-16-50
done: false
episode_len_mean: 534.6666666666666
episode_reward_max: -26.684516892237284
episode_reward_mean: -29.756084956390236
episode_reward_min: -31.67115871160356
episodes_this_iter: 3
episodes_total: 24
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.8967882096767426
      entropy_coeff: 0.005
      kl: 0.011509723379276693
      policy_loss: -0.05072227196069434
      total_loss: 0.21251026401296258
      vf_explained_var: 0.9087189435958862
      vf_loss: 0.27426356775686145
  num_steps_sampled: 14000
  num_steps_trained: 14000
iterations_since_restore: 14
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.165000000000001
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.400000000000002
  vram_util_percent0: 0.944990982128

[2m[36m(pid=24699)[0m 2020-09-21 15:16:51,002	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 7377.529088923189,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 189},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-1.419, max=1.189, mean=0.198),
[2m[36m(pid=24699)[0m                                   'prev_action': 2,
[2m[36m(pid=24699)[0

2020-09-21 15:16:52,856	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.23255535098724067, 'policy_loss': -0.0028282341081649065, 'vf_loss': 0.24960616882890463, 'vf_explained_var': 0.95860296, 'kl': 1.2382568329427808e-05, 'entropy': 2.8452617675065994, 'entropy_coeff': 0.005}
2020-09-21 15:16:53,308	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.15965208504348993, 'policy_loss': 3.299338277429342e-05, 'vf_loss': 0.17380540678277612, 'vf_explained_var': 0.9700116, 'kl': 8.5694068502562e-05, 'entropy': 2.842404305934906, 'entropy_coeff': 0.005}
2020-09-21 15:16:53,798	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.13764146529138088, 'policy_loss': -0.0020287674851715565, 'vf_loss': 0.15380559489130974, 'vf_explained_var': 0.97326887, 'kl': 0.0002563822226875345, 'entropy': 2.8424545377492905, 'entropy_coeff': 0.005}
2020-

2020-09-21 15:17:04,688	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.061043612426146865, 'policy_loss': -0.05210321303457022, 'vf_loss': 0.12421943806111813, 'vf_explained_var': 0.97811997, 'kl': 0.010570606624241918, 'entropy': 2.8487586826086044, 'entropy_coeff': 0.005}
2020-09-21 15:17:05,173	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.06008840189315379, 'policy_loss': -0.0533961511682719, 'vf_loss': 0.12443237891420722, 'vf_explained_var': 0.9779951, 'kl': 0.010988772381097078, 'entropy': 2.8488918095827103, 'entropy_coeff': 0.005}
2020-09-21 15:17:05,651	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.06330572615843266, 'policy_loss': -0.05198738910257816, 'vf_loss': 0.12616252806037664, 'vf_explained_var': 0.9785392, 'kl': 0.011256131459958851, 'entropy': 2.8492500483989716, 'entropy_coeff': 0.005}
2020-09-2

custom_metrics: {}
date: 2020-09-21_15-17-06
done: false
episode_len_mean: 534.6666666666666
episode_reward_max: -26.684516892237284
episode_reward_mean: -29.756084956390236
episode_reward_min: -31.67115871160356
episodes_this_iter: 0
episodes_total: 24
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.848358392715454
      entropy_coeff: 0.005
      kl: 0.011983380769379437
      policy_loss: -0.048850575694814324
      total_loss: 0.06407462188508362
      vf_explained_var: 0.9780111312866211
      vf_loss: 0.12357197748497128
  num_steps_sampled: 15000
  num_steps_trained: 15000
iterations_since_restore: 15
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.790476190476191
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.400000000000002
  vram_util_percent0: 0.944990982128

[2m[36m(pid=24699)[0m 2020-09-21 15:17:07,128	INFO rollout_worker.py:561 -- Generating sample batch of size 200
2020-09-21 15:17:09,088	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.7860419489443302, 'policy_loss': -0.000246428040554747, 'vf_loss': 0.8008282352238894, 'vf_explained_var': 0.79976046, 'kl': 4.142988920929902e-05, 'entropy': 2.9104568511247635, 'entropy_coeff': 0.005}
2020-09-21 15:17:09,574	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.522877024486661, 'policy_loss': 0.0013321175938472152, 'vf_loss': 0.5360669959336519, 'vf_explained_var': 0.87083256, 'kl': 9.843525913311169e-05, 'entropy': 2.910323843359947, 'entropy_coeff': 0.005}
2020-09-21 15:17:10,027	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.43481635535135865, 'policy_loss': -0.009474801947362721, 'vf_loss': 0.45878261514008045, 'vf_explain

2020-09-21 15:17:15,744	DEBUG sgd.py:120 -- 14 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.2846281314268708, 'policy_loss': -0.03557829558849335, 'vf_loss': 0.33310686610639095, 'vf_explained_var': 0.91742605, 'kl': 0.005609110958175734, 'entropy': 2.916634574532509, 'entropy_coeff': 0.005}
2020-09-21 15:17:16,189	DEBUG sgd.py:120 -- 15 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.26757717196596786, 'policy_loss': -0.0391121213324368, 'vf_loss': 0.3194293472915888, 'vf_explained_var': 0.9212425, 'kl': 0.0061355178186204284, 'entropy': 2.9161400496959686, 'entropy_coeff': 0.005}
2020-09-21 15:17:16,675	DEBUG sgd.py:120 -- 16 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.2641706126742065, 'policy_loss': -0.04387924412731081, 'vf_loss': 0.3206565733999014, 'vf_explained_var': 0.92388225, 'kl': 0.006574095255928114, 'entropy': 2.915787011384964, 'entropy_coeff': 0.005}
2020-09-21 15:

custom_metrics: {}
date: 2020-09-21_15-17-22
done: false
episode_len_mean: 537.0344827586207
episode_reward_max: -26.684516892237284
episode_reward_mean: -29.759948488565964
episode_reward_min: -31.67115871160356
episodes_this_iter: 5
episodes_total: 29
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.911046177148819
      entropy_coeff: 0.005
      kl: 0.010884105053264648
      policy_loss: -0.05637454998213798
      total_loss: 0.22225599107332528
      vf_explained_var: 0.9295306205749512
      vf_loss: 0.289920536801219
  num_steps_sampled: 16000
  num_steps_trained: 16000
iterations_since_restore: 16
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.1952380952380945
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.400000000000002
  vram_util_percent0: 0.94499098212821

2020-09-21 15:17:24,997	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.20886821707244962, 'policy_loss': 0.0013832112308591604, 'vf_loss': 0.22189781535416842, 'vf_explained_var': 0.8110408, 'kl': 4.000471713161424e-05, 'entropy': 2.884961351752281, 'entropy_coeff': 0.005}
2020-09-21 15:17:25,454	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.15989261562936008, 'policy_loss': -0.003988805430708453, 'vf_loss': 0.1781316203996539, 'vf_explained_var': 0.83141494, 'kl': 0.00048607284497848013, 'entropy': 2.8792047947645187, 'entropy_coeff': 0.005}
2020-09-21 15:17:25,942	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.13161239679902792, 'policy_loss': -0.009759859123732895, 'vf_loss': 0.15541805885732174, 'vf_explained_var': 0.8639629, 'kl': 0.0011207708557776641, 'entropy': 2.876406103372574, 'entropy_coeff': 0.005}
2020-09-

2020-09-21 15:17:36,895	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.04059600946493447, 'policy_loss': -0.05749639088753611, 'vf_loss': 0.10814036661759019, 'vf_explained_var': 0.9068387, 'kl': 0.014081571018323302, 'entropy': 2.8544869422912598, 'entropy_coeff': 0.005}
2020-09-21 15:17:37,379	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.04521478805691004, 'policy_loss': -0.05275637574959546, 'vf_loss': 0.10799566470086575, 'vf_explained_var': 0.8974581, 'kl': 0.014180248079355806, 'entropy': 2.85571551322937, 'entropy_coeff': 0.005}
2020-09-21 15:17:37,874	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.03846186399459839, 'policy_loss': -0.059751356253400445, 'vf_loss': 0.1081601595506072, 'vf_explained_var': 0.8972528, 'kl': 0.01442277105525136, 'entropy': 2.8547563701868057, 'entropy_coeff': 0.005}
2020-09-21 15

custom_metrics: {}
date: 2020-09-21_15-17-38
done: false
episode_len_mean: 537.0344827586207
episode_reward_max: -26.684516892237284
episode_reward_mean: -29.759948488565968
episode_reward_min: -31.67115871160356
episodes_this_iter: 0
episodes_total: 29
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.8511060178279877
      entropy_coeff: 0.005
      kl: 0.015102658013347536
      policy_loss: -0.061704514315351844
      total_loss: 0.03626649081707001
      vf_explained_var: 0.9059103727340698
      vf_loss: 0.1076957331970334
  num_steps_sampled: 17000
  num_steps_trained: 17000
iterations_since_restore: 17
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.076190476190476
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.400000000000002
  vram_util_percent0: 0.944990982128

2020-09-21 15:17:40,899	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.35115732718259096, 'policy_loss': 0.0021597101585939527, 'vf_loss': 0.36312213353812695, 'vf_explained_var': 0.9416829, 'kl': 6.328513221402765e-05, 'entropy': 2.828702673316002, 'entropy_coeff': 0.005}
2020-09-21 15:17:41,351	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.2140703045297414, 'policy_loss': -0.0021486892364919186, 'vf_loss': 0.2302615214139223, 'vf_explained_var': 0.96139014, 'kl': 0.00036302412081568036, 'entropy': 2.8302888572216034, 'entropy_coeff': 0.005}
2020-09-21 15:17:41,839	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.18927590269595385, 'policy_loss': -0.0007111073937267065, 'vf_loss': 0.20390469208359718, 'vf_explained_var': 0.96516263, 'kl': 0.0008182179190043826, 'entropy': 2.832630142569542, 'entropy_coeff': 0.005}
2020-0

2020-09-21 15:17:52,783	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.10536849149502814, 'policy_loss': -0.04556945781223476, 'vf_loss': 0.16208334360271692, 'vf_explained_var': 0.97228605, 'kl': 0.010420348844490945, 'entropy': 2.854299321770668, 'entropy_coeff': 0.005}
2020-09-21 15:17:53,272	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.10181795270182192, 'policy_loss': -0.04600783973000944, 'vf_loss': 0.15890379715710878, 'vf_explained_var': 0.9734769, 'kl': 0.01067689957562834, 'entropy': 2.856215000152588, 'entropy_coeff': 0.005}
2020-09-21 15:17:53,755	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.10135115694720298, 'policy_loss': -0.04699856764636934, 'vf_loss': 0.15938098169863224, 'vf_explained_var': 0.9733823, 'kl': 0.010812673368491232, 'entropy': 2.855012387037277, 'entropy_coeff': 0.005}
2020-09-21 15

custom_metrics: {}
date: 2020-09-21_15-17-54
done: false
episode_len_mean: 537.0344827586207
episode_reward_max: -26.684516892237284
episode_reward_mean: -29.759948488565968
episode_reward_min: -31.67115871160356
episodes_this_iter: 0
episodes_total: 29
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.8542517721652985
      entropy_coeff: 0.005
      kl: 0.011102633725386113
      policy_loss: -0.060136734071420506
      total_loss: 0.08622304850723594
      vf_explained_var: 0.973534107208252
      vf_loss: 0.1573002515360713
  num_steps_sampled: 18000
  num_steps_trained: 18000
iterations_since_restore: 18
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.133333333333333
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.400000000000002
  vram_util_percent0: 0.9449909821282

2020-09-21 15:17:56,745	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.4104809891432524, 'policy_loss': -0.0013580831582657993, 'vf_loss': 0.42629784159362316, 'vf_explained_var': 0.81194055, 'kl': 0.00019124767752316807, 'entropy': 2.9032285809516907, 'entropy_coeff': 0.005}
2020-09-21 15:17:57,234	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.3371410146355629, 'policy_loss': -0.012052800506353378, 'vf_loss': 0.3630116479471326, 'vf_explained_var': 0.8301348, 'kl': 0.002282967423525406, 'entropy': 2.9005447924137115, 'entropy_coeff': 0.005}
2020-09-21 15:17:57,719	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.3165516280569136, 'policy_loss': -0.007250335649587214, 'vf_loss': 0.3365812646225095, 'vf_explained_var': 0.8581325, 'kl': 0.00571243476588279, 'entropy': 2.8986063599586487, 'entropy_coeff': 0.005}
2020-09-21 1

2020-09-21 15:18:08,634	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.23393664555624127, 'policy_loss': -0.06269540812354535, 'vf_loss': 0.3083754749968648, 'vf_explained_var': 0.8685876, 'kl': 0.009131170460022986, 'entropy': 2.896553799510002, 'entropy_coeff': 0.005}
2020-09-21 15:18:09,120	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.2156748860143125, 'policy_loss': -0.059348530136048794, 'vf_loss': 0.28669720608741045, 'vf_explained_var': 0.87665546, 'kl': 0.00934055814286694, 'entropy': 2.8951930552721024, 'entropy_coeff': 0.005}
2020-09-21 15:18:09,573	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.22258672770112753, 'policy_loss': -0.06137798703275621, 'vf_loss': 0.29559521842747927, 'vf_explained_var': 0.87624526, 'kl': 0.009489513759035617, 'entropy': 2.8954718112945557, 'entropy_coeff': 0.005}
2020-09-21 

custom_metrics: {}
date: 2020-09-21_15-18-10
done: false
episode_len_mean: 539.8387096774194
episode_reward_max: -26.684516892237284
episode_reward_mean: -29.67024630478681
episode_reward_min: -31.67115871160356
episodes_this_iter: 2
episodes_total: 31
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.8947127163410187
      entropy_coeff: 0.005
      kl: 0.009942579083144665
      policy_loss: -0.05657577095553279
      total_loss: 0.22056887298822403
      vf_explained_var: 0.8716644048690796
      vf_loss: 0.2886354420334101
  num_steps_sampled: 19000
  num_steps_trained: 19000
iterations_since_restore: 19
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.109523809523809
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.400000000000002
  vram_util_percent0: 0.94499098212821

[2m[36m(pid=24699)[0m 2020-09-21 15:18:11,077	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 9328.67600485205,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 74},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-7.879, max=6.878, mean=0.293),
[2m[36m(pid=24699)[0m                                   'prev_action': 1,
[2m[36m(pid=24699)[0m 

2020-09-21 15:18:12,523	DEBUG rollout_worker.py:760 -- Training out:

{ 'default_policy': { 'learner_stats': { 'allreduce_latency': 0.0,
                                         'cur_kl_coeff': 0.3,
                                         'cur_lr': 1e-05,
                                         'entropy': 2.873291015625,
                                         'entropy_coeff': 0.005,
                                         'kl': 5.698308935819796e-09,
                                         'policy_loss': -0.00280037522315979,
                                         'total_loss': 0.4813106656074524,
                                         'vf_explained_var': np.ndarray((1,), dtype=float32, min=0.887, max=0.887, mean=0.887),
                                         'vf_loss': 0.4984774887561798}}}

2020-09-21 15:18:12,527	INFO rollout_worker.py:736 -- Training on concatenated sample batches:

{ 'count': 64,
  'policy_batches': { 'default_policy': { 'data': { 'action_dist_inputs':

2020-09-21 15:18:19,616	DEBUG sgd.py:120 -- 14 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.1263241830165498, 'policy_loss': -0.03805428108898923, 'vf_loss': 0.1764482562430203, 'vf_explained_var': 0.9544264, 'kl': 0.008075985591858625, 'entropy': 2.8985174894332886, 'entropy_coeff': 0.005}
2020-09-21 15:18:20,066	DEBUG sgd.py:120 -- 15 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.12801050225971267, 'policy_loss': -0.03701067576184869, 'vf_loss': 0.17672733217477798, 'vf_explained_var': 0.9545158, 'kl': 0.009306523308623582, 'entropy': 2.8996215760707855, 'entropy_coeff': 0.005}
2020-09-21 15:18:20,557	DEBUG sgd.py:120 -- 16 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.12422524252906442, 'policy_loss': -0.03718846675474197, 'vf_loss': 0.17307682754471898, 'vf_explained_var': 0.9550868, 'kl': 0.009438117151148617, 'entropy': 2.8989087492227554, 'entropy_coeff': 0.005}
2020-09-21 1

custom_metrics: {}
date: 2020-09-21_15-18-26
done: false
episode_len_mean: 553.6969696969697
episode_reward_max: -26.684516892237284
episode_reward_mean: -29.735328368900465
episode_reward_min: -31.822523597721474
episodes_this_iter: 2
episodes_total: 33
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.8981622457504272
      entropy_coeff: 0.005
      kl: 0.014183294144459069
      policy_loss: -0.057906622416339815
      total_loss: 0.08116210740990937
      vf_explained_var: 0.9601295590400696
      vf_loss: 0.14930455153807998
  num_steps_sampled: 20000
  num_steps_trained: 20000
iterations_since_restore: 20
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.1571428571428575
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.400000000000002
  vram_util_percent0: 0.944990982

[2m[36m(pid=24699)[0m 2020-09-21 15:18:26,958	INFO rollout_worker.py:561 -- Generating sample batch of size 200
2020-09-21 15:18:28,705	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.6152955740690231, 'policy_loss': -0.004883595393039286, 'vf_loss': 0.6346139591187239, 'vf_explained_var': 0.8283468, 'kl': 3.2552628560722496e-05, 'entropy': 2.888912796974182, 'entropy_coeff': 0.005}
2020-09-21 15:18:29,190	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.4687990192323923, 'policy_loss': -0.0023761453448969405, 'vf_loss': 0.4854348208755255, 'vf_explained_var': 0.86345035, 'kl': 0.000607627095632779, 'entropy': 2.8883882761001587, 'entropy_coeff': 0.005}
2020-09-21 15:18:29,676	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.3936514165252447, 'policy_loss': -0.0029471145244315267, 'vf_loss': 0.4106583967804909, 'vf_explain

2020-09-21 15:18:40,608	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.05260226014070213, 'policy_loss': -0.06396955600939691, 'vf_loss': 0.1280081132426858, 'vf_explained_var': 0.9668462, 'kl': 0.010074741556309164, 'entropy': 2.8917427510023117, 'entropy_coeff': 0.005}
2020-09-21 15:18:41,095	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.05587872734759003, 'policy_loss': -0.05038703163154423, 'vf_loss': 0.11767484876327217, 'vf_explained_var': 0.96788776, 'kl': 0.010158758144825697, 'entropy': 2.8913430720567703, 'entropy_coeff': 0.005}
2020-09-21 15:18:41,553	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.04452603618847206, 'policy_loss': -0.05734473996562883, 'vf_loss': 0.1131625936832279, 'vf_explained_var': 0.9694517, 'kl': 0.010538716684095562, 'entropy': 2.890686422586441, 'entropy_coeff': 0.005}
2020-09-21 1

custom_metrics: {}
date: 2020-09-21_15-18-42
done: false
episode_len_mean: 560.5555555555555
episode_reward_max: -26.684516892237284
episode_reward_mean: -29.686112629191452
episode_reward_min: -31.822523597721474
episodes_this_iter: 3
episodes_total: 36
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.8904492408037186
      entropy_coeff: 0.005
      kl: 0.011228814895730466
      policy_loss: -0.07100006088148803
      total_loss: 0.028423912590369582
      vf_explained_var: 0.9692778587341309
      vf_loss: 0.1105075681116432
  num_steps_sampled: 21000
  num_steps_trained: 21000
iterations_since_restore: 21
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.109523809523809
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.400000000000002
  vram_util_percent0: 0.94499098212

2020-09-21 15:18:44,543	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.2689084990415722, 'policy_loss': 0.003462375025264919, 'vf_loss': 0.2797649288550019, 'vf_explained_var': 0.9603201, 'kl': 0.00021421353229666984, 'entropy': 2.87661275267601, 'entropy_coeff': 0.005}
2020-09-21 15:18:45,032	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.22552771819755435, 'policy_loss': -0.006135109870228916, 'vf_loss': 0.24565162230283022, 'vf_explained_var': 0.9645962, 'kl': 0.001324744964222191, 'entropy': 2.877242788672447, 'entropy_coeff': 0.005}
2020-09-21 15:18:45,488	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.20989915472455323, 'policy_loss': -0.011423966985603329, 'vf_loss': 0.23486968036741018, 'vf_explained_var': 0.9649416, 'kl': 0.0028141972579760477, 'entropy': 2.8781638145446777, 'entropy_coeff': 0.005}
2020-09-21 15

2020-09-21 15:18:56,442	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.129524155985564, 'policy_loss': -0.05373114487156272, 'vf_loss': 0.19556242879480124, 'vf_explained_var': 0.9715163, 'kl': 0.0071045100630726665, 'entropy': 2.8876964449882507, 'entropy_coeff': 0.005}
2020-09-21 15:18:56,899	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.12112171202898026, 'policy_loss': -0.059600202017463744, 'vf_loss': 0.19297641701996326, 'vf_explained_var': 0.97118807, 'kl': 0.0072786105156410486, 'entropy': 2.887617126107216, 'entropy_coeff': 0.005}
2020-09-21 15:18:57,390	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.12690923106856644, 'policy_loss': -0.058811443857848644, 'vf_loss': 0.19791483785957098, 'vf_explained_var': 0.9712926, 'kl': 0.0074764373421203345, 'entropy': 2.8874187916517258, 'entropy_coeff': 0.005}
2020-09

custom_metrics: {}
date: 2020-09-21_15-18-58
done: false
episode_len_mean: 560.5555555555555
episode_reward_max: -26.684516892237284
episode_reward_mean: -29.686112629191452
episode_reward_min: -31.822523597721474
episodes_this_iter: 0
episodes_total: 36
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.885180115699768
      entropy_coeff: 0.005
      kl: 0.008013190177734941
      policy_loss: -0.06309933529701084
      total_loss: 0.12087937816977501
      vf_explained_var: 0.971560001373291
      vf_loss: 0.19600065518170595
  num_steps_sampled: 22000
  num_steps_trained: 22000
iterations_since_restore: 22
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.14
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.400000000000002
  vram_util_percent0: 0.9449909821282179
  vram_ut

2020-09-21 15:19:00,750	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.46394587599206716, 'policy_loss': 0.003222741768695414, 'vf_loss': 0.47510738484561443, 'vf_explained_var': 0.9197501, 'kl': 1.1999995837785882e-05, 'entropy': 2.8775710314512253, 'entropy_coeff': 0.005}
2020-09-21 15:19:01,239	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.3834650609642267, 'policy_loss': -0.0015331466565839946, 'vf_loss': 0.39933852292597294, 'vf_explained_var': 0.93306065, 'kl': 0.00011178220790952764, 'entropy': 2.874767929315567, 'entropy_coeff': 0.005}
2020-09-21 15:19:01,697	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.3311209920793772, 'policy_loss': -0.004297401639632881, 'vf_loss': 0.34966955333948135, 'vf_explained_var': 0.9437082, 'kl': 0.00036645560885517625, 'entropy': 2.8722179383039474, 'entropy_coeff': 0.005}
2020-0

2020-09-21 15:19:12,554	INFO rollout_worker.py:736 -- Training on concatenated sample batches:

{ 'count': 64,
  'policy_batches': { 'default_policy': { 'data': { 'action_dist_inputs': np.ndarray((64, 21), dtype=float32, min=-1.787, max=1.382, mean=-0.035),
                                                    'action_logp': np.ndarray((64,), dtype=float32, min=-4.473, max=-2.019, mean=-2.934),
                                                    'action_prob': np.ndarray((64,), dtype=float32, min=0.011, max=0.133, mean=0.061),
                                                    'actions': np.ndarray((64,), dtype=int64, min=0.0, max=20.0, mean=10.828),
                                                    'advantages': np.ndarray((64,), dtype=float32, min=-2.402, max=5.101, mean=-0.146),
                                                    'agent_index': np.ndarray((64,), dtype=int64, min=0.0, max=0.0, mean=0.0),
                                                    'dones': np.ndarray((64,), 

custom_metrics: {}
date: 2020-09-21_15-19-14
done: false
episode_len_mean: 560.5135135135135
episode_reward_max: -26.684516892237284
episode_reward_mean: -29.738450182920328
episode_reward_min: -31.822523597721474
episodes_this_iter: 1
episodes_total: 37
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.837899297475815
      entropy_coeff: 0.005
      kl: 0.013586772722192109
      policy_loss: -0.0683347872691229
      total_loss: 0.12135217059403658
      vf_explained_var: 0.9674100875854492
      vf_loss: 0.1998004149645567
  num_steps_sampled: 23000
  num_steps_trained: 23000
iterations_since_restore: 23
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.080952380952381
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.400000000000002
  vram_util_percent0: 0.94499098212821

2020-09-21 15:19:16,658	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.4177766451612115, 'policy_loss': 0.00011616144911386073, 'vf_loss': 0.43196520674973726, 'vf_explained_var': 0.8965278, 'kl': 0.00010426215707659559, 'entropy': 2.867200806736946, 'entropy_coeff': 0.005}
2020-09-21 15:19:17,154	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.3090588788036257, 'policy_loss': -0.005518054240383208, 'vf_loss': 0.3288043439388275, 'vf_explained_var': 0.91905284, 'kl': 0.00033246678049181355, 'entropy': 2.8654306828975677, 'entropy_coeff': 0.005}
2020-09-21 15:19:17,606	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.25842117657884955, 'policy_loss': -0.004880745778791606, 'vf_loss': 0.27737763337790966, 'vf_explained_var': 0.93252254, 'kl': 0.0008232683558162535, 'entropy': 2.864538997411728, 'entropy_coeff': 0.005}
2020-09

2020-09-21 15:19:28,505	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.11611455399543047, 'policy_loss': -0.058651946019381285, 'vf_loss': 0.18593146745115519, 'vf_explained_var': 0.9559367, 'kl': 0.010138569225091487, 'entropy': 2.8413073122501373, 'entropy_coeff': 0.005}
2020-09-21 15:19:28,957	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.1159962781239301, 'policy_loss': -0.055514984705951065, 'vf_loss': 0.18257490452378988, 'vf_explained_var': 0.95423716, 'kl': 0.01045966474339366, 'entropy': 2.8403080999851227, 'entropy_coeff': 0.005}
2020-09-21 15:19:29,446	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.11283401260152459, 'policy_loss': -0.07036360574420542, 'vf_loss': 0.19420786714181304, 'vf_explained_var': 0.9520645, 'kl': 0.010643894667737186, 'entropy': 2.840684026479721, 'entropy_coeff': 0.005}
2020-09-21

custom_metrics: {}
date: 2020-09-21_15-19-30
done: false
episode_len_mean: 564.7179487179487
episode_reward_max: -26.684516892237284
episode_reward_mean: -29.83659997147612
episode_reward_min: -31.822523597721474
episodes_this_iter: 2
episodes_total: 39
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.8393107652664185
      entropy_coeff: 0.005
      kl: 0.01111882971599698
      policy_loss: -0.06697900721337646
      total_loss: 0.10230841487646103
      vf_explained_var: 0.9554589986801147
      vf_loss: 0.18014832655899227
  num_steps_sampled: 24000
  num_steps_trained: 24000
iterations_since_restore: 24
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 4.98095238095238
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.400000000000002
  vram_util_percent0: 0.94499098212821

[2m[36m(pid=24699)[0m 2020-09-21 15:19:30,623	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 7902.195873132546,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 161},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-1.419, max=1.396, mean=0.165),
[2m[36m(pid=24699)[0m                                   'prev_action': 17,
[2m[36m(pid=24699)[

2020-09-21 15:19:32,507	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.5122964419424534, 'policy_loss': 0.0015683420933783054, 'vf_loss': 0.5248916391283274, 'vf_explained_var': 0.8473593, 'kl': 5.990451209181291e-05, 'entropy': 2.836306467652321, 'entropy_coeff': 0.005}
2020-09-21 15:19:32,967	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.41003337036818266, 'policy_loss': -0.0012347089941613376, 'vf_loss': 0.42524594999849796, 'vf_explained_var': 0.87627935, 'kl': 0.0007451293986378005, 'entropy': 2.84028522670269, 'entropy_coeff': 0.005}
2020-09-21 15:19:33,452	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.3438715282827616, 'policy_loss': -0.013082379358820617, 'vf_loss': 0.3704784903675318, 'vf_explained_var': 0.89014417, 'kl': 0.0022894138455740176, 'entropy': 2.842281088232994, 'entropy_coeff': 0.005}
2020-09-21 1

2020-09-21 15:19:44,365	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.12167388224042952, 'policy_loss': -0.05985458637587726, 'vf_loss': 0.19208098156377673, 'vf_explained_var': 0.94382274, 'kl': 0.012074838625267148, 'entropy': 2.83499214053154, 'entropy_coeff': 0.005}
2020-09-21 15:19:44,847	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.12464861723128706, 'policy_loss': -0.05228828446706757, 'vf_loss': 0.18745954800397158, 'vf_explained_var': 0.9447675, 'kl': 0.01219083636533469, 'entropy': 2.8359785825014114, 'entropy_coeff': 0.005}
2020-09-21 15:19:45,338	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.10745478072203696, 'policy_loss': -0.0674421148141846, 'vf_loss': 0.18543374631553888, 'vf_explained_var': 0.94473875, 'kl': 0.012111890595406294, 'entropy': 2.8340854197740555, 'entropy_coeff': 0.005}
2020-09-21 1

custom_metrics: {}
date: 2020-09-21_15-19-46
done: false
episode_len_mean: 569.8048780487804
episode_reward_max: -26.684516892237284
episode_reward_mean: -29.893685725929316
episode_reward_min: -31.889964824799378
episodes_this_iter: 2
episodes_total: 41
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.8367919623851776
      entropy_coeff: 0.005
      kl: 0.012967365037184209
      policy_loss: -0.06325102131813765
      total_loss: 0.11206540721468627
      vf_explained_var: 0.945068895816803
      vf_loss: 0.1856101807206869
  num_steps_sampled: 25000
  num_steps_trained: 25000
iterations_since_restore: 25
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.214285714285714
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.400000000000002
  vram_util_percent0: 0.9449909821282

[2m[36m(pid=24699)[0m 2020-09-21 15:19:46,514	INFO rollout_worker.py:561 -- Generating sample batch of size 200
2020-09-21 15:19:48,424	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.42989803198724985, 'policy_loss': 3.727592411451042e-05, 'vf_loss': 0.4441015589982271, 'vf_explained_var': 0.91965055, 'kl': 3.3062503837277823e-05, 'entropy': 2.8501443564891815, 'entropy_coeff': 0.005}
2020-09-21 15:19:48,878	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.27729873720090836, 'policy_loss': -0.011009478475898504, 'vf_loss': 0.30238847248256207, 'vf_explained_var': 0.946225, 'kl': 0.0005491015863299253, 'entropy': 2.848996937274933, 'entropy_coeff': 0.005}
2020-09-21 15:19:49,363	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.22592319501563907, 'policy_loss': -0.009419077425263822, 'vf_loss': 0.24921151995658875, 'vf_expl

2020-09-21 15:20:00,218	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.026834469754248857, 'policy_loss': -0.06497813388705254, 'vf_loss': 0.10250678798183799, 'vf_explained_var': 0.9800165, 'kl': 0.011748689110390842, 'entropy': 2.8437581658363342, 'entropy_coeff': 0.005}
2020-09-21 15:20:00,701	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.021852759760804474, 'policy_loss': -0.06911996408598498, 'vf_loss': 0.10151893924921751, 'vf_explained_var': 0.98012257, 'kl': 0.0122314392356202, 'entropy': 2.8431281000375748, 'entropy_coeff': 0.005}
2020-09-21 15:20:01,146	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.017199371475726366, 'policy_loss': -0.07303719589253888, 'vf_loss': 0.10070010228082538, 'vf_explained_var': 0.981199, 'kl': 0.012497279734816402, 'entropy': 2.8425441533327103, 'entropy_coeff': 0.005}
2020-09-2

custom_metrics: {}
date: 2020-09-21_15-20-02
done: false
episode_len_mean: 580.8333333333334
episode_reward_max: -26.684516892237284
episode_reward_mean: -29.900857996058363
episode_reward_min: -31.889964824799378
episodes_this_iter: 1
episodes_total: 42
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.841536059975624
      entropy_coeff: 0.005
      kl: 0.01269983482779935
      policy_loss: -0.06981073075439781
      total_loss: 0.019903131411410868
      vf_explained_var: 0.9813753366470337
      vf_loss: 0.10011159395799041
  num_steps_sampled: 26000
  num_steps_trained: 26000
iterations_since_restore: 26
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.114285714285715
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.400000000000002
  vram_util_percent0: 0.944990982128

2020-09-21 15:20:04,645	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.39512951485812664, 'policy_loss': -0.0005632609827443957, 'vf_loss': 0.40981557965278625, 'vf_explained_var': 0.90391827, 'kl': 1.6478925583207626e-05, 'entropy': 2.825547531247139, 'entropy_coeff': 0.005}
2020-09-21 15:20:05,125	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.30303645320236683, 'policy_loss': -0.0053256520768627524, 'vf_loss': 0.32245468348264694, 'vf_explained_var': 0.9232551, 'kl': 9.643157113714551e-05, 'entropy': 2.824301779270172, 'entropy_coeff': 0.005}
2020-09-21 15:20:05,626	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.2598427487537265, 'policy_loss': -0.009141662390902638, 'vf_loss': 0.28303732816129923, 'vf_explained_var': 0.93338895, 'kl': 0.0002372428061789833, 'entropy': 2.8248181343078613, 'entropy_coeff': 0.005}
2020-

2020-09-21 15:20:12,634	DEBUG rollout_worker.py:760 -- Training out:

{ 'default_policy': { 'learner_stats': { 'allreduce_latency': 0.0,
                                         'cur_kl_coeff': 0.3,
                                         'cur_lr': 1e-05,
                                         'entropy': 2.826984167098999,
                                         'entropy_coeff': 0.005,
                                         'kl': 0.00923076644539833,
                                         'policy_loss': 0.0950387567281723,
                                         'total_loss': 0.2818982005119324,
                                         'vf_explained_var': np.ndarray((1,), dtype=float32, min=0.956, max=0.956, mean=0.956),
                                         'vf_loss': 0.19822508096694946}}}

2020-09-21 15:20:12,747	DEBUG sgd.py:120 -- 17 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.1503250626847148, 'policy_loss': -0.04013381525874138, '

custom_metrics: {}
date: 2020-09-21_15-20-18
done: false
episode_len_mean: 583.3720930232558
episode_reward_max: -26.684516892237284
episode_reward_mean: -29.918226570578206
episode_reward_min: -31.889964824799378
episodes_this_iter: 1
episodes_total: 43
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.8248845636844635
      entropy_coeff: 0.005
      kl: 0.012905180628877133
      policy_loss: -0.055210488266311586
      total_loss: 0.12217164668254554
      vf_explained_var: 0.9548047780990601
      vf_loss: 0.18763501103967428
  num_steps_sampled: 27000
  num_steps_trained: 27000
iterations_since_restore: 27
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.104761904761905
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.400000000000002
  vram_util_percent0: 0.9449909821

2020-09-21 15:20:20,474	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.5358264669775963, 'policy_loss': 0.0016124791000038385, 'vf_loss': 0.5483313649892807, 'vf_explained_var': 0.8225628, 'kl': 4.542488029818381e-05, 'entropy': 2.8262027353048325, 'entropy_coeff': 0.005}
2020-09-21 15:20:20,988	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.3196215508505702, 'policy_loss': -0.002392803959082812, 'vf_loss': 0.33603333216160536, 'vf_explained_var': 0.89063275, 'kl': 0.00036918850219080923, 'entropy': 2.825946882367134, 'entropy_coeff': 0.005}
2020-09-21 15:20:21,481	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.2170701385475695, 'policy_loss': -0.005349931074306369, 'vf_loss': 0.23625950422137976, 'vf_explained_var': 0.9239885, 'kl': 0.0009574495525157545, 'entropy': 2.825333848595619, 'entropy_coeff': 0.005}
2020-09-21 

2020-09-21 15:20:32,415	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.006072276271879673, 'policy_loss': -0.05949248874094337, 'vf_loss': 0.07609754952136427, 'vf_explained_var': 0.9750113, 'kl': 0.01170315989293158, 'entropy': 2.8087457418441772, 'entropy_coeff': 0.005}
2020-09-21 15:20:32,908	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.003650918835774064, 'policy_loss': -0.05985048320144415, 'vf_loss': 0.07392249628901482, 'vf_explained_var': 0.97609735, 'kl': 0.012076196377165616, 'entropy': 2.8087894171476364, 'entropy_coeff': 0.005}
2020-09-21 15:20:33,363	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.00551509321667254, 'policy_loss': -0.057947377441450953, 'vf_loss': 0.07375935511663556, 'vf_explained_var': 0.97619885, 'kl': 0.012456650438252836, 'entropy': 2.8067757338285446, 'entropy_coeff': 0.005}
2020-0

custom_metrics: {}
date: 2020-09-21_15-20-34
done: false
episode_len_mean: 591.4222222222222
episode_reward_max: -26.684516892237284
episode_reward_mean: -29.89521507154153
episode_reward_min: -31.889964824799378
episodes_this_iter: 2
episodes_total: 45
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.806925356388092
      entropy_coeff: 0.005
      kl: 0.013184992712922394
      policy_loss: -0.07143951766192913
      total_loss: -0.009361999109387398
      vf_explained_var: 0.9769281148910522
      vf_loss: 0.07215664198156446
  num_steps_sampled: 28000
  num_steps_trained: 28000
iterations_since_restore: 28
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.166666666666667
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.400000000000002
  vram_util_percent0: 0.94499098212

2020-09-21 15:20:36,331	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.3593991529196501, 'policy_loss': -0.0030909961787983775, 'vf_loss': 0.37656097300350666, 'vf_explained_var': 0.888451, 'kl': 4.37004985228695e-05, 'entropy': 2.8167876601219177, 'entropy_coeff': 0.005}
2020-09-21 15:20:36,818	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.2897352164145559, 'policy_loss': -0.007928335631731898, 'vf_loss': 0.31169932521879673, 'vf_explained_var': 0.91087353, 'kl': 0.00018696347615332343, 'entropy': 2.818371891975403, 'entropy_coeff': 0.005}
2020-09-21 15:20:37,270	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.23133674880955368, 'policy_loss': -0.012727000401355326, 'vf_loss': 0.257996360771358, 'vf_explained_var': 0.9241276, 'kl': 0.0005158929816388991, 'entropy': 2.8174756914377213, 'entropy_coeff': 0.005}
2020-09-21 

2020-09-21 15:20:48,205	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.003787595545873046, 'policy_loss': -0.059216888854280114, 'vf_loss': 0.07401831611059606, 'vf_explained_var': 0.9786, 'kl': 0.00991041143424809, 'entropy': 2.797390565276146, 'entropy_coeff': 0.005}
2020-09-21 15:20:48,660	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.0014633202808909118, 'policy_loss': -0.05874522787053138, 'vf_loss': 0.07115453062579036, 'vf_explained_var': 0.9786791, 'kl': 0.010103787644766271, 'entropy': 2.795423924922943, 'entropy_coeff': 0.005}
2020-09-21 15:20:49,138	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.002634574717376381, 'policy_loss': -0.06052103464026004, 'vf_loss': 0.06875154562294483, 'vf_explained_var': 0.9795707, 'kl': 0.010364942194428295, 'entropy': 2.794914096593857, 'entropy_coeff': 0.005}
2020-09-21 

custom_metrics: {}
date: 2020-09-21_15-20-50
done: false
episode_len_mean: 593.468085106383
episode_reward_max: -26.684516892237284
episode_reward_mean: -29.969803813624985
episode_reward_min: -31.982254290750102
episodes_this_iter: 2
episodes_total: 47
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.7937904745340347
      entropy_coeff: 0.005
      kl: 0.011244159133639187
      policy_loss: -0.06266364059410989
      total_loss: -0.005946871009655297
      vf_explained_var: 0.9801150560379028
      vf_loss: 0.06731248367577791
  num_steps_sampled: 29000
  num_steps_trained: 29000
iterations_since_restore: 29
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.1
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.430000000000001
  vram_util_percent0: 0.9449909821282179
  vram_

[2m[36m(pid=24699)[0m 2020-09-21 15:20:50,334	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 6094.121807145901,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 471},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-6.73, max=6.237, mean=0.364),
[2m[36m(pid=24699)[0m                                   'prev_action': 0,
[2m[36m(pid=24699)[0m

2020-09-21 15:20:52,200	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.31999989261385053, 'policy_loss': -0.01140159327769652, 'vf_loss': 0.34521326422691345, 'vf_explained_var': 0.9109403, 'kl': 8.972991083644466e-05, 'entropy': 2.7677365839481354, 'entropy_coeff': 0.005}
2020-09-21 15:20:52,684	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.25112680747406557, 'policy_loss': -0.008056979451794177, 'vf_loss': 0.272860218770802, 'vf_explained_var': 0.9302504, 'kl': 0.0004734343929158058, 'entropy': 2.7636941969394684, 'entropy_coeff': 0.005}
2020-09-21 15:20:53,141	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.2062826391775161, 'policy_loss': -0.006466354709118605, 'vf_loss': 0.22623053938150406, 'vf_explained_var': 0.941035, 'kl': 0.0010610419703880325, 'entropy': 2.759968861937523, 'entropy_coeff': 0.005}
2020-09-21 15

2020-09-21 15:21:04,061	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.023035018472000957, 'policy_loss': -0.06260073150042444, 'vf_loss': 0.09529257472604513, 'vf_explained_var': 0.97414804, 'kl': 0.013497309118974954, 'entropy': 2.7412028163671494, 'entropy_coeff': 0.005}
2020-09-21 15:21:04,517	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.027827587444335222, 'policy_loss': -0.05792601592838764, 'vf_loss': 0.09550044126808643, 'vf_explained_var': 0.9743317, 'kl': 0.013200918037910014, 'entropy': 2.741422399878502, 'entropy_coeff': 0.005}
2020-09-21 15:21:05,011	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.01380164991132915, 'policy_loss': -0.07173978176433593, 'vf_loss': 0.09514490980654955, 'vf_explained_var': 0.9745745, 'kl': 0.013678152114152908, 'entropy': 2.7413839399814606, 'entropy_coeff': 0.005}
2020-09-

custom_metrics: {}
date: 2020-09-21_15-21-05
done: false
episode_len_mean: 593.468085106383
episode_reward_max: -26.684516892237284
episode_reward_mean: -29.969803813624985
episode_reward_min: -31.982254290750102
episodes_this_iter: 0
episodes_total: 47
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.7416616827249527
      entropy_coeff: 0.005
      kl: 0.013783353031612933
      policy_loss: -0.060604695696383715
      total_loss: 0.02402240759693086
      vf_explained_var: 0.9744489789009094
      vf_loss: 0.09420040529221296
  num_steps_sampled: 30000
  num_steps_trained: 30000
iterations_since_restore: 30
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.152380952380953
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.49047619047619
  vram_util_percent0: 0.944990982128

[2m[36m(pid=24699)[0m 2020-09-21 15:21:06,527	INFO rollout_worker.py:561 -- Generating sample batch of size 200
2020-09-21 15:21:08,473	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.46081474982202053, 'policy_loss': -0.0026946121361106634, 'vf_loss': 0.4772922210395336, 'vf_explained_var': 0.78114796, 'kl': 1.6141297730087878e-05, 'entropy': 2.757536470890045, 'entropy_coeff': 0.005}
2020-09-21 15:21:08,960	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.3191688866354525, 'policy_loss': -0.0008988508488982916, 'vf_loss': 0.333819180727005, 'vf_explained_var': 0.84567887, 'kl': 0.00011788058873207774, 'entropy': 2.757363259792328, 'entropy_coeff': 0.005}
2020-09-21 15:21:09,413	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.22328449808992445, 'policy_loss': -0.007773624267429113, 'vf_loss': 0.2447229428216815, 'vf_expl

2020-09-21 15:21:15,111	DEBUG sgd.py:120 -- 14 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.08107444015331566, 'policy_loss': -0.03865978785324842, 'vf_loss': 0.13128900714218616, 'vf_explained_var': 0.9425715, 'kl': 0.007198545848950744, 'entropy': 2.742868348956108, 'entropy_coeff': 0.005}
2020-09-21 15:21:15,592	DEBUG sgd.py:120 -- 15 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.07920824864413589, 'policy_loss': -0.045238654827699065, 'vf_loss': 0.13579455576837063, 'vf_explained_var': 0.9395356, 'kl': 0.00786144650192, 'entropy': 2.74121530354023, 'entropy_coeff': 0.005}
2020-09-21 15:21:16,054	DEBUG sgd.py:120 -- 16 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.07499192317482084, 'policy_loss': -0.042467306135222316, 'vf_loss': 0.12862531933933496, 'vf_explained_var': 0.94092906, 'kl': 0.008464444166747853, 'entropy': 2.741084948182106, 'entropy_coeff': 0.005}
2020-09-21 15:2

custom_metrics: {}
date: 2020-09-21_15-21-22
done: false
episode_len_mean: 593.468085106383
episode_reward_max: -26.684516892237284
episode_reward_mean: -29.969803813624985
episode_reward_min: -31.982254290750102
episodes_this_iter: 0
episodes_total: 47
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.738005369901657
      entropy_coeff: 0.005
      kl: 0.012348294898401946
      policy_loss: -0.054979508931864984
      total_loss: 0.050381263019517064
      vf_explained_var: 0.9463908076286316
      vf_loss: 0.11534631159156561
  num_steps_sampled: 31000
  num_steps_trained: 31000
iterations_since_restore: 31
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.066666666666667
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.5
  vram_util_percent0: 0.9449909821282179
  vram_u

2020-09-21 15:21:24,416	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.29577502189204097, 'policy_loss': 0.0034938338212668896, 'vf_loss': 0.30609031906351447, 'vf_explained_var': 0.8951484, 'kl': 0.00018827162196405345, 'entropy': 2.7731234431266785, 'entropy_coeff': 0.005}
2020-09-21 15:21:24,898	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.2065787659958005, 'policy_loss': -0.003382778144441545, 'vf_loss': 0.22362927068024874, 'vf_explained_var': 0.9235257, 'kl': 0.0006577238455065526, 'entropy': 2.7730098366737366, 'entropy_coeff': 0.005}
2020-09-21 15:21:25,355	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.16635412955656648, 'policy_loss': -0.01030145218828693, 'vf_loss': 0.18965591862797737, 'vf_explained_var': 0.9364874, 'kl': 0.0028951315180165693, 'entropy': 2.7737747132778168, 'entropy_coeff': 0.005}
2020-09-

2020-09-21 15:21:36,306	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.02260498481336981, 'policy_loss': -0.06667319289408624, 'vf_loss': 0.1001649925019592, 'vf_explained_var': 0.96519125, 'kl': 0.009922175551764667, 'entropy': 2.7726918309926987, 'entropy_coeff': 0.005}
2020-09-21 15:21:36,755	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.013123390614055097, 'policy_loss': -0.07413520058616996, 'vf_loss': 0.0979607873596251, 'vf_explained_var': 0.96601176, 'kl': 0.01053856877842918, 'entropy': 2.7727518528699875, 'entropy_coeff': 0.005}
2020-09-21 15:21:37,245	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.016079071327112615, 'policy_loss': -0.07004799693822861, 'vf_loss': 0.09677294339053333, 'vf_explained_var': 0.966607, 'kl': 0.010686422290746123, 'entropy': 2.770359694957733, 'entropy_coeff': 0.005}
2020-09-21 

custom_metrics: {}
date: 2020-09-21_15-21-38
done: false
episode_len_mean: 606.7647058823529
episode_reward_max: -26.684516892237284
episode_reward_mean: -30.026398828770304
episode_reward_min: -31.982254290750102
episodes_this_iter: 4
episodes_total: 51
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.770192265510559
      entropy_coeff: 0.005
      kl: 0.011220381944440305
      policy_loss: -0.07018571021035314
      total_loss: 0.014590646605938673
      vf_explained_var: 0.9674606323242188
      vf_loss: 0.09526120102964342
  num_steps_sampled: 32000
  num_steps_trained: 32000
iterations_since_restore: 32
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.128571428571429
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.5
  vram_util_percent0: 0.9449909821282179
  vram_u

2020-09-21 15:21:40,538	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.29579866176936775, 'policy_loss': 0.0018965990748256445, 'vf_loss': 0.3075853744521737, 'vf_explained_var': 0.8380606, 'kl': 3.417644489289717e-05, 'entropy': 2.738712355494499, 'entropy_coeff': 0.005}
2020-09-21 15:21:41,021	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.21678234403952956, 'policy_loss': 0.003282234654761851, 'vf_loss': 0.2271528523415327, 'vf_explained_var': 0.8811021, 'kl': 9.734214415857423e-05, 'entropy': 2.7363895028829575, 'entropy_coeff': 0.005}
2020-09-21 15:21:41,478	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.16492206789553165, 'policy_loss': -0.009066195460036397, 'vf_loss': 0.1875520572066307, 'vf_explained_var': 0.89247364, 'kl': 0.00032076203842734685, 'entropy': 2.7320044338703156, 'entropy_coeff': 0.005}
2020-09-21

2020-09-21 15:21:52,417	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.02851425192784518, 'policy_loss': -0.048536845250055194, 'vf_loss': 0.08740716939792037, 'vf_explained_var': 0.9510911, 'kl': 0.010552206600550562, 'entropy': 2.704347103834152, 'entropy_coeff': 0.005}
2020-09-21 15:21:52,881	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.021893554250709713, 'policy_loss': -0.054235022980719805, 'vf_loss': 0.08642496401444077, 'vf_explained_var': 0.95104235, 'kl': 0.010739132296293974, 'entropy': 2.7036257684230804, 'entropy_coeff': 0.005}
2020-09-21 15:21:53,376	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.027550590020837262, 'policy_loss': -0.0477691397536546, 'vf_loss': 0.08540701400488615, 'vf_explained_var': 0.95376515, 'kl': 0.01140300981933251, 'entropy': 2.7016370743513107, 'entropy_coeff': 0.005}
2020-09

custom_metrics: {}
date: 2020-09-21_15-21-54
done: false
episode_len_mean: 606.7647058823529
episode_reward_max: -26.684516892237284
episode_reward_mean: -30.026398828770304
episode_reward_min: -31.982254290750102
episodes_this_iter: 0
episodes_total: 51
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.7042538821697235
      entropy_coeff: 0.005
      kl: 0.011477071966510266
      policy_loss: -0.05203953059390187
      total_loss: 0.02114058705046773
      vf_explained_var: 0.9532491564750671
      vf_loss: 0.08325826888903975
  num_steps_sampled: 33000
  num_steps_trained: 33000
iterations_since_restore: 33
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 6.1380952380952385
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.5
  vram_util_percent0: 0.9449909821282179
  vram_

2020-09-21 15:21:56,721	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.4967910684645176, 'policy_loss': 0.005097105051390827, 'vf_loss': 0.5050287712365389, 'vf_explained_var': 0.8918667, 'kl': 0.00016732792294682497, 'entropy': 2.677002191543579, 'entropy_coeff': 0.005}
2020-09-21 15:21:57,213	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.2759060962125659, 'policy_loss': -0.005019391304813325, 'vf_loss': 0.2941256919875741, 'vf_explained_var': 0.93117326, 'kl': 0.0006038906431058422, 'entropy': 2.6762747168540955, 'entropy_coeff': 0.005}
2020-09-21 15:21:57,667	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.22266787278931588, 'policy_loss': 0.0016570796724408865, 'vf_loss': 0.23398648295551538, 'vf_explained_var': 0.945333, 'kl': 0.0013456762753776275, 'entropy': 2.675877556204796, 'entropy_coeff': 0.005}
2020-09-21 15

2020-09-21 15:22:08,635	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.11445580783765763, 'policy_loss': -0.05682592559605837, 'vf_loss': 0.18055472616106272, 'vf_explained_var': 0.95688987, 'kl': 0.013291786541230977, 'entropy': 2.6521061956882477, 'entropy_coeff': 0.005}
2020-09-21 15:22:09,092	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.11210809496697038, 'policy_loss': -0.05992165708448738, 'vf_loss': 0.18111049011349678, 'vf_explained_var': 0.956849, 'kl': 0.013924079423304647, 'entropy': 2.651593118906021, 'entropy_coeff': 0.005}
2020-09-21 15:22:09,580	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.10876300290692598, 'policy_loss': -0.0611466804984957, 'vf_loss': 0.17895942367613316, 'vf_explained_var': 0.9576324, 'kl': 0.014046477095689625, 'entropy': 2.6527355760335922, 'entropy_coeff': 0.005}
2020-09-21 1

custom_metrics: {}
date: 2020-09-21_15-22-10
done: false
episode_len_mean: 613.0769230769231
episode_reward_max: -26.684516892237284
episode_reward_mean: -30.026766218132217
episode_reward_min: -31.982254290750102
episodes_this_iter: 1
episodes_total: 52
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.6513628512620926
      entropy_coeff: 0.005
      kl: 0.014417399826925248
      policy_loss: -0.060184311820194125
      total_loss: 0.10841073421761394
      vf_explained_var: 0.957861602306366
      vf_loss: 0.17752663977444172
  num_steps_sampled: 34000
  num_steps_trained: 34000
iterations_since_restore: 34
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.190476190476191
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.495238095238095
  vram_util_percent0: 0.94499098212

[2m[36m(pid=24699)[0m 2020-09-21 15:22:10,755	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 6595.764061703615,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 543},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-1.751, max=1.396, mean=0.264),
[2m[36m(pid=24699)[0m                                   'prev_action': 15,
[2m[36m(pid=24699)[

2020-09-21 15:22:12,593	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.4227429013699293, 'policy_loss': 0.00562998210079968, 'vf_loss': 0.4305200520902872, 'vf_explained_var': 0.882866, 'kl': 2.5725273106791402e-05, 'entropy': 2.6829690635204315, 'entropy_coeff': 0.005}
2020-09-21 15:22:12,695	INFO rollout_worker.py:736 -- Training on concatenated sample batches:

{ 'count': 64,
  'policy_batches': { 'default_policy': { 'data': { 'action_dist_inputs': np.ndarray((64, 21), dtype=float32, min=-2.422, max=2.436, mean=-0.043),
                                                    'action_logp': np.ndarray((64,), dtype=float32, min=-4.384, max=-1.285, mean=-2.647),
                                                    'action_prob': np.ndarray((64,), dtype=float32, min=0.012, max=0.277, mean=0.089),
                                                    'actions': np.ndarray((64,), dtype=int64, min=1.0, max=20.0, mean=9.312),
                

2020-09-21 15:22:19,199	DEBUG sgd.py:120 -- 14 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.1494079161202535, 'policy_loss': -0.037057326175272465, 'vf_loss': 0.19806380476802588, 'vf_explained_var': 0.945032, 'kl': 0.0061124849307816476, 'entropy': 2.686462253332138, 'entropy_coeff': 0.005}
2020-09-21 15:22:19,676	DEBUG sgd.py:120 -- 15 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.15639029210433364, 'policy_loss': -0.030149719852488488, 'vf_loss': 0.19808712927624583, 'vf_explained_var': 0.94684404, 'kl': 0.006283606606302783, 'entropy': 2.686437875032425, 'entropy_coeff': 0.005}
2020-09-21 15:22:20,157	DEBUG sgd.py:120 -- 16 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.14466304727829993, 'policy_loss': -0.04189810121897608, 'vf_loss': 0.19797665951773524, 'vf_explained_var': 0.94689405, 'kl': 0.006702796614263207, 'entropy': 2.685270741581917, 'entropy_coeff': 0.005}
2020-09-21

custom_metrics: {}
date: 2020-09-21_15-22-26
done: false
episode_len_mean: 614.1481481481482
episode_reward_max: -26.684516892237284
episode_reward_mean: -30.051454463393473
episode_reward_min: -31.982254290750102
episodes_this_iter: 2
episodes_total: 54
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.6818548291921616
      entropy_coeff: 0.005
      kl: 0.010326453892048448
      policy_loss: -0.06595230952370912
      total_loss: 0.112464499194175
      vf_explained_var: 0.9501712322235107
      vf_loss: 0.188728136010468
  num_steps_sampled: 35000
  num_steps_trained: 35000
iterations_since_restore: 35
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.247619047619047
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.461904761904762
  vram_util_percent0: 0.944990982128217

[2m[36m(pid=24699)[0m 2020-09-21 15:22:26,545	INFO rollout_worker.py:561 -- Generating sample batch of size 200
2020-09-21 15:22:28,437	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.4410327896475792, 'policy_loss': 0.0012072266545146704, 'vf_loss': 0.45336921885609627, 'vf_explained_var': 0.8557726, 'kl': 0.00010312947437762787, 'entropy': 2.7149212062358856, 'entropy_coeff': 0.005}
2020-09-21 15:22:28,899	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.30205438658595085, 'policy_loss': -0.002987199928611517, 'vf_loss': 0.3177118478342891, 'vf_explained_var': 0.89673424, 'kl': 0.0029321337133296765, 'entropy': 2.7099829614162445, 'entropy_coeff': 0.005}
2020-09-21 15:22:29,388	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.26196834980510175, 'policy_loss': -0.011721090762875974, 'vf_loss': 0.28595337737351656, 'vf_exp

2020-09-21 15:22:40,297	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.11452717357315123, 'policy_loss': -0.0635674437507987, 'vf_loss': 0.1879845792427659, 'vf_explained_var': 0.93562376, 'kl': 0.011968476290348917, 'entropy': 2.696101263165474, 'entropy_coeff': 0.005}
2020-09-21 15:22:40,784	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.11591225117444992, 'policy_loss': -0.06126551446504891, 'vf_loss': 0.18695081630721688, 'vf_explained_var': 0.93560266, 'kl': 0.012375944119412452, 'entropy': 2.6971675604581833, 'entropy_coeff': 0.005}
2020-09-21 15:22:41,239	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.10812916827853769, 'policy_loss': -0.06746241846121848, 'vf_loss': 0.18541894434019923, 'vf_explained_var': 0.9347653, 'kl': 0.012197888223454356, 'entropy': 2.6973439753055573, 'entropy_coeff': 0.005}
2020-09-21 

custom_metrics: {}
date: 2020-09-21_15-22-42
done: false
episode_len_mean: 612.2142857142857
episode_reward_max: -25.338032876205922
episode_reward_mean: -29.980535500030133
episode_reward_min: -31.982254290750102
episodes_this_iter: 2
episodes_total: 56
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.6965452283620834
      entropy_coeff: 0.005
      kl: 0.012801905104424804
      policy_loss: -0.06260002957424149
      total_loss: 0.11181116267107427
      vf_explained_var: 0.9365924596786499
      vf_loss: 0.18405334698036313
  num_steps_sampled: 36000
  num_steps_trained: 36000
iterations_since_restore: 36
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 4.9904761904761905
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.400000000000002
  vram_util_percent0: 0.9449909821

2020-09-21 15:22:44,767	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.427063275128603, 'policy_loss': -0.00748912594281137, 'vf_loss': 0.4476806912571192, 'vf_explained_var': 0.8826668, 'kl': 4.0038942231479524e-05, 'entropy': 2.6280598640441895, 'entropy_coeff': 0.005}
2020-09-21 15:22:45,260	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.3489928347989917, 'policy_loss': 0.0025028397794812918, 'vf_loss': 0.359457990154624, 'vf_explained_var': 0.9036239, 'kl': 0.0006037569701220491, 'entropy': 2.629825621843338, 'entropy_coeff': 0.005}
2020-09-21 15:22:45,709	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.30039958842098713, 'policy_loss': -0.011633507790975273, 'vf_loss': 0.32479935325682163, 'vf_explained_var': 0.9144246, 'kl': 0.0012849594932049513, 'entropy': 2.6303504556417465, 'entropy_coeff': 0.005}
2020-09-21 15:

2020-09-21 15:22:56,733	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.17871227231808007, 'policy_loss': -0.05991085831192322, 'vf_loss': 0.24829160887748003, 'vf_explained_var': 0.9334966, 'kl': 0.011380864074453712, 'entropy': 2.616546466946602, 'entropy_coeff': 0.005}
2020-09-21 15:22:57,192	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.16821169573813677, 'policy_loss': -0.06495948531664908, 'vf_loss': 0.24268940230831504, 'vf_explained_var': 0.9352775, 'kl': 0.011859358055517077, 'entropy': 2.61520753800869, 'entropy_coeff': 0.005}
2020-09-21 15:22:57,682	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.18362403637729585, 'policy_loss': -0.05867633584421128, 'vf_loss': 0.25177333131432533, 'vf_explained_var': 0.93304884, 'kl': 0.011997784546110779, 'entropy': 2.6144597083330154, 'entropy_coeff': 0.005}
2020-09-21 1

custom_metrics: {}
date: 2020-09-21_15-22-58
done: false
episode_len_mean: 612.2142857142857
episode_reward_max: -25.338032876205922
episode_reward_mean: -29.980535500030133
episode_reward_min: -31.982254290750102
episodes_this_iter: 0
episodes_total: 56
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.613553076982498
      entropy_coeff: 0.005
      kl: 0.012385270558297634
      policy_loss: -0.06441602564882487
      total_loss: 0.1700865513412282
      vf_explained_var: 0.9346309304237366
      vf_loss: 0.2438547690398991
  num_steps_sampled: 37000
  num_steps_trained: 37000
iterations_since_restore: 37
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.123809523809523
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.49047619047619
  vram_util_percent0: 0.944990982128217

2020-09-21 15:23:00,806	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.3930947883054614, 'policy_loss': -0.00039796181954443455, 'vf_loss': 0.40674381889402866, 'vf_explained_var': 0.87505466, 'kl': 0.00010482522836208208, 'entropy': 2.6565064787864685, 'entropy_coeff': 0.005}
2020-09-21 15:23:01,294	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.25267949257977307, 'policy_loss': -0.006568109150975943, 'vf_loss': 0.2723040133714676, 'vf_explained_var': 0.9118562, 'kl': 0.0006823642488598125, 'entropy': 2.652222201228142, 'entropy_coeff': 0.005}
2020-09-21 15:23:01,754	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.19896022736793384, 'policy_loss': -0.013407130376435816, 'vf_loss': 0.22492924612015486, 'vf_explained_var': 0.92866147, 'kl': 0.0022596313065150753, 'entropy': 2.6479567885398865, 'entropy_coeff': 0.005}
2020-

2020-09-21 15:23:12,678	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.012957269209437072, 'policy_loss': -0.06207495485432446, 'vf_loss': 0.08439357858151197, 'vf_explained_var': 0.9732815, 'kl': 0.012815246591344476, 'entropy': 2.6411865204572678, 'entropy_coeff': 0.005}
2020-09-21 15:23:12,750	INFO rollout_worker.py:736 -- Training on concatenated sample batches:

{ 'count': 64,
  'policy_batches': { 'default_policy': { 'data': { 'action_dist_inputs': np.ndarray((64, 21), dtype=float32, min=-2.397, max=2.503, mean=-0.032),
                                                    'action_logp': np.ndarray((64,), dtype=float32, min=-4.809, max=-1.322, mean=-2.857),
                                                    'action_prob': np.ndarray((64,), dtype=float32, min=0.008, max=0.267, mean=0.079),
                                                    'actions': np.ndarray((64,), dtype=int64, min=0.0, max=20.0, mean=9.609),
            

custom_metrics: {}
date: 2020-09-21_15-23-14
done: false
episode_len_mean: 609.8771929824561
episode_reward_max: -25.338032876205922
episode_reward_mean: -30.015872846904756
episode_reward_min: -31.994764271883824
episodes_this_iter: 1
episodes_total: 57
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.642096236348152
      entropy_coeff: 0.005
      kl: 0.013778912601992488
      policy_loss: -0.07473823777399957
      total_loss: -0.005476555146742612
      vf_explained_var: 0.9743996858596802
      vf_loss: 0.07833848800510168
  num_steps_sampled: 38000
  num_steps_trained: 38000
iterations_since_restore: 38
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.242857142857144
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.5
  vram_util_percent0: 0.9449909821282179
  vram_

2020-09-21 15:23:16,660	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.23823165777139366, 'policy_loss': 0.00043267612636554986, 'vf_loss': 0.25101296696811914, 'vf_explained_var': 0.90485656, 'kl': 0.0002196378095900564, 'entropy': 2.6559747457504272, 'entropy_coeff': 0.005}
2020-09-21 15:23:17,126	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.18094931449741125, 'policy_loss': -0.0011622542515397072, 'vf_loss': 0.19492362719029188, 'vf_explained_var': 0.9270602, 'kl': 0.0014312731309473747, 'entropy': 2.6482876539230347, 'entropy_coeff': 0.005}
2020-09-21 15:23:17,611	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.14323556248564273, 'policy_loss': -0.007880431367084384, 'vf_loss': 0.1635684221982956, 'vf_explained_var': 0.93775827, 'kl': 0.002535188767069485, 'entropy': 2.642597869038582, 'entropy_coeff': 0.005}
2020-0

2020-09-21 15:23:28,539	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.018195260548964143, 'policy_loss': -0.06325537455268204, 'vf_loss': 0.054499633610248566, 'vf_explained_var': 0.97913766, 'kl': 0.012253445922397077, 'entropy': 2.623110070824623, 'entropy_coeff': 0.005}
2020-09-21 15:23:29,027	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.01739071379415691, 'policy_loss': -0.05998191481921822, 'vf_loss': 0.05199145688675344, 'vf_explained_var': 0.9794793, 'kl': 0.012376931845210493, 'entropy': 2.622666507959366, 'entropy_coeff': 0.005}
2020-09-21 15:23:29,518	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.01992606185376644, 'policy_loss': -0.061767081497237086, 'vf_loss': 0.051041065249592066, 'vf_explained_var': 0.98006684, 'kl': 0.013010936789214611, 'entropy': 2.620666816830635, 'entropy_coeff': 0.005}
2020

custom_metrics: {}
date: 2020-09-21_15-23-30
done: false
episode_len_mean: 624.3728813559322
episode_reward_max: -25.338032876205922
episode_reward_mean: -30.030333484912486
episode_reward_min: -31.994764271883824
episodes_this_iter: 2
episodes_total: 59
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.6184993535280228
      entropy_coeff: 0.005
      kl: 0.013222970126662403
      policy_loss: -0.07151792594231665
      total_loss: -0.029789146734401584
      vf_explained_var: 0.9802207946777344
      vf_loss: 0.050854382338002324
  num_steps_sampled: 39000
  num_steps_trained: 39000
iterations_since_restore: 39
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.119047619047619
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.5
  vram_util_percent0: 0.9449909821282179
  vra

[2m[36m(pid=24699)[0m 2020-09-21 15:23:30,708	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 6374.138888322279,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 351},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-2.578, max=1.381, mean=0.092),
[2m[36m(pid=24699)[0m                                   'prev_action': 6,
[2m[36m(pid=24699)[0

2020-09-21 15:23:33,003	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.21843854594044387, 'policy_loss': 0.0025448184460401535, 'vf_loss': 0.22895522229373455, 'vf_explained_var': 0.95069224, 'kl': 2.0180239466593974e-05, 'entropy': 2.6135102212429047, 'entropy_coeff': 0.005}
2020-09-21 15:23:33,492	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.14441950363107026, 'policy_loss': -0.008858218439854681, 'vf_loss': 0.1663049589842558, 'vf_explained_var': 0.9643383, 'kl': 0.00022826482654636493, 'entropy': 2.619143471121788, 'entropy_coeff': 0.005}
2020-09-21 15:23:33,946	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.10132260760292411, 'policy_loss': -0.013920982601121068, 'vf_loss': 0.12815257348120213, 'vf_explained_var': 0.9713512, 'kl': 0.0007401313123409636, 'entropy': 2.6262039095163345, 'entropy_coeff': 0.005}
2020-0

2020-09-21 15:23:44,874	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.043124046351294965, 'policy_loss': -0.06645115977153182, 'vf_loss': 0.03308524296153337, 'vf_explained_var': 0.9926138, 'kl': 0.01173362648114562, 'entropy': 2.655643880367279, 'entropy_coeff': 0.005}
2020-09-21 15:23:45,328	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.037621013121679425, 'policy_loss': -0.0605442663654685, 'vf_loss': 0.032502972055226564, 'vf_explained_var': 0.9928528, 'kl': 0.012328693817835301, 'entropy': 2.6556655317544937, 'entropy_coeff': 0.005}
2020-09-21 15:23:45,815	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.047152117593213916, 'policy_loss': -0.0693454600404948, 'vf_loss': 0.03168182773515582, 'vf_explained_var': 0.99317086, 'kl': 0.012607328360900283, 'entropy': 2.6541374772787094, 'entropy_coeff': 0.005}
2020-0

custom_metrics: {}
date: 2020-09-21_15-23-46
done: false
episode_len_mean: 624.3728813559322
episode_reward_max: -25.338032876205922
episode_reward_mean: -30.030333484912486
episode_reward_min: -31.994764271883824
episodes_this_iter: 0
episodes_total: 59
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.6552228927612305
      entropy_coeff: 0.005
      kl: 0.013314067909959704
      policy_loss: -0.07043614762369543
      total_loss: -0.05020091129699722
      vf_explained_var: 0.9934712052345276
      vf_loss: 0.029517126851715147
  num_steps_sampled: 40000
  num_steps_trained: 40000
iterations_since_restore: 40
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.166666666666667
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.5
  vram_util_percent0: 0.9449909821282179
  vram

[2m[36m(pid=24699)[0m 2020-09-21 15:23:47,045	INFO rollout_worker.py:561 -- Generating sample batch of size 200
2020-09-21 15:23:48,996	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.3066424196586013, 'policy_loss': 0.0008097321260720491, 'vf_loss': 0.3189406208693981, 'vf_explained_var': 0.902708, 'kl': 3.9901204655579114e-05, 'entropy': 2.6239792704582214, 'entropy_coeff': 0.005}
2020-09-21 15:23:49,484	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.20659220800735056, 'policy_loss': -0.01266166241839528, 'vf_loss': 0.23220419604331255, 'vf_explained_var': 0.92769134, 'kl': 0.0006012401390762534, 'entropy': 2.6261418014764786, 'entropy_coeff': 0.005}
2020-09-21 15:23:49,931	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.1651883851736784, 'policy_loss': -0.00664022343698889, 'vf_loss': 0.18431130796670914, 'vf_explain

2020-09-21 15:24:00,877	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.04218423063866794, 'policy_loss': -0.06424422049894929, 'vf_loss': 0.03082361095584929, 'vf_explained_var': 0.9902996, 'kl': 0.014621396781876683, 'entropy': 2.630007430911064, 'entropy_coeff': 0.005}
2020-09-21 15:24:01,323	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.040332168515305966, 'policy_loss': -0.061406972527038306, 'vf_loss': 0.029853336047381163, 'vf_explained_var': 0.9904274, 'kl': 0.014565583609510213, 'entropy': 2.6296414732933044, 'entropy_coeff': 0.005}
2020-09-21 15:24:01,816	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.04881036886945367, 'policy_loss': -0.0688819419592619, 'vf_loss': 0.028765756636857986, 'vf_explained_var': 0.9907073, 'kl': 0.014829742664005607, 'entropy': 2.628621891140938, 'entropy_coeff': 0.005}
2020-0

custom_metrics: {}
date: 2020-09-21_15-24-02
done: false
episode_len_mean: 624.3728813559322
episode_reward_max: -25.338032876205922
episode_reward_mean: -30.030333484912486
episode_reward_min: -31.994764271883824
episodes_this_iter: 0
episodes_total: 59
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.627798706293106
      entropy_coeff: 0.005
      kl: 0.015311507042497396
      policy_loss: -0.06691304221749306
      total_loss: -0.04876326001249254
      vf_explained_var: 0.9911033511161804
      vf_loss: 0.026695323758758605
  num_steps_sampled: 41000
  num_steps_trained: 41000
iterations_since_restore: 41
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.199999999999999
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.5
  vram_util_percent0: 0.9449909821282179
  vram_

2020-09-21 15:24:05,365	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.29536355240270495, 'policy_loss': 0.002334974880795926, 'vf_loss': 0.3064931631088257, 'vf_explained_var': 0.8960307, 'kl': 7.67633387199762e-05, 'entropy': 2.6975237131118774, 'entropy_coeff': 0.005}
2020-09-21 15:24:05,859	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.18849760945886374, 'policy_loss': -0.0012534594861790538, 'vf_loss': 0.20312684029340744, 'vf_explained_var': 0.92896664, 'kl': 0.00037746643829450477, 'entropy': 2.6978020817041397, 'entropy_coeff': 0.005}
2020-09-21 15:24:06,314	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.1475801082851831, 'policy_loss': -0.013161652837879956, 'vf_loss': 0.17397494427859783, 'vf_explained_var': 0.93873906, 'kl': 0.0008988825538835954, 'entropy': 2.700569584965706, 'entropy_coeff': 0.005}
2020-09-

2020-09-21 15:24:12,985	DEBUG sgd.py:120 -- 16 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.01854277914389968, 'policy_loss': -0.057622259715572, 'vf_loss': 0.0868960281368345, 'vf_explained_var': 0.9681421, 'kl': 0.009200398228131235, 'entropy': 2.698222115635872, 'entropy_coeff': 0.005}
2020-09-21 15:24:13,461	DEBUG sgd.py:120 -- 17 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.01693681225879118, 'policy_loss': -0.05661153781693429, 'vf_loss': 0.08429220237303525, 'vf_explained_var': 0.969146, 'kl': 0.009192315628752112, 'entropy': 2.700308457016945, 'entropy_coeff': 0.005}
2020-09-21 15:24:13,920	DEBUG sgd.py:120 -- 18 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.009477130603045225, 'policy_loss': -0.06552472978364676, 'vf_loss': 0.08563552296254784, 'vf_explained_var': 0.96854985, 'kl': 0.009508902032393962, 'entropy': 2.697267323732376, 'entropy_coeff': 0.005}
2020-09-21 15:2

custom_metrics: {}
date: 2020-09-21_15-24-19
done: false
episode_len_mean: 648.8064516129032
episode_reward_max: -25.338032876205922
episode_reward_mean: -30.06583797481223
episode_reward_min: -31.994764271883824
episodes_this_iter: 3
episodes_total: 62
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.6905152797698975
      entropy_coeff: 0.005
      kl: 0.012719553837087005
      policy_loss: -0.07563259929884225
      total_loss: -0.010391613002866507
      vf_explained_var: 0.9725344777107239
      vf_loss: 0.0748776940163225
  num_steps_sampled: 42000
  num_steps_trained: 42000
iterations_since_restore: 42
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.223809523809524
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.5
  vram_util_percent0: 0.9449909821282179
  vram_u

2020-09-21 15:24:21,110	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.17344051878899336, 'policy_loss': -0.003381719347089529, 'vf_loss': 0.19001293554902077, 'vf_explained_var': 0.9578717, 'kl': 6.75539429916272e-05, 'entropy': 2.6421931236982346, 'entropy_coeff': 0.005}
2020-09-21 15:24:21,562	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.12217564135789871, 'policy_loss': -0.004263798065949231, 'vf_loss': 0.13960855174809694, 'vf_explained_var': 0.96904, 'kl': 0.00014130304498394253, 'entropy': 2.6423002630472183, 'entropy_coeff': 0.005}
2020-09-21 15:24:22,052	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.09065265659592114, 'policy_loss': -0.011819756124168634, 'vf_loss': 0.11558484239503741, 'vf_explained_var': 0.9745773, 'kl': 0.0003505808526824694, 'entropy': 2.6435208171606064, 'entropy_coeff': 0.005}
2020-09-2

2020-09-21 15:24:32,967	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.05051107471808791, 'policy_loss': -0.07709218235686421, 'vf_loss': 0.03543376759625971, 'vf_explained_var': 0.99192476, 'kl': 0.014360918197780848, 'entropy': 2.6321872919797897, 'entropy_coeff': 0.005}
2020-09-21 15:24:33,450	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.051300616934895515, 'policy_loss': -0.07705774961505085, 'vf_loss': 0.03449057543184608, 'vf_explained_var': 0.99197245, 'kl': 0.01474611385492608, 'entropy': 2.631454735994339, 'entropy_coeff': 0.005}
2020-09-21 15:24:33,935	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.0535196095588617, 'policy_loss': -0.08001085097203031, 'vf_loss': 0.0350722128059715, 'vf_explained_var': 0.9921813, 'kl': 0.015230765333399177, 'entropy': 2.630038842558861, 'entropy_coeff': 0.005}
2020-09-2

custom_metrics: {}
date: 2020-09-21_15-24-34
done: false
episode_len_mean: 657.328125
episode_reward_max: -25.338032876205922
episode_reward_mean: -30.054132729035366
episode_reward_min: -31.994764271883824
episodes_this_iter: 2
episodes_total: 64
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.629040911793709
      entropy_coeff: 0.005
      kl: 0.015711402404122055
      policy_loss: -0.08832519000861794
      total_loss: -0.06347212777473032
      vf_explained_var: 0.9923006296157837
      vf_loss: 0.03328484285157174
  num_steps_sampled: 43000
  num_steps_trained: 43000
iterations_since_restore: 43
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.1952380952380945
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.5
  vram_util_percent0: 0.9449909821282179
  vram_util_pe

2020-09-21 15:24:36,865	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.25220071664080024, 'policy_loss': -0.002520340960472822, 'vf_loss': 0.267449920065701, 'vf_explained_var': 0.94535697, 'kl': 5.0461126331335926e-05, 'entropy': 2.548801213502884, 'entropy_coeff': 0.005}
2020-09-21 15:24:37,347	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.19244373321998864, 'policy_loss': -0.0017098597018048167, 'vf_loss': 0.20669467095285654, 'vf_explained_var': 0.956951, 'kl': 0.0007844457340979716, 'entropy': 2.5552830547094345, 'entropy_coeff': 0.005}
2020-09-21 15:24:37,832	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.1775050297146663, 'policy_loss': -0.01314867241308093, 'vf_loss': 0.20273901522159576, 'vf_explained_var': 0.96127176, 'kl': 0.002428748797683511, 'entropy': 2.5627874583005905, 'entropy_coeff': 0.005}
2020-09-21

2020-09-21 15:24:48,705	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.07205415901262313, 'policy_loss': -0.06529337220126763, 'vf_loss': 0.1465040105395019, 'vf_explained_var': 0.969137, 'kl': 0.012237782240845263, 'entropy': 2.5655629336833954, 'entropy_coeff': 0.005}
2020-09-21 15:24:49,193	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.07205463800346479, 'policy_loss': -0.07083882193546742, 'vf_loss': 0.1520213051699102, 'vf_explained_var': 0.9685145, 'kl': 0.012337030260823667, 'entropy': 2.5657903254032135, 'entropy_coeff': 0.005}
2020-09-21 15:24:49,644	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.06403032911475748, 'policy_loss': -0.07497751002665609, 'vf_loss': 0.14793898910284042, 'vf_explained_var': 0.96844804, 'kl': 0.012993817799724638, 'entropy': 2.5658596605062485, 'entropy_coeff': 0.005}
2020-09-21 1

custom_metrics: {}
date: 2020-09-21_15-24-50
done: false
episode_len_mean: 657.328125
episode_reward_max: -25.338032876205922
episode_reward_mean: -30.054132729035366
episode_reward_min: -31.994764271883824
episodes_this_iter: 0
episodes_total: 64
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.561094745993614
      entropy_coeff: 0.005
      kl: 0.01310330705018714
      policy_loss: -0.07611612952314317
      total_loss: 0.06133656855672598
      vf_explained_var: 0.9691827297210693
      vf_loss: 0.14632718870416284
  num_steps_sampled: 44000
  num_steps_trained: 44000
iterations_since_restore: 44
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.066666666666667
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.5
  vram_util_percent0: 0.9449909821282179
  vram_util_perce

[2m[36m(pid=24699)[0m 2020-09-21 15:24:50,849	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 7746.91,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 217},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-1.315, max=1.189, mean=0.22),
[2m[36m(pid=24699)[0m                                   'prev_action': 20,
[2m[36m(pid=24699)[0m         

2020-09-21 15:24:52,686	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.3037648107856512, 'policy_loss': 0.004910349729470909, 'vf_loss': 0.3118165014311671, 'vf_explained_var': 0.8940467, 'kl': 6.099707344459082e-05, 'entropy': 2.596065863966942, 'entropy_coeff': 0.005}
2020-09-21 15:24:53,175	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.212908765533939, 'policy_loss': -0.006532411964144558, 'vf_loss': 0.23226665519177914, 'vf_explained_var': 0.9205999, 'kl': 0.0004917780270261574, 'entropy': 2.5945991426706314, 'entropy_coeff': 0.005}
2020-09-21 15:24:53,629	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.172431718907319, 'policy_loss': -0.013663089368492365, 'vf_loss': 0.19868685211986303, 'vf_explained_var': 0.93335015, 'kl': 0.0012773319103871472, 'entropy': 2.5950490683317184, 'entropy_coeff': 0.005}
2020-09-21 15:

2020-09-21 15:25:04,581	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.021076373755931854, 'policy_loss': -0.06397798331454396, 'vf_loss': 0.0945572517812252, 'vf_explained_var': 0.9674481, 'kl': 0.011793459067121148, 'entropy': 2.6081855446100235, 'entropy_coeff': 0.005}
2020-09-21 15:25:05,039	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.013345036830287427, 'policy_loss': -0.0750828767195344, 'vf_loss': 0.0977373025380075, 'vf_explained_var': 0.96632683, 'kl': 0.01238954224390909, 'entropy': 2.6052521467208862, 'entropy_coeff': 0.005}
2020-09-21 15:25:05,529	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.012611098936758935, 'policy_loss': -0.07123992091510445, 'vf_loss': 0.09311897167935967, 'vf_explained_var': 0.96710455, 'kl': 0.01254418824100867, 'entropy': 2.6062408834695816, 'entropy_coeff': 0.005}
2020-09-21

custom_metrics: {}
date: 2020-09-21_15-25-06
done: false
episode_len_mean: 658.6153846153846
episode_reward_max: -25.338032876205922
episode_reward_mean: -30.087561169987104
episode_reward_min: -32.22698139089825
episodes_this_iter: 1
episodes_total: 65
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.6059014946222305
      entropy_coeff: 0.005
      kl: 0.01309020962798968
      policy_loss: -0.07318214874248952
      total_loss: 0.012878502486273646
      vf_explained_var: 0.9675379991531372
      vf_loss: 0.0951630980707705
  num_steps_sampled: 45000
  num_steps_trained: 45000
iterations_since_restore: 45
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.285
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.5
  vram_util_percent0: 0.9449909821282179
  vram_util_percent1: 

[2m[36m(pid=24699)[0m 2020-09-21 15:25:06,746	INFO rollout_worker.py:561 -- Generating sample batch of size 200
2020-09-21 15:25:08,957	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.32342302426695824, 'policy_loss': -0.0003350719343870878, 'vf_loss': 0.3371625728905201, 'vf_explained_var': 0.8599357, 'kl': 0.0003977188350172689, 'entropy': 2.7047573626041412, 'entropy_coeff': 0.005}
2020-09-21 15:25:09,450	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.17687235260382295, 'policy_loss': -0.0078053860925138, 'vf_loss': 0.19777467101812363, 'vf_explained_var': 0.9179256, 'kl': 0.0014411088486667722, 'entropy': 2.705853447318077, 'entropy_coeff': 0.005}
2020-09-21 15:25:09,901	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.12628024467267096, 'policy_loss': -0.010345644084736705, 'vf_loss': 0.14898074604570866, 'vf_explai

2020-09-21 15:25:15,620	DEBUG sgd.py:120 -- 14 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.023790150065906346, 'policy_loss': -0.05607549217529595, 'vf_loss': 0.09028360759839416, 'vf_explained_var': 0.9614763, 'kl': 0.010490481625311077, 'entropy': 2.7130231112241745, 'entropy_coeff': 0.005}
2020-09-21 15:25:16,107	DEBUG sgd.py:120 -- 15 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.020628908067010343, 'policy_loss': -0.058799128979444504, 'vf_loss': 0.08971087518148124, 'vf_explained_var': 0.9612603, 'kl': 0.010978699079714715, 'entropy': 2.715289816260338, 'entropy_coeff': 0.005}
2020-09-21 15:25:16,558	DEBUG sgd.py:120 -- 16 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.018921908864285797, 'policy_loss': -0.060228760936297476, 'vf_loss': 0.08932303637266159, 'vf_explained_var': 0.9620707, 'kl': 0.011347808758728206, 'entropy': 2.715340778231621, 'entropy_coeff': 0.005}
2020-09

custom_metrics: {}
date: 2020-09-21_15-25-22
done: false
episode_len_mean: 659.8636363636364
episode_reward_max: -25.338032876205922
episode_reward_mean: -30.10290990426854
episode_reward_min: -32.22698139089825
episodes_this_iter: 1
episodes_total: 66
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.716336116194725
      entropy_coeff: 0.005
      kl: 0.014330660691484809
      policy_loss: -0.08019634475931525
      total_loss: -0.004486348130740225
      vf_explained_var: 0.9632386565208435
      vf_loss: 0.08499247930012643
  num_steps_sampled: 46000
  num_steps_trained: 46000
iterations_since_restore: 46
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.033333333333334
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.5
  vram_util_percent0: 0.9449909821282179
  vram_ut

2020-09-21 15:25:24,778	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.26064909622073174, 'policy_loss': 0.00010974262841045856, 'vf_loss': 0.2739097950980067, 'vf_explained_var': 0.9173275, 'kl': 0.00021616212355146658, 'entropy': 2.6870585829019547, 'entropy_coeff': 0.005}
2020-09-21 15:25:25,224	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.16321017523296177, 'policy_loss': -0.004199784132651985, 'vf_loss': 0.18073367839679122, 'vf_explained_var': 0.9413127, 'kl': 0.0002939094929388375, 'entropy': 2.6823782473802567, 'entropy_coeff': 0.005}
2020-09-21 15:25:25,713	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.12468659086152911, 'policy_loss': -0.009507488575764, 'vf_loss': 0.14737482881173491, 'vf_explained_var': 0.9513258, 'kl': 0.0007611145556438714, 'entropy': 2.68181711435318, 'entropy_coeff': 0.005}
2020-09-21 

2020-09-21 15:25:36,624	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.012596760352607816, 'policy_loss': -0.07177165699977195, 'vf_loss': 0.0687992216553539, 'vf_explained_var': 0.9771972, 'kl': 0.011877535143867135, 'entropy': 2.6375177800655365, 'entropy_coeff': 0.005}
2020-09-21 15:25:37,114	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.017429219791665673, 'policy_loss': -0.07443091610912234, 'vf_loss': 0.0666634738445282, 'vf_explained_var': 0.97791576, 'kl': 0.011753587168641388, 'entropy': 2.637570336461067, 'entropy_coeff': 0.005}
2020-09-21 15:25:37,605	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.016820995253510773, 'policy_loss': -0.07454823688021861, 'vf_loss': 0.06730282143689692, 'vf_explained_var': 0.9776125, 'kl': 0.01202366454526782, 'entropy': 2.6365355849266052, 'entropy_coeff': 0.005}
2020-09

custom_metrics: {}
date: 2020-09-21_15-25-38
done: false
episode_len_mean: 667.3768115942029
episode_reward_max: -25.338032876205922
episode_reward_mean: -30.131556011534368
episode_reward_min: -32.22698139089825
episodes_this_iter: 3
episodes_total: 69
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.6375990957021713
      entropy_coeff: 0.005
      kl: 0.012447283254005015
      policy_loss: -0.07815699063939974
      total_loss: -0.02148324646987021
      vf_explained_var: 0.9777644276618958
      vf_loss: 0.06612755334936082
  num_steps_sampled: 47000
  num_steps_trained: 47000
iterations_since_restore: 47
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.190476190476191
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.5
  vram_util_percent0: 0.9449909821282179
  vram_u

2020-09-21 15:25:40,580	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.35548554081469774, 'policy_loss': -0.0014835833862889558, 'vf_loss': 0.36985865607857704, 'vf_explained_var': 0.83503133, 'kl': 8.808686288552803e-05, 'entropy': 2.583190992474556, 'entropy_coeff': 0.005}
2020-09-21 15:25:41,030	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.28386995662003756, 'policy_loss': -0.0019933582516387105, 'vf_loss': 0.29838387854397297, 'vf_explained_var': 0.86561114, 'kl': 0.0013371062450460158, 'entropy': 2.5843396484851837, 'entropy_coeff': 0.005}
2020-09-21 15:25:41,516	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.24052234180271626, 'policy_loss': -0.013803460751660168, 'vf_loss': 0.2659115083515644, 'vf_explained_var': 0.8673095, 'kl': 0.004420275377924554, 'entropy': 2.5823576599359512, 'entropy_coeff': 0.005}
2020-0

2020-09-21 15:25:52,442	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.1482490913476795, 'policy_loss': -0.06969819508958608, 'vf_loss': 0.22668261174112558, 'vf_explained_var': 0.89525867, 'kl': 0.01377704372862354, 'entropy': 2.5736896693706512, 'entropy_coeff': 0.005}
2020-09-21 15:25:52,933	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.1476485297898762, 'policy_loss': -0.07105841976590455, 'vf_loss': 0.2273063249886036, 'vf_explained_var': 0.9010355, 'kl': 0.014240936608985066, 'entropy': 2.574331223964691, 'entropy_coeff': 0.005}
2020-09-21 15:25:53,388	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.13655814458616078, 'policy_loss': -0.07989337248727679, 'vf_loss': 0.22491686418652534, 'vf_explained_var': 0.8861259, 'kl': 0.014698434970341623, 'entropy': 2.5749752819538116, 'entropy_coeff': 0.005}
2020-09-21 15:

custom_metrics: {}
date: 2020-09-21_15-25-54
done: false
episode_len_mean: 667.3768115942029
episode_reward_max: -25.338032876205922
episode_reward_mean: -30.131556011534368
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 69
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.5708609223365784
      entropy_coeff: 0.005
      kl: 0.015001307590864599
      policy_loss: -0.0773612423799932
      total_loss: 0.1383665872272104
      vf_explained_var: 0.8970406651496887
      vf_loss: 0.22408173326402903
  num_steps_sampled: 48000
  num_steps_trained: 48000
iterations_since_restore: 48
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.1571428571428575
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.5
  vram_util_percent0: 0.9449909821282179
  vram_uti

2020-09-21 15:25:56,404	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.23835470457561314, 'policy_loss': 0.00036973407259210944, 'vf_loss': 0.2509110448881984, 'vf_explained_var': 0.89008564, 'kl': 3.372839401280525e-05, 'entropy': 2.587238594889641, 'entropy_coeff': 0.005}
2020-09-21 15:25:56,897	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.17571535223396495, 'policy_loss': -0.004388336557894945, 'vf_loss': 0.19297841656953096, 'vf_explained_var': 0.91503406, 'kl': 0.00028402765246937633, 'entropy': 2.591986835002899, 'entropy_coeff': 0.005}
2020-09-21 15:25:57,384	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.1537458406528458, 'policy_loss': -0.00264275714289397, 'vf_loss': 0.1689707413315773, 'vf_explained_var': 0.92449296, 'kl': 0.001366289667203091, 'entropy': 2.59840589761734, 'entropy_coeff': 0.005}
2020-09-21 

2020-09-21 15:26:08,343	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.043350177584216, 'policy_loss': -0.07252146885730326, 'vf_loss': 0.12553557846695185, 'vf_explained_var': 0.94427013, 'kl': 0.0110268538701348, 'entropy': 2.5943959802389145, 'entropy_coeff': 0.005}
2020-09-21 15:26:08,833	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.043974439380690455, 'policy_loss': -0.06902440160047263, 'vf_loss': 0.12278232211247087, 'vf_explained_var': 0.9433546, 'kl': 0.010625306284055114, 'entropy': 2.594215542078018, 'entropy_coeff': 0.005}
2020-09-21 15:26:09,288	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.04713183781132102, 'policy_loss': -0.0680878956336528, 'vf_loss': 0.12462140480056405, 'vf_explained_var': 0.94383883, 'kl': 0.011923832702450454, 'entropy': 2.5957655012607574, 'entropy_coeff': 0.005}
2020-09-21 15

custom_metrics: {}
date: 2020-09-21_15-26-10
done: false
episode_len_mean: 667.3768115942029
episode_reward_max: -25.338032876205922
episode_reward_mean: -30.131556011534368
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 69
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.593716472387314
      entropy_coeff: 0.005
      kl: 0.012252123677171767
      policy_loss: -0.0707887700991705
      total_loss: 0.043337267416063696
      vf_explained_var: 0.9431397914886475
      vf_loss: 0.12341898400336504
  num_steps_sampled: 49000
  num_steps_trained: 49000
iterations_since_restore: 49
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.138095238095238
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.5
  vram_util_percent0: 0.9449909821282179
  vram_uti

[2m[36m(pid=24699)[0m 2020-09-21 15:26:10,811	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 6565.112519610518,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 447},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-2.476, max=1.964, mean=0.336),
[2m[36m(pid=24699)[0m                                   'prev_action': 6,
[2m[36m(pid=24699)[0

2020-09-21 15:26:12,850	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.20548967644572258, 'policy_loss': -0.0015846604947000742, 'vf_loss': 0.2203658544458449, 'vf_explained_var': 0.8456998, 'kl': 0.0003326578265485658, 'entropy': 2.6782627552747726, 'entropy_coeff': 0.005}
2020-09-21 15:26:12,890	DEBUG rollout_worker.py:760 -- Training out:

{ 'default_policy': { 'learner_stats': { 'allreduce_latency': 0.0,
                                         'cur_kl_coeff': 0.3,
                                         'cur_lr': 1e-05,
                                         'entropy': 2.6761960983276367,
                                         'entropy_coeff': 0.005,
                                         'kl': 0.0009384004515595734,
                                         'policy_loss': -0.08454734086990356,
                                         'total_loss': 0.08389122039079666,
                                         'vf_explai

2020-09-21 15:26:19,490	DEBUG sgd.py:120 -- 14 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.04410392907448113, 'policy_loss': -0.051145796198397875, 'vf_loss': 0.10606413567438722, 'vf_explained_var': 0.92341983, 'kl': 0.00887422199593857, 'entropy': 2.6953370571136475, 'entropy_coeff': 0.005}
2020-09-21 15:26:19,944	DEBUG sgd.py:120 -- 15 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.038398974866140634, 'policy_loss': -0.057802612194791436, 'vf_loss': 0.10690603591501713, 'vf_explained_var': 0.92356277, 'kl': 0.009247577632777393, 'entropy': 2.695745751261711, 'entropy_coeff': 0.005}
2020-09-21 15:26:20,432	DEBUG sgd.py:120 -- 16 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.041113287676125765, 'policy_loss': -0.052594167878851295, 'vf_loss': 0.10429685097187757, 'vf_explained_var': 0.9254104, 'kl': 0.00962536729639396, 'entropy': 2.695401608943939, 'entropy_coeff': 0.005}
2020-09

custom_metrics: {}
date: 2020-09-21_15-26-26
done: false
episode_len_mean: 673.8732394366198
episode_reward_max: -25.338032876205922
episode_reward_mean: -30.171399927258317
episode_reward_min: -32.22698139089825
episodes_this_iter: 2
episodes_total: 71
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.6958591043949127
      entropy_coeff: 0.005
      kl: 0.013735119020566344
      policy_loss: -0.08354656957089901
      total_loss: 0.001405874383635819
      vf_explained_var: 0.9325600862503052
      vf_loss: 0.0943112033419311
  num_steps_sampled: 50000
  num_steps_trained: 50000
iterations_since_restore: 50
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.276190476190476
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.5
  vram_util_percent0: 0.9449909821282179
  vram_ut

[2m[36m(pid=24699)[0m 2020-09-21 15:26:26,812	INFO rollout_worker.py:561 -- Generating sample batch of size 200
2020-09-21 15:26:29,050	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.2588780701626092, 'policy_loss': 0.006468575913459063, 'vf_loss': 0.26583403442054987, 'vf_explained_var': 0.8570415, 'kl': 0.00030508662562261923, 'entropy': 2.7032132148742676, 'entropy_coeff': 0.005}
2020-09-21 15:26:29,543	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.1910046790726483, 'policy_loss': -0.0030660805059596896, 'vf_loss': 0.2073303135111928, 'vf_explained_var': 0.8948704, 'kl': 0.0007970620135893114, 'entropy': 2.6997349560260773, 'entropy_coeff': 0.005}
2020-09-21 15:26:29,988	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.16504004679154605, 'policy_loss': -0.004072050971444696, 'vf_loss': 0.18198966467753053, 'vf_expla

2020-09-21 15:26:40,878	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.04412986640818417, 'policy_loss': -0.059628399554640055, 'vf_loss': 0.11434488277882338, 'vf_explained_var': 0.9354055, 'kl': 0.009713585546705872, 'entropy': 2.7001370042562485, 'entropy_coeff': 0.005}
2020-09-21 15:26:41,332	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.04403640516102314, 'policy_loss': -0.0616798372939229, 'vf_loss': 0.11617370415478945, 'vf_explained_var': 0.93515354, 'kl': 0.010097387770656496, 'entropy': 2.6973367035388947, 'entropy_coeff': 0.005}
2020-09-21 15:26:41,818	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.040214707783889025, 'policy_loss': -0.07171257713343948, 'vf_loss': 0.12231956794857979, 'vf_explained_var': 0.93578935, 'kl': 0.010286516742780805, 'entropy': 2.6956487745046616, 'entropy_coeff': 0.005}
2020-09

custom_metrics: {}
date: 2020-09-21_15-26-42
done: false
episode_len_mean: 673.8732394366198
episode_reward_max: -25.338032876205922
episode_reward_mean: -30.17139992725832
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 71
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.697084739804268
      entropy_coeff: 0.005
      kl: 0.010788114042952657
      policy_loss: -0.06936458440031856
      total_loss: 0.035733621451072395
      vf_explained_var: 0.9366102814674377
      vf_loss: 0.11534719681367278
  num_steps_sampled: 51000
  num_steps_trained: 51000
iterations_since_restore: 51
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.1380952380952385
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.5
  vram_util_percent0: 0.9449909821282179
  vram_ut

2020-09-21 15:26:45,358	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.3397952844388783, 'policy_loss': 0.002994670532643795, 'vf_loss': 0.3501085601747036, 'vf_explained_var': 0.7784642, 'kl': 8.60129255406683e-05, 'entropy': 2.6667504608631134, 'entropy_coeff': 0.005}
2020-09-21 15:26:45,845	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.176170046441257, 'policy_loss': -0.0030791128519922495, 'vf_loss': 0.19225094839930534, 'vf_explained_var': 0.88066345, 'kl': 0.001071089002834924, 'entropy': 2.66462279856205, 'entropy_coeff': 0.005}
2020-09-21 15:26:46,288	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.11385749815963209, 'policy_loss': -0.010664664208889008, 'vf_loss': 0.13649033708497882, 'vf_explained_var': 0.91439474, 'kl': 0.0045083435252308846, 'entropy': 2.664136126637459, 'entropy_coeff': 0.005}
2020-09-21 15:

2020-09-21 15:26:57,270	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.0003162022912874818, 'policy_loss': -0.06555729592218995, 'vf_loss': 0.0750081823207438, 'vf_explained_var': 0.95073974, 'kl': 0.013478273758664727, 'entropy': 2.6356345862150192, 'entropy_coeff': 0.005}
2020-09-21 15:26:57,724	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.008094975142739713, 'policy_loss': -0.060317251831293106, 'vf_loss': 0.07713978900574148, 'vf_explained_var': 0.9504037, 'kl': 0.01481594069628045, 'entropy': 2.634468525648117, 'entropy_coeff': 0.005}
2020-09-21 15:26:58,211	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.0039701329078525305, 'policy_loss': -0.06315455469302833, 'vf_loss': 0.07630414608865976, 'vf_explained_var': 0.9498936, 'kl': 0.013306403532624245, 'entropy': 2.6342750787734985, 'entropy_coeff': 0.005}
2020-0

custom_metrics: {}
date: 2020-09-21_15-26-59
done: false
episode_len_mean: 673.8732394366198
episode_reward_max: -25.338032876205922
episode_reward_mean: -30.17139992725832
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 71
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.6331092715263367
      entropy_coeff: 0.005
      kl: 0.015114163339603692
      policy_loss: -0.07660832861438394
      total_loss: -0.008692546747624874
      vf_explained_var: 0.9508745074272156
      vf_loss: 0.07654708065092564
  num_steps_sampled: 52000
  num_steps_trained: 52000
iterations_since_restore: 52
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 6.095238095238095
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.5
  vram_util_percent0: 0.9449909821282179
  vram_u

2020-09-21 15:27:01,360	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.18409641296602786, 'policy_loss': 0.00018743646796792746, 'vf_loss': 0.19723736681044102, 'vf_explained_var': 0.83845997, 'kl': 7.592070147327545e-05, 'entropy': 2.6702330857515335, 'entropy_coeff': 0.005}
2020-09-21 15:27:01,857	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.12123640684876591, 'policy_loss': -0.010609470889903605, 'vf_loss': 0.1446464559994638, 'vf_explained_var': 0.8792444, 'kl': 0.001752872318320442, 'entropy': 2.6652870029211044, 'entropy_coeff': 0.005}
2020-09-21 15:27:02,306	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.09367346111685038, 'policy_loss': -0.01830620545661077, 'vf_loss': 0.12368207797408104, 'vf_explained_var': 0.8949027, 'kl': 0.0053100987279322, 'entropy': 2.6590882539749146, 'entropy_coeff': 0.005}
2020-09-21 

2020-09-21 15:27:12,899	INFO rollout_worker.py:736 -- Training on concatenated sample batches:

{ 'count': 64,
  'policy_batches': { 'default_policy': { 'data': { 'action_dist_inputs': np.ndarray((64, 21), dtype=float32, min=-3.141, max=2.996, mean=-0.021),
                                                    'action_logp': np.ndarray((64,), dtype=float32, min=-3.981, max=-0.838, mean=-2.48),
                                                    'action_prob': np.ndarray((64,), dtype=float32, min=0.019, max=0.433, mean=0.105),
                                                    'actions': np.ndarray((64,), dtype=int64, min=0.0, max=20.0, mean=11.094),
                                                    'advantages': np.ndarray((64,), dtype=float32, min=-1.875, max=2.456, mean=-0.123),
                                                    'agent_index': np.ndarray((64,), dtype=int64, min=0.0, max=0.0, mean=0.0),
                                                    'dones': np.ndarray((64,), d

custom_metrics: {}
date: 2020-09-21_15-27-15
done: false
episode_len_mean: 695.3378378378378
episode_reward_max: -25.338032876205922
episode_reward_mean: -30.168973580798973
episode_reward_min: -32.22698139089825
episodes_this_iter: 3
episodes_total: 74
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.6578845977783203
      entropy_coeff: 0.005
      kl: 0.013790217984933406
      policy_loss: -0.08825954888015985
      total_loss: -0.01774548226967454
      vf_explained_var: 0.931806206703186
      vf_loss: 0.07966641988605261
  num_steps_sampled: 53000
  num_steps_trained: 53000
iterations_since_restore: 53
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.2
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.5
  vram_util_percent0: 0.9449909821282179
  vram_util_percent1: 0

2020-09-21 15:27:17,197	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.20851262798532844, 'policy_loss': 0.00267969723790884, 'vf_loss': 0.21889451052993536, 'vf_explained_var': 0.764309, 'kl': 5.107637992585179e-05, 'entropy': 2.615380823612213, 'entropy_coeff': 0.005}
2020-09-21 15:27:17,648	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.15687980549409986, 'policy_loss': -0.001384819857776165, 'vf_loss': 0.17110417736694217, 'vf_explained_var': 0.8033722, 'kl': 0.0007690632610319881, 'entropy': 2.6140530109405518, 'entropy_coeff': 0.005}
2020-09-21 15:27:18,136	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.12756843538954854, 'policy_loss': -0.015198333421722054, 'vf_loss': 0.15531002497300506, 'vf_explained_var': 0.819714, 'kl': 0.0018334252599743195, 'entropy': 2.61865770816803, 'entropy_coeff': 0.005}
2020-09-21 15:

2020-09-21 15:27:29,039	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.04100870853289962, 'policy_loss': -0.06564386654645205, 'vf_loss': 0.11673897132277489, 'vf_explained_var': 0.8602648, 'kl': 0.01013930351473391, 'entropy': 2.625638708472252, 'entropy_coeff': 0.005}
2020-09-21 15:27:29,527	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.04025422141421586, 'policy_loss': -0.06484323530457914, 'vf_loss': 0.11506501771509647, 'vf_explained_var': 0.8666823, 'kl': 0.010412020201329142, 'entropy': 2.6182336807250977, 'entropy_coeff': 0.005}
2020-09-21 15:27:30,011	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.03437516710255295, 'policy_loss': -0.07172460964648053, 'vf_loss': 0.11595985013991594, 'vf_explained_var': 0.8610985, 'kl': 0.010865379241295159, 'entropy': 2.623938336968422, 'entropy_coeff': 0.005}
2020-09-21 15

custom_metrics: {}
date: 2020-09-21_15-27-30
done: false
episode_len_mean: 697.8421052631579
episode_reward_max: -25.338032876205922
episode_reward_mean: -30.17840034991742
episode_reward_min: -32.22698139089825
episodes_this_iter: 2
episodes_total: 76
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.625223085284233
      entropy_coeff: 0.005
      kl: 0.01164317416260019
      policy_loss: -0.0715790253598243
      total_loss: 0.03257748472969979
      vf_explained_var: 0.8627699613571167
      vf_loss: 0.11378967203199863
  num_steps_sampled: 54000
  num_steps_trained: 54000
iterations_since_restore: 54
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 4.942857142857143
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.5
  vram_util_percent0: 0.9449909821282179
  vram_util_p

[2m[36m(pid=24699)[0m 2020-09-21 15:27:31,189	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 7222.15,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 254},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-1.846, max=2.159, mean=0.14),
[2m[36m(pid=24699)[0m                                   'prev_action': 10,
[2m[36m(pid=24699)[0m         

2020-09-21 15:27:33,062	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.24226270243525505, 'policy_loss': 0.005514958407729864, 'vf_loss': 0.2492964044213295, 'vf_explained_var': 0.89863557, 'kl': 0.0004488880792280492, 'entropy': 2.536666437983513, 'entropy_coeff': 0.005}
2020-09-21 15:27:33,517	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.16981819574721158, 'policy_loss': -0.007533953641541302, 'vf_loss': 0.18927068635821342, 'vf_explained_var': 0.9207453, 'kl': 0.0023928007431095466, 'entropy': 2.527274563908577, 'entropy_coeff': 0.005}
2020-09-21 15:27:34,007	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.12736874970141798, 'policy_loss': -0.018553130445070565, 'vf_loss': 0.15686154458671808, 'vf_explained_var': 0.9318167, 'kl': 0.00555122023797594, 'entropy': 2.521006792783737, 'entropy_coeff': 0.005}
2020-09-21 15

2020-09-21 15:27:44,852	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.03018418571446091, 'policy_loss': -0.07084308937191963, 'vf_loss': 0.10950771626085043, 'vf_explained_var': 0.9526424, 'kl': 0.013749186415225267, 'entropy': 2.5210400372743607, 'entropy_coeff': 0.005}
2020-09-21 15:27:45,329	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.021489724749699235, 'policy_loss': -0.07848098105750978, 'vf_loss': 0.10843303985893726, 'vf_explained_var': 0.9543069, 'kl': 0.013825854053720832, 'entropy': 2.522017329931259, 'entropy_coeff': 0.005}
2020-09-21 15:27:45,776	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.03189000638667494, 'policy_loss': -0.07052409858442843, 'vf_loss': 0.11078801564872265, 'vf_explained_var': 0.95425034, 'kl': 0.014188034110702574, 'entropy': 2.5260638296604156, 'entropy_coeff': 0.005}
2020-09-2

custom_metrics: {}
date: 2020-09-21_15-27-46
done: false
episode_len_mean: 697.8421052631579
episode_reward_max: -25.338032876205922
episode_reward_mean: -30.17840034991742
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 76
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.5238128155469894
      entropy_coeff: 0.005
      kl: 0.014457647339440882
      policy_loss: -0.08181906264508143
      total_loss: 0.01773149543441832
      vf_explained_var: 0.9543105363845825
      vf_loss: 0.10783232655376196
  num_steps_sampled: 55000
  num_steps_trained: 55000
iterations_since_restore: 55
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.10952380952381
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.5
  vram_util_percent0: 0.9449909821282179
  vram_util

[2m[36m(pid=24699)[0m 2020-09-21 15:27:47,003	INFO rollout_worker.py:561 -- Generating sample batch of size 200
2020-09-21 15:27:49,196	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.48188421316444874, 'policy_loss': -0.004096517339348793, 'vf_loss': 0.49889612570405006, 'vf_explained_var': 0.7804738, 'kl': 6.76836836687178e-05, 'entropy': 2.58713898062706, 'entropy_coeff': 0.005}
2020-09-21 15:27:49,642	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.29453830420970917, 'policy_loss': -0.009228430106304586, 'vf_loss': 0.3161999359726906, 'vf_explained_var': 0.8727398, 'kl': 0.0015974167590684374, 'entropy': 2.5824866741895676, 'entropy_coeff': 0.005}
2020-09-21 15:27:50,128	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.20269922679290175, 'policy_loss': -0.01598524220753461, 'vf_loss': 0.23043206986039877, 'vf_explaine

2020-09-21 15:28:00,988	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.04831614694558084, 'policy_loss': -0.07663981057703495, 'vf_loss': 0.13403863459825516, 'vf_explained_var': 0.94023585, 'kl': 0.012351497367490083, 'entropy': 2.5576260685920715, 'entropy_coeff': 0.005}
2020-09-21 15:28:01,474	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.04128947731805965, 'policy_loss': -0.08209998323582113, 'vf_loss': 0.13224205514416099, 'vf_explained_var': 0.94077885, 'kl': 0.013002829218748957, 'entropy': 2.550688847899437, 'entropy_coeff': 0.005}
2020-09-21 15:28:01,971	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.04016249242704362, 'policy_loss': -0.08106167140067555, 'vf_loss': 0.13007286470383406, 'vf_explained_var': 0.94062895, 'kl': 0.013070064596831799, 'entropy': 2.553943946957588, 'entropy_coeff': 0.005}
2020-09-2

custom_metrics: {}
date: 2020-09-21_15-28-02
done: false
episode_len_mean: 697.8421052631579
episode_reward_max: -25.338032876205922
episode_reward_mean: -30.17840034991742
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 76
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.5533196330070496
      entropy_coeff: 0.005
      kl: 0.013875161530449986
      policy_loss: -0.08877413556911051
      total_loss: 0.03169356216676533
      vf_explained_var: 0.9442026615142822
      vf_loss: 0.12907174974679947
  num_steps_sampled: 56000
  num_steps_trained: 56000
iterations_since_restore: 56
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.133333333333333
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.5
  vram_util_percent0: 0.9449909821282179
  vram_uti

2020-09-21 15:28:05,112	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.33072298765182495, 'policy_loss': 0.0018822322017513216, 'vf_loss': 0.342456366866827, 'vf_explained_var': 0.57758594, 'kl': 5.507293361217691e-05, 'entropy': 2.726427435874939, 'entropy_coeff': 0.005}
2020-09-21 15:28:05,562	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.25612670346163213, 'policy_loss': -0.002834088445524685, 'vf_loss': 0.27243933267891407, 'vf_explained_var': 0.6686617, 'kl': 0.0005712432403015555, 'entropy': 2.729983165860176, 'entropy_coeff': 0.005}
2020-09-21 15:28:06,052	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.20552927441895008, 'policy_loss': -0.008849616278894246, 'vf_loss': 0.22769913356751204, 'vf_explained_var': 0.7299769, 'kl': 0.0010959131286654156, 'entropy': 2.729803502559662, 'entropy_coeff': 0.005}
2020-09-21 

2020-09-21 15:28:12,982	DEBUG rollout_worker.py:760 -- Training out:

{ 'default_policy': { 'learner_stats': { 'allreduce_latency': 0.0,
                                         'cur_kl_coeff': 0.3,
                                         'cur_lr': 1e-05,
                                         'entropy': 2.690499782562256,
                                         'entropy_coeff': 0.005,
                                         'kl': 0.010651422664523125,
                                         'policy_loss': -0.05518992990255356,
                                         'total_loss': 0.04559662193059921,
                                         'vf_explained_var': np.ndarray((1,), dtype=float32, min=0.899, max=0.899, mean=0.899),
                                         'vf_loss': 0.11104362457990646}}}

2020-09-21 15:28:13,152	DEBUG sgd.py:120 -- 17 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.015976279275491834, 'policy_loss': -0.06207684287801

custom_metrics: {}
date: 2020-09-21_15-28-18
done: false
episode_len_mean: 700.9090909090909
episode_reward_max: -25.338032876205922
episode_reward_mean: -30.197200867863156
episode_reward_min: -32.22698139089825
episodes_this_iter: 1
episodes_total: 77
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.710395887494087
      entropy_coeff: 0.005
      kl: 0.014838057162705809
      policy_loss: -0.0823249940876849
      total_loss: -0.01708178815897554
      vf_explained_var: 0.9102519750595093
      vf_loss: 0.07434376562014222
  num_steps_sampled: 57000
  num_steps_trained: 57000
iterations_since_restore: 57
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.238095238095238
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.5
  vram_util_percent0: 0.9449909821282179
  vram_uti

2020-09-21 15:28:20,955	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.42476910073310137, 'policy_loss': 0.007423706701956689, 'vf_loss': 0.4306953102350235, 'vf_explained_var': 0.76087797, 'kl': 9.75348019675859e-05, 'entropy': 2.675832986831665, 'entropy_coeff': 0.005}
2020-09-21 15:28:21,440	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.3030738802626729, 'policy_loss': -0.001697579282335937, 'vf_loss': 0.3178206030279398, 'vf_explained_var': 0.8266587, 'kl': 0.001052704370522406, 'entropy': 2.672990396618843, 'entropy_coeff': 0.005}
2020-09-21 15:28:21,926	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.21038903202861547, 'policy_loss': -0.012160304235294461, 'vf_loss': 0.23483030498027802, 'vf_explained_var': 0.87204576, 'kl': 0.0035132807679474354, 'entropy': 2.666991427540779, 'entropy_coeff': 0.005}
2020-09-21 15:

2020-09-21 15:28:32,823	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.007032240508124232, 'policy_loss': -0.073265585815534, 'vf_loss': 0.08966359961777925, 'vf_explained_var': 0.9490483, 'kl': 0.013353230024222285, 'entropy': 2.674348369240761, 'entropy_coeff': 0.005}
2020-09-21 15:28:33,310	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.003718988737091422, 'policy_loss': -0.07674356503412127, 'vf_loss': 0.08970168395899236, 'vf_explained_var': 0.950776, 'kl': 0.013759076246060431, 'entropy': 2.673370063304901, 'entropy_coeff': 0.005}
2020-09-21 15:28:33,769	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.003632787731476128, 'policy_loss': -0.07641333056380972, 'vf_loss': 0.08924931101500988, 'vf_explained_var': 0.9506309, 'kl': 0.013863795960787684, 'entropy': 2.672464594244957, 'entropy_coeff': 0.005}
2020-09-21 15

custom_metrics: {}
date: 2020-09-21_15-28-34
done: false
episode_len_mean: 706.2025316455696
episode_reward_max: -25.338032876205922
episode_reward_mean: -30.198265712168183
episode_reward_min: -32.22698139089825
episodes_this_iter: 2
episodes_total: 79
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.6726702451705933
      entropy_coeff: 0.005
      kl: 0.014354220649693161
      policy_loss: -0.07219753391109407
      total_loss: 0.0069290329702198505
      vf_explained_var: 0.9486596584320068
      vf_loss: 0.08818364865146577
  num_steps_sampled: 58000
  num_steps_trained: 58000
iterations_since_restore: 58
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.204761904761905
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.5
  vram_util_percent0: 0.9449909821282179
  vram_

2020-09-21 15:28:37,156	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.2534592431038618, 'policy_loss': 0.0014507030136883259, 'vf_loss': 0.26482930686324835, 'vf_explained_var': 0.8693401, 'kl': 0.0003267119286504139, 'entropy': 2.58375446498394, 'entropy_coeff': 0.005}
2020-09-21 15:28:37,611	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.13132653723005205, 'policy_loss': -0.008259173831902444, 'vf_loss': 0.1522002788260579, 'vf_explained_var': 0.92117417, 'kl': 0.0010158621444134042, 'entropy': 2.5838648974895477, 'entropy_coeff': 0.005}
2020-09-21 15:28:38,096	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.09258881909772754, 'policy_loss': -0.009659084957093, 'vf_loss': 0.11444765003398061, 'vf_explained_var': 0.93833816, 'kl': 0.0024180488908314146, 'entropy': 2.5850326865911484, 'entropy_coeff': 0.005}
2020-09-21 1

2020-09-21 15:28:48,973	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.05380601168144494, 'policy_loss': -0.0820055773947388, 'vf_loss': 0.037052905187010765, 'vf_explained_var': 0.9768929, 'kl': 0.012841881369240582, 'entropy': 2.541179448366165, 'entropy_coeff': 0.005}
2020-09-21 15:28:49,459	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.044655107136350125, 'policy_loss': -0.0714929411187768, 'vf_loss': 0.035555966082029045, 'vf_explained_var': 0.9808922, 'kl': 0.01323673955630511, 'entropy': 2.537832409143448, 'entropy_coeff': 0.005}
2020-09-21 15:28:49,948	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.047050048829987645, 'policy_loss': -0.07331205846276134, 'vf_loss': 0.03489348595030606, 'vf_explained_var': 0.98080856, 'kl': 0.013591655006166548, 'entropy': 2.541794702410698, 'entropy_coeff': 0.005}
2020-09-

custom_metrics: {}
date: 2020-09-21_15-28-50
done: false
episode_len_mean: 706.2025316455696
episode_reward_max: -25.338032876205922
episode_reward_mean: -30.198265712168183
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 79
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.5408667623996735
      entropy_coeff: 0.005
      kl: 0.014148784626740962
      policy_loss: -0.08123419794719666
      total_loss: -0.055901274434290826
      vf_explained_var: 0.9815233945846558
      vf_loss: 0.033792625879868865
  num_steps_sampled: 59000
  num_steps_trained: 59000
iterations_since_restore: 59
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.080952380952381
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.5
  vram_util_percent0: 0.9449909821282179
  vram

[2m[36m(pid=24699)[0m 2020-09-21 15:28:51,125	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 8478.31,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 221},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-1.846, max=1.425, mean=0.205),
[2m[36m(pid=24699)[0m                                   'prev_action': 16,
[2m[36m(pid=24699)[0m        

2020-09-21 15:28:52,909	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.30191249772906303, 'policy_loss': 0.004558983142487705, 'vf_loss': 0.3099975623190403, 'vf_explained_var': 0.9009341, 'kl': 0.0002727647199175509, 'entropy': 2.545176178216934, 'entropy_coeff': 0.005}
2020-09-21 15:28:53,392	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.2301777314278297, 'policy_loss': -0.0013801876339130104, 'vf_loss': 0.24396926909685135, 'vf_explained_var': 0.92862904, 'kl': 0.0009881263904389925, 'entropy': 2.541557013988495, 'entropy_coeff': 0.005}
2020-09-21 15:28:53,871	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.17477732268162072, 'policy_loss': -0.020873487781500444, 'vf_loss': 0.20770975854247808, 'vf_explained_var': 0.93278813, 'kl': 0.0021809078025398776, 'entropy': 2.54264497756958, 'entropy_coeff': 0.005}
2020-09-21 

2020-09-21 15:29:04,802	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.004282267531380057, 'policy_loss': -0.08934305526781827, 'vf_loss': 0.10200935509055853, 'vf_explained_var': 0.9667702, 'kl': 0.013920437428168952, 'entropy': 2.5120341032743454, 'entropy_coeff': 0.005}
2020-09-21 15:29:05,283	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.006533732637763023, 'policy_loss': -0.08720487385289744, 'vf_loss': 0.10207737050950527, 'vf_explained_var': 0.965821, 'kl': 0.014100017375312746, 'entropy': 2.513755351305008, 'entropy_coeff': 0.005}
2020-09-21 15:29:05,737	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.004802941053640097, 'policy_loss': -0.08717382967006415, 'vf_loss': 0.10027294349856675, 'vf_explained_var': 0.9672394, 'kl': 0.01424417522503063, 'entropy': 2.5138851702213287, 'entropy_coeff': 0.005}
2020-09-21

custom_metrics: {}
date: 2020-09-21_15-29-06
done: false
episode_len_mean: 719.0123456790124
episode_reward_max: -25.338032876205922
episode_reward_mean: -30.205287775376977
episode_reward_min: -32.22698139089825
episodes_this_iter: 2
episodes_total: 81
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.515505149960518
      entropy_coeff: 0.005
      kl: 0.014833063061814755
      policy_loss: -0.08864594693295658
      total_loss: 0.0030220093176467344
      vf_explained_var: 0.9684175252914429
      vf_loss: 0.09979556384496391
  num_steps_sampled: 60000
  num_steps_trained: 60000
iterations_since_restore: 60
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.199999999999999
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.5
  vram_util_percent0: 0.9449909821282179
  vram_u

[2m[36m(pid=24699)[0m 2020-09-21 15:29:06,949	INFO rollout_worker.py:561 -- Generating sample batch of size 200
2020-09-21 15:29:08,789	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.20297690291772597, 'policy_loss': -0.002392739523202181, 'vf_loss': 0.21797320898622274, 'vf_explained_var': 0.87062514, 'kl': 5.7176735318298366e-05, 'entropy': 2.524142473936081, 'entropy_coeff': 0.005}
2020-09-21 15:29:09,277	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.14049148434423842, 'policy_loss': -0.0016681383713148534, 'vf_loss': 0.15471644653007388, 'vf_explained_var': 0.90689415, 'kl': 0.00030313032129924977, 'entropy': 2.529553174972534, 'entropy_coeff': 0.005}
2020-09-21 15:29:09,757	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.0924988966435194, 'policy_loss': -0.006413322174921632, 'vf_loss': 0.11112122936174273, 'vf_e

2020-09-21 15:29:15,406	DEBUG sgd.py:120 -- 14 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.028132586390711367, 'policy_loss': -0.058774295379407704, 'vf_loss': 0.04123058612458408, 'vf_explained_var': 0.9743799, 'kl': 0.007119744434021413, 'entropy': 2.5449602901935577, 'entropy_coeff': 0.005}
2020-09-21 15:29:15,896	DEBUG sgd.py:120 -- 15 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.030485641211271286, 'policy_loss': -0.05989916971884668, 'vf_loss': 0.0398468627827242, 'vf_explained_var': 0.9756225, 'kl': 0.007640045980224386, 'entropy': 2.545070692896843, 'entropy_coeff': 0.005}
2020-09-21 15:29:16,350	DEBUG sgd.py:120 -- 16 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.03162690321914852, 'policy_loss': -0.06122118839994073, 'vf_loss': 0.03993984090629965, 'vf_explained_var': 0.9755379, 'kl': 0.007960652292240411, 'entropy': 2.546751245856285, 'entropy_coeff': 0.005}
2020-09

custom_metrics: {}
date: 2020-09-21_15-29-22
done: false
episode_len_mean: 719.0123456790124
episode_reward_max: -25.338032876205922
episode_reward_mean: -30.20528777537698
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 81
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.5399121791124344
      entropy_coeff: 0.005
      kl: 0.012176639342214912
      policy_loss: -0.08091519284062088
      total_loss: -0.057815139065496624
      vf_explained_var: 0.9799107313156128
      vf_loss: 0.0321466235909611
  num_steps_sampled: 61000
  num_steps_trained: 61000
iterations_since_restore: 61
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.085000000000001
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.5
  vram_util_percent0: 0.9449909821282179
  vram_ut

2020-09-21 15:29:25,310	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.20771085901651531, 'policy_loss': 0.005118362023495138, 'vf_loss': 0.21563498582690954, 'vf_explained_var': 0.8670088, 'kl': 0.00013204963893642674, 'entropy': 2.6164213120937347, 'entropy_coeff': 0.005}
2020-09-21 15:29:25,794	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.11735144618432969, 'policy_loss': -0.004242343478836119, 'vf_loss': 0.1344442367553711, 'vf_explained_var': 0.91317606, 'kl': 0.0007509804472647374, 'entropy': 2.6151473075151443, 'entropy_coeff': 0.005}
2020-09-21 15:29:26,251	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.07642422989010811, 'policy_loss': -0.008939927211031318, 'vf_loss': 0.09749841247685254, 'vf_explained_var': 0.9359336, 'kl': 0.00307920943305362, 'entropy': 2.611604079604149, 'entropy_coeff': 0.005}
2020-09-21

2020-09-21 15:29:37,126	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.05784159933682531, 'policy_loss': -0.07674788928125054, 'vf_loss': 0.02770716603845358, 'vf_explained_var': 0.9823958, 'kl': 0.013833950331900269, 'entropy': 2.590211734175682, 'entropy_coeff': 0.005}
2020-09-21 15:29:37,612	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.06144902075175196, 'policy_loss': -0.08035911648767069, 'vf_loss': 0.02780479087959975, 'vf_explained_var': 0.9824414, 'kl': 0.01349636574741453, 'entropy': 2.5887209475040436, 'entropy_coeff': 0.005}
2020-09-21 15:29:38,103	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.06311534630367532, 'policy_loss': -0.08259440399706364, 'vf_loss': 0.028301681741140783, 'vf_explained_var': 0.98306525, 'kl': 0.013729837955906987, 'entropy': 2.588314637541771, 'entropy_coeff': 0.005}
2020-09-

custom_metrics: {}
date: 2020-09-21_15-29-39
done: false
episode_len_mean: 719.0123456790124
episode_reward_max: -25.338032876205922
episode_reward_mean: -30.20528777537698
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 81
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.590858444571495
      entropy_coeff: 0.005
      kl: 0.01415723521495238
      policy_loss: -0.07631635526195168
      total_loss: -0.05792497843503952
      vf_explained_var: 0.9836553335189819
      vf_loss: 0.027098492602817714
  num_steps_sampled: 62000
  num_steps_trained: 62000
iterations_since_restore: 62
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.0681818181818175
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.5
  vram_util_percent0: 0.9449909821282177
  vram_ut

2020-09-21 15:29:41,327	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.21700819116085768, 'policy_loss': -0.004723483580164611, 'vf_loss': 0.23477613180875778, 'vf_explained_var': 0.80175567, 'kl': 0.00015220329549114897, 'entropy': 2.6180229038000107, 'entropy_coeff': 0.005}
2020-09-21 15:29:41,807	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.1464927000924945, 'policy_loss': -0.006005763483699411, 'vf_loss': 0.16521951742470264, 'vf_explained_var': 0.8535829, 'kl': 0.0012396045040077297, 'entropy': 2.6185864955186844, 'entropy_coeff': 0.005}
2020-09-21 15:29:42,263	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.10650263342540711, 'policy_loss': -0.01632673409767449, 'vf_loss': 0.13506614230573177, 'vf_explained_var': 0.8802471, 'kl': 0.00288445787737146, 'entropy': 2.620423272252083, 'entropy_coeff': 0.005}
2020-09-21

2020-09-21 15:29:53,134	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.00749102549161762, 'policy_loss': -0.07182837999425828, 'vf_loss': 0.07391057233326137, 'vf_explained_var': 0.9337642, 'kl': 0.011854154872708023, 'entropy': 2.6258929520845413, 'entropy_coeff': 0.005}
2020-09-21 15:29:53,626	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.010864947689697146, 'policy_loss': -0.07530507910996675, 'vf_loss': 0.07400767807848752, 'vf_explained_var': 0.93336797, 'kl': 0.011875017080456018, 'entropy': 2.6260107904672623, 'entropy_coeff': 0.005}
2020-09-21 15:29:54,109	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.0010140290251001716, 'policy_loss': -0.06959432346047834, 'vf_loss': 0.07799595361575484, 'vf_explained_var': 0.93435776, 'kl': 0.012326404103077948, 'entropy': 2.622717037796974, 'entropy_coeff': 0.005}
202

custom_metrics: {}
date: 2020-09-21_15-29-55
done: false
episode_len_mean: 724.0853658536586
episode_reward_max: -25.338032876205922
episode_reward_mean: -30.210075188123856
episode_reward_min: -32.22698139089825
episodes_this_iter: 1
episodes_total: 82
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.622076988220215
      entropy_coeff: 0.005
      kl: 0.012413561285939068
      policy_loss: -0.08147307054605335
      total_loss: -0.017649207264184952
      vf_explained_var: 0.9345916509628296
      vf_loss: 0.07321017677895725
  num_steps_sampled: 63000
  num_steps_trained: 63000
iterations_since_restore: 63
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.204761904761905
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.5
  vram_util_percent0: 0.9449909821282179
  vram_u

2020-09-21 15:29:57,215	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.39685969054698944, 'policy_loss': 0.0011723439674824476, 'vf_loss': 0.408591540530324, 'vf_explained_var': 0.64041924, 'kl': 0.00016861158285919053, 'entropy': 2.5909589380025864, 'entropy_coeff': 0.005}
2020-09-21 15:29:57,677	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.3034867998212576, 'policy_loss': -0.00954850047128275, 'vf_loss': 0.325622689910233, 'vf_explained_var': 0.7081076, 'kl': 0.0013386644568527117, 'entropy': 2.59780091047287, 'entropy_coeff': 0.005}
2020-09-21 15:29:58,480	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.2456447253935039, 'policy_loss': -0.01358686073217541, 'vf_loss': 0.2714871000498533, 'vf_explained_var': 0.76405203, 'kl': 0.0025020659086294472, 'entropy': 2.601227179169655, 'entropy_coeff': 0.005}
2020-09-21 15:29

2020-09-21 15:30:09,462	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.004307506955228746, 'policy_loss': -0.0719720798661001, 'vf_loss': 0.07735747005790472, 'vf_explained_var': 0.92862755, 'kl': 0.010983943939208984, 'entropy': 2.597616955637932, 'entropy_coeff': 0.005}
2020-09-21 15:30:09,915	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.008878226391971111, 'policy_loss': -0.07612602377776057, 'vf_loss': 0.07699964917264879, 'vf_explained_var': 0.9322669, 'kl': 0.01067940064240247, 'entropy': 2.591135188937187, 'entropy_coeff': 0.005}
2020-09-21 15:30:10,408	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.006943456130102277, 'policy_loss': -0.06920542730949819, 'vf_loss': 0.0718350326642394, 'vf_explained_var': 0.93293697, 'kl': 0.01134377991547808, 'entropy': 2.5952391773462296, 'entropy_coeff': 0.005}
2020-09-

custom_metrics: {}
date: 2020-09-21_15-30-11
done: false
episode_len_mean: 735.6904761904761
episode_reward_max: -25.338032876205922
episode_reward_mean: -30.233515188103286
episode_reward_min: -32.22698139089825
episodes_this_iter: 2
episodes_total: 84
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.593558445572853
      entropy_coeff: 0.005
      kl: 0.011403738404624164
      policy_loss: -0.0780659873271361
      total_loss: -0.01859391783364117
      vf_explained_var: 0.9389572143554688
      vf_loss: 0.06901874486356974
  num_steps_sampled: 64000
  num_steps_trained: 64000
iterations_since_restore: 64
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.161904761904762
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.5
  vram_util_percent0: 0.9449909821282179
  vram_uti

[2m[36m(pid=24699)[0m 2020-09-21 15:30:11,626	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 8836.196702590396,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 126},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-1.141, max=1.396, mean=0.212),
[2m[36m(pid=24699)[0m                                   'prev_action': 5,
[2m[36m(pid=24699)[0

2020-09-21 15:30:13,064	INFO rollout_worker.py:736 -- Training on concatenated sample batches:

{ 'count': 64,
  'policy_batches': { 'default_policy': { 'data': { 'action_dist_inputs': np.ndarray((64, 21), dtype=float32, min=-3.59, max=3.651, mean=-0.035),
                                                    'action_logp': np.ndarray((64,), dtype=float32, min=-4.903, max=-0.552, mean=-2.396),
                                                    'action_prob': np.ndarray((64,), dtype=float32, min=0.007, max=0.576, mean=0.136),
                                                    'actions': np.ndarray((64,), dtype=int64, min=0.0, max=20.0, mean=9.219),
                                                    'advantages': np.ndarray((64,), dtype=float32, min=-3.271, max=1.45, mean=-0.007),
                                                    'agent_index': np.ndarray((64,), dtype=int64, min=0.0, max=0.0, mean=0.0),
                                                    'dones': np.ndarray((64,), dty

2020-09-21 15:30:21,577	DEBUG sgd.py:120 -- 17 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.04805945826228708, 'policy_loss': -0.06495867628837004, 'vf_loss': 0.027321149478666484, 'vf_explained_var': 0.98792315, 'kl': 0.006880584929604083, 'entropy': 2.4972202479839325, 'entropy_coeff': 0.005}
2020-09-21 15:30:22,028	DEBUG sgd.py:120 -- 18 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.04243233217857778, 'policy_loss': -0.058161931636277586, 'vf_loss': 0.026118297246284783, 'vf_explained_var': 0.98790073, 'kl': 0.006935918965609744, 'entropy': 2.4938962906599045, 'entropy_coeff': 0.005}
2020-09-21 15:30:22,516	DEBUG sgd.py:120 -- 19 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.050056370615493506, 'policy_loss': -0.06494092254433781, 'vf_loss': 0.02528856211574748, 'vf_explained_var': 0.98973143, 'kl': 0.006904670823132619, 'entropy': 2.495083600282669, 'entropy_coeff': 0.005}
2

custom_metrics: {}
date: 2020-09-21_15-30-27
done: false
episode_len_mean: 735.6904761904761
episode_reward_max: -25.338032876205922
episode_reward_mean: -30.233515188103286
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 84
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.488356962800026
      entropy_coeff: 0.005
      kl: 0.008925696951337159
      policy_loss: -0.07258578616892919
      total_loss: -0.061670385766774416
      vf_explained_var: 0.9910361766815186
      vf_loss: 0.020679475273936987
  num_steps_sampled: 65000
  num_steps_trained: 65000
iterations_since_restore: 65
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.200000000000001
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.5
  vram_util_percent0: 0.9449909821282179
  vram_

[2m[36m(pid=24699)[0m 2020-09-21 15:30:27,494	INFO rollout_worker.py:561 -- Generating sample batch of size 200
2020-09-21 15:30:29,367	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.25438491138629615, 'policy_loss': -0.0008074550423771143, 'vf_loss': 0.2672651167958975, 'vf_explained_var': 0.9174407, 'kl': 0.0006983489615966509, 'entropy': 2.456451892852783, 'entropy_coeff': 0.005}
2020-09-21 15:30:29,823	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.1865026312880218, 'policy_loss': -0.0014609212521463633, 'vf_loss': 0.2000149809755385, 'vf_explained_var': 0.9370146, 'kl': 0.000873362831043778, 'entropy': 2.462689056992531, 'entropy_coeff': 0.005}
2020-09-21 15:30:30,310	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.14147944864816964, 'policy_loss': -0.008958245103713125, 'vf_loss': 0.16211684048175812, 'vf_explain

2020-09-21 15:30:41,229	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.001681690919212997, 'policy_loss': -0.07773478802118916, 'vf_loss': 0.08801589021459222, 'vf_explained_var': 0.97094035, 'kl': 0.012987212976440787, 'entropy': 2.4991146475076675, 'entropy_coeff': 0.005}
2020-09-21 15:30:41,719	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.0019185617566108704, 'policy_loss': -0.07913931168150157, 'vf_loss': 0.08968618419021368, 'vf_explained_var': 0.9706168, 'kl': 0.012877218367066234, 'entropy': 2.498295783996582, 'entropy_coeff': 0.005}
2020-09-21 15:30:42,213	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.0028167625423520803, 'policy_loss': -0.08297743485309184, 'vf_loss': 0.08875692845322192, 'vf_explained_var': 0.97061175, 'kl': 0.012978055339772254, 'entropy': 2.4979336261749268, 'entropy_coeff': 0.005}
202

custom_metrics: {}
date: 2020-09-21_15-30-43
done: false
episode_len_mean: 741.3764705882353
episode_reward_max: -25.338032876205922
episode_reward_mean: -30.246218467583063
episode_reward_min: -32.22698139089825
episodes_this_iter: 1
episodes_total: 85
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.4976855516433716
      entropy_coeff: 0.005
      kl: 0.01366138772573322
      policy_loss: -0.09409091190900654
      total_loss: -0.017163496115244925
      vf_explained_var: 0.9725820422172546
      vf_loss: 0.08531742729246616
  num_steps_sampled: 66000
  num_steps_trained: 66000
iterations_since_restore: 66
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.10952380952381
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.5
  vram_util_percent0: 0.9449909821282179
  vram_ut

2020-09-21 15:30:45,525	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.27618624549359083, 'policy_loss': -0.0004967497661709785, 'vf_loss': 0.28938629664480686, 'vf_explained_var': 0.80017525, 'kl': 0.0002641117206036714, 'entropy': 2.556507483124733, 'entropy_coeff': 0.005}
2020-09-21 15:30:46,013	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.18328265915624797, 'policy_loss': -0.003921755647752434, 'vf_loss': 0.19933680910617113, 'vf_explained_var': 0.85849404, 'kl': 0.0022558467244380154, 'entropy': 2.561830535531044, 'entropy_coeff': 0.005}
2020-09-21 15:30:46,460	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.13023958913981915, 'policy_loss': -0.015888548456132412, 'vf_loss': 0.15739474771544337, 'vf_explained_var': 0.893505, 'kl': 0.005192686730879359, 'entropy': 2.564882919192314, 'entropy_coeff': 0.005}
2020-09-2

2020-09-21 15:30:57,390	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.05442017433233559, 'policy_loss': -0.07911586854606867, 'vf_loss': 0.0339415849884972, 'vf_explained_var': 0.9759017, 'kl': 0.011741995171178132, 'entropy': 2.5536969006061554, 'entropy_coeff': 0.005}
2020-09-21 15:30:57,835	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.05592227284796536, 'policy_loss': -0.08063396986108273, 'vf_loss': 0.033888074569404125, 'vf_explained_var': 0.97617227, 'kl': 0.011948046972975135, 'entropy': 2.552158311009407, 'entropy_coeff': 0.005}
2020-09-21 15:30:58,319	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.05500518099870533, 'policy_loss': -0.07883691508322954, 'vf_loss': 0.0328388754860498, 'vf_explained_var': 0.97493327, 'kl': 0.012506508152000606, 'entropy': 2.551817461848259, 'entropy_coeff': 0.005}
2020-09-

custom_metrics: {}
date: 2020-09-21_15-30-59
done: false
episode_len_mean: 746.9883720930233
episode_reward_max: -25.338032876205922
episode_reward_mean: -30.23791204946725
episode_reward_min: -32.22698139089825
episodes_this_iter: 1
episodes_total: 86
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.5538234412670135
      entropy_coeff: 0.005
      kl: 0.012913666607346386
      policy_loss: -0.08507096138782799
      total_loss: -0.06246407702565193
      vf_explained_var: 0.9766196608543396
      vf_loss: 0.031501899706199765
  num_steps_sampled: 67000
  num_steps_trained: 67000
iterations_since_restore: 67
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.133333333333335
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.5
  vram_util_percent0: 0.9449909821282179
  vram_u

2020-09-21 15:31:01,378	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.23164752079173923, 'policy_loss': 0.002139504882507026, 'vf_loss': 0.24230851279571652, 'vf_explained_var': 0.82076126, 'kl': 0.00036207788626363424, 'entropy': 2.581824228167534, 'entropy_coeff': 0.005}
2020-09-21 15:31:01,831	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.16348137171007693, 'policy_loss': -0.0099007798708044, 'vf_loss': 0.18520517647266388, 'vf_explained_var': 0.87053114, 'kl': 0.003543099548551254, 'entropy': 2.577190935611725, 'entropy_coeff': 0.005}
2020-09-21 15:31:02,314	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.12683251884300262, 'policy_loss': -0.01768666412681341, 'vf_loss': 0.15502692153677344, 'vf_explained_var': 0.89677036, 'kl': 0.007874165487010032, 'entropy': 2.573998212814331, 'entropy_coeff': 0.005}
2020-09-21 1

2020-09-21 15:31:13,076	INFO rollout_worker.py:736 -- Training on concatenated sample batches:

{ 'count': 64,
  'policy_batches': { 'default_policy': { 'data': { 'action_dist_inputs': np.ndarray((64, 21), dtype=float32, min=-4.269, max=3.867, mean=-0.015),
                                                    'action_logp': np.ndarray((64,), dtype=float32, min=-5.226, max=-0.715, mean=-2.426),
                                                    'action_prob': np.ndarray((64,), dtype=float32, min=0.005, max=0.489, mean=0.132),
                                                    'actions': np.ndarray((64,), dtype=int64, min=0.0, max=20.0, mean=8.594),
                                                    'advantages': np.ndarray((64,), dtype=float32, min=-1.743, max=6.908, mean=0.241),
                                                    'agent_index': np.ndarray((64,), dtype=int64, min=0.0, max=0.0, mean=0.0),
                                                    'dones': np.ndarray((64,), dt

custom_metrics: {}
date: 2020-09-21_15-31-15
done: false
episode_len_mean: 748.919540229885
episode_reward_max: -25.338032876205922
episode_reward_mean: -30.252823246764912
episode_reward_min: -32.22698139089825
episodes_this_iter: 1
episodes_total: 87
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.5613978505134583
      entropy_coeff: 0.005
      kl: 0.012833625019993633
      policy_loss: -0.07506344909779727
      total_loss: 0.004519369336776435
      vf_explained_var: 0.9426018595695496
      vf_loss: 0.08853972586803138
  num_steps_sampled: 68000
  num_steps_trained: 68000
iterations_since_restore: 68
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.190476190476191
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.5
  vram_util_percent0: 0.9449909821282179
  vram_ut

2020-09-21 15:31:17,379	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.24111378006637096, 'policy_loss': 0.0024922535521909595, 'vf_loss': 0.2513883523643017, 'vf_explained_var': 0.84986424, 'kl': 0.0006247634827960535, 'entropy': 2.5908512324094772, 'entropy_coeff': 0.005}
2020-09-21 15:31:17,831	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.10932136059273034, 'policy_loss': -0.007591476198285818, 'vf_loss': 0.12849220912903547, 'vf_explained_var': 0.91923755, 'kl': 0.004557607666356489, 'entropy': 2.5893316864967346, 'entropy_coeff': 0.005}
2020-09-21 15:31:18,320	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.05979147390462458, 'policy_loss': -0.015406834718305618, 'vf_loss': 0.08573259296827018, 'vf_explained_var': 0.9505335, 'kl': 0.008014842780539766, 'entropy': 2.5877464711666107, 'entropy_coeff': 0.005}
2020-09-

2020-09-21 15:31:29,170	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.08512433082796633, 'policy_loss': -0.08694894087966532, 'vf_loss': 0.010823789838468656, 'vf_explained_var': 0.99376833, 'kl': 0.012879437825176865, 'entropy': 2.5726025104522705, 'entropy_coeff': 0.005}
2020-09-21 15:31:29,656	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.08417728811036795, 'policy_loss': -0.08596920943818986, 'vf_loss': 0.010695370641769841, 'vf_explained_var': 0.9934324, 'kl': 0.013151872204616666, 'entropy': 2.5698021054267883, 'entropy_coeff': 0.005}
2020-09-21 15:31:30,140	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.08510381786618382, 'policy_loss': -0.08573862537741661, 'vf_loss': 0.009498579980572686, 'vf_explained_var': 0.9945394, 'kl': 0.013271653500851244, 'entropy': 2.5690539181232452, 'entropy_coeff': 0.005}
202

custom_metrics: {}
date: 2020-09-21_15-31-31
done: false
episode_len_mean: 748.919540229885
episode_reward_max: -25.338032876205922
episode_reward_mean: -30.252823246764912
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 87
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.571217492222786
      entropy_coeff: 0.005
      kl: 0.012517829425632954
      policy_loss: -0.08714744870667346
      total_loss: -0.08745103428373113
      vf_explained_var: 0.9947432279586792
      vf_loss: 0.008797157148364931
  num_steps_sampled: 69000
  num_steps_trained: 69000
iterations_since_restore: 69
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.042857142857143
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.5
  vram_util_percent0: 0.9449909821282179
  vram_ut

[2m[36m(pid=24699)[0m 2020-09-21 15:31:31,324	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 9375.09564794445,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 211},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-1.315, max=1.189, mean=0.224),
[2m[36m(pid=24699)[0m                                   'prev_action': 11,
[2m[36m(pid=24699)[0

2020-09-21 15:31:33,719	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.36846165638417006, 'policy_loss': 0.0012307791039347649, 'vf_loss': 0.37952468264847994, 'vf_explained_var': 0.8122063, 'kl': 0.0009173152851406352, 'entropy': 2.5137995779514313, 'entropy_coeff': 0.005}
2020-09-21 15:31:34,203	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.2634845580905676, 'policy_loss': -0.012534188106656075, 'vf_loss': 0.2855035187676549, 'vf_explained_var': 0.87036717, 'kl': 0.010238767717964947, 'entropy': 2.5112801492214203, 'entropy_coeff': 0.005}
2020-09-21 15:31:34,660	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.18507337954360992, 'policy_loss': -0.02812466793693602, 'vf_loss': 0.22276602173224092, 'vf_explained_var': 0.89376295, 'kl': 0.010195079259574413, 'entropy': 2.525301471352577, 'entropy_coeff': 0.005}
2020-09-21 

2020-09-21 15:31:45,575	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.07287788670510054, 'policy_loss': -0.08868701205938123, 'vf_loss': 0.02379360911436379, 'vf_explained_var': 0.9879624, 'kl': 0.015280992432963103, 'entropy': 2.5137570649385452, 'entropy_coeff': 0.005}
2020-09-21 15:31:46,027	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.07629971404094249, 'policy_loss': -0.09145118133164942, 'vf_loss': 0.02365012455265969, 'vf_explained_var': 0.9892374, 'kl': 0.01355039986083284, 'entropy': 2.512754887342453, 'entropy_coeff': 0.005}
2020-09-21 15:31:46,516	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.07758735551033169, 'policy_loss': -0.0906675336882472, 'vf_loss': 0.021467810438480228, 'vf_explained_var': 0.98921776, 'kl': 0.013938374409917742, 'entropy': 2.5138273388147354, 'entropy_coeff': 0.005}
2020-09-

custom_metrics: {}
date: 2020-09-21_15-31-47
done: false
episode_len_mean: 759.4719101123595
episode_reward_max: -25.338032876205922
episode_reward_mean: -30.2765255133972
episode_reward_min: -32.22698139089825
episodes_this_iter: 2
episodes_total: 89
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.5127388685941696
      entropy_coeff: 0.005
      kl: 0.014019212976563722
      policy_loss: -0.09765072062145919
      total_loss: -0.0860857742081862
      vf_explained_var: 0.9904506206512451
      vf_loss: 0.019922880514059216
  num_steps_sampled: 70000
  num_steps_trained: 70000
iterations_since_restore: 70
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.285714285714286
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.5
  vram_util_percent0: 0.9449909821282179
  vram_uti

[2m[36m(pid=24699)[0m 2020-09-21 15:31:47,745	INFO rollout_worker.py:561 -- Generating sample batch of size 200
2020-09-21 15:31:49,648	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.24987246072851121, 'policy_loss': 0.0034686821745708585, 'vf_loss': 0.25881070271134377, 'vf_explained_var': 0.8926966, 'kl': 0.00039919639564678366, 'entropy': 2.5053359866142273, 'entropy_coeff': 0.005}
2020-09-21 15:31:50,144	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.15147254162002355, 'policy_loss': -0.0018779808888211846, 'vf_loss': 0.16542595578357577, 'vf_explained_var': 0.93123686, 'kl': 0.001513266448455397, 'entropy': 2.5058818012475967, 'entropy_coeff': 0.005}
2020-09-21 15:31:50,600	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.08630098740104586, 'policy_loss': -0.01674757804721594, 'vf_loss': 0.11464593466371298, 'vf_ex

2020-09-21 15:32:01,576	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.06153458124026656, 'policy_loss': -0.07588258001487702, 'vf_loss': 0.02310933219268918, 'vf_explained_var': 0.9898136, 'kl': 0.01247202546801418, 'entropy': 2.5005875676870346, 'entropy_coeff': 0.005}
2020-09-21 15:32:02,046	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.06807277863845229, 'policy_loss': -0.08145988755859435, 'vf_loss': 0.022100253379903734, 'vf_explained_var': 0.9904763, 'kl': 0.01259854412637651, 'entropy': 2.4985414147377014, 'entropy_coeff': 0.005}
2020-09-21 15:32:02,527	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.06523895636200905, 'policy_loss': -0.07854748901445419, 'vf_loss': 0.0219519684324041, 'vf_explained_var': 0.99004763, 'kl': 0.012815909634809941, 'entropy': 2.497641697525978, 'entropy_coeff': 0.005}
2020-09-2

custom_metrics: {}
date: 2020-09-21_15-32-03
done: false
episode_len_mean: 759.4719101123595
episode_reward_max: -25.338032876205922
episode_reward_mean: -30.276525513397203
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 89
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.494216725230217
      entropy_coeff: 0.005
      kl: 0.013193625549320132
      policy_loss: -0.08037340152077377
      total_loss: -0.06770534720271826
      vf_explained_var: 0.9907053709030151
      vf_loss: 0.02118105028057471
  num_steps_sampled: 71000
  num_steps_trained: 71000
iterations_since_restore: 71
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 6.042857142857143
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.5
  vram_util_percent0: 0.9449909821282179
  vram_ut

2020-09-21 15:32:05,834	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.3021042300388217, 'policy_loss': 0.0034207982243970037, 'vf_loss': 0.31132675521075726, 'vf_explained_var': 0.9023111, 'kl': 5.956826979147678e-05, 'entropy': 2.5322407335042953, 'entropy_coeff': 0.005}
2020-09-21 15:32:06,321	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.14550827629864216, 'policy_loss': -0.013794469414278865, 'vf_loss': 0.17186677595600486, 'vf_explained_var': 0.94766927, 'kl': 0.0003807585321737861, 'entropy': 2.5356505215168, 'entropy_coeff': 0.005}
2020-09-21 15:32:06,774	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.08842191356234252, 'policy_loss': -0.01491667702794075, 'vf_loss': 0.11565385526046157, 'vf_explained_var': 0.964205, 'kl': 0.0013952413719380274, 'entropy': 2.5467695593833923, 'entropy_coeff': 0.005}
2020-09-21 1

2020-09-21 15:32:13,281	DEBUG sgd.py:120 -- 15 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.04474893130827695, 'policy_loss': -0.0650005656061694, 'vf_loss': 0.030676880502142012, 'vf_explained_var': 0.99055237, 'kl': 0.00788898509927094, 'entropy': 2.558388128876686, 'entropy_coeff': 0.005}
2020-09-21 15:32:13,734	DEBUG sgd.py:120 -- 16 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.04087798192631453, 'policy_loss': -0.06058416701853275, 'vf_loss': 0.029866144992411137, 'vf_explained_var': 0.99066204, 'kl': 0.008811874111415818, 'entropy': 2.560703232884407, 'entropy_coeff': 0.005}
2020-09-21 15:32:14,218	DEBUG sgd.py:120 -- 17 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.048719545477069914, 'policy_loss': -0.06670786021277308, 'vf_loss': 0.028049430227838457, 'vf_explained_var': 0.9910573, 'kl': 0.009103319491259754, 'entropy': 2.5584223568439484, 'entropy_coeff': 0.005}
2020-

custom_metrics: {}
date: 2020-09-21_15-32-19
done: false
episode_len_mean: 759.4719101123595
episode_reward_max: -25.338032876205922
episode_reward_mean: -30.276525513397203
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 89
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.554138019680977
      entropy_coeff: 0.005
      kl: 0.012239702278748155
      policy_loss: -0.08953673928044736
      total_loss: -0.07707046205177903
      vf_explained_var: 0.9933456778526306
      vf_loss: 0.021565051283687353
  num_steps_sampled: 72000
  num_steps_trained: 72000
iterations_since_restore: 72
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.299999999999999
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.5
  vram_util_percent0: 0.9449909821282177
  vram_u

2020-09-21 15:32:22,139	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.3422572407871485, 'policy_loss': -0.0017889677546918392, 'vf_loss': 0.3567125294357538, 'vf_explained_var': 0.81057084, 'kl': 0.0006570025785595512, 'entropy': 2.5726860761642456, 'entropy_coeff': 0.005}
2020-09-21 15:32:22,629	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.26596858259290457, 'policy_loss': -0.005426699295639992, 'vf_loss': 0.2833833498880267, 'vf_explained_var': 0.8499481, 'kl': 0.002921645253081806, 'entropy': 2.5729118287563324, 'entropy_coeff': 0.005}
2020-09-21 15:32:23,118	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.20028673348133452, 'policy_loss': -0.010898181237280369, 'vf_loss': 0.22274870984256268, 'vf_explained_var': 0.87773037, 'kl': 0.004336596728535369, 'entropy': 2.572956219315529, 'entropy_coeff': 0.005}
2020-09-21

2020-09-21 15:32:34,001	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.045586780179291964, 'policy_loss': -0.08369453379418701, 'vf_loss': 0.04713228775653988, 'vf_explained_var': 0.97380435, 'kl': 0.013216484454460442, 'entropy': 2.597895473241806, 'entropy_coeff': 0.005}
2020-09-21 15:32:34,484	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.0514136798446998, 'policy_loss': -0.08851390919880942, 'vf_loss': 0.04589252290315926, 'vf_explained_var': 0.9751501, 'kl': 0.0139564992277883, 'entropy': 2.595848113298416, 'entropy_coeff': 0.005}
2020-09-21 15:32:34,935	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.05625890789087862, 'policy_loss': -0.09172318875789642, 'vf_loss': 0.04432861087843776, 'vf_explained_var': 0.9756005, 'kl': 0.013723871030379087, 'entropy': 2.5962979942560196, 'entropy_coeff': 0.005}
2020-09-21

custom_metrics: {}
date: 2020-09-21_15-32-35
done: false
episode_len_mean: 765.2333333333333
episode_reward_max: -25.338032876205922
episode_reward_mean: -30.29179662152105
episode_reward_min: -32.22698139089825
episodes_this_iter: 1
episodes_total: 90
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.596537262201309
      entropy_coeff: 0.005
      kl: 0.013703214179258794
      policy_loss: -0.09548290446400642
      total_loss: -0.06411360885249451
      vf_explained_var: 0.9777019023895264
      vf_loss: 0.04024102131370455
  num_steps_sampled: 73000
  num_steps_trained: 73000
iterations_since_restore: 73
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.276190476190477
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.5
  vram_util_percent0: 0.9449909821282179
  vram_uti

2020-09-21 15:32:37,980	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.3226809985935688, 'policy_loss': 0.004222315503284335, 'vf_loss': 0.3312689224258065, 'vf_explained_var': 0.78845704, 'kl': 0.0005404265636453076, 'entropy': 2.5944737941026688, 'entropy_coeff': 0.005}
2020-09-21 15:32:38,469	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.23054160038009286, 'policy_loss': -0.0026083210250362754, 'vf_loss': 0.24464302696287632, 'vf_explained_var': 0.8485434, 'kl': 0.004766224665218033, 'entropy': 2.584593176841736, 'entropy_coeff': 0.005}
2020-09-21 15:32:38,924	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.15694410121068358, 'policy_loss': -0.01839415600989014, 'vf_loss': 0.18645158782601357, 'vf_explained_var': 0.8786716, 'kl': 0.005955776505288668, 'entropy': 2.5800115168094635, 'entropy_coeff': 0.005}
2020-09-21 1

2020-09-21 15:32:49,865	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.00974707247223705, 'policy_loss': -0.08114269934594631, 'vf_loss': 0.08046730794012547, 'vf_explained_var': 0.94758904, 'kl': 0.01277597964508459, 'entropy': 2.580895632505417, 'entropy_coeff': 0.005}
2020-09-21 15:32:50,316	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.009634656133130193, 'policy_loss': -0.0818909794325009, 'vf_loss': 0.0811231741681695, 'vf_explained_var': 0.94858277, 'kl': 0.013427471392787993, 'entropy': 2.5790188163518906, 'entropy_coeff': 0.005}
2020-09-21 15:32:50,800	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.012731026858091354, 'policy_loss': -0.08404597546905279, 'vf_loss': 0.08024056977592409, 'vf_explained_var': 0.9485673, 'kl': 0.013218508975114673, 'entropy': 2.5782340615987778, 'entropy_coeff': 0.005}
2020-09

custom_metrics: {}
date: 2020-09-21_15-32-51
done: false
episode_len_mean: 774.1978021978022
episode_reward_max: -25.338032876205922
episode_reward_mean: -30.29280270521436
episode_reward_min: -32.22698139089825
episodes_this_iter: 1
episodes_total: 91
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.5809196382761
      entropy_coeff: 0.005
      kl: 0.0137347899726592
      policy_loss: -0.0862994781928137
      total_loss: -0.017118811141699553
      vf_explained_var: 0.9497854113578796
      vf_loss: 0.07796482718549669
  num_steps_sampled: 74000
  num_steps_trained: 74000
iterations_since_restore: 74
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.15
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.5
  vram_util_percent0: 0.9449909821282179
  vram_util_percent1: 0.030

[2m[36m(pid=24699)[0m 2020-09-21 15:32:52,031	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 6938.618967871265,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 1211},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-1.253, max=1.989, mean=0.25),
[2m[36m(pid=24699)[0m                                   'prev_action': 7,
[2m[36m(pid=24699)[0

2020-09-21 15:32:54,368	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.20994167181197554, 'policy_loss': 0.003095119260251522, 'vf_loss': 0.21932625398039818, 'vf_explained_var': 0.9021919, 'kl': 7.35628441139724e-05, 'entropy': 2.500352218747139, 'entropy_coeff': 0.005}
2020-09-21 15:32:54,847	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.11184080294333398, 'policy_loss': -0.004748619277961552, 'vf_loss': 0.12890891963616014, 'vf_explained_var': 0.9507604, 'kl': 0.0005909135434194468, 'entropy': 2.499354302883148, 'entropy_coeff': 0.005}
2020-09-21 15:32:55,297	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.06385436048731208, 'policy_loss': -0.008536909590475261, 'vf_loss': 0.08411618624813855, 'vf_explained_var': 0.9675333, 'kl': 0.002589987372630276, 'entropy': 2.5003832578659058, 'entropy_coeff': 0.005}
2020-09-21 1

2020-09-21 15:33:06,175	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.07861166982911527, 'policy_loss': -0.07933558919467032, 'vf_loss': 0.010712827352108434, 'vf_explained_var': 0.9957176, 'kl': 0.008427928696619347, 'entropy': 2.5034563839435577, 'entropy_coeff': 0.005}
2020-09-21 15:33:06,628	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.07305001732311212, 'policy_loss': -0.07379802613286301, 'vf_loss': 0.010531012580031529, 'vf_explained_var': 0.9957534, 'kl': 0.009075975365703925, 'entropy': 2.5011595487594604, 'entropy_coeff': 0.005}
2020-09-21 15:33:07,109	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.07823954813648015, 'policy_loss': -0.07915380864869803, 'vf_loss': 0.010767927160486579, 'vf_explained_var': 0.99582267, 'kl': 0.00881199078867212, 'entropy': 2.4994528740644455, 'entropy_coeff': 0.005}
2020

custom_metrics: {}
date: 2020-09-21_15-33-08
done: false
episode_len_mean: 774.1978021978022
episode_reward_max: -25.338032876205922
episode_reward_mean: -30.292802705214363
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 91
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.5002470910549164
      entropy_coeff: 0.005
      kl: 0.009103582851821557
      policy_loss: -0.07372606650460511
      total_loss: -0.07353335514198989
      vf_explained_var: 0.9960519075393677
      vf_loss: 0.00996287289308384
  num_steps_sampled: 75000
  num_steps_trained: 75000
iterations_since_restore: 75
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.142857142857143
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.5
  vram_util_percent0: 0.9449909821282179
  vram_u

[2m[36m(pid=24699)[0m 2020-09-21 15:33:08,291	INFO rollout_worker.py:561 -- Generating sample batch of size 200
2020-09-21 15:33:10,355	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.18183113215491176, 'policy_loss': 0.007094806991517544, 'vf_loss': 0.18771682074293494, 'vf_explained_var': 0.8950558, 'kl': 9.425986578161238e-05, 'entropy': 2.6017543226480484, 'entropy_coeff': 0.005}
2020-09-21 15:33:10,843	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.08107836963608861, 'policy_loss': -0.004877852683421224, 'vf_loss': 0.09851832501590252, 'vf_explained_var': 0.9451932, 'kl': 0.0015083594335010275, 'entropy': 2.602922484278679, 'entropy_coeff': 0.005}
2020-09-21 15:33:11,297	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.042710110545158386, 'policy_loss': -0.01286529831122607, 'vf_loss': 0.0670664079952985, 'vf_explai

2020-09-21 15:33:17,045	DEBUG sgd.py:120 -- 14 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.045183887938037515, 'policy_loss': -0.062182787514757365, 'vf_loss': 0.02651252632495016, 'vf_explained_var': 0.98485315, 'kl': 0.011577200901228935, 'entropy': 2.5973578840494156, 'entropy_coeff': 0.005}
2020-09-21 15:33:17,540	DEBUG sgd.py:120 -- 15 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.048335205065086484, 'policy_loss': -0.06601085839793086, 'vf_loss': 0.027017169748432934, 'vf_explained_var': 0.9863517, 'kl': 0.01215403841342777, 'entropy': 2.5975458920001984, 'entropy_coeff': 0.005}
2020-09-21 15:33:17,989	DEBUG sgd.py:120 -- 16 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.05418466089759022, 'policy_loss': -0.07005185773596168, 'vf_loss': 0.02504749468062073, 'vf_explained_var': 0.9860105, 'kl': 0.01271054259268567, 'entropy': 2.59869222342968, 'entropy_coeff': 0.005}
2020-0

custom_metrics: {}
date: 2020-09-21_15-33-24
done: false
episode_len_mean: 774.1978021978022
episode_reward_max: -25.338032876205922
episode_reward_mean: -30.292802705214363
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 91
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.5943607836961746
      entropy_coeff: 0.005
      kl: 0.016659857472404838
      policy_loss: -0.10013979277573526
      total_loss: -0.08699423831421882
      vf_explained_var: 0.9889909625053406
      vf_loss: 0.021119399461895227
  num_steps_sampled: 76000
  num_steps_trained: 76000
iterations_since_restore: 76
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.214285714285715
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.5
  vram_util_percent0: 0.9449909821282179
  vram_

2020-09-21 15:33:26,829	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.3462482615141198, 'policy_loss': -0.00019389728549867868, 'vf_loss': 0.3594218287616968, 'vf_explained_var': 0.6289357, 'kl': 3.187663187453893e-05, 'entropy': 2.5978507697582245, 'entropy_coeff': 0.005}
2020-09-21 15:33:27,313	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.20954586821608245, 'policy_loss': -0.011209827964194119, 'vf_loss': 0.23290714900940657, 'vf_explained_var': 0.7523544, 'kl': 0.0029408048703771783, 'entropy': 2.60673925280571, 'entropy_coeff': 0.005}
2020-09-21 15:33:27,769	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.13803362648468465, 'policy_loss': -0.014982722117565572, 'vf_loss': 0.16476637357845902, 'vf_explained_var': 0.8253782, 'kl': 0.004338994127465412, 'entropy': 2.610343560576439, 'entropy_coeff': 0.005}
2020-09-21 

2020-09-21 15:33:38,735	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.048170798807404935, 'policy_loss': -0.07747604954056442, 'vf_loss': 0.03878060355782509, 'vf_explained_var': 0.9582643, 'kl': 0.011868764355313033, 'entropy': 2.607197254896164, 'entropy_coeff': 0.005}
2020-09-21 15:33:39,188	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.054212083108723164, 'policy_loss': -0.0828080092323944, 'vf_loss': 0.03790846827905625, 'vf_explained_var': 0.9606568, 'kl': 0.012425040185917169, 'entropy': 2.608009561896324, 'entropy_coeff': 0.005}
2020-09-21 15:33:39,681	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.05097862798720598, 'policy_loss': -0.07834835187532008, 'vf_loss': 0.036630036076530814, 'vf_explained_var': 0.9609525, 'kl': 0.012624477210920304, 'entropy': 2.609532594680786, 'entropy_coeff': 0.005}
2020-09-

custom_metrics: {}
date: 2020-09-21_15-33-40
done: false
episode_len_mean: 792.6236559139785
episode_reward_max: -25.338032876205922
episode_reward_mean: -30.294260318940772
episode_reward_min: -32.22698139089825
episodes_this_iter: 2
episodes_total: 93
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.605051502585411
      entropy_coeff: 0.005
      kl: 0.012366291834041476
      policy_loss: -0.08501029794570059
      total_loss: -0.059407899738289416
      vf_explained_var: 0.9609096646308899
      vf_loss: 0.034917768789455295
  num_steps_sampled: 77000
  num_steps_trained: 77000
iterations_since_restore: 77
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.023809523809525
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.5
  vram_util_percent0: 0.9449909821282179
  vram_

2020-09-21 15:33:42,664	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.20845885970629752, 'policy_loss': -0.0035572658525779843, 'vf_loss': 0.22449988685548306, 'vf_explained_var': 0.79045963, 'kl': 0.000772091247931872, 'entropy': 2.543077453970909, 'entropy_coeff': 0.005}
2020-09-21 15:33:43,117	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.12863808718975633, 'policy_loss': -0.0009931412059813738, 'vf_loss': 0.14190861070528626, 'vf_explained_var': 0.869622, 'kl': 0.0015406925667775795, 'entropy': 2.54791796207428, 'entropy_coeff': 0.005}
2020-09-21 15:33:43,604	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.08759149583056569, 'policy_loss': -0.007331421889830381, 'vf_loss': 0.10643626376986504, 'vf_explained_var': 0.900376, 'kl': 0.0042261611961293966, 'entropy': 2.5562383979558945, 'entropy_coeff': 0.005}
2020-09-21

2020-09-21 15:33:54,565	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.060612653476709966, 'policy_loss': -0.07321750049050024, 'vf_loss': 0.02190259745111689, 'vf_explained_var': 0.978654, 'kl': 0.011296272918116301, 'entropy': 2.5373258143663406, 'entropy_coeff': 0.005}
2020-09-21 15:33:55,030	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.06730361958034337, 'policy_loss': -0.07957761909347028, 'vf_loss': 0.02156159479636699, 'vf_explained_var': 0.97889364, 'kl': 0.011252097901888192, 'entropy': 2.5326445400714874, 'entropy_coeff': 0.005}
2020-09-21 15:33:55,519	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.06373831414384767, 'policy_loss': -0.07626497256569564, 'vf_loss': 0.021531325357500464, 'vf_explained_var': 0.97976047, 'kl': 0.012246579979546368, 'entropy': 2.535728096961975, 'entropy_coeff': 0.005}
2020-

custom_metrics: {}
date: 2020-09-21_15-33-56
done: false
episode_len_mean: 800.5531914893617
episode_reward_max: -25.338032876205922
episode_reward_mean: -30.285133511547578
episode_reward_min: -32.22698139089825
episodes_this_iter: 1
episodes_total: 94
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.5340275317430496
      entropy_coeff: 0.005
      kl: 0.012636190163902938
      policy_loss: -0.08140491251833737
      total_loss: -0.06870259426068515
      vf_explained_var: 0.9801385402679443
      vf_loss: 0.021581604960374534
  num_steps_sampled: 78000
  num_steps_trained: 78000
iterations_since_restore: 78
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.180952380952381
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.5
  vram_util_percent0: 0.9449909821282179
  vram_

2020-09-21 15:33:58,645	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.2785929897800088, 'policy_loss': 0.002592087024822831, 'vf_loss': 0.2882948135957122, 'vf_explained_var': 0.92321646, 'kl': 0.0003589252423378819, 'entropy': 2.4803177565336227, 'entropy_coeff': 0.005}
2020-09-21 15:33:59,097	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.15816312958486378, 'policy_loss': -0.0076098639983683825, 'vf_loss': 0.1777437194250524, 'vf_explained_var': 0.9492743, 'kl': 0.0013399923373071942, 'entropy': 2.4745441526174545, 'entropy_coeff': 0.005}
2020-09-21 15:33:59,585	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.11356481222901493, 'policy_loss': -0.008951925206929445, 'vf_loss': 0.13408320024609566, 'vf_explained_var': 0.9626314, 'kl': 0.002627967951411847, 'entropy': 2.470970243215561, 'entropy_coeff': 0.005}
2020-09-21 

2020-09-21 15:34:10,506	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.008093442767858505, 'policy_loss': -0.0785607680445537, 'vf_loss': 0.07817442994564772, 'vf_explained_var': 0.977759, 'kl': 0.01546021708054468, 'entropy': 2.4690348505973816, 'entropy_coeff': 0.005}
2020-09-21 15:34:10,994	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.017129811574704945, 'policy_loss': -0.08596088382182643, 'vf_loss': 0.07650326821021736, 'vf_explained_var': 0.9773264, 'kl': 0.01557094231247902, 'entropy': 2.468694806098938, 'entropy_coeff': 0.005}
2020-09-21 15:34:11,481	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.013265396002680063, 'policy_loss': -0.08285807736683637, 'vf_loss': 0.07707420852966607, 'vf_explained_var': 0.9781561, 'kl': 0.016227958840318024, 'entropy': 2.4699825793504715, 'entropy_coeff': 0.005}
2020-09-2

custom_metrics: {}
date: 2020-09-21_15-34-12
done: false
episode_len_mean: 800.5531914893617
episode_reward_max: -25.338032876205922
episode_reward_mean: -30.285133511547585
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 94
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.4729457944631577
      entropy_coeff: 0.005
      kl: 0.0165714246686548
      policy_loss: -0.09470743173733354
      total_loss: -0.024961786810308695
      vf_explained_var: 0.9785515666007996
      vf_loss: 0.0771389426663518
  num_steps_sampled: 79000
  num_steps_trained: 79000
iterations_since_restore: 79
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.161904761904761
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.5
  vram_util_percent0: 0.9449909821282179
  vram_uti

[2m[36m(pid=24699)[0m 2020-09-21 15:34:12,668	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 9541.205181028843,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 512},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-6.73, max=6.237, mean=0.378),
[2m[36m(pid=24699)[0m                                   'prev_action': 0,
[2m[36m(pid=24699)[0m

2020-09-21 15:34:14,607	DEBUG rollout_worker.py:760 -- Training out:

{ 'default_policy': { 'learner_stats': { 'allreduce_latency': 0.0,
                                         'cur_kl_coeff': 0.3,
                                         'cur_lr': 1e-05,
                                         'entropy': 2.5327866077423096,
                                         'entropy_coeff': 0.005,
                                         'kl': -1.729004672768042e-08,
                                         'policy_loss': -0.08966276794672012,
                                         'total_loss': 0.263762891292572,
                                         'vf_explained_var': np.ndarray((1,), dtype=float32, min=0.87, max=0.87, mean=0.87),
                                         'vf_loss': 0.3660895824432373}}}

2020-09-21 15:34:14,612	INFO rollout_worker.py:736 -- Training on concatenated sample batches:

{ 'count': 64,
  'policy_batches': { 'default_policy': { 'data': { 'action_dist_inputs'

2020-09-21 15:34:21,702	DEBUG sgd.py:120 -- 14 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.037627337500452995, 'policy_loss': -0.059226869605481625, 'vf_loss': 0.030962759163230658, 'vf_explained_var': 0.98618096, 'kl': 0.01061479962663725, 'entropy': 2.509533241391182, 'entropy_coeff': 0.005}
2020-09-21 15:34:22,191	DEBUG sgd.py:120 -- 15 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.04284884687513113, 'policy_loss': -0.06391971273114905, 'vf_loss': 0.030392734450288117, 'vf_explained_var': 0.98622644, 'kl': 0.010735786287114024, 'entropy': 2.5085219740867615, 'entropy_coeff': 0.005}
2020-09-21 15:34:22,641	DEBUG sgd.py:120 -- 16 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.04157392680644989, 'policy_loss': -0.062268035660963506, 'vf_loss': 0.029909694218076766, 'vf_explained_var': 0.9847951, 'kl': 0.011104267032351345, 'entropy': 2.509372651576996, 'entropy_coeff': 0.005}
20

custom_metrics: {}
date: 2020-09-21_15-34-28
done: false
episode_len_mean: 800.5531914893617
episode_reward_max: -25.338032876205922
episode_reward_mean: -30.285133511547585
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 94
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.5077639371156693
      entropy_coeff: 0.005
      kl: 0.014956506434828043
      policy_loss: -0.08564797067083418
      total_loss: -0.06591374363051727
      vf_explained_var: 0.9870879054069519
      vf_loss: 0.027786095160990953
  num_steps_sampled: 80000
  num_steps_trained: 80000
iterations_since_restore: 80
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.128571428571429
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.5
  vram_util_percent0: 0.9449909821282179
  vram_

[2m[36m(pid=24699)[0m 2020-09-21 15:34:29,096	INFO rollout_worker.py:561 -- Generating sample batch of size 200
2020-09-21 15:34:31,028	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.0944162457017228, 'policy_loss': 0.002790214726701379, 'vf_loss': 0.10424790298566222, 'vf_explained_var': 0.8994887, 'kl': 0.0003791735399864826, 'entropy': 2.5471245497465134, 'entropy_coeff': 0.005}
2020-09-21 15:34:31,516	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.06047721405047923, 'policy_loss': -0.003962576272897422, 'vf_loss': 0.0769682948011905, 'vf_explained_var': 0.9255502, 'kl': 0.0007498915565520292, 'entropy': 2.55069500207901, 'entropy_coeff': 0.005}
2020-09-21 15:34:31,984	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.040747718419879675, 'policy_loss': -0.012602990900631994, 'vf_loss': 0.06552964076399803, 'vf_explain

2020-09-21 15:34:42,937	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.04789686412550509, 'policy_loss': -0.07980656786821783, 'vf_loss': 0.04106686974409968, 'vf_explained_var': 0.9595437, 'kl': 0.012024702737107873, 'entropy': 2.55291448533535, 'entropy_coeff': 0.005}
2020-09-21 15:34:43,395	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.04673458298202604, 'policy_loss': -0.07947085960768163, 'vf_loss': 0.041757358820177615, 'vf_explained_var': 0.9589691, 'kl': 0.012465389911085367, 'entropy': 2.5521390587091446, 'entropy_coeff': 0.005}
2020-09-21 15:34:43,891	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.04702485227608122, 'policy_loss': -0.08098664256976917, 'vf_loss': 0.04304252297151834, 'vf_explained_var': 0.95850563, 'kl': 0.012249174469616264, 'entropy': 2.551096275448799, 'entropy_coeff': 0.005}
2020-09-

custom_metrics: {}
date: 2020-09-21_15-34-44
done: false
episode_len_mean: 816.6875
episode_reward_max: -25.338032876205922
episode_reward_mean: -30.253403599971065
episode_reward_min: -32.22698139089825
episodes_this_iter: 2
episodes_total: 96
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.5515901297330856
      entropy_coeff: 0.005
      kl: 0.012587894685566425
      policy_loss: -0.08924060105346143
      total_loss: -0.056681540329009295
      vf_explained_var: 0.9597105979919434
      vf_loss: 0.041540650418028235
  num_steps_sampled: 81000
  num_steps_trained: 81000
iterations_since_restore: 81
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.166666666666667
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.5
  vram_util_percent0: 0.9449909821282179
  vram_util_per

2020-09-21 15:34:46,999	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.114755607501138, 'policy_loss': -0.003864563594106585, 'vf_loss': 0.1310997288674116, 'vf_explained_var': 0.90150195, 'kl': 0.00013060270242759575, 'entropy': 2.503745809197426, 'entropy_coeff': 0.005}
2020-09-21 15:34:47,484	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.06744194426573813, 'policy_loss': -0.008751533809117973, 'vf_loss': 0.08843169501051307, 'vf_explained_var': 0.93320155, 'kl': 0.0007590526856802171, 'entropy': 2.493186369538307, 'entropy_coeff': 0.005}
2020-09-21 15:34:47,937	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.04482830979395658, 'policy_loss': -0.007201542379334569, 'vf_loss': 0.06380069209262729, 'vf_explained_var': 0.95291865, 'kl': 0.00214105898339767, 'entropy': 2.48263256251812, 'entropy_coeff': 0.005}
2020-09-21 1

2020-09-21 15:34:58,903	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.056770210387185216, 'policy_loss': -0.07207707583438605, 'vf_loss': 0.023560186149552464, 'vf_explained_var': 0.9822778, 'kl': 0.013699353556148708, 'entropy': 2.472623825073242, 'entropy_coeff': 0.005}
2020-09-21 15:34:59,354	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.05933387955883518, 'policy_loss': -0.0754802385927178, 'vf_loss': 0.02428419457282871, 'vf_explained_var': 0.9822366, 'kl': 0.014086286129895598, 'entropy': 2.472743719816208, 'entropy_coeff': 0.005}
2020-09-21 15:34:59,845	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.06646176613867283, 'policy_loss': -0.08172552310861647, 'vf_loss': 0.02319261519005522, 'vf_explained_var': 0.98227865, 'kl': 0.0147675676853396, 'entropy': 2.4718261063098907, 'entropy_coeff': 0.005}
2020-09-2

custom_metrics: {}
date: 2020-09-21_15-35-00
done: false
episode_len_mean: 816.6875
episode_reward_max: -25.338032876205922
episode_reward_mean: -30.253403599971065
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 96
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.4724182784557343
      entropy_coeff: 0.005
      kl: 0.01492063730256632
      policy_loss: -0.08166948845610023
      total_loss: -0.06661844509653747
      vf_explained_var: 0.9824989438056946
      vf_loss: 0.022936939145438373
  num_steps_sampled: 82000
  num_steps_trained: 82000
iterations_since_restore: 82
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.009523809523809
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.5
  vram_util_percent0: 0.9449909821282179
  vram_util_perce

2020-09-21 15:35:03,394	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.36094337422400713, 'policy_loss': 0.0028858744772151113, 'vf_loss': 0.3699953071773052, 'vf_explained_var': 0.90307665, 'kl': 0.0006045574738509085, 'entropy': 2.4238344728946686, 'entropy_coeff': 0.005}
2020-09-21 15:35:03,882	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.2652704515494406, 'policy_loss': -0.004535265266895294, 'vf_loss': 0.28160401340574026, 'vf_explained_var': 0.9245863, 'kl': 0.0011880270176334307, 'entropy': 2.430940702557564, 'entropy_coeff': 0.005}
2020-09-21 15:35:04,332	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.22296261723386124, 'policy_loss': -0.01861660450231284, 'vf_loss': 0.2525575291365385, 'vf_explained_var': 0.93443346, 'kl': 0.0039156326674856246, 'entropy': 2.430600792169571, 'entropy_coeff': 0.005}
2020-09-21 

2020-09-21 15:35:14,668	DEBUG rollout_worker.py:760 -- Training out:

{ 'default_policy': { 'learner_stats': { 'allreduce_latency': 0.0,
                                         'cur_kl_coeff': 0.3,
                                         'cur_lr': 1e-05,
                                         'entropy': 2.3913700580596924,
                                         'entropy_coeff': 0.005,
                                         'kl': 0.013655422255396843,
                                         'policy_loss': 0.02702011540532112,
                                         'total_loss': 0.2330852448940277,
                                         'vf_explained_var': np.ndarray((1,), dtype=float32, min=0.935, max=0.935, mean=0.935),
                                         'vf_loss': 0.21392537653446198}}}

2020-09-21 15:35:14,777	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.09787377971224487, 'policy_loss': -0.0856442239601165

custom_metrics: {}
date: 2020-09-21_15-35-17
done: false
episode_len_mean: 820.2474226804123
episode_reward_max: -25.338032876205922
episode_reward_mean: -30.24457708162477
episode_reward_min: -32.22698139089825
episodes_this_iter: 1
episodes_total: 97
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.42844295501709
      entropy_coeff: 0.005
      kl: 0.015772292099427432
      policy_loss: -0.08444526395760477
      total_loss: 0.09168612235225737
      vf_explained_var: 0.9507811665534973
      vf_loss: 0.1835419130511582
  num_steps_sampled: 83000
  num_steps_trained: 83000
iterations_since_restore: 83
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.10952380952381
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.5
  vram_util_percent0: 0.9449909821282179
  vram_util_pe

2020-09-21 15:35:19,250	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.266166337300092, 'policy_loss': -0.0006104283966124058, 'vf_loss': 0.27903496753424406, 'vf_explained_var': 0.8023998, 'kl': 0.0003283084891259058, 'entropy': 2.4713380187749863, 'entropy_coeff': 0.005}
2020-09-21 15:35:19,739	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.15556177496910095, 'policy_loss': -0.015146593912504613, 'vf_loss': 0.18213076144456863, 'vf_explained_var': 0.86404186, 'kl': 0.002928921669081319, 'entropy': 2.4602138996124268, 'entropy_coeff': 0.005}
2020-09-21 15:35:20,185	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.09476683067623526, 'policy_loss': -0.02217188267968595, 'vf_loss': 0.12680982099846005, 'vf_explained_var': 0.9096571, 'kl': 0.007839506433811039, 'entropy': 2.444591134786606, 'entropy_coeff': 0.005}
2020-09-21 

2020-09-21 15:35:31,036	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.08941426174715161, 'policy_loss': -0.0956703806295991, 'vf_loss': 0.014292927808128297, 'vf_explained_var': 0.9894532, 'kl': 0.013773399870842695, 'entropy': 2.4337659776210785, 'entropy_coeff': 0.005}
2020-09-21 15:35:31,524	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.0891873080690857, 'policy_loss': -0.09501438933511963, 'vf_loss': 0.01342812494840473, 'vf_explained_var': 0.98990226, 'kl': 0.015171695442404598, 'entropy': 2.430509567260742, 'entropy_coeff': 0.005}
2020-09-21 15:35:32,027	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.08992966287769377, 'policy_loss': -0.09530648565851152, 'vf_loss': 0.012903704715427011, 'vf_explained_var': 0.9899671, 'kl': 0.01546502357814461, 'entropy': 2.4332782179117203, 'entropy_coeff': 0.005}
2020-09-

custom_metrics: {}
date: 2020-09-21_15-35-33
done: false
episode_len_mean: 831.1313131313132
episode_reward_max: -25.338032876205922
episode_reward_mean: -30.26606612844858
episode_reward_min: -32.22698139089825
episodes_this_iter: 2
episodes_total: 99
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.430670440196991
      entropy_coeff: 0.005
      kl: 0.015658037911634892
      policy_loss: -0.09976030004327185
      total_loss: -0.09547398024005815
      vf_explained_var: 0.9913749098777771
      vf_loss: 0.011742254806449637
  num_steps_sampled: 84000
  num_steps_trained: 84000
iterations_since_restore: 84
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.147619047619047
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.5
  vram_util_percent0: 0.9449909821282179
  vram_ut

[2m[36m(pid=24699)[0m 2020-09-21 15:35:33,229	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 10055.523316275618,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 74},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-7.879, max=6.878, mean=0.293),
[2m[36m(pid=24699)[0m                                   'prev_action': 8,
[2m[36m(pid=24699)[0

2020-09-21 15:35:35,079	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.08643282565753907, 'policy_loss': -0.007872199872508645, 'vf_loss': 0.10630321898497641, 'vf_explained_var': 0.96766245, 'kl': 0.00013493473458225935, 'entropy': 2.4077323973178864, 'entropy_coeff': 0.005}
2020-09-21 15:35:35,536	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.04890913749113679, 'policy_loss': -0.0021360845712479204, 'vf_loss': 0.06249092519283295, 'vf_explained_var': 0.98042697, 'kl': 0.001962536607607035, 'entropy': 2.4068921208381653, 'entropy_coeff': 0.005}
2020-09-21 15:35:36,025	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.016592932399362326, 'policy_loss': -0.016429075330961496, 'vf_loss': 0.043119684560224414, 'vf_explained_var': 0.98672944, 'kl': 0.006397719276719727, 'entropy': 2.403398022055626, 'entropy_coeff': 0.005}
202

2020-09-21 15:35:46,953	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.06845148676075041, 'policy_loss': -0.07246651206514798, 'vf_loss': 0.011600268364418298, 'vf_explained_var': 0.9963206, 'kl': 0.014313282503280789, 'entropy': 2.375846341252327, 'entropy_coeff': 0.005}
2020-09-21 15:35:47,442	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.07484025624580681, 'policy_loss': -0.07877190492581576, 'vf_loss': 0.011631813598796725, 'vf_explained_var': 0.9962803, 'kl': 0.013945858634542674, 'entropy': 2.376784384250641, 'entropy_coeff': 0.005}
2020-09-21 15:35:47,936	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.07292557903565466, 'policy_loss': -0.07666261529084295, 'vf_loss': 0.011283548054052517, 'vf_explained_var': 0.99640965, 'kl': 0.014481702237389982, 'entropy': 2.378205344080925, 'entropy_coeff': 0.005}
2020-0

custom_metrics: {}
date: 2020-09-21_15-35-48
done: false
episode_len_mean: 831.1313131313132
episode_reward_max: -25.338032876205922
episode_reward_mean: -30.266066128448585
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 99
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.3769525438547134
      entropy_coeff: 0.005
      kl: 0.01467832934577018
      policy_loss: -0.08515507844276726
      total_loss: -0.08172523882240057
      vf_explained_var: 0.9963690042495728
      vf_loss: 0.010911103221587837
  num_steps_sampled: 85000
  num_steps_trained: 85000
iterations_since_restore: 85
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.061904761904763
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.5
  vram_util_percent0: 0.9449909821282179
  vram_u

[2m[36m(pid=24699)[0m 2020-09-21 15:35:49,115	INFO rollout_worker.py:561 -- Generating sample batch of size 200
2020-09-21 15:35:51,208	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.2726674908772111, 'policy_loss': 0.00037724111462011933, 'vf_loss': 0.28379349038004875, 'vf_explained_var': 0.93641585, 'kl': 0.00033532441917116795, 'entropy': 2.320767968893051, 'entropy_coeff': 0.005}
2020-09-21 15:35:51,656	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.1990436394698918, 'policy_loss': -0.003910379484295845, 'vf_loss': 0.2143190149217844, 'vf_explained_var': 0.9503645, 'kl': 0.0008309396125696367, 'entropy': 2.322855696082115, 'entropy_coeff': 0.005}
2020-09-21 15:35:52,144	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.16121769160963595, 'policy_loss': -0.010260555427521467, 'vf_loss': 0.18261588597670197, 'vf_expla

2020-09-21 15:36:03,082	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.049869328271597624, 'policy_loss': -0.07967171666678041, 'vf_loss': 0.13796418579295278, 'vf_explained_var': 0.96547663, 'kl': 0.010940755193587393, 'entropy': 2.3410739600658417, 'entropy_coeff': 0.005}
2020-09-21 15:36:03,574	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.04876548459287733, 'policy_loss': -0.07922207424417138, 'vf_loss': 0.1363347675651312, 'vf_explained_var': 0.965423, 'kl': 0.011195711209438741, 'entropy': 2.341183766722679, 'entropy_coeff': 0.005}
2020-09-21 15:36:04,063	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.046496832743287086, 'policy_loss': -0.08261451357975602, 'vf_loss': 0.1374621642753482, 'vf_explained_var': 0.96588594, 'kl': 0.011218516854569316, 'entropy': 2.3432751446962357, 'entropy_coeff': 0.005}
2020-09-21

custom_metrics: {}
date: 2020-09-21_15-36-05
done: false
episode_len_mean: 831.1313131313132
episode_reward_max: -25.338032876205922
episode_reward_mean: -30.266066128448585
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 99
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.3423280119895935
      entropy_coeff: 0.005
      kl: 0.011727634526323527
      policy_loss: -0.09071685490198433
      total_loss: 0.03907224233262241
      vf_explained_var: 0.9660089015960693
      vf_loss: 0.13798244949430227
  num_steps_sampled: 86000
  num_steps_trained: 86000
iterations_since_restore: 86
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.195238095238095
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.5
  vram_util_percent0: 0.9449909821282179
  vram_ut

2020-09-21 15:36:07,212	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.20890329778194427, 'policy_loss': 0.002181065734475851, 'vf_loss': 0.21882173232734203, 'vf_explained_var': 0.8512776, 'kl': 0.0006519220737346149, 'entropy': 2.4590159207582474, 'entropy_coeff': 0.005}
2020-09-21 15:36:07,661	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.1079052418936044, 'policy_loss': -0.011377162300050259, 'vf_loss': 0.13100489554926753, 'vf_explained_var': 0.9092078, 'kl': 0.0018043472664430737, 'entropy': 2.4527579993009567, 'entropy_coeff': 0.005}
2020-09-21 15:36:08,144	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.0681629455066286, 'policy_loss': -0.03127273160498589, 'vf_loss': 0.10994432121515274, 'vf_explained_var': 0.9219532, 'kl': 0.005684536517946981, 'entropy': 2.4428012669086456, 'entropy_coeff': 0.005}
2020-09-21 1

2020-09-21 15:36:14,801	DEBUG sgd.py:120 -- 16 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.04718677012715489, 'policy_loss': -0.09362657397286966, 'vf_loss': 0.05399748496711254, 'vf_explained_var': 0.9605396, 'kl': 0.015243473346345127, 'entropy': 2.4261444360017776, 'entropy_coeff': 0.005}
2020-09-21 15:36:15,261	DEBUG sgd.py:120 -- 17 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.04991793748922646, 'policy_loss': -0.0962527533993125, 'vf_loss': 0.053583310917019844, 'vf_explained_var': 0.9597102, 'kl': 0.016209109337069094, 'entropy': 2.422244906425476, 'entropy_coeff': 0.005}
2020-09-21 15:36:15,739	DEBUG sgd.py:120 -- 18 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.03961346636060625, 'policy_loss': -0.08522496826481074, 'vf_loss': 0.0530287423171103, 'vf_explained_var': 0.9598445, 'kl': 0.01567778451135382, 'entropy': 2.424114853143692, 'entropy_coeff': 0.005}
2020-09-21 

custom_metrics: {}
date: 2020-09-21_15-36-20
done: false
episode_len_mean: 831.1313131313132
episode_reward_max: -25.338032876205922
episode_reward_mean: -30.266066128448585
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 99
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.4142653793096542
      entropy_coeff: 0.005
      kl: 0.01934314868412912
      policy_loss: -0.1058391616679728
      total_loss: -0.06361942598596215
      vf_explained_var: 0.9647205471992493
      vf_loss: 0.048488116124644876
  num_steps_sampled: 87000
  num_steps_trained: 87000
iterations_since_restore: 87
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.128571428571429
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.557142857142857
  vram_util_percent0: 0.944990982128

2020-09-21 15:36:23,231	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.10958552465308458, 'policy_loss': -0.002517591987270862, 'vf_loss': 0.12374906847253442, 'vf_explained_var': 0.8699669, 'kl': 0.0016002119910055734, 'entropy': 2.4252024590969086, 'entropy_coeff': 0.005}
2020-09-21 15:36:23,687	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.05666193808428943, 'policy_loss': -0.023573010927066207, 'vf_loss': 0.08856518613174558, 'vf_explained_var': 0.9102134, 'kl': 0.012211584573378786, 'entropy': 2.398741692304611, 'entropy_coeff': 0.005}
2020-09-21 15:36:24,175	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.03294904972426593, 'policy_loss': -0.02869726240169257, 'vf_loss': 0.07025077380239964, 'vf_explained_var': 0.92599726, 'kl': 0.01123495667707175, 'entropy': 2.394990235567093, 'entropy_coeff': 0.005}
2020-09-21 1

2020-09-21 15:36:35,109	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.056226963235531, 'policy_loss': -0.07879964384483173, 'vf_loss': 0.030682808137498796, 'vf_explained_var': 0.9675661, 'kl': 0.012843386735767126, 'entropy': 2.3926278203725815, 'entropy_coeff': 0.005}
2020-09-21 15:36:35,602	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.058972242404706776, 'policy_loss': -0.07977545645553619, 'vf_loss': 0.028964909026399255, 'vf_explained_var': 0.9682771, 'kl': 0.012614992971066386, 'entropy': 2.3892389982938766, 'entropy_coeff': 0.005}
2020-09-21 15:36:36,091	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.06423932546749711, 'policy_loss': -0.08550040691625327, 'vf_loss': 0.029234819463454187, 'vf_explained_var': 0.9678108, 'kl': 0.013253926765173674, 'entropy': 2.3899828791618347, 'entropy_coeff': 0.005}
2020-

custom_metrics: {}
date: 2020-09-21_15-36-37
done: false
episode_len_mean: 831.1313131313132
episode_reward_max: -25.338032876205922
episode_reward_mean: -30.266066128448585
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 99
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.390056312084198
      entropy_coeff: 0.005
      kl: 0.014741428080014884
      policy_loss: -0.08261318400036544
      total_loss: -0.062013048212975264
      vf_explained_var: 0.968713641166687
      vf_loss: 0.028127992409281433
  num_steps_sampled: 88000
  num_steps_trained: 88000
iterations_since_restore: 88
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.1761904761904765
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.595238095238093
  vram_util_percent0: 0.9449909821

2020-09-21 15:36:39,503	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.18188581662252545, 'policy_loss': -0.0007229126058518887, 'vf_loss': 0.1942635765299201, 'vf_explained_var': 0.7791232, 'kl': 0.0005523356879904018, 'entropy': 2.3641098737716675, 'entropy_coeff': 0.005}
2020-09-21 15:36:39,987	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.1066855767276138, 'policy_loss': -0.0067117956932634115, 'vf_loss': 0.12401771685108542, 'vf_explained_var': 0.8614429, 'kl': 0.004030313677503727, 'entropy': 2.365888550877571, 'entropy_coeff': 0.005}
2020-09-21 15:36:40,441	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.06536682974547148, 'policy_loss': -0.0218379944562912, 'vf_loss': 0.09679529909044504, 'vf_explained_var': 0.8868079, 'kl': 0.007565696520032361, 'entropy': 2.372034505009651, 'entropy_coeff': 0.005}
2020-09-21 15

2020-09-21 15:36:51,380	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.07394937821663916, 'policy_loss': -0.09130850061774254, 'vf_loss': 0.024270127760246396, 'vf_explained_var': 0.973083, 'kl': 0.016337831679265946, 'entropy': 2.3624712228775024, 'entropy_coeff': 0.005}
2020-09-21 15:36:51,835	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.06869276243378408, 'policy_loss': -0.08541938685812056, 'vf_loss': 0.02347067801747471, 'vf_explained_var': 0.9730047, 'kl': 0.016930608602706343, 'entropy': 2.364648684859276, 'entropy_coeff': 0.005}
2020-09-21 15:36:52,324	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.06910106248687953, 'policy_loss': -0.0840015267021954, 'vf_loss': 0.022034394554793835, 'vf_explained_var': 0.9742818, 'kl': 0.015608425659593195, 'entropy': 2.363290697336197, 'entropy_coeff': 0.005}
2020-09-2

custom_metrics: {}
date: 2020-09-21_15-36-53
done: false
episode_len_mean: 837.01
episode_reward_max: -25.338032876205922
episode_reward_mean: -30.27323979948128
episode_reward_min: -32.22698139089825
episodes_this_iter: 1
episodes_total: 100
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.3616145104169846
      entropy_coeff: 0.005
      kl: 0.01672870689071715
      policy_loss: -0.10232190985698253
      total_loss: -0.08710463554598391
      vf_explained_var: 0.974052369594574
      vf_loss: 0.022006738057825714
  num_steps_sampled: 89000
  num_steps_trained: 89000
iterations_since_restore: 89
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.476190476190477
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.514285714285714
  vram_util_percent0: 0.9449909821282179
  vram

[2m[36m(pid=24699)[0m 2020-09-21 15:36:53,527	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 10676.956878079076,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 1074},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-2.159, max=2.057, mean=0.413),
[2m[36m(pid=24699)[0m                                   'prev_action': 12,
[2m[36m(pid=24699)

2020-09-21 15:36:55,452	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.1559967678040266, 'policy_loss': -0.003378775669261813, 'vf_loss': 0.171106297057122, 'vf_explained_var': 0.91690546, 'kl': 0.0004534424691703953, 'entropy': 2.3733576238155365, 'entropy_coeff': 0.005}
2020-09-21 15:36:55,936	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.06224801065400243, 'policy_loss': -0.013632459420477971, 'vf_loss': 0.08568508690223098, 'vf_explained_var': 0.9536232, 'kl': 0.006699956647935323, 'entropy': 2.362921327352524, 'entropy_coeff': 0.005}
2020-09-21 15:36:56,381	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.022307993727736175, 'policy_loss': -0.02324196172412485, 'vf_loss': 0.05469158128835261, 'vf_explained_var': 0.9711178, 'kl': 0.00889559104689397, 'entropy': 2.362060099840164, 'entropy_coeff': 0.005}
2020-09-21 15:

2020-09-21 15:37:07,258	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.07218991057015955, 'policy_loss': -0.08575343585107476, 'vf_loss': 0.020460471569094807, 'vf_explained_var': 0.98918796, 'kl': 0.016530072782188654, 'entropy': 2.3711924850940704, 'entropy_coeff': 0.005}
2020-09-21 15:37:07,743	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.08201236487366259, 'policy_loss': -0.09596679755486548, 'vf_loss': 0.020969903853256255, 'vf_explained_var': 0.98952687, 'kl': 0.016101238667033613, 'entropy': 2.3691683411598206, 'entropy_coeff': 0.005}
2020-09-21 15:37:08,223	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.08712699427269399, 'policy_loss': -0.10123859450686723, 'vf_loss': 0.020753904595039785, 'vf_explained_var': 0.98958385, 'kl': 0.017365566454827785, 'entropy': 2.3703943639993668, 'entropy_coeff': 0.005}
2

custom_metrics: {}
date: 2020-09-21_15-37-09
done: false
episode_len_mean: 851.95
episode_reward_max: -25.338032876205922
episode_reward_mean: -30.25318990156063
episode_reward_min: -32.22698139089825
episodes_this_iter: 1
episodes_total: 101
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.3687161952257156
      entropy_coeff: 0.005
      kl: 0.016841620672494173
      policy_loss: -0.10484302090480924
      total_loss: -0.09247221797704697
      vf_explained_var: 0.9890002608299255
      vf_loss: 0.019161900621838868
  num_steps_sampled: 90000
  num_steps_trained: 90000
iterations_since_restore: 90
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.747619047619048
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.599999999999998
  vram_util_percent0: 0.9449909821282179
  vr

[2m[36m(pid=24699)[0m 2020-09-21 15:37:09,412	INFO rollout_worker.py:561 -- Generating sample batch of size 200
2020-09-21 15:37:11,835	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.13734256546013057, 'policy_loss': -0.006047200062312186, 'vf_loss': 0.15515096485614777, 'vf_explained_var': 0.88111186, 'kl': 3.962272705726466e-05, 'entropy': 2.35461762547493, 'entropy_coeff': 0.005}
2020-09-21 15:37:12,324	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.07061097875703126, 'policy_loss': -0.003395679988898337, 'vf_loss': 0.08562663197517395, 'vf_explained_var': 0.9401408, 'kl': 0.0004398118417157093, 'entropy': 2.350382938981056, 'entropy_coeff': 0.005}
2020-09-21 15:37:12,779	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.03815877053420991, 'policy_loss': -0.009199003805406392, 'vf_loss': 0.05878717661835253, 'vf_expla

2020-09-21 15:37:18,499	DEBUG sgd.py:120 -- 14 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.05163898004684597, 'policy_loss': -0.06417625758331269, 'vf_loss': 0.021607323084026575, 'vf_explained_var': 0.98427534, 'kl': 0.008949409471824765, 'entropy': 2.350973352789879, 'entropy_coeff': 0.005}
2020-09-21 15:37:18,983	DEBUG sgd.py:120 -- 15 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.05600463575683534, 'policy_loss': -0.06792240706272423, 'vf_loss': 0.020920514012686908, 'vf_explained_var': 0.98479605, 'kl': 0.00920360058080405, 'entropy': 2.352763906121254, 'entropy_coeff': 0.005}
2020-09-21 15:37:19,440	DEBUG sgd.py:120 -- 16 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.05348693020641804, 'policy_loss': -0.06533999159000814, 'vf_loss': 0.02069964789552614, 'vf_explained_var': 0.98119605, 'kl': 0.00964354071766138, 'entropy': 2.347929745912552, 'entropy_coeff': 0.005}
2020-09

custom_metrics: {}
date: 2020-09-21_15-37-25
done: false
episode_len_mean: 851.95
episode_reward_max: -25.338032876205922
episode_reward_mean: -30.253189901560628
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 101
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.3451689928770065
      entropy_coeff: 0.005
      kl: 0.012652084929868579
      policy_loss: -0.09461100620683283
      total_loss: -0.08508616156177595
      vf_explained_var: 0.987709105014801
      vf_loss: 0.017455057124607265
  num_steps_sampled: 91000
  num_steps_trained: 91000
iterations_since_restore: 91
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.2
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.599999999999998
  vram_util_percent0: 0.9449909821282179
  vram_util_percen

2020-09-21 15:37:27,826	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.1580920573323965, 'policy_loss': 7.251622446347028e-05, 'vf_loss': 0.1696327063255012, 'vf_explained_var': 0.9531288, 'kl': 0.0005070185210373501, 'entropy': 2.3530542254447937, 'entropy_coeff': 0.005}
2020-09-21 15:37:28,316	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.07847422151826322, 'policy_loss': -0.014856404101010412, 'vf_loss': 0.10393302096053958, 'vf_explained_var': 0.97068655, 'kl': 0.0036078173216083087, 'entropy': 2.3369484543800354, 'entropy_coeff': 0.005}
2020-09-21 15:37:28,768	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.05182863213121891, 'policy_loss': -0.018597992602735758, 'vf_loss': 0.07916331687010825, 'vf_explained_var': 0.9774822, 'kl': 0.00960933740134351, 'entropy': 2.3238987773656845, 'entropy_coeff': 0.005}
2020-09-21

2020-09-21 15:37:39,734	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.06973602378275245, 'policy_loss': -0.08739757345756516, 'vf_loss': 0.025526513578370214, 'vf_explained_var': 0.9922328, 'kl': 0.012578029476571828, 'entropy': 2.3276747167110443, 'entropy_coeff': 0.005}
2020-09-21 15:37:40,190	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.07200255594216287, 'policy_loss': -0.08986886579077691, 'vf_loss': 0.025584960822016, 'vf_explained_var': 0.9926002, 'kl': 0.013071199471596628, 'entropy': 2.3280012160539627, 'entropy_coeff': 0.005}
2020-09-21 15:37:40,678	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.07191143999807537, 'policy_loss': -0.09015327971428633, 'vf_loss': 0.025611415854655206, 'vf_explained_var': 0.9925524, 'kl': 0.014274606539402157, 'entropy': 2.3303909301757812, 'entropy_coeff': 0.005}
2020-09

custom_metrics: {}
date: 2020-09-21_15-37-41
done: false
episode_len_mean: 866.54
episode_reward_max: -25.26537357940651
episode_reward_mean: -30.210791317753365
episode_reward_min: -32.22698139089825
episodes_this_iter: 1
episodes_total: 102
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.334437981247902
      entropy_coeff: 0.005
      kl: 0.01348037552088499
      policy_loss: -0.09930533927399665
      total_loss: -0.08229712047614157
      vf_explained_var: 0.9926716089248657
      vf_loss: 0.024636295973323286
  num_steps_sampled: 92000
  num_steps_trained: 92000
iterations_since_restore: 92
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.161904761904761
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.599999999999998
  vram_util_percent0: 0.9449909821282179
  vram

2020-09-21 15:37:44,259	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.17488476010112208, 'policy_loss': -0.0028520069317892194, 'vf_loss': 0.18938567209988832, 'vf_explained_var': 0.9220811, 'kl': 0.000382445028074635, 'entropy': 2.3527285754680634, 'entropy_coeff': 0.005}
2020-09-21 15:37:44,737	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.0878084198338911, 'policy_loss': -0.014323120703920722, 'vf_loss': 0.1131521794013679, 'vf_explained_var': 0.9543601, 'kl': 0.002380780591920484, 'entropy': 2.346974268555641, 'entropy_coeff': 0.005}
2020-09-21 15:37:45,194	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.05279905814677477, 'policy_loss': -0.022214805823750794, 'vf_loss': 0.08500520046800375, 'vf_explained_var': 0.96415645, 'kl': 0.005690457561286166, 'entropy': 2.3396941274404526, 'entropy_coeff': 0.005}
2020-09-21 

2020-09-21 15:37:56,181	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.04639534023590386, 'policy_loss': -0.0869117999682203, 'vf_loss': 0.04882762429770082, 'vf_explained_var': 0.9788221, 'kl': 0.011316077783703804, 'entropy': 2.3411969393491745, 'entropy_coeff': 0.005}
2020-09-21 15:37:56,633	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.04894344590138644, 'policy_loss': -0.09037248906679451, 'vf_loss': 0.04912119428627193, 'vf_explained_var': 0.9790731, 'kl': 0.013275422272272408, 'entropy': 2.3349556028842926, 'entropy_coeff': 0.005}
2020-09-21 15:37:57,114	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.04829416412394494, 'policy_loss': -0.08914917713264003, 'vf_loss': 0.04901041719131172, 'vf_explained_var': 0.97904325, 'kl': 0.011789677606429905, 'entropy': 2.3384611159563065, 'entropy_coeff': 0.005}
2020-09

custom_metrics: {}
date: 2020-09-21_15-37-58
done: false
episode_len_mean: 879.82
episode_reward_max: -25.26537357940651
episode_reward_mean: -30.210795350929146
episode_reward_min: -32.22698139089825
episodes_this_iter: 1
episodes_total: 103
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.339925602078438
      entropy_coeff: 0.005
      kl: 0.012076275190338492
      policy_loss: -0.09602058748714626
      total_loss: -0.055659665958955884
      vf_explained_var: 0.9791046380996704
      vf_loss: 0.04843766882549971
  num_steps_sampled: 93000
  num_steps_trained: 93000
iterations_since_restore: 93
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.023809523809523
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.599999999999998
  vram_util_percent0: 0.9449909821282179
  vra

2020-09-21 15:38:00,229	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.19697805715259165, 'policy_loss': -0.0008206027323467424, 'vf_loss': 0.2091088593006134, 'vf_explained_var': 0.9438939, 'kl': 0.00027613811128879284, 'entropy': 2.278609201312065, 'entropy_coeff': 0.005}
2020-09-21 15:38:00,722	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.09383909031748772, 'policy_loss': -0.01334489620057866, 'vf_loss': 0.11777744232676923, 'vf_explained_var': 0.9662285, 'kl': 0.0026405542776046786, 'entropy': 2.277126729488373, 'entropy_coeff': 0.005}
2020-09-21 15:38:01,176	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.04301842179847881, 'policy_loss': -0.024898458912502974, 'vf_loss': 0.0781895958352834, 'vf_explained_var': 0.9795616, 'kl': 0.003781835868721828, 'entropy': 2.2814539968967438, 'entropy_coeff': 0.005}
2020-09-21 

2020-09-21 15:38:12,144	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.09733162098564208, 'policy_loss': -0.09535308484919369, 'vf_loss': 0.0040903117042034864, 'vf_explained_var': 0.99884784, 'kl': 0.01791012450121343, 'entropy': 2.2883774787187576, 'entropy_coeff': 0.005}
2020-09-21 15:38:12,596	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.1088505998486653, 'policy_loss': -0.1067705488530919, 'vf_loss': 0.004045746987685561, 'vf_explained_var': 0.99890584, 'kl': 0.017760514630936086, 'entropy': 2.290789097547531, 'entropy_coeff': 0.005}
2020-09-21 15:38:13,082	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.10409050190355629, 'policy_loss': -0.10216224624309689, 'vf_loss': 0.004046898044180125, 'vf_explained_var': 0.9989784, 'kl': 0.01820210670121014, 'entropy': 2.287156254053116, 'entropy_coeff': 0.005}
2020-09

custom_metrics: {}
date: 2020-09-21_15-38-14
done: false
episode_len_mean: 879.82
episode_reward_max: -25.26537357940651
episode_reward_mean: -30.210795350929143
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 103
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.289584368467331
      entropy_coeff: 0.005
      kl: 0.019208510173484683
      policy_loss: -0.104935363488039
      total_loss: -0.10699666923028417
      vf_explained_var: 0.9990299940109253
      vf_loss: 0.003624063356255647
  num_steps_sampled: 94000
  num_steps_trained: 94000
iterations_since_restore: 94
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.238095238095239
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.599999999999998
  vram_util_percent0: 0.9449909821282179
  vram_

[2m[36m(pid=24699)[0m 2020-09-21 15:38:14,307	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 5174.958064314263,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 2074},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-1.828, max=3.047, mean=0.305),
[2m[36m(pid=24699)[0m                                   'prev_action': 1,
[2m[36m(pid=24699)[

2020-09-21 15:38:15,835	INFO rollout_worker.py:736 -- Training on concatenated sample batches:

{ 'count': 64,
  'policy_batches': { 'default_policy': { 'data': { 'action_dist_inputs': np.ndarray((64, 21), dtype=float32, min=-4.139, max=4.327, mean=-0.046),
                                                    'action_logp': np.ndarray((64,), dtype=float32, min=-5.724, max=-0.562, mean=-2.231),
                                                    'action_prob': np.ndarray((64,), dtype=float32, min=0.003, max=0.57, mean=0.164),
                                                    'actions': np.ndarray((64,), dtype=int64, min=0.0, max=20.0, mean=8.422),
                                                    'advantages': np.ndarray((64,), dtype=float32, min=-1.899, max=2.567, mean=-0.089),
                                                    'agent_index': np.ndarray((64,), dtype=int64, min=0.0, max=0.0, mean=0.0),
                                                    'dones': np.ndarray((64,), dt

2020-09-21 15:38:24,330	DEBUG sgd.py:120 -- 17 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.06205292162485421, 'policy_loss': -0.06472282577306032, 'vf_loss': 0.011275295750238001, 'vf_explained_var': 0.9966363, 'kl': 0.009337116323877126, 'entropy': 2.2813048511743546, 'entropy_coeff': 0.005}
2020-09-21 15:38:24,782	DEBUG sgd.py:120 -- 18 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.06339734519133344, 'policy_loss': -0.06513452375656925, 'vf_loss': 0.010281228402163833, 'vf_explained_var': 0.996687, 'kl': 0.009449778066482395, 'entropy': 2.2757960855960846, 'entropy_coeff': 0.005}
2020-09-21 15:38:25,268	DEBUG sgd.py:120 -- 19 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.07088510587345809, 'policy_loss': -0.07189415208995342, 'vf_loss': 0.009473092330154032, 'vf_explained_var': 0.99689066, 'kl': 0.009670118044596165, 'entropy': 2.273016557097435, 'entropy_coeff': 0.005}
2020-

custom_metrics: {}
date: 2020-09-21_15-38-30
done: false
episode_len_mean: 896.92
episode_reward_max: -25.26537357940651
episode_reward_mean: -30.20626608947816
episode_reward_min: -32.22698139089825
episodes_this_iter: 1
episodes_total: 104
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.273353323340416
      entropy_coeff: 0.005
      kl: 0.011407410493120551
      policy_loss: -0.0876167094338598
      total_loss: -0.09075182862579823
      vf_explained_var: 0.9984906911849976
      vf_loss: 0.004809423538972624
  num_steps_sampled: 95000
  num_steps_trained: 95000
iterations_since_restore: 95
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.147619047619047
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.599999999999998
  vram_util_percent0: 0.9449909821282179
  vram_

[2m[36m(pid=24699)[0m 2020-09-21 15:38:30,284	INFO rollout_worker.py:561 -- Generating sample batch of size 200
2020-09-21 15:38:32,549	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.15140724426601082, 'policy_loss': -0.004962237668223679, 'vf_loss': 0.1680591250769794, 'vf_explained_var': 0.93800044, 'kl': 9.614183029232892e-05, 'entropy': 2.3436976224184036, 'entropy_coeff': 0.005}
2020-09-21 15:38:33,039	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.06368991400813684, 'policy_loss': -0.0113496002741158, 'vf_loss': 0.0863056315574795, 'vf_explained_var': 0.9699512, 'kl': 0.0014141878054942936, 'entropy': 2.33807435631752, 'entropy_coeff': 0.005}
2020-09-21 15:38:33,495	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.021209572441875935, 'policy_loss': -0.021978195873089135, 'vf_loss': 0.05362261971458793, 'vf_explain

2020-09-21 15:38:44,465	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.09640834666788578, 'policy_loss': -0.0925431945361197, 'vf_loss': 0.003170474352373276, 'vf_explained_var': 0.99882793, 'kl': 0.01568085909821093, 'entropy': 2.3479781597852707, 'entropy_coeff': 0.005}
2020-09-21 15:38:44,915	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.10586252249777317, 'policy_loss': -0.10189984692260623, 'vf_loss': 0.002913933072704822, 'vf_explained_var': 0.9988721, 'kl': 0.01617366954451427, 'entropy': 2.34574256837368, 'entropy_coeff': 0.005}
2020-09-21 15:38:45,401	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.09867556020617485, 'policy_loss': -0.09458339505363256, 'vf_loss': 0.002761493800790049, 'vf_explained_var': 0.9988805, 'kl': 0.016332949744537473, 'entropy': 2.350708156824112, 'entropy_coeff': 0.005}
2020-09-2

custom_metrics: {}
date: 2020-09-21_15-38-46
done: false
episode_len_mean: 896.92
episode_reward_max: -25.26537357940651
episode_reward_mean: -30.20626608947816
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 104
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.349792942404747
      entropy_coeff: 0.005
      kl: 0.016933053906541318
      policy_loss: -0.09507245244458318
      total_loss: -0.09913640224840492
      vf_explained_var: 0.9990159869194031
      vf_loss: 0.002605098139611073
  num_steps_sampled: 96000
  num_steps_trained: 96000
iterations_since_restore: 96
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.152380952380954
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.599999999999998
  vram_util_percent0: 0.9449909821282179
  vram

2020-09-21 15:38:48,701	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.15088561340235174, 'policy_loss': -0.0011304659419693053, 'vf_loss': 0.1634872010909021, 'vf_explained_var': 0.95929325, 'kl': 0.0011547117217193537, 'entropy': 2.3635058999061584, 'entropy_coeff': 0.005}
2020-09-21 15:38:49,195	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.07696193782612681, 'policy_loss': -0.011156179127283394, 'vf_loss': 0.09783865185454488, 'vf_explained_var': 0.9764479, 'kl': 0.00719892731285654, 'entropy': 2.376043453812599, 'entropy_coeff': 0.005}
2020-09-21 15:38:49,649	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.038410439738072455, 'policy_loss': -0.02414924802724272, 'vf_loss': 0.07191196037456393, 'vf_explained_var': 0.9826132, 'kl': 0.00842405779985711, 'entropy': 2.375897765159607, 'entropy_coeff': 0.005}
2020-09-21 1

2020-09-21 15:39:00,652	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.09565781138371676, 'policy_loss': -0.09300316986627877, 'vf_loss': 0.003809031695709564, 'vf_explained_var': 0.9990618, 'kl': 0.017510536417830735, 'entropy': 2.3433670103549957, 'entropy_coeff': 0.005}
2020-09-21 15:39:01,143	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.09315290197264403, 'policy_loss': -0.09062644501682371, 'vf_loss': 0.0036277402978157625, 'vf_explained_var': 0.99911296, 'kl': 0.018542506324592978, 'entropy': 2.3433902114629745, 'entropy_coeff': 0.005}
2020-09-21 15:39:01,600	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.10591120040044188, 'policy_loss': -0.10286773310508579, 'vf_loss': 0.0033283151933574118, 'vf_explained_var': 0.9991838, 'kl': 0.017796034924685955, 'entropy': 2.3421201407909393, 'entropy_coeff': 0.005}
2

custom_metrics: {}
date: 2020-09-21_15-39-02
done: false
episode_len_mean: 896.92
episode_reward_max: -25.26537357940651
episode_reward_mean: -30.20626608947816
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 104
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.338303506374359
      entropy_coeff: 0.005
      kl: 0.01824359712190926
      policy_loss: -0.10444377176463604
      total_loss: -0.10747516562696546
      vf_explained_var: 0.9992424249649048
      vf_loss: 0.003187039037584327
  num_steps_sampled: 97000
  num_steps_trained: 97000
iterations_since_restore: 97
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.185714285714285
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.599999999999998
  vram_util_percent0: 0.9449909821282179
  vram_

2020-09-21 15:39:05,442	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.201585199451074, 'policy_loss': -0.005025435355491936, 'vf_loss': 0.218493377789855, 'vf_explained_var': 0.9022044, 'kl': 0.00016768960723578807, 'entropy': 2.3866106420755386, 'entropy_coeff': 0.005}
2020-09-21 15:39:05,929	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.0998763443203643, 'policy_loss': -0.009898305055685341, 'vf_loss': 0.12114106304943562, 'vf_explained_var': 0.9448025, 'kl': 0.0018462662937963614, 'entropy': 2.3840595185756683, 'entropy_coeff': 0.005}
2020-09-21 15:39:06,385	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.052413647645153105, 'policy_loss': -0.02569928497541696, 'vf_loss': 0.08882502443157136, 'vf_explained_var': 0.9582815, 'kl': 0.003990790530224331, 'entropy': 2.3818660378456116, 'entropy_coeff': 0.005}
2020-09-21 1

2020-09-21 15:39:15,861	DEBUG rollout_worker.py:760 -- Training out:

{ 'default_policy': { 'learner_stats': { 'allreduce_latency': 0.0,
                                         'cur_kl_coeff': 0.3,
                                         'cur_lr': 1e-05,
                                         'entropy': 2.3158016204833984,
                                         'entropy_coeff': 0.005,
                                         'kl': 0.013418632559478283,
                                         'policy_loss': -0.22269728779792786,
                                         'total_loss': -0.22225423157215118,
                                         'vf_explained_var': np.ndarray((1,), dtype=float32, min=0.997, max=0.997, mean=0.997),
                                         'vf_loss': 0.007996468804776669}}}

2020-09-21 15:39:15,862	DEBUG sgd.py:120 -- 22 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.10287206177599728, 'policy_loss': -0.10516758356

custom_metrics: {}
date: 2020-09-21_15-39-19
done: false
episode_len_mean: 896.92
episode_reward_max: -25.26537357940651
episode_reward_mean: -30.20626608947816
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 104
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.3826841562986374
      entropy_coeff: 0.005
      kl: 0.014766756270546466
      policy_loss: -0.1130781036335975
      total_loss: -0.1150313108228147
      vf_explained_var: 0.9973515272140503
      vf_loss: 0.005530193127924576
  num_steps_sampled: 98000
  num_steps_trained: 98000
iterations_since_restore: 98
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.1818181818181825
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.599999999999998
  vram_util_percent0: 0.9449909821282177
  vram

2020-09-21 15:39:21,726	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.1740756540093571, 'policy_loss': 0.0003305660793557763, 'vf_loss': 0.18536448432132602, 'vf_explained_var': 0.8965375, 'kl': 0.00021769484902056657, 'entropy': 2.336940810084343, 'entropy_coeff': 0.005}
2020-09-21 15:39:22,211	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.08204779378138483, 'policy_loss': 0.0006600210908800364, 'vf_loss': 0.0925722518004477, 'vf_explained_var': 0.9471385, 'kl': 0.0015872023977863137, 'entropy': 2.332127556204796, 'entropy_coeff': 0.005}
2020-09-21 15:39:22,666	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.03896807809360325, 'policy_loss': -0.013613451388664544, 'vf_loss': 0.06266471231356263, 'vf_explained_var': 0.9646692, 'kl': 0.005207067835726775, 'entropy': 2.3290610760450363, 'entropy_coeff': 0.005}
2020-09-21 

2020-09-21 15:39:33,629	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.09825978986918926, 'policy_loss': -0.09610067191533744, 'vf_loss': 0.005763593115261756, 'vf_explained_var': 0.9965174, 'kl': 0.012358321284409612, 'entropy': 2.3260428309440613, 'entropy_coeff': 0.005}
2020-09-21 15:39:34,080	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.10363373812288046, 'policy_loss': -0.10138584766536951, 'vf_loss': 0.005550950867473148, 'vf_explained_var': 0.99678224, 'kl': 0.01280057750409469, 'entropy': 2.3278026282787323, 'entropy_coeff': 0.005}
2020-09-21 15:39:34,571	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.10310762689914554, 'policy_loss': -0.10112535033840686, 'vf_loss': 0.005732138262828812, 'vf_explained_var': 0.9968643, 'kl': 0.013112406886648387, 'entropy': 2.329627364873886, 'entropy_coeff': 0.005}
2020-

custom_metrics: {}
date: 2020-09-21_15-39-35
done: false
episode_len_mean: 914.32
episode_reward_max: -25.26537357940651
episode_reward_mean: -30.217168846984247
episode_reward_min: -32.22698139089825
episodes_this_iter: 1
episodes_total: 105
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.322965979576111
      entropy_coeff: 0.005
      kl: 0.013177622866351157
      policy_loss: -0.10871790384408087
      total_loss: -0.11162796895951033
      vf_explained_var: 0.9970407485961914
      vf_loss: 0.004751478481921367
  num_steps_sampled: 99000
  num_steps_trained: 99000
iterations_since_restore: 99
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.276190476190476
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.599999999999998
  vram_util_percent0: 0.9449909821282179
  vra

[2m[36m(pid=24699)[0m 2020-09-21 15:39:35,795	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 9541.55975806919,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 926},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-1.18, max=2.277, mean=0.336),
[2m[36m(pid=24699)[0m                                   'prev_action': 2,
[2m[36m(pid=24699)[0m 

2020-09-21 15:39:38,369	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.11744550312869251, 'policy_loss': -0.0001039113849401474, 'vf_loss': 0.12914711190387607, 'vf_explained_var': 0.90573835, 'kl': 0.00011587888304442906, 'entropy': 2.326492354273796, 'entropy_coeff': 0.005}
2020-09-21 15:39:38,854	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.061938820872455835, 'policy_loss': -0.006023651920258999, 'vf_loss': 0.07926143729127944, 'vf_explained_var': 0.9418776, 'kl': 0.0010574068837740924, 'entropy': 2.3232368379831314, 'entropy_coeff': 0.005}
2020-09-21 15:39:39,306	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.03880664869211614, 'policy_loss': -0.010323633789084852, 'vf_loss': 0.059829285368323326, 'vf_explained_var': 0.9576684, 'kl': 0.00290040738764219, 'entropy': 2.3138254582881927, 'entropy_coeff': 0.005}
2020-

2020-09-21 15:39:50,263	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.08567066746763885, 'policy_loss': -0.08204787882277742, 'vf_loss': 0.004011057244497351, 'vf_explained_var': 0.9967692, 'kl': 0.01311354007339105, 'entropy': 2.3135806173086166, 'entropy_coeff': 0.005}
2020-09-21 15:39:50,710	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.09149377746507525, 'policy_loss': -0.08771593461278826, 'vf_loss': 0.003800000049523078, 'vf_explained_var': 0.99702406, 'kl': 0.013289285998325795, 'entropy': 2.312925085425377, 'entropy_coeff': 0.005}
2020-09-21 15:39:51,193	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.09554628538899124, 'policy_loss': -0.0916199367493391, 'vf_loss': 0.0035074775078101084, 'vf_explained_var': 0.99735314, 'kl': 0.013832108350470662, 'entropy': 2.3166916221380234, 'entropy_coeff': 0.005}
2020

custom_metrics: {}
date: 2020-09-21_15-39-52
done: false
episode_len_mean: 914.32
episode_reward_max: -25.26537357940651
episode_reward_mean: -30.21716884698425
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 105
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.309658572077751
      entropy_coeff: 0.005
      kl: 0.014488431566860527
      policy_loss: -0.09189861384220421
      total_loss: -0.0959259511437267
      vf_explained_var: 0.9974279999732971
      vf_loss: 0.0031744193693157285
  num_steps_sampled: 100000
  num_steps_trained: 100000
iterations_since_restore: 100
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.00909090909091
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.599999999999998
  vram_util_percent0: 0.9449909821282177
  vr

[2m[36m(pid=24699)[0m 2020-09-21 15:39:52,380	INFO rollout_worker.py:561 -- Generating sample batch of size 200
2020-09-21 15:39:54,494	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.19756621681153774, 'policy_loss': 0.00682465301360935, 'vf_loss': 0.201978980563581, 'vf_explained_var': 0.9506377, 'kl': 0.000386527637541878, 'entropy': 2.270674392580986, 'entropy_coeff': 0.005}
2020-09-21 15:39:54,985	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.11091223452240229, 'policy_loss': -0.004205400124192238, 'vf_loss': 0.12585494061931968, 'vf_explained_var': 0.97063696, 'kl': 0.0021534668157983106, 'entropy': 2.276668965816498, 'entropy_coeff': 0.005}
2020-09-21 15:39:55,434	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.06215067417360842, 'policy_loss': -0.02204501465894282, 'vf_loss': 0.09461295092478395, 'vf_explained_

2020-09-21 15:40:06,430	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.09141838573850691, 'policy_loss': -0.09163735993206501, 'vf_loss': 0.007191151875304058, 'vf_explained_var': 0.9983494, 'kl': 0.014172075025271624, 'entropy': 2.2447582483291626, 'entropy_coeff': 0.005}
2020-09-21 15:40:06,884	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.09059024869929999, 'policy_loss': -0.08991167217027396, 'vf_loss': 0.006287262280238792, 'vf_explained_var': 0.9985132, 'kl': 0.014268006256315857, 'entropy': 2.2492494732141495, 'entropy_coeff': 0.005}
2020-09-21 15:40:07,370	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.09077007486484945, 'policy_loss': -0.09031930344644934, 'vf_loss': 0.006476722206571139, 'vf_explained_var': 0.99859995, 'kl': 0.014447586727328598, 'entropy': 2.2523546367883682, 'entropy_coeff': 0.005}
202

custom_metrics: {}
date: 2020-09-21_15-40-08
done: false
episode_len_mean: 933.31
episode_reward_max: -25.26537357940651
episode_reward_mean: -30.25248319739413
episode_reward_min: -32.22698139089825
episodes_this_iter: 1
episodes_total: 106
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.2441834807395935
      entropy_coeff: 0.005
      kl: 0.014246527978684753
      policy_loss: -0.10309462388977408
      total_loss: -0.10448451095726341
      vf_explained_var: 0.9987092018127441
      vf_loss: 0.005557074895477854
  num_steps_sampled: 101000
  num_steps_trained: 101000
iterations_since_restore: 101
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.252380952380952
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.599999999999998
  vram_util_percent0: 0.9449909821282179
  

2020-09-21 15:40:10,895	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.254353393567726, 'policy_loss': 0.0007488398841815069, 'vf_loss': 0.2649928005412221, 'vf_explained_var': 0.8259038, 'kl': 0.00010896239637692151, 'entropy': 2.2841901034116745, 'entropy_coeff': 0.005}
2020-09-21 15:40:11,388	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.1088964082300663, 'policy_loss': -0.013146452023647726, 'vf_loss': 0.13281168974936008, 'vf_explained_var': 0.90926135, 'kl': 0.0021203243431955343, 'entropy': 2.280985251069069, 'entropy_coeff': 0.005}
2020-09-21 15:40:11,844	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.056748251896351576, 'policy_loss': -0.019384850282222033, 'vf_loss': 0.08541584759950638, 'vf_explained_var': 0.94153166, 'kl': 0.006947098299860954, 'entropy': 2.273374527692795, 'entropy_coeff': 0.005}
2020-09-21

2020-09-21 15:40:17,555	DEBUG sgd.py:120 -- 14 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.07085002795793116, 'policy_loss': -0.07745440094731748, 'vf_loss': 0.014692287775687873, 'vf_explained_var': 0.9894415, 'kl': 0.01055990083841607, 'entropy': 2.2511786371469498, 'entropy_coeff': 0.005}
2020-09-21 15:40:18,038	DEBUG sgd.py:120 -- 15 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.07092907838523388, 'policy_loss': -0.07643873291090131, 'vf_loss': 0.013554874458350241, 'vf_explained_var': 0.9901583, 'kl': 0.01076579320942983, 'entropy': 2.25499127805233, 'entropy_coeff': 0.005}
2020-09-21 15:40:18,488	DEBUG sgd.py:120 -- 16 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.07996730762533844, 'policy_loss': -0.08498742943629622, 'vf_loss': 0.012945566239068285, 'vf_explained_var': 0.99075973, 'kl': 0.011103269702289253, 'entropy': 2.2512842267751694, 'entropy_coeff': 0.005}
2020-09

custom_metrics: {}
date: 2020-09-21_15-40-24
done: false
episode_len_mean: 957.99
episode_reward_max: -25.26537357940651
episode_reward_mean: -30.221784634503695
episode_reward_min: -32.22698139089825
episodes_this_iter: 2
episodes_total: 108
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.247720092535019
      entropy_coeff: 0.005
      kl: 0.013868606009054929
      policy_loss: -0.10671298019587994
      total_loss: -0.10852081212215126
      vf_explained_var: 0.9960938692092896
      vf_loss: 0.005270184570690617
  num_steps_sampled: 102000
  num_steps_trained: 102000
iterations_since_restore: 102
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.19047619047619
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.599999999999998
  vram_util_percent0: 0.9449909821282179
  v

2020-09-21 15:40:26,698	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.18469159910455346, 'policy_loss': 0.0025854934938251972, 'vf_loss': 0.1930701294913888, 'vf_explained_var': 0.9622071, 'kl': 0.00011816500568020971, 'entropy': 2.1998949199914932, 'entropy_coeff': 0.005}
2020-09-21 15:40:27,158	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.06094622827367857, 'policy_loss': -0.01038605032954365, 'vf_loss': 0.08210308849811554, 'vf_explained_var': 0.9838903, 'kl': 0.0006707166130581754, 'entropy': 2.1944063752889633, 'entropy_coeff': 0.005}
2020-09-21 15:40:27,644	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.0279681165702641, 'policy_loss': -0.014395801117643714, 'vf_loss': 0.05282433703541756, 'vf_explained_var': 0.98958266, 'kl': 0.0016495479867444374, 'entropy': 2.191056415438652, 'entropy_coeff': 0.005}
2020-09-2

2020-09-21 15:40:38,578	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.09423558576963842, 'policy_loss': -0.09231989737600088, 'vf_loss': 0.00514653034042567, 'vf_explained_var': 0.9988799, 'kl': 0.013057688542176038, 'entropy': 2.195905178785324, 'entropy_coeff': 0.005}
2020-09-21 15:40:39,043	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.09901592507958412, 'policy_loss': -0.09710501902736723, 'vf_loss': 0.005049366256571375, 'vf_explained_var': 0.99891734, 'kl': 0.013416127127129585, 'entropy': 2.1970234513282776, 'entropy_coeff': 0.005}
2020-09-21 15:40:39,524	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.09717186691705137, 'policy_loss': -0.09561524342279881, 'vf_loss': 0.005282434765831567, 'vf_explained_var': 0.998905, 'kl': 0.013859076716471463, 'entropy': 2.1993555277585983, 'entropy_coeff': 0.005}
2020-0

custom_metrics: {}
date: 2020-09-21_15-40-40
done: false
episode_len_mean: 957.99
episode_reward_max: -25.26537357940651
episode_reward_mean: -30.221784634503692
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 108
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.198386326432228
      entropy_coeff: 0.005
      kl: 0.013988543825689703
      policy_loss: -0.09938483708538115
      total_loss: -0.10137454862706363
      vf_explained_var: 0.9989950656890869
      vf_loss: 0.004805664168088697
  num_steps_sampled: 103000
  num_steps_trained: 103000
iterations_since_restore: 103
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.019047619047619
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.599999999999998
  vram_util_percent0: 0.9449909821282179
  

2020-09-21 15:40:42,449	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.1425379334250465, 'policy_loss': 0.0026804388035088778, 'vf_loss': 0.15039792330935597, 'vf_explained_var': 0.97101426, 'kl': 0.0003695486535397752, 'entropy': 2.130259558558464, 'entropy_coeff': 0.005}
2020-09-21 15:40:42,910	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.039982699032407254, 'policy_loss': -0.008918183273635805, 'vf_loss': 0.059117395197972655, 'vf_explained_var': 0.9883707, 'kl': 0.0017227970238309354, 'entropy': 2.146670773625374, 'entropy_coeff': 0.005}
2020-09-21 15:40:43,398	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.016264508332824335, 'policy_loss': -0.008860077941790223, 'vf_loss': 0.03529731056187302, 'vf_explained_var': 0.9928877, 'kl': 0.0018130087264580652, 'entropy': 2.1433240175247192, 'entropy_coeff': 0.005}
2020-0

2020-09-21 15:40:54,294	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.09664617106318474, 'policy_loss': -0.0973833620082587, 'vf_loss': 0.00781533018744085, 'vf_explained_var': 0.9982717, 'kl': 0.012102656939532608, 'entropy': 2.1417871862649918, 'entropy_coeff': 0.005}
2020-09-21 15:40:54,783	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.0911909646820277, 'policy_loss': -0.09199741203337908, 'vf_loss': 0.007846183172659948, 'vf_explained_var': 0.9983039, 'kl': 0.012211885652504861, 'entropy': 2.1406593173742294, 'entropy_coeff': 0.005}
2020-09-21 15:40:55,236	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.0974725077394396, 'policy_loss': -0.09857140190433711, 'vf_loss': 0.008069128583883867, 'vf_explained_var': 0.99823916, 'kl': 0.012465372448787093, 'entropy': 2.1419688165187836, 'entropy_coeff': 0.005}
2020-09

custom_metrics: {}
date: 2020-09-21_15-40-56
done: false
episode_len_mean: 969.19
episode_reward_max: -25.26537357940651
episode_reward_mean: -30.197691438825746
episode_reward_min: -32.22698139089825
episodes_this_iter: 1
episodes_total: 109
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.1384999752044678
      entropy_coeff: 0.005
      kl: 0.01288402231875807
      policy_loss: -0.10250449716113508
      total_loss: -0.1014042419847101
      vf_explained_var: 0.9982588291168213
      vf_loss: 0.007927545899292454
  num_steps_sampled: 104000
  num_steps_trained: 104000
iterations_since_restore: 104
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.128571428571429
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.599999999999998
  vram_util_percent0: 0.9449909821282179
  v

[2m[36m(pid=24699)[0m 2020-09-21 15:40:56,452	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 10199.608548481923,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 102},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-1.141, max=1.423, mean=0.252),
[2m[36m(pid=24699)[0m                                   'prev_action': 6,
[2m[36m(pid=24699)[

2020-09-21 15:40:58,234	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.10800341318827122, 'policy_loss': -0.0006743569392710924, 'vf_loss': 0.11979915201663971, 'vf_explained_var': 0.9571706, 'kl': 0.00027907810381561093, 'entropy': 2.2410217374563217, 'entropy_coeff': 0.005}
2020-09-21 15:40:58,725	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.06181881343945861, 'policy_loss': -0.005949070677161217, 'vf_loss': 0.07766662747599185, 'vf_explained_var': 0.97258943, 'kl': 0.004271756966772955, 'entropy': 2.2360547184944153, 'entropy_coeff': 0.005}
2020-09-21 15:40:59,175	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.03263679472729564, 'policy_loss': -0.016334762098267674, 'vf_loss': 0.05909411143511534, 'vf_explained_var': 0.97977924, 'kl': 0.0035230148350819945, 'entropy': 2.2358935177326202, 'entropy_coeff': 0.005}
2020

2020-09-21 15:41:10,119	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.06844155114958994, 'policy_loss': -0.08581146004144102, 'vf_loss': 0.024862711783498526, 'vf_explained_var': 0.99041504, 'kl': 0.012090597476344556, 'entropy': 2.2239968180656433, 'entropy_coeff': 0.005}
2020-09-21 15:41:10,571	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.0744212991266977, 'policy_loss': -0.09145660337526351, 'vf_loss': 0.02442539483308792, 'vf_explained_var': 0.9910558, 'kl': 0.012480617442633957, 'entropy': 2.2268564105033875, 'entropy_coeff': 0.005}
2020-09-21 15:41:11,054	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.07535869072307833, 'policy_loss': -0.09209105017362162, 'vf_loss': 0.024221550847869366, 'vf_explained_var': 0.99039876, 'kl': 0.012145923625212163, 'entropy': 2.2265929579734802, 'entropy_coeff': 0.005}
2020

custom_metrics: {}
date: 2020-09-21_15-41-12
done: false
episode_len_mean: 969.19
episode_reward_max: -25.26537357940651
episode_reward_mean: -30.197691438825746
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 109
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.225225046277046
      entropy_coeff: 0.005
      kl: 0.013061408477369696
      policy_loss: -0.09573323238873854
      total_loss: -0.07897180673899129
      vf_explained_var: 0.990984320640564
      vf_loss: 0.023969135421793908
  num_steps_sampled: 105000
  num_steps_trained: 105000
iterations_since_restore: 105
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.045
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.599999999999998
  vram_util_percent0: 0.9449909821282179
  vram_util_per

[2m[36m(pid=24699)[0m 2020-09-21 15:41:12,570	INFO rollout_worker.py:561 -- Generating sample batch of size 200
2020-09-21 15:41:14,466	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.10417910991236567, 'policy_loss': 0.007505749701522291, 'vf_loss': 0.1076516651082784, 'vf_explained_var': 0.9741199, 'kl': 0.0005689954771279648, 'entropy': 2.2298022359609604, 'entropy_coeff': 0.005}
2020-09-21 15:41:14,949	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.034345446329098195, 'policy_loss': -0.007338312920182943, 'vf_loss': 0.051862488966435194, 'vf_explained_var': 0.98595387, 'kl': 0.0032191026184591465, 'entropy': 2.2288922667503357, 'entropy_coeff': 0.005}
2020-09-21 15:41:15,397	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.0046468686778098345, 'policy_loss': -0.01655637100338936, 'vf_loss': 0.030672989669255912, 'vf_

2020-09-21 15:41:21,118	DEBUG sgd.py:120 -- 14 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.06351388763869181, 'policy_loss': -0.06064235622761771, 'vf_loss': 0.0045252974377945065, 'vf_explained_var': 0.9987937, 'kl': 0.012181366502773017, 'entropy': 2.210248127579689, 'entropy_coeff': 0.005}
2020-09-21 15:41:21,574	DEBUG sgd.py:120 -- 15 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.07485382445156574, 'policy_loss': -0.071480454178527, 'vf_loss': 0.0041555904754204676, 'vf_explained_var': 0.9987461, 'kl': 0.011595478921663016, 'entropy': 2.2015212178230286, 'entropy_coeff': 0.005}
2020-09-21 15:41:22,060	DEBUG sgd.py:120 -- 16 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.06892481818795204, 'policy_loss': -0.06508332234807312, 'vf_loss': 0.0037065029027871788, 'vf_explained_var': 0.99892116, 'kl': 0.011679962452035397, 'entropy': 2.21039941906929, 'entropy_coeff': 0.005}
2020-

custom_metrics: {}
date: 2020-09-21_15-41-28
done: false
episode_len_mean: 969.19
episode_reward_max: -25.26537357940651
episode_reward_mean: -30.197691438825746
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 109
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.202867090702057
      entropy_coeff: 0.005
      kl: 0.015242231253068894
      policy_loss: -0.0955345993861556
      total_loss: -0.09983164421282709
      vf_explained_var: 0.999407947063446
      vf_loss: 0.0021446262908284552
  num_steps_sampled: 106000
  num_steps_trained: 106000
iterations_since_restore: 106
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.1761904761904765
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.599999999999998
  vram_util_percent0: 0.9449909821282179
  

2020-09-21 15:41:30,429	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.10158871999010444, 'policy_loss': -0.0015349439345300198, 'vf_loss': 0.11420909175649285, 'vf_explained_var': 0.97033083, 'kl': 0.0002846357187793469, 'entropy': 2.2341638058423996, 'entropy_coeff': 0.005}
2020-09-21 15:41:30,926	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.03473871876485646, 'policy_loss': -0.01055954396724701, 'vf_loss': 0.05530599155463278, 'vf_explained_var': 0.98551637, 'kl': 0.0038061379673308693, 'entropy': 2.229915142059326, 'entropy_coeff': 0.005}
2020-09-21 15:41:31,378	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.0018878817791119218, 'policy_loss': -0.02520238352008164, 'vf_loss': 0.0313484383514151, 'vf_explained_var': 0.9915862, 'kl': 0.010414038377348334, 'entropy': 2.2316305339336395, 'entropy_coeff': 0.005}
2020-0

2020-09-21 15:41:42,324	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.093001464381814, 'policy_loss': -0.09375966503284872, 'vf_loss': 0.007087688660249114, 'vf_explained_var': 0.99799615, 'kl': 0.016711828880943358, 'entropy': 2.2686075419187546, 'entropy_coeff': 0.005}
2020-09-21 15:41:42,772	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.09590855112764984, 'policy_loss': -0.09730076242703944, 'vf_loss': 0.0077627473074244335, 'vf_explained_var': 0.9980476, 'kl': 0.016617405402939767, 'entropy': 2.2711528092622757, 'entropy_coeff': 0.005}
2020-09-21 15:41:43,250	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.10199929896043614, 'policy_loss': -0.1032008082838729, 'vf_loss': 0.0072679498116485775, 'vf_explained_var': 0.99793506, 'kl': 0.017584758112207055, 'entropy': 2.2683741748332977, 'entropy_coeff': 0.005}
202

custom_metrics: {}
date: 2020-09-21_15-41-44
done: false
episode_len_mean: 969.19
episode_reward_max: -25.26537357940651
episode_reward_mean: -30.197691438825746
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 109
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.269805431365967
      entropy_coeff: 0.005
      kl: 0.017636726144701242
      policy_loss: -0.09806602692697197
      total_loss: -0.09710846014786512
      vf_explained_var: 0.9980522394180298
      vf_loss: 0.007015576004050672
  num_steps_sampled: 107000
  num_steps_trained: 107000
iterations_since_restore: 107
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.219047619047619
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.599999999999998
  vram_util_percent0: 0.9449909821282179
  

2020-09-21 15:41:46,849	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.2244672328233719, 'policy_loss': -0.002251932746730745, 'vf_loss': 0.23768798727542162, 'vf_explained_var': 0.9172846, 'kl': 0.0015835239233560061, 'entropy': 2.2887764871120453, 'entropy_coeff': 0.005}
2020-09-21 15:41:47,340	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.07535820524208248, 'policy_loss': -0.013152666622772813, 'vf_loss': 0.098293696064502, 'vf_explained_var': 0.96211696, 'kl': 0.00558631558669731, 'entropy': 2.2917439490556717, 'entropy_coeff': 0.005}
2020-09-21 15:41:47,793	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.035715549369342625, 'policy_loss': -0.02761007403023541, 'vf_loss': 0.07303828769363463, 'vf_explained_var': 0.97445166, 'kl': 0.005821492290124297, 'entropy': 2.291820913553238, 'entropy_coeff': 0.005}
2020-09-21 1

2020-09-21 15:41:58,753	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.07605692863580771, 'policy_loss': -0.0943919369019568, 'vf_loss': 0.026182072586379945, 'vf_explained_var': 0.9895704, 'kl': 0.011974798457231373, 'entropy': 2.2879007309675217, 'entropy_coeff': 0.005}
2020-09-21 15:41:59,199	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.0703618994448334, 'policy_loss': -0.08925590896978974, 'vf_loss': 0.026461957837454975, 'vf_explained_var': 0.9893837, 'kl': 0.012859969341661781, 'entropy': 2.2851885557174683, 'entropy_coeff': 0.005}
2020-09-21 15:41:59,690	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.08667114726267755, 'policy_loss': -0.10495538800023496, 'vf_loss': 0.025826834957115352, 'vf_explained_var': 0.9892976, 'kl': 0.012964038294740021, 'entropy': 2.286360129714012, 'entropy_coeff': 0.005}
2020-09

custom_metrics: {}
date: 2020-09-21_15-42-00
done: false
episode_len_mean: 969.19
episode_reward_max: -25.26537357940651
episode_reward_mean: -30.197691438825746
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 109
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.2863702476024628
      entropy_coeff: 0.005
      kl: 0.014031536120455712
      policy_loss: -0.09618729999056086
      total_loss: -0.0774431477766484
      vf_explained_var: 0.9896880984306335
      vf_loss: 0.025966543471440673
  num_steps_sampled: 108000
  num_steps_trained: 108000
iterations_since_restore: 108
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.995238095238095
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.599999999999998
  vram_util_percent0: 0.9449909821282179
  

2020-09-21 15:42:03,168	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.1327502483036369, 'policy_loss': -0.002503160387277603, 'vf_loss': 0.14649412455037236, 'vf_explained_var': 0.8981917, 'kl': 0.00037235757865441976, 'entropy': 2.2704854160547256, 'entropy_coeff': 0.005}
2020-09-21 15:42:03,660	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.07001256186049432, 'policy_loss': -0.004687563283368945, 'vf_loss': 0.0842570464592427, 'vf_explained_var': 0.9342593, 'kl': 0.005845524683536496, 'entropy': 2.262116402387619, 'entropy_coeff': 0.005}
2020-09-21 15:42:04,150	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.032356419367715716, 'policy_loss': -0.01586955296806991, 'vf_loss': 0.05814217496663332, 'vf_explained_var': 0.9570732, 'kl': 0.004718874653917737, 'entropy': 2.266373425722122, 'entropy_coeff': 0.005}
2020-09-21 1

2020-09-21 15:42:15,105	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.09257568628527224, 'policy_loss': -0.09730721078813076, 'vf_loss': 0.01124276898917742, 'vf_explained_var': 0.9913517, 'kl': 0.01601550535997376, 'entropy': 2.2631795406341553, 'entropy_coeff': 0.005}
2020-09-21 15:42:15,587	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.08380750217474997, 'policy_loss': -0.08821693924255669, 'vf_loss': 0.010910903511103243, 'vf_explained_var': 0.99150455, 'kl': 0.016006062040105462, 'entropy': 2.2606578320264816, 'entropy_coeff': 0.005}
2020-09-21 15:42:15,966	INFO rollout_worker.py:736 -- Training on concatenated sample batches:

{ 'count': 64,
  'policy_batches': { 'default_policy': { 'data': { 'action_dist_inputs': np.ndarray((64, 21), dtype=float32, min=-5.037, max=4.449, mean=-0.055),
                                                    'action_logp': np.ndarray((

custom_metrics: {}
date: 2020-09-21_15-42-17
done: false
episode_len_mean: 969.19
episode_reward_max: -25.26537357940651
episode_reward_mean: -30.197691438825746
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 109
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.2625648081302643
      entropy_coeff: 0.005
      kl: 0.016698896419256926
      policy_loss: -0.10173146508168429
      total_loss: -0.09733038919512182
      vf_explained_var: 0.9912196397781372
      vf_loss: 0.010704224434448406
  num_steps_sampled: 109000
  num_steps_trained: 109000
iterations_since_restore: 109
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.404545454545455
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.599999999999998
  vram_util_percent0: 0.9449909821282177
 

[2m[36m(pid=24699)[0m 2020-09-21 15:42:17,600	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 9527.825394320083,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 1102},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-4.054, max=3.22, mean=0.341),
[2m[36m(pid=24699)[0m                                   'prev_action': 1,
[2m[36m(pid=24699)[0

2020-09-21 15:42:19,661	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.21652404556516558, 'policy_loss': 0.006721982499584556, 'vf_loss': 0.2208768194541335, 'vf_explained_var': 0.85075116, 'kl': 0.0002489146193778713, 'entropy': 2.229888439178467, 'entropy_coeff': 0.005}
2020-09-21 15:42:20,149	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.10408817615825683, 'policy_loss': -0.006666583241894841, 'vf_loss': 0.12116368254646659, 'vf_explained_var': 0.9194228, 'kl': 0.002392532656813273, 'entropy': 2.2253365963697433, 'entropy_coeff': 0.005}
2020-09-21 15:42:20,601	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.04364307830110192, 'policy_loss': -0.01908808120060712, 'vf_loss': 0.07254169997759163, 'vf_explained_var': 0.94773805, 'kl': 0.004234152860590257, 'entropy': 2.2161565721035004, 'entropy_coeff': 0.005}
2020-09-21 

2020-09-21 15:42:31,538	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.08076978928875178, 'policy_loss': -0.09022230375558138, 'vf_loss': 0.016872989595867693, 'vf_explained_var': 0.9877393, 'kl': 0.01211888121906668, 'entropy': 2.211228609085083, 'entropy_coeff': 0.005}
2020-09-21 15:42:32,003	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.09020017145667225, 'policy_loss': -0.09986448351992294, 'vf_loss': 0.016919294197577983, 'vf_explained_var': 0.98762566, 'kl': 0.012636521423701197, 'entropy': 2.2091869115829468, 'entropy_coeff': 0.005}
2020-09-21 15:42:32,489	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.08607103326357901, 'policy_loss': -0.09496770531404763, 'vf_loss': 0.016117889084853232, 'vf_explained_var': 0.9881239, 'kl': 0.01274196314625442, 'entropy': 2.2087608575820923, 'entropy_coeff': 0.005}
2020-0

custom_metrics: {}
date: 2020-09-21_15-42-33
done: false
episode_len_mean: 996.55
episode_reward_max: -25.26537357940651
episode_reward_mean: -30.18970144582663
episode_reward_min: -32.22698139089825
episodes_this_iter: 2
episodes_total: 111
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.2112937271595
      entropy_coeff: 0.005
      kl: 0.013244038040284067
      policy_loss: -0.09710159664973617
      total_loss: -0.087870002258569
      vf_explained_var: 0.9886808395385742
      vf_loss: 0.016314854787196964
  num_steps_sampled: 110000
  num_steps_trained: 110000
iterations_since_restore: 110
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.171428571428571
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.599999999999998
  vram_util_percent0: 0.9449909821282179
  vram_

[2m[36m(pid=24699)[0m 2020-09-21 15:42:33,673	INFO rollout_worker.py:561 -- Generating sample batch of size 200
2020-09-21 15:42:35,679	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.25707458332180977, 'policy_loss': -0.000492527149617672, 'vf_loss': 0.26802783366292715, 'vf_explained_var': 0.85831535, 'kl': 0.0013146145211891391, 'entropy': 2.1710197776556015, 'entropy_coeff': 0.005}
2020-09-21 15:42:36,172	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.11520875268615782, 'policy_loss': -0.018751402152702212, 'vf_loss': 0.14275869680568576, 'vf_explained_var': 0.92433846, 'kl': 0.006935540790436789, 'entropy': 2.1758420169353485, 'entropy_coeff': 0.005}
2020-09-21 15:42:36,628	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.06590396084357053, 'policy_loss': -0.022656702203676105, 'vf_loss': 0.09753510169684887, 'vf_ex

2020-09-21 15:42:47,565	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.06214203522540629, 'policy_loss': -0.09131744562182575, 'vf_loss': 0.03540860640350729, 'vf_explained_var': 0.9801645, 'kl': 0.015558481507468969, 'entropy': 2.18014857172966, 'entropy_coeff': 0.005}
2020-09-21 15:42:48,017	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.06411684711929411, 'policy_loss': -0.09373691282235086, 'vf_loss': 0.0356575979385525, 'vf_explained_var': 0.98062575, 'kl': 0.016249151434749365, 'entropy': 2.1824555695056915, 'entropy_coeff': 0.005}
2020-09-21 15:42:48,504	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.06380671996157616, 'policy_loss': -0.093704663682729, 'vf_loss': 0.035906987148337066, 'vf_explained_var': 0.9805645, 'kl': 0.016322346578817815, 'entropy': 2.1811501681804657, 'entropy_coeff': 0.005}
2020-09-21

custom_metrics: {}
date: 2020-09-21_15-42-49
done: false
episode_len_mean: 996.55
episode_reward_max: -25.26537357940651
episode_reward_mean: -30.18970144582663
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 111
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.17641881108284
      entropy_coeff: 0.005
      kl: 0.01682490046368912
      policy_loss: -0.09961015312001109
      total_loss: -0.07022654381580651
      vf_explained_var: 0.9800944328308105
      vf_loss: 0.03521822753828019
  num_steps_sampled: 111000
  num_steps_trained: 111000
iterations_since_restore: 111
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.228571428571429
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.599999999999998
  vram_util_percent0: 0.9449909821282179
  vram

2020-09-21 15:42:52,003	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.5700337290763855, 'policy_loss': 0.003997693303972483, 'vf_loss': 0.5765210650861263, 'vf_explained_var': 0.91777915, 'kl': 0.00017406159314914404, 'entropy': 2.1074488312005997, 'entropy_coeff': 0.005}
2020-09-21 15:42:52,492	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.3185281325131655, 'policy_loss': -0.0016812391113489866, 'vf_loss': 0.3301142808049917, 'vf_explained_var': 0.9515925, 'kl': 0.0020712168043246493, 'entropy': 2.105253830552101, 'entropy_coeff': 0.005}
2020-09-21 15:42:52,944	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.21702688885852695, 'policy_loss': -0.02602568198926747, 'vf_loss': 0.25252096727490425, 'vf_explained_var': 0.96220446, 'kl': 0.0032800788758322597, 'entropy': 2.0904838740825653, 'entropy_coeff': 0.005}
2020-09-21

2020-09-21 15:43:03,877	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.06592243688646704, 'policy_loss': -0.09918195311911404, 'vf_loss': 0.1705723525956273, 'vf_explained_var': 0.97386944, 'kl': 0.016477205033879727, 'entropy': 2.082225762307644, 'entropy_coeff': 0.005}
2020-09-21 15:43:04,329	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.06740283803083003, 'policy_loss': -0.0999687280273065, 'vf_loss': 0.1729596429504454, 'vf_explained_var': 0.974439, 'kl': 0.016188468143809587, 'entropy': 2.088920973241329, 'entropy_coeff': 0.005}
2020-09-21 15:43:04,813	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.058904892997816205, 'policy_loss': -0.10352802369743586, 'vf_loss': 0.16791153186932206, 'vf_explained_var': 0.97371733, 'kl': 0.01653332426212728, 'entropy': 2.087722562253475, 'entropy_coeff': 0.005}
2020-09-21 15:4

custom_metrics: {}
date: 2020-09-21_15-43-05
done: false
episode_len_mean: 1008.98
episode_reward_max: -25.26537357940651
episode_reward_mean: -30.189922438159506
episode_reward_min: -32.22698139089825
episodes_this_iter: 1
episodes_total: 112
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.0808070823550224
      entropy_coeff: 0.005
      kl: 0.01722212164895609
      policy_loss: -0.1149352085776627
      total_loss: 0.050640828092582524
      vf_explained_var: 0.9743596315383911
      vf_loss: 0.1708134301006794
  num_steps_sampled: 112000
  num_steps_trained: 112000
iterations_since_restore: 112
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.185714285714286
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.599999999999998
  vram_util_percent0: 0.9449909821282179
  vr

2020-09-21 15:43:07,821	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.2096583521924913, 'policy_loss': -0.0007319548167288303, 'vf_loss': 0.2211461579427123, 'vf_explained_var': 0.9370715, 'kl': 0.0001347014754966569, 'entropy': 2.1592520028352737, 'entropy_coeff': 0.005}
2020-09-21 15:43:08,278	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.11839002254419029, 'policy_loss': -0.0030405414290726185, 'vf_loss': 0.13167633349075913, 'vf_explained_var': 0.96168554, 'kl': 0.0018545884231571108, 'entropy': 2.1604298502206802, 'entropy_coeff': 0.005}
2020-09-21 15:43:08,764	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.06998301373096183, 'policy_loss': -0.014231177978217602, 'vf_loss': 0.09426053380593657, 'vf_explained_var': 0.9730822, 'kl': 0.002539519511628896, 'entropy': 2.16163869202137, 'entropy_coeff': 0.005}
2020-09-2

2020-09-21 15:43:16,031	DEBUG rollout_worker.py:760 -- Training out:

{ 'default_policy': { 'learner_stats': { 'allreduce_latency': 0.0,
                                         'cur_kl_coeff': 0.3,
                                         'cur_lr': 1e-05,
                                         'entropy': 2.159243583679199,
                                         'entropy_coeff': 0.005,
                                         'kl': 0.012889710254967213,
                                         'policy_loss': -0.2017042487859726,
                                         'total_loss': -0.1964426189661026,
                                         'vf_explained_var': np.ndarray((1,), dtype=float32, min=0.996, max=0.996, mean=0.996),
                                         'vf_loss': 0.012190928682684898}}}

2020-09-21 15:43:16,329	DEBUG sgd.py:120 -- 18 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.08658045576885343, 'policy_loss': -0.09199560503475

custom_metrics: {}
date: 2020-09-21_15-43-21
done: false
episode_len_mean: 1024.82
episode_reward_max: -25.26537357940651
episode_reward_mean: -30.187363251416016
episode_reward_min: -32.22698139089825
episodes_this_iter: 1
episodes_total: 113
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.1566653847694397
      entropy_coeff: 0.005
      kl: 0.015130215208046138
      policy_loss: -0.11782313277944922
      total_loss: -0.11643365258350968
      vf_explained_var: 0.9977308511734009
      vf_loss: 0.007633749017259106
  num_steps_sampled: 113000
  num_steps_trained: 113000
iterations_since_restore: 113
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.104761904761905
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.599999999999998
  vram_util_percent0: 0.9449909821282179


2020-09-21 15:43:23,848	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.18543396587483585, 'policy_loss': -0.007537049823440611, 'vf_loss': 0.20336051750928164, 'vf_explained_var': 0.95049894, 'kl': 0.0006538807061905461, 'entropy': 2.117133527994156, 'entropy_coeff': 0.005}
2020-09-21 15:43:24,300	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.09167809109203517, 'policy_loss': -0.014573585220205132, 'vf_loss': 0.11488675884902477, 'vf_explained_var': 0.97192633, 'kl': 0.006463583500590175, 'entropy': 2.1148309409618378, 'entropy_coeff': 0.005}
2020-09-21 15:43:24,792	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.048815128044225276, 'policy_loss': -0.02461027621757239, 'vf_loss': 0.081187944393605, 'vf_explained_var': 0.9796077, 'kl': 0.009377037931699306, 'entropy': 2.115130677819252, 'entropy_coeff': 0.005}
2020-09-21 

2020-09-21 15:43:35,667	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.08022427861578763, 'policy_loss': -0.09662424318958074, 'vf_loss': 0.022518137673614547, 'vf_explained_var': 0.9945472, 'kl': 0.01504493720130995, 'entropy': 2.1263314187526703, 'entropy_coeff': 0.005}
2020-09-21 15:43:36,153	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.07988133188337088, 'policy_loss': -0.09489925147499889, 'vf_loss': 0.021153779758606106, 'vf_explained_var': 0.99445176, 'kl': 0.014989085670094937, 'entropy': 2.1265161633491516, 'entropy_coeff': 0.005}
2020-09-21 15:43:36,636	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.08473550109192729, 'policy_loss': -0.09998038492631167, 'vf_loss': 0.02147446890012361, 'vf_explained_var': 0.99472606, 'kl': 0.014644024369772524, 'entropy': 2.124557450413704, 'entropy_coeff': 0.005}
2020-

custom_metrics: {}
date: 2020-09-21_15-43-37
done: false
episode_len_mean: 1024.82
episode_reward_max: -25.26537357940651
episode_reward_mean: -30.187363251416013
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 113
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.131686806678772
      entropy_coeff: 0.005
      kl: 0.015114029054529965
      policy_loss: -0.09689237282145768
      total_loss: -0.07893976899794097
      vf_explained_var: 0.9947352409362793
      vf_loss: 0.02407683152705431
  num_steps_sampled: 114000
  num_steps_trained: 114000
iterations_since_restore: 114
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.123809523809524
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.599999999999998
  vram_util_percent0: 0.9449909821282179
  

[2m[36m(pid=24699)[0m 2020-09-21 15:43:37,818	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 5472.339360768359,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 2102},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-1.209, max=3.047, mean=0.35),
[2m[36m(pid=24699)[0m                                   'prev_action': 16,
[2m[36m(pid=24699)[

2020-09-21 15:43:39,943	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.16661379113793373, 'policy_loss': 0.0018417291576042771, 'vf_loss': 0.17510053236037493, 'vf_explained_var': 0.9633876, 'kl': 0.0015927739006147412, 'entropy': 2.1612598299980164, 'entropy_coeff': 0.005}
2020-09-21 15:43:40,438	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.08673939388245344, 'policy_loss': -0.01690275315195322, 'vf_loss': 0.11272196937352419, 'vf_explained_var': 0.9776623, 'kl': 0.005620587704470381, 'entropy': 2.153201088309288, 'entropy_coeff': 0.005}
2020-09-21 15:43:40,890	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.04971366538666189, 'policy_loss': -0.014675413025543094, 'vf_loss': 0.07401743321679533, 'vf_explained_var': 0.984538, 'kl': 0.00357454433105886, 'entropy': 2.140143319964409, 'entropy_coeff': 0.005}
2020-09-21 15:

2020-09-21 15:43:51,866	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.08607006166130304, 'policy_loss': -0.09173572016879916, 'vf_loss': 0.011890216235769913, 'vf_explained_var': 0.9977771, 'kl': 0.014784922532271594, 'entropy': 2.1320074945688248, 'entropy_coeff': 0.005}
2020-09-21 15:43:52,316	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.08844009670428932, 'policy_loss': -0.09281884657684714, 'vf_loss': 0.010578416055068374, 'vf_explained_var': 0.99763393, 'kl': 0.014861247211229056, 'entropy': 2.1316074430942535, 'entropy_coeff': 0.005}
2020-09-21 15:43:52,799	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.09823369781952351, 'policy_loss': -0.10265016392804682, 'vf_loss': 0.010376039892435074, 'vf_explained_var': 0.99787956, 'kl': 0.015662436839193106, 'entropy': 2.1316607296466827, 'entropy_coeff': 0.005}
20

custom_metrics: {}
date: 2020-09-21_15-43-53
done: false
episode_len_mean: 1042.22
episode_reward_max: -25.26537357940651
episode_reward_mean: -30.209143117401744
episode_reward_min: -32.22698139089825
episodes_this_iter: 1
episodes_total: 114
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.1242100596427917
      entropy_coeff: 0.005
      kl: 0.015736679662950337
      policy_loss: -0.0991020209621638
      total_loss: -0.09506760165095329
      vf_explained_var: 0.9979574680328369
      vf_loss: 0.009934463974786922
  num_steps_sampled: 115000
  num_steps_trained: 115000
iterations_since_restore: 115
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.104761904761905
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.599999999999998
  vram_util_percent0: 0.9449909821282179
 

[2m[36m(pid=24699)[0m 2020-09-21 15:43:54,001	INFO rollout_worker.py:561 -- Generating sample batch of size 200
2020-09-21 15:43:55,803	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.2058751219883561, 'policy_loss': 0.01164020854048431, 'vf_loss': 0.2050637104548514, 'vf_explained_var': 0.9373808, 'kl': 0.0002741178822294188, 'entropy': 2.1822054386138916, 'entropy_coeff': 0.005}
2020-09-21 15:43:56,260	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.07110807974822819, 'policy_loss': -0.010655667749233544, 'vf_loss': 0.09117120108567178, 'vf_explained_var': 0.9711123, 'kl': 0.005181119027838577, 'entropy': 2.1923571079969406, 'entropy_coeff': 0.005}
2020-09-21 15:43:56,748	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.0206843406194821, 'policy_loss': -0.023129503591917455, 'vf_loss': 0.05283946427516639, 'vf_explained

2020-09-21 15:44:07,628	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.11166151979705319, 'policy_loss': -0.10828730545472354, 'vf_loss': 0.0032398135954281315, 'vf_explained_var': 0.9988615, 'kl': 0.01425024849595502, 'entropy': 2.1778193414211273, 'entropy_coeff': 0.005}
2020-09-21 15:44:08,111	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.11152419878635556, 'policy_loss': -0.10804591001942754, 'vf_loss': 0.0031458110170206055, 'vf_explained_var': 0.9989154, 'kl': 0.014248190738726407, 'entropy': 2.179711177945137, 'entropy_coeff': 0.005}
2020-09-21 15:44:08,565	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.10856926091946661, 'policy_loss': -0.10533358226530254, 'vf_loss': 0.0030805354472249746, 'vf_explained_var': 0.9989285, 'kl': 0.015231061086524278, 'entropy': 2.1771060675382614, 'entropy_coeff': 0.005}
202

custom_metrics: {}
date: 2020-09-21_15-44-09
done: false
episode_len_mean: 1042.22
episode_reward_max: -25.26537357940651
episode_reward_mean: -30.209143117401744
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 114
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.1759527921676636
      entropy_coeff: 0.005
      kl: 0.015535957703832537
      policy_loss: -0.11089352844282985
      total_loss: -0.11421409389004111
      vf_explained_var: 0.9989748597145081
      vf_loss: 0.002898415033996571
  num_steps_sampled: 116000
  num_steps_trained: 116000
iterations_since_restore: 116
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.2142857142857135
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.599999999999998
  vram_util_percent0: 0.9449909821282179

2020-09-21 15:44:11,741	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.058390239952132106, 'policy_loss': -0.0008496665395796299, 'vf_loss': 0.07003022241406143, 'vf_explained_var': 0.98983425, 'kl': 0.00030564361391527406, 'entropy': 2.1764028817415237, 'entropy_coeff': 0.005}
2020-09-21 15:44:12,201	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.008186179678887129, 'policy_loss': -0.01164838601835072, 'vf_loss': 0.03047371603315696, 'vf_explained_var': 0.99525845, 'kl': 0.0006400219772331184, 'entropy': 2.1662319153547287, 'entropy_coeff': 0.005}
2020-09-21 15:44:12,690	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.004696424584835768, 'policy_loss': -0.012925099988933653, 'vf_loss': 0.01855304354103282, 'vf_explained_var': 0.9970745, 'kl': 0.001613527536392212, 'entropy': 2.1616854816675186, 'entropy_coeff': 0.005}
2

2020-09-21 15:44:18,375	DEBUG sgd.py:120 -- 14 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.07311073911841959, 'policy_loss': -0.07056216150522232, 'vf_loss': 0.004520287184277549, 'vf_explained_var': 0.99929, 'kl': 0.012170594127383083, 'entropy': 2.1440077871084213, 'entropy_coeff': 0.005}
2020-09-21 15:44:18,854	DEBUG sgd.py:120 -- 15 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.07297330943401903, 'policy_loss': -0.07015411136671901, 'vf_loss': 0.004235946296830662, 'vf_explained_var': 0.9993539, 'kl': 0.01218455919297412, 'entropy': 2.1421028226614, 'entropy_coeff': 0.005}
2020-09-21 15:44:19,300	DEBUG sgd.py:120 -- 16 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.07695005438290536, 'policy_loss': -0.07440278702415526, 'vf_loss': 0.004339431339758448, 'vf_explained_var': 0.99935484, 'kl': 0.012703062384389341, 'entropy': 2.1395226418972015, 'entropy_coeff': 0.005}
2020-09-2

custom_metrics: {}
date: 2020-09-21_15-44-25
done: false
episode_len_mean: 1042.22
episode_reward_max: -25.26537357940651
episode_reward_mean: -30.209143117401744
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 114
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.141002342104912
      entropy_coeff: 0.005
      kl: 0.017598163802176714
      policy_loss: -0.09102846845053136
      total_loss: -0.09347082220483571
      vf_explained_var: 0.9995589256286621
      vf_loss: 0.002983209677040577
  num_steps_sampled: 117000
  num_steps_trained: 117000
iterations_since_restore: 117
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.257142857142857
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.599999999999998
  vram_util_percent0: 0.9449909821282179
 

2020-09-21 15:44:28,082	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.13129675609525293, 'policy_loss': -0.00023069907911121845, 'vf_loss': 0.14159109396860003, 'vf_explained_var': 0.95188415, 'kl': 0.0024417637218188126, 'entropy': 2.1592330932617188, 'entropy_coeff': 0.005}
2020-09-21 15:44:28,562	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.05678654054645449, 'policy_loss': -0.018000051379203796, 'vf_loss': 0.08287706295959651, 'vf_explained_var': 0.97185105, 'kl': 0.008862179820425808, 'entropy': 2.149825319647789, 'entropy_coeff': 0.005}
2020-09-21 15:44:29,015	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.01970996509771794, 'policy_loss': -0.028036940726451576, 'vf_loss': 0.055973047157749534, 'vf_explained_var': 0.97960347, 'kl': 0.008428613044088706, 'entropy': 2.15094593167305, 'entropy_coeff': 0.005}
2020-0

2020-09-21 15:44:39,860	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.09159039217047393, 'policy_loss': -0.10273344558663666, 'vf_loss': 0.017432991706300527, 'vf_explained_var': 0.9934594, 'kl': 0.015022261650301516, 'entropy': 2.1593237966299057, 'entropy_coeff': 0.005}
2020-09-21 15:44:40,344	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.09813815134111792, 'policy_loss': -0.10893102805130184, 'vf_loss': 0.016784844046924263, 'vf_explained_var': 0.9936989, 'kl': 0.01610735768917948, 'entropy': 2.164833888411522, 'entropy_coeff': 0.005}
2020-09-21 15:44:40,830	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.09745911206118762, 'policy_loss': -0.10782380762975663, 'vf_loss': 0.016406168055254966, 'vf_explained_var': 0.9937209, 'kl': 0.01584878464927897, 'entropy': 2.1592213064432144, 'entropy_coeff': 0.005}
2020-09

custom_metrics: {}
date: 2020-09-21_15-44-41
done: false
episode_len_mean: 1042.22
episode_reward_max: -25.26537357940651
episode_reward_mean: -30.209143117401744
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 114
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.15921214222908
      entropy_coeff: 0.005
      kl: 0.016517459240276366
      policy_loss: -0.10803992906585336
      total_loss: -0.09671723667997867
      vf_explained_var: 0.9938057661056519
      vf_loss: 0.017163513170089573
  num_steps_sampled: 118000
  num_steps_trained: 118000
iterations_since_restore: 118
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.114285714285715
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.599999999999998
  vram_util_percent0: 0.9449909821282179
  

2020-09-21 15:44:44,164	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.13463370071258396, 'policy_loss': 0.0030855847289785743, 'vf_loss': 0.14209964033216238, 'vf_explained_var': 0.9681746, 'kl': 0.0003538093927798469, 'entropy': 2.13153412938118, 'entropy_coeff': 0.005}
2020-09-21 15:44:44,656	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.06419845821801573, 'policy_loss': -0.00704803311964497, 'vf_loss': 0.08118702517822385, 'vf_explained_var': 0.98168874, 'kl': 0.0023741503682686016, 'entropy': 2.1305566132068634, 'entropy_coeff': 0.005}
2020-09-21 15:44:45,102	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.020450312178581953, 'policy_loss': -0.021665005013346672, 'vf_loss': 0.05168364918790758, 'vf_explained_var': 0.98744345, 'kl': 0.0036340357473818585, 'entropy': 2.131707951426506, 'entropy_coeff': 0.005}
2020-09-

2020-09-21 15:44:56,009	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.0959409661591053, 'policy_loss': -0.10022249654866755, 'vf_loss': 0.010213146830210462, 'vf_explained_var': 0.9975119, 'kl': 0.015495032304897904, 'entropy': 2.1160257309675217, 'entropy_coeff': 0.005}
2020-09-21 15:44:56,452	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.09936011710669845, 'policy_loss': -0.10379678080789745, 'vf_loss': 0.0105018345057033, 'vf_explained_var': 0.9975853, 'kl': 0.01495077961590141, 'entropy': 2.110081121325493, 'entropy_coeff': 0.005}
2020-09-21 15:44:56,934	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.09833475551567972, 'policy_loss': -0.10279509203974158, 'vf_loss': 0.010376595368143171, 'vf_explained_var': 0.9976498, 'kl': 0.015518885513301939, 'entropy': 2.114384800195694, 'entropy_coeff': 0.005}
2020-09-21

custom_metrics: {}
date: 2020-09-21_15-44-57
done: false
episode_len_mean: 1042.22
episode_reward_max: -25.26537357940651
episode_reward_mean: -30.209143117401744
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 114
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.1051850393414497
      entropy_coeff: 0.005
      kl: 0.015485067153349519
      policy_loss: -0.11072136927396059
      total_loss: -0.10726251429878175
      vf_explained_var: 0.9974650740623474
      vf_loss: 0.009339265030575916
  num_steps_sampled: 119000
  num_steps_trained: 119000
iterations_since_restore: 119
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.276190476190476
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.599999999999998
  vram_util_percent0: 0.9449909821282179


[2m[36m(pid=24699)[0m 2020-09-21 15:44:58,102	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 12927.140583095592,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 858},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-1.407, max=10.339, mean=0.417),
[2m[36m(pid=24699)[0m                                   'prev_action': 16,
[2m[36m(pid=24699)

2020-09-21 15:45:00,572	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.13125217030756176, 'policy_loss': -0.001171949552372098, 'vf_loss': 0.14253296563401818, 'vf_explained_var': 0.91713905, 'kl': 0.0012490248769870815, 'entropy': 2.096710577607155, 'entropy_coeff': 0.005}
2020-09-21 15:45:01,050	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.05606279580388218, 'policy_loss': -0.008422773098573089, 'vf_loss': 0.0738984839990735, 'vf_explained_var': 0.9595073, 'kl': 0.00378212051873561, 'entropy': 2.109509363770485, 'entropy_coeff': 0.005}
2020-09-21 15:45:01,503	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.015580155013594776, 'policy_loss': -0.023577651474624872, 'vf_loss': 0.04821651941165328, 'vf_explained_var': 0.97221255, 'kl': 0.00498456388595514, 'entropy': 2.1108168214559555, 'entropy_coeff': 0.005}
2020-09-21 

2020-09-21 15:45:12,416	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.09543474344536662, 'policy_loss': -0.10204403265379369, 'vf_loss': 0.011389613326173276, 'vf_explained_var': 0.99268836, 'kl': 0.0191167087177746, 'entropy': 2.103068135678768, 'entropy_coeff': 0.005}
2020-09-21 15:45:12,871	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.09406820905860513, 'policy_loss': -0.10108774106993224, 'vf_loss': 0.012242171680554748, 'vf_explained_var': 0.9928833, 'kl': 0.01753354404354468, 'entropy': 2.096540540456772, 'entropy_coeff': 0.005}
2020-09-21 15:45:13,356	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.09745889937039465, 'policy_loss': -0.10431053943466395, 'vf_loss': 0.011748555931262672, 'vf_explained_var': 0.9929682, 'kl': 0.018709924770519137, 'entropy': 2.1019790321588516, 'entropy_coeff': 0.005}
2020-09-

custom_metrics: {}
date: 2020-09-21_15-45-14
done: false
episode_len_mean: 1042.22
episode_reward_max: -25.26537357940651
episode_reward_mean: -30.209143117401744
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 114
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.09285506606102
      entropy_coeff: 0.005
      kl: 0.019062950217630714
      policy_loss: -0.1115085759665817
      total_loss: -0.10426741791889071
      vf_explained_var: 0.9924595355987549
      vf_loss: 0.011986548372078687
  num_steps_sampled: 120000
  num_steps_trained: 120000
iterations_since_restore: 120
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.276190476190476
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.599999999999998
  vram_util_percent0: 0.9449909821282179
  v

[2m[36m(pid=24699)[0m 2020-09-21 15:45:14,556	INFO rollout_worker.py:561 -- Generating sample batch of size 200
2020-09-21 15:45:16,714	DEBUG rollout_worker.py:760 -- Training out:

{ 'default_policy': { 'learner_stats': { 'allreduce_latency': 0.0,
                                         'cur_kl_coeff': 0.3,
                                         'cur_lr': 1e-05,
                                         'entropy': 2.025829792022705,
                                         'entropy_coeff': 0.005,
                                         'kl': -1.547417305403087e-09,
                                         'policy_loss': 0.010386772453784943,
                                         'total_loss': 0.1139737069606781,
                                         'vf_explained_var': np.ndarray((1,), dtype=float32, min=0.783, max=0.783, mean=0.783),
                                         'vf_loss': 0.11371608823537827}}}

2020-09-21 15:45:16,720	INFO rollout_worker.py:736 -- Training o

2020-09-21 15:45:23,766	DEBUG sgd.py:120 -- 14 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.0640262799570337, 'policy_loss': -0.06967756140511483, 'vf_loss': 0.01194842264521867, 'vf_explained_var': 0.983845, 'kl': 0.01328091078903526, 'entropy': 2.0562833994627, 'entropy_coeff': 0.005}
2020-09-21 15:45:24,219	DEBUG sgd.py:120 -- 15 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.06381087785121053, 'policy_loss': -0.06912034703418612, 'vf_loss': 0.011062202276661992, 'vf_explained_var': 0.98383087, 'kl': 0.015094630012754351, 'entropy': 2.056224435567856, 'entropy_coeff': 0.005}
2020-09-21 15:45:24,705	DEBUG sgd.py:120 -- 16 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.06488168629584834, 'policy_loss': -0.06966610765084624, 'vf_loss': 0.010754866758361459, 'vf_explained_var': 0.98519707, 'kl': 0.014473182149231434, 'entropy': 2.062480255961418, 'entropy_coeff': 0.005}
2020-09-21 

custom_metrics: {}
date: 2020-09-21_15-45-30
done: false
episode_len_mean: 1042.22
episode_reward_max: -25.26537357940651
episode_reward_mean: -30.209143117401744
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 114
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.0561567544937134
      entropy_coeff: 0.005
      kl: 0.01701094169402495
      policy_loss: -0.09188821341376752
      total_loss: -0.08894275210332125
      vf_explained_var: 0.9884886741638184
      vf_loss: 0.008122966581140645
  num_steps_sampled: 121000
  num_steps_trained: 121000
iterations_since_restore: 121
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.20952380952381
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.599999999999998
  vram_util_percent0: 0.9449909821282179
  

2020-09-21 15:45:33,549	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.17008613736834377, 'policy_loss': -0.00029642414301633835, 'vf_loss': 0.17910295818001032, 'vf_explained_var': 0.74750674, 'kl': 0.005527169641257101, 'entropy': 2.0757097378373146, 'entropy_coeff': 0.005}
2020-09-21 15:45:33,999	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.055661993857938796, 'policy_loss': -0.0186909754993394, 'vf_loss': 0.08296984597109258, 'vf_explained_var': 0.8765126, 'kl': 0.005911018335609697, 'entropy': 2.078035905957222, 'entropy_coeff': 0.005}
2020-09-21 15:45:34,487	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.02270108787342906, 'policy_loss': -0.021264268463710323, 'vf_loss': 0.052977229468524456, 'vf_explained_var': 0.92672765, 'kl': 0.004727944789920002, 'entropy': 2.086050659418106, 'entropy_coeff': 0.005}
2020-09-

2020-09-21 15:45:45,327	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.09008355450350791, 'policy_loss': -0.09218147763749585, 'vf_loss': 0.007896454277215526, 'vf_explained_var': 0.9886873, 'kl': 0.015563213790301234, 'entropy': 2.093498185276985, 'entropy_coeff': 0.005}
2020-09-21 15:45:45,807	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.09456768387462944, 'policy_loss': -0.09684457746334374, 'vf_loss': 0.007954458938911557, 'vf_explained_var': 0.9885577, 'kl': 0.01590078033041209, 'entropy': 2.089559957385063, 'entropy_coeff': 0.005}
2020-09-21 15:45:46,259	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.10585315944626927, 'policy_loss': -0.10788978869095445, 'vf_loss': 0.007795136014465243, 'vf_explained_var': 0.989064, 'kl': 0.015710720617789775, 'entropy': 2.0943451821804047, 'entropy_coeff': 0.005}
2020-09-

custom_metrics: {}
date: 2020-09-21_15-45-47
done: false
episode_len_mean: 1042.22
episode_reward_max: -25.26537357940651
episode_reward_mean: -30.209143117401744
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 114
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.0868921726942062
      entropy_coeff: 0.005
      kl: 0.015944068087264895
      policy_loss: -0.10059688601177186
      total_loss: -0.09891369368415326
      vf_explained_var: 0.9894505143165588
      vf_loss: 0.007334436581004411
  num_steps_sampled: 122000
  num_steps_trained: 122000
iterations_since_restore: 122
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.363636363636363
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.599999999999998
  vram_util_percent0: 0.9449909821282177


2020-09-21 15:45:50,325	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.09411884588189423, 'policy_loss': -0.0008244122145697474, 'vf_loss': 0.10503007844090462, 'vf_explained_var': 0.87443614, 'kl': 0.000902604122339945, 'entropy': 2.0715210288763046, 'entropy_coeff': 0.005}
2020-09-21 15:45:50,778	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.035740286693908274, 'policy_loss': -0.004997056763386354, 'vf_loss': 0.050810141023248434, 'vf_explained_var': 0.9402116, 'kl': 0.001007381513772998, 'entropy': 2.075002908706665, 'entropy_coeff': 0.005}
2020-09-21 15:45:51,261	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.016359887085855007, 'policy_loss': -0.011924553196877241, 'vf_loss': 0.037545024417340755, 'vf_explained_var': 0.9574064, 'kl': 0.0038719780786777847, 'entropy': 2.084435746073723, 'entropy_coeff': 0.005}
2020-

2020-09-21 15:46:02,155	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.08671963517554104, 'policy_loss': -0.08859171799849719, 'vf_loss': 0.008840163616696373, 'vf_explained_var': 0.9896909, 'kl': 0.011086177488323301, 'entropy': 2.0587868615984917, 'entropy_coeff': 0.005}
2020-09-21 15:46:02,645	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.08858846873044968, 'policy_loss': -0.09035904868505895, 'vf_loss': 0.008684440515935421, 'vf_explained_var': 0.9898908, 'kl': 0.011238123290240765, 'entropy': 2.0570607483386993, 'entropy_coeff': 0.005}
2020-09-21 15:46:03,099	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.09068466792814434, 'policy_loss': -0.09226724342443049, 'vf_loss': 0.00840168391005136, 'vf_explained_var': 0.9898775, 'kl': 0.011643867008388042, 'entropy': 2.0624532997608185, 'entropy_coeff': 0.005}
2020-

custom_metrics: {}
date: 2020-09-21_15-46-04
done: false
episode_len_mean: 1042.22
episode_reward_max: -25.26537357940651
episode_reward_mean: -30.209143117401744
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 114
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.057455040514469
      entropy_coeff: 0.005
      kl: 0.011880958802066743
      policy_loss: -0.09111524268519133
      total_loss: -0.08966131485067308
      vf_explained_var: 0.9904348850250244
      vf_loss: 0.00817691074917093
  num_steps_sampled: 123000
  num_steps_trained: 123000
iterations_since_restore: 123
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.345454545454545
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.599999999999998
  vram_util_percent0: 0.9449909821282177
  

2020-09-21 15:46:07,026	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.3455240512266755, 'policy_loss': -0.005243322346359491, 'vf_loss': 0.36054814886301756, 'vf_explained_var': 0.6451725, 'kl': 0.0011231914839848756, 'entropy': 2.0235486701130867, 'entropy_coeff': 0.005}
2020-09-21 15:46:07,514	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.19777900562621653, 'policy_loss': -0.010405407752841711, 'vf_loss': 0.2172892768867314, 'vf_explained_var': 0.79025346, 'kl': 0.0034093579815817066, 'entropy': 2.0255359783768654, 'entropy_coeff': 0.005}
2020-09-21 15:46:07,967	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.13300737494137138, 'policy_loss': -0.019003497902303934, 'vf_loss': 0.1602221680805087, 'vf_explained_var': 0.84483945, 'kl': 0.006512119143735617, 'entropy': 2.032986745238304, 'entropy_coeff': 0.005}
2020-09-21

2020-09-21 15:46:16,751	DEBUG rollout_worker.py:760 -- Training out:

{ 'default_policy': { 'learner_stats': { 'allreduce_latency': 0.0,
                                         'cur_kl_coeff': 0.3,
                                         'cur_lr': 1e-05,
                                         'entropy': 2.0291948318481445,
                                         'entropy_coeff': 0.005,
                                         'kl': 0.009367118589580059,
                                         'policy_loss': -0.07005288451910019,
                                         'total_loss': -0.06156817823648453,
                                         'vf_explained_var': np.ndarray((1,), dtype=float32, min=0.987, max=0.987, mean=0.987),
                                         'vf_loss': 0.015820538625121117}}}

2020-09-21 15:46:17,036	DEBUG sgd.py:120 -- 21 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.04751777183264494, 'policy_loss': -0.07221498112

custom_metrics: {}
date: 2020-09-21_15-46-20
done: false
episode_len_mean: 1063.49
episode_reward_max: -25.26537357940651
episode_reward_mean: -30.21504221094887
episode_reward_min: -32.22698139089825
episodes_this_iter: 1
episodes_total: 115
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.031127743422985
      entropy_coeff: 0.005
      kl: 0.010577781242318451
      policy_loss: -0.08481606573332101
      total_loss: -0.07204600516706705
      vf_explained_var: 0.980009913444519
      vf_loss: 0.01975236745784059
  num_steps_sampled: 124000
  num_steps_trained: 124000
iterations_since_restore: 124
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.319047619047619
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.599999999999998
  vram_util_percent0: 0.9449909821282179
  vr

[2m[36m(pid=24699)[0m 2020-09-21 15:46:21,114	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 10967.217266734011,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 1858},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-11.179, max=28.274, mean=0.375),
[2m[36m(pid=24699)[0m                                   'prev_action': 3,
[2m[36m(pid=24699

2020-09-21 15:46:23,856	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.226947573304642, 'policy_loss': -0.0017500192916486412, 'vf_loss': 0.23751058243215084, 'vf_explained_var': 0.7906693, 'kl': 0.0036634573246023816, 'entropy': 1.982405036687851, 'entropy_coeff': 0.005}
2020-09-21 15:46:24,342	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.07579194544814527, 'policy_loss': -0.014011291437782347, 'vf_loss': 0.0978208240121603, 'vf_explained_var': 0.90875816, 'kl': 0.005985115363728255, 'entropy': 1.9626237824559212, 'entropy_coeff': 0.005}
2020-09-21 15:46:24,791	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.02540552814025432, 'policy_loss': -0.026776065700687468, 'vf_loss': 0.060099338879808784, 'vf_explained_var': 0.94115186, 'kl': 0.006206629652297124, 'entropy': 1.9559464156627655, 'entropy_coeff': 0.005}
2020-09-2

2020-09-21 15:46:35,693	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.08640076825395226, 'policy_loss': -0.08663227048236877, 'vf_loss': 0.005751403732574545, 'vf_explained_var': 0.9943887, 'kl': 0.014381827786564827, 'entropy': 1.966888852417469, 'entropy_coeff': 0.005}
2020-09-21 15:46:36,149	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.08784706494770944, 'policy_loss': -0.08789918734692037, 'vf_loss': 0.00564552417199593, 'vf_explained_var': 0.99457645, 'kl': 0.014092942699790001, 'entropy': 1.9642573967576027, 'entropy_coeff': 0.005}
2020-09-21 15:46:36,633	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.09921924851369113, 'policy_loss': -0.0990616912022233, 'vf_loss': 0.00548353350313846, 'vf_explained_var': 0.99499464, 'kl': 0.013984452409204096, 'entropy': 1.967286467552185, 'entropy_coeff': 0.005}
2020-09

custom_metrics: {}
date: 2020-09-21_15-46-37
done: false
episode_len_mean: 1063.49
episode_reward_max: -25.26537357940651
episode_reward_mean: -30.21504221094887
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 115
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 1.9658232927322388
      entropy_coeff: 0.005
      kl: 0.015173005813267082
      policy_loss: -0.09986179764382541
      total_loss: -0.09966261312365532
      vf_explained_var: 0.9951279759407043
      vf_loss: 0.005476399281178601
  num_steps_sampled: 125000
  num_steps_trained: 125000
iterations_since_restore: 125
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.222727272727273
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.599999999999998
  vram_util_percent0: 0.9449909821282177
 

[2m[36m(pid=24699)[0m 2020-09-21 15:46:37,824	INFO rollout_worker.py:561 -- Generating sample batch of size 200
2020-09-21 15:46:40,272	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.2029630052857101, 'policy_loss': 0.00021758017828688025, 'vf_loss': 0.21237501502037048, 'vf_explained_var': 0.95710975, 'kl': 0.0003282356057829894, 'entropy': 1.9456134140491486, 'entropy_coeff': 0.005}
2020-09-21 15:46:40,724	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.11181115778163075, 'policy_loss': -0.0050940997316502035, 'vf_loss': 0.12607612973079085, 'vf_explained_var': 0.97394854, 'kl': 0.0018414187543385196, 'entropy': 1.9446613565087318, 'entropy_coeff': 0.005}
2020-09-21 15:46:41,208	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.059869267512112856, 'policy_loss': -0.020276229013688862, 'vf_loss': 0.08893006294965744, 'vf

2020-09-21 15:46:52,355	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.09096288692671806, 'policy_loss': -0.09589945914922282, 'vf_loss': 0.01106193009763956, 'vf_explained_var': 0.9976108, 'kl': 0.011728587560355663, 'entropy': 1.9287887439131737, 'entropy_coeff': 0.005}
2020-09-21 15:46:52,832	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.09513465885538608, 'policy_loss': -0.099667523347307, 'vf_loss': 0.01068370189750567, 'vf_explained_var': 0.99759746, 'kl': 0.01164992427220568, 'entropy': 1.9291624501347542, 'entropy_coeff': 0.005}
2020-09-21 15:46:53,311	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.0993442734470591, 'policy_loss': -0.10385865124408156, 'vf_loss': 0.010452610236825421, 'vf_explained_var': 0.99771017, 'kl': 0.012319632805883884, 'entropy': 1.9268233701586723, 'entropy_coeff': 0.005}
2020-09-

custom_metrics: {}
date: 2020-09-21_15-46-54
done: false
episode_len_mean: 1063.49
episode_reward_max: -25.26537357940651
episode_reward_mean: -30.21504221094887
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 115
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 1.9313188940286636
      entropy_coeff: 0.005
      kl: 0.012352595163974911
      policy_loss: -0.1129790956620127
      total_loss: -0.10919778002426028
      vf_explained_var: 0.997905969619751
      vf_loss: 0.009732134087244049
  num_steps_sampled: 126000
  num_steps_trained: 126000
iterations_since_restore: 126
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.868181818181818
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.613636363636362
  vram_util_percent0: 0.9449909821282177
  v

2020-09-21 15:46:57,641	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.2548036479856819, 'policy_loss': -0.00865432561840862, 'vf_loss': 0.27319781109690666, 'vf_explained_var': 0.81392837, 'kl': 0.0007037755160259618, 'entropy': 1.9901950284838676, 'entropy_coeff': 0.005}
2020-09-21 15:46:58,130	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.1320595322176814, 'policy_loss': -0.008054955629631877, 'vf_loss': 0.14900011755526066, 'vf_explained_var': 0.88822746, 'kl': 0.003544383551343344, 'entropy': 1.989789143204689, 'entropy_coeff': 0.005}
2020-09-21 15:46:58,581	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.07732879754621536, 'policy_loss': -0.01808393280953169, 'vf_loss': 0.10337362065911293, 'vf_explained_var': 0.9185362, 'kl': 0.0067942768801003695, 'entropy': 1.9998334646224976, 'entropy_coeff': 0.005}
2020-09-21 

2020-09-21 15:47:09,499	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.055817957618273795, 'policy_loss': -0.08005741098895669, 'vf_loss': 0.02985330823867116, 'vf_explained_var': 0.9736578, 'kl': 0.014521877164952457, 'entropy': 1.9940833747386932, 'entropy_coeff': 0.005}
2020-09-21 15:47:09,954	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.05633278738241643, 'policy_loss': -0.08179231238318607, 'vf_loss': 0.030681397751322947, 'vf_explained_var': 0.9749814, 'kl': 0.0157731490326114, 'entropy': 1.9907634258270264, 'entropy_coeff': 0.005}
2020-09-21 15:47:10,439	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.061844113166444004, 'policy_loss': -0.08736146317096427, 'vf_loss': 0.030712131614563987, 'vf_explained_var': 0.9744243, 'kl': 0.015916953969281167, 'entropy': 1.993974931538105, 'entropy_coeff': 0.005}
2020-0

custom_metrics: {}
date: 2020-09-21_15-47-11
done: false
episode_len_mean: 1107.36
episode_reward_max: -25.26537357940651
episode_reward_mean: -30.232360942649883
episode_reward_min: -32.22698139089825
episodes_this_iter: 2
episodes_total: 117
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 1.9916659817099571
      entropy_coeff: 0.005
      kl: 0.015962585690431297
      policy_loss: -0.08056942257098854
      total_loss: -0.056491300696507096
      vf_explained_var: 0.9753746390342712
      vf_loss: 0.02924767934018746
  num_steps_sampled: 127000
  num_steps_trained: 127000
iterations_since_restore: 127
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.440909090909091
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.9449909821282177


2020-09-21 15:47:13,819	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.23394993087276816, 'policy_loss': 0.0030810429016128182, 'vf_loss': 0.24026354309171438, 'vf_explained_var': 0.87322325, 'kl': 0.0002983226398587391, 'entropy': 1.8968297615647316, 'entropy_coeff': 0.005}
2020-09-21 15:47:14,303	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.08141564705874771, 'policy_loss': -0.010786404483951628, 'vf_loss': 0.10104182036593556, 'vf_explained_var': 0.943188, 'kl': 0.0022855004208395258, 'entropy': 1.905083529651165, 'entropy_coeff': 0.005}
2020-09-21 15:47:14,756	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.031382664339616895, 'policy_loss': -0.021523567964322865, 'vf_loss': 0.0614562479313463, 'vf_explained_var': 0.9646733, 'kl': 0.0031732936477055773, 'entropy': 1.9004009366035461, 'entropy_coeff': 0.005}
2020-09-

2020-09-21 15:47:20,464	DEBUG sgd.py:120 -- 14 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.07632631284650415, 'policy_loss': -0.08124337042681873, 'vf_loss': 0.011077967588789761, 'vf_explained_var': 0.9936042, 'kl': 0.011369480576831847, 'entropy': 1.9143514335155487, 'entropy_coeff': 0.005}
2020-09-21 15:47:20,937	DEBUG sgd.py:120 -- 15 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.07688505091937259, 'policy_loss': -0.08130146079929546, 'vf_loss': 0.010438717086799443, 'vf_explained_var': 0.99405515, 'kl': 0.01183885126374662, 'entropy': 1.9147927910089493, 'entropy_coeff': 0.005}
2020-09-21 15:47:21,397	DEBUG sgd.py:120 -- 16 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.08494628040352836, 'policy_loss': -0.0892767122713849, 'vf_loss': 0.010146438668016344, 'vf_explained_var': 0.99411416, 'kl': 0.012556560686789453, 'entropy': 1.9165960922837257, 'entropy_coeff': 0.005}
2020

custom_metrics: {}
date: 2020-09-21_15-47-27
done: false
episode_len_mean: 1107.36
episode_reward_max: -25.26537357940651
episode_reward_mean: -30.23236094264989
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 117
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 1.9142846539616585
      entropy_coeff: 0.005
      kl: 0.017795379913877696
      policy_loss: -0.10490210121497512
      total_loss: -0.102606620057486
      vf_explained_var: 0.9961326122283936
      vf_loss: 0.006528287660330534
  num_steps_sampled: 128000
  num_steps_trained: 128000
iterations_since_restore: 128
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.385714285714286
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.60952380952381
  vram_util_percent0: 0.9449909821282179
  vr

2020-09-21 15:47:30,620	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.45942349545657635, 'policy_loss': -0.004202005220577121, 'vf_loss': 0.47237952053546906, 'vf_explained_var': 0.9276392, 'kl': 0.003034176824680601, 'entropy': 1.932853527367115, 'entropy_coeff': 0.005}
2020-09-21 15:47:31,068	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.17425710172392428, 'policy_loss': -0.025049513671547174, 'vf_loss': 0.20738396607339382, 'vf_explained_var': 0.966181, 'kl': 0.0050336251151748, 'entropy': 1.9174876809120178, 'entropy_coeff': 0.005}
2020-09-21 15:47:31,557	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.08977226517163217, 'policy_loss': -0.03174414788372815, 'vf_loss': 0.1293212352320552, 'vf_explained_var': 0.9784636, 'kl': 0.006050303461961448, 'entropy': 1.9239827916026115, 'entropy_coeff': 0.005}
2020-09-21 15:47

2020-09-21 15:47:42,456	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.08757989760488272, 'policy_loss': -0.1108580743893981, 'vf_loss': 0.028024432016536593, 'vf_explained_var': 0.9954118, 'kl': 0.01598602021113038, 'entropy': 1.9084125012159348, 'entropy_coeff': 0.005}
2020-09-21 15:47:42,941	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.08985584252513945, 'policy_loss': -0.11225745739648119, 'vf_loss': 0.027142064296640456, 'vf_explained_var': 0.9953266, 'kl': 0.016097991727292538, 'entropy': 1.913969524204731, 'entropy_coeff': 0.005}
2020-09-21 15:47:43,426	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.09280322829727083, 'policy_loss': -0.11455839907284826, 'vf_loss': 0.02639332681428641, 'vf_explained_var': 0.9956176, 'kl': 0.016392771271057427, 'entropy': 1.9111974090337753, 'entropy_coeff': 0.005}
2020-09-

custom_metrics: {}
date: 2020-09-21_15-47-44
done: false
episode_len_mean: 1107.36
episode_reward_max: -25.26537357940651
episode_reward_mean: -30.23236094264989
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 117
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 1.9116630926728249
      entropy_coeff: 0.005
      kl: 0.016777754935901612
      policy_loss: -0.1216280038934201
      total_loss: -0.09897068119607866
      vf_explained_var: 0.9955210089683533
      vf_loss: 0.027182311634533107
  num_steps_sampled: 129000
  num_steps_trained: 129000
iterations_since_restore: 129
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.190909090909091
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.599999999999998
  vram_util_percent0: 0.9449909821282177
  

[2m[36m(pid=24699)[0m 2020-09-21 15:47:44,625	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 6909.417382265227,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 2858},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-1.44, max=1.305, mean=0.317),
[2m[36m(pid=24699)[0m                                   'prev_action': 12,
[2m[36m(pid=24699)[

2020-09-21 15:47:47,248	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.21767881908454, 'policy_loss': -0.003943182760849595, 'vf_loss': 0.23107869736850262, 'vf_explained_var': 0.8281903, 'kl': 0.0007870482797385225, 'entropy': 1.9385627135634422, 'entropy_coeff': 0.005}
2020-09-21 15:47:47,735	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.09743822645395994, 'policy_loss': -0.013912270966102369, 'vf_loss': 0.11974043259397149, 'vf_explained_var': 0.91025776, 'kl': 0.004169550607912242, 'entropy': 1.9281619191169739, 'entropy_coeff': 0.005}
2020-09-21 15:47:48,190	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.04931585560552776, 'policy_loss': -0.022429374512284994, 'vf_loss': 0.07974461023695767, 'vf_explained_var': 0.9385849, 'kl': 0.0056331075029447675, 'entropy': 1.9378623813390732, 'entropy_coeff': 0.005}
2020-09-21

2020-09-21 15:47:59,055	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.1034058837685734, 'policy_loss': -0.11067713276133873, 'vf_loss': 0.01159508089767769, 'vf_explained_var': 0.9911268, 'kl': 0.01765461196191609, 'entropy': 1.9240426793694496, 'entropy_coeff': 0.005}
2020-09-21 15:47:59,508	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.09990895818918943, 'policy_loss': -0.10768852173350751, 'vf_loss': 0.01211709002382122, 'vf_explained_var': 0.9914001, 'kl': 0.017501407535746694, 'entropy': 1.9175881370902061, 'entropy_coeff': 0.005}
2020-09-21 15:47:59,989	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.1108044208958745, 'policy_loss': -0.11765629565343261, 'vf_loss': 0.011041410238249227, 'vf_explained_var': 0.9915971, 'kl': 0.01800563419237733, 'entropy': 1.9182460829615593, 'entropy_coeff': 0.005}
2020-09-21

custom_metrics: {}
date: 2020-09-21_15-48-00
done: false
episode_len_mean: 1107.36
episode_reward_max: -25.26537357940651
episode_reward_mean: -30.23236094264989
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 117
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 1.9199092835187912
      entropy_coeff: 0.005
      kl: 0.01814779790583998
      policy_loss: -0.12105188961140811
      total_loss: -0.1142868367023766
      vf_explained_var: 0.9919812083244324
      vf_loss: 0.010920253349468112
  num_steps_sampled: 130000
  num_steps_trained: 130000
iterations_since_restore: 130
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.219047619047619
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.599999999999998
  vram_util_percent0: 0.9449909821282179
  v

[2m[36m(pid=24699)[0m 2020-09-21 15:48:01,176	INFO rollout_worker.py:561 -- Generating sample batch of size 200
2020-09-21 15:48:03,546	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.2730459385784343, 'policy_loss': 0.0026736309519037604, 'vf_loss': 0.2805165611207485, 'vf_explained_var': 0.8984525, 'kl': 0.0005018968336762963, 'entropy': 2.05896382778883, 'entropy_coeff': 0.005}
2020-09-21 15:48:04,041	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.09513824566965923, 'policy_loss': -0.025463085854426026, 'vf_loss': 0.1299198349006474, 'vf_explained_var': 0.9513352, 'kl': 0.003373225190443918, 'entropy': 2.066094249486923, 'entropy_coeff': 0.005}
2020-09-21 15:48:04,527	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.030297314864583313, 'policy_loss': -0.04032199480570853, 'vf_loss': 0.0790833537466824, 'vf_explained_v

2020-09-21 15:48:15,409	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.10263235398451798, 'policy_loss': -0.10798598942346871, 'vf_loss': 0.01084307735436596, 'vf_explained_var': 0.9956773, 'kl': 0.015714944282080978, 'entropy': 2.040785200893879, 'entropy_coeff': 0.005}
2020-09-21 15:48:15,901	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.10758682363666594, 'policy_loss': -0.11365739675238729, 'vf_loss': 0.01151617060531862, 'vf_explained_var': 0.9957622, 'kl': 0.01572345441672951, 'entropy': 2.032526083290577, 'entropy_coeff': 0.005}
2020-09-21 15:48:16,356	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.10403852199669927, 'policy_loss': -0.11025121819693595, 'vf_loss': 0.01168025069637224, 'vf_explained_var': 0.9957532, 'kl': 0.015671039931476116, 'entropy': 2.0337744429707527, 'entropy_coeff': 0.005}
2020-09-21

custom_metrics: {}
date: 2020-09-21_15-48-17
done: false
episode_len_mean: 1139.33
episode_reward_max: -24.818585521219084
episode_reward_mean: -30.163835210746047
episode_reward_min: -32.22698139089825
episodes_this_iter: 1
episodes_total: 118
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 2.0300024896860123
      entropy_coeff: 0.005
      kl: 0.01607155322562903
      policy_loss: -0.11114795610774308
      total_loss: -0.105932880891487
      vf_explained_var: 0.9959591627120972
      vf_loss: 0.010543615062488243
  num_steps_sampled: 131000
  num_steps_trained: 131000
iterations_since_restore: 131
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.327272727272727
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.599999999999998
  vram_util_percent0: 0.9449909821282177
  

2020-09-21 15:48:19,987	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.27705560019239783, 'policy_loss': -0.0023108631721697748, 'vf_loss': 0.2883471818640828, 'vf_explained_var': 0.8819972, 'kl': 0.0018887782009791576, 'entropy': 1.909471519291401, 'entropy_coeff': 0.005}
2020-09-21 15:48:20,477	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.13735198858194053, 'policy_loss': -0.006690531736239791, 'vf_loss': 0.15026376023888588, 'vf_explained_var': 0.9380506, 'kl': 0.010624975635437295, 'entropy': 1.8817450255155563, 'entropy_coeff': 0.005}
2020-09-21 15:48:20,925	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.07349214132409543, 'policy_loss': -0.015747963916510344, 'vf_loss': 0.09548537735827267, 'vf_explained_var': 0.9616811, 'kl': 0.010393369710072875, 'entropy': 1.8726570308208466, 'entropy_coeff': 0.005}
2020-09-21

2020-09-21 15:48:31,801	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.08124438126105815, 'policy_loss': -0.0900191490072757, 'vf_loss': 0.013450546888634562, 'vf_explained_var': 0.9947356, 'kl': 0.016116208222229034, 'entropy': 1.902128741145134, 'entropy_coeff': 0.005}
2020-09-21 15:48:32,293	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.0881783845834434, 'policy_loss': -0.09682641015388072, 'vf_loss': 0.01274491025833413, 'vf_explained_var': 0.9945303, 'kl': 0.01786039024591446, 'entropy': 1.8910001143813133, 'entropy_coeff': 0.005}
2020-09-21 15:48:32,778	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.08401314588263631, 'policy_loss': -0.09171457448974252, 'vf_loss': 0.012080973596312106, 'vf_explained_var': 0.9947077, 'kl': 0.016964565904345363, 'entropy': 1.893782563507557, 'entropy_coeff': 0.005}
2020-09-21

custom_metrics: {}
date: 2020-09-21_15-48-33
done: false
episode_len_mean: 1139.33
episode_reward_max: -24.818585521219084
episode_reward_mean: -30.163835210746047
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 118
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 1.894645817577839
      entropy_coeff: 0.005
      kl: 0.017624614876694977
      policy_loss: -0.093473942251876
      total_loss: -0.08557509630918503
      vf_explained_var: 0.9949507117271423
      vf_loss: 0.012084690388292074
  num_steps_sampled: 132000
  num_steps_trained: 132000
iterations_since_restore: 132
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.099999999999999
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.690476190476188
  vram_util_percent0: 0.9449909821282179
  

2020-09-21 15:48:36,391	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.31744754198007286, 'policy_loss': 0.0024763611145317554, 'vf_loss': 0.3243248611688614, 'vf_explained_var': 0.92452514, 'kl': 0.00046403354084101345, 'entropy': 1.898579202592373, 'entropy_coeff': 0.005}
2020-09-21 15:48:36,876	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.12214181758463383, 'policy_loss': -0.011191351804882288, 'vf_loss': 0.1415057689882815, 'vf_explained_var': 0.96859807, 'kl': 0.004501873969275039, 'entropy': 1.9046323150396347, 'entropy_coeff': 0.005}
2020-09-21 15:48:37,364	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.046342549845576286, 'policy_loss': -0.03015276393853128, 'vf_loss': 0.08432112820446491, 'vf_explained_var': 0.98056906, 'kl': 0.005889008185476996, 'entropy': 1.9185032919049263, 'entropy_coeff': 0.005}
2020-09-

2020-09-21 15:48:48,206	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.10280592809431255, 'policy_loss': -0.10542443301528692, 'vf_loss': 0.00688166607869789, 'vf_explained_var': 0.9983088, 'kl': 0.017919176374562085, 'entropy': 1.9277839288115501, 'entropy_coeff': 0.005}
2020-09-21 15:48:48,685	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.11139465193264186, 'policy_loss': -0.11413142317906022, 'vf_loss': 0.006974366755457595, 'vf_explained_var': 0.9983114, 'kl': 0.01802789856446907, 'entropy': 1.9291933253407478, 'entropy_coeff': 0.005}
2020-09-21 15:48:49,128	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.11362981796264648, 'policy_loss': -0.11588072299491614, 'vf_loss': 0.0063903253903845325, 'vf_explained_var': 0.9983543, 'kl': 0.01821161719271913, 'entropy': 1.9205822572112083, 'entropy_coeff': 0.005}
2020-0

custom_metrics: {}
date: 2020-09-21_15-48-50
done: false
episode_len_mean: 1139.33
episode_reward_max: -24.818585521219084
episode_reward_mean: -30.163835210746047
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 118
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 1.9237502738833427
      entropy_coeff: 0.005
      kl: 0.018884143326431513
      policy_loss: -0.11674942495301366
      total_loss: -0.11457706359215081
      vf_explained_var: 0.9983721971511841
      vf_loss: 0.0061258662171894684
  num_steps_sampled: 133000
  num_steps_trained: 133000
iterations_since_restore: 133
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.242857142857143
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999998
  vram_util_percent0: 0.944990982128217

2020-09-21 15:48:53,132	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.44766847393475473, 'policy_loss': -0.0007044317899271846, 'vf_loss': 0.45771337766200304, 'vf_explained_var': 0.7338029, 'kl': 0.0009521715395962027, 'entropy': 1.9252233654260635, 'entropy_coeff': 0.005}
2020-09-21 15:48:53,623	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.13962046173401177, 'policy_loss': -0.01405394118046388, 'vf_loss': 0.1628150176256895, 'vf_explained_var': 0.90153265, 'kl': 0.0016391583922086284, 'entropy': 1.9264730587601662, 'entropy_coeff': 0.005}
2020-09-21 15:48:54,079	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.052511485875584185, 'policy_loss': -0.02365982031915337, 'vf_loss': 0.08490894827991724, 'vf_explained_var': 0.94735646, 'kl': 0.0030327530985232443, 'entropy': 1.9294931441545486, 'entropy_coeff': 0.005}
2020-0

2020-09-21 15:49:05,015	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.08789183653425425, 'policy_loss': -0.09046429116278887, 'vf_loss': 0.007542992287199013, 'vf_explained_var': 0.9950841, 'kl': 0.015750586870126426, 'entropy': 1.9391433745622635, 'entropy_coeff': 0.005}
2020-09-21 15:49:05,460	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.08976087463088334, 'policy_loss': -0.09248114074580371, 'vf_loss': 0.007550680136773735, 'vf_explained_var': 0.99464554, 'kl': 0.016202896018512547, 'entropy': 1.9382558465003967, 'entropy_coeff': 0.005}
2020-09-21 15:49:05,938	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.0932845160132274, 'policy_loss': -0.09614972840063274, 'vf_loss': 0.007495690311770886, 'vf_explained_var': 0.99510515, 'kl': 0.016857111710123718, 'entropy': 1.937522977590561, 'entropy_coeff': 0.005}
2020

custom_metrics: {}
date: 2020-09-21_15-49-06
done: false
episode_len_mean: 1139.33
episode_reward_max: -24.818585521219084
episode_reward_mean: -30.163835210746047
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 118
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 1.9385345876216888
      entropy_coeff: 0.005
      kl: 0.017966162296943367
      policy_loss: -0.09950058604590595
      total_loss: -0.09649342577904463
      vf_explained_var: 0.9951839447021484
      vf_loss: 0.007309980603167787
  num_steps_sampled: 134000
  num_steps_trained: 134000
iterations_since_restore: 134
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.154545454545455
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.9449909821282177

[2m[36m(pid=24699)[0m 2020-09-21 15:49:07,148	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 9229.260490107565,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 3858},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-34.004, max=35.904, mean=1.064),
[2m[36m(pid=24699)[0m                                   'prev_action': 19,
[2m[36m(pid=24699

2020-09-21 15:49:10,005	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.22390904906205833, 'policy_loss': 0.004180077696219087, 'vf_loss': 0.22933754697442055, 'vf_explained_var': 0.9173759, 'kl': 0.0006047062284548765, 'entropy': 1.9579976052045822, 'entropy_coeff': 0.005}
2020-09-21 15:49:10,493	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.08217667136341333, 'policy_loss': -0.021936783799901605, 'vf_loss': 0.11298079369589686, 'vf_explained_var': 0.96299267, 'kl': 0.0028802630986319855, 'entropy': 1.9462825134396553, 'entropy_coeff': 0.005}
2020-09-21 15:49:10,948	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.017184003023430705, 'policy_loss': -0.028241090127266943, 'vf_loss': 0.05357742728665471, 'vf_explained_var': 0.97835374, 'kl': 0.005085727461846545, 'entropy': 1.9356113001704216, 'entropy_coeff': 0.005}
2020-0

2020-09-21 15:49:17,154	DEBUG sgd.py:120 -- 15 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.08990161982364953, 'policy_loss': -0.09185520990286022, 'vf_loss': 0.008008934033568949, 'vf_explained_var': 0.9965606, 'kl': 0.011874401825480163, 'entropy': 1.9235328361392021, 'entropy_coeff': 0.005}
2020-09-21 15:49:17,611	DEBUG sgd.py:120 -- 16 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.09158035728614777, 'policy_loss': -0.09302849101368338, 'vf_loss': 0.007459931191988289, 'vf_explained_var': 0.99687517, 'kl': 0.012075240374542773, 'entropy': 1.9268761426210403, 'entropy_coeff': 0.005}
2020-09-21 15:49:18,097	DEBUG sgd.py:120 -- 17 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.09685324470046908, 'policy_loss': -0.09801727847661823, 'vf_loss': 0.006877929292386398, 'vf_explained_var': 0.99712455, 'kl': 0.013077332521788776, 'entropy': 1.9274200052022934, 'entropy_coeff': 0.005}
20

custom_metrics: {}
date: 2020-09-21_15-49-23
done: false
episode_len_mean: 1139.33
episode_reward_max: -24.818585521219084
episode_reward_mean: -30.163835210746047
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 118
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 1.933394193649292
      entropy_coeff: 0.005
      kl: 0.015761067217681557
      policy_loss: -0.1076208003796637
      total_loss: -0.10875924082938582
      vf_explained_var: 0.9983670115470886
      vf_loss: 0.0038002044602762908
  num_steps_sampled: 135000
  num_steps_trained: 135000
iterations_since_restore: 135
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.159090909090909
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.9449909821282177


[2m[36m(pid=24699)[0m 2020-09-21 15:49:24,065	INFO rollout_worker.py:561 -- Generating sample batch of size 200
2020-09-21 15:49:26,580	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.42842261865735054, 'policy_loss': 0.004546396958176047, 'vf_loss': 0.433434396982193, 'vf_explained_var': 0.7997446, 'kl': 0.0013759123449310628, 'entropy': 1.9941909164190292, 'entropy_coeff': 0.005}
2020-09-21 15:49:27,035	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.1311868850607425, 'policy_loss': -0.024865117040462792, 'vf_loss': 0.16543623805046082, 'vf_explained_var': 0.9188036, 'kl': 0.0019917594181606546, 'entropy': 1.9963524863123894, 'entropy_coeff': 0.005}
2020-09-21 15:49:27,526	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.056988801807165146, 'policy_loss': -0.039527699700556695, 'vf_loss': 0.10503428033553064, 'vf_explai

2020-09-21 15:49:38,430	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.10602233908139169, 'policy_loss': -0.12162949750199914, 'vf_loss': 0.020998251507990062, 'vf_explained_var': 0.9896339, 'kl': 0.01526642998214811, 'entropy': 1.9942031726241112, 'entropy_coeff': 0.005}
2020-09-21 15:49:38,917	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.10058888897765428, 'policy_loss': -0.1147754262201488, 'vf_loss': 0.019585349247790873, 'vf_explained_var': 0.9897655, 'kl': 0.0152473179041408, 'entropy': 1.9946004524827003, 'entropy_coeff': 0.005}
2020-09-21 15:49:39,401	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.10491136414930224, 'policy_loss': -0.11896503536263481, 'vf_loss': 0.019386180152650923, 'vf_explained_var': 0.99010324, 'kl': 0.015424781886395067, 'entropy': 1.9919883757829666, 'entropy_coeff': 0.005}
2020-09

custom_metrics: {}
date: 2020-09-21_15-49-40
done: false
episode_len_mean: 1139.33
episode_reward_max: -24.818585521219084
episode_reward_mean: -30.163835210746047
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 118
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 1.993565395474434
      entropy_coeff: 0.005
      kl: 0.016155601537320763
      policy_loss: -0.11827682552393526
      total_loss: -0.10395658621564507
      vf_explained_var: 0.9903641939163208
      vf_loss: 0.019441386684775352
  num_steps_sampled: 136000
  num_steps_trained: 136000
iterations_since_restore: 136
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.3500000000000005
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.9449909821282177

2020-09-21 15:49:43,659	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.40907256305217743, 'policy_loss': 0.003186414483934641, 'vf_loss': 0.41485436540097, 'vf_explained_var': 0.68744844, 'kl': 0.0010967416964287047, 'entropy': 1.8594499453902245, 'entropy_coeff': 0.005}
2020-09-21 15:49:44,151	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.15822706354083493, 'policy_loss': -0.016558986739255488, 'vf_loss': 0.1833677850663662, 'vf_explained_var': 0.85330135, 'kl': 0.00229590341768926, 'entropy': 1.854101538658142, 'entropy_coeff': 0.005}
2020-09-21 15:49:44,600	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.050642061745747924, 'policy_loss': -0.041759047890082, 'vf_loss': 0.09987772163003683, 'vf_explained_var': 0.9210702, 'kl': 0.005787119196611457, 'entropy': 1.8425492346286774, 'entropy_coeff': 0.005}
2020-09-21 15:49

2020-09-21 15:49:55,504	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.11096719501074404, 'policy_loss': -0.11405546625610441, 'vf_loss': 0.006780306342989206, 'vf_explained_var': 0.99430835, 'kl': 0.018411840253975242, 'entropy': 1.8431170582771301, 'entropy_coeff': 0.005}
2020-09-21 15:49:55,957	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.1206050788750872, 'policy_loss': -0.12362341186963022, 'vf_loss': 0.006707378895953298, 'vf_explained_var': 0.9944156, 'kl': 0.018382349866442382, 'entropy': 1.84075166285038, 'entropy_coeff': 0.005}
2020-09-21 15:49:56,442	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.11372319544898346, 'policy_loss': -0.11642790155019611, 'vf_loss': 0.0062985041295178235, 'vf_explained_var': 0.99456525, 'kl': 0.018715227546636015, 'entropy': 1.8416735157370567, 'entropy_coeff': 0.005}
2020

custom_metrics: {}
date: 2020-09-21_15-49-57
done: false
episode_len_mean: 1139.33
episode_reward_max: -24.818585521219084
episode_reward_mean: -30.163835210746047
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 118
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 1.8444480672478676
      entropy_coeff: 0.005
      kl: 0.019071043701842427
      policy_loss: -0.12130066647659987
      total_loss: -0.11880262021441013
      vf_explained_var: 0.9947808980941772
      vf_loss: 0.0059989733272232115
  num_steps_sampled: 137000
  num_steps_trained: 137000
iterations_since_restore: 137
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.245454545454544
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.944990982128217

2020-09-21 15:50:00,500	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.32745986920781434, 'policy_loss': 0.005882840137928724, 'vf_loss': 0.3308749133720994, 'vf_explained_var': 0.6192888, 'kl': 0.0004913723607236076, 'entropy': 1.8890572562813759, 'entropy_coeff': 0.005}
2020-09-21 15:50:00,985	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.1468491330742836, 'policy_loss': -0.011264741595368832, 'vf_loss': 0.16594696324318647, 'vf_explained_var': 0.81076586, 'kl': 0.0050539731528260745, 'entropy': 1.869855523109436, 'entropy_coeff': 0.005}
2020-09-21 15:50:01,436	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.06715528864879161, 'policy_loss': -0.02574351173825562, 'vf_loss': 0.1006101118400693, 'vf_explained_var': 0.8775452, 'kl': 0.0055566009832546115, 'entropy': 1.8756573647260666, 'entropy_coeff': 0.005}
2020-09-21 1

2020-09-21 15:50:12,303	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.08515170379541814, 'policy_loss': -0.09139914368279278, 'vf_loss': 0.011835189943667501, 'vf_explained_var': 0.9846039, 'kl': 0.013031599461100996, 'entropy': 1.8994460254907608, 'entropy_coeff': 0.005}
2020-09-21 15:50:12,783	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.08855573768960312, 'policy_loss': -0.09442151889379602, 'vf_loss': 0.011363699450157583, 'vf_explained_var': 0.9854642, 'kl': 0.01333542267093435, 'entropy': 1.8997089341282845, 'entropy_coeff': 0.005}
2020-09-21 15:50:13,239	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.09617295092903078, 'policy_loss': -0.10157266003079712, 'vf_loss': 0.010932797333225608, 'vf_explained_var': 0.9861147, 'kl': 0.013189538731239736, 'entropy': 1.897989958524704, 'entropy_coeff': 0.005}
2020-0

custom_metrics: {}
date: 2020-09-21_15-50-14
done: false
episode_len_mean: 1139.33
episode_reward_max: -24.818585521219084
episode_reward_mean: -30.163835210746047
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 118
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 1.9013384729623795
      entropy_coeff: 0.005
      kl: 0.014075184299144894
      policy_loss: -0.10413668910041451
      total_loss: -0.0992751088924706
      vf_explained_var: 0.987123966217041
      vf_loss: 0.010145713604288176
  num_steps_sampled: 138000
  num_steps_trained: 138000
iterations_since_restore: 138
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.463636363636365
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.9449909821282177
 

2020-09-21 15:50:16,922	INFO rollout_worker.py:736 -- Training on concatenated sample batches:

{ 'count': 64,
  'policy_batches': { 'default_policy': { 'data': { 'action_dist_inputs': np.ndarray((64, 21), dtype=float32, min=-5.431, max=5.842, mean=-0.083),
                                                    'action_logp': np.ndarray((64,), dtype=float32, min=-4.954, max=-0.249, mean=-1.778),
                                                    'action_prob': np.ndarray((64,), dtype=float32, min=0.007, max=0.78, mean=0.271),
                                                    'actions': np.ndarray((64,), dtype=int64, min=0.0, max=20.0, mean=5.938),
                                                    'advantages': np.ndarray((64,), dtype=float32, min=-2.875, max=2.487, mean=0.087),
                                                    'agent_index': np.ndarray((64,), dtype=int64, min=0.0, max=0.0, mean=0.0),
                                                    'dones': np.ndarray((64,), dty

2020-09-21 15:50:23,884	DEBUG sgd.py:120 -- 14 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.07393524353392422, 'policy_loss': -0.08229476842097938, 'vf_loss': 0.014329144963994622, 'vf_explained_var': 0.9844593, 'kl': 0.011265361274126917, 'entropy': 1.8698448911309242, 'entropy_coeff': 0.005}
2020-09-21 15:50:24,336	DEBUG sgd.py:120 -- 15 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.06638164026662707, 'policy_loss': -0.07390214200131595, 'vf_loss': 0.013313416915480047, 'vf_explained_var': 0.98497224, 'kl': 0.011941895354539156, 'entropy': 1.875099316239357, 'entropy_coeff': 0.005}
2020-09-21 15:50:24,818	DEBUG sgd.py:120 -- 16 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.08240122953429818, 'policy_loss': -0.0899910720763728, 'vf_loss': 0.01329520985018462, 'vf_explained_var': 0.9862838, 'kl': 0.012290799291804433, 'entropy': 1.878521367907524, 'entropy_coeff': 0.005}
2020-09

custom_metrics: {}
date: 2020-09-21_15-50-30
done: false
episode_len_mean: 1181.3
episode_reward_max: -24.818585521219084
episode_reward_mean: -30.116018760673406
episode_reward_min: -32.22698139089825
episodes_this_iter: 1
episodes_total: 119
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 1.8774088993668556
      entropy_coeff: 0.005
      kl: 0.01645236654439941
      policy_loss: -0.1142884221044369
      total_loss: -0.11129192973021418
      vf_explained_var: 0.9919557571411133
      vf_loss: 0.007447826530551538
  num_steps_sampled: 139000
  num_steps_trained: 139000
iterations_since_restore: 139
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.252380952380953
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999998
  vram_util_percent0: 0.9449909821282179
  

[2m[36m(pid=24699)[0m 2020-09-21 15:50:31,193	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 10305.19,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 90},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-1.737, max=4.314, mean=0.279),
[2m[36m(pid=24699)[0m                                   'prev_action': 2,
[2m[36m(pid=24699)[0m         

2020-09-21 15:50:33,787	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.2094812518917024, 'policy_loss': -6.685941480100155e-05, 'vf_loss': 0.218602130189538, 'vf_explained_var': 0.8141475, 'kl': 0.0003291512139502739, 'entropy': 1.830553188920021, 'entropy_coeff': 0.005}
2020-09-21 15:50:34,239	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.08197985525475815, 'policy_loss': -0.011689656763337553, 'vf_loss': 0.10087214363738894, 'vf_explained_var': 0.9122891, 'kl': 0.006175842288939748, 'entropy': 1.811076782643795, 'entropy_coeff': 0.005}
2020-09-21 15:50:34,749	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': 0.005300224060192704, 'policy_loss': -0.03924543084576726, 'vf_loss': 0.051433436339721084, 'vf_explained_var': 0.9528019, 'kl': 0.007277625874849036, 'entropy': 1.8142146617174149, 'entropy_coeff': 0.005}
2020-09-21 1

2020-09-21 15:50:45,590	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.12063929031137377, 'policy_loss': -0.12010539998300374, 'vf_loss': 0.003107015567366034, 'vf_explained_var': 0.9971795, 'kl': 0.01839859306346625, 'entropy': 1.8320964723825455, 'entropy_coeff': 0.005}
2020-09-21 15:50:46,066	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.12322183675132692, 'policy_loss': -0.12263881973922253, 'vf_loss': 0.0029343531859922223, 'vf_explained_var': 0.99721235, 'kl': 0.018872485030442476, 'entropy': 1.8358240723609924, 'entropy_coeff': 0.005}
2020-09-21 15:50:46,550	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.3, 'cur_lr': 1e-05, 'total_loss': -0.12463676533661783, 'policy_loss': -0.1241010061930865, 'vf_loss': 0.002909129048930481, 'vf_explained_var': 0.9973297, 'kl': 0.019028119277209044, 'entropy': 1.8306634277105331, 'entropy_coeff': 0.005}
2020

custom_metrics: {}
date: 2020-09-21_15-50-47
done: false
episode_len_mean: 1181.3
episode_reward_max: -24.818585521219084
episode_reward_mean: -30.116018760673402
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 119
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.3
      cur_lr: 1.0e-05
      entropy: 1.8337196931242943
      entropy_coeff: 0.005
      kl: 0.020127083058468997
      policy_loss: -0.1220726533792913
      total_loss: -0.12252569472184405
      vf_explained_var: 0.9975325465202332
      vf_loss: 0.002677438984392211
  num_steps_sampled: 140000
  num_steps_trained: 140000
iterations_since_restore: 140
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.318181818181819
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.9449909821282177
 

[2m[36m(pid=24699)[0m 2020-09-21 15:50:47,743	INFO rollout_worker.py:561 -- Generating sample batch of size 200
2020-09-21 15:50:50,411	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': 0.2193942228332162, 'policy_loss': 0.0016749791102483869, 'vf_loss': 0.2267965991050005, 'vf_explained_var': 0.916657, 'kl': 0.000506951848476378, 'entropy': 1.8610964715480804, 'entropy_coeff': 0.005}
2020-09-21 15:50:50,902	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': 0.06595895066857338, 'policy_loss': -0.015276702120900154, 'vf_loss': 0.08774994942359626, 'vf_explained_var': 0.9656565, 'kl': 0.006479520539869554, 'entropy': 1.8860173523426056, 'entropy_coeff': 0.005}
2020-09-21 15:50:51,353	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': 0.009818123653531075, 'policy_loss': -0.032061790581792

2020-09-21 15:51:01,731	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': -0.10856172221247107, 'policy_loss': -0.10953550459817052, 'vf_loss': 0.004447113286005333, 'vf_explained_var': 0.9984878, 'kl': 0.01287493237759918, 'entropy': 1.85341028124094, 'entropy_coeff': 0.005}
2020-09-21 15:51:02,219	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': -0.10991451900918037, 'policy_loss': -0.1107642452698201, 'vf_loss': 0.004190014660707675, 'vf_explained_var': 0.9984898, 'kl': 0.013194192782975733, 'entropy': 1.8555351197719574, 'entropy_coeff': 0.005}
2020-09-21 15:51:02,675	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': -0.11119862250052392, 'policy_loss': -0.11201829550554976, 'vf_loss': 0.003947570265154354, 'vf_explained_var': 0.99855065, 'kl': 0.013657651550602168, 'entropy': 1.8

custom_metrics: {}
date: 2020-09-21_15-51-04
done: false
episode_len_mean: 1205.17
episode_reward_max: -24.818585521219084
episode_reward_mean: -30.13869353443222
episode_reward_min: -32.22698139089825
episodes_this_iter: 1
episodes_total: 120
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.44999999999999996
      cur_lr: 1.0e-05
      entropy: 1.8565694689750671
      entropy_coeff: 0.005
      kl: 0.014074482372961938
      policy_loss: -0.11249576357658952
      total_loss: -0.11230854678433388
      vf_explained_var: 0.9988059997558594
      vf_loss: 0.0031365450558951125
  num_steps_sampled: 141000
  num_steps_trained: 141000
iterations_since_restore: 141
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.1380952380952385
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999998
  vram_util_percent0: 0

2020-09-21 15:51:06,639	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': 0.4081842531450093, 'policy_loss': -0.00295186520088464, 'vf_loss': 0.419660234823823, 'vf_explained_var': 0.67527896, 'kl': 0.001070611049770187, 'entropy': 1.8011806011199951, 'entropy_coeff': 0.005}
2020-09-21 15:51:07,128	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': 0.14824720635078847, 'policy_loss': -0.01870279957074672, 'vf_loss': 0.17442335886880755, 'vf_explained_var': 0.869742, 'kl': 0.0032814219885040075, 'entropy': 1.7899980917572975, 'entropy_coeff': 0.005}
2020-09-21 15:51:07,611	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': 0.06488200288731605, 'policy_loss': -0.02403683215379715, 'vf_loss': 0.09545305045321584, 'vf_explained_var': 0.9265773, 'kl': 0.005311078668455593, 'entropy': 1.784840233

2020-09-21 15:51:16,991	DEBUG rollout_worker.py:760 -- Training out:

{ 'default_policy': { 'learner_stats': { 'allreduce_latency': 0.0,
                                         'cur_kl_coeff': 0.44999999999999996,
                                         'cur_lr': 1e-05,
                                         'entropy': 1.7728655338287354,
                                         'entropy_coeff': 0.005,
                                         'kl': 0.012588994577527046,
                                         'policy_loss': -0.10777059197425842,
                                         'total_loss': -0.10532110929489136,
                                         'vf_explained_var': np.ndarray((1,), dtype=float32, min=0.995, max=0.995, mean=0.995),
                                         'vf_loss': 0.005648748017847538}}}

2020-09-21 15:51:17,074	DEBUG sgd.py:120 -- 22 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': -0.0793364134151488

custom_metrics: {}
date: 2020-09-21_15-51-20
done: false
episode_len_mean: 1205.17
episode_reward_max: -24.818585521219084
episode_reward_mean: -30.138693534432228
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 120
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.44999999999999996
      cur_lr: 1.0e-05
      entropy: 1.7798494547605515
      entropy_coeff: 0.005
      kl: 0.011719122703652829
      policy_loss: -0.08967382682021707
      total_loss: -0.08908642700407654
      vf_explained_var: 0.9964410066604614
      vf_loss: 0.0042130429137614556
  num_steps_sampled: 142000
  num_steps_trained: 142000
iterations_since_restore: 142
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.309523809523809
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999998
  vram_util_percent0: 0

2020-09-21 15:51:23,064	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': 0.5484019545838237, 'policy_loss': 0.004033319826703519, 'vf_loss': 0.5533949974924326, 'vf_explained_var': 0.8868331, 'kl': 0.00016102457622535482, 'entropy': 1.8197624757885933, 'entropy_coeff': 0.005}
2020-09-21 15:51:23,550	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': 0.14456614409573376, 'policy_loss': -0.012532725813798606, 'vf_loss': 0.16587757505476475, 'vf_explained_var': 0.9625601, 'kl': 0.0006749667454641894, 'entropy': 1.81648538261652, 'entropy_coeff': 0.005}
2020-09-21 15:51:24,036	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': 0.037677568245271686, 'policy_loss': -0.029228582978248596, 'vf_loss': 0.07510715420357883, 'vf_explained_var': 0.9831277, 'kl': 0.0020312541819293983, 'entropy': 1.8230

2020-09-21 15:51:34,431	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': -0.1142805281560868, 'policy_loss': -0.11512087262235582, 'vf_loss': 0.0028752177313435823, 'vf_explained_var': 0.9993086, 'kl': 0.015942992002237588, 'entropy': 1.8418449833989143, 'entropy_coeff': 0.005}
2020-09-21 15:51:34,913	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': -0.11340264766477048, 'policy_loss': -0.11421457841061056, 'vf_loss': 0.0027971987874479964, 'vf_explained_var': 0.99932486, 'kl': 0.016036491724662483, 'entropy': 1.8403375297784805, 'entropy_coeff': 0.005}
2020-09-21 15:51:35,399	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': -0.110057930694893, 'policy_loss': -0.11104318092111498, 'vf_loss': 0.002653677831403911, 'vf_explained_var': 0.9993143, 'kl': 0.016707257949747145, 'entropy': 

custom_metrics: {}
date: 2020-09-21_15-51-36
done: false
episode_len_mean: 1233.72
episode_reward_max: -24.818585521219084
episode_reward_mean: -30.125304593971496
episode_reward_min: -32.22698139089825
episodes_this_iter: 1
episodes_total: 121
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.44999999999999996
      cur_lr: 1.0e-05
      entropy: 1.8431872352957726
      entropy_coeff: 0.005
      kl: 0.017623474763240665
      policy_loss: -0.11433589307125658
      total_loss: -0.1132634641835466
      vf_explained_var: 0.999417781829834
      vf_loss: 0.0023577954852953553
  num_steps_sampled: 143000
  num_steps_trained: 143000
iterations_since_restore: 143
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.2666666666666675
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999998
  vram_util_percent0: 0.

2020-09-21 15:51:39,722	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': 0.28132591024041176, 'policy_loss': -0.0012769863242283463, 'vf_loss': 0.2908799620345235, 'vf_explained_var': 0.8104589, 'kl': 0.0013601907118452328, 'entropy': 1.777830794453621, 'entropy_coeff': 0.005}
2020-09-21 15:51:40,212	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': 0.11117516364902258, 'policy_loss': -0.011111110681667924, 'vf_loss': 0.12933488748967648, 'vf_explained_var': 0.91729164, 'kl': 0.003942989431379829, 'entropy': 1.7645915895700455, 'entropy_coeff': 0.005}
2020-09-21 15:51:40,663	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': 0.03797629853943363, 'policy_loss': -0.03179225162602961, 'vf_loss': 0.0760126356035471, 'vf_explained_var': 0.95119095, 'kl': 0.00547336129238829, 'entropy': 1.74142

2020-09-21 15:51:51,098	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': -0.09235365455970168, 'policy_loss': -0.09338340512476861, 'vf_loss': 0.003681354588479735, 'vf_explained_var': 0.99757534, 'kl': 0.013420144037809223, 'entropy': 1.7381337732076645, 'entropy_coeff': 0.005}
2020-09-21 15:51:51,585	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': -0.09971170057542622, 'policy_loss': -0.10059914691373706, 'vf_loss': 0.003522004495607689, 'vf_explained_var': 0.997645, 'kl': 0.013481063186191022, 'entropy': 1.7402076944708824, 'entropy_coeff': 0.005}
2020-09-21 15:51:52,040	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': -0.0970041083637625, 'policy_loss': -0.0978463958017528, 'vf_loss': 0.0033273072476731613, 'vf_explained_var': 0.99773175, 'kl': 0.013915305666159838, 'entropy': 

custom_metrics: {}
date: 2020-09-21_15-51-53
done: false
episode_len_mean: 1233.72
episode_reward_max: -24.818585521219084
episode_reward_mean: -30.125304593971496
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 121
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.44999999999999996
      cur_lr: 1.0e-05
      entropy: 1.7456455007195473
      entropy_coeff: 0.005
      kl: 0.013901138212531805
      policy_loss: -0.10879775113426149
      total_loss: -0.10847355099394917
      vf_explained_var: 0.998092770576477
      vf_loss: 0.0027969186776317656
  num_steps_sampled: 144000
  num_steps_trained: 144000
iterations_since_restore: 144
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.436363636363637
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.

[2m[36m(pid=24699)[0m 2020-09-21 15:51:53,738	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 10157.235176459302,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 1090},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-4.054, max=3.22, mean=0.351),
[2m[36m(pid=24699)[0m                                   'prev_action': 15,
[2m[36m(pid=24699)

2020-09-21 15:51:56,059	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': 0.27088864031247795, 'policy_loss': 0.001377770910039544, 'vf_loss': 0.2787469485774636, 'vf_explained_var': 0.9406489, 'kl': 0.0011479661664166585, 'entropy': 1.950535148382187, 'entropy_coeff': 0.005}
2020-09-21 15:51:56,551	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': 0.08770025329431519, 'policy_loss': -0.023283233982510865, 'vf_loss': 0.11818498373031616, 'vf_explained_var': 0.97510064, 'kl': 0.00550081065739505, 'entropy': 1.9353723675012589, 'entropy_coeff': 0.005}
2020-09-21 15:51:57,035	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': 0.018550504697486758, 'policy_loss': -0.03876390808727592, 'vf_loss': 0.06449427315965295, 'vf_explained_var': 0.98617196, 'kl': 0.005599172553047538, 'entropy': 1.93989

2020-09-21 15:52:07,405	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': -0.1281644536065869, 'policy_loss': -0.1282474968174938, 'vf_loss': 0.002397471522272099, 'vf_explained_var': 0.9994657, 'kl': 0.016355897241737694, 'entropy': 1.9349160268902779, 'entropy_coeff': 0.005}
2020-09-21 15:52:07,891	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': -0.1380704523762688, 'policy_loss': -0.13801900250837207, 'vf_loss': 0.0023579805783811025, 'vf_explained_var': 0.9994648, 'kl': 0.016174889635294676, 'entropy': 1.9376258254051208, 'entropy_coeff': 0.005}
2020-09-21 15:52:08,346	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': -0.1324229877209291, 'policy_loss': -0.132903229794465, 'vf_loss': 0.00238757213082863, 'vf_explained_var': 0.9994923, 'kl': 0.017277233360800892, 'entropy': 1.9364

custom_metrics: {}
date: 2020-09-21_15-52-09
done: false
episode_len_mean: 1259.01
episode_reward_max: -24.818585521219084
episode_reward_mean: -30.157983051060995
episode_reward_min: -32.22698139089825
episodes_this_iter: 1
episodes_total: 122
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.44999999999999996
      cur_lr: 1.0e-05
      entropy: 1.9359645694494247
      entropy_coeff: 0.005
      kl: 0.017452033469453454
      policy_loss: -0.13969427440315485
      total_loss: -0.1396232962142676
      vf_explained_var: 0.9995789527893066
      vf_loss: 0.0018973774567712098
  num_steps_sampled: 145000
  num_steps_trained: 145000
iterations_since_restore: 145
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.771428571428571
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999998
  vram_util_percent0: 0.

[2m[36m(pid=24699)[0m 2020-09-21 15:52:10,044	INFO rollout_worker.py:561 -- Generating sample batch of size 200
2020-09-21 15:52:12,680	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': 0.2323377236025408, 'policy_loss': -0.0018446799367666245, 'vf_loss': 0.24344603903591633, 'vf_explained_var': 0.8387177, 'kl': 0.0002254948485969771, 'entropy': 1.8730213344097137, 'entropy_coeff': 0.005}
2020-09-21 15:52:13,169	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': 0.08144668891327456, 'policy_loss': -0.018402004847303033, 'vf_loss': 0.10801161895506084, 'vf_explained_var': 0.9253334, 'kl': 0.0026694345906435046, 'entropy': 1.8728349953889847, 'entropy_coeff': 0.005}
2020-09-21 15:52:13,616	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': 0.022750438016373664, 'policy_loss': -0.0254964435

2020-09-21 15:52:18,836	DEBUG sgd.py:120 -- 13 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': -0.09397770219948143, 'policy_loss': -0.09544642164837569, 'vf_loss': 0.00567285243596416, 'vf_explained_var': 0.9961494, 'kl': 0.011717117042280734, 'entropy': 1.8953655511140823, 'entropy_coeff': 0.005}
2020-09-21 15:52:19,321	DEBUG sgd.py:120 -- 14 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': -0.09651500964537263, 'policy_loss': -0.09782245755195618, 'vf_loss': 0.005261378741124645, 'vf_explained_var': 0.9963131, 'kl': 0.0122607868979685, 'entropy': 1.8942584544420242, 'entropy_coeff': 0.005}
2020-09-21 15:52:19,774	DEBUG sgd.py:120 -- 15 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': -0.09951579477638006, 'policy_loss': -0.10044976603239775, 'vf_loss': 0.004685804669861682, 'vf_explained_var': 0.99668074, 'kl': 0.012717948644421995, 'entropy': 1.

custom_metrics: {}
date: 2020-09-21_15-52-26
done: false
episode_len_mean: 1259.01
episode_reward_max: -24.818585521219084
episode_reward_mean: -30.157983051060995
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 122
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.44999999999999996
      cur_lr: 1.0e-05
      entropy: 1.887818031013012
      entropy_coeff: 0.005
      kl: 0.01602287107380107
      policy_loss: -0.12503610318526626
      total_loss: -0.1252400097437203
      vf_explained_var: 0.9985764622688293
      vf_loss: 0.0020248898945283145
  num_steps_sampled: 146000
  num_steps_trained: 146000
iterations_since_restore: 146
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.295454545454546
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.94

2020-09-21 15:52:28,988	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': 0.16841373650822788, 'policy_loss': -0.0005680285976268351, 'vf_loss': 0.1781628425233066, 'vf_explained_var': 0.9567006, 'kl': 0.0011976966820412827, 'entropy': 1.9440083354711533, 'entropy_coeff': 0.005}
2020-09-21 15:52:29,471	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': 0.0572549351491034, 'policy_loss': -0.01431468190276064, 'vf_loss': 0.07945992890745401, 'vf_explained_var': 0.981205, 'kl': 0.004123182850889862, 'entropy': 1.9491480588912964, 'entropy_coeff': 0.005}
2020-09-21 15:52:29,930	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': 0.005386580596677959, 'policy_loss': -0.02719085686840117, 'vf_loss': 0.04113021807279438, 'vf_explained_var': 0.98979926, 'kl': 0.002561788998718839, 'entropy': 1.94111

2020-09-21 15:52:40,330	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': -0.11574207292869687, 'policy_loss': -0.11522755341138691, 'vf_loss': 0.001799858066078741, 'vf_explained_var': 0.9995543, 'kl': 0.016682623012457043, 'entropy': 1.9643104895949364, 'entropy_coeff': 0.005}
2020-09-21 15:52:40,813	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': -0.11655614187475294, 'policy_loss': -0.11603947728872299, 'vf_loss': 0.001649120677029714, 'vf_explained_var': 0.9995993, 'kl': 0.017061386490240693, 'entropy': 1.9686822146177292, 'entropy_coeff': 0.005}
2020-09-21 15:52:41,265	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': -0.12258572387509048, 'policy_loss': -0.12219079164788127, 'vf_loss': 0.0015684822865296155, 'vf_explained_var': 0.99960846, 'kl': 0.017532195197418332, 'entropy'

custom_metrics: {}
date: 2020-09-21_15-52-42
done: false
episode_len_mean: 1259.01
episode_reward_max: -24.818585521219084
episode_reward_mean: -30.157983051060995
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 122
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.44999999999999996
      cur_lr: 1.0e-05
      entropy: 1.9695418179035187
      entropy_coeff: 0.005
      kl: 0.017990951950196177
      policy_loss: -0.12487169390078634
      total_loss: -0.12526438024360687
      vf_explained_var: 0.9996604919433594
      vf_loss: 0.0013590911075880285
  num_steps_sampled: 147000
  num_steps_trained: 147000
iterations_since_restore: 147
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.1
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999998
  vram_util_percent0: 0.9449909821282

2020-09-21 15:52:45,053	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': 0.2126646356191486, 'policy_loss': 0.00027405552100390196, 'vf_loss': 0.22196806780993938, 'vf_explained_var': 0.92230976, 'kl': 0.0004278950613723076, 'entropy': 1.9540087208151817, 'entropy_coeff': 0.005}
2020-09-21 15:52:45,541	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': 0.04537882632575929, 'policy_loss': -0.018632304505445063, 'vf_loss': 0.07252478832378983, 'vf_explained_var': 0.97441775, 'kl': 0.0029215241665951908, 'entropy': 1.965667687356472, 'entropy_coeff': 0.005}
2020-09-21 15:52:45,989	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': -0.013506971416063607, 'policy_loss': -0.03694705024827272, 'vf_loss': 0.03070081432815641, 'vf_explained_var': 0.9884664, 'kl': 0.0056925076933111995, 'entropy': 1

2020-09-21 15:52:56,427	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': -0.10135373217053711, 'policy_loss': -0.09946662397123873, 'vf_loss': 0.0017315242257609498, 'vf_explained_var': 0.99933577, 'kl': 0.013765074836555868, 'entropy': 1.9625842049717903, 'entropy_coeff': 0.005}
2020-09-21 15:52:56,914	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': -0.10802150215022266, 'policy_loss': -0.10610523144714534, 'vf_loss': 0.0016313293381244875, 'vf_explained_var': 0.99938273, 'kl': 0.013861300190910697, 'entropy': 1.9570371732115746, 'entropy_coeff': 0.005}
2020-09-21 15:52:57,366	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': -0.10812069533858448, 'policy_loss': -0.10660926916170865, 'vf_loss': 0.0017230950106750242, 'vf_explained_var': 0.99938476, 'kl': 0.014656955550890416, 'entr

custom_metrics: {}
date: 2020-09-21_15-52-58
done: false
episode_len_mean: 1303.43
episode_reward_max: -23.652509059612406
episode_reward_mean: -30.107512849432887
episode_reward_min: -32.22698139089825
episodes_this_iter: 1
episodes_total: 123
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.44999999999999996
      cur_lr: 1.0e-05
      entropy: 1.960918828845024
      entropy_coeff: 0.005
      kl: 0.015063745027873665
      policy_loss: -0.10785153415054083
      total_loss: -0.10957975033670664
      vf_explained_var: 0.9995085597038269
      vf_loss: 0.001297695100220153
  num_steps_sampled: 148000
  num_steps_trained: 148000
iterations_since_restore: 148
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.142857142857142
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999998
  vram_util_percent0: 0.9

2020-09-21 15:53:01,359	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': 0.08141648507444188, 'policy_loss': -0.0017186011536978185, 'vf_loss': 0.09286446892656386, 'vf_explained_var': 0.9622376, 'kl': 0.0007322940887182217, 'entropy': 2.01178340613842, 'entropy_coeff': 0.005}
2020-09-21 15:53:01,852	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': 0.011790789430961013, 'policy_loss': -0.017437878937926143, 'vf_loss': 0.03643647546414286, 'vf_explained_var': 0.9838568, 'kl': 0.006260251684579998, 'entropy': 2.0049845576286316, 'entropy_coeff': 0.005}
2020-09-21 15:53:02,307	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': -0.023694440722465515, 'policy_loss': -0.03671876073349267, 'vf_loss': 0.019160622963681817, 'vf_explained_var': 0.9915846, 'kl': 0.008710325229912996, 'entropy': 2.0

2020-09-21 15:53:12,702	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': -0.10562404338270426, 'policy_loss': -0.10481211464502849, 'vf_loss': 0.0014180497673805803, 'vf_explained_var': 0.9993849, 'kl': 0.01724719541380182, 'entropy': 1.9982441887259483, 'entropy_coeff': 0.005}
2020-09-21 15:53:13,187	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': -0.11454309907276183, 'policy_loss': -0.11361299233976752, 'vf_loss': 0.0012952323704666924, 'vf_explained_var': 0.99939007, 'kl': 0.01723754429258406, 'entropy': 1.9964470192790031, 'entropy_coeff': 0.005}
2020-09-21 15:53:13,639	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': -0.10805659659672529, 'policy_loss': -0.10723719059024006, 'vf_loss': 0.0013071911162114702, 'vf_explained_var': 0.9994003, 'kl': 0.01749918021960184, 'entropy':

custom_metrics: {}
date: 2020-09-21_15-53-15
done: false
episode_len_mean: 1303.43
episode_reward_max: -23.652509059612406
episode_reward_mean: -30.107512849432887
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 123
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.44999999999999996
      cur_lr: 1.0e-05
      entropy: 1.9964474141597748
      entropy_coeff: 0.005
      kl: 0.01836066332180053
      policy_loss: -0.11487569171003997
      total_loss: -0.11534403706900775
      vf_explained_var: 0.9994882941246033
      vf_loss: 0.0012515941525634844
  num_steps_sampled: 149000
  num_steps_trained: 149000
iterations_since_restore: 149
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.176190476190477
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999998
  vram_util_percent0: 0.

[2m[36m(pid=24699)[0m 2020-09-21 15:53:15,304	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 6609.095447695504,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 2090},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-1.181, max=3.047, mean=0.343),
[2m[36m(pid=24699)[0m                                   'prev_action': 0,
[2m[36m(pid=24699)[

2020-09-21 15:53:17,080	INFO rollout_worker.py:736 -- Training on concatenated sample batches:

{ 'count': 64,
  'policy_batches': { 'default_policy': { 'data': { 'action_dist_inputs': np.ndarray((64, 21), dtype=float32, min=-5.501, max=5.03, mean=-0.095),
                                                    'action_logp': np.ndarray((64,), dtype=float32, min=-4.725, max=-0.406, mean=-1.854),
                                                    'action_prob': np.ndarray((64,), dtype=float32, min=0.009, max=0.666, mean=0.242),
                                                    'actions': np.ndarray((64,), dtype=int64, min=0.0, max=20.0, mean=7.234),
                                                    'advantages': np.ndarray((64,), dtype=float32, min=-2.636, max=1.817, mean=0.013),
                                                    'agent_index': np.ndarray((64,), dtype=int64, min=0.0, max=0.0, mean=0.0),
                                                    'dones': np.ndarray((64,), dty

2020-09-21 15:53:24,141	DEBUG sgd.py:120 -- 14 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': -0.08872162800980732, 'policy_loss': -0.08614583278540522, 'vf_loss': 0.0021906057081650943, 'vf_explained_var': 0.99953437, 'kl': 0.011642329394817352, 'entropy': 2.001090258359909, 'entropy_coeff': 0.005}
2020-09-21 15:53:24,594	DEBUG sgd.py:120 -- 15 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': -0.0901479524327442, 'policy_loss': -0.08755602815654129, 'vf_loss': 0.0020938921297783963, 'vf_explained_var': 0.99956524, 'kl': 0.011827546870335937, 'entropy': 2.0016428008675575, 'entropy_coeff': 0.005}
2020-09-21 15:53:25,073	DEBUG sgd.py:120 -- 16 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': -0.09682840516325086, 'policy_loss': -0.09422479575732723, 'vf_loss': 0.0018909078935394064, 'vf_explained_var': 0.99960065, 'kl': 0.01222090347437188, 'entropy

custom_metrics: {}
date: 2020-09-21_15-53-31
done: false
episode_len_mean: 1303.43
episode_reward_max: -23.652509059612406
episode_reward_mean: -30.107512849432887
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 123
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.44999999999999996
      cur_lr: 1.0e-05
      entropy: 1.9895519688725471
      entropy_coeff: 0.005
      kl: 0.014963805151637644
      policy_loss: -0.12194372934754938
      total_loss: -0.12414269335567951
      vf_explained_var: 0.9997819066047668
      vf_loss: 0.0010150826237804722
  num_steps_sampled: 150000
  num_steps_trained: 150000
iterations_since_restore: 150
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.133333333333334
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999998
  vram_util_percent0: 0

[2m[36m(pid=24699)[0m 2020-09-21 15:53:31,816	INFO rollout_worker.py:561 -- Generating sample batch of size 200
2020-09-21 15:53:34,020	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': 0.06939941179007292, 'policy_loss': -0.002606283815111965, 'vf_loss': 0.0810018836054951, 'vf_explained_var': 0.9292921, 'kl': 0.0022224562677745086, 'entropy': 1.9992584735155106, 'entropy_coeff': 0.005}
2020-09-21 15:53:34,509	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': 0.022408170043490827, 'policy_loss': -0.015514515340328217, 'vf_loss': 0.04448944586329162, 'vf_explained_var': 0.96544385, 'kl': 0.007604597572935745, 'entropy': 1.9977658689022064, 'entropy_coeff': 0.005}
2020-09-21 15:53:34,994	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': -0.005974432220682502, 'policy_loss': -0.022606223

2020-09-21 15:53:45,380	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': -0.09851825126679614, 'policy_loss': -0.09623649338027462, 'vf_loss': 0.0012585455733642448, 'vf_explained_var': 0.99882287, 'kl': 0.014267046877648681, 'entropy': 1.9920959919691086, 'entropy_coeff': 0.005}
2020-09-21 15:53:45,859	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': -0.10027310601435602, 'policy_loss': -0.09834002482239157, 'vf_loss': 0.0013176907232264057, 'vf_explained_var': 0.9988221, 'kl': 0.014834855624940246, 'entropy': 1.9852916970849037, 'entropy_coeff': 0.005}
2020-09-21 15:53:46,341	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': -0.1021192001644522, 'policy_loss': -0.10012427484616637, 'vf_loss': 0.0011614148424996529, 'vf_explained_var': 0.9989029, 'kl': 0.015019676182419062, 'entropy

custom_metrics: {}
date: 2020-09-21_15-53-47
done: false
episode_len_mean: 1303.43
episode_reward_max: -23.652509059612406
episode_reward_mean: -30.107512849432887
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 123
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.44999999999999996
      cur_lr: 1.0e-05
      entropy: 1.986270546913147
      entropy_coeff: 0.005
      kl: 0.015842781402170658
      policy_loss: -0.1066920964512974
      total_loss: -0.10849977401085198
      vf_explained_var: 0.9990118741989136
      vf_loss: 0.0009944220109900925
  num_steps_sampled: 151000
  num_steps_trained: 151000
iterations_since_restore: 151
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.25
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.686363636363632
  vram_util_percent0: 0.94499098212821

2020-09-21 15:53:50,548	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': 0.056210332142654806, 'policy_loss': 0.004838735796511173, 'vf_loss': 0.06069197575561702, 'vf_explained_var': 0.97155887, 'kl': 0.0007401111688101203, 'entropy': 1.9306855648756027, 'entropy_coeff': 0.005}
2020-09-21 15:53:51,036	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': 0.01055957528296858, 'policy_loss': -0.006256196298636496, 'vf_loss': 0.025350053561851382, 'vf_explained_var': 0.98877937, 'kl': 0.0027459630473458674, 'entropy': 1.9539928585290909, 'entropy_coeff': 0.005}
2020-09-21 15:53:51,491	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': -0.013901765225455165, 'policy_loss': -0.018243585946038365, 'vf_loss': 0.012350474251434207, 'vf_explained_var': 0.9938368, 'kl': 0.003858313226373866, 'entropy'

2020-09-21 15:54:01,900	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': -0.10045541182626039, 'policy_loss': -0.09820719645358622, 'vf_loss': 0.0009352028500870802, 'vf_explained_var': 0.9995258, 'kl': 0.014758643694221973, 'entropy': 1.9649633914232254, 'entropy_coeff': 0.005}
2020-09-21 15:54:02,376	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': -0.10545594699215144, 'policy_loss': -0.10334107372909784, 'vf_loss': 0.0009182070825772826, 'vf_explained_var': 0.9994774, 'kl': 0.015048619534354657, 'entropy': 1.9609917849302292, 'entropy_coeff': 0.005}
2020-09-21 15:54:02,831	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': -0.09996256500016898, 'policy_loss': -0.09805851173587143, 'vf_loss': 0.0008542739633412566, 'vf_explained_var': 0.99957293, 'kl': 0.015718221315182745, 'entrop

custom_metrics: {}
date: 2020-09-21_15-54-04
done: false
episode_len_mean: 1303.43
episode_reward_max: -23.652509059612406
episode_reward_mean: -30.107512849432887
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 123
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.44999999999999996
      cur_lr: 1.0e-05
      entropy: 1.96133603900671
      entropy_coeff: 0.005
      kl: 0.015930374152958393
      policy_loss: -0.10601198708172888
      total_loss: -0.10788126895204186
      vf_explained_var: 0.9995894432067871
      vf_loss: 0.0007687279212404974
  num_steps_sampled: 152000
  num_steps_trained: 152000
iterations_since_restore: 152
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.247619047619048
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999998
  vram_util_percent0: 0.9

2020-09-21 15:54:06,801	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': 0.10191229137126356, 'policy_loss': 0.003355797380208969, 'vf_loss': 0.10816639056429267, 'vf_explained_var': 0.94385815, 'kl': 0.00045795440176432667, 'entropy': 1.9631963148713112, 'entropy_coeff': 0.005}
2020-09-21 15:54:07,285	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': 0.031172447837889194, 'policy_loss': -0.010648915078490973, 'vf_loss': 0.05049215955659747, 'vf_explained_var': 0.9742819, 'kl': 0.0025868759330478497, 'entropy': 1.9669785723090172, 'entropy_coeff': 0.005}
2020-09-21 15:54:07,739	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': 0.006308158277533948, 'policy_loss': -0.019600038474891335, 'vf_loss': 0.03402622870635241, 'vf_explained_var': 0.98311937, 'kl': 0.003676014021039009, 'entropy': 

2020-09-21 15:54:17,165	DEBUG rollout_worker.py:760 -- Training out:

{ 'default_policy': { 'learner_stats': { 'allreduce_latency': 0.0,
                                         'cur_kl_coeff': 0.44999999999999996,
                                         'cur_lr': 1e-05,
                                         'entropy': 1.9453175067901611,
                                         'entropy_coeff': 0.005,
                                         'kl': 0.015618851408362389,
                                         'policy_loss': -0.038482654839754105,
                                         'total_loss': -0.03780689835548401,
                                         'vf_explained_var': np.ndarray((1,), dtype=float32, min=0.998, max=0.998, mean=0.998),
                                         'vf_loss': 0.003373874118551612}}}

2020-09-21 15:54:17,274	DEBUG sgd.py:120 -- 22 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': -0.084809517022222

custom_metrics: {}
date: 2020-09-21_15-54-20
done: false
episode_len_mean: 1303.43
episode_reward_max: -23.652509059612406
episode_reward_mean: -30.107512849432887
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 123
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.44999999999999996
      cur_lr: 1.0e-05
      entropy: 1.9575928673148155
      entropy_coeff: 0.005
      kl: 0.016065825941041112
      policy_loss: -0.1003387188538909
      total_loss: -0.10151656111702323
      vf_explained_var: 0.999221682548523
      vf_loss: 0.0013805015660182107
  num_steps_sampled: 153000
  num_steps_trained: 153000
iterations_since_restore: 153
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.40952380952381
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.685714285714282
  vram_util_percent0: 0.94

2020-09-21 15:54:23,503	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': 0.12902836629655212, 'policy_loss': 0.0007133177714422345, 'vf_loss': 0.1364406435750425, 'vf_explained_var': 0.7644722, 'kl': 0.0023074613146709666, 'entropy': 1.832789309322834, 'entropy_coeff': 0.005}
2020-09-21 15:54:24,001	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': 0.04385665291920304, 'policy_loss': -0.015639607096090913, 'vf_loss': 0.06585890729911625, 'vf_explained_var': 0.8826058, 'kl': 0.0064000560669228435, 'entropy': 1.8485334143042564, 'entropy_coeff': 0.005}
2020-09-21 15:54:24,456	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': 0.012982300366275012, 'policy_loss': -0.018971264944411814, 'vf_loss': 0.038657508441247046, 'vf_explained_var': 0.9272007, 'kl': 0.005630040366668254, 'entropy': 1.84

2020-09-21 15:54:34,877	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': -0.08956613281043246, 'policy_loss': -0.08793595526367426, 'vf_loss': 0.002303105080500245, 'vf_explained_var': 0.99567485, 'kl': 0.01164271793095395, 'entropy': 1.8345019966363907, 'entropy_coeff': 0.005}
2020-09-21 15:54:35,358	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': -0.08900135703152046, 'policy_loss': -0.08754005451919511, 'vf_loss': 0.0021742717653978616, 'vf_explained_var': 0.9957894, 'kl': 0.01233751542167738, 'entropy': 1.8374896198511124, 'entropy_coeff': 0.005}
2020-09-21 15:54:35,809	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': -0.089114386937581, 'policy_loss': -0.08736209478229284, 'vf_loss': 0.0020233554096193984, 'vf_explained_var': 0.9962555, 'kl': 0.012017082539387047, 'entropy': 1

custom_metrics: {}
date: 2020-09-21_15-54-37
done: false
episode_len_mean: 1303.43
episode_reward_max: -23.652509059612406
episode_reward_mean: -30.107512849432887
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 123
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.44999999999999996
      cur_lr: 1.0e-05
      entropy: 1.834793321788311
      entropy_coeff: 0.005
      kl: 0.013047124666627496
      policy_loss: -0.08747793221846223
      total_loss: -0.0889657671796158
      vf_explained_var: 0.9966373443603516
      vf_loss: 0.001814925366488751
  num_steps_sampled: 154000
  num_steps_trained: 154000
iterations_since_restore: 154
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.336363636363637
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.94

[2m[36m(pid=24699)[0m 2020-09-21 15:54:37,480	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 5829.85545587098,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 3090},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-1.871, max=1.993, mean=0.36),
[2m[36m(pid=24699)[0m                                   'prev_action': 14,
[2m[36m(pid=24699)[0

2020-09-21 15:54:39,835	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': 0.10237805638462305, 'policy_loss': -0.004236668115481734, 'vf_loss': 0.11570636928081512, 'vf_explained_var': 0.8883618, 'kl': 0.001111524313895773, 'entropy': 1.9183666184544563, 'entropy_coeff': 0.005}
2020-09-21 15:54:40,325	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': 0.028386124526150525, 'policy_loss': -0.020834801602177322, 'vf_loss': 0.05597227159887552, 'vf_explained_var': 0.95012176, 'kl': 0.00610545149538666, 'entropy': 1.89976005256176, 'entropy_coeff': 0.005}
2020-09-21 15:54:40,780	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': -0.013286279165185988, 'policy_loss': -0.03187539055943489, 'vf_loss': 0.02521149767562747, 'vf_explained_var': 0.9740247, 'kl': 0.006410815403796732, 'entropy': 1.9014

2020-09-21 15:54:51,196	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': -0.11749774275813252, 'policy_loss': -0.11608497519046068, 'vf_loss': 0.001263220048713265, 'vf_explained_var': 0.9987103, 'kl': 0.015466630458831787, 'entropy': 1.927194781601429, 'entropy_coeff': 0.005}
2020-09-21 15:54:51,682	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': -0.11663341242820024, 'policy_loss': -0.11517937737517059, 'vf_loss': 0.0011962567732552998, 'vf_explained_var': 0.99877465, 'kl': 0.015558085753582418, 'entropy': 1.9302868619561195, 'entropy_coeff': 0.005}
2020-09-21 15:54:52,493	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': -0.11559169046813622, 'policy_loss': -0.11428552801953629, 'vf_loss': 0.0011607271153479815, 'vf_explained_var': 0.99882567, 'kl': 0.015961269207764417, 'entropy

custom_metrics: {}
date: 2020-09-21_15-54-53
done: false
episode_len_mean: 1331.12
episode_reward_max: -23.652509059612406
episode_reward_mean: -30.093164349171715
episode_reward_min: -32.22698139089825
episodes_this_iter: 1
episodes_total: 124
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.44999999999999996
      cur_lr: 1.0e-05
      entropy: 1.9296801462769508
      entropy_coeff: 0.005
      kl: 0.01710340689169243
      policy_loss: -0.11774386744946241
      total_loss: -0.1186483739875257
      vf_explained_var: 0.998932957649231
      vf_loss: 0.0010473636612005066
  num_steps_sampled: 155000
  num_steps_trained: 155000
iterations_since_restore: 155
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.233333333333333
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.685714285714282
  vram_util_percent0: 0.94

[2m[36m(pid=24699)[0m 2020-09-21 15:54:54,187	INFO rollout_worker.py:561 -- Generating sample batch of size 200
2020-09-21 15:54:56,470	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': 0.0926234774524346, 'policy_loss': 0.0007102505769580603, 'vf_loss': 0.10083028068765998, 'vf_explained_var': 0.8662456, 'kl': 0.0006623195967028206, 'entropy': 1.8430191799998283, 'entropy_coeff': 0.005}
2020-09-21 15:54:56,921	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': 0.03421094792429358, 'policy_loss': -0.005078391928691417, 'vf_loss': 0.046856488566845655, 'vf_explained_var': 0.94047475, 'kl': 0.003701498582813656, 'entropy': 1.8465636894106865, 'entropy_coeff': 0.005}
2020-09-21 15:54:57,406	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': -0.003778909973334521, 'policy_loss': -0.022705677

2020-09-21 15:55:07,815	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': -0.09416319127194583, 'policy_loss': -0.09233817365020514, 'vf_loss': 0.0015397318493342027, 'vf_explained_var': 0.9978649, 'kl': 0.013166412711143494, 'entropy': 1.8579274713993073, 'entropy_coeff': 0.005}
2020-09-21 15:55:08,299	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': -0.08926694002002478, 'policy_loss': -0.08751705149188638, 'vf_loss': 0.0015048182031023316, 'vf_explained_var': 0.99797595, 'kl': 0.013395369402132928, 'entropy': 1.856525644659996, 'entropy_coeff': 0.005}
2020-09-21 15:55:08,779	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': -0.10119146946817636, 'policy_loss': -0.09932985948398709, 'vf_loss': 0.0014329471850942355, 'vf_explained_var': 0.99805826, 'kl': 0.013321901438757777, 'entrop

custom_metrics: {}
date: 2020-09-21_15-55-10
done: false
episode_len_mean: 1331.12
episode_reward_max: -23.652509059612406
episode_reward_mean: -30.093164349171712
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 124
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.44999999999999996
      cur_lr: 1.0e-05
      entropy: 1.8621849864721298
      entropy_coeff: 0.005
      kl: 0.014025574724655598
      policy_loss: -0.09401273913681507
      total_loss: -0.0958576793782413
      vf_explained_var: 0.9983426928520203
      vf_loss: 0.0011544710796442814
  num_steps_sampled: 156000
  num_steps_trained: 156000
iterations_since_restore: 156
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.3
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.94499098212821

2020-09-21 15:55:13,124	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': 0.14514101552776992, 'policy_loss': 0.0022921895142644644, 'vf_loss': 0.15217426978051662, 'vf_explained_var': 0.9718597, 'kl': 0.00024611868445156837, 'entropy': 1.8872405961155891, 'entropy_coeff': 0.005}
2020-09-21 15:55:13,615	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': 0.027451359666883945, 'policy_loss': -0.022687150165438652, 'vf_loss': 0.05762944952584803, 'vf_explained_var': 0.9889645, 'kl': 0.00433973093458917, 'entropy': 1.8887646868824959, 'entropy_coeff': 0.005}
2020-09-21 15:55:14,071	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': -0.014913070830516517, 'policy_loss': -0.03831127751618624, 'vf_loss': 0.029655258520506322, 'vf_explained_var': 0.9938117, 'kl': 0.007085721677867696, 'entropy': 1.

2020-09-21 15:55:19,778	DEBUG sgd.py:120 -- 14 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': -0.08925687707960606, 'policy_loss': -0.0873718080110848, 'vf_loss': 0.002611622941913083, 'vf_explained_var': 0.99943244, 'kl': 0.01087171392282471, 'entropy': 1.877792127430439, 'entropy_coeff': 0.005}
2020-09-21 15:55:20,223	DEBUG sgd.py:120 -- 15 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': -0.10375021165236831, 'policy_loss': -0.10159844649024308, 'vf_loss': 0.002376571501372382, 'vf_explained_var': 0.9994867, 'kl': 0.010821968433447182, 'entropy': 1.8796444684267044, 'entropy_coeff': 0.005}
2020-09-21 15:55:20,707	DEBUG sgd.py:120 -- 16 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': -0.10787257994525135, 'policy_loss': -0.10587395215407014, 'vf_loss': 0.002245496114483103, 'vf_explained_var': 0.9995581, 'kl': 0.011505166534334421, 'entropy': 1.

custom_metrics: {}
date: 2020-09-21_15-55-26
done: false
episode_len_mean: 1331.12
episode_reward_max: -23.652509059612406
episode_reward_mean: -30.093164349171712
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 124
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.44999999999999996
      cur_lr: 1.0e-05
      entropy: 1.8778082206845284
      entropy_coeff: 0.005
      kl: 0.015204833645839244
      policy_loss: -0.12555990216787905
      total_loss: -0.12726189696695656
      vf_explained_var: 0.9998182654380798
      vf_loss: 0.0008448760108876741
  num_steps_sampled: 157000
  num_steps_trained: 157000
iterations_since_restore: 157
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.409523809523809
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.685714285714282
  vram_util_percent0: 0

2020-09-21 15:55:29,624	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': 0.17390567786060274, 'policy_loss': 0.005349873332306743, 'vf_loss': 0.1771084642969072, 'vf_explained_var': 0.8091298, 'kl': 0.0011585794526187687, 'entropy': 1.8148054629564285, 'entropy_coeff': 0.005}
2020-09-21 15:55:30,078	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': 0.04985333140939474, 'policy_loss': -0.014676375198177993, 'vf_loss': 0.07054950320161879, 'vf_explained_var': 0.9188478, 'kl': 0.007082582873408683, 'entropy': 1.8413920104503632, 'entropy_coeff': 0.005}
2020-09-21 15:55:30,561	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': 0.008269341720733792, 'policy_loss': -0.0298512622830458, 'vf_loss': 0.0436348031507805, 'vf_explained_var': 0.9489025, 'kl': 0.00790666596731171, 'entropy': 1.81443989

2020-09-21 15:55:40,973	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': -0.09792520548216999, 'policy_loss': -0.0994936996139586, 'vf_loss': 0.0033494215458631516, 'vf_explained_var': 0.9959091, 'kl': 0.015996793925296515, 'entropy': 1.7958974614739418, 'entropy_coeff': 0.005}
2020-09-21 15:55:41,427	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': -0.09403421462047845, 'policy_loss': -0.09527636569691822, 'vf_loss': 0.003176943471771665, 'vf_explained_var': 0.9960578, 'kl': 0.015624528517946601, 'entropy': 1.793165236711502, 'entropy_coeff': 0.005}
2020-09-21 15:55:41,904	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': -0.10263681085780263, 'policy_loss': -0.10365471406839788, 'vf_loss': 0.0030444622971117496, 'vf_explained_var': 0.9963774, 'kl': 0.015312855714000762, 'entropy': 

custom_metrics: {}
date: 2020-09-21_15-55-43
done: false
episode_len_mean: 1331.12
episode_reward_max: -23.652509059612406
episode_reward_mean: -30.093164349171712
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 124
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.44999999999999996
      cur_lr: 1.0e-05
      entropy: 1.7853310108184814
      entropy_coeff: 0.005
      kl: 0.016420999076217413
      policy_loss: -0.10582952294498682
      total_loss: -0.10483729210682213
      vf_explained_var: 0.9969522953033447
      vf_loss: 0.0025294410879723728
  num_steps_sampled: 158000
  num_steps_trained: 158000
iterations_since_restore: 158
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.245454545454545
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0

2020-09-21 15:55:46,689	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': 0.23152562975883484, 'policy_loss': 0.0030429295729845762, 'vf_loss': 0.23696463461965322, 'vf_explained_var': 0.8967186, 'kl': 0.0006598582558687838, 'entropy': 1.7557760179042816, 'entropy_coeff': 0.005}
2020-09-21 15:55:47,177	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': 0.0509217563085258, 'policy_loss': -0.019352762261405587, 'vf_loss': 0.07738393335603178, 'vf_explained_var': 0.9652032, 'kl': 0.003915004373993725, 'entropy': 1.7742317840456963, 'entropy_coeff': 0.005}
2020-09-21 15:55:47,629	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': 0.0015747033758088946, 'policy_loss': -0.034400648437440395, 'vf_loss': 0.041890627588145435, 'vf_explained_var': 0.9816127, 'kl': 0.006443276011850685, 'entropy': 1.7

2020-09-21 15:55:58,029	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': -0.11138469743309543, 'policy_loss': -0.11170781590044498, 'vf_loss': 0.0017993958026636392, 'vf_explained_var': 0.9991112, 'kl': 0.016170980816241354, 'entropy': 1.7506443932652473, 'entropy_coeff': 0.005}
2020-09-21 15:55:58,513	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': -0.1157391439191997, 'policy_loss': -0.11607256648130715, 'vf_loss': 0.0016723782318877056, 'vf_explained_var': 0.99919844, 'kl': 0.0164735495345667, 'entropy': 1.7504097521305084, 'entropy_coeff': 0.005}
2020-09-21 15:55:58,966	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': -0.12126913794782013, 'policy_loss': -0.12145231221802533, 'vf_loss': 0.0016227373780566268, 'vf_explained_var': 0.9992465, 'kl': 0.01621479942696169, 'entropy': 

custom_metrics: {}
date: 2020-09-21_15-56-00
done: false
episode_len_mean: 1331.12
episode_reward_max: -23.652509059612406
episode_reward_mean: -30.093164349171712
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 124
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.44999999999999996
      cur_lr: 1.0e-05
      entropy: 1.7546891272068024
      entropy_coeff: 0.005
      kl: 0.016939323279075325
      policy_loss: -0.12111959489993751
      total_loss: -0.12085814168676734
      vf_explained_var: 0.9993399977684021
      vf_loss: 0.0014121999374765437
  num_steps_sampled: 159000
  num_steps_trained: 159000
iterations_since_restore: 159
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.345454545454545
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.68181818181818
  vram_util_percent0: 0.

[2m[36m(pid=24699)[0m 2020-09-21 15:56:00,637	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 10706.239653352954,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 881},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-1.428, max=10.339, mean=0.387),
[2m[36m(pid=24699)[0m                                   'prev_action': 0,
[2m[36m(pid=24699)

2020-09-21 15:56:03,597	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': 0.2877996484749019, 'policy_loss': 0.0008420969825237989, 'vf_loss': 0.29472916200757027, 'vf_explained_var': 0.8119189, 'kl': 0.0012076178858388964, 'entropy': 1.6630092039704323, 'entropy_coeff': 0.005}
2020-09-21 15:56:04,093	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': 0.11588751943781972, 'policy_loss': -0.01986454299185425, 'vf_loss': 0.1414581323042512, 'vf_explained_var': 0.90391743, 'kl': 0.005700160967535339, 'entropy': 1.654228612780571, 'entropy_coeff': 0.005}
2020-09-21 15:56:04,548	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': 0.051208671648055315, 'policy_loss': -0.03557203122181818, 'vf_loss': 0.09148122603073716, 'vf_explained_var': 0.9295353, 'kl': 0.00774110879865475, 'entropy': 1.6368044

2020-09-21 15:56:14,968	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': -0.09613579057622701, 'policy_loss': -0.10035575978690758, 'vf_loss': 0.005468460803967901, 'vf_explained_var': 0.99599326, 'kl': 0.015614355390425771, 'entropy': 1.6549903526902199, 'entropy_coeff': 0.005}
2020-09-21 15:56:15,445	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': -0.09747262659948319, 'policy_loss': -0.10145819198805839, 'vf_loss': 0.0047425411466974765, 'vf_explained_var': 0.996431, 'kl': 0.016751263348851353, 'entropy': 1.6590090095996857, 'entropy_coeff': 0.005}
2020-09-21 15:56:15,891	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': -0.0983218364417553, 'policy_loss': -0.10178394056856632, 'vf_loss': 0.004350364790298045, 'vf_explained_var': 0.9966289, 'kl': 0.016380494576878846, 'entropy': 

custom_metrics: {}
date: 2020-09-21_15-56-17
done: false
episode_len_mean: 1331.12
episode_reward_max: -23.652509059612406
episode_reward_mean: -30.093164349171712
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 124
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.44999999999999996
      cur_lr: 1.0e-05
      entropy: 1.6564846485853195
      entropy_coeff: 0.005
      kl: 0.017086103791370988
      policy_loss: -0.1005349550396204
      total_loss: -0.0972469246480614
      vf_explained_var: 0.9972258806228638
      vf_loss: 0.0038817066961200908
  num_steps_sampled: 160000
  num_steps_trained: 160000
iterations_since_restore: 160
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.4772727272727275
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.

[2m[36m(pid=24699)[0m 2020-09-21 15:56:17,635	INFO rollout_worker.py:561 -- Generating sample batch of size 200
2020-09-21 15:56:21,170	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': 0.29365903325378895, 'policy_loss': -0.0004514057654887438, 'vf_loss': 0.30169463716447353, 'vf_explained_var': 0.85206366, 'kl': 0.0015343451100051642, 'entropy': 1.6549297347664833, 'entropy_coeff': 0.005}
2020-09-21 15:56:21,660	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': 0.1139531135559082, 'policy_loss': -0.023411210742779076, 'vf_loss': 0.14335539750754833, 'vf_explained_var': 0.9315863, 'kl': 0.005453773395856842, 'entropy': 1.6890558004379272, 'entropy_coeff': 0.005}
2020-09-21 15:56:22,111	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': 0.024846383603289723, 'policy_loss': -0.0415493252

2020-09-21 15:56:32,504	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': -0.12648407625965774, 'policy_loss': -0.12962949671782553, 'vf_loss': 0.0027492609078763053, 'vf_explained_var': 0.9985757, 'kl': 0.019798953435383737, 'entropy': 1.7026726752519608, 'entropy_coeff': 0.005}
2020-09-21 15:56:32,992	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': -0.1264290832914412, 'policy_loss': -0.12960947712417692, 'vf_loss': 0.0025400783051736653, 'vf_explained_var': 0.99870247, 'kl': 0.020415891078300774, 'entropy': 1.7093685492873192, 'entropy_coeff': 0.005}
2020-09-21 15:56:33,444	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.44999999999999996, 'cur_lr': 1e-05, 'total_loss': -0.13012400711886585, 'policy_loss': -0.13338091154582798, 'vf_loss': 0.002513702529540751, 'vf_explained_var': 0.99874794, 'kl': 0.020574498863425106, 'entropy

custom_metrics: {}
date: 2020-09-21_15-56-34
done: false
episode_len_mean: 1331.12
episode_reward_max: -23.652509059612406
episode_reward_mean: -30.093164349171712
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 124
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.44999999999999996
      cur_lr: 1.0e-05
      entropy: 1.701616756618023
      entropy_coeff: 0.005
      kl: 0.021406606771051884
      policy_loss: -0.1394177118781954
      total_loss: -0.13613245682790875
      vf_explained_var: 0.9989316463470459
      vf_loss: 0.00216036569327116
  num_steps_sampled: 161000
  num_steps_trained: 161000
iterations_since_restore: 161
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.430434782608696
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.68260869565217
  vram_util_percent0: 0.9449

2020-09-21 15:56:38,253	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.27595950243994594, 'policy_loss': 0.0015110070817172527, 'vf_loss': 0.28226007521152496, 'vf_explained_var': 0.8779268, 'kl': 0.0010806733681134606, 'entropy': 1.7082041874527931, 'entropy_coeff': 0.005}
2020-09-21 15:56:38,744	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.08158095320686698, 'policy_loss': -0.020069365040399134, 'vf_loss': 0.1089506521821022, 'vf_explained_var': 0.9528786, 'kl': 0.0018243176164105535, 'entropy': 1.7063511162996292, 'entropy_coeff': 0.005}
2020-09-21 15:56:39,199	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.018017144175246358, 'policy_loss': -0.03955683810636401, 'vf_loss': 0.06363388663157821, 'vf_explained_var': 0.9715423, 'kl': 0.0036016486992593855, 'entropy': 1.69820

2020-09-21 15:56:49,623	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.12208525207825005, 'policy_loss': -0.1259765325812623, 'vf_loss': 0.002596616148366593, 'vf_explained_var': 0.9987545, 'kl': 0.01450445456430316, 'entropy': 1.6991685032844543, 'entropy_coeff': 0.005}
2020-09-21 15:56:50,111	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.12621220631990582, 'policy_loss': -0.13034941256046295, 'vf_loss': 0.002546327239542734, 'vf_explained_var': 0.9988761, 'kl': 0.014920229266863316, 'entropy': 1.696054831147194, 'entropy_coeff': 0.005}
2020-09-21 15:56:50,566	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.12680832284968346, 'policy_loss': -0.1308374414802529, 'vf_loss': 0.002418483476503752, 'vf_explained_var': 0.9989476, 'kl': 0.01491758058546111, 'entropy': 1.691747

custom_metrics: {}
date: 2020-09-21_15-56-52
done: false
episode_len_mean: 1331.12
episode_reward_max: -23.652509059612406
episode_reward_mean: -30.093164349171712
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 124
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.6749999999999999
      cur_lr: 1.0e-05
      entropy: 1.6913268864154816
      entropy_coeff: 0.005
      kl: 0.01543827133718878
      policy_loss: -0.12869689555373043
      total_loss: -0.12464340380392969
      vf_explained_var: 0.9990909695625305
      vf_loss: 0.002089292072923854
  num_steps_sampled: 162000
  num_steps_trained: 162000
iterations_since_restore: 162
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.377272727272728
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.94

2020-09-21 15:56:55,173	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.28073418606072664, 'policy_loss': 0.0018297923379577696, 'vf_loss': 0.28648917376995087, 'vf_explained_var': 0.8890556, 'kl': 0.0007269152163116299, 'entropy': 1.6150893792510033, 'entropy_coeff': 0.005}
2020-09-21 15:56:55,667	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.06500551151111722, 'policy_loss': -0.015299372957088053, 'vf_loss': 0.08747581182979047, 'vf_explained_var': 0.9629629, 'kl': 0.0014319401525426656, 'entropy': 1.627498134970665, 'entropy_coeff': 0.005}
2020-09-21 15:56:56,122	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.01382144505623728, 'policy_loss': -0.026046796003356576, 'vf_loss': 0.04601467330940068, 'vf_explained_var': 0.98079836, 'kl': 0.0030019653859199025, 'entropy': 1.6345

2020-09-21 15:57:06,572	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.08872688666451722, 'policy_loss': -0.09061726051731966, 'vf_loss': 0.0026280150850652717, 'vf_explained_var': 0.9988432, 'kl': 0.010959613253362477, 'entropy': 1.6270756050944328, 'entropy_coeff': 0.005}
2020-09-21 15:57:07,052	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.08855178742669523, 'policy_loss': -0.09071855340152979, 'vf_loss': 0.0025069328694371507, 'vf_explained_var': 0.9988618, 'kl': 0.011568578542210162, 'entropy': 1.62979094684124, 'entropy_coeff': 0.005}
2020-09-21 15:57:07,510	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.09479757153894752, 'policy_loss': -0.09683434036560357, 'vf_loss': 0.0024415594452875666, 'vf_explained_var': 0.9989275, 'kl': 0.011414216365665197, 'entropy': 1.

custom_metrics: {}
date: 2020-09-21_15-57-08
done: false
episode_len_mean: 1369.43
episode_reward_max: -23.652509059612406
episode_reward_mean: -30.040599853550656
episode_reward_min: -32.22698139089825
episodes_this_iter: 1
episodes_total: 125
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.6749999999999999
      cur_lr: 1.0e-05
      entropy: 1.6286216750741005
      entropy_coeff: 0.005
      kl: 0.01178363966755569
      policy_loss: -0.09610630621318705
      total_loss: -0.09422668311162852
      vf_explained_var: 0.9990757703781128
      vf_loss: 0.002068768248136621
  num_steps_sampled: 163000
  num_steps_trained: 163000
iterations_since_restore: 163
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 6.1909090909090905
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.9

2020-09-21 15:57:11,995	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.3612729371525347, 'policy_loss': -0.002016219077631831, 'vf_loss': 0.3715551272034645, 'vf_explained_var': 0.9266125, 'kl': 0.0004201512381193373, 'entropy': 1.709912858903408, 'entropy_coeff': 0.005}
2020-09-21 15:57:12,484	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.08635697944555432, 'policy_loss': -0.02562588022556156, 'vf_loss': 0.11826695036143064, 'vf_explained_var': 0.97686297, 'kl': 0.003262300837377552, 'entropy': 1.6972301676869392, 'entropy_coeff': 0.005}
2020-09-21 15:57:12,936	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.005864037841092795, 'policy_loss': -0.0532271817792207, 'vf_loss': 0.05264098849147558, 'vf_explained_var': 0.9900526, 'kl': 0.004760368537972681, 'entropy': 1.698218762

2020-09-21 15:57:18,670	DEBUG sgd.py:120 -- 14 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.1196319890441373, 'policy_loss': -0.12231881794286892, 'vf_loss': 0.0031776097894180566, 'vf_explained_var': 0.99930656, 'kl': 0.011895005765836686, 'entropy': 1.7039827480912209, 'entropy_coeff': 0.005}
2020-09-21 15:57:19,124	DEBUG sgd.py:120 -- 15 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.11803999030962586, 'policy_loss': -0.12088036257773638, 'vf_loss': 0.0028843854524893686, 'vf_explained_var': 0.99935746, 'kl': 0.012553954496979713, 'entropy': 1.7035859823226929, 'entropy_coeff': 0.005}
2020-09-21 15:57:19,612	DEBUG sgd.py:120 -- 16 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.1238568719709292, 'policy_loss': -0.12663151614833623, 'vf_loss': 0.002727984137891326, 'vf_explained_var': 0.9994132, 'kl': 0.012630660610739142, 'entropy': 1

custom_metrics: {}
date: 2020-09-21_15-57-25
done: false
episode_len_mean: 1369.43
episode_reward_max: -23.652509059612406
episode_reward_mean: -30.040599853550653
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 125
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.6749999999999999
      cur_lr: 1.0e-05
      entropy: 1.6954502537846565
      entropy_coeff: 0.005
      kl: 0.014861401170492172
      policy_loss: -0.14844948763493448
      total_loss: -0.1457193389069289
      vf_explained_var: 0.9997463226318359
      vf_loss: 0.001175953006168129
  num_steps_sampled: 164000
  num_steps_trained: 164000
iterations_since_restore: 164
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.468181818181819
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.94

[2m[36m(pid=24699)[0m 2020-09-21 15:57:26,092	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 8783.404934913247,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 1881},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-4.039, max=4.953, mean=0.37),
[2m[36m(pid=24699)[0m                                   'prev_action': 10,
[2m[36m(pid=24699)[

2020-09-21 15:57:29,021	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.39883063128218055, 'policy_loss': 0.0009022010490298271, 'vf_loss': 0.4060558592900634, 'vf_explained_var': 0.9252331, 'kl': 0.0005647539072466401, 'entropy': 1.7017282545566559, 'entropy_coeff': 0.005}
2020-09-21 15:57:29,512	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.10762987518683076, 'policy_loss': -0.030938825977500528, 'vf_loss': 0.14505325676873326, 'vf_explained_var': 0.97432643, 'kl': 0.0030341202873387374, 'entropy': 1.7065158858895302, 'entropy_coeff': 0.005}
2020-09-21 15:57:29,964	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.015283221960999072, 'policy_loss': -0.04357250523753464, 'vf_loss': 0.06390960467979312, 'vf_explained_var': 0.98804235, 'kl': 0.005090507096610963, 'entropy': 1.6979

2020-09-21 15:57:40,425	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.11992663703858852, 'policy_loss': -0.12311098421923816, 'vf_loss': 0.0019795827683992684, 'vf_explained_var': 0.9996025, 'kl': 0.014443999796640128, 'entropy': 1.708987832069397, 'entropy_coeff': 0.005}
2020-09-21 15:57:40,918	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.11865012277849019, 'policy_loss': -0.12197381857549772, 'vf_loss': 0.0018423825968056917, 'vf_explained_var': 0.99962616, 'kl': 0.01486138062318787, 'entropy': 1.7100250497460365, 'entropy_coeff': 0.005}
2020-09-21 15:57:41,374	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.12134701432660222, 'policy_loss': -0.12463810003828257, 'vf_loss': 0.0017917540462804027, 'vf_explained_var': 0.9996421, 'kl': 0.014893238781951368, 'entropy': 1

custom_metrics: {}
date: 2020-09-21_15-57-42
done: false
episode_len_mean: 1407.66
episode_reward_max: -23.652509059612406
episode_reward_mean: -29.97877574118291
episode_reward_min: -32.22698139089825
episodes_this_iter: 1
episodes_total: 126
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.6749999999999999
      cur_lr: 1.0e-05
      entropy: 1.709369219839573
      entropy_coeff: 0.005
      kl: 0.015315633383579552
      policy_loss: -0.13207502919249237
      total_loss: -0.1286990900989622
      vf_explained_var: 0.9996963143348694
      vf_loss: 0.0015847251052036881
  num_steps_sampled: 165000
  num_steps_trained: 165000
iterations_since_restore: 165
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.240909090909091
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.944

[2m[36m(pid=24699)[0m 2020-09-21 15:57:43,088	INFO rollout_worker.py:561 -- Generating sample batch of size 200
2020-09-21 15:57:46,044	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.17094000871293247, 'policy_loss': 0.002603591186925769, 'vf_loss': 0.1766617144457996, 'vf_explained_var': 0.9387585, 'kl': 0.0006029376195622049, 'entropy': 1.7464543282985687, 'entropy_coeff': 0.005}
2020-09-21 15:57:46,534	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.021342656342312694, 'policy_loss': -0.024863694794476032, 'vf_loss': 0.053076328127644956, 'vf_explained_var': 0.9821807, 'kl': 0.0027554510452318937, 'entropy': 1.7459817454218864, 'entropy_coeff': 0.005}
2020-09-21 15:57:46,998	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.027526433113962412, 'policy_loss': -0.045600524870

2020-09-21 15:57:57,439	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.12809424963779747, 'policy_loss': -0.13203358161263168, 'vf_loss': 0.0016326923505403101, 'vf_explained_var': 0.99947894, 'kl': 0.01644122850848362, 'entropy': 1.7582381591200829, 'entropy_coeff': 0.005}
2020-09-21 15:57:57,923	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.12490695575252175, 'policy_loss': -0.12904003961011767, 'vf_loss': 0.0016001159528968856, 'vf_explained_var': 0.99946755, 'kl': 0.01670339156407863, 'entropy': 1.7483627125620842, 'entropy_coeff': 0.005}
2020-09-21 15:57:58,376	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.11891055479645729, 'policy_loss': -0.12346100166905671, 'vf_loss': 0.0013527666887966916, 'vf_explained_var': 0.9995154, 'kl': 0.017790055368095636, 'entropy': 

custom_metrics: {}
date: 2020-09-21_15-57-59
done: false
episode_len_mean: 1407.66
episode_reward_max: -23.652509059612406
episode_reward_mean: -29.97877574118292
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 126
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.6749999999999999
      cur_lr: 1.0e-05
      entropy: 1.756555236876011
      entropy_coeff: 0.005
      kl: 0.017587590438779444
      policy_loss: -0.13641344959614798
      total_loss: -0.13215854833833873
      vf_explained_var: 0.9995654225349426
      vf_loss: 0.0011660566233331338
  num_steps_sampled: 166000
  num_steps_trained: 166000
iterations_since_restore: 166
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.268181818181818
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.94

2020-09-21 15:58:02,986	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.19722901796922088, 'policy_loss': -0.004261350492015481, 'vf_loss': 0.21020456682890654, 'vf_explained_var': 0.9704695, 'kl': 0.00016890487753390415, 'entropy': 1.76564309746027, 'entropy_coeff': 0.005}
2020-09-21 15:58:03,475	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.03244457649998367, 'policy_loss': -0.020498156256508082, 'vf_loss': 0.060786233050748706, 'vf_explained_var': 0.9908956, 'kl': 0.001577717048348859, 'entropy': 1.7816908732056618, 'entropy_coeff': 0.005}
2020-09-21 15:58:03,929	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.015866059344261885, 'policy_loss': -0.03806272509973496, 'vf_loss': 0.028193425619974732, 'vf_explained_var': 0.9955089, 'kl': 0.004239805304678157, 'entropy': 1.7717

2020-09-21 15:58:14,391	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.12418082594012958, 'policy_loss': -0.12676458728674334, 'vf_loss': 0.0013032643146289047, 'vf_explained_var': 0.99978906, 'kl': 0.015112448309082538, 'entropy': 1.7840804904699326, 'entropy_coeff': 0.005}
2020-09-21 15:58:14,876	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.12767166690900922, 'policy_loss': -0.1303397276205942, 'vf_loss': 0.001244915954885073, 'vf_explained_var': 0.9997958, 'kl': 0.015266367059666663, 'entropy': 1.7763302996754646, 'entropy_coeff': 0.005}
2020-09-21 15:58:15,329	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.12514175556134433, 'policy_loss': -0.12783949973527342, 'vf_loss': 0.0011838522041216493, 'vf_explained_var': 0.9998074, 'kl': 0.015474878251552582, 'entropy': 1

custom_metrics: {}
date: 2020-09-21_15-58-16
done: false
episode_len_mean: 1407.66
episode_reward_max: -23.652509059612406
episode_reward_mean: -29.97877574118292
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 126
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.6749999999999999
      cur_lr: 1.0e-05
      entropy: 1.7825671285390854
      entropy_coeff: 0.005
      kl: 0.015855411940719932
      policy_loss: -0.13281638538319385
      total_loss: -0.13003257576201577
      vf_explained_var: 0.999840497970581
      vf_loss: 0.000994240544969216
  num_steps_sampled: 167000
  num_steps_trained: 167000
iterations_since_restore: 167
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.340909090909093
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.944

2020-09-21 15:58:19,159	DEBUG rollout_worker.py:760 -- Training out:

{ 'default_policy': { 'learner_stats': { 'allreduce_latency': 0.0,
                                         'cur_kl_coeff': 0.6749999999999999,
                                         'cur_lr': 1e-05,
                                         'entropy': 1.7423226833343506,
                                         'entropy_coeff': 0.005,
                                         'kl': 2.6778907979974065e-09,
                                         'policy_loss': -0.0022996505722403526,
                                         'total_loss': 0.5631668567657471,
                                         'vf_explained_var': np.ndarray((1,), dtype=float32, min=0.754, max=0.754, mean=0.754),
                                         'vf_loss': 0.574178159236908}}}

2020-09-21 15:58:19,163	INFO rollout_worker.py:736 -- Training on concatenated sample batches:

{ 'count': 64,
  'policy_batches': { 'default_policy': { 'data': { 

2020-09-21 15:58:26,274	DEBUG sgd.py:120 -- 14 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.09547413536347449, 'policy_loss': -0.10255080158822238, 'vf_loss': 0.007300605328055099, 'vf_explained_var': 0.9967477, 'kl': 0.012635997933102772, 'entropy': 1.750647060573101, 'entropy_coeff': 0.005}
2020-09-21 15:58:26,731	DEBUG sgd.py:120 -- 15 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.10007931711152196, 'policy_loss': -0.10625646379776299, 'vf_loss': 0.006241968279937282, 'vf_explained_var': 0.99717206, 'kl': 0.012789576663635671, 'entropy': 1.7395563423633575, 'entropy_coeff': 0.005}
2020-09-21 15:58:27,219	DEBUG sgd.py:120 -- 16 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.09825746930437163, 'policy_loss': -0.10438663806417026, 'vf_loss': 0.005758360042818822, 'vf_explained_var': 0.9974404, 'kl': 0.013457632216159254, 'entropy': 1.7

custom_metrics: {}
date: 2020-09-21_15-58-33
done: false
episode_len_mean: 1446.99
episode_reward_max: -23.652509059612406
episode_reward_mean: -29.979347884386275
episode_reward_min: -32.22698139089825
episodes_this_iter: 1
episodes_total: 127
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.6749999999999999
      cur_lr: 1.0e-05
      entropy: 1.7390615865588188
      entropy_coeff: 0.005
      kl: 0.01610029034782201
      policy_loss: -0.1323044984601438
      total_loss: -0.12770545959938318
      vf_explained_var: 0.9988987445831299
      vf_loss: 0.0024266497130156495
  num_steps_sampled: 168000
  num_steps_trained: 168000
iterations_since_restore: 168
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.268181818181819
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.94

2020-09-21 15:58:36,484	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.1601075898797717, 'policy_loss': -0.008641615975648165, 'vf_loss': 0.1770641808398068, 'vf_explained_var': 0.9354994, 'kl': 0.0014267818706586999, 'entropy': 1.8556116446852684, 'entropy_coeff': 0.005}
2020-09-21 15:58:36,978	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.03902588918572292, 'policy_loss': -0.03127877018414438, 'vf_loss': 0.07558752479963005, 'vf_explained_var': 0.9718795, 'kl': 0.005778568651294336, 'entropy': 1.836679846048355, 'entropy_coeff': 0.005}
2020-09-21 15:58:37,436	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.014081049477681518, 'policy_loss': -0.051107217790558934, 'vf_loss': 0.042887127376161516, 'vf_explained_var': 0.983227, 'kl': 0.00498809659620747, 'entropy': 1.845585115

2020-09-21 15:58:47,940	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.1367465234361589, 'policy_loss': -0.13852909777779132, 'vf_loss': 0.0013149376536603086, 'vf_explained_var': 0.99947, 'kl': 0.014490797882899642, 'entropy': 1.8627291917800903, 'entropy_coeff': 0.005}
2020-09-21 15:58:48,429	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.12854111252818257, 'policy_loss': -0.13055306440219283, 'vf_loss': 0.0012483538703236263, 'vf_explained_var': 0.9995125, 'kl': 0.014853960776235908, 'entropy': 1.8525659143924713, 'entropy_coeff': 0.005}
2020-09-21 15:58:48,883	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.13075514254160225, 'policy_loss': -0.13300764374434948, 'vf_loss': 0.0012303914845688269, 'vf_explained_var': 0.999526, 'kl': 0.015246099152136594, 'entropy': 1.85

custom_metrics: {}
date: 2020-09-21_15-58-50
done: false
episode_len_mean: 1446.99
episode_reward_max: -23.652509059612406
episode_reward_mean: -29.979347884386275
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 127
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.6749999999999999
      cur_lr: 1.0e-05
      entropy: 1.8577398210763931
      entropy_coeff: 0.005
      kl: 0.015798803709913045
      policy_loss: -0.13278925465419888
      total_loss: -0.13032992649823427
      vf_explained_var: 0.9996051788330078
      vf_loss: 0.0010838254493137356
  num_steps_sampled: 169000
  num_steps_trained: 169000
iterations_since_restore: 169
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.340909090909092
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.

[2m[36m(pid=24699)[0m 2020-09-21 15:58:50,554	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 6711.494789549763,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 2881},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-1.44, max=2.13, mean=0.299),
[2m[36m(pid=24699)[0m                                   'prev_action': 0,
[2m[36m(pid=24699)[0m

2020-09-21 15:58:52,878	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.1464402035344392, 'policy_loss': 0.0018705346155911684, 'vf_loss': 0.15268793934956193, 'vf_explained_var': 0.9700819, 'kl': 0.0007348839975901411, 'entropy': 1.7228626161813736, 'entropy_coeff': 0.005}
2020-09-21 15:58:53,371	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.033107333758380264, 'policy_loss': -0.021291679120622575, 'vf_loss': 0.05812128889374435, 'vf_explained_var': 0.9888035, 'kl': 0.007427369098877534, 'entropy': 1.7471502274274826, 'entropy_coeff': 0.005}
2020-09-21 15:58:53,827	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.004400159465149045, 'policy_loss': -0.032029462745413184, 'vf_loss': 0.03288349008653313, 'vf_explained_var': 0.9942136, 'kl': 0.005065970675786957, 'entropy': 1.7347

2020-09-21 15:59:04,345	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.11714928597211838, 'policy_loss': -0.12010605493560433, 'vf_loss': 0.0016421316868218128, 'vf_explained_var': 0.9996636, 'kl': 0.014736102020833641, 'entropy': 1.7264462485909462, 'entropy_coeff': 0.005}
2020-09-21 15:59:04,837	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.1145005605649203, 'policy_loss': -0.11768359283450991, 'vf_loss': 0.0015628835244569927, 'vf_explained_var': 0.9996893, 'kl': 0.015146243444178253, 'entropy': 1.7207128927111626, 'entropy_coeff': 0.005}
2020-09-21 15:59:05,290	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.1147095178021118, 'policy_loss': -0.11745189840439707, 'vf_loss': 0.0014702035732625518, 'vf_explained_var': 0.9997171, 'kl': 0.014607959252316505, 'entropy': 1.

custom_metrics: {}
date: 2020-09-21_15-59-06
done: false
episode_len_mean: 1446.99
episode_reward_max: -23.652509059612406
episode_reward_mean: -29.979347884386275
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 127
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.6749999999999999
      cur_lr: 1.0e-05
      entropy: 1.7167185544967651
      entropy_coeff: 0.005
      kl: 0.01539347117068246
      policy_loss: -0.12883399182464927
      total_loss: -0.1257285661995411
      vf_explained_var: 0.9997529983520508
      vf_loss: 0.0012984256136405747
  num_steps_sampled: 170000
  num_steps_trained: 170000
iterations_since_restore: 170
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.342857142857143
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999998
  vram_util_percent0: 0.94

[2m[36m(pid=24699)[0m 2020-09-21 15:59:07,002	INFO rollout_worker.py:561 -- Generating sample batch of size 200
2020-09-21 15:59:09,837	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.1865959150600247, 'policy_loss': -0.007053934677969664, 'vf_loss': 0.2028434262610972, 'vf_explained_var': 0.9415908, 'kl': 0.00038260912577092743, 'entropy': 1.8903677836060524, 'entropy_coeff': 0.005}
2020-09-21 15:59:10,320	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.042938706232234836, 'policy_loss': -0.02670069586019963, 'vf_loss': 0.07683962536975741, 'vf_explained_var': 0.97450364, 'kl': 0.0033574856643099338, 'entropy': 1.8933042734861374, 'entropy_coeff': 0.005}
2020-09-21 15:59:10,778	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.009078582574147731, 'policy_loss': -0.041820347192

2020-09-21 15:59:19,215	DEBUG rollout_worker.py:760 -- Training out:

{ 'default_policy': { 'learner_stats': { 'allreduce_latency': 0.0,
                                         'cur_kl_coeff': 0.6749999999999999,
                                         'cur_lr': 1e-05,
                                         'entropy': 1.8822425603866577,
                                         'entropy_coeff': 0.005,
                                         'kl': 0.011743061244487762,
                                         'policy_loss': -0.18412791192531586,
                                         'total_loss': -0.18393990397453308,
                                         'vf_explained_var': np.ndarray((1,), dtype=float32, min=1.0, max=1.0, mean=1.0),
                                         'vf_loss': 0.0016726560425013304}}}

2020-09-21 15:59:19,362	DEBUG sgd.py:120 -- 20 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.10971560428151861, 'pol

custom_metrics: {}
date: 2020-09-21_15-59-23
done: false
episode_len_mean: 1446.99
episode_reward_max: -23.652509059612406
episode_reward_mean: -29.979347884386275
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 127
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.6749999999999999
      cur_lr: 1.0e-05
      entropy: 1.8805453702807426
      entropy_coeff: 0.005
      kl: 0.013988466933369637
      policy_loss: -0.13281626615207642
      total_loss: -0.13168119615875185
      vf_explained_var: 0.9996606111526489
      vf_loss: 0.0010955841971735936
  num_steps_sampled: 171000
  num_steps_trained: 171000
iterations_since_restore: 171
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.263636363636364
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.

2020-09-21 15:59:26,313	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.1029901229776442, 'policy_loss': -0.004014625679701567, 'vf_loss': 0.11570108449086547, 'vf_explained_var': 0.9504252, 'kl': 0.00034804919952341473, 'entropy': 1.7862556278705597, 'entropy_coeff': 0.005}
2020-09-21 15:59:26,767	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.01770637946901843, 'policy_loss': -0.01618093135766685, 'vf_loss': 0.04069719952531159, 'vf_explained_var': 0.9819473, 'kl': 0.003124435246718349, 'entropy': 1.7837767973542213, 'entropy_coeff': 0.005}
2020-09-21 15:59:27,253	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.016326494398526847, 'policy_loss': -0.03109931922517717, 'vf_loss': 0.021230236103292555, 'vf_explained_var': 0.99047416, 'kl': 0.0035635580570669845, 'entropy': 1.772

2020-09-21 15:59:37,722	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.10566294690943323, 'policy_loss': -0.10779451447888277, 'vf_loss': 0.0015260348154697567, 'vf_explained_var': 0.9992856, 'kl': 0.014030672842636704, 'entropy': 1.7730346024036407, 'entropy_coeff': 0.005}
2020-09-21 15:59:38,169	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.11198490066453815, 'policy_loss': -0.11400184198282659, 'vf_loss': 0.001397165691741975, 'vf_explained_var': 0.9993499, 'kl': 0.014024300500750542, 'entropy': 1.7693249881267548, 'entropy_coeff': 0.005}
2020-09-21 15:59:38,647	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.10371505049988627, 'policy_loss': -0.10577908135019243, 'vf_loss': 0.0013100675241730642, 'vf_explained_var': 0.99937075, 'kl': 0.014244988153222948, 'entropy': 

custom_metrics: {}
date: 2020-09-21_15-59-40
done: false
episode_len_mean: 1490.85
episode_reward_max: -23.652509059612406
episode_reward_mean: -29.932715082005746
episode_reward_min: -32.22698139089825
episodes_this_iter: 1
episodes_total: 128
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.6749999999999999
      cur_lr: 1.0e-05
      entropy: 1.7714091837406158
      entropy_coeff: 0.005
      kl: 0.01498947199434042
      policy_loss: -0.12254404556006193
      total_loss: -0.12012571387458593
      vf_explained_var: 0.999463677406311
      vf_loss: 0.0011574844138522167
  num_steps_sampled: 172000
  num_steps_trained: 172000
iterations_since_restore: 172
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.200000000000001
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.94

2020-09-21 15:59:42,802	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.07651986822020262, 'policy_loss': -0.0004033321274619084, 'vf_loss': 0.0850998058449477, 'vf_explained_var': 0.9757111, 'kl': 0.0009924354430850268, 'entropy': 1.769299477338791, 'entropy_coeff': 0.005}
2020-09-21 15:59:43,297	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.006536698434501886, 'policy_loss': -0.015711523359641433, 'vf_loss': 0.029357783147133887, 'vf_explained_var': 0.9921298, 'kl': 0.0024551090245950036, 'entropy': 1.753350391983986, 'entropy_coeff': 0.005}
2020-09-21 15:59:43,753	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.022674121311865747, 'policy_loss': -0.03142733115237206, 'vf_loss': 0.01456389989471063, 'vf_explained_var': 0.9958662, 'kl': 0.004391913738800213, 'entropy': 1.7550

2020-09-21 15:59:54,278	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.10415003150410485, 'policy_loss': -0.1057913308541174, 'vf_loss': 0.0010519280804146547, 'vf_explained_var': 0.9996885, 'kl': 0.013942453952040523, 'entropy': 1.764355793595314, 'entropy_coeff': 0.005}
2020-09-21 15:59:54,767	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.10803274088539183, 'policy_loss': -0.10991423379164189, 'vf_loss': 0.0010602624352031853, 'vf_explained_var': 0.99968183, 'kl': 0.014266745129134506, 'entropy': 1.7617650255560875, 'entropy_coeff': 0.005}
2020-09-21 15:59:55,215	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.10933431843295693, 'policy_loss': -0.11161626840475947, 'vf_loss': 0.0010071725118905306, 'vf_explained_var': 0.9997035, 'kl': 0.014921241730917245, 'entropy': 1

custom_metrics: {}
date: 2020-09-21_15-59-56
done: false
episode_len_mean: 1490.85
episode_reward_max: -23.652509059612406
episode_reward_mean: -29.932715082005743
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 128
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.6749999999999999
      cur_lr: 1.0e-05
      entropy: 1.7653860747814178
      entropy_coeff: 0.005
      kl: 0.01496434275759384
      policy_loss: -0.11299283528933302
      total_loss: -0.11086593638174236
      vf_explained_var: 0.999728262424469
      vf_loss: 0.0008528937032679096
  num_steps_sampled: 173000
  num_steps_trained: 173000
iterations_since_restore: 173
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.290476190476191
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999998
  vram_util_percent0: 0.94

2020-09-21 15:59:59,391	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.16612637281650677, 'policy_loss': -0.0038060465594753623, 'vf_loss': 0.17834548698738217, 'vf_explained_var': 0.97417176, 'kl': 0.00044378731964411777, 'entropy': 1.7425259053707123, 'entropy_coeff': 0.005}
2020-09-21 15:59:59,842	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.03149804298300296, 'policy_loss': -0.015554314712062478, 'vf_loss': 0.05488399660680443, 'vf_explained_var': 0.9921932, 'kl': 0.0012871845246991143, 'entropy': 1.7400984168052673, 'entropy_coeff': 0.005}
2020-09-21 16:00:00,319	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.018078107619658113, 'policy_loss': -0.03523905307520181, 'vf_loss': 0.023063881555572152, 'vf_explained_var': 0.9967979, 'kl': 0.0039803229228709824, 'entropy': 1

2020-09-21 16:00:10,783	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.112493819615338, 'policy_loss': -0.11251534876646474, 'vf_loss': 0.0010107197704201099, 'vf_explained_var': 0.9998311, 'kl': 0.01131319027626887, 'entropy': 1.7251194193959236, 'entropy_coeff': 0.005}
2020-09-21 16:00:11,232	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.1125000825850293, 'policy_loss': -0.112915163859725, 'vf_loss': 0.0009613629408704583, 'vf_explained_var': 0.9998392, 'kl': 0.0119825309375301, 'entropy': 1.7269009798765182, 'entropy_coeff': 0.005}
2020-09-21 16:00:11,717	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.11233836715109646, 'policy_loss': -0.11255476670339704, 'vf_loss': 0.0009229275528923608, 'vf_explained_var': 0.9998303, 'kl': 0.011673666478600353, 'entropy': 1.717250

custom_metrics: {}
date: 2020-09-21_16-00-13
done: false
episode_len_mean: 1490.85
episode_reward_max: -23.652509059612406
episode_reward_mean: -29.932715082005743
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 128
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.6749999999999999
      cur_lr: 1.0e-05
      entropy: 1.7297199442982674
      entropy_coeff: 0.005
      kl: 0.012698701350018382
      policy_loss: -0.11288084700936452
      total_loss: -0.1121809819014743
      vf_explained_var: 0.9998654723167419
      vf_loss: 0.0007768453269818565
  num_steps_sampled: 174000
  num_steps_trained: 174000
iterations_since_restore: 174
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.20909090909091
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.94

[2m[36m(pid=24699)[0m 2020-09-21 16:00:13,376	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 7715.268058202887,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 3881},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-30.63, max=22.044, mean=0.605),
[2m[36m(pid=24699)[0m                                   'prev_action': 13,
[2m[36m(pid=24699)

2020-09-21 16:00:16,075	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.15145574125926942, 'policy_loss': 0.006386839901097119, 'vf_loss': 0.1532338848337531, 'vf_explained_var': 0.90610003, 'kl': 0.0006007889033454727, 'entropy': 1.7141042202711105, 'entropy_coeff': 0.005}
2020-09-21 16:00:16,535	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.010413744719699025, 'policy_loss': -0.02179629122838378, 'vf_loss': 0.03891799843404442, 'vf_explained_var': 0.9761291, 'kl': 0.002640565551701002, 'entropy': 1.6980685666203499, 'entropy_coeff': 0.005}
2020-09-21 16:00:17,032	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.026678823633119464, 'policy_loss': -0.041536777338478714, 'vf_loss': 0.021383430168498307, 'vf_explained_var': 0.986678, 'kl': 0.002926309098256752, 'entropy': 1.70014

2020-09-21 16:00:22,741	DEBUG sgd.py:120 -- 14 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.08208550128620118, 'policy_loss': -0.08211487764492631, 'vf_loss': 0.0021849317417945713, 'vf_explained_var': 0.9985248, 'kl': 0.009371964464662597, 'entropy': 1.6963264718651772, 'entropy_coeff': 0.005}
2020-09-21 16:00:23,225	DEBUG sgd.py:120 -- 15 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.08825631899526343, 'policy_loss': -0.08848306984873489, 'vf_loss': 0.0020573579458869062, 'vf_explained_var': 0.9986428, 'kl': 0.009864872583420947, 'entropy': 1.6978768184781075, 'entropy_coeff': 0.005}
2020-09-21 16:00:23,708	DEBUG sgd.py:120 -- 16 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.08536481222836301, 'policy_loss': -0.08564052451401949, 'vf_loss': 0.0019017148806597106, 'vf_explained_var': 0.998721, 'kl': 0.010176475043408573, 'entropy': 1

custom_metrics: {}
date: 2020-09-21_16-00-29
done: false
episode_len_mean: 1490.85
episode_reward_max: -23.652509059612406
episode_reward_mean: -29.932715082005743
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 128
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.6749999999999999
      cur_lr: 1.0e-05
      entropy: 1.6954380199313164
      entropy_coeff: 0.005
      kl: 0.013443054107483476
      policy_loss: -0.10865796497091651
      total_loss: -0.10718678857665509
      vf_explained_var: 0.9994235634803772
      vf_loss: 0.0008743063990550581
  num_steps_sampled: 175000
  num_steps_trained: 175000
iterations_since_restore: 175
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.190909090909091
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.

[2m[36m(pid=24699)[0m 2020-09-21 16:00:30,145	INFO rollout_worker.py:561 -- Generating sample batch of size 200
2020-09-21 16:00:32,924	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.14083107153419405, 'policy_loss': -0.004118002485483885, 'vf_loss': 0.15369398472830653, 'vf_explained_var': 0.94534785, 'kl': 0.000492011851427776, 'entropy': 1.8154041171073914, 'entropy_coeff': 0.005}
2020-09-21 16:00:33,408	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.01526087208185345, 'policy_loss': -0.03237849334254861, 'vf_loss': 0.05330634908750653, 'vf_explained_var': 0.980303, 'kl': 0.004875043741776608, 'entropy': 1.7915283888578415, 'entropy_coeff': 0.005}
2020-09-21 16:00:33,862	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.03144639462698251, 'policy_loss': -0.0493548162048682

2020-09-21 16:00:44,289	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.1350006719585508, 'policy_loss': -0.13623065128922462, 'vf_loss': 0.0008205225276469719, 'vf_explained_var': 0.9996854, 'kl': 0.013983437966089696, 'entropy': 1.8058736026287079, 'entropy_coeff': 0.005}
2020-09-21 16:00:44,779	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.13552563183475286, 'policy_loss': -0.13668519852217287, 'vf_loss': 0.0008006103926163632, 'vf_explained_var': 0.9997031, 'kl': 0.013930979068391025, 'entropy': 1.808891199529171, 'entropy_coeff': 0.005}
2020-09-21 16:00:45,223	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.13155019748955965, 'policy_loss': -0.13295862451195717, 'vf_loss': 0.0007458951840817463, 'vf_explained_var': 0.99972385, 'kl': 0.014360851026140153, 'entropy': 1

custom_metrics: {}
date: 2020-09-21_16-00-46
done: false
episode_len_mean: 1490.85
episode_reward_max: -23.652509059612406
episode_reward_mean: -29.932715082005743
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 128
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.6749999999999999
      cur_lr: 1.0e-05
      entropy: 1.8047587797045708
      entropy_coeff: 0.005
      kl: 0.0150333036435768
      policy_loss: -0.14122790982946754
      total_loss: -0.13933009817264974
      vf_explained_var: 0.9997484683990479
      vf_loss: 0.0007741254012216814
  num_steps_sampled: 176000
  num_steps_trained: 176000
iterations_since_restore: 176
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.361904761904762
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999998
  vram_util_percent0: 0.94

2020-09-21 16:00:49,720	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.16773501667194068, 'policy_loss': -0.0026592854410409927, 'vf_loss': 0.1783632948063314, 'vf_explained_var': 0.9182062, 'kl': 0.0004921044256281926, 'entropy': 1.6602322533726692, 'entropy_coeff': 0.005}
2020-09-21 16:00:50,205	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.024110098835080862, 'policy_loss': -0.027563862735405564, 'vf_loss': 0.05656464979983866, 'vf_explained_var': 0.9726258, 'kl': 0.00522851430287119, 'entropy': 1.683987334370613, 'entropy_coeff': 0.005}
2020-09-21 16:00:50,654	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.022839207900688052, 'policy_loss': -0.042096855177078396, 'vf_loss': 0.02283568709390238, 'vf_explained_var': 0.9879882, 'kl': 0.007101251190761104, 'entropy': 1.67427

2020-09-21 16:01:01,107	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.12648693640949205, 'policy_loss': -0.13007452682359144, 'vf_loss': 0.0011468569064163603, 'vf_explained_var': 0.9993284, 'kl': 0.01610225043259561, 'entropy': 1.6856581345200539, 'entropy_coeff': 0.005}
2020-09-21 16:01:01,594	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.13016969442833215, 'policy_loss': -0.13352466089418158, 'vf_loss': 0.0010508333762118127, 'vf_explained_var': 0.99936867, 'kl': 0.015853963734116405, 'entropy': 1.6794590204954147, 'entropy_coeff': 0.005}
2020-09-21 16:01:02,065	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.12378768646158278, 'policy_loss': -0.12736692721955478, 'vf_loss': 0.000998081286525121, 'vf_explained_var': 0.9993961, 'kl': 0.01633212377782911, 'entropy': 1.

custom_metrics: {}
date: 2020-09-21_16-01-03
done: false
episode_len_mean: 1490.85
episode_reward_max: -23.652509059612406
episode_reward_mean: -29.932715082005743
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 128
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.6749999999999999
      cur_lr: 1.0e-05
      entropy: 1.682679034769535
      entropy_coeff: 0.005
      kl: 0.016477330587804317
      policy_loss: -0.13559670239919797
      total_loss: -0.13198955409461632
      vf_explained_var: 0.9994758367538452
      vf_loss: 0.0008983450170489959
  num_steps_sampled: 177000
  num_steps_trained: 177000
iterations_since_restore: 177
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.4363636363636365
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.

2020-09-21 16:01:06,324	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.14844041946344078, 'policy_loss': 0.0033218510216102004, 'vf_loss': 0.1524913120083511, 'vf_explained_var': 0.8892797, 'kl': 0.0013434770891025405, 'entropy': 1.655918464064598, 'entropy_coeff': 0.005}
2020-09-21 16:01:06,781	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.03617812029551715, 'policy_loss': -0.010778221068903804, 'vf_loss': 0.05334277474321425, 'vf_explained_var': 0.96090114, 'kl': 0.002925004177086521, 'entropy': 1.6721629947423935, 'entropy_coeff': 0.005}
2020-09-21 16:01:07,269	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.009939692099578679, 'policy_loss': -0.034536011749878526, 'vf_loss': 0.029931245953775942, 'vf_explained_var': 0.9785843, 'kl': 0.004579763291985728, 'entropy': 1.6852

2020-09-21 16:01:17,716	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.10991227318299934, 'policy_loss': -0.11218827730044723, 'vf_loss': 0.0011999346061202232, 'vf_explained_var': 0.999081, 'kl': 0.014039246423635632, 'entropy': 1.6800828576087952, 'entropy_coeff': 0.005}
2020-09-21 16:01:18,175	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.11762002744944766, 'policy_loss': -0.11990273569244891, 'vf_loss': 0.0011090541338489857, 'vf_explained_var': 0.9991373, 'kl': 0.014206513529643416, 'entropy': 1.6831480786204338, 'entropy_coeff': 0.005}
2020-09-21 16:01:18,660	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.10873752902261913, 'policy_loss': -0.11112245672848076, 'vf_loss': 0.0010809184059326071, 'vf_explained_var': 0.99916315, 'kl': 0.014417263737414032, 'entropy': 

custom_metrics: {}
date: 2020-09-21_16-01-20
done: false
episode_len_mean: 1531.39
episode_reward_max: -23.652509059612406
episode_reward_mean: -29.918575365730497
episode_reward_min: -32.22698139089825
episodes_this_iter: 1
episodes_total: 129
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.6749999999999999
      cur_lr: 1.0e-05
      entropy: 1.681033879518509
      entropy_coeff: 0.005
      kl: 0.014955763646867126
      policy_loss: -0.10563566628843546
      total_loss: -0.10289060918148607
      vf_explained_var: 0.9992828369140625
      vf_loss: 0.0010550892984610982
  num_steps_sampled: 178000
  num_steps_trained: 178000
iterations_since_restore: 178
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.45909090909091
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.94

2020-09-21 16:01:22,991	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.08612047601491213, 'policy_loss': -0.001300959149375558, 'vf_loss': 0.09583181282505393, 'vf_explained_var': 0.926249, 'kl': 0.000396796147976497, 'entropy': 1.7356436476111412, 'entropy_coeff': 0.005}
2020-09-21 16:01:23,478	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.014253501896746457, 'policy_loss': -0.01365827617701143, 'vf_loss': 0.03490923857316375, 'vf_explained_var': 0.9724959, 'kl': 0.002629636393976398, 'entropy': 1.7544930055737495, 'entropy_coeff': 0.005}
2020-09-21 16:01:23,928	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.018406478688120842, 'policy_loss': -0.028385793790221214, 'vf_loss': 0.01604908483568579, 'vf_explained_var': 0.9876965, 'kl': 0.004091784649062902, 'entropy': 1.766346

2020-09-21 16:01:34,380	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.11104477662593126, 'policy_loss': -0.1129118389217183, 'vf_loss': 0.000754308213799959, 'vf_explained_var': 0.99934816, 'kl': 0.014610403217375278, 'entropy': 1.7498540878295898, 'entropy_coeff': 0.005}
2020-09-21 16:01:34,871	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.10956475976854563, 'policy_loss': -0.11192577448673546, 'vf_loss': 0.0007038710373308277, 'vf_explained_var': 0.99937654, 'kl': 0.01548556808847934, 'entropy': 1.7591225281357765, 'entropy_coeff': 0.005}
2020-09-21 16:01:35,325	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.11289004236459732, 'policy_loss': -0.11512390407733619, 'vf_loss': 0.0006774343219149159, 'vf_explained_var': 0.999424, 'kl': 0.01527818338945508, 'entropy': 1.7

custom_metrics: {}
date: 2020-09-21_16-01-36
done: false
episode_len_mean: 1531.39
episode_reward_max: -23.652509059612406
episode_reward_mean: -29.918575365730497
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 129
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.6749999999999999
      cur_lr: 1.0e-05
      entropy: 1.752976804971695
      entropy_coeff: 0.005
      kl: 0.015861196967307478
      policy_loss: -0.11934435204602778
      total_loss: -0.11680044489912689
      vf_explained_var: 0.9994796514511108
      vf_loss: 0.0006024882732162951
  num_steps_sampled: 179000
  num_steps_trained: 179000
iterations_since_restore: 179
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.261904761904763
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999998
  vram_util_percent0: 0.9

[2m[36m(pid=24699)[0m 2020-09-21 16:01:37,042	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 9636.59,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 252},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-1.846, max=2.159, mean=0.141),
[2m[36m(pid=24699)[0m                                   'prev_action': 2,
[2m[36m(pid=24699)[0m         

2020-09-21 16:01:39,614	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.09568075439892709, 'policy_loss': 0.00014431506861001253, 'vf_loss': 0.10273428354412317, 'vf_explained_var': 0.9829174, 'kl': 0.00171207246162388, 'entropy': 1.6706977039575577, 'entropy_coeff': 0.005}
2020-09-21 16:01:40,098	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.0331678360234946, 'policy_loss': -0.015388083411380649, 'vf_loss': 0.0549325110623613, 'vf_explained_var': 0.9911692, 'kl': 0.0030072891095187515, 'entropy': 1.681302271783352, 'entropy_coeff': 0.005}
2020-09-21 16:01:40,552	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.0009608869440853596, 'policy_loss': -0.023531366197858006, 'vf_loss': 0.02951723779551685, 'vf_explained_var': 0.9949748, 'kl': 0.005150614451849833, 'entropy': 1.7003286

2020-09-21 16:01:50,989	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.10372939379885793, 'policy_loss': -0.1065587792545557, 'vf_loss': 0.001279781739867758, 'vf_explained_var': 0.999768, 'kl': 0.014611850667279214, 'entropy': 1.6626797392964363, 'entropy_coeff': 0.005}
2020-09-21 16:01:51,441	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.10610515292501077, 'policy_loss': -0.10868146488792263, 'vf_loss': 0.0012195128692837898, 'vf_explained_var': 0.9997804, 'kl': 0.014335150714032352, 'entropy': 1.6638855040073395, 'entropy_coeff': 0.005}
2020-09-21 16:01:51,919	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.10263556288555264, 'policy_loss': -0.10517660179175436, 'vf_loss': 0.0011470244571682997, 'vf_explained_var': 0.9997737, 'kl': 0.014364273578394204, 'entropy': 1.6

custom_metrics: {}
date: 2020-09-21_16-01-53
done: false
episode_len_mean: 1531.39
episode_reward_max: -23.652509059612406
episode_reward_mean: -29.918575365730497
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 129
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.6749999999999999
      cur_lr: 1.0e-05
      entropy: 1.6669438630342484
      entropy_coeff: 0.005
      kl: 0.015479878638871014
      policy_loss: -0.11934322235174477
      total_loss: -0.11617754539474845
      vf_explained_var: 0.9998133778572083
      vf_loss: 0.0010514810201129876
  num_steps_sampled: 180000
  num_steps_trained: 180000
iterations_since_restore: 180
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.168181818181818
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.

[2m[36m(pid=24699)[0m 2020-09-21 16:01:53,959	INFO rollout_worker.py:561 -- Generating sample batch of size 200
2020-09-21 16:01:56,621	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.13763877656310797, 'policy_loss': -0.00373778585344553, 'vf_loss': 0.1485831425525248, 'vf_explained_var': 0.9173044, 'kl': 0.0011916608271018703, 'entropy': 1.602189652621746, 'entropy_coeff': 0.005}
2020-09-21 16:01:57,109	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.024450680357404053, 'policy_loss': -0.017882073996588588, 'vf_loss': 0.04728436889126897, 'vf_explained_var': 0.9740761, 'kl': 0.0046098749444354326, 'entropy': 1.6126563549041748, 'entropy_coeff': 0.005}
2020-09-21 16:01:57,562	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.01854847659706138, 'policy_loss': -0.031683688546763

2020-09-21 16:02:08,031	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.1190538804512471, 'policy_loss': -0.12086339318193495, 'vf_loss': 0.00092209179274505, 'vf_explained_var': 0.99943686, 'kl': 0.012997297395486385, 'entropy': 1.5771509334445, 'entropy_coeff': 0.005}
2020-09-21 16:02:08,523	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.1218104463769123, 'policy_loss': -0.12358844745904207, 'vf_loss': 0.0008449161032331176, 'vf_explained_var': 0.99947155, 'kl': 0.013100773969199508, 'entropy': 1.5819881707429886, 'entropy_coeff': 0.005}
2020-09-21 16:02:08,978	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.12146675703115761, 'policy_loss': -0.12360737868584692, 'vf_loss': 0.0008194164984161034, 'vf_explained_var': 0.9994925, 'kl': 0.013682743185199797, 'entropy': 1.582

custom_metrics: {}
date: 2020-09-21_16-02-10
done: false
episode_len_mean: 1531.39
episode_reward_max: -23.652509059612406
episode_reward_mean: -29.918575365730497
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 129
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.6749999999999999
      cur_lr: 1.0e-05
      entropy: 1.5762590244412422
      entropy_coeff: 0.005
      kl: 0.014111473108641803
      policy_loss: -0.12790574599057436
      total_loss: -0.12554472126066685
      vf_explained_var: 0.9995574355125427
      vf_loss: 0.0007170739099819912
  num_steps_sampled: 181000
  num_steps_trained: 181000
iterations_since_restore: 181
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 6.136363636363637
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.

2020-09-21 16:02:13,473	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.16650409623980522, 'policy_loss': -0.0018569037783890963, 'vf_loss': 0.17636602139100432, 'vf_explained_var': 0.91181976, 'kl': 0.00010326990137338177, 'entropy': 1.6149463132023811, 'entropy_coeff': 0.005}
2020-09-21 16:02:13,972	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.04088415205478668, 'policy_loss': -0.011854488868266344, 'vf_loss': 0.059848647797480226, 'vf_explained_var': 0.9698869, 'kl': 0.0014801360484852921, 'entropy': 1.6218196749687195, 'entropy_coeff': 0.005}
2020-09-21 16:02:14,428	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.004221914918161929, 'policy_loss': -0.02761992608429864, 'vf_loss': 0.02949568594340235, 'vf_explained_var': 0.9850596, 'kl': 0.002906813060690183, 'entropy': 1.

2020-09-21 16:02:20,135	DEBUG sgd.py:120 -- 14 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.08234182838350534, 'policy_loss': -0.08261135243810713, 'vf_loss': 0.002243368318886496, 'vf_explained_var': 0.9987264, 'kl': 0.008841424714773893, 'entropy': 1.5883611291646957, 'entropy_coeff': 0.005}
2020-09-21 16:02:20,590	DEBUG sgd.py:120 -- 15 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.08507264300715178, 'policy_loss': -0.08553143253084272, 'vf_loss': 0.002279783300764393, 'vf_explained_var': 0.998757, 'kl': 0.009071139065781608, 'entropy': 1.5888028591871262, 'entropy_coeff': 0.005}
2020-09-21 16:02:21,074	DEBUG sgd.py:120 -- 16 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.08998475060798228, 'policy_loss': -0.09019551146775484, 'vf_loss': 0.0018385450603091158, 'vf_explained_var': 0.99888057, 'kl': 0.009340277232695371, 'entropy': 1.

custom_metrics: {}
date: 2020-09-21_16-02-27
done: false
episode_len_mean: 1531.39
episode_reward_max: -23.652509059612406
episode_reward_mean: -29.918575365730497
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 129
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.6749999999999999
      cur_lr: 1.0e-05
      entropy: 1.5858130604028702
      entropy_coeff: 0.005
      kl: 0.012967423244845122
      policy_loss: -0.1107066321419552
      total_loss: -0.10901719599496573
      vf_explained_var: 0.9994713664054871
      vf_loss: 0.0008654929770273156
  num_steps_sampled: 182000
  num_steps_trained: 182000
iterations_since_restore: 182
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.404545454545455
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.9

2020-09-21 16:02:30,595	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.11590879131108522, 'policy_loss': -0.002651549060828984, 'vf_loss': 0.12415737006813288, 'vf_explained_var': 0.9172141, 'kl': 0.0038221558639985886, 'entropy': 1.6353955566883087, 'entropy_coeff': 0.005}
2020-09-21 16:02:31,079	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.020051580853760242, 'policy_loss': -0.019634378259070218, 'vf_loss': 0.04423122096341103, 'vf_explained_var': 0.96886146, 'kl': 0.005382177798310295, 'entropy': 1.635646529495716, 'entropy_coeff': 0.005}
2020-09-21 16:02:31,531	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.0239466680213809, 'policy_loss': -0.04259015154093504, 'vf_loss': 0.021653508709277958, 'vf_explained_var': 0.9845014, 'kl': 0.00763642814126797, 'entropy': 1.632922

2020-09-21 16:02:41,964	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.11591204884462059, 'policy_loss': -0.12070671131368726, 'vf_loss': 0.0012349209428066388, 'vf_explained_var': 0.99906313, 'kl': 0.0172250765026547, 'entropy': 1.6134361922740936, 'entropy_coeff': 0.005}
2020-09-21 16:02:42,449	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.12028869823552668, 'policy_loss': -0.12508285022340715, 'vf_loss': 0.0011925379367312416, 'vf_explained_var': 0.99913096, 'kl': 0.017322027357295156, 'entropy': 1.618151769042015, 'entropy_coeff': 0.005}
2020-09-21 16:02:42,906	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.12316827732138336, 'policy_loss': -0.12809410551562905, 'vf_loss': 0.0011487339615996461, 'vf_explained_var': 0.99915016, 'kl': 0.01754429697757587, 'entropy': 1

custom_metrics: {}
date: 2020-09-21_16-02-44
done: false
episode_len_mean: 1531.39
episode_reward_max: -23.652509059612406
episode_reward_mean: -29.918575365730497
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 129
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.6749999999999999
      cur_lr: 1.0e-05
      entropy: 1.6117998287081718
      entropy_coeff: 0.005
      kl: 0.01827290147775784
      policy_loss: -0.12703950935974717
      total_loss: -0.1217649559257552
      vf_explained_var: 0.9992602467536926
      vf_loss: 0.000999348016193835
  num_steps_sampled: 183000
  num_steps_trained: 183000
iterations_since_restore: 183
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.263636363636364
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.944

2020-09-21 16:02:47,465	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.2095665920060128, 'policy_loss': -0.002913623582571745, 'vf_loss': 0.2196781043894589, 'vf_explained_var': 0.9009606, 'kl': 0.0006931952289670473, 'entropy': 1.5331602990627289, 'entropy_coeff': 0.005}
2020-09-21 16:02:47,953	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.041438809246756136, 'policy_loss': -0.02722214604727924, 'vf_loss': 0.07416704436764121, 'vf_explained_var': 0.9639277, 'kl': 0.003271916626545135, 'entropy': 1.54292593896389, 'entropy_coeff': 0.005}
2020-09-21 16:02:48,409	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.024369358900003135, 'policy_loss': -0.053545828559435904, 'vf_loss': 0.03452757420018315, 'vf_explained_var': 0.98233235, 'kl': 0.0033827252336777747, 'entropy': 1.526888

2020-09-21 16:02:58,789	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.1241612492594868, 'policy_loss': -0.12651519768405706, 'vf_loss': 0.0008597213891334832, 'vf_explained_var': 0.9995631, 'kl': 0.013401222648099065, 'entropy': 1.510320708155632, 'entropy_coeff': 0.005}
2020-09-21 16:02:59,279	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.12147884647129104, 'policy_loss': -0.12372796016279608, 'vf_loss': 0.0008207266182580497, 'vf_explained_var': 0.9996099, 'kl': 0.013341008045244962, 'entropy': 1.5153599083423615, 'entropy_coeff': 0.005}
2020-09-21 16:02:59,730	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.12336994183715433, 'policy_loss': -0.12563970626797527, 'vf_loss': 0.0007582497346447781, 'vf_explained_var': 0.9996223, 'kl': 0.013454506057314575, 'entropy': 1.

custom_metrics: {}
date: 2020-09-21_16-03-01
done: false
episode_len_mean: 1531.39
episode_reward_max: -23.652509059612406
episode_reward_mean: -29.918575365730497
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 129
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.6749999999999999
      cur_lr: 1.0e-05
      entropy: 1.5175468176603317
      entropy_coeff: 0.005
      kl: 0.013956559996586293
      policy_loss: -0.12304623401723802
      total_loss: -0.12055187858641148
      vf_explained_var: 0.9996882081031799
      vf_loss: 0.0006614186258957488
  num_steps_sampled: 184000
  num_steps_trained: 184000
iterations_since_restore: 184
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.3909090909090915
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0

[2m[36m(pid=24699)[0m 2020-09-21 16:03:01,411	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 11273.299317229543,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 1252},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-4.668, max=3.656, mean=0.215),
[2m[36m(pid=24699)[0m                                   'prev_action': 10,
[2m[36m(pid=24699)

2020-09-21 16:03:04,850	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.16335209785029292, 'policy_loss': 0.002440687734633684, 'vf_loss': 0.1677775587886572, 'vf_explained_var': 0.9003431, 'kl': 0.001288747088616815, 'entropy': 1.547209158539772, 'entropy_coeff': 0.005}
2020-09-21 16:03:05,341	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.032521021319553256, 'policy_loss': -0.027024217590223998, 'vf_loss': 0.0635256702080369, 'vf_explained_var': 0.96112305, 'kl': 0.0054430927557405084, 'entropy': 1.530903398990631, 'entropy_coeff': 0.005}
2020-09-21 16:03:05,794	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.011141560971736908, 'policy_loss': -0.04072297248058021, 'vf_loss': 0.03266589669510722, 'vf_explained_var': 0.9818075, 'kl': 0.006667226582067087, 'entropy': 1.51697310

2020-09-21 16:03:16,273	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.12389072624500841, 'policy_loss': -0.1265656928298995, 'vf_loss': 0.0010777487914310768, 'vf_explained_var': 0.99926287, 'kl': 0.013646019855514169, 'entropy': 1.5227682292461395, 'entropy_coeff': 0.005}
2020-09-21 16:03:16,761	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.12195679766591638, 'policy_loss': -0.12474817922338843, 'vf_loss': 0.001045818684360711, 'vf_explained_var': 0.9992638, 'kl': 0.013861635117791593, 'entropy': 1.5222082287073135, 'entropy_coeff': 0.005}
2020-09-21 16:03:17,216	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.11611078405985609, 'policy_loss': -0.11931308696512133, 'vf_loss': 0.000994259309663903, 'vf_explained_var': 0.99932927, 'kl': 0.014522811747156084, 'entropy': 1

custom_metrics: {}
date: 2020-09-21_16-03-18
done: false
episode_len_mean: 1531.39
episode_reward_max: -23.652509059612406
episode_reward_mean: -29.918575365730497
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 129
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.6749999999999999
      cur_lr: 1.0e-05
      entropy: 1.5248022601008415
      entropy_coeff: 0.005
      kl: 0.014751804585102946
      policy_loss: -0.12008492089807987
      total_loss: -0.11686462874058634
      vf_explained_var: 0.9994183778762817
      vf_loss: 0.0008868389268172905
  num_steps_sampled: 185000
  num_steps_trained: 185000
iterations_since_restore: 185
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.409090909090909
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.

[2m[36m(pid=24699)[0m 2020-09-21 16:03:18,930	INFO rollout_worker.py:561 -- Generating sample batch of size 200
2020-09-21 16:03:21,606	DEBUG rollout_worker.py:760 -- Training out:

{ 'default_policy': { 'learner_stats': { 'allreduce_latency': 0.0,
                                         'cur_kl_coeff': 0.6749999999999999,
                                         'cur_lr': 1e-05,
                                         'entropy': 1.5915801525115967,
                                         'entropy_coeff': 0.005,
                                         'kl': 1.678306382757455e-08,
                                         'policy_loss': -0.06692703068256378,
                                         'total_loss': 0.30903589725494385,
                                         'vf_explained_var': np.ndarray((1,), dtype=float32, min=0.818, max=0.818, mean=0.818),
                                         'vf_loss': 0.3839207887649536}}}

2020-09-21 16:03:21,610	INFO rollout_worker.py:73

2020-09-21 16:03:28,160	DEBUG sgd.py:120 -- 13 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.10834848770173267, 'policy_loss': -0.1122508691623807, 'vf_loss': 0.003890593448886648, 'vf_explained_var': 0.99805266, 'kl': 0.011691172141581774, 'entropy': 1.5759512856602669, 'entropy_coeff': 0.005}
2020-09-21 16:03:28,647	DEBUG sgd.py:120 -- 14 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.10562431230209768, 'policy_loss': -0.10951383435167372, 'vf_loss': 0.0036461833369685337, 'vf_explained_var': 0.99826014, 'kl': 0.012059214932378381, 'entropy': 1.5793255120515823, 'entropy_coeff': 0.005}
2020-09-21 16:03:29,101	DEBUG sgd.py:120 -- 15 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.11377641547005624, 'policy_loss': -0.11750346468761563, 'vf_loss': 0.0033427463058615103, 'vf_explained_var': 0.9984238, 'kl': 0.012213232752401382, 'entropy': 

custom_metrics: {}
date: 2020-09-21_16-03-35
done: false
episode_len_mean: 1531.39
episode_reward_max: -23.652509059612406
episode_reward_mean: -29.918575365730497
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 129
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.6749999999999999
      cur_lr: 1.0e-05
      entropy: 1.5731611624360085
      entropy_coeff: 0.005
      kl: 0.014080813562031835
      policy_loss: -0.14004808582831174
      total_loss: -0.13723394414409995
      vf_explained_var: 0.9994246959686279
      vf_loss: 0.0011753966464311816
  num_steps_sampled: 186000
  num_steps_trained: 186000
iterations_since_restore: 186
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.631818181818182
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.

2020-09-21 16:03:39,001	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.16086761583574116, 'policy_loss': 0.0036801244132220745, 'vf_loss': 0.16412304667755961, 'vf_explained_var': 0.9261979, 'kl': 0.001049221698165681, 'entropy': 1.5287551507353783, 'entropy_coeff': 0.005}
2020-09-21 16:03:39,491	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.031659522908739746, 'policy_loss': -0.024939861672464758, 'vf_loss': 0.06141083990223706, 'vf_explained_var': 0.9720686, 'kl': 0.004078767145983875, 'entropy': 1.5129244849085808, 'entropy_coeff': 0.005}
2020-09-21 16:03:39,935	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.0015252213343046606, 'policy_loss': -0.03064036462455988, 'vf_loss': 0.03417456604074687, 'vf_explained_var': 0.9842967, 'kl': 0.00380066545039881, 'entropy': 1.52497

2020-09-21 16:03:50,379	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.11825215932913125, 'policy_loss': -0.12201753770932555, 'vf_loss': 0.0011956207490584347, 'vf_explained_var': 0.9994049, 'kl': 0.01518755912547931, 'entropy': 1.5363692566752434, 'entropy_coeff': 0.005}
2020-09-21 16:03:50,869	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.11506091034971178, 'policy_loss': -0.11884124879725277, 'vf_loss': 0.0011304372965241782, 'vf_explained_var': 0.9994527, 'kl': 0.015323143801651895, 'entropy': 1.5386422723531723, 'entropy_coeff': 0.005}
2020-09-21 16:03:51,325	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.12684802012518048, 'policy_loss': -0.13072784122778103, 'vf_loss': 0.0011442449649621267, 'vf_explained_var': 0.99947894, 'kl': 0.015402194403577596, 'entropy': 

custom_metrics: {}
date: 2020-09-21_16-03-52
done: false
episode_len_mean: 1573.37
episode_reward_max: -23.34772977973811
episode_reward_mean: -29.877566810936386
episode_reward_min: -32.22698139089825
episodes_this_iter: 1
episodes_total: 130
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.6749999999999999
      cur_lr: 1.0e-05
      entropy: 1.5363573879003525
      entropy_coeff: 0.005
      kl: 0.01596789842005819
      policy_loss: -0.13505799020640552
      total_loss: -0.1310824699467048
      vf_explained_var: 0.9995704889297485
      vf_loss: 0.0008789785679255147
  num_steps_sampled: 187000
  num_steps_trained: 187000
iterations_since_restore: 187
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.2727272727272725
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.94

2020-09-21 16:03:55,929	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.23493850277736783, 'policy_loss': 0.001661296235397458, 'vf_loss': 0.2403527945280075, 'vf_explained_var': 0.94841963, 'kl': 0.00061442485482055, 'entropy': 1.4980662912130356, 'entropy_coeff': 0.005}
2020-09-21 16:03:56,417	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.03567310015205294, 'policy_loss': -0.02519128704443574, 'vf_loss': 0.06647752714343369, 'vf_explained_var': 0.9853315, 'kl': 0.0027747823332902044, 'entropy': 1.4972241893410683, 'entropy_coeff': 0.005}
2020-09-21 16:03:56,873	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.01591433712746948, 'policy_loss': -0.043666381388902664, 'vf_loss': 0.032411500927992165, 'vf_explained_var': 0.993169, 'kl': 0.00422566024644766, 'entropy': 1.502356991

2020-09-21 16:04:07,292	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.11457401816733181, 'policy_loss': -0.11831846018321812, 'vf_loss': 0.001080884991097264, 'vf_explained_var': 0.99973905, 'kl': 0.014979643747210503, 'entropy': 1.4895396903157234, 'entropy_coeff': 0.005}
2020-09-21 16:04:07,778	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.11078326590359211, 'policy_loss': -0.11460447078570724, 'vf_loss': 0.0010875668413063977, 'vf_explained_var': 0.9997511, 'kl': 0.015071846544742584, 'entropy': 1.4879715666174889, 'entropy_coeff': 0.005}
2020-09-21 16:04:08,230	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.1115744998678565, 'policy_loss': -0.11546603252645582, 'vf_loss': 0.0010539638242335059, 'vf_explained_var': 0.99976367, 'kl': 0.01525493385270238, 'entropy': 1

custom_metrics: {}
date: 2020-09-21_16-04-09
done: false
episode_len_mean: 1573.37
episode_reward_max: -23.34772977973811
episode_reward_mean: -29.877566810936383
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 130
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.6749999999999999
      cur_lr: 1.0e-05
      entropy: 1.4887313321232796
      entropy_coeff: 0.005
      kl: 0.015895631513558328
      policy_loss: -0.12652624945621938
      total_loss: -0.12241813202854246
      vf_explained_var: 0.9998011589050293
      vf_loss: 0.0008222206706705038
  num_steps_sampled: 188000
  num_steps_trained: 188000
iterations_since_restore: 188
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.345454545454547
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.9

2020-09-21 16:04:13,058	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.43297302117571235, 'policy_loss': -0.0037432688986882567, 'vf_loss': 0.44448412768542767, 'vf_explained_var': 0.9279451, 'kl': 0.00044355255204320265, 'entropy': 1.6134509220719337, 'entropy_coeff': 0.005}
2020-09-21 16:04:13,543	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.12845239648595452, 'policy_loss': -0.02145663625560701, 'vf_loss': 0.15570029243826866, 'vf_explained_var': 0.9766962, 'kl': 0.003457135200733319, 'entropy': 1.6249657422304153, 'entropy_coeff': 0.005}
2020-09-21 16:04:14,004	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.03341225907206535, 'policy_loss': -0.04428378283046186, 'vf_loss': 0.08269937732256949, 'vf_explained_var': 0.9866847, 'kl': 0.004566801566397771, 'entropy': 1.617185

2020-09-21 16:04:21,643	DEBUG rollout_worker.py:760 -- Training out:

{ 'default_policy': { 'learner_stats': { 'allreduce_latency': 0.0,
                                         'cur_kl_coeff': 0.6749999999999999,
                                         'cur_lr': 1e-05,
                                         'entropy': 1.5123602151870728,
                                         'entropy_coeff': 0.005,
                                         'kl': 0.01669006608426571,
                                         'policy_loss': -0.014110658317804337,
                                         'total_loss': -0.006842285394668579,
                                         'vf_explained_var': np.ndarray((1,), dtype=float32, min=1.0, max=1.0, mean=1.0),
                                         'vf_loss': 0.003564391052350402}}}

2020-09-21 16:04:22,064	DEBUG sgd.py:120 -- 19 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.1264710472896695, 'poli

custom_metrics: {}
date: 2020-09-21_16-04-26
done: false
episode_len_mean: 1573.37
episode_reward_max: -23.34772977973811
episode_reward_mean: -29.877566810936383
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 130
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.6749999999999999
      cur_lr: 1.0e-05
      entropy: 1.6270451843738556
      entropy_coeff: 0.005
      kl: 0.017338074918370694
      policy_loss: -0.14532965180114843
      total_loss: -0.13988351135049015
      vf_explained_var: 0.9996984601020813
      vf_loss: 0.0018781658363877796
  num_steps_sampled: 189000
  num_steps_trained: 189000
iterations_since_restore: 189
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.368181818181818
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.9

[2m[36m(pid=24699)[0m 2020-09-21 16:04:27,061	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 9173.06856383866,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 2252},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-1.778, max=1.87, mean=0.304),
[2m[36m(pid=24699)[0m                                   'prev_action': 16,
[2m[36m(pid=24699)[0

2020-09-21 16:04:30,037	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.3663335544988513, 'policy_loss': 0.00360035034827888, 'vf_loss': 0.3704077424481511, 'vf_explained_var': 0.8511921, 'kl': 0.0002623460601726135, 'entropy': 1.570323757827282, 'entropy_coeff': 0.005}
2020-09-21 16:04:30,524	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.12233933270908892, 'policy_loss': -0.016251522349193692, 'vf_loss': 0.14493422000668943, 'vf_explained_var': 0.94032097, 'kl': 0.0021571205761574674, 'entropy': 1.5598833188414574, 'entropy_coeff': 0.005}
2020-09-21 16:04:30,976	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.04572017451573629, 'policy_loss': -0.03288946335669607, 'vf_loss': 0.08313681744039059, 'vf_explained_var': 0.96610683, 'kl': 0.004689970475737937, 'entropy': 1.538581818

2020-09-21 16:04:41,401	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.12530282296938822, 'policy_loss': -0.13180843042209744, 'vf_loss': 0.0032574776123510674, 'vf_explained_var': 0.99858475, 'kl': 0.016354472492821515, 'entropy': 1.558227851986885, 'entropy_coeff': 0.005}
2020-09-21 16:04:41,888	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.12476675608195364, 'policy_loss': -0.1313127086032182, 'vf_loss': 0.003092206039582379, 'vf_explained_var': 0.998636, 'kl': 0.016642129980027676, 'entropy': 1.5559378787875175, 'entropy_coeff': 0.005}
2020-09-21 16:04:42,335	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.13157748244702816, 'policy_loss': -0.13799481428577565, 'vf_loss': 0.002877306229493115, 'vf_explained_var': 0.99874425, 'kl': 0.016761843115091324, 'entropy': 1.5

custom_metrics: {}
date: 2020-09-21_16-04-43
done: false
episode_len_mean: 1573.37
episode_reward_max: -23.34772977973811
episode_reward_mean: -29.877566810936383
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 130
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.6749999999999999
      cur_lr: 1.0e-05
      entropy: 1.5643068104982376
      entropy_coeff: 0.005
      kl: 0.017327725887298584
      policy_loss: -0.14121081354096532
      total_loss: -0.1348840999417007
      vf_explained_var: 0.9989519119262695
      vf_loss: 0.002452026790706441
  num_steps_sampled: 190000
  num_steps_trained: 190000
iterations_since_restore: 190
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.359090909090909
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.944

[2m[36m(pid=24699)[0m 2020-09-21 16:04:44,035	INFO rollout_worker.py:561 -- Generating sample batch of size 200
2020-09-21 16:04:47,470	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.5569558320567012, 'policy_loss': 0.0019472513813525438, 'vf_loss': 0.561822135001421, 'vf_explained_var': 0.88094974, 'kl': 0.0007477484290130398, 'entropy': 1.4636541977524757, 'entropy_coeff': 0.005}
2020-09-21 16:04:47,955	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.14752012421377003, 'policy_loss': -0.029543454060330987, 'vf_loss': 0.1817239010706544, 'vf_explained_var': 0.96050596, 'kl': 0.003969157027313486, 'entropy': 1.4679012671113014, 'entropy_coeff': 0.005}
2020-09-21 16:04:48,408	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.03411601559491828, 'policy_loss': -0.05092324502766132

2020-09-21 16:04:58,825	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.12080506992060691, 'policy_loss': -0.12727762223221362, 'vf_loss': 0.003025270256330259, 'vf_explained_var': 0.99928695, 'kl': 0.016094620514195412, 'entropy': 1.4833161756396294, 'entropy_coeff': 0.005}
2020-09-21 16:04:59,280	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.12200345494784415, 'policy_loss': -0.12905013700947165, 'vf_loss': 0.0028090277046430856, 'vf_explained_var': 0.9993521, 'kl': 0.01726425887318328, 'entropy': 1.4831443130970001, 'entropy_coeff': 0.005}
2020-09-21 16:04:59,756	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.11918532499112189, 'policy_loss': -0.12576457858085632, 'vf_loss': 0.0026559802936390042, 'vf_explained_var': 0.9994029, 'kl': 0.016817568975966424, 'entropy': 1

custom_metrics: {}
date: 2020-09-21_16-05-01
done: false
episode_len_mean: 1573.37
episode_reward_max: -23.34772977973811
episode_reward_mean: -29.877566810936383
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 130
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.6749999999999999
      cur_lr: 1.0e-05
      entropy: 1.4850925728678703
      entropy_coeff: 0.005
      kl: 0.01739674311829731
      policy_loss: -0.13624165707733482
      total_loss: -0.12970192567445338
      vf_explained_var: 0.9994933605194092
      vf_loss: 0.0022223997802939266
  num_steps_sampled: 191000
  num_steps_trained: 191000
iterations_since_restore: 191
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.326086956521738
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.94

2020-09-21 16:05:04,528	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.33594583300873637, 'policy_loss': 0.008192168723326176, 'vf_loss': 0.3349555628374219, 'vf_explained_var': 0.89704615, 'kl': 0.0006465172018297882, 'entropy': 1.5276601612567902, 'entropy_coeff': 0.005}
2020-09-21 16:05:05,017	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.10474156658165157, 'policy_loss': -0.023481932352297008, 'vf_loss': 0.13310312060639262, 'vf_explained_var': 0.9587629, 'kl': 0.00396238399844151, 'entropy': 1.5108460634946823, 'entropy_coeff': 0.005}
2020-09-21 16:05:05,474	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.03045103349722922, 'policy_loss': -0.03664071043021977, 'vf_loss': 0.07187220035120845, 'vf_explained_var': 0.97812796, 'kl': 0.004133404130698182, 'entropy': 1.51410207

2020-09-21 16:05:15,929	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.11265792301855981, 'policy_loss': -0.11815310746897012, 'vf_loss': 0.003996495972387493, 'vf_explained_var': 0.9986502, 'kl': 0.013559797895140946, 'entropy': 1.5308355912566185, 'entropy_coeff': 0.005}
2020-09-21 16:05:16,415	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.11235640151426196, 'policy_loss': -0.1180698894422676, 'vf_loss': 0.0039323488308582455, 'vf_explained_var': 0.9987488, 'kl': 0.013960223877802491, 'entropy': 1.528400681912899, 'entropy_coeff': 0.005}
2020-09-21 16:05:16,878	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.12884727481286973, 'policy_loss': -0.1344513589865528, 'vf_loss': 0.00378955811902415, 'vf_explained_var': 0.99872464, 'kl': 0.01400450721848756, 'entropy': 1.5277

custom_metrics: {}
date: 2020-09-21_16-05-18
done: false
episode_len_mean: 1615.26
episode_reward_max: -22.42452783415769
episode_reward_mean: -29.808906649069673
episode_reward_min: -32.22698139089825
episodes_this_iter: 1
episodes_total: 131
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.6749999999999999
      cur_lr: 1.0e-05
      entropy: 1.5297561585903168
      entropy_coeff: 0.005
      kl: 0.01482495031086728
      policy_loss: -0.12432440696284175
      total_loss: -0.11870528687722981
      vf_explained_var: 0.998991847038269
      vf_loss: 0.003261057354393415
  num_steps_sampled: 192000
  num_steps_trained: 192000
iterations_since_restore: 192
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.545454545454546
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.9449

2020-09-21 16:05:21,377	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.30563340289518237, 'policy_loss': -5.52408801013371e-05, 'vf_loss': 0.31296135298907757, 'vf_explained_var': 0.84694976, 'kl': 0.0008617860055724069, 'entropy': 1.570883259177208, 'entropy_coeff': 0.005}
2020-09-21 16:05:21,703	INFO rollout_worker.py:736 -- Training on concatenated sample batches:

{ 'count': 64,
  'policy_batches': { 'default_policy': { 'data': { 'action_dist_inputs': np.ndarray((64, 21), dtype=float32, min=-6.831, max=7.511, mean=-0.211),
                                                    'action_logp': np.ndarray((64,), dtype=float32, min=-5.949, max=-0.018, mean=-1.503),
                                                    'action_prob': np.ndarray((64,), dtype=float32, min=0.003, max=0.982, mean=0.413),
                                                    'actions': np.ndarray((64,), dtype=int64, min=0.0, max=20.0, mean=5.781

2020-09-21 16:05:28,027	DEBUG sgd.py:120 -- 14 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.10334944049827754, 'policy_loss': -0.11459660762920976, 'vf_loss': 0.009634140616981313, 'vf_explained_var': 0.9949926, 'kl': 0.013948245265055448, 'entropy': 1.560408554971218, 'entropy_coeff': 0.005}
2020-09-21 16:05:28,481	DEBUG sgd.py:120 -- 15 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.1010629408992827, 'policy_loss': -0.11213014414533973, 'vf_loss': 0.008830975741147995, 'vf_explained_var': 0.99487823, 'kl': 0.014832831104286015, 'entropy': 1.5551864206790924, 'entropy_coeff': 0.005}
2020-09-21 16:05:28,964	DEBUG sgd.py:120 -- 16 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.09938391053583473, 'policy_loss': -0.10908446123357862, 'vf_loss': 0.007752722594887018, 'vf_explained_var': 0.9957768, 'kl': 0.014396063343156129, 'entropy': 1.55

custom_metrics: {}
date: 2020-09-21_16-05-35
done: false
episode_len_mean: 1615.26
episode_reward_max: -22.42452783415769
episode_reward_mean: -29.808906649069673
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 131
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.6749999999999999
      cur_lr: 1.0e-05
      entropy: 1.5537347793579102
      entropy_coeff: 0.005
      kl: 0.0180702320067212
      policy_loss: -0.12810388836078346
      total_loss: -0.12051334115676582
      vf_explained_var: 0.9983190298080444
      vf_loss: 0.003161809698212892
  num_steps_sampled: 193000
  num_steps_trained: 193000
iterations_since_restore: 193
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.372727272727273
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.9449

2020-09-21 16:05:38,466	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.21046231780201197, 'policy_loss': 0.006668223300948739, 'vf_loss': 0.21142503013834357, 'vf_explained_var': 0.9678037, 'kl': 0.0005140655979999353, 'entropy': 1.5955872908234596, 'entropy_coeff': 0.005}
2020-09-21 16:05:38,950	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.05356061819475144, 'policy_loss': -0.017460506525821984, 'vf_loss': 0.07713519665412605, 'vf_explained_var': 0.98863256, 'kl': 0.0026815759629243985, 'entropy': 1.5848279893398285, 'entropy_coeff': 0.005}
2020-09-21 16:05:39,405	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.0025666182627901435, 'policy_loss': -0.03481056506279856, 'vf_loss': 0.04236140288412571, 'vf_explained_var': 0.99303544, 'kl': 0.004365757733467035, 'entropy': 1.586

2020-09-21 16:05:49,849	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.12953384243883193, 'policy_loss': -0.13651155191473663, 'vf_loss': 0.002959083481982816, 'vf_explained_var': 0.99951196, 'kl': 0.017540352826472372, 'entropy': 1.5642233341932297, 'entropy_coeff': 0.005}
2020-09-21 16:05:50,333	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.11714870459400117, 'policy_loss': -0.12406697473488748, 'vf_loss': 0.0029081763059366494, 'vf_explained_var': 0.99948335, 'kl': 0.017534404702018946, 'entropy': 1.5651253536343575, 'entropy_coeff': 0.005}
2020-09-21 16:05:50,786	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.12755319697316736, 'policy_loss': -0.13489373866468668, 'vf_loss': 0.0027129248046549037, 'vf_explained_var': 0.9995562, 'kl': 0.01842639036476612, 'entropy': 

custom_metrics: {}
date: 2020-09-21_16-05-52
done: false
episode_len_mean: 1615.26
episode_reward_max: -22.42452783415769
episode_reward_mean: -29.808906649069673
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 131
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.6749999999999999
      cur_lr: 1.0e-05
      entropy: 1.5615827590227127
      entropy_coeff: 0.005
      kl: 0.01899535901611671
      policy_loss: -0.13475419068709016
      total_loss: -0.1274276098702103
      vf_explained_var: 0.9995929002761841
      vf_loss: 0.0023126224623410963
  num_steps_sampled: 194000
  num_steps_trained: 194000
iterations_since_restore: 194
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.4409090909090905
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.94

[2m[36m(pid=24699)[0m 2020-09-21 16:05:52,499	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 7539.783877278231,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 3252},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-11.782, max=27.721, mean=0.45),
[2m[36m(pid=24699)[0m                                   'prev_action': 0,
[2m[36m(pid=24699)

2020-09-21 16:05:55,959	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.21780097304144874, 'policy_loss': -0.0005261474289000034, 'vf_loss': 0.22558195516467094, 'vf_explained_var': 0.9205693, 'kl': 0.0011002692223369692, 'entropy': 1.5995043218135834, 'entropy_coeff': 0.005}
2020-09-21 16:05:56,442	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.06373822822934017, 'policy_loss': -0.027848677127622068, 'vf_loss': 0.09691726346500218, 'vf_explained_var': 0.9678644, 'kl': 0.0039835628267610446, 'entropy': 1.603852890431881, 'entropy_coeff': 0.005}
2020-09-21 16:05:56,895	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.0037683958653360605, 'policy_loss': -0.04776526428759098, 'vf_loss': 0.04686548898462206, 'vf_explained_var': 0.98362803, 'kl': 0.00761742377653718, 'entropy': 1.602

2020-09-21 16:06:07,334	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.1267319674952887, 'policy_loss': -0.1321324440650642, 'vf_loss': 0.0024738366410019808, 'vf_explained_var': 0.9991205, 'kl': 0.01625474722823128, 'entropy': 1.609063133597374, 'entropy_coeff': 0.005}
2020-09-21 16:06:07,819	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.12369935237802565, 'policy_loss': -0.12900027632713318, 'vf_loss': 0.0024286535699502565, 'vf_explained_var': 0.9990884, 'kl': 0.0161347491084598, 'entropy': 1.6037363111972809, 'entropy_coeff': 0.005}
2020-09-21 16:06:08,272	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.12300537870032713, 'policy_loss': -0.12841975153423846, 'vf_loss': 0.002355464981519617, 'vf_explained_var': 0.9991826, 'kl': 0.0164461059612222, 'entropy': 1.6084430

custom_metrics: {}
date: 2020-09-21_16-06-09
done: false
episode_len_mean: 1615.26
episode_reward_max: -22.42452783415769
episode_reward_mean: -29.808906649069673
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 131
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.6749999999999999
      cur_lr: 1.0e-05
      entropy: 1.6108081862330437
      entropy_coeff: 0.005
      kl: 0.016999301442410797
      policy_loss: -0.13688789121806622
      total_loss: -0.13153585034888238
      vf_explained_var: 0.9992976784706116
      vf_loss: 0.001931554106704425
  num_steps_sampled: 195000
  num_steps_trained: 195000
iterations_since_restore: 195
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.339130434782609
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.94

[2m[36m(pid=24699)[0m 2020-09-21 16:06:09,960	INFO rollout_worker.py:561 -- Generating sample batch of size 200
2020-09-21 16:06:12,779	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.37921368423849344, 'policy_loss': 0.0016387409996241331, 'vf_loss': 0.38542120438069105, 'vf_explained_var': 0.9060638, 'kl': 0.00031176962896495297, 'entropy': 1.611341804265976, 'entropy_coeff': 0.005}
2020-09-21 16:06:13,266	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.11730412335600704, 'policy_loss': -0.012396117963362485, 'vf_loss': 0.1363723617978394, 'vf_explained_var': 0.9652098, 'kl': 0.0021337401049095206, 'entropy': 1.6224803999066353, 'entropy_coeff': 0.005}
2020-09-21 16:06:13,713	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.023347157053649426, 'policy_loss': -0.03674453403800

2020-09-21 16:06:21,771	DEBUG rollout_worker.py:760 -- Training out:

{ 'default_policy': { 'learner_stats': { 'allreduce_latency': 0.0,
                                         'cur_kl_coeff': 0.6749999999999999,
                                         'cur_lr': 1e-05,
                                         'entropy': 1.7824320793151855,
                                         'entropy_coeff': 0.005,
                                         'kl': 0.01623011752963066,
                                         'policy_loss': -0.22147291898727417,
                                         'total_loss': -0.21514151990413666,
                                         'vf_explained_var': np.ndarray((1,), dtype=float32, min=0.999, max=0.999, mean=0.999),
                                         'vf_loss': 0.004288231022655964}}}

2020-09-21 16:06:21,772	DEBUG sgd.py:120 -- 19 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.11932454688940197, 

custom_metrics: {}
date: 2020-09-21_16-06-26
done: false
episode_len_mean: 1670.64
episode_reward_max: -22.42452783415769
episode_reward_mean: -29.747244120955934
episode_reward_min: -32.22698139089825
episodes_this_iter: 1
episodes_total: 132
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.6749999999999999
      cur_lr: 1.0e-05
      entropy: 1.597115896642208
      entropy_coeff: 0.005
      kl: 0.01657032046932727
      policy_loss: -0.13297151017468423
      total_loss: -0.12815154041163623
      vf_explained_var: 0.9995706081390381
      vf_loss: 0.001620583308977075
  num_steps_sampled: 196000
  num_steps_trained: 196000
iterations_since_restore: 196
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.381818181818182
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.9449

2020-09-21 16:06:29,360	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.30768186086788774, 'policy_loss': 0.0031321216374635696, 'vf_loss': 0.31192832067608833, 'vf_explained_var': 0.88546365, 'kl': 0.0006863551927039357, 'entropy': 1.5683764591813087, 'entropy_coeff': 0.005}
2020-09-21 16:06:29,811	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.09594484401168302, 'policy_loss': -0.017316383426077664, 'vf_loss': 0.1198900300078094, 'vf_explained_var': 0.95435536, 'kl': 0.001714804606308462, 'entropy': 1.5572595745325089, 'entropy_coeff': 0.005}
2020-09-21 16:06:30,302	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.017241371620912105, 'policy_loss': -0.042900225380435586, 'vf_loss': 0.06484861951321363, 'vf_explained_var': 0.9753114, 'kl': 0.0045965197059558704, 'entropy': 1.561

2020-09-21 16:06:40,734	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.1046918067149818, 'policy_loss': -0.10798520816024393, 'vf_loss': 0.002404784092504997, 'vf_explained_var': 0.9990531, 'kl': 0.01284600974759087, 'entropy': 1.5564886406064034, 'entropy_coeff': 0.005}
2020-09-21 16:06:41,178	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.11140883574262261, 'policy_loss': -0.11471571016591042, 'vf_loss': 0.0023143670332501642, 'vf_explained_var': 0.99909806, 'kl': 0.012958639883436263, 'entropy': 1.5509138852357864, 'entropy_coeff': 0.005}
2020-09-21 16:06:41,659	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.11014594011976442, 'policy_loss': -0.11366353341145441, 'vf_loss': 0.0021868285548407584, 'vf_explained_var': 0.9991585, 'kl': 0.013477071363013238, 'entropy': 1.

custom_metrics: {}
date: 2020-09-21_16-06-43
done: false
episode_len_mean: 1670.64
episode_reward_max: -22.42452783415769
episode_reward_mean: -29.747244120955934
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 132
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.6749999999999999
      cur_lr: 1.0e-05
      entropy: 1.554347887635231
      entropy_coeff: 0.005
      kl: 0.014006113458890468
      policy_loss: -0.12076002033427358
      total_loss: -0.11720821773633361
      vf_explained_var: 0.9992610216140747
      vf_loss: 0.0018694216123549268
  num_steps_sampled: 197000
  num_steps_trained: 197000
iterations_since_restore: 197
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.1090909090909085
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.9

2020-09-21 16:06:46,236	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.36041616555303335, 'policy_loss': -0.0035079645458608866, 'vf_loss': 0.3715841006487608, 'vf_explained_var': 0.95159566, 'kl': 0.00040046890758735354, 'entropy': 1.586057223379612, 'entropy_coeff': 0.005}
2020-09-21 16:06:46,731	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.07209156395401806, 'policy_loss': -0.026197254774160683, 'vf_loss': 0.1044140812009573, 'vf_explained_var': 0.9858228, 'kl': 0.0027512624801602215, 'entropy': 1.5964726954698563, 'entropy_coeff': 0.005}
2020-09-21 16:06:47,180	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.0011429783189669251, 'policy_loss': -0.04739669163245708, 'vf_loss': 0.05126715870574117, 'vf_explained_var': 0.99311304, 'kl': 0.004371179646113887, 'entropy': 1.59

2020-09-21 16:06:57,631	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.1227569481707178, 'policy_loss': -0.12767948088003322, 'vf_loss': 0.0018989956224686466, 'vf_explained_var': 0.99971247, 'kl': 0.01620736054610461, 'entropy': 1.583286538720131, 'entropy_coeff': 0.005}
2020-09-21 16:06:58,127	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.1311826602614019, 'policy_loss': -0.13620150927454233, 'vf_loss': 0.0018108327931258827, 'vf_explained_var': 0.99971294, 'kl': 0.016477485711220652, 'entropy': 1.5828567892313004, 'entropy_coeff': 0.005}
2020-09-21 16:06:58,584	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.13112589297816157, 'policy_loss': -0.1361796564888209, 'vf_loss': 0.001678496366366744, 'vf_explained_var': 0.9997474, 'kl': 0.016728715389035642, 'entropy': 1.58

custom_metrics: {}
date: 2020-09-21_16-07-00
done: false
episode_len_mean: 1670.64
episode_reward_max: -22.42452783415769
episode_reward_mean: -29.747244120955934
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 132
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.6749999999999999
      cur_lr: 1.0e-05
      entropy: 1.58287213742733
      entropy_coeff: 0.005
      kl: 0.01743484428152442
      policy_loss: -0.13403298787306994
      total_loss: -0.12865500163752586
      vf_explained_var: 0.9997730851173401
      vf_loss: 0.0015238293271977454
  num_steps_sampled: 198000
  num_steps_trained: 198000
iterations_since_restore: 198
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 6.280952380952381
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.714285714285714
  vram_util_percent0: 0.9449

2020-09-21 16:07:03,427	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.15764100244268775, 'policy_loss': -0.007141298381611705, 'vf_loss': 0.17196550127118826, 'vf_explained_var': 0.94668204, 'kl': 0.0011367385344977776, 'entropy': 1.590098686516285, 'entropy_coeff': 0.005}
2020-09-21 16:07:03,905	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.03534579137340188, 'policy_loss': -0.0242577601748053, 'vf_loss': 0.06326013058423996, 'vf_explained_var': 0.9796595, 'kl': 0.0062265843589557335, 'entropy': 1.5719044730067253, 'entropy_coeff': 0.005}
2020-09-21 16:07:04,353	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.014824948040768504, 'policy_loss': -0.0443913834169507, 'vf_loss': 0.03336789517197758, 'vf_explained_var': 0.98956215, 'kl': 0.006189436506247148, 'entropy': 1.595865

2020-09-21 16:07:14,753	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.12790609942749143, 'policy_loss': -0.13400434120558202, 'vf_loss': 0.002043949702056125, 'vf_explained_var': 0.9993063, 'kl': 0.017790831858292222, 'entropy': 1.59090406447649, 'entropy_coeff': 0.005}
2020-09-21 16:07:15,236	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.13245746330358088, 'policy_loss': -0.13880614074878395, 'vf_loss': 0.0019546028997865506, 'vf_explained_var': 0.9993658, 'kl': 0.018273428548127413, 'entropy': 1.5880982875823975, 'entropy_coeff': 0.005}
2020-09-21 16:07:15,693	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.13194015115732327, 'policy_loss': -0.13838276284513995, 'vf_loss': 0.0018906972472905181, 'vf_explained_var': 0.99939424, 'kl': 0.018518736818805337, 'entropy': 1.

custom_metrics: {}
date: 2020-09-21_16-07-17
done: false
episode_len_mean: 1670.64
episode_reward_max: -22.42452783415769
episode_reward_mean: -29.747244120955934
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 132
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.6749999999999999
      cur_lr: 1.0e-05
      entropy: 1.5920713022351265
      entropy_coeff: 0.005
      kl: 0.0187173904851079
      policy_loss: -0.1440243001561612
      total_loss: -0.1377023954410106
      vf_explained_var: 0.9994341731071472
      vf_loss: 0.0016480200210935436
  num_steps_sampled: 199000
  num_steps_trained: 199000
iterations_since_restore: 199
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.3500000000000005
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.9449

[2m[36m(pid=24699)[0m 2020-09-21 16:07:17,391	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 6980.509764783548,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 4252},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-14.874, max=14.975, mean=0.609),
[2m[36m(pid=24699)[0m                                   'prev_action': 10,
[2m[36m(pid=24699

2020-09-21 16:07:20,430	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.23963760596234351, 'policy_loss': 0.0008269910467788577, 'vf_loss': 0.24678521323949099, 'vf_explained_var': 0.8876339, 'kl': 0.00043087514626374834, 'entropy': 1.653088502585888, 'entropy_coeff': 0.005}
2020-09-21 16:07:20,919	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.06618800316937268, 'policy_loss': -0.023786441539414227, 'vf_loss': 0.09635348874144256, 'vf_explained_var': 0.95802855, 'kl': 0.002861433895304799, 'entropy': 1.6621020287275314, 'entropy_coeff': 0.005}
2020-09-21 16:07:21,371	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.0011089375184383243, 'policy_loss': -0.03976283478550613, 'vf_loss': 0.045057738898321986, 'vf_explained_var': 0.9807006, 'kl': 0.006062943168217316, 'entropy': 1.655

2020-09-21 16:07:27,082	DEBUG sgd.py:120 -- 14 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.10182227031327784, 'policy_loss': -0.10689611942507327, 'vf_loss': 0.004349907379946671, 'vf_explained_var': 0.99811584, 'kl': 0.013368409068789333, 'entropy': 1.6599476039409637, 'entropy_coeff': 0.005}
2020-09-21 16:07:27,535	DEBUG sgd.py:120 -- 15 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.10934079473372549, 'policy_loss': -0.11466667370405048, 'vf_loss': 0.004219354523229413, 'vf_explained_var': 0.9982172, 'kl': 0.01394457824062556, 'entropy': 1.6612124145030975, 'entropy_coeff': 0.005}
2020-09-21 16:07:28,021	DEBUG sgd.py:120 -- 16 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.10769817547407001, 'policy_loss': -0.11244379496201873, 'vf_loss': 0.003591835091356188, 'vf_explained_var': 0.99833554, 'kl': 0.013998194481246173, 'entropy': 1.

custom_metrics: {}
date: 2020-09-21_16-07-34
done: false
episode_len_mean: 1670.64
episode_reward_max: -22.42452783415769
episode_reward_mean: -29.747244120955934
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 132
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.6749999999999999
      cur_lr: 1.0e-05
      entropy: 1.6549387723207474
      entropy_coeff: 0.005
      kl: 0.017675298964604735
      policy_loss: -0.13262713595759124
      total_loss: -0.1272300841519609
      vf_explained_var: 0.9992071390151978
      vf_loss: 0.001740917650749907
  num_steps_sampled: 200000
  num_steps_trained: 200000
iterations_since_restore: 200
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.322727272727273
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.944

[2m[36m(pid=24699)[0m 2020-09-21 16:07:34,442	INFO rollout_worker.py:561 -- Generating sample batch of size 200
2020-09-21 16:07:37,291	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.19520223047584295, 'policy_loss': 0.0033647145610302687, 'vf_loss': 0.1988656548783183, 'vf_explained_var': 0.9304857, 'kl': 0.0014585489042775457, 'entropy': 1.6025303974747658, 'entropy_coeff': 0.005}
2020-09-21 16:07:37,777	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.043361632502637804, 'policy_loss': -0.023854580213082954, 'vf_loss': 0.0719656590372324, 'vf_explained_var': 0.97383404, 'kl': 0.004825778538361192, 'entropy': 1.6013695150613785, 'entropy_coeff': 0.005}
2020-09-21 16:07:38,229	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.003228307352401316, 'policy_loss': -0.0376903285505

2020-09-21 16:07:48,642	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.11914066778263077, 'policy_loss': -0.12479223171249032, 'vf_loss': 0.0021818403474753723, 'vf_explained_var': 0.9991145, 'kl': 0.016789095709100366, 'entropy': 1.572582595050335, 'entropy_coeff': 0.005}
2020-09-21 16:07:49,128	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.11966908152680844, 'policy_loss': -0.12537459679879248, 'vf_loss': 0.002082073930068873, 'vf_explained_var': 0.9991778, 'kl': 0.017063302802853286, 'entropy': 1.5788583010435104, 'entropy_coeff': 0.005}
2020-09-21 16:07:49,580	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.1158402458531782, 'policy_loss': -0.1218769940896891, 'vf_loss': 0.001996864586544689, 'vf_explained_var': 0.9992, 'kl': 0.017624453583266586, 'entropy': 1.571324

custom_metrics: {}
date: 2020-09-21_16-07-51
done: false
episode_len_mean: 1670.64
episode_reward_max: -22.42452783415769
episode_reward_mean: -29.747244120955934
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 132
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.6749999999999999
      cur_lr: 1.0e-05
      entropy: 1.5780325159430504
      entropy_coeff: 0.005
      kl: 0.01775960822124034
      policy_loss: -0.1254603296983987
      total_loss: -0.11963696696329862
      vf_explained_var: 0.9993312358856201
      vf_loss: 0.0017257844519917853
  num_steps_sampled: 201000
  num_steps_trained: 201000
iterations_since_restore: 201
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.509090909090909
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.944

2020-09-21 16:07:54,520	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.18204879708355293, 'policy_loss': -0.005594484508037567, 'vf_loss': 0.19540068367496133, 'vf_explained_var': 0.89831984, 'kl': 0.0005914359141423464, 'entropy': 1.6313239261507988, 'entropy_coeff': 0.005}
2020-09-21 16:07:55,011	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.047594498144462705, 'policy_loss': -0.022178720450028777, 'vf_loss': 0.07553103938698769, 'vf_explained_var': 0.9591415, 'kl': 0.00347371278621722, 'entropy': 1.62051510065794, 'entropy_coeff': 0.005}
2020-09-21 16:07:55,455	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.006671383511275053, 'policy_loss': -0.04033157686353661, 'vf_loss': 0.03898637369275093, 'vf_explained_var': 0.9792536, 'kl': 0.004221133276587352, 'entropy': 1.635089

2020-09-21 16:08:05,877	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.12229633145034313, 'policy_loss': -0.12689507100731134, 'vf_loss': 0.0019154010151396506, 'vf_explained_var': 0.998965, 'kl': 0.016110772383399308, 'entropy': 1.6382857412099838, 'entropy_coeff': 0.005}
2020-09-21 16:08:06,357	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.12717277358751744, 'policy_loss': -0.13220238708890975, 'vf_loss': 0.0017994181616813876, 'vf_explained_var': 0.99895006, 'kl': 0.01691087754443288, 'entropy': 1.6369280964136124, 'entropy_coeff': 0.005}
2020-09-21 16:08:06,811	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.12074649217538536, 'policy_loss': -0.1257431225385517, 'vf_loss': 0.0016997400307445787, 'vf_explained_var': 0.9990421, 'kl': 0.01704483520006761, 'entropy': 1.6

custom_metrics: {}
date: 2020-09-21_16-08-08
done: false
episode_len_mean: 1767.8
episode_reward_max: -22.42452783415769
episode_reward_mean: -29.60586246477948
episode_reward_min: -32.22698139089825
episodes_this_iter: 2
episodes_total: 134
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.6749999999999999
      cur_lr: 1.0e-05
      entropy: 1.6344179287552834
      entropy_coeff: 0.005
      kl: 0.0176400929922238
      policy_loss: -0.13968726538587362
      total_loss: -0.13448803778737783
      vf_explained_var: 0.9991982579231262
      vf_loss: 0.0014642552196164615
  num_steps_sampled: 202000
  num_steps_trained: 202000
iterations_since_restore: 202
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.35
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.9449909821282177
 

2020-09-21 16:08:10,534	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.09707610832992941, 'policy_loss': 0.012236670823767781, 'vf_loss': 0.09301803831476718, 'vf_explained_var': 0.9368973, 'kl': 0.00018702740004816754, 'entropy': 1.6609689518809319, 'entropy_coeff': 0.005}
2020-09-21 16:08:11,025	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.017432554392144084, 'policy_loss': -0.011688238475471735, 'vf_loss': 0.03528495505452156, 'vf_explained_var': 0.9720185, 'kl': 0.0030735042892047204, 'entropy': 1.6477556079626083, 'entropy_coeff': 0.005}
2020-09-21 16:08:11,485	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.013415005872957408, 'policy_loss': -0.028656249400228262, 'vf_loss': 0.020901524811051786, 'vf_explained_var': 0.9818363, 'kl': 0.003864364916807972, 'entropy': 1.6

2020-09-21 16:08:21,827	INFO rollout_worker.py:736 -- Training on concatenated sample batches:

{ 'count': 64,
  'policy_batches': { 'default_policy': { 'data': { 'action_dist_inputs': np.ndarray((64, 21), dtype=float32, min=-7.027, max=8.508, mean=-0.13),
                                                    'action_logp': np.ndarray((64,), dtype=float32, min=-5.281, max=-0.019, mean=-1.441),
                                                    'action_prob': np.ndarray((64,), dtype=float32, min=0.005, max=0.981, mean=0.39),
                                                    'actions': np.ndarray((64,), dtype=int64, min=0.0, max=20.0, mean=6.406),
                                                    'advantages': np.ndarray((64,), dtype=float32, min=-1.623, max=2.487, mean=0.3),
                                                    'agent_index': np.ndarray((64,), dtype=int64, min=0.0, max=0.0, mean=0.0),
                                                    'dones': np.ndarray((64,), dtype=

custom_metrics: {}
date: 2020-09-21_16-08-24
done: false
episode_len_mean: 1767.8
episode_reward_max: -22.42452783415769
episode_reward_mean: -29.60586246477948
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 134
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.6749999999999999
      cur_lr: 1.0e-05
      entropy: 1.6655849441885948
      entropy_coeff: 0.005
      kl: 0.016305885335896164
      policy_loss: -0.11793256236705929
      total_loss: -0.11044780747033656
      vf_explained_var: 0.9958511590957642
      vf_loss: 0.004806215700227767
  num_steps_sampled: 203000
  num_steps_trained: 203000
iterations_since_restore: 203
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.3047619047619055
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999998
  vram_util_percent0: 0.944

2020-09-21 16:08:27,109	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.15331324562430382, 'policy_loss': 0.00010389892850071192, 'vf_loss': 0.1613486260175705, 'vf_explained_var': 0.98303616, 'kl': 0.00040192222158119684, 'entropy': 1.6821153685450554, 'entropy_coeff': 0.005}
2020-09-21 16:08:27,597	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.04252194054424763, 'policy_loss': -0.015432100626640022, 'vf_loss': 0.06563101103529334, 'vf_explained_var': 0.9923587, 'kl': 0.001101206897146767, 'entropy': 1.6840568110346794, 'entropy_coeff': 0.005}
2020-09-21 16:08:28,049	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.015098071889951825, 'policy_loss': -0.020930965314619243, 'vf_loss': 0.04231682443059981, 'vf_explained_var': 0.99503756, 'kl': 0.0031142669613473117, 'entropy': 1.6

2020-09-21 16:08:38,516	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.08208421291783452, 'policy_loss': -0.10483416559873149, 'vf_loss': 0.02054174937075004, 'vf_explained_var': 0.99758744, 'kl': 0.015618611534591764, 'entropy': 1.666870854794979, 'entropy_coeff': 0.005}
2020-09-21 16:08:39,006	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.08193408837541938, 'policy_loss': -0.10524097667075694, 'vf_loss': 0.02106534008635208, 'vf_explained_var': 0.9974606, 'kl': 0.015637665463145822, 'entropy': 1.66277514398098, 'entropy_coeff': 0.005}
2020-09-21 16:08:39,463	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.07985028601251543, 'policy_loss': -0.10303110920358449, 'vf_loss': 0.020756153156980872, 'vf_explained_var': 0.99761236, 'kl': 0.015926206309814006, 'entropy': 1.6651

custom_metrics: {}
date: 2020-09-21_16-08-40
done: false
episode_len_mean: 1767.8
episode_reward_max: -22.42452783415769
episode_reward_mean: -29.60586246477948
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 134
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.6749999999999999
      cur_lr: 1.0e-05
      entropy: 1.664504662156105
      entropy_coeff: 0.005
      kl: 0.016749419271945953
      policy_loss: -0.1131665320135653
      total_loss: -0.09041692106984556
      vf_explained_var: 0.9976716041564941
      vf_loss: 0.01976627390831709
  num_steps_sampled: 204000
  num_steps_trained: 204000
iterations_since_restore: 204
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.147619047619047
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999998
  vram_util_percent0: 0.9449909

[2m[36m(pid=24699)[0m 2020-09-21 16:08:41,169	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 11428.40978329505,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 470},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-6.73, max=6.237, mean=0.36),
[2m[36m(pid=24699)[0m                                   'prev_action': 5,
[2m[36m(pid=24699)[0m 

2020-09-21 16:08:43,591	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.13570936757605523, 'policy_loss': 0.0024211134295910597, 'vf_loss': 0.14131806138902903, 'vf_explained_var': 0.94714665, 'kl': 0.00044563699261566947, 'entropy': 1.666121393442154, 'entropy_coeff': 0.005}
2020-09-21 16:08:44,045	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.042032606550492346, 'policy_loss': -0.01747258030809462, 'vf_loss': 0.0661352479364723, 'vf_explained_var': 0.97459346, 'kl': 0.0024554970441386104, 'entropy': 1.6575043424963951, 'entropy_coeff': 0.005}
2020-09-21 16:08:44,537	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.006766861188225448, 'policy_loss': -0.035027314675971866, 'vf_loss': 0.04828055016696453, 'vf_explained_var': 0.98022306, 'kl': 0.00280891329748556, 'entropy': 1.676

2020-09-21 16:08:54,952	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.08588020957540721, 'policy_loss': -0.12067149009089917, 'vf_loss': 0.03315674024634063, 'vf_explained_var': 0.9867746, 'kl': 0.014659516047686338, 'entropy': 1.6521268486976624, 'entropy_coeff': 0.005}
2020-09-21 16:08:55,404	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.0866369481664151, 'policy_loss': -0.1232422684552148, 'vf_loss': 0.03491172846406698, 'vf_explained_var': 0.98699105, 'kl': 0.014760533522348851, 'entropy': 1.6539531350135803, 'entropy_coeff': 0.005}
2020-09-21 16:08:55,890	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.08124542096629739, 'policy_loss': -0.11844647396355867, 'vf_loss': 0.03543939994415268, 'vf_explained_var': 0.9861878, 'kl': 0.014875322580337524, 'entropy': 1.65583

custom_metrics: {}
date: 2020-09-21_16-08-57
done: false
episode_len_mean: 1767.8
episode_reward_max: -22.42452783415769
episode_reward_mean: -29.60586246477948
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 134
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.6749999999999999
      cur_lr: 1.0e-05
      entropy: 1.6482091695070267
      entropy_coeff: 0.005
      kl: 0.016372914833482355
      policy_loss: -0.12759481347166002
      total_loss: -0.09253083379007876
      vf_explained_var: 0.9866048097610474
      vf_loss: 0.032253309444058686
  num_steps_sampled: 205000
  num_steps_trained: 205000
iterations_since_restore: 205
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.345454545454545
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.9449

[2m[36m(pid=24699)[0m 2020-09-21 16:08:57,562	INFO rollout_worker.py:561 -- Generating sample batch of size 200
2020-09-21 16:09:00,518	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.30529369227588177, 'policy_loss': -0.002650621929205954, 'vf_loss': 0.31552416551858187, 'vf_explained_var': 0.9215073, 'kl': 0.0008623128541470071, 'entropy': 1.6323814913630486, 'entropy_coeff': 0.005}
2020-09-21 16:09:00,974	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.05300725973211229, 'policy_loss': -0.02374977758154273, 'vf_loss': 0.08281487738713622, 'vf_explained_var': 0.98101413, 'kl': 0.0031396236081491224, 'entropy': 1.6354166865348816, 'entropy_coeff': 0.005}
2020-09-21 16:09:01,460	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.011190860997885466, 'policy_loss': -0.042175950249

2020-09-21 16:09:11,872	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.08959204866550863, 'policy_loss': -0.09390988224186003, 'vf_loss': 0.0035911245649913326, 'vf_explained_var': 0.99898016, 'kl': 0.01293467782670632, 'entropy': 1.6008398160338402, 'entropy_coeff': 0.005}
2020-09-21 16:09:12,330	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.09598976536653936, 'policy_loss': -0.10045209620147943, 'vf_loss': 0.0035495917400112376, 'vf_explained_var': 0.9990099, 'kl': 0.013210865494329482, 'entropy': 1.6009192019701004, 'entropy_coeff': 0.005}
2020-09-21 16:09:12,815	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.09050399804254994, 'policy_loss': -0.09484169934876263, 'vf_loss': 0.0034767836914397776, 'vf_explained_var': 0.9989865, 'kl': 0.013124668970704079, 'entropy': 

custom_metrics: {}
date: 2020-09-21_16-09-14
done: false
episode_len_mean: 1767.8
episode_reward_max: -22.42452783415769
episode_reward_mean: -29.60586246477948
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 134
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.6749999999999999
      cur_lr: 1.0e-05
      entropy: 1.6016025096178055
      entropy_coeff: 0.005
      kl: 0.013981170195620507
      policy_loss: -0.09842611732892692
      total_loss: -0.09366048173978925
      vf_explained_var: 0.9990566968917847
      vf_loss: 0.003336352951009758
  num_steps_sampled: 206000
  num_steps_trained: 206000
iterations_since_restore: 206
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.218181818181818
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.9449

2020-09-21 16:09:17,071	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.164628702448681, 'policy_loss': 0.0002877854276448488, 'vf_loss': 0.17216699896380305, 'vf_explained_var': 0.96225864, 'kl': 0.00048726651257346987, 'entropy': 1.6309978365898132, 'entropy_coeff': 0.005}
2020-09-21 16:09:17,555	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.03716377564705908, 'policy_loss': -0.023102888255380094, 'vf_loss': 0.065731814596802, 'vf_explained_var': 0.98348653, 'kl': 0.003975047751737293, 'entropy': 1.6296619921922684, 'entropy_coeff': 0.005}
2020-09-21 16:09:18,043	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.010297912755049765, 'policy_loss': -0.03712578723207116, 'vf_loss': 0.032212173799052835, 'vf_explained_var': 0.9920765, 'kl': 0.004258583110640757, 'entropy': 1.65176

2020-09-21 16:09:23,709	DEBUG sgd.py:120 -- 14 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.08370067237410694, 'policy_loss': -0.0938152449671179, 'vf_loss': 0.010092235810589045, 'vf_explained_var': 0.9974968, 'kl': 0.012130965711548924, 'entropy': 1.633212611079216, 'entropy_coeff': 0.005}
2020-09-21 16:09:24,196	DEBUG sgd.py:120 -- 15 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.08570614573545754, 'policy_loss': -0.09547628846485168, 'vf_loss': 0.009505193593213335, 'vf_explained_var': 0.99754846, 'kl': 0.012456582160666585, 'entropy': 1.6286494582891464, 'entropy_coeff': 0.005}
2020-09-21 16:09:24,652	DEBUG sgd.py:120 -- 16 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.08634559263009578, 'policy_loss': -0.09602089819964021, 'vf_loss': 0.009358372102724388, 'vf_explained_var': 0.99765897, 'kl': 0.01249865157296881, 'entropy': 1.62

custom_metrics: {}
date: 2020-09-21_16-09-30
done: false
episode_len_mean: 1767.8
episode_reward_max: -22.42452783415769
episode_reward_mean: -29.60586246477948
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 134
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.6749999999999999
      cur_lr: 1.0e-05
      entropy: 1.6315578445792198
      entropy_coeff: 0.005
      kl: 0.01637977105565369
      policy_loss: -0.11436752293957397
      total_loss: -0.1028941395925358
      vf_explained_var: 0.9979132413864136
      vf_loss: 0.008574833802413195
  num_steps_sampled: 207000
  num_steps_trained: 207000
iterations_since_restore: 207
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.286363636363638
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.944990

2020-09-21 16:09:33,913	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.13422741764225066, 'policy_loss': 0.0060071758925914764, 'vf_loss': 0.1338290716521442, 'vf_explained_var': 0.90542513, 'kl': 0.003659528989796046, 'entropy': 1.615802325308323, 'entropy_coeff': 0.005}
2020-09-21 16:09:34,402	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.008784154750173911, 'policy_loss': -0.03237167268525809, 'vf_loss': 0.046100761159323156, 'vf_explained_var': 0.96813637, 'kl': 0.004668593770475127, 'entropy': 1.619247354567051, 'entropy_coeff': 0.005}
2020-09-21 16:09:34,848	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.0257145807845518, 'policy_loss': -0.04288239136803895, 'vf_loss': 0.021407227031886578, 'vf_explained_var': 0.9837818, 'kl': 0.005674492771504447, 'entropy': 1.6139392

2020-09-21 16:09:45,224	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.11765899532474577, 'policy_loss': -0.12473577843047678, 'vf_loss': 0.0042739358032122254, 'vf_explained_var': 0.9968096, 'kl': 0.01613074872875586, 'entropy': 1.61708003282547, 'entropy_coeff': 0.005}
2020-09-21 16:09:45,708	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.1212907248409465, 'policy_loss': -0.12802686367649585, 'vf_loss': 0.004022591645480134, 'vf_explained_var': 0.99662924, 'kl': 0.01596238964702934, 'entropy': 1.6122136116027832, 'entropy_coeff': 0.005}
2020-09-21 16:09:46,167	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.1286118975840509, 'policy_loss': -0.13558101025409997, 'vf_loss': 0.004195699759293348, 'vf_explained_var': 0.9967401, 'kl': 0.01610498339869082, 'entropy': 1.619491

custom_metrics: {}
date: 2020-09-21_16-09-47
done: false
episode_len_mean: 1767.8
episode_reward_max: -22.42452783415769
episode_reward_mean: -29.60586246477948
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 134
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.6749999999999999
      cur_lr: 1.0e-05
      entropy: 1.614195890724659
      entropy_coeff: 0.005
      kl: 0.016852090077009052
      policy_loss: -0.1391888097859919
      total_loss: -0.13156816619448364
      vf_explained_var: 0.9968032836914062
      vf_loss: 0.004316467617172748
  num_steps_sampled: 208000
  num_steps_trained: 208000
iterations_since_restore: 208
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.385714285714285
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999998
  vram_util_percent0: 0.944990

2020-09-21 16:09:50,687	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.1225087740458548, 'policy_loss': 0.0007647849852219224, 'vf_loss': 0.12920263735577464, 'vf_explained_var': 0.8679637, 'kl': 0.0009011384084764984, 'entropy': 1.6133839935064316, 'entropy_coeff': 0.005}
2020-09-21 16:09:51,178	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.007310015149414539, 'policy_loss': -0.026136881788261235, 'vf_loss': 0.038803660310804844, 'vf_explained_var': 0.9603217, 'kl': 0.003994678379967809, 'entropy': 1.6106341853737831, 'entropy_coeff': 0.005}
2020-09-21 16:09:51,632	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.025092481402680278, 'policy_loss': -0.037264039274305105, 'vf_loss': 0.016512413509190083, 'vf_explained_var': 0.98106265, 'kl': 0.005434281702036969, 'entropy': 1.6

2020-09-21 16:10:02,044	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.11866795143578202, 'policy_loss': -0.1208990627201274, 'vf_loss': 0.0014868807629682124, 'vf_explained_var': 0.9984211, 'kl': 0.012867731769802049, 'entropy': 1.5882969424128532, 'entropy_coeff': 0.005}
2020-09-21 16:10:02,547	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.11138653452508152, 'policy_loss': -0.11391265573911369, 'vf_loss': 0.0014103098037594464, 'vf_explained_var': 0.99839133, 'kl': 0.013409372535534203, 'entropy': 1.5871024131774902, 'entropy_coeff': 0.005}
2020-09-21 16:10:03,018	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.10970552847720683, 'policy_loss': -0.11191045877058059, 'vf_loss': 0.0013499173255695496, 'vf_explained_var': 0.998309, 'kl': 0.013031515351030976, 'entropy': 1

custom_metrics: {}
date: 2020-09-21_16-10-04
done: false
episode_len_mean: 1767.8
episode_reward_max: -22.42452783415769
episode_reward_mean: -29.60586246477948
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 134
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.6749999999999999
      cur_lr: 1.0e-05
      entropy: 1.5887516513466835
      entropy_coeff: 0.005
      kl: 0.013892357121221721
      policy_loss: -0.11729800485773012
      total_loss: -0.11458059982396662
      vf_explained_var: 0.9984942674636841
      vf_loss: 0.0012838153525080997
  num_steps_sampled: 209000
  num_steps_trained: 209000
iterations_since_restore: 209
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.245454545454544
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.944

[2m[36m(pid=24699)[0m 2020-09-21 16:10:04,711	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 13943.87,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 1470},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-1.1, max=1.586, mean=0.383),
[2m[36m(pid=24699)[0m                                   'prev_action': 0,
[2m[36m(pid=24699)[0m         

2020-09-21 16:10:07,345	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.08843287895433605, 'policy_loss': 0.0019956459291279316, 'vf_loss': 0.09429085813462734, 'vf_explained_var': 0.9455706, 'kl': 0.0006920978256821364, 'entropy': 1.6641585603356361, 'entropy_coeff': 0.005}
2020-09-21 16:10:07,830	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.012492700305301696, 'policy_loss': -0.01923858793452382, 'vf_loss': 0.03780766762793064, 'vf_explained_var': 0.9773135, 'kl': 0.0032383790821768343, 'entropy': 1.6524574980139732, 'entropy_coeff': 0.005}
2020-09-21 16:10:08,282	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.02141324372496456, 'policy_loss': -0.03757354948902503, 'vf_loss': 0.021064121334347874, 'vf_explained_var': 0.9876226, 'kl': 0.004863992027821951, 'entropy': 1.6374

2020-09-21 16:10:18,656	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.11559894541278481, 'policy_loss': -0.1216952153481543, 'vf_loss': 0.002926052547991276, 'vf_explained_var': 0.9980325, 'kl': 0.016811064910143614, 'entropy': 1.6354500353336334, 'entropy_coeff': 0.005}
2020-09-21 16:10:19,142	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.12215541931800544, 'policy_loss': -0.1284708371385932, 'vf_loss': 0.0027614428618107922, 'vf_explained_var': 0.99822736, 'kl': 0.017366587184369564, 'entropy': 1.6336941048502922, 'entropy_coeff': 0.005}
2020-09-21 16:10:19,598	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.11969775694888085, 'policy_loss': -0.12567654927261174, 'vf_loss': 0.002677132513781544, 'vf_explained_var': 0.9982697, 'kl': 0.01700962003087625, 'entropy': 1.63

custom_metrics: {}
date: 2020-09-21_16-10-21
done: false
episode_len_mean: 1767.8
episode_reward_max: -22.42452783415769
episode_reward_mean: -29.60586246477948
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 134
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.6749999999999999
      cur_lr: 1.0e-05
      entropy: 1.6307931393384933
      entropy_coeff: 0.005
      kl: 0.0175936539308168
      policy_loss: -0.1396702806232497
      total_loss: -0.13323499372927472
      vf_explained_var: 0.9981984496116638
      vf_loss: 0.0027135395648656413
  num_steps_sampled: 210000
  num_steps_trained: 210000
iterations_since_restore: 210
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.461904761904762
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999998
  vram_util_percent0: 0.944990

[2m[36m(pid=24699)[0m 2020-09-21 16:10:21,606	INFO rollout_worker.py:561 -- Generating sample batch of size 200
2020-09-21 16:10:23,847	DEBUG rollout_worker.py:760 -- Training out:

{ 'default_policy': { 'learner_stats': { 'allreduce_latency': 0.0,
                                         'cur_kl_coeff': 0.6749999999999999,
                                         'cur_lr': 1e-05,
                                         'entropy': 1.5900243520736694,
                                         'entropy_coeff': 0.005,
                                         'kl': -3.112888435907735e-08,
                                         'policy_loss': -0.10985769331455231,
                                         'total_loss': 0.1732948273420334,
                                         'vf_explained_var': np.ndarray((1,), dtype=float32, min=0.929, max=0.929, mean=0.929),
                                         'vf_loss': 0.29110267758369446}}}

2020-09-21 16:10:23,852	INFO rollout_worker.py:7

2020-09-21 16:10:30,490	DEBUG sgd.py:120 -- 13 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.08159600070212036, 'policy_loss': -0.09854151529725641, 'vf_loss': 0.016716563142836094, 'vf_explained_var': 0.9949234, 'kl': 0.011521596170496196, 'entropy': 1.5096242725849152, 'entropy_coeff': 0.005}
2020-09-21 16:10:30,972	DEBUG sgd.py:120 -- 14 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.08522725221700966, 'policy_loss': -0.10206253896467388, 'vf_loss': 0.016570982814300805, 'vf_explained_var': 0.99501574, 'kl': 0.011559281061636284, 'entropy': 1.5076409131288528, 'entropy_coeff': 0.005}
2020-09-21 16:10:31,420	DEBUG sgd.py:120 -- 15 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.08390766306547448, 'policy_loss': -0.10050457523902878, 'vf_loss': 0.01608715602196753, 'vf_explained_var': 0.9950875, 'kl': 0.011913073249161243, 'entropy': 1.5

custom_metrics: {}
date: 2020-09-21_16-10-38
done: false
episode_len_mean: 1767.8
episode_reward_max: -22.42452783415769
episode_reward_mean: -29.60586246477948
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 134
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.6749999999999999
      cur_lr: 1.0e-05
      entropy: 1.4959161207079887
      entropy_coeff: 0.005
      kl: 0.01503011683234945
      policy_loss: -0.1236228495836258
      total_loss: -0.10606918763369322
      vf_explained_var: 0.9955087304115295
      vf_loss: 0.014887915109284222
  num_steps_sampled: 211000
  num_steps_trained: 211000
iterations_since_restore: 211
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.636363636363637
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.944990

2020-09-21 16:10:41,320	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.2533381790854037, 'policy_loss': 0.004458557581529021, 'vf_loss': 0.2559802928008139, 'vf_explained_var': 0.8713528, 'kl': 0.0006644086119755865, 'entropy': 1.5098277479410172, 'entropy_coeff': 0.005}
2020-09-21 16:10:41,808	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.06983341602608562, 'policy_loss': -0.015358244301751256, 'vf_loss': 0.09020085795782506, 'vf_explained_var': 0.95580673, 'kl': 0.0036667065214714967, 'entropy': 1.4968456402420998, 'entropy_coeff': 0.005}
2020-09-21 16:10:42,260	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.003521659644320607, 'policy_loss': -0.042438618023879826, 'vf_loss': 0.04955215938389301, 'vf_explained_var': 0.9747226, 'kl': 0.005698455264791846, 'entropy': 1.487668

2020-09-21 16:10:52,699	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.1134722043061629, 'policy_loss': -0.13006024167407304, 'vf_loss': 0.012050908611854538, 'vf_explained_var': 0.993659, 'kl': 0.017767221375834197, 'entropy': 1.4911495819687843, 'entropy_coeff': 0.005}
2020-09-21 16:10:53,183	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.11032109661027789, 'policy_loss': -0.1266910817939788, 'vf_loss': 0.011732839891919866, 'vf_explained_var': 0.99366945, 'kl': 0.017910979106090963, 'entropy': 1.490554466843605, 'entropy_coeff': 0.005}
2020-09-21 16:10:53,636	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.1104396628215909, 'policy_loss': -0.12771725002676249, 'vf_loss': 0.01239883858943358, 'vf_explained_var': 0.99333763, 'kl': 0.018254067515954375, 'entropy': 1.48855

custom_metrics: {}
date: 2020-09-21_16-10-55
done: false
episode_len_mean: 1809.92
episode_reward_max: -21.973405018039006
episode_reward_mean: -29.518933757855578
episode_reward_min: -32.22698139089825
episodes_this_iter: 1
episodes_total: 135
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.6749999999999999
      cur_lr: 1.0e-05
      entropy: 1.4870520755648613
      entropy_coeff: 0.005
      kl: 0.01882084069075063
      policy_loss: -0.14511405088705942
      total_loss: -0.1285514086484909
      vf_explained_var: 0.9940699338912964
      vf_loss: 0.01129383806255646
  num_steps_sampled: 212000
  num_steps_trained: 212000
iterations_since_restore: 212
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.386363636363637
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.9449

2020-09-21 16:10:58,161	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.2284471420571208, 'policy_loss': 0.001234215043950826, 'vf_loss': 0.23463628813624382, 'vf_explained_var': 0.9084506, 'kl': 0.0006510038117778505, 'entropy': 1.5725577622652054, 'entropy_coeff': 0.005}
2020-09-21 16:10:58,647	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.011597763397730887, 'policy_loss': -0.03733600629493594, 'vf_loss': 0.05518491251859814, 'vf_explained_var': 0.9785495, 'kl': 0.0024310313892783597, 'entropy': 1.5784171223640442, 'entropy_coeff': 0.005}
2020-09-21 16:10:59,110	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.04236900026444346, 'policy_loss': -0.061679386999458075, 'vf_loss': 0.02372081356588751, 'vf_explained_var': 0.9908217, 'kl': 0.005033414825447835, 'entropy': 1.561596

2020-09-21 16:11:09,497	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.12586615106556565, 'policy_loss': -0.13021189242135733, 'vf_loss': 0.0016767269335105084, 'vf_explained_var': 0.9992557, 'kl': 0.015477053821086884, 'entropy': 1.555600382387638, 'entropy_coeff': 0.005}
2020-09-21 16:11:09,984	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.12709496496245265, 'policy_loss': -0.1314289680449292, 'vf_loss': 0.0016690147822373547, 'vf_explained_var': 0.99925065, 'kl': 0.01546251680701971, 'entropy': 1.554442822933197, 'entropy_coeff': 0.005}
2020-09-21 16:11:10,436	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.12089603923959658, 'policy_loss': -0.1255698173190467, 'vf_loss': 0.001606393234396819, 'vf_explained_var': 0.9993011, 'kl': 0.0160591229214333, 'entropy': 1.55450

custom_metrics: {}
date: 2020-09-21_16-11-11
done: false
episode_len_mean: 1809.92
episode_reward_max: -21.973405018039006
episode_reward_mean: -29.518933757855585
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 135
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.6749999999999999
      cur_lr: 1.0e-05
      entropy: 1.5569414123892784
      entropy_coeff: 0.005
      kl: 0.01607815350871533
      policy_loss: -0.1345196629408747
      total_loss: -0.1299226952251047
      vf_explained_var: 0.9993222951889038
      vf_loss: 0.0015289171715267003
  num_steps_sampled: 213000
  num_steps_trained: 213000
iterations_since_restore: 213
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.386363636363637
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.944

2020-09-21 16:11:14,586	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.15793311945162714, 'policy_loss': -0.00033919140696525574, 'vf_loss': 0.16513489512726665, 'vf_explained_var': 0.9729841, 'kl': 0.0010509005445904496, 'entropy': 1.5143878906965256, 'entropy_coeff': 0.005}
2020-09-21 16:11:15,053	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': 0.042571783225866966, 'policy_loss': -0.023599829990416765, 'vf_loss': 0.07026353222317994, 'vf_explained_var': 0.9881106, 'kl': 0.005237537567154504, 'entropy': 1.5254519432783127, 'entropy_coeff': 0.005}
2020-09-21 16:11:15,538	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.006617486127652228, 'policy_loss': -0.04205749635002576, 'vf_loss': 0.03882873337715864, 'vf_explained_var': 0.99263126, 'kl': 0.006381907471222803, 'entropy': 1.5

2020-09-21 16:11:23,899	DEBUG rollout_worker.py:760 -- Training out:

{ 'default_policy': { 'learner_stats': { 'allreduce_latency': 0.0,
                                         'cur_kl_coeff': 0.6749999999999999,
                                         'cur_lr': 1e-05,
                                         'entropy': 1.5371778011322021,
                                         'entropy_coeff': 0.005,
                                         'kl': 0.018657764419913292,
                                         'policy_loss': -0.08358568698167801,
                                         'total_loss': -0.07622046768665314,
                                         'vf_explained_var': np.ndarray((1,), dtype=float32, min=1.0, max=1.0, mean=1.0),
                                         'vf_loss': 0.0024571267422288656}}}

2020-09-21 16:11:24,121	DEBUG sgd.py:120 -- 20 {'allreduce_latency': 0.0, 'cur_kl_coeff': 0.6749999999999999, 'cur_lr': 1e-05, 'total_loss': -0.11173802916891873, 'pol

custom_metrics: {}
date: 2020-09-21_16-11-28
done: false
episode_len_mean: 1809.92
episode_reward_max: -21.973405018039006
episode_reward_mean: -29.518933757855585
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 135
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 0.6749999999999999
      cur_lr: 1.0e-05
      entropy: 1.5492440462112427
      entropy_coeff: 0.005
      kl: 0.020603093784302473
      policy_loss: -0.13860886602196842
      total_loss: -0.12704454374033958
      vf_explained_var: 0.9988840818405151
      vf_loss: 0.005403458577347919
  num_steps_sampled: 214000
  num_steps_trained: 214000
iterations_since_restore: 214
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.4363636363636365
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.

[2m[36m(pid=24699)[0m 2020-09-21 16:11:28,645	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 10744.574568677916,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 2470},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-1.095, max=3.687, mean=0.364),
[2m[36m(pid=24699)[0m                                   'prev_action': 6,
[2m[36m(pid=24699)

2020-09-21 16:11:31,532	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.28519284655340016, 'policy_loss': 0.003931601590011269, 'vf_loss': 0.2882532421499491, 'vf_explained_var': 0.9467783, 'kl': 0.0005327877846710294, 'entropy': 1.5062887743115425, 'entropy_coeff': 0.005}
2020-09-21 16:11:32,021	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.06534478976391256, 'policy_loss': -0.014519692282192409, 'vf_loss': 0.08232202241197228, 'vf_explained_var': 0.98437834, 'kl': 0.0049451836093794554, 'entropy': 1.492908202111721, 'entropy_coeff': 0.005}
2020-09-21 16:11:32,472	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.0021633997093886137, 'policy_loss': -0.03225670545361936, 'vf_loss': 0.03294744831509888, 'vf_explained_var': 0.99329865, 'kl': 0.0045650065439986065, 'entropy': 1.4952432289719582, 'entropy_coeff': 0.00

2020-09-21 16:11:43,369	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.11287353752413765, 'policy_loss': -0.12465428170980886, 'vf_loss': 0.005761800741311163, 'vf_explained_var': 0.9987781, 'kl': 0.013235363003332168, 'entropy': 1.4763726070523262, 'entropy_coeff': 0.005}
2020-09-21 16:11:43,823	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.10537314362591133, 'policy_loss': -0.11712556710699573, 'vf_loss': 0.005563281229115091, 'vf_explained_var': 0.9987798, 'kl': 0.013410154613666236, 'entropy': 1.47772928327322, 'entropy_coeff': 0.005}
2020-09-21 16:11:44,310	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.1061872533755377, 'policy_loss': -0.11803289607632905, 'vf_loss': 0.0055757587106199935, 'vf_explained_var': 0.9988109, 'kl': 0.013480436464305967, 'entropy': 1.4758107513189316, 'entropy_coeff': 0.00

custom_metrics: {}
date: 2020-09-21_16-11-45
done: false
episode_len_mean: 1809.92
episode_reward_max: -21.973405018039006
episode_reward_mean: -29.518933757855585
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 135
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.4808587729930878
      entropy_coeff: 0.005
      kl: 0.01400473271496594
      policy_loss: -0.11708813300356269
      total_loss: -0.10472207691054791
      vf_explained_var: 0.9987624883651733
      vf_loss: 0.005590558081166819
  num_steps_sampled: 215000
  num_steps_trained: 215000
iterations_since_restore: 215
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.333333333333333
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.776190476190479
  vram_util_percent0: 0.94499098212821

[2m[36m(pid=24699)[0m 2020-09-21 16:11:45,541	INFO rollout_worker.py:561 -- Generating sample batch of size 200
2020-09-21 16:11:48,379	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.15827025403268635, 'policy_loss': -0.003560435608960688, 'vf_loss': 0.16864353325217962, 'vf_explained_var': 0.92702425, 'kl': 0.0008511336153342874, 'entropy': 1.534924104809761, 'entropy_coeff': 0.005}
2020-09-21 16:11:48,899	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.023634790093638003, 'policy_loss': -0.030601863283663988, 'vf_loss': 0.05785233713686466, 'vf_explained_var': 0.97440726, 'kl': 0.003937827903428115, 'entropy': 1.5205476731061935, 'entropy_coeff': 0.005}
2020-09-21 16:11:49,354	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.01306436606682837, 'policy_loss': -0.03671130444854498, 'vf_loss': 0.0263886810862459

2020-09-21 16:12:00,319	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.10548778309021145, 'policy_loss': -0.1132194894598797, 'vf_loss': 0.0020556614399538375, 'vf_explained_var': 0.99897397, 'kl': 0.01315340626752004, 'entropy': 1.5283556059002876, 'entropy_coeff': 0.005}
2020-09-21 16:12:00,778	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.10406611138023436, 'policy_loss': -0.11203093337826431, 'vf_loss': 0.0020293385969125666, 'vf_explained_var': 0.99900603, 'kl': 0.01341109786881134, 'entropy': 1.5286511331796646, 'entropy_coeff': 0.005}
2020-09-21 16:12:01,264	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.110645656241104, 'policy_loss': -0.11856769322184846, 'vf_loss': 0.0019662157938000746, 'vf_explained_var': 0.99899304, 'kl': 0.013428759411908686, 'entropy': 1.528158962726593, 'entropy_coeff': 0.

custom_metrics: {}
date: 2020-09-21_16-12-02
done: false
episode_len_mean: 1851.82
episode_reward_max: -21.973405018039006
episode_reward_mean: -29.4794780888356
episode_reward_min: -32.22698139089825
episodes_this_iter: 1
episodes_total: 136
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.5306019335985184
      entropy_coeff: 0.005
      kl: 0.01372065267059952
      policy_loss: -0.1152163241058588
      total_loss: -0.10708745627198368
      vf_explained_var: 0.9990379810333252
      vf_loss: 0.0018897123663919047
  num_steps_sampled: 216000
  num_steps_trained: 216000
iterations_since_restore: 216
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 6.259090909090909
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.754545454545458
  vram_util_percent0: 0.9449909821282177

2020-09-21 16:12:04,890	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.1380604594014585, 'policy_loss': -0.00231939647346735, 'vf_loss': 0.1476744399406016, 'vf_explained_var': 0.95441175, 'kl': 0.0005039735089865105, 'entropy': 1.5609720349311829, 'entropy_coeff': 0.005}
2020-09-21 16:12:05,343	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.003735904465429485, 'policy_loss': -0.034286636917386204, 'vf_loss': 0.0427876889007166, 'vf_explained_var': 0.986078, 'kl': 0.002936128235887736, 'entropy': 1.5475955307483673, 'entropy_coeff': 0.005}
2020-09-21 16:12:05,834	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.03320663014892489, 'policy_loss': -0.05547452700557187, 'vf_loss': 0.024912755994591862, 'vf_explained_var': 0.9916825, 'kl': 0.005037586350226775, 'entropy': 1.5490829795598984, 'entropy_coeff': 0.005}
20

2020-09-21 16:12:16,706	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.11045519507024437, 'policy_loss': -0.12515661935321987, 'vf_loss': 0.00876461758161895, 'vf_explained_var': 0.9967916, 'kl': 0.013596977514680475, 'entropy': 1.5660272017121315, 'entropy_coeff': 0.005}
2020-09-21 16:12:17,189	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.10816655831877142, 'policy_loss': -0.1232850857777521, 'vf_loss': 0.008760240132687613, 'vf_explained_var': 0.9969573, 'kl': 0.014023240422829986, 'entropy': 1.5680492669343948, 'entropy_coeff': 0.005}
2020-09-21 16:12:17,640	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.1169135122327134, 'policy_loss': -0.1316386009566486, 'vf_loss': 0.008623975474620238, 'vf_explained_var': 0.9971647, 'kl': 0.01378173183184117, 'entropy': 1.5705763176083565, 'entropy_coeff': 0.005}


custom_metrics: {}
date: 2020-09-21_16-12-18
done: false
episode_len_mean: 1851.82
episode_reward_max: -21.973405018039006
episode_reward_mean: -29.4794780888356
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 136
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.5688544288277626
      entropy_coeff: 0.005
      kl: 0.014323245908599347
      policy_loss: -0.13625624659471214
      total_loss: -0.12083920044824481
      vf_explained_var: 0.9968681335449219
      vf_loss: 0.008759028423810378
  num_steps_sampled: 217000
  num_steps_trained: 217000
iterations_since_restore: 217
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.400000000000001
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.944990982128217

2020-09-21 16:12:21,597	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.11229683575220406, 'policy_loss': -0.0033362460089847445, 'vf_loss': 0.12296476727351546, 'vf_explained_var': 0.9814446, 'kl': 0.0005117400232590441, 'entropy': 1.5699661150574684, 'entropy_coeff': 0.005}
2020-09-21 16:12:22,080	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.0086523144855164, 'policy_loss': -0.024563601473346353, 'vf_loss': 0.037552954512648284, 'vf_explained_var': 0.99361527, 'kl': 0.003579077892936766, 'entropy': 1.5921709090471268, 'entropy_coeff': 0.005}
2020-09-21 16:12:22,530	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.029722390172537416, 'policy_loss': -0.04588955326471478, 'vf_loss': 0.017449416918680072, 'vf_explained_var': 0.99689454, 'kl': 0.006643090222496539, 'entropy': 1.601677544414997, 'entropy_coeff': 0.0

2020-09-21 16:12:28,232	DEBUG sgd.py:120 -- 14 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.1082918862812221, 'policy_loss': -0.11483712703920901, 'vf_loss': 0.003504567146592308, 'vf_explained_var': 0.99936455, 'kl': 0.010861517977900803, 'entropy': 1.5913219898939133, 'entropy_coeff': 0.005}
2020-09-21 16:12:28,683	DEBUG sgd.py:120 -- 15 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.1096254923613742, 'policy_loss': -0.11646356480196118, 'vf_loss': 0.00343820484704338, 'vf_explained_var': 0.99940383, 'kl': 0.011212065175641328, 'entropy': 1.5904695391654968, 'entropy_coeff': 0.005}
2020-09-21 16:12:29,167	DEBUG sgd.py:120 -- 16 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.10812436111154966, 'policy_loss': -0.1152003449969925, 'vf_loss': 0.0034068452077917755, 'vf_explained_var': 0.9993938, 'kl': 0.011515471967868507, 'entropy': 1.5980561301112175, 'entropy_coeff': 0.0

custom_metrics: {}
date: 2020-09-21_16-12-35
done: false
episode_len_mean: 1851.82
episode_reward_max: -21.973405018039006
episode_reward_mean: -29.4794780888356
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 136
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.5888138860464096
      entropy_coeff: 0.005
      kl: 0.014082811598200351
      policy_loss: -0.1395369505044073
      total_loss: -0.1304006902500987
      vf_explained_var: 0.999474048614502
      vf_loss: 0.002821478570695035
  num_steps_sampled: 218000
  num_steps_trained: 218000
iterations_since_restore: 218
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.328571428571428
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000002
  vram_util_percent0: 0.9449909821282179
 

2020-09-21 16:12:38,418	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.09307310543954372, 'policy_loss': 0.002484562573954463, 'vf_loss': 0.09756174171343446, 'vf_explained_var': 0.9699323, 'kl': 0.0008144362067415423, 'entropy': 1.5595637559890747, 'entropy_coeff': 0.005}
2020-09-21 16:12:38,898	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.0016620108508504927, 'policy_loss': -0.029635634506121278, 'vf_loss': 0.02925352519378066, 'vf_explained_var': 0.990349, 'kl': 0.0064897478732746094, 'entropy': 1.5701535120606422, 'entropy_coeff': 0.005}
2020-09-21 16:12:39,356	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.034062498016282916, 'policy_loss': -0.047183718299493194, 'vf_loss': 0.014264533121604472, 'vf_explained_var': 0.99541605, 'kl': 0.006520679860841483, 'entropy': 1.5491012334823608, 'entropy_coeff': 0

2020-09-21 16:12:50,296	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.12650525977369398, 'policy_loss': -0.13473559590056539, 'vf_loss': 0.0023806612007319927, 'vf_explained_var': 0.9992007, 'kl': 0.013387967133894563, 'entropy': 1.5411302745342255, 'entropy_coeff': 0.005}
2020-09-21 16:12:50,760	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.11828250845428556, 'policy_loss': -0.12648340442683548, 'vf_loss': 0.0023039481602609158, 'vf_explained_var': 0.99911535, 'kl': 0.013426999154035002, 'entropy': 1.5395790413022041, 'entropy_coeff': 0.005}
2020-09-21 16:12:51,250	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.11843289772514254, 'policy_loss': -0.12703538872301579, 'vf_loss': 0.0023706720312475227, 'vf_explained_var': 0.9991739, 'kl': 0.013794707250781357, 'entropy': 1.5470655784010887, 'entropy_coeff'

custom_metrics: {}
date: 2020-09-21_16-12-52
done: false
episode_len_mean: 1851.82
episode_reward_max: -21.973405018039006
episode_reward_mean: -29.4794780888356
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 136
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.5449085384607315
      entropy_coeff: 0.005
      kl: 0.01419637113576755
      policy_loss: -0.12631931947544217
      total_loss: -0.11728161235805601
      vf_explained_var: 0.999056339263916
      vf_loss: 0.002388421562500298
  num_steps_sampled: 219000
  num_steps_trained: 219000
iterations_since_restore: 219
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.495454545454546
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.9449909821282177


[2m[36m(pid=24699)[0m 2020-09-21 16:12:52,474	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 10017.199837069391,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 3470},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-6.525, max=3.689, mean=0.424),
[2m[36m(pid=24699)[0m                                   'prev_action': 0,
[2m[36m(pid=24699)

2020-09-21 16:12:55,542	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.17444498208351433, 'policy_loss': 0.0006044136243872344, 'vf_loss': 0.1805827603675425, 'vf_explained_var': 0.94742066, 'kl': 0.0009557770376080743, 'entropy': 1.5419817566871643, 'entropy_coeff': 0.005}
2020-09-21 16:12:56,028	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.023020225926302373, 'policy_loss': -0.023581611865665764, 'vf_loss': 0.04989715828560293, 'vf_explained_var': 0.9864716, 'kl': 0.0043203240929869935, 'entropy': 1.5339292958378792, 'entropy_coeff': 0.005}
2020-09-21 16:12:56,481	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.011053428752347827, 'policy_loss': -0.03289777273312211, 'vf_loss': 0.025047788280062377, 'vf_explained_var': 0.99271876, 'kl': 0.004422026977408677, 'entropy': 1.5361495688557625, 'entropy_coeff': 0.

2020-09-21 16:13:07,367	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.10672017268370837, 'policy_loss': -0.11914133513346314, 'vf_loss': 0.006962187675526366, 'vf_explained_var': 0.9976939, 'kl': 0.013022215862292796, 'entropy': 1.5452024713158607, 'entropy_coeff': 0.005}
2020-09-21 16:13:07,821	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.10784781788242981, 'policy_loss': -0.12065527599770576, 'vf_loss': 0.006927474241820164, 'vf_explained_var': 0.9977914, 'kl': 0.013459768670145422, 'entropy': 1.5496074855327606, 'entropy_coeff': 0.005}
2020-09-21 16:13:08,305	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.11004584026522934, 'policy_loss': -0.12275100662373006, 'vf_loss': 0.006820348236942664, 'vf_explained_var': 0.997624, 'kl': 0.013491336372680962, 'entropy': 1.5550315529108047, 'entropy_coeff': 0.0

custom_metrics: {}
date: 2020-09-21_16-13-09
done: false
episode_len_mean: 1851.82
episode_reward_max: -21.973405018039006
episode_reward_mean: -29.4794780888356
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 136
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.5481027215719223
      entropy_coeff: 0.005
      kl: 0.013638413103763014
      policy_loss: -0.12605821748729795
      total_loss: -0.11281798523850739
      vf_explained_var: 0.9976428747177124
      vf_loss: 0.007171852164901793
  num_steps_sampled: 220000
  num_steps_trained: 220000
iterations_since_restore: 220
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.295454545454546
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.944990982128217

[2m[36m(pid=24699)[0m 2020-09-21 16:13:09,519	INFO rollout_worker.py:561 -- Generating sample batch of size 200
2020-09-21 16:13:12,344	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.285209147259593, 'policy_loss': -0.0013323939638212323, 'vf_loss': 0.29199488554149866, 'vf_explained_var': 0.9123057, 'kl': 0.001757252817452315, 'entropy': 1.4465116932988167, 'entropy_coeff': 0.005}
2020-09-21 16:13:12,835	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.07336516806390136, 'policy_loss': -0.02895407541655004, 'vf_loss': 0.1075089939404279, 'vf_explained_var': 0.96716154, 'kl': 0.001976139821636025, 'entropy': 1.4381184205412865, 'entropy_coeff': 0.005}
2020-09-21 16:13:13,289	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.024542823317460716, 'policy_loss': -0.035696998005732894, 'vf_loss': 0.06289596832357347, '

2020-09-21 16:13:23,951	INFO rollout_worker.py:736 -- Training on concatenated sample batches:

{ 'count': 64,
  'policy_batches': { 'default_policy': { 'data': { 'action_dist_inputs': np.ndarray((64, 21), dtype=float32, min=-6.502, max=6.954, mean=-0.22),
                                                    'action_logp': np.ndarray((64,), dtype=float32, min=-5.376, max=-0.058, mean=-1.466),
                                                    'action_prob': np.ndarray((64,), dtype=float32, min=0.005, max=0.943, mean=0.412),
                                                    'actions': np.ndarray((64,), dtype=int64, min=0.0, max=20.0, mean=4.094),
                                                    'advantages': np.ndarray((64,), dtype=float32, min=-2.494, max=2.916, mean=-0.116),
                                                    'agent_index': np.ndarray((64,), dtype=int64, min=0.0, max=0.0, mean=0.0),
                                                    'dones': np.ndarray((64,), dt

custom_metrics: {}
date: 2020-09-21_16-13-26
done: false
episode_len_mean: 1851.82
episode_reward_max: -21.973405018039006
episode_reward_mean: -29.4794780888356
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 136
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.4422132074832916
      entropy_coeff: 0.005
      kl: 0.016681891516782343
      policy_loss: -0.12397010938730091
      total_loss: -0.08563002594746649
      vf_explained_var: 0.9908707737922668
      vf_loss: 0.028660729876719415
  num_steps_sampled: 221000
  num_steps_trained: 221000
iterations_since_restore: 221
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.377272727272726
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.944990982128217

2020-09-21 16:13:29,287	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.26847510412335396, 'policy_loss': 0.0013115227920934558, 'vf_loss': 0.27395654655992985, 'vf_explained_var': 0.9150058, 'kl': 0.0005492377643832036, 'entropy': 1.4698151275515556, 'entropy_coeff': 0.005}
2020-09-21 16:13:29,774	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.12333594809751958, 'policy_loss': -0.027753861621022224, 'vf_loss': 0.15568482037633657, 'vf_explained_var': 0.95176756, 'kl': 0.002730021507886704, 'entropy': 1.4718324467539787, 'entropy_coeff': 0.005}
2020-09-21 16:13:30,228	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.07966302544809878, 'policy_loss': -0.03650126082357019, 'vf_loss': 0.11833410942927003, 'vf_explained_var': 0.95946074, 'kl': 0.00512956605234649, 'entropy': 1.4727017879486084, 'entropy_coeff': 0.005}


2020-09-21 16:13:41,100	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.030064281541854143, 'policy_loss': -0.1259177529718727, 'vf_loss': 0.08768777921795845, 'vf_explained_var': 0.96808606, 'kl': 0.015310256625525653, 'entropy': 1.4671876430511475, 'entropy_coeff': 0.005}
2020-09-21 16:13:41,546	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.03725605609361082, 'policy_loss': -0.13308232370764017, 'vf_loss': 0.08755711046978831, 'vf_explained_var': 0.9686867, 'kl': 0.015417520015034825, 'entropy': 1.46821678429842, 'entropy_coeff': 0.005}
2020-09-21 16:13:42,032	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.0319806607440114, 'policy_loss': -0.13019891385920346, 'vf_loss': 0.08906455477699637, 'vf_explained_var': 0.96866816, 'kl': 0.01628177531529218, 'entropy': 1.4663196727633476, 'entropy_coeff': 0.005}


custom_metrics: {}
date: 2020-09-21_16-13-43
done: false
episode_len_mean: 1898.67
episode_reward_max: -21.973405018039006
episode_reward_mean: -29.398800074604363
episode_reward_min: -32.22698139089825
episodes_this_iter: 1
episodes_total: 137
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.4621456414461136
      entropy_coeff: 0.005
      kl: 0.01596840936690569
      policy_loss: -0.13125936535652727
      total_loss: -0.03577250172384083
      vf_explained_var: 0.968407392501831
      vf_loss: 0.08662957674823701
  num_steps_sampled: 222000
  num_steps_trained: 222000
iterations_since_restore: 222
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.281818181818181
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.9449909821282177

2020-09-21 16:13:46,023	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.12021926883608103, 'policy_loss': -0.0017235398991033435, 'vf_loss': 0.12995630083605647, 'vf_explained_var': 0.97096765, 'kl': 0.0003769489604998899, 'entropy': 1.67903071641922, 'entropy_coeff': 0.005}
2020-09-21 16:13:46,504	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.025020406930707395, 'policy_loss': -0.028501417487859726, 'vf_loss': 0.05893598636612296, 'vf_explained_var': 0.9863891, 'kl': 0.0029694363838643767, 'entropy': 1.6841428875923157, 'entropy_coeff': 0.005}
2020-09-21 16:13:46,976	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.010222774697467685, 'policy_loss': -0.052924844552762806, 'vf_loss': 0.04468015441671014, 'vf_explained_var': 0.9892941, 'kl': 0.006407972570741549, 'entropy': 1.6932315230369568, 'entropy_coeff': 0.0

2020-09-21 16:13:57,905	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.08942753204610199, 'policy_loss': -0.1282570466864854, 'vf_loss': 0.031199894496239722, 'vf_explained_var': 0.9922235, 'kl': 0.015940286801196635, 'entropy': 1.7019847929477692, 'entropy_coeff': 0.005}
2020-09-21 16:13:58,357	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.09639213234186172, 'policy_loss': -0.13572998682502657, 'vf_loss': 0.0314391398569569, 'vf_explained_var': 0.9923911, 'kl': 0.016199904202949256, 'entropy': 1.7007380425930023, 'entropy_coeff': 0.005}
2020-09-21 16:13:58,841	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.09379897313192487, 'policy_loss': -0.1324596726335585, 'vf_loss': 0.030868713976815343, 'vf_explained_var': 0.99250233, 'kl': 0.016074608138296753, 'entropy': 1.696711078286171, 'entropy_coeff': 0.005}

custom_metrics: {}
date: 2020-09-21_16-13-59
done: false
episode_len_mean: 1898.67
episode_reward_max: -21.973405018039006
episode_reward_mean: -29.398800074604363
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 137
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.6994337439537048
      entropy_coeff: 0.005
      kl: 0.016083691269159317
      policy_loss: -0.14230116322869435
      total_loss: -0.10407919361023232
      vf_explained_var: 0.9927182197570801
      vf_loss: 0.030434397398494184
  num_steps_sampled: 223000
  num_steps_trained: 223000
iterations_since_restore: 223
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.281818181818182
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.9449909821282

2020-09-21 16:14:02,655	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.1298449813039042, 'policy_loss': 0.00599680352024734, 'vf_loss': 0.13078888971358538, 'vf_explained_var': 0.9829086, 'kl': 0.0012584875244733285, 'entropy': 1.642986223101616, 'entropy_coeff': 0.005}
2020-09-21 16:14:03,111	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.022466668277047575, 'policy_loss': -0.01971805887296796, 'vf_loss': 0.04698812135029584, 'vf_explained_var': 0.9937259, 'kl': 0.0034570981151773594, 'entropy': 1.6607408374547958, 'entropy_coeff': 0.005}
2020-09-21 16:14:03,594	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.012273264117538929, 'policy_loss': -0.03790737269446254, 'vf_loss': 0.030499714659526944, 'vf_explained_var': 0.99556553, 'kl': 0.003335082088597119, 'entropy': 1.6484756544232368, 'entropy_coeff': 0.005}


2020-09-21 16:14:14,450	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.10582748509477824, 'policy_loss': -0.13003837107680738, 'vf_loss': 0.014765812520636246, 'vf_explained_var': 0.99778265, 'kl': 0.017529672069940716, 'entropy': 1.660744920372963, 'entropy_coeff': 0.005}
2020-09-21 16:14:14,942	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.10801579221151769, 'policy_loss': -0.13199453661218286, 'vf_loss': 0.014972604403737932, 'vf_explained_var': 0.9977114, 'kl': 0.01709810580359772, 'entropy': 1.6611392572522163, 'entropy_coeff': 0.005}
2020-09-21 16:14:15,395	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.10675684013403952, 'policy_loss': -0.1308937252033502, 'vf_loss': 0.014688500028569251, 'vf_explained_var': 0.99790245, 'kl': 0.01752158359158784, 'entropy': 1.6584437042474747, 'entropy_coeff': 0.00

custom_metrics: {}
date: 2020-09-21_16-14-16
done: false
episode_len_mean: 1898.67
episode_reward_max: -21.973405018039006
episode_reward_mean: -29.398800074604363
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 137
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.6589577570557594
      entropy_coeff: 0.005
      kl: 0.017897646001074463
      policy_loss: -0.13900109159294516
      total_loss: -0.11450291617074981
      vf_explained_var: 0.9979304075241089
      vf_loss: 0.014671603130409494
  num_steps_sampled: 224000
  num_steps_trained: 224000
iterations_since_restore: 224
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.3136363636363635
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.944990982128

[2m[36m(pid=24699)[0m 2020-09-21 16:14:16,623	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 8210.830085303882,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 4470},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-5.832, max=9.616, mean=0.77),
[2m[36m(pid=24699)[0m                                   'prev_action': 3,
[2m[36m(pid=24699)[0

2020-09-21 16:14:19,889	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.10761847964022309, 'policy_loss': 0.004474280285649002, 'vf_loss': 0.11105900118127465, 'vf_explained_var': 0.97574323, 'kl': 0.0003311038185713766, 'entropy': 1.6500093936920166, 'entropy_coeff': 0.005}
2020-09-21 16:14:20,378	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.014168952475301921, 'policy_loss': -0.026531199924647808, 'vf_loss': 0.045914920629002154, 'vf_explained_var': 0.9894816, 'kl': 0.0030436204688157886, 'entropy': 1.659287929534912, 'entropy_coeff': 0.005}
2020-09-21 16:14:20,825	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.015340978396125138, 'policy_loss': -0.045453038765117526, 'vf_loss': 0.03161182673648, 'vf_explained_var': 0.9930067, 'kl': 0.006685870874207467, 'entropy': 1.6538410931825638, 'entropy_coeff': 0.005}

2020-09-21 16:14:26,492	DEBUG sgd.py:120 -- 14 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.0942934810591396, 'policy_loss': -0.11583682021591812, 'vf_loss': 0.01645562899648212, 'vf_explained_var': 0.99615896, 'kl': 0.013253684854134917, 'entropy': 1.666328638792038, 'entropy_coeff': 0.005}
2020-09-21 16:14:26,947	DEBUG sgd.py:120 -- 15 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.09375586302485317, 'policy_loss': -0.11562200787011534, 'vf_loss': 0.016325259261066094, 'vf_explained_var': 0.9960645, 'kl': 0.013684426667168736, 'entropy': 1.6629203110933304, 'entropy_coeff': 0.005}
2020-09-21 16:14:27,429	DEBUG sgd.py:120 -- 16 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.09555124572943896, 'policy_loss': -0.11699744031648152, 'vf_loss': 0.015702825039625168, 'vf_explained_var': 0.99623084, 'kl': 0.01386183750582859, 'entropy': 1.658348634839058, 'entropy_coeff': 0.005

custom_metrics: {}
date: 2020-09-21_16-14-33
done: false
episode_len_mean: 1898.67
episode_reward_max: -21.973405018039006
episode_reward_mean: -29.398800074604363
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 137
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.6604391485452652
      entropy_coeff: 0.005
      kl: 0.016960970824584365
      policy_loss: -0.1412299649673514
      total_loss: -0.11676124681252986
      vf_explained_var: 0.9962425231933594
      vf_loss: 0.015597925055772066
  num_steps_sampled: 225000
  num_steps_trained: 225000
iterations_since_restore: 225
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.2681818181818185
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.9449909821282

[2m[36m(pid=24699)[0m 2020-09-21 16:14:33,811	INFO rollout_worker.py:561 -- Generating sample batch of size 200
2020-09-21 16:14:36,919	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.17749513406306505, 'policy_loss': 0.0012159296602476388, 'vf_loss': 0.18384678312577307, 'vf_explained_var': 0.96933055, 'kl': 0.0007943137580952442, 'entropy': 1.6743633449077606, 'entropy_coeff': 0.005}
2020-09-21 16:14:37,404	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.010716668330132961, 'policy_loss': -0.023492744658142328, 'vf_loss': 0.03979767230339348, 'vf_explained_var': 0.992236, 'kl': 0.002703886064409744, 'entropy': 1.6651889234781265, 'entropy_coeff': 0.005}
2020-09-21 16:14:37,856	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.023549931356683373, 'policy_loss': -0.04109172278549522, 'vf_loss': 0.0214248470729216

2020-09-21 16:14:48,737	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.11995093210134655, 'policy_loss': -0.1309750135987997, 'vf_loss': 0.004916513746138662, 'vf_explained_var': 0.99903584, 'kl': 0.014235862763598561, 'entropy': 1.6612489446997643, 'entropy_coeff': 0.005}
2020-09-21 16:14:49,189	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.11898691981332377, 'policy_loss': -0.13024326087906957, 'vf_loss': 0.0048523654259042814, 'vf_explained_var': 0.99905145, 'kl': 0.0144995441660285, 'entropy': 1.655362881720066, 'entropy_coeff': 0.005}
2020-09-21 16:14:49,673	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.11692458653124049, 'policy_loss': -0.12876151636010036, 'vf_loss': 0.004978844124707393, 'vf_explained_var': 0.99900967, 'kl': 0.01493392261909321, 'entropy': 1.6525021567940712, 'entropy_coeff': 0.0

custom_metrics: {}
date: 2020-09-21_16-14-50
done: false
episode_len_mean: 1898.67
episode_reward_max: -21.973405018039006
episode_reward_mean: -29.398800074604363
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 137
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.6515679061412811
      entropy_coeff: 0.005
      kl: 0.015350187837611884
      policy_loss: -0.13498082221485674
      total_loss: -0.12256492138840258
      vf_explained_var: 0.999049186706543
      vf_loss: 0.005131672631250694
  num_steps_sampled: 226000
  num_steps_trained: 226000
iterations_since_restore: 226
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.345454545454547
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.94499098212821

2020-09-21 16:14:54,067	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.13730098446831107, 'policy_loss': 0.004840006062295288, 'vf_loss': 0.13976102229207754, 'vf_explained_var': 0.93910766, 'kl': 0.00039886165896430015, 'entropy': 1.5407802909612656, 'entropy_coeff': 0.005}
2020-09-21 16:14:54,552	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.017656677518971264, 'policy_loss': -0.025125017971731722, 'vf_loss': 0.04807430901564658, 'vf_explained_var': 0.97863615, 'kl': 0.0024663331860210747, 'entropy': 1.5579547137022018, 'entropy_coeff': 0.005}
2020-09-21 16:14:55,005	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.021543875220231712, 'policy_loss': -0.04465531406458467, 'vf_loss': 0.026227811817079782, 'vf_explained_var': 0.9876659, 'kl': 0.004589231946738437, 'entropy': 1.5525949075818062, 'entropy_coeff': 0

2020-09-21 16:15:05,922	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.10300781612750143, 'policy_loss': -0.11682784440927207, 'vf_loss': 0.0074942465871572495, 'vf_explained_var': 0.9962852, 'kl': 0.013881818042136729, 'entropy': 1.5459119603037834, 'entropy_coeff': 0.005}
2020-09-21 16:15:06,378	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.11323246388928965, 'policy_loss': -0.1269575392943807, 'vf_loss': 0.007287583983270451, 'vf_explained_var': 0.99659884, 'kl': 0.014025992539245635, 'entropy': 1.5527648404240608, 'entropy_coeff': 0.005}
2020-09-21 16:15:06,866	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.101209229324013, 'policy_loss': -0.11576445517130196, 'vf_loss': 0.007596640702104196, 'vf_explained_var': 0.99613357, 'kl': 0.014557801303453743, 'entropy': 1.5562366917729378, 'entropy_coeff': 0.

custom_metrics: {}
date: 2020-09-21_16-15-07
done: false
episode_len_mean: 1898.67
episode_reward_max: -21.973405018039006
episode_reward_mean: -29.398800074604363
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 137
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.5494634881615639
      entropy_coeff: 0.005
      kl: 0.014560843876097351
      policy_loss: -0.12328123115003109
      total_loss: -0.10888426483143121
      vf_explained_var: 0.9964460134506226
      vf_loss: 0.007401426002616063
  num_steps_sampled: 227000
  num_steps_trained: 227000
iterations_since_restore: 227
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.486363636363636
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.704545454545451
  vram_util_percent0: 0.9449909821282

2020-09-21 16:15:10,866	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.08593962760642171, 'policy_loss': 0.0003446820192039013, 'vf_loss': 0.09277308732271194, 'vf_explained_var': 0.9148158, 'kl': 0.0009099379960954002, 'entropy': 1.6198890432715416, 'entropy_coeff': 0.005}
2020-09-21 16:15:11,358	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.03983555384911597, 'policy_loss': -0.011642032215604559, 'vf_loss': 0.05713977001141757, 'vf_explained_var': 0.9500066, 'kl': 0.0024283231250592507, 'entropy': 1.6241730973124504, 'entropy_coeff': 0.005}
2020-09-21 16:15:11,809	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.01054548000684008, 'policy_loss': -0.029829567181877792, 'vf_loss': 0.04400726035237312, 'vf_explained_var': 0.9616251, 'kl': 0.004498319409321994, 'entropy': 1.6373523771762848, 'entropy_coeff': 0.005}

2020-09-21 16:15:22,730	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.07455823314376175, 'policy_loss': -0.11081330687738955, 'vf_loss': 0.030032286478672177, 'vf_explained_var': 0.9715316, 'kl': 0.014140337763819844, 'entropy': 1.6188611909747124, 'entropy_coeff': 0.005}
2020-09-21 16:15:23,181	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.08754934021271765, 'policy_loss': -0.12308454443700612, 'vf_loss': 0.02923512429697439, 'vf_explained_var': 0.9735923, 'kl': 0.014199633500538766, 'entropy': 1.6154106333851814, 'entropy_coeff': 0.005}
2020-09-21 16:15:23,667	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.08483146771322936, 'policy_loss': -0.11955555726308376, 'vf_loss': 0.028395210130838677, 'vf_explained_var': 0.9751768, 'kl': 0.014217783289495856, 'entropy': 1.6133245304226875, 'entropy_coeff': 0.0

custom_metrics: {}
date: 2020-09-21_16-15-24
done: false
episode_len_mean: 1944.84
episode_reward_max: -21.973405018039006
episode_reward_mean: -29.319871631790335
episode_reward_min: -32.22698139089825
episodes_this_iter: 1
episodes_total: 138
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.6154141128063202
      entropy_coeff: 0.005
      kl: 0.014676564605906606
      policy_loss: -0.1277623139321804
      total_loss: -0.0922330062603578
      vf_explained_var: 0.9735097885131836
      vf_loss: 0.028746360971126705
  num_steps_sampled: 228000
  num_steps_trained: 228000
iterations_since_restore: 228
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.340909090909091
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.704545454545451
  vram_util_percent0: 0.944990982128217

2020-09-21 16:15:27,946	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.3693271428346634, 'policy_loss': 8.481752593070269e-05, 'vf_loss': 0.37606556434184313, 'vf_explained_var': 0.809731, 'kl': 0.0009217740261318053, 'entropy': 1.5513071715831757, 'entropy_coeff': 0.005}
2020-09-21 16:15:28,432	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.07479226938448846, 'policy_loss': -0.02822226914577186, 'vf_loss': 0.10885025793686509, 'vf_explained_var': 0.9397342, 'kl': 0.001920480644912459, 'entropy': 1.5560427904129028, 'entropy_coeff': 0.005}
2020-09-21 16:15:28,890	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.006638380233198404, 'policy_loss': -0.044230189523659647, 'vf_loss': 0.042367456248030066, 'vf_explained_var': 0.97663665, 'kl': 0.002889901530579664, 'entropy': 1.5403355434536934, 'entropy_coeff': 0.005}

2020-09-21 16:15:39,769	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.08129454296431504, 'policy_loss': -0.08343934681033716, 'vf_loss': 0.0011115807610622142, 'vf_explained_var': 0.9993688, 'kl': 0.008649772295029834, 'entropy': 1.5449330136179924, 'entropy_coeff': 0.005}
2020-09-21 16:15:40,229	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.08605662093032151, 'policy_loss': -0.08811442030128092, 'vf_loss': 0.0010523807432036847, 'vf_explained_var': 0.9993952, 'kl': 0.008665408357046545, 'entropy': 1.5536619499325752, 'entropy_coeff': 0.005}
2020-09-21 16:15:40,712	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.08244085773912957, 'policy_loss': -0.0853857759211678, 'vf_loss': 0.0009727456017571967, 'vf_explained_var': 0.9994279, 'kl': 0.009588544693542644, 'entropy': 1.5472450479865074, 'entropy_coeff': 

custom_metrics: {}
date: 2020-09-21_16-15-41
done: false
episode_len_mean: 1944.84
episode_reward_max: -21.973405018039006
episode_reward_mean: -29.319871631790342
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 138
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.5504210591316223
      entropy_coeff: 0.005
      kl: 0.00986771855968982
      policy_loss: -0.08402904635295272
      total_loss: -0.08085793629288673
      vf_explained_var: 0.9994577765464783
      vf_loss: 0.0009321493052993901
  num_steps_sampled: 229000
  num_steps_trained: 229000
iterations_since_restore: 229
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.3090909090909095
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.795454545454549
  vram_util_percent0: 0.944990982128

[2m[36m(pid=24699)[0m 2020-09-21 16:15:41,906	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 9564.052691425344,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 308},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-1.312, max=1.381, mean=0.223),
[2m[36m(pid=24699)[0m                                   'prev_action': 2,
[2m[36m(pid=24699)[0

2020-09-21 16:15:44,762	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.13622837187722325, 'policy_loss': 0.002111228706780821, 'vf_loss': 0.13973135640844703, 'vf_explained_var': 0.97726184, 'kl': 0.002131489078890547, 'entropy': 1.5544705837965012, 'entropy_coeff': 0.005}
2020-09-21 16:15:45,257	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.03595173347275704, 'policy_loss': -0.013986121339257807, 'vf_loss': 0.05290530761703849, 'vf_explained_var': 0.99042296, 'kl': 0.00469450606033206, 'entropy': 1.5441286116838455, 'entropy_coeff': 0.005}
2020-09-21 16:15:45,708	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.008051644661463797, 'policy_loss': -0.036430636304430664, 'vf_loss': 0.02985200542025268, 'vf_explained_var': 0.99491984, 'kl': 0.006135018018539995, 'entropy': 1.5369439795613289, 'entropy_coeff': 0.005

2020-09-21 16:15:56,632	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.11713558930205181, 'policy_loss': -0.12582660594489425, 'vf_loss': 0.0018002845681621693, 'vf_explained_var': 0.999692, 'kl': 0.014439867751207203, 'entropy': 1.5459279343485832, 'entropy_coeff': 0.005}
2020-09-21 16:15:57,080	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.11119193653576076, 'policy_loss': -0.12064375542104244, 'vf_loss': 0.0016599796363152564, 'vf_explained_var': 0.9996935, 'kl': 0.015290200768504292, 'entropy': 1.5378986224532127, 'entropy_coeff': 0.005}
2020-09-21 16:15:57,564	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.11287288856692612, 'policy_loss': -0.12220300920307636, 'vf_loss': 0.001665761970798485, 'vf_explained_var': 0.99968, 'kl': 0.0151967789279297, 'entropy': 1.5444759204983711, 'entropy_coeff': 0.005

custom_metrics: {}
date: 2020-09-21_16-15-58
done: false
episode_len_mean: 1944.84
episode_reward_max: -21.973405018039006
episode_reward_mean: -29.319871631790342
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 138
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.5461243093013763
      entropy_coeff: 0.005
      kl: 0.015243660775013268
      policy_loss: -0.12377551652025431
      total_loss: -0.114621268119663
      vf_explained_var: 0.9997525215148926
      vf_loss: 0.0014506669467664324
  num_steps_sampled: 230000
  num_steps_trained: 230000
iterations_since_restore: 230
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.368181818181818
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.94499098212821

[2m[36m(pid=24699)[0m 2020-09-21 16:15:58,740	INFO rollout_worker.py:561 -- Generating sample batch of size 200
2020-09-21 16:16:01,687	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.2751861200667918, 'policy_loss': 0.001958923996426165, 'vf_loss': 0.28021394461393356, 'vf_explained_var': 0.8896774, 'kl': 0.0006659634281327431, 'entropy': 1.5322043299674988, 'entropy_coeff': 0.005}
2020-09-21 16:16:02,174	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.11394065769854933, 'policy_loss': -0.01840177515987307, 'vf_loss': 0.1380231254734099, 'vf_explained_var': 0.9491234, 'kl': 0.0019678873322845902, 'entropy': 1.5346367806196213, 'entropy_coeff': 0.005}
2020-09-21 16:16:02,619	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.03552563162520528, 'policy_loss': -0.03273004468064755, 'vf_loss': 0.07094550807960331, 'vf

2020-09-21 16:16:13,511	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.1190993890631944, 'policy_loss': -0.12635898916050792, 'vf_loss': 0.0020164762136118952, 'vf_explained_var': 0.9991814, 'kl': 0.012666431488469243, 'entropy': 1.516327552497387, 'entropy_coeff': 0.005}
2020-09-21 16:16:13,968	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.11433458258397877, 'policy_loss': -0.12154915113933384, 'vf_loss': 0.0019095859097433276, 'vf_explained_var': 0.99921167, 'kl': 0.012744810723233968, 'entropy': 1.5198271572589874, 'entropy_coeff': 0.005}
2020-09-21 16:16:14,454	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.12682577804662287, 'policy_loss': -0.13426765333861113, 'vf_loss': 0.0017668536165729165, 'vf_explained_var': 0.9992284, 'kl': 0.013091563188936561, 'entropy': 1.5160368531942368, 'entropy_coeff': 

custom_metrics: {}
date: 2020-09-21_16-16-15
done: false
episode_len_mean: 1944.84
episode_reward_max: -21.973405018039006
episode_reward_mean: -29.319871631790342
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 138
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.514917328953743
      entropy_coeff: 0.005
      kl: 0.013094987720251083
      policy_loss: -0.12772112537641078
      total_loss: -0.1204703045077622
      vf_explained_var: 0.9993454813957214
      vf_loss: 0.0015667270199628547
  num_steps_sampled: 231000
  num_steps_trained: 231000
iterations_since_restore: 231
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.331818181818182
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.94499098212821

2020-09-21 16:16:18,941	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.18249019142240286, 'policy_loss': 0.0009362715063616633, 'vf_loss': 0.18857205612584949, 'vf_explained_var': 0.9448657, 'kl': 0.0005464127991215983, 'entropy': 1.5142762959003448, 'entropy_coeff': 0.005}
2020-09-21 16:16:19,435	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.04170516930753365, 'policy_loss': -0.025554379739332944, 'vf_loss': 0.07162588369101286, 'vf_explained_var': 0.97778255, 'kl': 0.0031662265391787514, 'entropy': 1.5144275650382042, 'entropy_coeff': 0.005}
2020-09-21 16:16:19,883	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.00797384011093527, 'policy_loss': -0.0406060233945027, 'vf_loss': 0.0355153203709051, 'vf_explained_var': 0.9891913, 'kl': 0.004604872578056529, 'entropy': 1.5091134905815125, 'entropy_coeff': 0.005}


2020-09-21 16:16:25,599	DEBUG sgd.py:120 -- 14 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.10090255812974647, 'policy_loss': -0.10862926382105798, 'vf_loss': 0.0031763708175276406, 'vf_explained_var': 0.9989685, 'kl': 0.011886033229529858, 'entropy': 1.4968559071421623, 'entropy_coeff': 0.005}
2020-09-21 16:16:26,045	DEBUG sgd.py:120 -- 15 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.10044930374715477, 'policy_loss': -0.10801718413131312, 'vf_loss': 0.002842514884832781, 'vf_explained_var': 0.99905276, 'kl': 0.012077537656296045, 'entropy': 1.500628113746643, 'entropy_coeff': 0.005}
2020-09-21 16:16:26,532	DEBUG sgd.py:120 -- 16 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.10764009703416377, 'policy_loss': -0.1152608091942966, 'vf_loss': 0.0026639044954208657, 'vf_explained_var': 0.9991559, 'kl': 0.012325722316745669, 'entropy': 1.5045961737632751, 'entropy_coeff': 0

custom_metrics: {}
date: 2020-09-21_16-16-32
done: false
episode_len_mean: 1944.84
episode_reward_max: -21.973405018039006
episode_reward_mean: -29.319871631790342
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 138
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.5044855326414108
      entropy_coeff: 0.005
      kl: 0.014835768786724657
      policy_loss: -0.13531375804450363
      total_loss: -0.12663328513735905
      vf_explained_var: 0.9996248483657837
      vf_loss: 0.0011816924925369676
  num_steps_sampled: 232000
  num_steps_trained: 232000
iterations_since_restore: 232
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.4
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.9449909821282177
  vram_ut

2020-09-21 16:16:35,947	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.182798930502031, 'policy_loss': -0.006738421972841024, 'vf_loss': 0.1953532020561397, 'vf_explained_var': 0.92658705, 'kl': 0.0020842008500341347, 'entropy': 1.5852211639285088, 'entropy_coeff': 0.005}
2020-09-21 16:16:36,431	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.026606848929077387, 'policy_loss': -0.0378874127054587, 'vf_loss': 0.06490195682272315, 'vf_explained_var': 0.97422916, 'kl': 0.007309144013561308, 'entropy': 1.5616406202316284, 'entropy_coeff': 0.005}
2020-09-21 16:16:36,886	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.021735234462539665, 'policy_loss': -0.04992189886979759, 'vf_loss': 0.029689007787965238, 'vf_explained_var': 0.98819053, 'kl': 0.006360590370604768, 'entropy': 1.588488593697548, 'entropy_coeff': 0.005}


2020-09-21 16:16:47,779	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.13216749858111143, 'policy_loss': -0.13983543938957155, 'vf_loss': 0.001345931836112868, 'vf_explained_var': 0.99944484, 'kl': 0.013993282045703381, 'entropy': 1.569239117205143, 'entropy_coeff': 0.005}
2020-09-21 16:16:48,232	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.13161689427215606, 'policy_loss': -0.13951782998628914, 'vf_loss': 0.0012370210788503755, 'vf_explained_var': 0.9994768, 'kl': 0.014334288018289953, 'entropy': 1.569911114871502, 'entropy_coeff': 0.005}
2020-09-21 16:16:48,719	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.12902333651436493, 'policy_loss': -0.13692407333292067, 'vf_loss': 0.0011572246949071996, 'vf_explained_var': 0.9994949, 'kl': 0.014388329233042896, 'entropy': 1.5649354010820389, 'entropy_coeff': 0

custom_metrics: {}
date: 2020-09-21_16-16-49
done: false
episode_len_mean: 1944.84
episode_reward_max: -21.973405018039006
episode_reward_mean: -29.319871631790342
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 138
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.571216344833374
      entropy_coeff: 0.005
      kl: 0.014462829101830721
      policy_loss: -0.13781013470725156
      total_loss: -0.12994204566348344
      vf_explained_var: 0.9995605945587158
      vf_loss: 0.0010805536185216624
  num_steps_sampled: 233000
  num_steps_trained: 233000
iterations_since_restore: 233
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.368181818181818
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.9449909821282

2020-09-21 16:16:53,342	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.21866009174846113, 'policy_loss': -0.00658616959117353, 'vf_loss': 0.23141289921477437, 'vf_explained_var': 0.84236485, 'kl': 0.001313281643987918, 'entropy': 1.4992686659097672, 'entropy_coeff': 0.005}
2020-09-21 16:16:53,831	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.04889316682238132, 'policy_loss': -0.026791233627591282, 'vf_loss': 0.0803295352961868, 'vf_explained_var': 0.93952703, 'kl': 0.0028838225771323778, 'entropy': 1.5130003616213799, 'entropy_coeff': 0.005}
2020-09-21 16:16:54,279	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.006799250986659899, 'policy_loss': -0.04113365605007857, 'vf_loss': 0.03605627454817295, 'vf_explained_var': 0.9723147, 'kl': 0.005801895022159442, 'entropy': 1.5192580223083496, 'entropy_coeff': 0.005}

2020-09-21 16:17:05,254	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.12410161225125194, 'policy_loss': -0.13442723429761827, 'vf_loss': 0.001759292819770053, 'vf_explained_var': 0.9986057, 'kl': 0.015960275486577302, 'entropy': 1.5186908692121506, 'entropy_coeff': 0.005}
2020-09-21 16:17:05,706	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.12346310541033745, 'policy_loss': -0.13416113937273622, 'vf_loss': 0.0016452955096610822, 'vf_explained_var': 0.9986407, 'kl': 0.016405283357016742, 'entropy': 1.5115221440792084, 'entropy_coeff': 0.005}
2020-09-21 16:17:06,188	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.1283370058517903, 'policy_loss': -0.13934939820319414, 'vf_loss': 0.0017034057236742228, 'vf_explained_var': 0.9987205, 'kl': 0.01667712372727692, 'entropy': 1.5153195932507515, 'entropy_coeff': 0.

custom_metrics: {}
date: 2020-09-21_16-17-07
done: false
episode_len_mean: 1944.84
episode_reward_max: -21.973405018039006
episode_reward_mean: -29.319871631790342
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 138
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.5126606151461601
      entropy_coeff: 0.005
      kl: 0.016553421563003212
      policy_loss: -0.1402351235738024
      total_loss: -0.12945791997481138
      vf_explained_var: 0.99875807762146
      vf_loss: 0.0015801670197106432
  num_steps_sampled: 234000
  num_steps_trained: 234000
iterations_since_restore: 234
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 6.340909090909091
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.944990982128217

[2m[36m(pid=24699)[0m 2020-09-21 16:17:07,423	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 12479.66841784718,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 1308},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-1.08, max=2.065, mean=0.36),
[2m[36m(pid=24699)[0m                                   'prev_action': 12,
[2m[36m(pid=24699)[0

2020-09-21 16:17:10,469	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.2335952297435142, 'policy_loss': -0.0010852021514438093, 'vf_loss': 0.24108648113906384, 'vf_explained_var': 0.9165243, 'kl': 0.001047615622193021, 'entropy': 1.4933519139885902, 'entropy_coeff': 0.005}
2020-09-21 16:17:10,959	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.05254654190503061, 'policy_loss': -0.018945407820865512, 'vf_loss': 0.07708380790427327, 'vf_explained_var': 0.9728954, 'kl': 0.001867787228547968, 'entropy': 1.4965974763035774, 'entropy_coeff': 0.005}
2020-09-21 16:17:11,412	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.0030687882099300623, 'policy_loss': -0.031222254619933665, 'vf_loss': 0.0381425297819078, 'vf_explained_var': 0.98703957, 'kl': 0.0035573278582887724, 'entropy': 1.4906566068530083, 'entropy_coeff': 0.005

2020-09-21 16:17:22,337	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.11734104249626398, 'policy_loss': -0.12696612928994, 'vf_loss': 0.0019748466947930865, 'vf_explained_var': 0.9993183, 'kl': 0.014929028693586588, 'entropy': 1.493081457912922, 'entropy_coeff': 0.005}
2020-09-21 16:17:22,791	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.11821436323225498, 'policy_loss': -0.12820132332853973, 'vf_loss': 0.0019173475666320883, 'vf_explained_var': 0.9992912, 'kl': 0.015347705339081585, 'entropy': 1.493988148868084, 'entropy_coeff': 0.005}
2020-09-21 16:17:23,276	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.11646736291004345, 'policy_loss': -0.12585642136400566, 'vf_loss': 0.0017304035209235735, 'vf_explained_var': 0.999359, 'kl': 0.014915329520590603, 'entropy': 1.4886237159371376, 'entropy_coeff': 0.005

custom_metrics: {}
date: 2020-09-21_16-17-24
done: false
episode_len_mean: 2003.22
episode_reward_max: -21.973405018039006
episode_reward_mean: -29.234773368777883
episode_reward_min: -32.22698139089825
episodes_this_iter: 1
episodes_total: 139
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.489793799817562
      entropy_coeff: 0.005
      kl: 0.015549455536529422
      policy_loss: -0.129409946850501
      total_loss: -0.11943692760542035
      vf_explained_var: 0.9994023442268372
      vf_loss: 0.001678160384471994
  num_steps_sampled: 235000
  num_steps_trained: 235000
iterations_since_restore: 235
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.390909090909091
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.9449909821282177

[2m[36m(pid=24699)[0m 2020-09-21 16:17:24,512	INFO rollout_worker.py:561 -- Generating sample batch of size 200
2020-09-21 16:17:27,511	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.18745187850436196, 'policy_loss': 0.0013986860867589712, 'vf_loss': 0.19192597875371575, 'vf_explained_var': 0.9317759, 'kl': 0.00181184767880338, 'entropy': 1.541455440223217, 'entropy_coeff': 0.005}
2020-09-21 16:17:28,005	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.03992823354201391, 'policy_loss': -0.029215254937298596, 'vf_loss': 0.07255533430725336, 'vf_explained_var': 0.97329473, 'kl': 0.004229549245792441, 'entropy': 1.5388526320457458, 'entropy_coeff': 0.005}
2020-09-21 16:17:28,458	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.005492506781592965, 'policy_loss': -0.043021425837650895, 'vf_loss': 0.0389416926773265, 

2020-09-21 16:17:39,377	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.11895198561251163, 'policy_loss': -0.12754760636016726, 'vf_loss': 0.0010663818975444883, 'vf_explained_var': 0.99958724, 'kl': 0.015035549411550164, 'entropy': 1.5388504043221474, 'entropy_coeff': 0.005}
2020-09-21 16:17:39,831	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.11569569318089634, 'policy_loss': -0.12492382945492864, 'vf_loss': 0.0010338380670873448, 'vf_explained_var': 0.999602, 'kl': 0.015707772981841117, 'entropy': 1.5419653803110123, 'entropy_coeff': 0.005}
2020-09-21 16:17:40,315	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.11695131473243237, 'policy_loss': -0.12537300772964954, 'vf_loss': 0.000990399672446074, 'vf_explained_var': 0.9996389, 'kl': 0.014925430295988917, 'entropy': 1.536142259836197, 'entropy_coeff': 0

custom_metrics: {}
date: 2020-09-21_16-17-41
done: false
episode_len_mean: 2003.22
episode_reward_max: -21.973405018039006
episode_reward_mean: -29.23477336877788
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 139
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.536969393491745
      entropy_coeff: 0.005
      kl: 0.01550882775336504
      policy_loss: -0.1268281924421899
      total_loss: -0.11787526187254116
      vf_explained_var: 0.9996640682220459
      vf_loss: 0.0009350971304229461
  num_steps_sampled: 236000
  num_steps_trained: 236000
iterations_since_restore: 236
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.504545454545455
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.9449909821282177

2020-09-21 16:17:44,266	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.2157679529627785, 'policy_loss': 0.0007877235766500235, 'vf_loss': 0.22135677933692932, 'vf_explained_var': 0.97417665, 'kl': 0.001244827528816106, 'entropy': 1.5273876786231995, 'entropy_coeff': 0.005}
2020-09-21 16:17:44,750	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.029585321783088148, 'policy_loss': -0.02908641181420535, 'vf_loss': 0.06228315504267812, 'vf_explained_var': 0.99292123, 'kl': 0.00400521673145704, 'entropy': 1.5333394333720207, 'entropy_coeff': 0.005}
2020-09-21 16:17:45,207	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.04005844076164067, 'policy_loss': -0.0625477236462757, 'vf_loss': 0.024994139501359314, 'vf_explained_var': 0.9969442, 'kl': 0.0051156112749595195, 'entropy': 1.5368842631578445, 'entropy_coeff': 0.005}


2020-09-21 16:17:56,142	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.1354240120272152, 'policy_loss': -0.1443722719559446, 'vf_loss': 0.0006447586529247928, 'vf_explained_var': 0.9999182, 'kl': 0.01576283451868221, 'entropy': 1.531272754073143, 'entropy_coeff': 0.005}
2020-09-21 16:17:56,598	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.13422189420089126, 'policy_loss': -0.1432969553861767, 'vf_loss': 0.0006201947726367507, 'vf_explained_var': 0.9999223, 'kl': 0.015931471483781934, 'entropy': 1.5351503640413284, 'entropy_coeff': 0.005}
2020-09-21 16:17:57,086	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.14091071806615219, 'policy_loss': -0.1498999814502895, 'vf_loss': 0.0005810768743685912, 'vf_explained_var': 0.9999262, 'kl': 0.015865173772908747, 'entropy': 1.5310587584972382, 'entropy_coeff': 0.005

custom_metrics: {}
date: 2020-09-21_16-17-58
done: false
episode_len_mean: 2003.22
episode_reward_max: -21.973405018039006
episode_reward_mean: -29.23477336877788
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 139
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.5289485454559326
      entropy_coeff: 0.005
      kl: 0.01615657826187089
      policy_loss: -0.15186549094505608
      total_loss: -0.14267659094184637
      vf_explained_var: 0.9999377131462097
      vf_loss: 0.00047510670265182853
  num_steps_sampled: 237000
  num_steps_trained: 237000
iterations_since_restore: 237
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.495454545454545
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.781818181818181
  vram_util_percent0: 0.9449909821282

2020-09-21 16:18:02,107	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.14922751067206264, 'policy_loss': 0.00211994128767401, 'vf_loss': 0.15414400165900588, 'vf_explained_var': 0.95314586, 'kl': 0.001047916589423048, 'entropy': 1.6194891333580017, 'entropy_coeff': 0.005}
2020-09-21 16:18:02,595	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.017696244409307837, 'policy_loss': -0.03282223327551037, 'vf_loss': 0.0545100747840479, 'vf_explained_var': 0.9841763, 'kl': 0.003992691534222104, 'entropy': 1.60684023052454, 'entropy_coeff': 0.005}
2020-09-21 16:18:03,046	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.02549897285643965, 'policy_loss': -0.048872701125219464, 'vf_loss': 0.026955172419548035, 'vf_explained_var': 0.99165785, 'kl': 0.004422563957632519, 'entropy': 1.6118579655885696, 'entropy_coeff': 0.005}
20

2020-09-21 16:18:14,165	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.13073539175093174, 'policy_loss': -0.1379809956997633, 'vf_loss': 0.0009381123927596491, 'vf_explained_var': 0.9997134, 'kl': 0.014153869065921754, 'entropy': 1.6046595573425293, 'entropy_coeff': 0.005}
2020-09-21 16:18:14,657	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.13201612059492618, 'policy_loss': -0.1397076731082052, 'vf_loss': 0.0008852815044519957, 'vf_explained_var': 0.99971795, 'kl': 0.014665512484498322, 'entropy': 1.608512595295906, 'entropy_coeff': 0.005}
2020-09-21 16:18:15,211	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.14026773790828884, 'policy_loss': -0.14773815171793103, 'vf_loss': 0.0008524873192072846, 'vf_explained_var': 0.99973404, 'kl': 0.014478017576038837, 'entropy': 1.6082126125693321, 'entropy_coeff': 

custom_metrics: {}
date: 2020-09-21_16-18-16
done: false
episode_len_mean: 2003.22
episode_reward_max: -21.973405018039006
episode_reward_mean: -29.23477336877788
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 139
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.612499661743641
      entropy_coeff: 0.005
      kl: 0.014893449493683875
      policy_loss: -0.13786960439756513
      total_loss: -0.13011915888637304
      vf_explained_var: 0.9997740983963013
      vf_loss: 0.0007333285939239431
  num_steps_sampled: 238000
  num_steps_trained: 238000
iterations_since_restore: 238
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.495652173913045
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.94499098212821

2020-09-21 16:18:20,358	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.09397415909916162, 'policy_loss': -0.0005574927199631929, 'vf_loss': 0.09901629760861397, 'vf_explained_var': 0.9719167, 'kl': 0.0030172615205548148, 'entropy': 1.5079245492815971, 'entropy_coeff': 0.005}
2020-09-21 16:18:20,848	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.0004755980335175991, 'policy_loss': -0.03040712559595704, 'vf_loss': 0.033151511452160776, 'vf_explained_var': 0.99026185, 'kl': 0.005163325724424794, 'entropy': 1.4993299320340157, 'entropy_coeff': 0.005}
2020-09-21 16:18:21,301	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.032007772824727, 'policy_loss': -0.044056792510673404, 'vf_loss': 0.01438752529793419, 'vf_explained_var': 0.9956198, 'kl': 0.0050691092474153265, 'entropy': 1.494196079671383, 'entropy_coeff': 0.00

2020-09-21 16:18:26,969	DEBUG sgd.py:120 -- 14 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.1041313644964248, 'policy_loss': -0.10927185718901455, 'vf_loss': 0.0012120786086597946, 'vf_explained_var': 0.9996177, 'kl': 0.011173141829203814, 'entropy': 1.4768782034516335, 'entropy_coeff': 0.005}
2020-09-21 16:18:27,412	DEBUG sgd.py:120 -- 15 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.10471398197114468, 'policy_loss': -0.1103764675790444, 'vf_loss': 0.0011550800154509488, 'vf_explained_var': 0.99963397, 'kl': 0.01173475559335202, 'entropy': 1.4748088493943214, 'entropy_coeff': 0.005}
2020-09-21 16:18:27,898	DEBUG sgd.py:120 -- 16 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.11038635566364974, 'policy_loss': -0.116187640465796, 'vf_loss': 0.001081771984900115, 'vf_explained_var': 0.9996767, 'kl': 0.01197477534878999, 'entropy': 1.480990208685398, 'entropy_coeff': 0.005}

custom_metrics: {}
date: 2020-09-21_16-18-34
done: false
episode_len_mean: 2050.35
episode_reward_max: -21.973405018039006
episode_reward_mean: -29.180106330775885
episode_reward_min: -32.22698139089825
episodes_this_iter: 1
episodes_total: 140
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.4760284200310707
      entropy_coeff: 0.005
      kl: 0.014185065985657275
      policy_loss: -0.13713698321953416
      total_loss: -0.12969477905426174
      vf_explained_var: 0.9998562932014465
      vf_loss: 0.0004599676867655944
  num_steps_sampled: 239000
  num_steps_trained: 239000
iterations_since_restore: 239
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.054545454545455
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.944990982128

[2m[36m(pid=24699)[0m 2020-09-21 16:18:34,310	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 9647.389222511087,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 2308},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-1.309, max=1.916, mean=0.257),
[2m[36m(pid=24699)[0m                                   'prev_action': 6,
[2m[36m(pid=24699)[

2020-09-21 16:18:36,819	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.11991086642956361, 'policy_loss': -0.0007975066546350718, 'vf_loss': 0.1276370733976364, 'vf_explained_var': 0.96232414, 'kl': 0.0006059679020866682, 'entropy': 1.50844756513834, 'entropy_coeff': 0.005}
2020-09-21 16:18:37,275	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.018566510872915387, 'policy_loss': -0.027904634807782713, 'vf_loss': 0.04994306107982993, 'vf_explained_var': 0.986046, 'kl': 0.0040021046152105555, 'entropy': 1.5048085078597069, 'entropy_coeff': 0.005}
2020-09-21 16:18:37,760	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.022083002608269453, 'policy_loss': -0.04303912748582661, 'vf_loss': 0.023163316946011037, 'vf_explained_var': 0.99316496, 'kl': 0.005225373868597671, 'entropy': 1.499575287103653, 'entropy_coeff': 0.005

2020-09-21 16:18:48,646	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.09537725627887994, 'policy_loss': -0.10153795540099964, 'vf_loss': 0.0006809897440689383, 'vf_explained_var': 0.9997921, 'kl': 0.01290280866669491, 'entropy': 1.516875982284546, 'entropy_coeff': 0.005}
2020-09-21 16:18:49,134	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.09924067358952016, 'policy_loss': -0.10542914317920804, 'vf_loss': 0.0006690608624921879, 'vf_explained_var': 0.9997944, 'kl': 0.012909606390167028, 'entropy': 1.5103142112493515, 'entropy_coeff': 0.005}
2020-09-21 16:18:49,588	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.10524498485028744, 'policy_loss': -0.11187532334588468, 'vf_loss': 0.0005862214529770426, 'vf_explained_var': 0.99981904, 'kl': 0.0134440892143175, 'entropy': 1.5136052072048187, 'entropy_coeff': 0.

custom_metrics: {}
date: 2020-09-21_16-18-50
done: false
episode_len_mean: 2090.11
episode_reward_max: -21.771561900678247
episode_reward_mean: -29.07892230153467
episode_reward_min: -32.22698139089825
episodes_this_iter: 1
episodes_total: 141
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.5142247751355171
      entropy_coeff: 0.005
      kl: 0.013565981993451715
      policy_loss: -0.11220138298813254
      total_loss: -0.10546111594885588
      vf_explained_var: 0.999826192855835
      vf_loss: 0.0005758295519626699
  num_steps_sampled: 240000
  num_steps_trained: 240000
iterations_since_restore: 240
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.263636363636364
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.94499098212821

[2m[36m(pid=24699)[0m 2020-09-21 16:18:50,805	INFO rollout_worker.py:561 -- Generating sample batch of size 200
2020-09-21 16:18:53,218	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.14912296924740076, 'policy_loss': 0.0009569632820785046, 'vf_loss': 0.15532705606892705, 'vf_explained_var': 0.97093755, 'kl': 0.0005410965419769265, 'entropy': 1.5417836531996727, 'entropy_coeff': 0.005}
2020-09-21 16:18:53,701	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.024503914173692465, 'policy_loss': -0.019793619925621897, 'vf_loss': 0.04818688775412738, 'vf_explained_var': 0.990518, 'kl': 0.0037067838638904504, 'entropy': 1.528495155274868, 'entropy_coeff': 0.005}
2020-09-21 16:18:54,155	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.027537713525816798, 'policy_loss': -0.04334771423600614, 'vf_loss': 0.0185076862107962

2020-09-21 16:19:05,090	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.11790089774876833, 'policy_loss': -0.12473231425974518, 'vf_loss': 0.0006013030179019552, 'vf_explained_var': 0.9998584, 'kl': 0.013745378411840647, 'entropy': 1.53741654753685, 'entropy_coeff': 0.005}
2020-09-21 16:19:05,544	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.11584626388503239, 'policy_loss': -0.12340264258091338, 'vf_loss': 0.0005864708527951734, 'vf_explained_var': 0.99986935, 'kl': 0.014459459111094475, 'entropy': 1.5340596288442612, 'entropy_coeff': 0.005}
2020-09-21 16:19:06,024	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.12389322760282084, 'policy_loss': -0.13125922228209674, 'vf_loss': 0.0005726808176405029, 'vf_explained_var': 0.9998722, 'kl': 0.014291321334894747, 'entropy': 1.535331018269062, 'entropy_coeff': 0

custom_metrics: {}
date: 2020-09-21_16-19-06
done: false
episode_len_mean: 2090.11
episode_reward_max: -21.771561900678247
episode_reward_mean: -29.07892230153467
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 141
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.5361055433750153
      entropy_coeff: 0.005
      kl: 0.014882974559441209
      policy_loss: -0.1278800298459828
      total_loss: -0.11997667199466377
      vf_explained_var: 0.9998867511749268
      vf_loss: 0.0005148757645656588
  num_steps_sampled: 241000
  num_steps_trained: 241000
iterations_since_restore: 241
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.257142857142857
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999998
  vram_util_percent0: 0.94499098212821

2020-09-21 16:19:09,574	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.16164492396637797, 'policy_loss': 0.0006275338819250464, 'vf_loss': 0.16657915525138378, 'vf_explained_var': 0.97553676, 'kl': 0.002250990867906144, 'entropy': 1.5681787133216858, 'entropy_coeff': 0.005}
2020-09-21 16:19:10,056	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.022728009964339435, 'policy_loss': -0.019770266953855753, 'vf_loss': 0.047901191865094006, 'vf_explained_var': 0.9935821, 'kl': 0.002494251777534373, 'entropy': 1.5856697410345078, 'entropy_coeff': 0.005}
2020-09-21 16:19:10,541	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.02367894258350134, 'policy_loss': -0.037655922933481634, 'vf_loss': 0.015966470818966627, 'vf_explained_var': 0.99735546, 'kl': 0.005820644146297127, 'entropy': 1.5765781551599503, 'entropy_coeff': 0.

2020-09-21 16:19:21,458	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.11682305205613375, 'policy_loss': -0.12521471118088812, 'vf_loss': 0.00048462694940099027, 'vf_explained_var': 0.99990785, 'kl': 0.015521821682341397, 'entropy': 1.5617632642388344, 'entropy_coeff': 0.005}
2020-09-21 16:19:21,945	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.10859499522484839, 'policy_loss': -0.11685700388625264, 'vf_loss': 0.0004743300760310376, 'vf_explained_var': 0.99991345, 'kl': 0.015369071043096483, 'entropy': 1.5547024309635162, 'entropy_coeff': 0.005}
2020-09-21 16:19:22,399	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.11258580355206504, 'policy_loss': -0.12178262195084244, 'vf_loss': 0.0004629581999324728, 'vf_explained_var': 0.9999194, 'kl': 0.016308009275235236, 'entropy': 1.555600382387638, 'entropy_coeff

custom_metrics: {}
date: 2020-09-21_16-19-23
done: false
episode_len_mean: 2090.11
episode_reward_max: -21.771561900678247
episode_reward_mean: -29.07892230153467
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 141
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.554118126630783
      entropy_coeff: 0.005
      kl: 0.01588000578340143
      policy_loss: -0.12666571780573577
      total_loss: -0.11796825053170323
      vf_explained_var: 0.9999247789382935
      vf_loss: 0.0003895535392075544
  num_steps_sampled: 242000
  num_steps_trained: 242000
iterations_since_restore: 242
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.081818181818182
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.944990982128217

2020-09-21 16:19:26,157	DEBUG rollout_worker.py:760 -- Training out:

{ 'default_policy': { 'learner_stats': { 'allreduce_latency': 0.0,
                                         'cur_kl_coeff': 1.0125,
                                         'cur_lr': 1e-05,
                                         'entropy': 1.6167069673538208,
                                         'entropy_coeff': 0.005,
                                         'kl': -2.8976408827929845e-08,
                                         'policy_loss': -0.023073513060808182,
                                         'total_loss': 0.12092601507902145,
                                         'vf_explained_var': np.ndarray((1,), dtype=float32, min=0.955, max=0.955, mean=0.955),
                                         'vf_loss': 0.15208308398723602}}}

2020-09-21 16:19:26,162	INFO rollout_worker.py:736 -- Training on concatenated sample batches:

{ 'count': 64,
  'policy_batches': { 'default_policy': { 'data': { 'action_d

2020-09-21 16:19:33,215	DEBUG sgd.py:120 -- 14 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.10352512111421674, 'policy_loss': -0.1077886475250125, 'vf_loss': 0.0008541424067516346, 'vf_explained_var': 0.99976724, 'kl': 0.0116492576780729, 'entropy': 1.6770993620157242, 'entropy_coeff': 0.005}
2020-09-21 16:19:33,670	DEBUG sgd.py:120 -- 15 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.1100131532875821, 'policy_loss': -0.11424530413933098, 'vf_loss': 0.0007487173788831569, 'vf_explained_var': 0.99978197, 'kl': 0.011719401634763926, 'entropy': 1.676491767168045, 'entropy_coeff': 0.005}
2020-09-21 16:19:34,155	DEBUG sgd.py:120 -- 16 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.11472635925747454, 'policy_loss': -0.11916774837300181, 'vf_loss': 0.0007434169456246309, 'vf_explained_var': 0.9997906, 'kl': 0.01193736569257453, 'entropy': 1.6777227073907852, 'entropy_coeff': 0.0

custom_metrics: {}
date: 2020-09-21_16-19-40
done: false
episode_len_mean: 2090.11
episode_reward_max: -21.771561900678247
episode_reward_mean: -29.07892230153467
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 141
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.6658824756741524
      entropy_coeff: 0.005
      kl: 0.014490589790511876
      policy_loss: -0.1364555365871638
      total_loss: -0.12976898322813213
      vf_explained_var: 0.9998955726623535
      vf_loss: 0.0003442399683990516
  num_steps_sampled: 243000
  num_steps_trained: 243000
iterations_since_restore: 243
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.2285714285714295
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999998
  vram_util_percent0: 0.9449909821282

2020-09-21 16:19:43,471	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.08577297790907323, 'policy_loss': -0.0012446483597159386, 'vf_loss': 0.09364691167138517, 'vf_explained_var': 0.97662914, 'kl': 0.000877233139129352, 'entropy': 1.5034967511892319, 'entropy_coeff': 0.005}
2020-09-21 16:19:43,964	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.0026486414717510343, 'policy_loss': -0.028569444082677364, 'vf_loss': 0.028864987194538116, 'vf_explained_var': 0.9922824, 'kl': 0.004535512009169906, 'entropy': 1.5072776675224304, 'entropy_coeff': 0.005}
2020-09-21 16:19:44,421	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.03264038625638932, 'policy_loss': -0.043942478543613106, 'vf_loss': 0.013357825169805437, 'vf_explained_var': 0.99645585, 'kl': 0.0054345592216122895, 'entropy': 1.5116451904177666, 'entropy_coeff'

2020-09-21 16:19:55,402	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.12978784111328423, 'policy_loss': -0.13849658286198974, 'vf_loss': 0.00034045008942484856, 'vf_explained_var': 0.99990356, 'kl': 0.015831647790037096, 'entropy': 1.5322506874799728, 'entropy_coeff': 0.005}
2020-09-21 16:19:55,860	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.13092310936190188, 'policy_loss': -0.13911204016767442, 'vf_loss': 0.0003244388681196142, 'vf_explained_var': 0.9999087, 'kl': 0.015330981346778572, 'entropy': 1.531625509262085, 'entropy_coeff': 0.005}
2020-09-21 16:19:56,346	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.133847841527313, 'policy_loss': -0.14210404036566615, 'vf_loss': 0.00031428620059159584, 'vf_explained_var': 0.9999169, 'kl': 0.015397246985230595, 'entropy': 1.529559887945652, 'entropy_coeff': 

custom_metrics: {}
date: 2020-09-21_16-19-57
done: false
episode_len_mean: 2090.11
episode_reward_max: -21.771561900678247
episode_reward_mean: -29.07892230153467
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 141
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.5311526134610176
      entropy_coeff: 0.005
      kl: 0.01603827840881422
      policy_loss: -0.1335435500368476
      total_loss: -0.12469386705197394
      vf_explained_var: 0.9999244213104248
      vf_loss: 0.00026668206010072026
  num_steps_sampled: 244000
  num_steps_trained: 244000
iterations_since_restore: 244
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.204545454545454
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.94499098212821

[2m[36m(pid=24699)[0m 2020-09-21 16:19:57,579	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 9002.270390373786,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 3308},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-0.992, max=2.308, mean=0.507),
[2m[36m(pid=24699)[0m                                   'prev_action': 3,
[2m[36m(pid=24699)[

2020-09-21 16:20:00,205	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.07739562215283513, 'policy_loss': -0.0012733410112559795, 'vf_loss': 0.085252481745556, 'vf_explained_var': 0.9666474, 'kl': 0.0013965072430990877, 'entropy': 1.5994961708784103, 'entropy_coeff': 0.005}
2020-09-21 16:20:00,694	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.0020652874372899532, 'policy_loss': -0.02598709613084793, 'vf_loss': 0.029147428227588534, 'vf_explained_var': 0.9876985, 'kl': 0.002688424225198105, 'entropy': 1.589529275894165, 'entropy_coeff': 0.005}
2020-09-21 16:20:01,142	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.033604127704165876, 'policy_loss': -0.042241763905622065, 'vf_loss': 0.012498872180003673, 'vf_explained_var': 0.99485666, 'kl': 0.003931147017283365, 'entropy': 1.56830433011055, 'entropy_coeff': 0.00

2020-09-21 16:20:12,060	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.13020223611965775, 'policy_loss': -0.1372430888004601, 'vf_loss': 0.00043092496707686223, 'vf_explained_var': 0.999805, 'kl': 0.014335671497974545, 'entropy': 1.5809875056147575, 'entropy_coeff': 0.005}
2020-09-21 16:20:12,514	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.12695610139053315, 'policy_loss': -0.13426267192699015, 'vf_loss': 0.0004203955249977298, 'vf_explained_var': 0.99980307, 'kl': 0.01459539873758331, 'entropy': 1.578334242105484, 'entropy_coeff': 0.005}
2020-09-21 16:20:13,001	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.1268204721272923, 'policy_loss': -0.13432932924479246, 'vf_loss': 0.0004026487349619856, 'vf_explained_var': 0.9998112, 'kl': 0.014854286098852754, 'entropy': 1.5867520347237587, 'entropy_coeff': 0.

custom_metrics: {}
date: 2020-09-21_16-20-13
done: false
episode_len_mean: 2090.11
episode_reward_max: -21.771561900678247
episode_reward_mean: -29.07892230153467
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 141
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.5703075975179672
      entropy_coeff: 0.005
      kl: 0.015353956026956439
      policy_loss: -0.13919798098504543
      total_loss: -0.13113120384514332
      vf_explained_var: 0.9998332262039185
      vf_loss: 0.0003724359066836769
  num_steps_sampled: 245000
  num_steps_trained: 245000
iterations_since_restore: 245
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.190909090909091
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.9449909821282

[2m[36m(pid=24699)[0m 2020-09-21 16:20:14,186	INFO rollout_worker.py:561 -- Generating sample batch of size 200
2020-09-21 16:20:17,350	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.0989955555414781, 'policy_loss': 0.0031575319590047, 'vf_loss': 0.10277567012235522, 'vf_explained_var': 0.93006957, 'kl': 0.0008044392625473629, 'entropy': 1.550427883863449, 'entropy_coeff': 0.005}
2020-09-21 16:20:17,840	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.0024690114660188556, 'policy_loss': -0.03518913977313787, 'vf_loss': 0.04100037505850196, 'vf_explained_var': 0.9707581, 'kl': 0.0043451993115013465, 'entropy': 1.5483480617403984, 'entropy_coeff': 0.005}
2020-09-21 16:20:18,294	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.03760736633557826, 'policy_loss': -0.05654719891026616, 'vf_loss': 0.019867240043822676, 

2020-09-21 16:20:26,197	DEBUG rollout_worker.py:760 -- Training out:

{ 'default_policy': { 'learner_stats': { 'allreduce_latency': 0.0,
                                         'cur_kl_coeff': 1.0125,
                                         'cur_lr': 1e-05,
                                         'entropy': 1.4838536977767944,
                                         'entropy_coeff': 0.005,
                                         'kl': 0.012255329638719559,
                                         'policy_loss': -0.20093977451324463,
                                         'total_loss': -0.19528941810131073,
                                         'vf_explained_var': np.ndarray((1,), dtype=float32, min=0.999, max=0.999, mean=0.999),
                                         'vf_loss': 0.0006611162680201232}}}

2020-09-21 16:20:26,388	DEBUG sgd.py:120 -- 19 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.12122772203292698, 'policy_loss': -0.1278

custom_metrics: {}
date: 2020-09-21_16-20-31
done: false
episode_len_mean: 2090.11
episode_reward_max: -21.771561900678247
episode_reward_mean: -29.07892230153467
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 141
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.5370035395026207
      entropy_coeff: 0.005
      kl: 0.01505836215801537
      policy_loss: -0.14163650409318507
      total_loss: -0.13371106190606952
      vf_explained_var: 0.9997122287750244
      vf_loss: 0.00036386356623552274
  num_steps_sampled: 246000
  num_steps_trained: 246000
iterations_since_restore: 246
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.4409090909090905
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.944990982128

2020-09-21 16:20:34,465	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.12572021852247417, 'policy_loss': 0.0020235265837982297, 'vf_loss': 0.12979215732775629, 'vf_explained_var': 0.9540134, 'kl': 0.0012021346082958795, 'entropy': 1.462526798248291, 'entropy_coeff': 0.005}
2020-09-21 16:20:34,946	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.002231499820481986, 'policy_loss': -0.03048802400007844, 'vf_loss': 0.03507717570755631, 'vf_explained_var': 0.986555, 'kl': 0.004869187105214223, 'entropy': 1.4575418680906296, 'entropy_coeff': 0.005}
2020-09-21 16:20:35,396	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.02802719990722835, 'policy_loss': -0.040870460274163634, 'vf_loss': 0.014055302948690951, 'vf_explained_var': 0.99448866, 'kl': 0.005984348797937855, 'entropy': 1.4542393162846565, 'entropy_coeff': 0.005}

2020-09-21 16:20:46,294	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.11319505493156612, 'policy_loss': -0.12219892785651609, 'vf_loss': 0.0004016088987555122, 'vf_explained_var': 0.9998281, 'kl': 0.015590696188155562, 'entropy': 1.4366632550954819, 'entropy_coeff': 0.005}
2020-09-21 16:20:46,745	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.11603819951415062, 'policy_loss': -0.12541452515870333, 'vf_loss': 0.0003803143099503359, 'vf_explained_var': 0.9998455, 'kl': 0.01597124495310709, 'entropy': 1.43497534096241, 'entropy_coeff': 0.005}
2020-09-21 16:20:47,228	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.11939684278331697, 'policy_loss': -0.1288959444500506, 'vf_loss': 0.0003670292726383195, 'vf_explained_var': 0.9998598, 'kl': 0.016100107808597386, 'entropy': 1.4338574260473251, 'entropy_coeff': 0.0

custom_metrics: {}
date: 2020-09-21_16-20-48
done: false
episode_len_mean: 2090.11
episode_reward_max: -21.771561900678247
episode_reward_mean: -29.07892230153467
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 141
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.4351190701127052
      entropy_coeff: 0.005
      kl: 0.016836519702337682
      policy_loss: -0.12884395208675414
      total_loss: -0.11864040757063776
      vf_explained_var: 0.9998754262924194
      vf_loss: 0.00033216514384548645
  num_steps_sampled: 247000
  num_steps_trained: 247000
iterations_since_restore: 247
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.3545454545454545
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.94499098212

2020-09-21 16:20:51,141	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.12419911031611264, 'policy_loss': 0.0006177945761010051, 'vf_loss': 0.13048619031906128, 'vf_explained_var': 0.93085873, 'kl': 0.0006256886951582219, 'entropy': 1.5076774135231972, 'entropy_coeff': 0.005}
2020-09-21 16:20:51,593	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.0024674476590007544, 'policy_loss': -0.025966416927985847, 'vf_loss': 0.03297520836349577, 'vf_explained_var': 0.98022497, 'kl': 0.0029795095324516296, 'entropy': 1.5116196125745773, 'entropy_coeff': 0.005}
2020-09-21 16:20:52,077	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.04021048697177321, 'policy_loss': -0.051660467055626214, 'vf_loss': 0.01375575183192268, 'vf_explained_var': 0.99170995, 'kl': 0.00516440678620711, 'entropy': 1.5069473162293434, 'entropy_coeff': 0

2020-09-21 16:21:02,987	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.13176231482066214, 'policy_loss': -0.14056654856540263, 'vf_loss': 0.0004937139401590684, 'vf_explained_var': 0.9996983, 'kl': 0.015644477563910186, 'entropy': 1.5059026032686234, 'entropy_coeff': 0.005}
2020-09-21 16:21:03,472	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.1348238738719374, 'policy_loss': -0.1432858337648213, 'vf_loss': 0.00044587982483790256, 'vf_explained_var': 0.9997203, 'kl': 0.015328337030950934, 'entropy': 1.5007716789841652, 'entropy_coeff': 0.005}
2020-09-21 16:21:03,955	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.1310476156650111, 'policy_loss': -0.1400551670230925, 'vf_loss': 0.00042491714157222304, 'vf_explained_var': 0.9997277, 'kl': 0.01591092173475772, 'entropy': 1.5054343715310097, 'entropy_coeff': 0.

custom_metrics: {}
date: 2020-09-21_16-21-04
done: false
episode_len_mean: 2131.46
episode_reward_max: -21.771561900678247
episode_reward_mean: -29.025943371534495
episode_reward_min: -32.22698139089825
episodes_this_iter: 1
episodes_total: 142
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.503337286412716
      entropy_coeff: 0.005
      kl: 0.016133429075125605
      policy_loss: -0.13734414405189455
      total_loss: -0.12815208826214075
      vf_explained_var: 0.9997622966766357
      vf_loss: 0.0003736432418008917
  num_steps_sampled: 248000
  num_steps_trained: 248000
iterations_since_restore: 248
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.20909090909091
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.94499098212821

2020-09-21 16:21:08,366	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.14995950472075492, 'policy_loss': 0.005715697538107634, 'vf_loss': 0.1498649357818067, 'vf_explained_var': 0.92541814, 'kl': 0.0022439886645673757, 'entropy': 1.5786332190036774, 'entropy_coeff': 0.005}
2020-09-21 16:21:08,850	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.0016487645916640759, 'policy_loss': -0.039862534671556205, 'vf_loss': 0.04304002190474421, 'vf_explained_var': 0.9768588, 'kl': 0.002981279685627669, 'entropy': 1.568959154188633, 'entropy_coeff': 0.005}
2020-09-21 16:21:09,307	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.04502366742963204, 'policy_loss': -0.061154172639362514, 'vf_loss': 0.016862861113622785, 'vf_explained_var': 0.9907634, 'kl': 0.006967198773054406, 'entropy': 1.5573292896151543, 'entropy_coeff': 0.00

2020-09-21 16:21:20,232	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.1352849886752665, 'policy_loss': -0.14185346011072397, 'vf_loss': 0.0003743716897588456, 'vf_explained_var': 0.9997808, 'kl': 0.013926077052019536, 'entropy': 1.5812111869454384, 'entropy_coeff': 0.005}
2020-09-21 16:21:20,688	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.14283270842861384, 'policy_loss': -0.1495294573251158, 'vf_loss': 0.00034274387871846557, 'vf_explained_var': 0.9997963, 'kl': 0.014060703746508807, 'entropy': 1.5764930546283722, 'entropy_coeff': 0.005}
2020-09-21 16:21:21,173	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.14304988214280456, 'policy_loss': -0.14952897222246975, 'vf_loss': 0.0003295859478384955, 'vf_explained_var': 0.99979854, 'kl': 0.013857277866918594, 'entropy': 1.5761976912617683, 'entropy_coeff':

custom_metrics: {}
date: 2020-09-21_16-21-22
done: false
episode_len_mean: 2131.46
episode_reward_max: -21.771561900678247
episode_reward_mean: -29.025943371534495
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 142
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.5746160075068474
      entropy_coeff: 0.005
      kl: 0.014481233025435358
      policy_loss: -0.14599910960532725
      total_loss: -0.13889258867129683
      vf_explained_var: 0.9997916221618652
      vf_loss: 0.00031734978256281465
  num_steps_sampled: 249000
  num_steps_trained: 249000
iterations_since_restore: 249
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.290909090909091
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.94499098212

[2m[36m(pid=24699)[0m 2020-09-21 16:21:22,416	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 8635.496694445537,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 4308},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-9.296, max=10.375, mean=0.797),
[2m[36m(pid=24699)[0m                                   'prev_action': 0,
[2m[36m(pid=24699)

2020-09-21 16:21:25,150	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.17646499897819012, 'policy_loss': -0.001163596985861659, 'vf_loss': 0.18487236485816538, 'vf_explained_var': 0.9784509, 'kl': 0.0007345228097415113, 'entropy': 1.597495973110199, 'entropy_coeff': 0.005}
2020-09-21 16:21:25,644	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.010083094239234924, 'policy_loss': -0.024857779731974006, 'vf_loss': 0.03990793798584491, 'vf_explained_var': 0.99480164, 'kl': 0.0029950355092296377, 'entropy': 1.5999071896076202, 'entropy_coeff': 0.005}
2020-09-21 16:21:26,100	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.023233797634020448, 'policy_loss': -0.034929605200886726, 'vf_loss': 0.015376959985587746, 'vf_explained_var': 0.9980394, 'kl': 0.004271974874427542, 'entropy': 1.6013058200478554, 'entropy_coeff': 0.

2020-09-21 16:21:31,857	DEBUG sgd.py:120 -- 14 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.1000609899056144, 'policy_loss': -0.10449220362352207, 'vf_loss': 0.0010525295292609371, 'vf_explained_var': 0.99986243, 'kl': 0.01125433630659245, 'entropy': 1.603266142308712, 'entropy_coeff': 0.005}
2020-09-21 16:21:32,342	DEBUG sgd.py:120 -- 15 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.09695736761204898, 'policy_loss': -0.10119088273495436, 'vf_loss': 0.0009407326542714145, 'vf_explained_var': 0.999872, 'kl': 0.011183592141605914, 'entropy': 1.6061200723052025, 'entropy_coeff': 0.005}
2020-09-21 16:21:32,801	DEBUG sgd.py:120 -- 16 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.11214016703888774, 'policy_loss': -0.11693970777560025, 'vf_loss': 0.0008331023309438024, 'vf_explained_var': 0.9998908, 'kl': 0.011841978528536856, 'entropy': 1.6047117337584496, 'entropy_coeff': 0.

custom_metrics: {}
date: 2020-09-21_16-21-39
done: false
episode_len_mean: 2131.46
episode_reward_max: -21.771561900678247
episode_reward_mean: -29.025943371534495
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 142
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.6052980795502663
      entropy_coeff: 0.005
      kl: 0.014133904129266739
      policy_loss: -0.13084701099433005
      total_loss: -0.12418755050748587
      vf_explained_var: 0.999948263168335
      vf_loss: 0.0003753758737730095
  num_steps_sampled: 250000
  num_steps_trained: 250000
iterations_since_restore: 250
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.277272727272727
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.9449909821282

[2m[36m(pid=24699)[0m 2020-09-21 16:21:39,271	INFO rollout_worker.py:561 -- Generating sample batch of size 200
2020-09-21 16:21:41,818	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.06883719062898308, 'policy_loss': 0.0048302022041752934, 'vf_loss': 0.07126465602777898, 'vf_explained_var': 0.95321983, 'kl': 0.0006397583278667796, 'entropy': 1.5810861960053444, 'entropy_coeff': 0.005}
2020-09-21 16:21:42,274	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.0028896865842398256, 'policy_loss': -0.02090984454844147, 'vf_loss': 0.02328861062414944, 'vf_explained_var': 0.98419917, 'kl': 0.002546681891544722, 'entropy': 1.5693938359618187, 'entropy_coeff': 0.005}
2020-09-21 16:21:42,784	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.032137202797457576, 'policy_loss': -0.039531890070065856, 'vf_loss': 0.010221995529

2020-09-21 16:21:53,677	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.12079995973181212, 'policy_loss': -0.1275981908547692, 'vf_loss': 0.0003192274944012752, 'vf_explained_var': 0.99977404, 'kl': 0.014208299748133868, 'entropy': 1.5813786461949348, 'entropy_coeff': 0.005}
2020-09-21 16:21:54,161	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.11461450858041644, 'policy_loss': -0.12142921891063452, 'vf_loss': 0.00028740198195009725, 'vf_explained_var': 0.999789, 'kl': 0.0142443478689529, 'entropy': 1.5790185332298279, 'entropy_coeff': 0.005}
2020-09-21 16:21:54,642	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.12946038972586393, 'policy_loss': -0.13629791024141014, 'vf_loss': 0.0002884647101382143, 'vf_explained_var': 0.99979806, 'kl': 0.014248762396164238, 'entropy': 1.5755629017949104, 'entropy_coeff': 

custom_metrics: {}
date: 2020-09-21_16-21-55
done: false
episode_len_mean: 2131.46
episode_reward_max: -21.771561900678247
episode_reward_mean: -29.025943371534495
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 142
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.5759359523653984
      entropy_coeff: 0.005
      kl: 0.015062905382364988
      policy_loss: -0.132483446970582
      total_loss: -0.1248475726461038
      vf_explained_var: 0.9998000264167786
      vf_loss: 0.0002643621392053319
  num_steps_sampled: 251000
  num_steps_trained: 251000
iterations_since_restore: 251
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.6000000000000005
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.94499098212821

2020-09-21 16:21:58,795	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.0698859118274413, 'policy_loss': 0.0015290307346731424, 'vf_loss': 0.07533125579357147, 'vf_explained_var': 0.97420657, 'kl': 0.0006856116941957868, 'entropy': 1.5337106436491013, 'entropy_coeff': 0.005}
2020-09-21 16:21:59,276	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.015005908440798521, 'policy_loss': -0.02998476999346167, 'vf_loss': 0.019799780391622335, 'vf_explained_var': 0.993515, 'kl': 0.0027804830970126204, 'entropy': 1.5272311866283417, 'entropy_coeff': 0.005}
2020-09-21 16:21:59,731	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.04530465370044112, 'policy_loss': -0.051356839714571834, 'vf_loss': 0.00816428568214178, 'vf_explained_var': 0.9971478, 'kl': 0.005417669686721638, 'entropy': 1.519498459994793, 'entropy_coeff': 0.005

2020-09-21 16:22:10,607	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.11217319127172232, 'policy_loss': -0.11858044419204816, 'vf_loss': 0.0002877835249819327, 'vf_explained_var': 0.99988854, 'kl': 0.013570800481829792, 'entropy': 1.524192526936531, 'entropy_coeff': 0.005}
2020-09-21 16:22:11,093	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.12364254822023213, 'policy_loss': -0.1299266575369984, 'vf_loss': 0.0002733046731009381, 'vf_explained_var': 0.9998951, 'kl': 0.01349340786691755, 'entropy': 1.5302549302577972, 'entropy_coeff': 0.005}
2020-09-21 16:22:11,581	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.11821887700352818, 'policy_loss': -0.12460954941343516, 'vf_loss': 0.00025476981591054937, 'vf_explained_var': 0.9998997, 'kl': 0.013628988526761532, 'entropy': 1.5326905399560928, 'entropy_coeff': 

custom_metrics: {}
date: 2020-09-21_16-22-12
done: false
episode_len_mean: 2131.46
episode_reward_max: -21.771561900678247
episode_reward_mean: -29.025943371534495
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 142
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.5265080034732819
      entropy_coeff: 0.005
      kl: 0.013835991034284234
      policy_loss: -0.12157583312364295
      total_loss: -0.11495850270148367
      vf_explained_var: 0.9999114274978638
      vf_loss: 0.00024093305910355411
  num_steps_sampled: 252000
  num_steps_trained: 252000
iterations_since_restore: 252
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.922727272727273
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.94499098212

2020-09-21 16:22:16,004	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.06586696184240282, 'policy_loss': 0.0042504670564085245, 'vf_loss': 0.06804876204114407, 'vf_explained_var': 0.9566622, 'kl': 0.0005567703368989374, 'entropy': 1.39919912815094, 'entropy_coeff': 0.005}
2020-09-21 16:22:16,491	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.0026480265660211444, 'policy_loss': -0.024354992667213082, 'vf_loss': 0.023090633563697338, 'vf_explained_var': 0.9835067, 'kl': 0.005571681875153445, 'entropy': 1.4049992635846138, 'entropy_coeff': 0.005}
2020-09-21 16:22:16,963	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.028345989412628114, 'policy_loss': -0.03520397166721523, 'vf_loss': 0.009270918264519423, 'vf_explained_var': 0.99315655, 'kl': 0.004552977756247856, 'entropy': 1.4045658931136131, 'entropy_coeff': 0.

2020-09-21 16:22:26,327	DEBUG rollout_worker.py:760 -- Training out:

{ 'default_policy': { 'learner_stats': { 'allreduce_latency': 0.0,
                                         'cur_kl_coeff': 1.0125,
                                         'cur_lr': 1e-05,
                                         'entropy': 1.3963899612426758,
                                         'entropy_coeff': 0.005,
                                         'kl': 0.013279031030833721,
                                         'policy_loss': 0.04975033551454544,
                                         'total_loss': 0.05661485344171524,
                                         'vf_explained_var': np.ndarray((1,), dtype=float32, min=1.0, max=1.0, mean=1.0),
                                         'vf_loss': 0.0004014326259493828}}}

2020-09-21 16:22:26,439	DEBUG sgd.py:120 -- 22 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.09996094403322786, 'policy_loss': -0.106321070110

custom_metrics: {}
date: 2020-09-21_16-22-29
done: false
episode_len_mean: 2131.46
episode_reward_max: -21.771561900678247
episode_reward_mean: -29.025943371534495
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 142
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.388867475092411
      entropy_coeff: 0.005
      kl: 0.013215930026490241
      policy_loss: -0.11316681827884167
      total_loss: -0.10643219260964543
      vf_explained_var: 0.9997758865356445
      vf_loss: 0.0002978303182317177
  num_steps_sampled: 253000
  num_steps_trained: 253000
iterations_since_restore: 253
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.327272727272727
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.9449909821282

2020-09-21 16:22:32,848	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.1952100619673729, 'policy_loss': -0.0012241593794897199, 'vf_loss': 0.20245895627886057, 'vf_explained_var': 0.9353336, 'kl': 0.0010441260804230912, 'entropy': 1.4163813441991806, 'entropy_coeff': 0.005}
2020-09-21 16:22:33,335	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.03331899945624173, 'policy_loss': -0.034386618761345744, 'vf_loss': 0.07011377322487533, 'vf_explained_var': 0.9755509, 'kl': 0.004517620720434934, 'entropy': 1.3964481726288795, 'entropy_coeff': 0.005}
2020-09-21 16:22:33,784	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.012476248084567487, 'policy_loss': -0.04434037418104708, 'vf_loss': 0.03451762837357819, 'vf_explained_var': 0.9874936, 'kl': 0.004304545946069993, 'entropy': 1.4023713171482086, 'entropy_coeff': 0.005}

2020-09-21 16:22:44,697	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.1080401900690049, 'policy_loss': -0.11517042759805918, 'vf_loss': 0.0006715052040817682, 'vf_explained_var': 0.99976075, 'kl': 0.013318239827640355, 'entropy': 1.4051979258656502, 'entropy_coeff': 0.005}
2020-09-21 16:22:45,156	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.10889537073671818, 'policy_loss': -0.11629143776372075, 'vf_loss': 0.000606478384725051, 'vf_explained_var': 0.9997691, 'kl': 0.013659408898092806, 'entropy': 1.408112719655037, 'entropy_coeff': 0.005}
2020-09-21 16:22:45,645	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.1040869359858334, 'policy_loss': -0.11143715132493526, 'vf_loss': 0.0005589175361819798, 'vf_explained_var': 0.9997834, 'kl': 0.013648085412569344, 'entropy': 1.405477799475193, 'entropy_coeff': 0.0

custom_metrics: {}
date: 2020-09-21_16-22-46
done: false
episode_len_mean: 2176.36
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.96624589571231
episode_reward_min: -32.22698139089825
episodes_this_iter: 1
episodes_total: 143
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.405701920390129
      entropy_coeff: 0.005
      kl: 0.014303167699836195
      policy_loss: -0.1184811748098582
      total_loss: -0.11050481861457229
      vf_explained_var: 0.9998100996017456
      vf_loss: 0.0005229071921348805
  num_steps_sampled: 254000
  num_steps_trained: 254000
iterations_since_restore: 254
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.331818181818182
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.944990982128217

[2m[36m(pid=24699)[0m 2020-09-21 16:22:46,889	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 10074.35,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 128},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-1.141, max=1.396, mean=0.209),
[2m[36m(pid=24699)[0m                                   'prev_action': 6,
[2m[36m(pid=24699)[0m        

2020-09-21 16:22:49,256	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.11664946674136445, 'policy_loss': -0.010360150365158916, 'vf_loss': 0.13290294399484992, 'vf_explained_var': 0.92363244, 'kl': 0.0011807128023361146, 'entropy': 1.417758770287037, 'entropy_coeff': 0.005}
2020-09-21 16:22:49,711	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.012631797697395086, 'policy_loss': -0.028410358703695238, 'vf_loss': 0.04479150706902146, 'vf_explained_var': 0.9742787, 'kl': 0.0032421770301880315, 'entropy': 1.4064108207821846, 'entropy_coeff': 0.005}
2020-09-21 16:22:50,198	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.01598487829323858, 'policy_loss': -0.03727099811658263, 'vf_loss': 0.02036479744128883, 'vf_explained_var': 0.98734033, 'kl': 0.007884485297836363, 'entropy': 1.4123446568846703, 'entropy_coeff': 0.00

2020-09-21 16:23:01,090	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.11120448261499405, 'policy_loss': -0.11911832669284195, 'vf_loss': 0.0004628210390364984, 'vf_explained_var': 0.99970794, 'kl': 0.014255271991714835, 'entropy': 1.3964898139238358, 'entropy_coeff': 0.005}
2020-09-21 16:23:01,574	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.10260780801763758, 'policy_loss': -0.11138910701265559, 'vf_loss': 0.00042541136826912407, 'vf_explained_var': 0.99972284, 'kl': 0.015119841147679836, 'entropy': 1.3905902653932571, 'entropy_coeff': 0.005}
2020-09-21 16:23:02,040	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.10043063556076959, 'policy_loss': -0.1086440184735693, 'vf_loss': 0.00041447190596954897, 'vf_explained_var': 0.9997372, 'kl': 0.014602180570363998, 'entropy': 1.3971578478813171, 'entropy_coef

custom_metrics: {}
date: 2020-09-21_16-23-03
done: false
episode_len_mean: 2176.36
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.96624589571231
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 143
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.392757587134838
      entropy_coeff: 0.005
      kl: 0.014772134192753583
      policy_loss: -0.12241582619026303
      total_loss: -0.11404267465695739
      vf_explained_var: 0.9997518062591553
      vf_loss: 0.000380153387595783
  num_steps_sampled: 255000
  num_steps_trained: 255000
iterations_since_restore: 255
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.440909090909091
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.944990982128217

[2m[36m(pid=24699)[0m 2020-09-21 16:23:03,262	INFO rollout_worker.py:561 -- Generating sample batch of size 200
2020-09-21 16:23:06,193	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.17838830687105656, 'policy_loss': 0.0015743686817586422, 'vf_loss': 0.1832252792082727, 'vf_explained_var': 0.9799258, 'kl': 0.0007351612297805676, 'entropy': 1.4311374127864838, 'entropy_coeff': 0.005}
2020-09-21 16:23:06,682	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.015332626120653003, 'policy_loss': -0.03238869330380112, 'vf_loss': 0.05047416186425835, 'vf_explained_var': 0.9939159, 'kl': 0.004342633503256366, 'entropy': 1.429951012134552, 'entropy_coeff': 0.005}
2020-09-21 16:23:07,127	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.0261393761029467, 'policy_loss': -0.04828080418519676, 'vf_loss': 0.022284467646386474, '

2020-09-21 16:23:18,051	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.1377873252495192, 'policy_loss': -0.14627508155535907, 'vf_loss': 0.0005176962422410725, 'vf_explained_var': 0.99993134, 'kl': 0.014973236946389079, 'entropy': 1.4380659461021423, 'entropy_coeff': 0.005}
2020-09-21 16:23:18,503	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.12479163869284093, 'policy_loss': -0.13359288388164714, 'vf_loss': 0.00046516056318068877, 'vf_explained_var': 0.9999387, 'kl': 0.015346759115345776, 'entropy': 1.4405022487044334, 'entropy_coeff': 0.005}
2020-09-21 16:23:18,990	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.12870243121869862, 'policy_loss': -0.13767729047685862, 'vf_loss': 0.0004378153971629217, 'vf_explained_var': 0.9999431, 'kl': 0.01552420825464651, 'entropy': 1.4362452626228333, 'entropy_coeff':

custom_metrics: {}
date: 2020-09-21_16-23-19
done: false
episode_len_mean: 2176.36
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.96624589571231
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 143
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.4423262849450111
      entropy_coeff: 0.005
      kl: 0.015385142818558961
      policy_loss: -0.13672987360041589
      total_loss: -0.127971155452542
      vf_explained_var: 0.9999457597732544
      vf_loss: 0.00039289332380576525
  num_steps_sampled: 256000
  num_steps_trained: 256000
iterations_since_restore: 256
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.476190476190476
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999998
  vram_util_percent0: 0.94499098212821

2020-09-21 16:23:23,250	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.12783081672387198, 'policy_loss': 0.005252498667687178, 'vf_loss': 0.12850001826882362, 'vf_explained_var': 0.92797613, 'kl': 0.0012261304563686015, 'entropy': 1.4326312318444252, 'entropy_coeff': 0.005}
2020-09-21 16:23:23,739	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.009232067735865712, 'policy_loss': -0.03275778563693166, 'vf_loss': 0.04469758924096823, 'vf_explained_var': 0.9759263, 'kl': 0.0043648713326547295, 'entropy': 1.425434187054634, 'entropy_coeff': 0.005}
2020-09-21 16:23:24,191	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.035964472335763276, 'policy_loss': -0.05344477033941075, 'vf_loss': 0.018774114840198308, 'vf_explained_var': 0.98925054, 'kl': 0.005765080233686604, 'entropy': 1.4261925518512726, 'entropy_coeff': 0.00

2020-09-21 16:23:29,881	DEBUG sgd.py:120 -- 14 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.11428024189081043, 'policy_loss': -0.12161818920867518, 'vf_loss': 0.0011318611541355494, 'vf_explained_var': 0.99932873, 'kl': 0.013234656653366983, 'entropy': 1.4388001337647438, 'entropy_coeff': 0.005}
2020-09-21 16:23:30,331	DEBUG sgd.py:120 -- 15 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.11564614647068083, 'policy_loss': -0.12241481023374945, 'vf_loss': 0.0009443331691727508, 'vf_explained_var': 0.9994176, 'kl': 0.012877287284936756, 'entropy': 1.4427844882011414, 'entropy_coeff': 0.005}
2020-09-21 16:23:30,812	DEBUG sgd.py:120 -- 16 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.11917865881696343, 'policy_loss': -0.12649746221723035, 'vf_loss': 0.0008815379242150811, 'vf_explained_var': 0.99947923, 'kl': 0.013481710280757397, 'entropy': 1.4425940662622452, 'entropy_coeff

custom_metrics: {}
date: 2020-09-21_16-23-36
done: false
episode_len_mean: 2176.36
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.96624589571231
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 143
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.4394278600811958
      entropy_coeff: 0.005
      kl: 0.016132613585796207
      policy_loss: -0.1433989570941776
      total_loss: -0.13392809766810387
      vf_explained_var: 0.9997966289520264
      vf_loss: 0.00033372412326571066
  num_steps_sampled: 257000
  num_steps_trained: 257000
iterations_since_restore: 257
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.427272727272728
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.9449909821282

2020-09-21 16:23:40,117	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.24784720363095403, 'policy_loss': -0.002533091465011239, 'vf_loss': 0.2563743805512786, 'vf_explained_var': 0.95891035, 'kl': 0.0008973982731629859, 'entropy': 1.3805399760603905, 'entropy_coeff': 0.005}
2020-09-21 16:23:40,604	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.051999614923261106, 'policy_loss': -0.026219682302325964, 'vf_loss': 0.08243538090027869, 'vf_explained_var': 0.9871259, 'kl': 0.002622566702484619, 'entropy': 1.374286338686943, 'entropy_coeff': 0.005}
2020-09-21 16:23:41,058	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.009333253197837621, 'policy_loss': -0.03524754487443715, 'vf_loss': 0.047653935733251274, 'vf_explained_var': 0.9928565, 'kl': 0.003734945712494664, 'entropy': 1.3709544092416763, 'entropy_coeff': 0.005}

2020-09-21 16:23:51,927	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.10347782564349473, 'policy_loss': -0.11207214021123946, 'vf_loss': 0.0007199320571089629, 'vf_explained_var': 0.99986327, 'kl': 0.014542111079208553, 'entropy': 1.3698999881744385, 'entropy_coeff': 0.005}
2020-09-21 16:23:52,410	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.10432759951800108, 'policy_loss': -0.1124895301181823, 'vf_loss': 0.0006919362695043674, 'vf_explained_var': 0.9998675, 'kl': 0.014172655588481575, 'entropy': 1.3759647235274315, 'entropy_coeff': 0.005}
2020-09-21 16:23:52,897	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.10889808763749897, 'policy_loss': -0.11728475568816066, 'vf_loss': 0.0006395199106918881, 'vf_explained_var': 0.9998745, 'kl': 0.01440658641513437, 'entropy': 1.367905929684639, 'entropy_coeff': 0

custom_metrics: {}
date: 2020-09-21_16-23-53
done: false
episode_len_mean: 2176.36
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.96624589571231
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 143
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.376244030892849
      entropy_coeff: 0.005
      kl: 0.014661440858617425
      policy_loss: -0.12182421423494816
      total_loss: -0.11326572694815695
      vf_explained_var: 0.9998923540115356
      vf_loss: 0.0005949968654022086
  num_steps_sampled: 258000
  num_steps_trained: 258000
iterations_since_restore: 258
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.422727272727273
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.94499098212821

2020-09-21 16:23:56,947	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.23920826031826437, 'policy_loss': 0.0033641234040260315, 'vf_loss': 0.24109903816133738, 'vf_explained_var': 0.9718425, 'kl': 0.0016244793891511122, 'entropy': 1.379938393831253, 'entropy_coeff': 0.005}
2020-09-21 16:23:57,400	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.06462100782664493, 'policy_loss': -0.022183196968398988, 'vf_loss': 0.09001616574823856, 'vf_explained_var': 0.98993176, 'kl': 0.0036567340284818783, 'entropy': 1.3828818276524544, 'entropy_coeff': 0.005}
2020-09-21 16:23:57,878	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.001276296330615878, 'policy_loss': -0.0420905202627182, 'vf_loss': 0.04127846751362085, 'vf_explained_var': 0.9946726, 'kl': 0.006398173340130597, 'entropy': 1.3884795233607292, 'entropy_coeff': 0.005}

2020-09-21 16:24:08,730	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.10993234103079885, 'policy_loss': -0.12133148207794875, 'vf_loss': 0.0007453974321833812, 'vf_explained_var': 0.9999049, 'kl': 0.01743643672671169, 'entropy': 1.4001296609640121, 'entropy_coeff': 0.005}
2020-09-21 16:24:09,216	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.11965077556669712, 'policy_loss': -0.13099476520437747, 'vf_loss': 0.0006607943905692082, 'vf_explained_var': 0.9999132, 'kl': 0.017454261134844273, 'entropy': 1.3978494331240654, 'entropy_coeff': 0.005}
2020-09-21 16:24:09,696	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.1169481513206847, 'policy_loss': -0.12918076664209366, 'vf_loss': 0.0006397313663910609, 'vf_explained_var': 0.9999171, 'kl': 0.018370653328020126, 'entropy': 1.4014796614646912, 'entropy_coeff': 0

custom_metrics: {}
date: 2020-09-21_16-24-10
done: false
episode_len_mean: 2176.36
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.96624589571231
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 143
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.3989534750580788
      entropy_coeff: 0.005
      kl: 0.018084322684444487
      policy_loss: -0.1334895483450964
      total_loss: -0.12161881336942315
      vf_explained_var: 0.99992835521698
      vf_loss: 0.0005551252543227747
  num_steps_sampled: 259000
  num_steps_trained: 259000
iterations_since_restore: 259
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.386363636363637
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.9449909821282177

[2m[36m(pid=24699)[0m 2020-09-21 16:24:11,275	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 15811.912674653888,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 1128},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-4.054, max=4.288, mean=0.338),
[2m[36m(pid=24699)[0m                                   'prev_action': 0,
[2m[36m(pid=24699)

2020-09-21 16:24:14,200	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.11494094354566187, 'policy_loss': -0.005252996808849275, 'vf_loss': 0.12565021612681448, 'vf_explained_var': 0.91113293, 'kl': 0.001442957064899053, 'entropy': 1.383454091846943, 'entropy_coeff': 0.005}
2020-09-21 16:24:14,683	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.003764393273741007, 'policy_loss': -0.034464524826034904, 'vf_loss': 0.04276577243581414, 'vf_explained_var': 0.97013295, 'kl': 0.002356969052925706, 'entropy': 1.384657084941864, 'entropy_coeff': 0.005}
2020-09-21 16:24:15,139	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.03402233705855906, 'policy_loss': -0.053159043192863464, 'vf_loss': 0.02110079920385033, 'vf_explained_var': 0.9845805, 'kl': 0.0048805374099174514, 'entropy': 1.3811278343200684, 'entropy_coeff': 0.005

2020-09-21 16:24:26,038	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.12233304604887962, 'policy_loss': -0.13312191574368626, 'vf_loss': 0.0004775662582687801, 'vf_explained_var': 0.9996377, 'kl': 0.016975070349872112, 'entropy': 1.3751912415027618, 'entropy_coeff': 0.005}
2020-09-21 16:24:26,378	INFO rollout_worker.py:736 -- Training on concatenated sample batches:

{ 'count': 64,
  'policy_batches': { 'default_policy': { 'data': { 'action_dist_inputs': np.ndarray((64, 21), dtype=float32, min=-7.006, max=8.456, mean=-0.213),
                                                    'action_logp': np.ndarray((64,), dtype=float32, min=-6.144, max=-0.012, mean=-1.521),
                                                    'action_prob': np.ndarray((64,), dtype=float32, min=0.002, max=0.988, mean=0.409),
                                                    'actions': np.ndarray((64,), dtype=int64, min=0.0, max=20.0, mean=5.891),
       

custom_metrics: {}
date: 2020-09-21_16-24-27
done: false
episode_len_mean: 2176.36
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.96624589571231
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 143
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.375134401023388
      entropy_coeff: 0.005
      kl: 0.017845775175374
      policy_loss: -0.13845486275386065
      total_loss: -0.1268929272191599
      vf_explained_var: 0.9997332096099854
      vf_loss: 0.00036875938712910283
  num_steps_sampled: 260000
  num_steps_trained: 260000
iterations_since_restore: 260
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.45909090909091
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.9449909821282177
 

[2m[36m(pid=24699)[0m 2020-09-21 16:24:28,507	INFO rollout_worker.py:561 -- Generating sample batch of size 200
2020-09-21 16:24:31,575	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.13667248841375113, 'policy_loss': -4.0801940485835075e-05, 'vf_loss': 0.14219510648399591, 'vf_explained_var': 0.9399831, 'kl': 0.0017787344739045308, 'entropy': 1.456557646393776, 'entropy_coeff': 0.005}
2020-09-21 16:24:32,039	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.00999883795157075, 'policy_loss': -0.029626045376062393, 'vf_loss': 0.044068478862755, 'vf_explained_var': 0.97896194, 'kl': 0.002851302568160463, 'entropy': 1.466108113527298, 'entropy_coeff': 0.005}
2020-09-21 16:24:32,524	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.0390142310061492, 'policy_loss': -0.05604425008641556, 'vf_loss': 0.01780224865069613, '

2020-09-21 16:24:43,383	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.14228350669145584, 'policy_loss': -0.15325428685173392, 'vf_loss': 0.0006104434942244552, 'vf_explained_var': 0.9996412, 'kl': 0.017438654496800154, 'entropy': 1.4592600762844086, 'entropy_coeff': 0.005}
2020-09-21 16:24:43,866	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.14084502845071256, 'policy_loss': -0.1523034581914544, 'vf_loss': 0.0005651054671034217, 'vf_explained_var': 0.99965584, 'kl': 0.017957303498405963, 'entropy': 1.4576903283596039, 'entropy_coeff': 0.005}
2020-09-21 16:24:44,318	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.14709723531268537, 'policy_loss': -0.15846803085878491, 'vf_loss': 0.0005484207013068954, 'vf_explained_var': 0.9996784, 'kl': 0.01787239540135488, 'entropy': 1.4546844437718391, 'entropy_coeff': 

custom_metrics: {}
date: 2020-09-21_16-24-45
done: false
episode_len_mean: 2176.36
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.96624589571231
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 143
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.4560146108269691
      entropy_coeff: 0.005
      kl: 0.01854016480501741
      policy_loss: -0.15334101766347885
      total_loss: -0.14136986923404038
      vf_explained_var: 0.9997266530990601
      vf_loss: 0.0004793055231857579
  num_steps_sampled: 261000
  num_steps_trained: 261000
iterations_since_restore: 261
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.343478260869565
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.94499098212821

2020-09-21 16:24:49,219	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.4999830638989806, 'policy_loss': 0.0007842063205316663, 'vf_loss': 0.5056556491181254, 'vf_explained_var': 0.8660389, 'kl': 0.00044527418380058137, 'entropy': 1.3815246596932411, 'entropy_coeff': 0.005}
2020-09-21 16:24:49,705	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.0923851077677682, 'policy_loss': -0.023329447489231825, 'vf_loss': 0.12054275418631732, 'vf_explained_var': 0.96377903, 'kl': 0.002047798832791159, 'entropy': 1.3803184255957603, 'entropy_coeff': 0.005}
2020-09-21 16:24:50,149	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.004431314999237657, 'policy_loss': -0.04242947488091886, 'vf_loss': 0.05044484813697636, 'vf_explained_var': 0.98408175, 'kl': 0.003263911734393332, 'entropy': 1.3777545243501663, 'entropy_coeff': 0.005}


2020-09-21 16:25:01,031	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.08745791506953537, 'policy_loss': -0.09321780875325203, 'vf_loss': 0.0010424262727610767, 'vf_explained_var': 0.99965787, 'kl': 0.011542790627572685, 'entropy': 1.3939210996031761, 'entropy_coeff': 0.005}
2020-09-21 16:25:01,484	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.08742509502917528, 'policy_loss': -0.09318235830869526, 'vf_loss': 0.0009621030658308882, 'vf_explained_var': 0.99968016, 'kl': 0.011584169929847121, 'entropy': 1.3867623880505562, 'entropy_coeff': 0.005}
2020-09-21 16:25:01,984	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.08460887175169773, 'policy_loss': -0.09057473787106574, 'vf_loss': 0.0008986049142549746, 'vf_explained_var': 0.99968433, 'kl': 0.011872392846271396, 'entropy': 1.3907062038779259, 'entropy_coef

custom_metrics: {}
date: 2020-09-21_16-25-02
done: false
episode_len_mean: 2176.36
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.96624589571231
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 143
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.3910768181085587
      entropy_coeff: 0.005
      kl: 0.012302639719564468
      policy_loss: -0.09777250653132796
      total_loss: -0.09142213501036167
      vf_explained_var: 0.9997406005859375
      vf_loss: 0.0008493305776937632
  num_steps_sampled: 262000
  num_steps_trained: 262000
iterations_since_restore: 262
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.445454545454544
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.9449909821282

2020-09-21 16:25:06,515	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.24503943882882595, 'policy_loss': 0.0020761642372235656, 'vf_loss': 0.2496167472563684, 'vf_explained_var': 0.92332685, 'kl': 0.0003891071121308598, 'entropy': 1.4094891622662544, 'entropy_coeff': 0.005}
2020-09-21 16:25:07,000	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.05215487501118332, 'policy_loss': -0.02383387996815145, 'vf_loss': 0.08013107045553625, 'vf_explained_var': 0.9747307, 'kl': 0.0029174549563322216, 'entropy': 1.4192475005984306, 'entropy_coeff': 0.005}
2020-09-21 16:25:07,451	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.008975942386314273, 'policy_loss': -0.04798166558612138, 'vf_loss': 0.04104884841945022, 'vf_explained_var': 0.9872608, 'kl': 0.005001293815439567, 'entropy': 1.4213871732354164, 'entropy_coeff': 0.005}

2020-09-21 16:25:18,352	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.12935546692460775, 'policy_loss': -0.14088768290821463, 'vf_loss': 0.0016450345901830588, 'vf_explained_var': 0.9994346, 'kl': 0.016747784160543233, 'entropy': 1.4139897003769875, 'entropy_coeff': 0.005}
2020-09-21 16:25:18,807	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.13484703202266246, 'policy_loss': -0.14667899312917143, 'vf_loss': 0.001609682774869725, 'vf_explained_var': 0.99946415, 'kl': 0.017091486835852265, 'entropy': 1.4165717139840126, 'entropy_coeff': 0.005}
2020-09-21 16:25:19,294	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.12925358104985207, 'policy_loss': -0.14112793828826398, 'vf_loss': 0.0014537006827595178, 'vf_explained_var': 0.999501, 'kl': 0.017269222356844693, 'entropy': 1.412886567413807, 'entropy_coeff': 0

custom_metrics: {}
date: 2020-09-21_16-25-20
done: false
episode_len_mean: 2176.36
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.96624589571231
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 143
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.409431904554367
      entropy_coeff: 0.005
      kl: 0.017917769902851433
      policy_loss: -0.15407464979216456
      total_loss: -0.141606735996902
      vf_explained_var: 0.9995289444923401
      vf_loss: 0.001373327715555206
  num_steps_sampled: 263000
  num_steps_trained: 263000
iterations_since_restore: 263
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.463636363636364
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.9449909821282177


2020-09-21 16:25:24,452	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.16616665048059076, 'policy_loss': -0.003841549623757601, 'vf_loss': 0.17482162732630968, 'vf_explained_var': 0.91484785, 'kl': 0.0017887165884893363, 'entropy': 1.324900060892105, 'entropy_coeff': 0.005}
2020-09-21 16:25:24,935	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.050671870354563, 'policy_loss': -0.021567017654888332, 'vf_loss': 0.07340445462614298, 'vf_explained_var': 0.96534514, 'kl': 0.005371781662688591, 'entropy': 1.3208993524312973, 'entropy_coeff': 0.005}
2020-09-21 16:25:25,382	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.004020074964500964, 'policy_loss': -0.04025197844021022, 'vf_loss': 0.03752267442177981, 'vf_explained_var': 0.9817097, 'kl': 0.005244112777290866, 'entropy': 1.3200870677828789, 'entropy_coeff': 0.005}


2020-09-21 16:25:31,076	DEBUG sgd.py:120 -- 14 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.09014340175781399, 'policy_loss': -0.1003653728403151, 'vf_loss': 0.0036544465692713857, 'vf_explained_var': 0.99811184, 'kl': 0.013057507865596563, 'entropy': 1.3306402117013931, 'entropy_coeff': 0.005}
2020-09-21 16:25:31,520	DEBUG sgd.py:120 -- 15 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.09456142445560545, 'policy_loss': -0.1045065673533827, 'vf_loss': 0.003309465479105711, 'vf_explained_var': 0.9982963, 'kl': 0.013117827358655632, 'entropy': 1.3292246311903, 'entropy_coeff': 0.005}
2020-09-21 16:25:32,017	DEBUG sgd.py:120 -- 16 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.0945842147921212, 'policy_loss': -0.10456104605691507, 'vf_loss': 0.0030001574850757606, 'vf_explained_var': 0.99845487, 'kl': 0.013450216269120574, 'entropy': 1.3283340856432915, 'entropy_coeff': 0.00

custom_metrics: {}
date: 2020-09-21_16-25-38
done: false
episode_len_mean: 2176.36
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.96624589571231
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 143
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.3352017402648926
      entropy_coeff: 0.005
      kl: 0.0163375930278562
      policy_loss: -0.1175192513037473
      total_loss: -0.10629756608977914
      vf_explained_var: 0.9993181228637695
      vf_loss: 0.0013558731116063427
  num_steps_sampled: 264000
  num_steps_trained: 264000
iterations_since_restore: 264
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.569565217391306
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.9449909821282177

[2m[36m(pid=24699)[0m 2020-09-21 16:25:38,452	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 11636.48,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 2128},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-1.209, max=2.365, mean=0.382),
[2m[36m(pid=24699)[0m                                   'prev_action': 0,
[2m[36m(pid=24699)[0m       

2020-09-21 16:25:42,279	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.34515353688038886, 'policy_loss': -0.0072225992335006595, 'vf_loss': 0.35769179463386536, 'vf_explained_var': 0.90046155, 'kl': 0.0013579483144011695, 'entropy': 1.3381149619817734, 'entropy_coeff': 0.005}
2020-09-21 16:25:42,764	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.08182787976693362, 'policy_loss': -0.02523775736335665, 'vf_loss': 0.10930440225638449, 'vf_explained_var': 0.97173536, 'kl': 0.004431780631421134, 'entropy': 1.3451889678835869, 'entropy_coeff': 0.005}
2020-09-21 16:25:43,217	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.013612975599244237, 'policy_loss': -0.037981134257279336, 'vf_loss': 0.05425731628201902, 'vf_explained_var': 0.98267436, 'kl': 0.0039743204397382215, 'entropy': 1.3374412208795547, 'entropy_coeff': 0.

2020-09-21 16:25:54,147	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.09517196484375745, 'policy_loss': -0.1029107934446074, 'vf_loss': 0.0014449443551711738, 'vf_explained_var': 0.9995087, 'kl': 0.012814548856113106, 'entropy': 1.3361697494983673, 'entropy_coeff': 0.005}
2020-09-21 16:25:54,597	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.09411393414484337, 'policy_loss': -0.10198717776802368, 'vf_loss': 0.0013448929967125878, 'vf_explained_var': 0.9995718, 'kl': 0.013065566308796406, 'entropy': 1.3401077687740326, 'entropy_coeff': 0.005}
2020-09-21 16:25:55,083	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.0945738279260695, 'policy_loss': -0.10279192682355642, 'vf_loss': 0.0012654355487029534, 'vf_explained_var': 0.9995811, 'kl': 0.013469893194269389, 'entropy': 1.337121233344078, 'entropy_coeff': 0.

custom_metrics: {}
date: 2020-09-21_16-25-56
done: false
episode_len_mean: 2176.36
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.96624589571231
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 143
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.3337004780769348
      entropy_coeff: 0.005
      kl: 0.013909905566833913
      policy_loss: -0.10336649332020897
      total_loss: -0.09477240478008753
      vf_explained_var: 0.9996427297592163
      vf_loss: 0.001178808310214663
  num_steps_sampled: 265000
  num_steps_trained: 265000
iterations_since_restore: 265
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.743478260869565
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.94499098212821

[2m[36m(pid=24699)[0m 2020-09-21 16:25:56,339	INFO rollout_worker.py:561 -- Generating sample batch of size 200
2020-09-21 16:26:00,197	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.15298556559719145, 'policy_loss': 0.0011151958751725033, 'vf_loss': 0.1576867252588272, 'vf_explained_var': 0.89912695, 'kl': 0.0009655301790614801, 'entropy': 1.3587917611002922, 'entropy_coeff': 0.005}
2020-09-21 16:26:00,674	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.03925000748131424, 'policy_loss': -0.024870642344467342, 'vf_loss': 0.06696724891662598, 'vf_explained_var': 0.9589696, 'kl': 0.003848539163300302, 'entropy': 1.3486502319574356, 'entropy_coeff': 0.005}
2020-09-21 16:26:01,116	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.0031774186063557863, 'policy_loss': -0.034800810855813324, 'vf_loss': 0.033457161276601

2020-09-21 16:26:11,982	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.12829629355110228, 'policy_loss': -0.13940719072706997, 'vf_loss': 0.0019791574086411856, 'vf_explained_var': 0.9987408, 'kl': 0.01564007345587015, 'entropy': 1.3407668098807335, 'entropy_coeff': 0.005}
2020-09-21 16:26:12,426	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.12070236902218312, 'policy_loss': -0.13238786137662828, 'vf_loss': 0.0020580049822456203, 'vf_explained_var': 0.99878776, 'kl': 0.016132174408994615, 'entropy': 1.3412669822573662, 'entropy_coeff': 0.005}
2020-09-21 16:26:12,901	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.12081869679968804, 'policy_loss': -0.13265288074035197, 'vf_loss': 0.0019527074036886916, 'vf_explained_var': 0.9988849, 'kl': 0.016327217570506036, 'entropy': 1.3299646973609924, 'entropy_coeff':

custom_metrics: {}
date: 2020-09-21_16-26-13
done: false
episode_len_mean: 2176.36
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.96624589571231
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 143
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.3355200588703156
      entropy_coeff: 0.005
      kl: 0.017054967465810478
      policy_loss: -0.144789187470451
      total_loss: -0.1326802390976809
      vf_explained_var: 0.9990134239196777
      vf_loss: 0.0015183871364570223
  num_steps_sampled: 266000
  num_steps_trained: 266000
iterations_since_restore: 266
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.5086956521739125
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.944990982128217

2020-09-21 16:26:18,000	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.21269836672581732, 'policy_loss': 0.004043239518068731, 'vf_loss': 0.2134583368897438, 'vf_explained_var': 0.94750667, 'kl': 0.0010616614325245077, 'entropy': 1.175627924501896, 'entropy_coeff': 0.005}
2020-09-21 16:26:18,486	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.08513964281883091, 'policy_loss': -0.01430750044528395, 'vf_loss': 0.10057241283357143, 'vf_explained_var': 0.97460264, 'kl': 0.004785016717505641, 'entropy': 1.1940220519900322, 'entropy_coeff': 0.005}
2020-09-21 16:26:18,934	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.01128094014711678, 'policy_loss': -0.04207444749772549, 'vf_loss': 0.05339118489064276, 'vf_explained_var': 0.98483324, 'kl': 0.00584343247464858, 'entropy': 1.1904542669653893, 'entropy_coeff': 0.005}
202

2020-09-21 16:26:26,503	DEBUG rollout_worker.py:760 -- Training out:

{ 'default_policy': { 'learner_stats': { 'allreduce_latency': 0.0,
                                         'cur_kl_coeff': 1.0125,
                                         'cur_lr': 1e-05,
                                         'entropy': 1.2617380619049072,
                                         'entropy_coeff': 0.005,
                                         'kl': 0.014123819768428802,
                                         'policy_loss': -0.1768249273300171,
                                         'total_loss': -0.1656762957572937,
                                         'vf_explained_var': np.ndarray((1,), dtype=float32, min=0.999, max=0.999, mean=0.999),
                                         'vf_loss': 0.003156983060762286}}}

2020-09-21 16:26:26,504	DEBUG sgd.py:120 -- 18 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.09554814989678562, 'policy_loss': -0.1058580

custom_metrics: {}
date: 2020-09-21_16-26-31
done: false
episode_len_mean: 2176.36
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.96624589571231
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 143
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.1920848786830902
      entropy_coeff: 0.005
      kl: 0.015090343309566379
      policy_loss: -0.11729117739014328
      total_loss: -0.10619057749863714
      vf_explained_var: 0.9994869232177734
      vf_loss: 0.001782050145266112
  num_steps_sampled: 267000
  num_steps_trained: 267000
iterations_since_restore: 267
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.57391304347826
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.944990982128217

2020-09-21 16:26:35,902	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.21507363975979388, 'policy_loss': -0.0012745843268930912, 'vf_loss': 0.22073440719395876, 'vf_explained_var': 0.85617656, 'kl': 0.0019216603741700755, 'entropy': 1.266372799873352, 'entropy_coeff': 0.005}
2020-09-21 16:26:36,388	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.08620947343297303, 'policy_loss': -0.01801689580315724, 'vf_loss': 0.10488594137132168, 'vf_explained_var': 0.9323127, 'kl': 0.005578965021413751, 'entropy': 1.2616549879312515, 'entropy_coeff': 0.005}
2020-09-21 16:26:36,837	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.018458012054907158, 'policy_loss': -0.04071185062639415, 'vf_loss': 0.05926800589077175, 'vf_explained_var': 0.9604721, 'kl': 0.006151111650979146, 'entropy': 1.2652283981442451, 'entropy_coeff': 0.005}


2020-09-21 16:26:47,710	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.103133988333866, 'policy_loss': -0.11411580652929842, 'vf_loss': 0.0028456204090616666, 'vf_explained_var': 0.9980954, 'kl': 0.014257939590606838, 'entropy': 1.2599924057722092, 'entropy_coeff': 0.005}
2020-09-21 16:26:48,161	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.10151352279353887, 'policy_loss': -0.11326696164906025, 'vf_loss': 0.0027128349611302838, 'vf_explained_var': 0.99817073, 'kl': 0.015131998341530561, 'entropy': 1.2561088278889656, 'entropy_coeff': 0.005}
2020-09-21 16:26:48,646	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.0985625151079148, 'policy_loss': -0.11040882742963731, 'vf_loss': 0.0025501987111056224, 'vf_explained_var': 0.9982886, 'kl': 0.015425221878103912, 'entropy': 1.2643863372504711, 'entropy_coeff': 0

custom_metrics: {}
date: 2020-09-21_16-26-49
done: false
episode_len_mean: 2176.36
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.96624589571231
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 143
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.2653879299759865
      entropy_coeff: 0.005
      kl: 0.014626393152866513
      policy_loss: -0.11803425522521138
      total_loss: -0.10725361341610551
      vf_explained_var: 0.9984778165817261
      vf_loss: 0.0022983553426456638
  num_steps_sampled: 268000
  num_steps_trained: 268000
iterations_since_restore: 268
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.717391304347825
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.699999999999996
  vram_util_percent0: 0.9449909821282

2020-09-21 16:26:53,732	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.2458550384035334, 'policy_loss': 0.0031067520612850785, 'vf_loss': 0.24790498614311218, 'vf_explained_var': 0.90706825, 'kl': 0.0012912830616173265, 'entropy': 1.29282546043396, 'entropy_coeff': 0.005}
2020-09-21 16:26:54,221	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.061819423688575625, 'policy_loss': -0.03301962511613965, 'vf_loss': 0.09726939350366592, 'vf_explained_var': 0.96363544, 'kl': 0.0039315288595389575, 'entropy': 1.2822030037641525, 'entropy_coeff': 0.005}
2020-09-21 16:26:54,675	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.00182523753028363, 'policy_loss': -0.05092868662904948, 'vf_loss': 0.04835560265928507, 'vf_explained_var': 0.9812807, 'kl': 0.007084225653670728, 'entropy': 1.2849865481257439, 'entropy_coeff': 0.005}


2020-09-21 16:27:05,607	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.13058804359752685, 'policy_loss': -0.14284858445171267, 'vf_loss': 0.002158241935831029, 'vf_explained_var': 0.99911535, 'kl': 0.016292305197566748, 'entropy': 1.2787318006157875, 'entropy_coeff': 0.005}
2020-09-21 16:27:06,053	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.12148623075336218, 'policy_loss': -0.13410749449394643, 'vf_loss': 0.00211847777973162, 'vf_explained_var': 0.99913186, 'kl': 0.016685888345818967, 'entropy': 1.2783347144722939, 'entropy_coeff': 0.005}
2020-09-21 16:27:06,531	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.12329312111251056, 'policy_loss': -0.13579712016507983, 'vf_loss': 0.0020292767003411427, 'vf_explained_var': 0.99917, 'kl': 0.016630566911771894, 'entropy': 1.2727454826235771, 'entropy_coeff': 0.

custom_metrics: {}
date: 2020-09-21_16-27-07
done: false
episode_len_mean: 2176.36
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.96624589571231
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 143
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.278932861983776
      entropy_coeff: 0.005
      kl: 0.017459616356063634
      policy_loss: -0.14159202738665044
      total_loss: -0.12852817890234292
      vf_explained_var: 0.9992697834968567
      vf_loss: 0.0017806500472943299
  num_steps_sampled: 269000
  num_steps_trained: 269000
iterations_since_restore: 269
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 6.565217391304348
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.721739130434779
  vram_util_percent0: 0.94499098212821

[2m[36m(pid=24699)[0m 2020-09-21 16:27:07,754	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 10442.75631348234,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 3128},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-20.23, max=9.616, mean=0.425),
[2m[36m(pid=24699)[0m                                   'prev_action': 0,
[2m[36m(pid=24699)[

2020-09-21 16:27:11,916	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.26563079422339797, 'policy_loss': 0.004926204914227128, 'vf_loss': 0.26583360601216555, 'vf_explained_var': 0.917087, 'kl': 0.0012275684450515634, 'entropy': 1.2743861973285675, 'entropy_coeff': 0.005}
2020-09-21 16:27:12,416	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.09479136671870947, 'policy_loss': -0.02200317452661693, 'vf_loss': 0.12050057342275977, 'vf_explained_var': 0.96053565, 'kl': 0.002589072202681564, 'entropy': 1.2654939517378807, 'entropy_coeff': 0.005}
2020-09-21 16:27:12,873	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.034660795936360955, 'policy_loss': -0.038705854036379606, 'vf_loss': 0.0753432062920183, 'vf_explained_var': 0.9769732, 'kl': 0.004283524438505992, 'entropy': 1.2627252265810966, 'entropy_coeff': 0.005}
20

2020-09-21 16:27:23,778	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.10576697764918208, 'policy_loss': -0.11695701605640352, 'vf_loss': 0.002614644239656627, 'vf_explained_var': 0.99909115, 'kl': 0.014775870542507619, 'entropy': 1.2770355120301247, 'entropy_coeff': 0.005}
2020-09-21 16:27:24,232	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.10349226160906255, 'policy_loss': -0.11494143959134817, 'vf_loss': 0.002408719534287229, 'vf_explained_var': 0.9991499, 'kl': 0.015278724837116897, 'entropy': 1.2858496829867363, 'entropy_coeff': 0.005}
2020-09-21 16:27:24,715	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.10905063385143876, 'policy_loss': -0.12029111734591424, 'vf_loss': 0.002279167681990657, 'vf_explained_var': 0.99918914, 'kl': 0.015172580780927092, 'entropy': 1.28018419444561, 'entropy_coeff': 0.

custom_metrics: {}
date: 2020-09-21_16-27-25
done: false
episode_len_mean: 2176.36
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.96624589571231
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 143
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.2795625030994415
      entropy_coeff: 0.005
      kl: 0.01580302562797442
      policy_loss: -0.13269394915550947
      total_loss: -0.12107561796437949
      vf_explained_var: 0.9992867708206177
      vf_loss: 0.002015578225837089
  num_steps_sampled: 270000
  num_steps_trained: 270000
iterations_since_restore: 270
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.604347826086957
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.708695652173908
  vram_util_percent0: 0.944990982128217

[2m[36m(pid=24699)[0m 2020-09-21 16:27:25,977	INFO rollout_worker.py:561 -- Generating sample batch of size 200
2020-09-21 16:27:29,746	DEBUG rollout_worker.py:760 -- Training out:

{ 'default_policy': { 'learner_stats': { 'allreduce_latency': 0.0,
                                         'cur_kl_coeff': 1.0125,
                                         'cur_lr': 1e-05,
                                         'entropy': 1.3682541847229004,
                                         'entropy_coeff': 0.005,
                                         'kl': 4.916396179055482e-09,
                                         'policy_loss': 0.22606639564037323,
                                         'total_loss': 0.6190322637557983,
                                         'vf_explained_var': np.ndarray((1,), dtype=float32, min=0.78, max=0.78, mean=0.78),
                                         'vf_loss': 0.3998071253299713}}}

2020-09-21 16:27:29,751	INFO rollout_worker.py:736 -- Training on 

2020-09-21 16:27:36,838	DEBUG sgd.py:120 -- 14 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.0791854690760374, 'policy_loss': -0.09420138387940824, 'vf_loss': 0.00849856183049269, 'vf_explained_var': 0.99370563, 'kl': 0.012499594478867948, 'entropy': 1.227696754038334, 'entropy_coeff': 0.005}
2020-09-21 16:27:37,327	DEBUG sgd.py:120 -- 15 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.0861607292899862, 'policy_loss': -0.10104788490571082, 'vf_loss': 0.008320618537254632, 'vf_explained_var': 0.9943408, 'kl': 0.012532843626104295, 'entropy': 1.224592685699463, 'entropy_coeff': 0.005}
2020-09-21 16:27:37,787	DEBUG sgd.py:120 -- 16 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.086143086431548, 'policy_loss': -0.09977136482484639, 'vf_loss': 0.007175639126216993, 'vf_explained_var': 0.9947045, 'kl': 0.012472120171878487, 'entropy': 1.2350769713521004, 'entropy_coeff': 0.005}
2

custom_metrics: {}
date: 2020-09-21_16-27-44
done: false
episode_len_mean: 2176.36
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.96624589571231
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 143
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.230615183711052
      entropy_coeff: 0.005
      kl: 0.015204334806185216
      policy_loss: -0.12025387957692146
      total_loss: -0.10792369663249701
      vf_explained_var: 0.9976345896720886
      vf_loss: 0.0030888717155903578
  num_steps_sampled: 271000
  num_steps_trained: 271000
iterations_since_restore: 271
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.678260869565219
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.782608695652176
  vram_util_percent0: 0.94499098212821

2020-09-21 16:27:49,032	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.21797379455529153, 'policy_loss': 0.005623562843538821, 'vf_loss': 0.2166041349992156, 'vf_explained_var': 0.90476316, 'kl': 0.0020088098173194346, 'entropy': 1.2575643509626389, 'entropy_coeff': 0.005}
2020-09-21 16:27:49,525	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.06194777390919626, 'policy_loss': -0.025143501348793507, 'vf_loss': 0.08951476868242025, 'vf_explained_var': 0.9588997, 'kl': 0.0037455189012689516, 'entropy': 1.2431657165288925, 'entropy_coeff': 0.005}
2020-09-21 16:27:49,977	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.0029978021048009396, 'policy_loss': -0.04468179726973176, 'vf_loss': 0.04901582607999444, 'vf_explained_var': 0.9776634, 'kl': 0.004849023505812511, 'entropy': 1.2491717860102654, 'entropy_coeff': 0.005}

2020-09-21 16:28:00,932	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.10914515133481473, 'policy_loss': -0.12296318460721523, 'vf_loss': 0.0031351904472103342, 'vf_explained_var': 0.998556, 'kl': 0.016710366879124194, 'entropy': 1.2472811490297318, 'entropy_coeff': 0.005}
2020-09-21 16:28:01,384	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.11088595574256033, 'policy_loss': -0.12465861369855702, 'vf_loss': 0.0029215820832177997, 'vf_explained_var': 0.998634, 'kl': 0.016871649480890483, 'entropy': 1.2462930455803871, 'entropy_coeff': 0.005}
2020-09-21 16:28:01,883	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.11377510032616556, 'policy_loss': -0.1270330250263214, 'vf_loss': 0.0027819605311378837, 'vf_explained_var': 0.9986839, 'kl': 0.01651379733812064, 'entropy': 1.24885144084692, 'entropy_coeff': 0.005

custom_metrics: {}
date: 2020-09-21_16-28-02
done: false
episode_len_mean: 2176.36
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.96624589571231
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 143
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.250605747103691
      entropy_coeff: 0.005
      kl: 0.017091899935621768
      policy_loss: -0.12735634809359908
      total_loss: -0.11372667882824317
      vf_explained_var: 0.9987678527832031
      vf_loss: 0.002577150022261776
  num_steps_sampled: 272000
  num_steps_trained: 272000
iterations_since_restore: 272
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.6875
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000002
  vram_util_percent0: 0.9449909821282177
  vram_ut

2020-09-21 16:28:07,460	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.3630591882392764, 'policy_loss': -0.0006065981579013169, 'vf_loss': 0.36822754330933094, 'vf_explained_var': 0.8778446, 'kl': 0.001017485949468877, 'entropy': 1.1183899343013763, 'entropy_coeff': 0.005}
2020-09-21 16:28:07,953	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.12178560870233923, 'policy_loss': -0.01913026173133403, 'vf_loss': 0.14243726339191198, 'vf_explained_var': 0.9519223, 'kl': 0.004041898035211489, 'entropy': 1.1227637827396393, 'entropy_coeff': 0.005}
2020-09-21 16:28:08,407	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.04037367342971265, 'policy_loss': -0.031800250173546374, 'vf_loss': 0.07413405645638704, 'vf_explained_var': 0.97634333, 'kl': 0.003615803536376916, 'entropy': 1.124226987361908, 'entropy_coeff': 0.005}
20

2020-09-21 16:28:19,396	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.08223337447270751, 'policy_loss': -0.09259432752151042, 'vf_loss': 0.0029128916357876733, 'vf_explained_var': 0.9989709, 'kl': 0.012917121464852244, 'entropy': 1.126104362308979, 'entropy_coeff': 0.005}
2020-09-21 16:28:19,850	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.0779543382814154, 'policy_loss': -0.08814724953845143, 'vf_loss': 0.002743862001807429, 'vf_explained_var': 0.99906194, 'kl': 0.012909023615065962, 'entropy': 1.1242655776441097, 'entropy_coeff': 0.005}
2020-09-21 16:28:20,338	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.08802129048854113, 'policy_loss': -0.0981382925529033, 'vf_loss': 0.002621258099679835, 'vf_explained_var': 0.99907744, 'kl': 0.012951714976225048, 'entropy': 1.1235724985599518, 'entropy_coeff': 0.

custom_metrics: {}
date: 2020-09-21_16-28-21
done: false
episode_len_mean: 2176.36
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.96624589571231
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 143
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.1273029297590256
      entropy_coeff: 0.005
      kl: 0.013523535744752735
      policy_loss: -0.09770933643449098
      total_loss: -0.08723773283418268
      vf_explained_var: 0.999192476272583
      vf_loss: 0.0024155420614988543
  num_steps_sampled: 273000
  num_steps_trained: 273000
iterations_since_restore: 273
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.782608695652175
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.94499098212821

2020-09-21 16:28:25,973	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.21056814584881067, 'policy_loss': 0.002363431267440319, 'vf_loss': 0.21351910335943103, 'vf_explained_var': 0.89404905, 'kl': 0.0007020463920265341, 'entropy': 1.2050415724515915, 'entropy_coeff': 0.005}
2020-09-21 16:28:26,463	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.05965896986890584, 'policy_loss': -0.020212837145663798, 'vf_loss': 0.0827719559893012, 'vf_explained_var': 0.9603057, 'kl': 0.0030777119027334265, 'entropy': 1.2032662406563759, 'entropy_coeff': 0.005}
2020-09-21 16:28:26,919	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.0025199707015417516, 'policy_loss': -0.042925787274725735, 'vf_loss': 0.04621319402940571, 'vf_explained_var': 0.97778285, 'kl': 0.005209026508964598, 'entropy': 1.2083143070340157, 'entropy_coeff': 0.00

2020-09-21 16:28:32,645	DEBUG sgd.py:120 -- 14 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.08523883624002337, 'policy_loss': -0.10003876918926835, 'vf_loss': 0.0069644291361328214, 'vf_explained_var': 0.99653757, 'kl': 0.013735824963077903, 'entropy': 1.2144042290747166, 'entropy_coeff': 0.005}
2020-09-21 16:28:33,127	DEBUG sgd.py:120 -- 15 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.09729975473601371, 'policy_loss': -0.11236284940969199, 'vf_loss': 0.006581665656995028, 'vf_explained_var': 0.9968991, 'kl': 0.014371711586136371, 'entropy': 1.2139863073825836, 'entropy_coeff': 0.005}
2020-09-21 16:28:33,580	DEBUG sgd.py:120 -- 16 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.09308572846930474, 'policy_loss': -0.10799622966442257, 'vf_loss': 0.006022268469678238, 'vf_explained_var': 0.99702317, 'kl': 0.014791742723900825, 'entropy': 1.2176819443702698, 'entropy_coeff':

custom_metrics: {}
date: 2020-09-21_16-28-39
done: false
episode_len_mean: 2176.36
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.96624589571231
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 143
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.218642957508564
      entropy_coeff: 0.005
      kl: 0.016943288501352072
      policy_loss: -0.13469282747246325
      total_loss: -0.12076111789792776
      vf_explained_var: 0.9985508918762207
      vf_loss: 0.0028698372407234274
  num_steps_sampled: 274000
  num_steps_trained: 274000
iterations_since_restore: 274
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.965217391304347
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.94499098212821

[2m[36m(pid=24699)[0m 2020-09-21 16:28:40,073	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 11925.056475933541,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 4128},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-55.132, max=10.685, mean=0.541),
[2m[36m(pid=24699)[0m                                   'prev_action': 8,
[2m[36m(pid=24699

2020-09-21 16:28:44,691	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.2671251171268523, 'policy_loss': 0.0019132058368995786, 'vf_loss': 0.27074797358363867, 'vf_explained_var': 0.84986943, 'kl': 0.00036978888666083787, 'entropy': 1.1820948272943497, 'entropy_coeff': 0.005}
2020-09-21 16:28:45,183	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.061840923386625946, 'policy_loss': -0.028621795354411006, 'vf_loss': 0.09357857797294855, 'vf_explained_var': 0.945217, 'kl': 0.00274349137180252, 'entropy': 1.1787296198308468, 'entropy_coeff': 0.005}
2020-09-21 16:28:45,636	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.0011144325835630298, 'policy_loss': -0.04799391177948564, 'vf_loss': 0.048806723207235336, 'vf_explained_var': 0.9703615, 'kl': 0.00615385954733938, 'entropy': 1.185832604765892, 'entropy_coeff': 0.005}


2020-09-21 16:28:56,561	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.09962147672194988, 'policy_loss': -0.11153811705298722, 'vf_loss': 0.0030568755319109187, 'vf_explained_var': 0.99818814, 'kl': 0.014631017053034157, 'entropy': 1.1908280476927757, 'entropy_coeff': 0.005}
2020-09-21 16:28:57,006	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.10250529181212187, 'policy_loss': -0.11441892373841256, 'vf_loss': 0.0028318051481619477, 'vf_explained_var': 0.99839, 'kl': 0.014855700894258916, 'entropy': 1.1919131502509117, 'entropy_coeff': 0.005}
2020-09-21 16:28:57,490	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.10161755396984518, 'policy_loss': -0.11348802549764514, 'vf_loss': 0.0028474176360759884, 'vf_explained_var': 0.9977547, 'kl': 0.014755553449504077, 'entropy': 1.1833883598446846, 'entropy_coeff': 

custom_metrics: {}
date: 2020-09-21_16-28-58
done: false
episode_len_mean: 2176.36
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.96624589571231
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 143
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.187165342271328
      entropy_coeff: 0.005
      kl: 0.014915646403096616
      policy_loss: -0.11521890899166465
      total_loss: -0.1034660964505747
      vf_explained_var: 0.9977320432662964
      vf_loss: 0.0025865513453027233
  num_steps_sampled: 275000
  num_steps_trained: 275000
iterations_since_restore: 275
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.817391304347827
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.944990982128217

[2m[36m(pid=24699)[0m 2020-09-21 16:28:58,764	INFO rollout_worker.py:561 -- Generating sample batch of size 200
2020-09-21 16:29:03,144	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.187945444136858, 'policy_loss': 0.003977179992944002, 'vf_loss': 0.188754266127944, 'vf_explained_var': 0.86198825, 'kl': 0.001468305567885242, 'entropy': 1.2545319870114326, 'entropy_coeff': 0.005}
2020-09-21 16:29:03,601	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.06552059738896787, 'policy_loss': -0.021531849401071668, 'vf_loss': 0.08861659374088049, 'vf_explained_var': 0.9374503, 'kl': 0.004582936307997443, 'entropy': 1.2408744543790817, 'entropy_coeff': 0.005}
2020-09-21 16:29:04,088	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.02111281087854877, 'policy_loss': -0.039678728382568806, 'vf_loss': 0.061416813638061285, 'vf

2020-09-21 16:29:14,980	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.10430418862961233, 'policy_loss': -0.11872579995542765, 'vf_loss': 0.004634947661543265, 'vf_explained_var': 0.99699795, 'kl': 0.015970243606716394, 'entropy': 1.2766415104269981, 'entropy_coeff': 0.005}
2020-09-21 16:29:15,470	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.10979534848593175, 'policy_loss': -0.12401645869249478, 'vf_loss': 0.004444794452865608, 'vf_explained_var': 0.99691045, 'kl': 0.015928403532598168, 'entropy': 1.2702389061450958, 'entropy_coeff': 0.005}
2020-09-21 16:29:15,959	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.11057979660108685, 'policy_loss': -0.12455096712801605, 'vf_loss': 0.0037255591741995886, 'vf_explained_var': 0.99709594, 'kl': 0.01637226960156113, 'entropy': 1.2662623822689056, 'entropy_coeff':

custom_metrics: {}
date: 2020-09-21_16-29-16
done: false
episode_len_mean: 2252.27
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.91437118397421
episode_reward_min: -32.22698139089825
episodes_this_iter: 1
episodes_total: 144
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.2740057408809662
      entropy_coeff: 0.005
      kl: 0.016755316988565028
      policy_loss: -0.12348395492881536
      total_loss: -0.10959767596796155
      vf_explained_var: 0.9975041151046753
      vf_loss: 0.0032915457850322127
  num_steps_sampled: 276000
  num_steps_trained: 276000
iterations_since_restore: 276
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.683333333333333
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000002
  vram_util_percent0: 0.9449909821282

2020-09-21 16:29:20,388	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.15550813399022445, 'policy_loss': -0.00521866069175303, 'vf_loss': 0.1660024244338274, 'vf_explained_var': 0.9249231, 'kl': 0.0014461554393464526, 'entropy': 1.347971685230732, 'entropy_coeff': 0.005}
2020-09-21 16:29:20,879	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.03701027692295611, 'policy_loss': -0.02846645936369896, 'vf_loss': 0.06901807896792889, 'vf_explained_var': 0.96707296, 'kl': 0.003139105363516137, 'entropy': 1.3439372926950455, 'entropy_coeff': 0.005}
2020-09-21 16:29:21,332	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.00198599835857749, 'policy_loss': -0.03969479433726519, 'vf_loss': 0.043324874830432236, 'vf_explained_var': 0.97903585, 'kl': 0.005001559329684824, 'entropy': 1.3416327387094498, 'entropy_coeff': 0.005}
20

2020-09-21 16:29:29,875	DEBUG rollout_worker.py:760 -- Training out:

{ 'default_policy': { 'learner_stats': { 'allreduce_latency': 0.0,
                                         'cur_kl_coeff': 1.0125,
                                         'cur_lr': 1e-05,
                                         'entropy': 1.4052919149398804,
                                         'entropy_coeff': 0.005,
                                         'kl': 0.012953994795680046,
                                         'policy_loss': -0.10380025207996368,
                                         'total_loss': -0.07639005780220032,
                                         'vf_explained_var': np.ndarray((1,), dtype=float32, min=0.991, max=0.991, mean=0.991),
                                         'vf_loss': 0.021320713683962822}}}

2020-09-21 16:29:29,934	DEBUG sgd.py:120 -- 20 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.08509610337205231, 'policy_loss': -0.11628

custom_metrics: {}
date: 2020-09-21_16-29-34
done: false
episode_len_mean: 2318.7
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.87136120069231
episode_reward_min: -32.22698139089825
episodes_this_iter: 1
episodes_total: 145
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.3232589364051819
      entropy_coeff: 0.005
      kl: 0.017113386536948383
      policy_loss: -0.12587445229291916
      total_loss: -0.09495374350808561
      vf_explained_var: 0.9896231889724731
      vf_loss: 0.02020970283774659
  num_steps_sampled: 277000
  num_steps_trained: 277000
iterations_since_restore: 277
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.3545454545454545
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.944990982128217

2020-09-21 16:29:38,185	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.11357565177604556, 'policy_loss': -0.0003218115889467299, 'vf_loss': 0.11930204555392265, 'vf_explained_var': 0.98878634, 'kl': 0.0017095886075603195, 'entropy': 1.427107810974121, 'entropy_coeff': 0.005}
2020-09-21 16:29:38,673	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.020100810506846756, 'policy_loss': -0.033214759547263384, 'vf_loss': 0.05450678744819015, 'vf_explained_var': 0.9947236, 'kl': 0.005915547561016865, 'entropy': 1.436140812933445, 'entropy_coeff': 0.005}
2020-09-21 16:29:39,126	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.016777878161519766, 'policy_loss': -0.0494504306698218, 'vf_loss': 0.03238963824696839, 'vf_explained_var': 0.9969137, 'kl': 0.007383238786133006, 'entropy': 1.4385227859020233, 'entropy_coeff': 0.005}

2020-09-21 16:29:49,989	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.1064010796835646, 'policy_loss': -0.12241548276506364, 'vf_loss': 0.0063078925595618784, 'vf_explained_var': 0.9993869, 'kl': 0.016692445380613208, 'entropy': 1.438918948173523, 'entropy_coeff': 0.005}
2020-09-21 16:29:50,474	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.1135007617995143, 'policy_loss': -0.1306384215131402, 'vf_loss': 0.006252658495213836, 'vf_explained_var': 0.999368, 'kl': 0.01783633342711255, 'entropy': 1.4348574355244637, 'entropy_coeff': 0.005}
2020-09-21 16:29:50,964	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.11799168837023899, 'policy_loss': -0.13494123169220984, 'vf_loss': 0.006337754704873078, 'vf_explained_var': 0.9993937, 'kl': 0.01761102332966402, 'entropy': 1.4438732489943504, 'entropy_coeff': 0.005}
2

custom_metrics: {}
date: 2020-09-21_16-29-51
done: false
episode_len_mean: 2318.7
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.87136120069231
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 145
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.437373399734497
      entropy_coeff: 0.005
      kl: 0.017793793871533126
      policy_loss: -0.13659247988834977
      total_loss: -0.11983336741104722
      vf_explained_var: 0.9994107484817505
      vf_loss: 0.005929763996391557
  num_steps_sampled: 278000
  num_steps_trained: 278000
iterations_since_restore: 278
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.356521739130435
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.9449909821282177

2020-09-21 16:29:55,485	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.22908390266820788, 'policy_loss': 0.0039739704225212336, 'vf_loss': 0.23112840810790658, 'vf_explained_var': 0.9190196, 'kl': 0.0009284295087231831, 'entropy': 1.391702100634575, 'entropy_coeff': 0.005}
2020-09-21 16:29:55,988	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.029035991930868477, 'policy_loss': -0.028111683786846697, 'vf_loss': 0.05969372927211225, 'vf_explained_var': 0.98093057, 'kl': 0.0043544192594708875, 'entropy': 1.3909801319241524, 'entropy_coeff': 0.005}
2020-09-21 16:29:56,442	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.013536528858821839, 'policy_loss': -0.04254189855419099, 'vf_loss': 0.029100249870680273, 'vf_explained_var': 0.9902238, 'kl': 0.0067137306614313275, 'entropy': 1.37850783765316, 'entropy_coeff': 0.00

2020-09-21 16:30:07,414	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.09695212518272456, 'policy_loss': -0.11229640216333792, 'vf_loss': 0.007626810314832255, 'vf_explained_var': 0.99760145, 'kl': 0.01449811615748331, 'entropy': 1.392376184463501, 'entropy_coeff': 0.005}
2020-09-21 16:30:07,866	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.10048320540226996, 'policy_loss': -0.11616744438651949, 'vf_loss': 0.007609460648382083, 'vf_explained_var': 0.9973589, 'kl': 0.01484670682111755, 'entropy': 1.391503781080246, 'entropy_coeff': 0.005}
2020-09-21 16:30:08,357	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.10621734423330054, 'policy_loss': -0.12186250911327079, 'vf_loss': 0.007403004230582155, 'vf_explained_var': 0.99737024, 'kl': 0.015022926265373826, 'entropy': 1.3937103599309921, 'entropy_coeff': 0.00

custom_metrics: {}
date: 2020-09-21_16-30-09
done: false
episode_len_mean: 2318.7
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.87136120069231
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 145
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.3891210481524467
      entropy_coeff: 0.005
      kl: 0.015006542205810547
      policy_loss: -0.11893095378763974
      total_loss: -0.1031352246645838
      vf_explained_var: 0.9975712895393372
      vf_loss: 0.007547212764620781
  num_steps_sampled: 279000
  num_steps_trained: 279000
iterations_since_restore: 279
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.585714285714285
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000002
  vram_util_percent0: 0.9449909821282179

[2m[36m(pid=24699)[0m 2020-09-21 16:30:09,922	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 7476.211843477993,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 5128},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-5.196, max=4.865, mean=0.352),
[2m[36m(pid=24699)[0m                                   'prev_action': 2,
[2m[36m(pid=24699)[

2020-09-21 16:30:13,443	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.14629124337807298, 'policy_loss': 0.0012407915201038122, 'vf_loss': 0.150162260979414, 'vf_explained_var': 0.9545097, 'kl': 0.0017247811051581685, 'entropy': 1.3716306388378143, 'entropy_coeff': 0.005}
2020-09-21 16:30:13,932	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.029894957202486694, 'policy_loss': -0.021023119683377445, 'vf_loss': 0.05261410097591579, 'vf_explained_var': 0.98413765, 'kl': 0.005086750483314972, 'entropy': 1.3692730888724327, 'entropy_coeff': 0.005}
2020-09-21 16:30:14,383	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.016336928703822196, 'policy_loss': -0.0449881749227643, 'vf_loss': 0.02961945836432278, 'vf_explained_var': 0.99088144, 'kl': 0.005781639949418604, 'entropy': 1.3644246235489845, 'entropy_coeff': 0.005}

2020-09-21 16:30:25,291	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.1025259846355766, 'policy_loss': -0.119081124314107, 'vf_loss': 0.009157130494713783, 'vf_explained_var': 0.99694467, 'kl': 0.014088719501160085, 'entropy': 1.3733647167682648, 'entropy_coeff': 0.005}
2020-09-21 16:30:25,746	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.10483338381163776, 'policy_loss': -0.12138465221505612, 'vf_loss': 0.008833768515614793, 'vf_explained_var': 0.9970614, 'kl': 0.01440461736638099, 'entropy': 1.37343480437994, 'entropy_coeff': 0.005}
2020-09-21 16:30:26,236	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.10589768714271486, 'policy_loss': -0.12232351768761873, 'vf_loss': 0.008896716550225392, 'vf_explained_var': 0.9968372, 'kl': 0.014218720549251884, 'entropy': 1.3734677582979202, 'entropy_coeff': 0.005}


custom_metrics: {}
date: 2020-09-21_16-30-27
done: false
episode_len_mean: 2318.7
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.87136120069231
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 145
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.3721956834197044
      entropy_coeff: 0.005
      kl: 0.014810853695962578
      policy_loss: -0.1241561898496002
      total_loss: -0.10698753967881203
      vf_explained_var: 0.9970022439956665
      vf_loss: 0.009033638110850006
  num_steps_sampled: 280000
  num_steps_trained: 280000
iterations_since_restore: 280
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.456521739130435
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.9449909821282177

[2m[36m(pid=24699)[0m 2020-09-21 16:30:27,442	INFO rollout_worker.py:561 -- Generating sample batch of size 200
2020-09-21 16:30:30,685	DEBUG rollout_worker.py:760 -- Training out:

{ 'default_policy': { 'learner_stats': { 'allreduce_latency': 0.0,
                                         'cur_kl_coeff': 1.0125,
                                         'cur_lr': 1e-05,
                                         'entropy': 1.4015400409698486,
                                         'entropy_coeff': 0.005,
                                         'kl': 3.954288274599094e-08,
                                         'policy_loss': -0.02895389124751091,
                                         'total_loss': 0.13346318900585175,
                                         'vf_explained_var': np.ndarray((1,), dtype=float32, min=0.879, max=0.879, mean=0.879),
                                         'vf_loss': 0.16942471265792847}}}

2020-09-21 16:30:30,690	INFO rollout_worker.py:736 -- Traini

2020-09-21 16:30:37,777	DEBUG sgd.py:120 -- 14 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.08280638651922345, 'policy_loss': -0.10078284912742674, 'vf_loss': 0.01086352879065089, 'vf_explained_var': 0.99195504, 'kl': 0.013784531911369413, 'entropy': 1.3687808960676193, 'entropy_coeff': 0.005}
2020-09-21 16:30:38,256	DEBUG sgd.py:120 -- 15 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.08885621465742588, 'policy_loss': -0.10580087918788195, 'vf_loss': 0.009621965640690178, 'vf_explained_var': 0.9920441, 'kl': 0.013996184221468866, 'entropy': 1.3696868270635605, 'entropy_coeff': 0.005}
2020-09-21 16:30:38,715	DEBUG sgd.py:120 -- 16 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.08465531305409968, 'policy_loss': -0.10169586283154786, 'vf_loss': 0.00943642167840153, 'vf_explained_var': 0.9922135, 'kl': 0.014259344607125968, 'entropy': 1.366692341864109, 'entropy_coeff': 0.00

custom_metrics: {}
date: 2020-09-21_16-30-44
done: false
episode_len_mean: 2318.7
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.87136120069231
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 145
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.3726277127861977
      entropy_coeff: 0.005
      kl: 0.016245968348812312
      policy_loss: -0.12651727732736617
      total_loss: -0.10856238985434175
      vf_explained_var: 0.9930433630943298
      vf_loss: 0.008368983544642106
  num_steps_sampled: 281000
  num_steps_trained: 281000
iterations_since_restore: 281
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.360869565217391
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.944990982128217

2020-09-21 16:30:49,054	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.11931835656287149, 'policy_loss': -0.0018859499832615256, 'vf_loss': 0.12632062193006277, 'vf_explained_var': 0.87589407, 'kl': 0.001141333629602892, 'entropy': 1.2543837651610374, 'entropy_coeff': 0.005}
2020-09-21 16:30:49,543	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.031908386503346264, 'policy_loss': -0.022238065779674798, 'vf_loss': 0.053682591300457716, 'vf_explained_var': 0.94916713, 'kl': 0.006583445996511728, 'entropy': 1.2403766810894012, 'entropy_coeff': 0.005}
2020-09-21 16:30:49,996	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.00516470440197736, 'policy_loss': -0.03347500925883651, 'vf_loss': 0.02813989412970841, 'vf_explained_var': 0.97007537, 'kl': 0.006260859023313969, 'entropy': 1.2337429150938988, 'entropy_coeff': 0.

2020-09-21 16:31:00,970	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.11755323689430952, 'policy_loss': -0.13115259492769837, 'vf_loss': 0.0033171169998240657, 'vf_explained_var': 0.996439, 'kl': 0.016299290640745312, 'entropy': 1.2441596612334251, 'entropy_coeff': 0.005}
2020-09-21 16:31:01,422	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.1135478604119271, 'policy_loss': -0.1265214323066175, 'vf_loss': 0.003035209156223573, 'vf_explained_var': 0.996794, 'kl': 0.016014701686799526, 'entropy': 1.2553049102425575, 'entropy_coeff': 0.005}
2020-09-21 16:31:01,918	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.11824629083275795, 'policy_loss': -0.13161368371220306, 'vf_loss': 0.003014584603079129, 'vf_explained_var': 0.99691534, 'kl': 0.016383932321332395, 'entropy': 1.2471842169761658, 'entropy_coeff': 0.00

custom_metrics: {}
date: 2020-09-21_16-31-02
done: false
episode_len_mean: 2318.7
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.87136120069231
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 145
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.2486641630530357
      entropy_coeff: 0.005
      kl: 0.01670430856756866
      policy_loss: -0.1272716808016412
      total_loss: -0.1135047395946458
      vf_explained_var: 0.9969072341918945
      vf_loss: 0.003097142471233383
  num_steps_sampled: 282000
  num_steps_trained: 282000
iterations_since_restore: 282
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.5782608695652165
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.9449909821282177


2020-09-21 16:31:06,339	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.14183177368249744, 'policy_loss': 0.006886723218485713, 'vf_loss': 0.1402626521885395, 'vf_explained_var': 0.7846581, 'kl': 0.0014738758218114656, 'entropy': 1.3619810044765472, 'entropy_coeff': 0.005}
2020-09-21 16:31:06,792	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.026566775050014257, 'policy_loss': -0.016443130560219288, 'vf_loss': 0.046293088467791677, 'vf_explained_var': 0.9340886, 'kl': 0.003467852220637724, 'entropy': 1.3588760793209076, 'entropy_coeff': 0.005}
2020-09-21 16:31:07,275	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.0144869948271662, 'policy_loss': -0.03548761049751192, 'vf_loss': 0.02366718655684963, 'vf_explained_var': 0.9623491, 'kl': 0.004089564288733527, 'entropy': 1.3614502400159836, 'entropy_coeff': 0.005}
2

2020-09-21 16:31:18,216	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.10623685363680124, 'policy_loss': -0.12134104105643928, 'vf_loss': 0.005982047936413437, 'vf_explained_var': 0.989802, 'kl': 0.015793475438840687, 'entropy': 1.3737517222762108, 'entropy_coeff': 0.005}
2020-09-21 16:31:18,699	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.10111529799178243, 'policy_loss': -0.11750507075339556, 'vf_loss': 0.006225892400834709, 'vf_explained_var': 0.9890424, 'kl': 0.016791859932709485, 'entropy': 1.3675757572054863, 'entropy_coeff': 0.005}
2020-09-21 16:31:19,158	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.10306749996379949, 'policy_loss': -0.11926319904159755, 'vf_loss': 0.006272454600548372, 'vf_explained_var': 0.9891088, 'kl': 0.0165621557389386, 'entropy': 1.369188129901886, 'entropy_coeff': 0.005}

custom_metrics: {}
date: 2020-09-21_16-31-20
done: false
episode_len_mean: 2398.57
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.792519794026724
episode_reward_min: -32.22698139089825
episodes_this_iter: 1
episodes_total: 146
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.3650049716234207
      entropy_coeff: 0.005
      kl: 0.016610719729214907
      policy_loss: -0.12598370690830052
      total_loss: -0.11012539360672235
      vf_explained_var: 0.9900807738304138
      vf_loss: 0.005864988692337647
  num_steps_sampled: 283000
  num_steps_trained: 283000
iterations_since_restore: 283
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.560869565217391
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.9449909821282

2020-09-21 16:31:23,806	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.09643435466568917, 'policy_loss': -0.0019814035040326416, 'vf_loss': 0.10373420035466552, 'vf_explained_var': 0.93838173, 'kl': 0.0017246798873666869, 'entropy': 1.4129365012049675, 'entropy_coeff': 0.005}
2020-09-21 16:31:24,287	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.013299587415531278, 'policy_loss': -0.022624606266617775, 'vf_loss': 0.037300729542039335, 'vf_explained_var': 0.97850716, 'kl': 0.005596461443929002, 'entropy': 1.4085901901125908, 'entropy_coeff': 0.005}
2020-09-21 16:31:24,735	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.02547685126774013, 'policy_loss': -0.043348305160179734, 'vf_loss': 0.020530688227154315, 'vf_explained_var': 0.9878334, 'kl': 0.004365027969470248, 'entropy': 1.4157647490501404, 'entropy_coeff': 

2020-09-21 16:31:30,904	DEBUG sgd.py:120 -- 15 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.09378822264261544, 'policy_loss': -0.1061699166893959, 'vf_loss': 0.005632834261632524, 'vf_explained_var': 0.9963467, 'kl': 0.01366766169667244, 'entropy': 1.4179295152425766, 'entropy_coeff': 0.005}
2020-09-21 16:31:31,390	DEBUG sgd.py:120 -- 16 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.08989749854663387, 'policy_loss': -0.10159552658296889, 'vf_loss': 0.005493987089721486, 'vf_explained_var': 0.996623, 'kl': 0.01313022489193827, 'entropy': 1.4180639311671257, 'entropy_coeff': 0.005}
2020-09-21 16:31:31,890	DEBUG sgd.py:120 -- 17 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.09526827931404114, 'policy_loss': -0.10744975134730339, 'vf_loss': 0.005761635649832897, 'vf_explained_var': 0.996294, 'kl': 0.013346585968974978, 'entropy': 1.418716512620449, 'entropy_coeff': 0.005}
2

custom_metrics: {}
date: 2020-09-21_16-31-37
done: false
episode_len_mean: 2398.57
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.79251979402673
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 146
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.415897898375988
      entropy_coeff: 0.005
      kl: 0.01573550998000428
      policy_loss: -0.127546266769059
      total_loss: -0.11426801155903377
      vf_explained_var: 0.9968842267990112
      vf_loss: 0.004425534112669993
  num_steps_sampled: 284000
  num_steps_trained: 284000
iterations_since_restore: 284
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.340909090909091
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.9449909821282177
 

[2m[36m(pid=24699)[0m 2020-09-21 16:31:37,821	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 7389.993776849049,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 6128},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-5.369, max=4.184, mean=0.499),
[2m[36m(pid=24699)[0m                                   'prev_action': 12,
[2m[36m(pid=24699)

2020-09-21 16:31:41,317	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.13379478082060814, 'policy_loss': -0.0012346296571195126, 'vf_loss': 0.1408421031665057, 'vf_explained_var': 0.96722734, 'kl': 0.0011573897731996935, 'entropy': 1.3969097509980202, 'entropy_coeff': 0.005}
2020-09-21 16:31:41,809	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.009705274016596377, 'policy_loss': -0.029973793076351285, 'vf_loss': 0.042527034878730774, 'vf_explained_var': 0.98950475, 'kl': 0.004092871051398106, 'entropy': 1.3984008803963661, 'entropy_coeff': 0.005}
2020-09-21 16:31:42,256	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.024193732475396246, 'policy_loss': -0.043798752943985164, 'vf_loss': 0.020402679685503244, 'vf_explained_var': 0.99418956, 'kl': 0.006090234877774492, 'entropy': 1.3928057998418808, 'entropy_coeff':

2020-09-21 16:31:53,188	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.11828343744855374, 'policy_loss': -0.13241522770840675, 'vf_loss': 0.004614505218341947, 'vf_explained_var': 0.9987531, 'kl': 0.01627942855702713, 'entropy': 1.393127165734768, 'entropy_coeff': 0.005}
2020-09-21 16:31:53,640	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.12191273167263716, 'policy_loss': -0.13620488881133497, 'vf_loss': 0.004517351320828311, 'vf_explained_var': 0.99888587, 'kl': 0.016500783851370215, 'entropy': 1.3864484280347824, 'entropy_coeff': 0.005}
2020-09-21 16:31:54,124	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.11373481014743447, 'policy_loss': -0.1283917350228876, 'vf_loss': 0.004491695362958126, 'vf_explained_var': 0.9988104, 'kl': 0.016907831595744938, 'entropy': 1.3907915353775024, 'entropy_coeff': 0.00

custom_metrics: {}
date: 2020-09-21_16-31-55
done: false
episode_len_mean: 2398.57
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.79251979402673
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 146
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.3870603665709496
      entropy_coeff: 0.005
      kl: 0.01747456449083984
      policy_loss: -0.1399321539211087
      total_loss: -0.12463780472171493
      vf_explained_var: 0.9988241195678711
      vf_loss: 0.004536656037089415
  num_steps_sampled: 285000
  num_steps_trained: 285000
iterations_since_restore: 285
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.234782608695652
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.9449909821282177

[2m[36m(pid=24699)[0m 2020-09-21 16:31:55,315	INFO rollout_worker.py:561 -- Generating sample batch of size 200
2020-09-21 16:31:58,942	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.15314175630919635, 'policy_loss': 5.29381213709712e-05, 'vf_loss': 0.15933593455702066, 'vf_explained_var': 0.90441334, 'kl': 0.0009305878002606738, 'entropy': 1.4378684982657433, 'entropy_coeff': 0.005}
2020-09-21 16:31:59,439	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.04185395152308047, 'policy_loss': -0.027289122808724642, 'vf_loss': 0.07338245073333383, 'vf_explained_var': 0.9531947, 'kl': 0.0029210827633505687, 'entropy': 1.4393938779830933, 'entropy_coeff': 0.005}
2020-09-21 16:31:59,891	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.012445746338926256, 'policy_loss': -0.04608141025528312, 'vf_loss': 0.05971294036135077

2020-09-21 16:32:10,835	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.08586779027245939, 'policy_loss': -0.12746437080204487, 'vf_loss': 0.0341759342700243, 'vf_explained_var': 0.97695595, 'kl': 0.014417852740734816, 'entropy': 1.435485064983368, 'entropy_coeff': 0.005}
2020-09-21 16:32:11,290	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.08096685691270977, 'policy_loss': -0.1236144844442606, 'vf_loss': 0.03491506038699299, 'vf_explained_var': 0.9776819, 'kl': 0.01473666523816064, 'entropy': 1.4376625716686249, 'entropy_coeff': 0.005}
2020-09-21 16:32:11,776	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.084728047484532, 'policy_loss': -0.12843715513008647, 'vf_loss': 0.0355533555848524, 'vf_explained_var': 0.9769937, 'kl': 0.015128196857403964, 'entropy': 1.432308740913868, 'entropy_coeff': 0.005}
2020-

custom_metrics: {}
date: 2020-09-21_16-32-12
done: false
episode_len_mean: 2398.57
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.79251979402673
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 146
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.4364937022328377
      entropy_coeff: 0.005
      kl: 0.016238399199210107
      policy_loss: -0.1307889532763511
      total_loss: -0.08757535793120041
      vf_explained_var: 0.9770148992538452
      vf_loss: 0.03395468892995268
  num_steps_sampled: 286000
  num_steps_trained: 286000
iterations_since_restore: 286
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 6.2318181818181815
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.944990982128217

2020-09-21 16:32:16,658	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.07235956168733537, 'policy_loss': -0.0045455434592440724, 'vf_loss': 0.08213920961134136, 'vf_explained_var': 0.89126784, 'kl': 0.002241341682442277, 'entropy': 1.500692680478096, 'entropy_coeff': 0.005}
2020-09-21 16:32:17,152	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.009747088770382106, 'policy_loss': -0.02088449546135962, 'vf_loss': 0.032407165970653296, 'vf_explained_var': 0.9573308, 'kl': 0.005525247019249946, 'entropy': 1.4739787802100182, 'entropy_coeff': 0.005}
2020-09-21 16:32:17,605	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.00884285126812756, 'policy_loss': -0.030975023983046412, 'vf_loss': 0.01957876473898068, 'vf_explained_var': 0.97340393, 'kl': 0.009821782412473112, 'entropy': 1.4782289043068886, 'entropy_coeff': 0.00

2020-09-21 16:32:28,508	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.1092010943684727, 'policy_loss': -0.12206183047965169, 'vf_loss': 0.0060409237776184455, 'vf_explained_var': 0.99233884, 'kl': 0.014116476406343281, 'entropy': 1.4946229979395866, 'entropy_coeff': 0.005}
2020-09-21 16:32:28,964	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.11469458288047463, 'policy_loss': -0.12649073917418718, 'vf_loss': 0.005182418492040597, 'vf_explained_var': 0.9927878, 'kl': 0.013879597478080541, 'entropy': 1.4878724664449692, 'entropy_coeff': 0.005}
2020-09-21 16:32:29,446	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.10292777779977769, 'policy_loss': -0.1152674718759954, 'vf_loss': 0.005562608581385575, 'vf_explained_var': 0.9914752, 'kl': 0.01404919393826276, 'entropy': 1.4895451664924622, 'entropy_coeff': 0.0

custom_metrics: {}
date: 2020-09-21_16-32-30
done: false
episode_len_mean: 2398.57
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.79251979402673
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 146
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.4864074140787125
      entropy_coeff: 0.005
      kl: 0.015005235094577074
      policy_loss: -0.13064232654869556
      total_loss: -0.11765405675396323
      vf_explained_var: 0.9903819561004639
      vf_loss: 0.005227507455856539
  num_steps_sampled: 287000
  num_steps_trained: 287000
iterations_since_restore: 287
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.421739130434782
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.94499098212821

2020-09-21 16:32:33,983	DEBUG rollout_worker.py:760 -- Training out:

{ 'default_policy': { 'learner_stats': { 'allreduce_latency': 0.0,
                                         'cur_kl_coeff': 1.0125,
                                         'cur_lr': 1e-05,
                                         'entropy': 1.4391539096832275,
                                         'entropy_coeff': 0.005,
                                         'kl': -1.8590856853961668e-08,
                                         'policy_loss': -0.03536173701286316,
                                         'total_loss': 0.04558315873146057,
                                         'vf_explained_var': np.ndarray((1,), dtype=float32, min=0.887, max=0.887, mean=0.887),
                                         'vf_loss': 0.08814071118831635}}}

2020-09-21 16:32:33,988	INFO rollout_worker.py:736 -- Training on concatenated sample batches:

{ 'count': 64,
  'policy_batches': { 'default_policy': { 'data': { 'action_di

2020-09-21 16:32:41,073	DEBUG sgd.py:120 -- 14 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.07415668678004295, 'policy_loss': -0.08518701989669353, 'vf_loss': 0.005998916109092534, 'vf_explained_var': 0.9895283, 'kl': 0.011725986521923915, 'entropy': 1.3682288527488708, 'entropy_coeff': 0.005}
2020-09-21 16:32:41,550	DEBUG sgd.py:120 -- 15 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.0771666297223419, 'policy_loss': -0.08753459632862359, 'vf_loss': 0.005580008175456896, 'vf_explained_var': 0.98986566, 'kl': 0.01149905624333769, 'entropy': 1.3709671199321747, 'entropy_coeff': 0.005}
2020-09-21 16:32:42,013	DEBUG sgd.py:120 -- 16 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.07704685907810926, 'policy_loss': -0.08802423987071961, 'vf_loss': 0.00551615422591567, 'vf_explained_var': 0.99088335, 'kl': 0.012157151941210032, 'entropy': 1.3695781379938126, 'entropy_coeff': 0.0

custom_metrics: {}
date: 2020-09-21_16-32-48
done: false
episode_len_mean: 2398.57
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.79251979402673
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 146
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.368315577507019
      entropy_coeff: 0.005
      kl: 0.015149850281886756
      policy_loss: -0.11273679323494434
      total_loss: -0.09973625047132373
      vf_explained_var: 0.9923155307769775
      vf_loss: 0.004502888870774768
  num_steps_sampled: 288000
  num_steps_trained: 288000
iterations_since_restore: 288
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.4173913043478255
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.94499098212821

2020-09-21 16:32:52,167	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.037178466795012355, 'policy_loss': -0.007161702960729599, 'vf_loss': 0.048525969381444156, 'vf_explained_var': 0.73285794, 'kl': 0.0023785019696080933, 'entropy': 1.31880734115839, 'entropy_coeff': 0.005}
2020-09-21 16:32:52,659	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.0017299801111221313, 'policy_loss': -0.022465717396698892, 'vf_loss': 0.024023177451454103, 'vf_explained_var': 0.86199707, 'kl': 0.006570583733264357, 'entropy': 1.296039618551731, 'entropy_coeff': 0.005}
2020-09-21 16:32:53,114	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.016337293898686767, 'policy_loss': -0.029946022434160113, 'vf_loss': 0.01463828474516049, 'vf_explained_var': 0.9125602, 'kl': 0.005428237869637087, 'entropy': 1.3051304891705513, 'entropy_coeff': 0

2020-09-21 16:33:04,075	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.09609688678756356, 'policy_loss': -0.10703488753642887, 'vf_loss': 0.0028666802681982517, 'vf_explained_var': 0.98246324, 'kl': 0.014505649742204696, 'entropy': 1.323129527270794, 'entropy_coeff': 0.005}
2020-09-21 16:33:04,526	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.09960672748275101, 'policy_loss': -0.11060694244224578, 'vf_loss': 0.0027394921198720112, 'vf_explained_var': 0.9833586, 'kl': 0.014713244803715497, 'entropy': 1.3272874057292938, 'entropy_coeff': 0.005}
2020-09-21 16:33:05,017	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.10972374898847193, 'policy_loss': -0.12091693538241088, 'vf_loss': 0.0027529175858944654, 'vf_explained_var': 0.9829681, 'kl': 0.014872330415528268, 'entropy': 1.3235932812094688, 'entropy_coeff':

custom_metrics: {}
date: 2020-09-21_16-33-06
done: false
episode_len_mean: 2398.57
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.79251979402673
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 146
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.3365563750267029
      entropy_coeff: 0.005
      kl: 0.015497459331527352
      policy_loss: -0.11587547091767192
      total_loss: -0.10408124886453152
      vf_explained_var: 0.9834864139556885
      vf_loss: 0.0027858278845087625
  num_steps_sampled: 289000
  num_steps_trained: 289000
iterations_since_restore: 289
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.3999999999999995
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.944990982128

[2m[36m(pid=24699)[0m 2020-09-21 16:33:06,258	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 5498.63,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 7128},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-1.375, max=2.547, mean=0.484),
[2m[36m(pid=24699)[0m                                   'prev_action': 0,
[2m[36m(pid=24699)[0m        

2020-09-21 16:33:09,953	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.049173911625985056, 'policy_loss': -0.0037228373694233596, 'vf_loss': 0.05725458147935569, 'vf_explained_var': 0.46427554, 'kl': 0.002413736100815189, 'entropy': 1.3603495806455612, 'entropy_coeff': 0.005}
2020-09-21 16:33:10,442	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.009314888273365796, 'policy_loss': -0.01713935553561896, 'vf_loss': 0.027868946897797287, 'vf_explained_var': 0.7577318, 'kl': 0.005439221044071019, 'entropy': 1.384382776916027, 'entropy_coeff': 0.005}
2020-09-21 16:33:10,893	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.012363720801658928, 'policy_loss': -0.028091566637158394, 'vf_loss': 0.015391094726510346, 'vf_explained_var': 0.8540869, 'kl': 0.007201067972346209, 'entropy': 1.390865869820118, 'entropy_coeff': 0.0

2020-09-21 16:33:21,815	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.08897210983559489, 'policy_loss': -0.1003725971095264, 'vf_loss': 0.0032019665522966534, 'vf_explained_var': 0.9693352, 'kl': 0.014883110765367746, 'entropy': 1.3741273954510689, 'entropy_coeff': 0.005}
2020-09-21 16:33:22,268	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.09412516769953072, 'policy_loss': -0.10593152610817924, 'vf_loss': 0.003163050496368669, 'vf_explained_var': 0.96894085, 'kl': 0.015319733240175992, 'entropy': 1.373584695160389, 'entropy_coeff': 0.005}
2020-09-21 16:33:22,753	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.10027034516679123, 'policy_loss': -0.11160085105802864, 'vf_loss': 0.0030874526128172874, 'vf_explained_var': 0.96991277, 'kl': 0.014906273048836738, 'entropy': 1.3699099719524384, 'entropy_coeff': 

custom_metrics: {}
date: 2020-09-21_16-33-23
done: false
episode_len_mean: 2398.57
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.79251979402673
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 146
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.370795138180256
      entropy_coeff: 0.005
      kl: 0.015367695246823132
      policy_loss: -0.114969277754426
      total_loss: -0.10321196320001036
      vf_explained_var: 0.970947265625
      vf_loss: 0.0030515022881445475
  num_steps_sampled: 290000
  num_steps_trained: 290000
iterations_since_restore: 290
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.547826086956522
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.9449909821282177
  v

[2m[36m(pid=24699)[0m 2020-09-21 16:33:24,006	INFO rollout_worker.py:561 -- Generating sample batch of size 200
2020-09-21 16:33:26,916	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.07592008460778743, 'policy_loss': 0.0009325825958512723, 'vf_loss': 0.08072431548498571, 'vf_explained_var': 0.8829914, 'kl': 0.001096333640065894, 'entropy': 1.3693697899580002, 'entropy_coeff': 0.005}
2020-09-21 16:33:27,413	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.019785927142947912, 'policy_loss': -0.0072909335140138865, 'vf_loss': 0.03116294473875314, 'vf_explained_var': 0.94497615, 'kl': 0.002758943686785642, 'entropy': 1.3759019300341606, 'entropy_coeff': 0.005}
2020-09-21 16:33:27,864	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.007126423995941877, 'policy_loss': -0.02386407949961722, 'vf_loss': 0.019670896464958

2020-09-21 16:33:34,074	DEBUG sgd.py:120 -- 15 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.07231852039694786, 'policy_loss': -0.08154793572612107, 'vf_loss': 0.0050745538173941895, 'vf_explained_var': 0.99106956, 'kl': 0.010762362100649625, 'entropy': 1.3484036400914192, 'entropy_coeff': 0.005}
2020-09-21 16:33:34,528	DEBUG sgd.py:120 -- 16 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.0794314524391666, 'policy_loss': -0.0884514139033854, 'vf_loss': 0.004882978872046806, 'vf_explained_var': 0.9910532, 'kl': 0.010775437287520617, 'entropy': 1.35462736338377, 'entropy_coeff': 0.005}
2020-09-21 16:33:35,010	DEBUG sgd.py:120 -- 17 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.07807304663583636, 'policy_loss': -0.08812942344229668, 'vf_loss': 0.004778564354637638, 'vf_explained_var': 0.9913746, 'kl': 0.011846551613416523, 'entropy': 1.3433649092912674, 'entropy_coeff': 0.00

custom_metrics: {}
date: 2020-09-21_16-33-40
done: false
episode_len_mean: 2542.41
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.68216554193559
episode_reward_min: -32.22698139089825
episodes_this_iter: 2
episodes_total: 148
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.35732202231884
      entropy_coeff: 0.005
      kl: 0.015004905115347356
      policy_loss: -0.1045951850246638
      total_loss: -0.09232874377630651
      vf_explained_var: 0.9931391477584839
      vf_loss: 0.0038605840236414224
  num_steps_sampled: 291000
  num_steps_trained: 291000
iterations_since_restore: 291
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.309090909090909
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.9449909821282177

2020-09-21 16:33:43,204	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.07103583248681389, 'policy_loss': -0.00019710377091541886, 'vf_loss': 0.0782021232880652, 'vf_explained_var': 0.9593632, 'kl': 0.00045974556783945353, 'entropy': 1.4869346618652344, 'entropy_coeff': 0.005}
2020-09-21 16:33:43,693	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.0010755945113487542, 'policy_loss': -0.0246696334797889, 'vf_loss': 0.030556329060345888, 'vf_explained_var': 0.9840063, 'kl': 0.0025657176520326175, 'entropy': 1.4817781001329422, 'entropy_coeff': 0.005}
2020-09-21 16:33:44,182	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.025712018948979676, 'policy_loss': -0.038961636484600604, 'vf_loss': 0.016457797668408602, 'vf_explained_var': 0.9914452, 'kl': 0.004150950408075005, 'entropy': 1.4822039976716042, 'entropy_coeff': 

2020-09-21 16:33:55,463	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.09524316317401826, 'policy_loss': -0.10792251699604094, 'vf_loss': 0.007440728775691241, 'vf_explained_var': 0.99589884, 'kl': 0.01254914968740195, 'entropy': 1.4934778586030006, 'entropy_coeff': 0.005}
2020-09-21 16:33:55,952	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.10039577656425536, 'policy_loss': -0.11345786257879809, 'vf_loss': 0.007543667161371559, 'vf_explained_var': 0.99574107, 'kl': 0.01281063782516867, 'entropy': 1.4904705733060837, 'entropy_coeff': 0.005}
2020-09-21 16:33:56,404	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.09739891556091607, 'policy_loss': -0.11111712898127735, 'vf_loss': 0.007630692984093912, 'vf_explained_var': 0.9957136, 'kl': 0.013364678947255015, 'entropy': 1.4888439029455185, 'entropy_coeff': 0.

custom_metrics: {}
date: 2020-09-21_16-33-57
done: false
episode_len_mean: 2542.41
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.68216554193559
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 148
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.4903541132807732
      entropy_coeff: 0.005
      kl: 0.013564281049184501
      policy_loss: -0.11681712162680924
      total_loss: -0.10302690998651087
      vf_explained_var: 0.9958212375640869
      vf_loss: 0.007508156057156157
  num_steps_sampled: 292000
  num_steps_trained: 292000
iterations_since_restore: 292
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.20909090909091
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.944990982128217

2020-09-21 16:34:00,016	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.1423593971412629, 'policy_loss': 0.0019777147099375725, 'vf_loss': 0.14714829018339515, 'vf_explained_var': 0.9689698, 'kl': 0.0005342352978067455, 'entropy': 1.4615039750933647, 'entropy_coeff': 0.005}
2020-09-21 16:34:00,503	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.044622573361266404, 'policy_loss': -0.033970634918659925, 'vf_loss': 0.08134714514017105, 'vf_explained_var': 0.9847176, 'kl': 0.004494591456023045, 'entropy': 1.4609419628977776, 'entropy_coeff': 0.005}
2020-09-21 16:34:00,959	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.009423225768841803, 'policy_loss': -0.04635202442295849, 'vf_loss': 0.05593677330762148, 'vf_explained_var': 0.98854274, 'kl': 0.007060368341626599, 'entropy': 1.4620295241475105, 'entropy_coeff': 0.005}

2020-09-21 16:34:11,873	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.0892194954212755, 'policy_loss': -0.13910371740348637, 'vf_loss': 0.04040198668371886, 'vf_explained_var': 0.99111307, 'kl': 0.016487379209138453, 'entropy': 1.4422468468546867, 'entropy_coeff': 0.005}
2020-09-21 16:34:12,363	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.09239361924119294, 'policy_loss': -0.14259684388525784, 'vf_loss': 0.04069592815358192, 'vf_explained_var': 0.9904649, 'kl': 0.016501936479471624, 'entropy': 1.4401829615235329, 'entropy_coeff': 0.005}
2020-09-21 16:34:12,851	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.08952670244616456, 'policy_loss': -0.13892666541505605, 'vf_loss': 0.03994754375889897, 'vf_explained_var': 0.9909626, 'kl': 0.01645929494407028, 'entropy': 1.4425220787525177, 'entropy_coeff': 0.005}

custom_metrics: {}
date: 2020-09-21_16-34-13
done: false
episode_len_mean: 2542.41
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.68216554193559
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 148
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.4454843401908875
      entropy_coeff: 0.005
      kl: 0.016880122828297317
      policy_loss: -0.14414335717447102
      total_loss: -0.094603470293805
      vf_explained_var: 0.9909005761146545
      vf_loss: 0.03967618360184133
  num_steps_sampled: 293000
  num_steps_trained: 293000
iterations_since_restore: 293
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.180952380952381
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000002
  vram_util_percent0: 0.9449909821282179


2020-09-21 16:34:16,814	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.10100962757132947, 'policy_loss': 0.0012612984864972532, 'vf_loss': 0.10434604552574456, 'vf_explained_var': 0.9205297, 'kl': 0.0022982863152163358, 'entropy': 1.3849459066987038, 'entropy_coeff': 0.005}
2020-09-21 16:34:17,273	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.016016914858482778, 'policy_loss': -0.015659657772630453, 'vf_loss': 0.03658435761462897, 'vf_explained_var': 0.96899194, 'kl': 0.0020081450675206725, 'entropy': 1.3882065638899803, 'entropy_coeff': 0.005}
2020-09-21 16:34:17,757	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.016520300297997892, 'policy_loss': -0.031547884340398014, 'vf_loss': 0.017595295736100525, 'vf_explained_var': 0.9851963, 'kl': 0.004321521017118357, 'entropy': 1.3886490911245346, 'entropy_coeff': 0

2020-09-21 16:34:28,702	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.1045731995254755, 'policy_loss': -0.11756856646388769, 'vf_loss': 0.005645111494231969, 'vf_explained_var': 0.99489623, 'kl': 0.014059206121601164, 'entropy': 1.3769385740160942, 'entropy_coeff': 0.005}
2020-09-21 16:34:29,193	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.10606132773682475, 'policy_loss': -0.1195464754709974, 'vf_loss': 0.005565114537603222, 'vf_explained_var': 0.9947244, 'kl': 0.014640126843005419, 'entropy': 1.380617469549179, 'entropy_coeff': 0.005}
2020-09-21 16:34:29,675	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.10695854248479009, 'policy_loss': -0.12040822266135365, 'vf_loss': 0.005412244878243655, 'vf_explained_var': 0.9949093, 'kl': 0.014755535463336855, 'entropy': 1.3805100917816162, 'entropy_coeff': 0.00

custom_metrics: {}
date: 2020-09-21_16-34-30
done: false
episode_len_mean: 2542.41
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.68216554193559
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 148
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.3810084760189056
      entropy_coeff: 0.005
      kl: 0.014780786354094744
      policy_loss: -0.1281239144736901
      total_loss: -0.11448667093645781
      vf_explained_var: 0.9951515197753906
      vf_loss: 0.005576741954428144
  num_steps_sampled: 294000
  num_steps_trained: 294000
iterations_since_restore: 294
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.363636363636364
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.944990982128217

[2m[36m(pid=24699)[0m 2020-09-21 16:34:30,873	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 11141.06,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 609},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-2.589, max=3.118, mean=0.14),
[2m[36m(pid=24699)[0m                                   'prev_action': 6,
[2m[36m(pid=24699)[0m         

2020-09-21 16:34:33,692	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.25224393722601235, 'policy_loss': 0.0026110494509339333, 'vf_loss': 0.25585773680359125, 'vf_explained_var': 0.8876549, 'kl': 0.0004721801659429703, 'entropy': 1.3405879810452461, 'entropy_coeff': 0.005}
2020-09-21 16:34:34,096	INFO rollout_worker.py:736 -- Training on concatenated sample batches:

{ 'count': 64,
  'policy_batches': { 'default_policy': { 'data': { 'action_dist_inputs': np.ndarray((64, 21), dtype=float32, min=-6.841, max=8.338, mean=-0.211),
                                                    'action_logp': np.ndarray((64,), dtype=float32, min=-6.637, max=-0.009, mean=-1.288),
                                                    'action_prob': np.ndarray((64,), dtype=float32, min=0.001, max=0.991, mean=0.451),
                                                    'actions': np.ndarray((64,), dtype=int64, min=0.0, max=20.0, mean=5.703),
         

2020-09-21 16:34:40,351	DEBUG sgd.py:120 -- 14 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.028603870945516974, 'policy_loss': -0.08212170004844666, 'vf_loss': 0.04995773360133171, 'vf_explained_var': 0.97583044, 'kl': 0.010113722761161625, 'entropy': 1.3360092490911484, 'entropy_coeff': 0.005}
2020-09-21 16:34:40,832	DEBUG sgd.py:120 -- 15 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.02040853421203792, 'policy_loss': -0.07488404540345073, 'vf_loss': 0.050898685469292104, 'vf_explained_var': 0.97209084, 'kl': 0.010103119944687933, 'entropy': 1.330517329275608, 'entropy_coeff': 0.005}
2020-09-21 16:34:41,322	DEBUG sgd.py:120 -- 16 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.019533730344846845, 'policy_loss': -0.07307555165607482, 'vf_loss': 0.04934147116728127, 'vf_explained_var': 0.9748586, 'kl': 0.010743863880634308, 'entropy': 1.3355618491768837, 'entropy_coeff': 0

custom_metrics: {}
date: 2020-09-21_16-34-47
done: false
episode_len_mean: 2542.41
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.68216554193559
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 148
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.3409303799271584
      entropy_coeff: 0.005
      kl: 0.013027598441112787
      policy_loss: -0.09445358242373914
      total_loss: -0.0410106235940475
      vf_explained_var: 0.9755591154098511
      vf_loss: 0.04695716523565352
  num_steps_sampled: 295000
  num_steps_trained: 295000
iterations_since_restore: 295
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.6499999999999995
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.944990982128217

[2m[36m(pid=24699)[0m 2020-09-21 16:34:48,097	INFO rollout_worker.py:561 -- Generating sample batch of size 200
2020-09-21 16:34:50,664	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.1730610394733958, 'policy_loss': 0.0001622490817680955, 'vf_loss': 0.1786360964179039, 'vf_explained_var': 0.94028854, 'kl': 0.000631170358128319, 'entropy': 1.275272712111473, 'entropy_coeff': 0.005}
2020-09-21 16:34:51,123	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': 0.025423617800697684, 'policy_loss': -0.03998575743753463, 'vf_loss': 0.06516635371372104, 'vf_explained_var': 0.9766033, 'kl': 0.006496779154986143, 'entropy': 1.266993261873722, 'entropy_coeff': 0.005}
2020-09-21 16:34:51,609	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.008365957532078028, 'policy_loss': -0.04774677939713001, 'vf_loss': 0.03495149873197079, 'v

2020-09-21 16:35:02,552	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.08949104975908995, 'policy_loss': -0.11616588663309813, 'vf_loss': 0.011795359692769125, 'vf_explained_var': 0.9953779, 'kl': 0.021168935927562416, 'entropy': 1.3108133003115654, 'entropy_coeff': 0.005}
2020-09-21 16:35:03,038	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.0881772879511118, 'policy_loss': -0.11438721828744747, 'vf_loss': 0.011364855454303324, 'vf_explained_var': 0.99554807, 'kl': 0.021126513718627393, 'entropy': 1.3091019988059998, 'entropy_coeff': 0.005}
2020-09-21 16:35:03,530	DEBUG sgd.py:120 -- 27 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.0125, 'cur_lr': 1e-05, 'total_loss': -0.09051591367460787, 'policy_loss': -0.11729637254029512, 'vf_loss': 0.011231964803300798, 'vf_explained_var': 0.99543226, 'kl': 0.021866207709535956, 'entropy': 1.3182083666324615, 'entropy_coeff': 0

custom_metrics: {}
date: 2020-09-21_16-35-04
done: false
episode_len_mean: 2542.41
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.68216554193559
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 148
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.0125
      cur_lr: 1.0e-05
      entropy: 1.3083289116621017
      entropy_coeff: 0.005
      kl: 0.021513011190108955
      policy_loss: -0.11770215549040586
      total_loss: -0.09139500476885587
      vf_explained_var: 0.9959565997123718
      vf_loss: 0.011066870705690235
  num_steps_sampled: 296000
  num_steps_trained: 296000
iterations_since_restore: 296
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.177272727272728
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.94499098212821

2020-09-21 16:35:07,804	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.2617765897884965, 'policy_loss': 6.682879757136106e-05, 'vf_loss': 0.262903262861073, 'vf_explained_var': 0.8790134, 'kl': 0.0038770141062015595, 'entropy': 1.4163445085287094, 'entropy_coeff': 0.005}
2020-09-21 16:35:08,287	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.09165690379450098, 'policy_loss': -0.039569368469528854, 'vf_loss': 0.12900831759907305, 'vf_explained_var': 0.93744105, 'kl': 0.0060592134614125825, 'entropy': 1.39689489454031, 'entropy_coeff': 0.005}
2020-09-21 16:35:08,752	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.05378509359434247, 'policy_loss': -0.0559016193728894, 'vf_loss': 0.10543657094240189, 'vf_explained_var': 0.9492807, 'kl': 0.007435106032062322, 'entropy': 1.40838552266

2020-09-21 16:35:19,222	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.015113543311599642, 'policy_loss': -0.10872837028000504, 'vf_loss': 0.08115620410535485, 'vf_explained_var': 0.9603473, 'kl': 0.012822755670640618, 'entropy': 1.4031865075230598, 'entropy_coeff': 0.005}
2020-09-21 16:35:19,708	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.026140643924009055, 'policy_loss': -0.1192374542588368, 'vf_loss': 0.08135486347600818, 'vf_explained_var': 0.9591894, 'kl': 0.012371396354865283, 'entropy': 1.4094216898083687, 'entropy_coeff': 0.005}
2020-09-21 16:35:20,162	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.018018679227679968, 'policy_loss': -0.11288682441227138, 'vf_loss': 0.08275484829209745, 'vf_explained_var': 0.9590981, 'kl': 0.012605276482645422, 'entropy': 1.40

custom_metrics: {}
date: 2020-09-21_16-35-21
done: false
episode_len_mean: 2542.41
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.68216554193559
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 148
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.4092290103435516
      entropy_coeff: 0.005
      kl: 0.013335028779692948
      policy_loss: -0.11996053624898195
      total_loss: -0.02479481097543612
      vf_explained_var: 0.9601293802261353
      vf_loss: 0.08195929299108684
  num_steps_sampled: 297000
  num_steps_trained: 297000
iterations_since_restore: 297
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.404545454545455
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.944

2020-09-21 16:35:24,734	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.12111708987504244, 'policy_loss': 0.00027970224618911743, 'vf_loss': 0.1267888720612973, 'vf_explained_var': 0.879959, 'kl': 0.0007421596328963664, 'entropy': 1.4157272726297379, 'entropy_coeff': 0.005}
2020-09-21 16:35:25,222	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.01828247623052448, 'policy_loss': -0.04057219624519348, 'vf_loss': 0.058040215633809566, 'vf_explained_var': 0.94260013, 'kl': 0.005194124256377108, 'entropy': 1.4148214012384415, 'entropy_coeff': 0.005}
2020-09-21 16:35:25,676	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.02052170690149069, 'policy_loss': -0.06006072973832488, 'vf_loss': 0.03460278804413974, 'vf_explained_var': 0.9639523, 'kl': 0.007902476325398311, 'entropy': 1.413129

2020-09-21 16:35:34,200	DEBUG rollout_worker.py:760 -- Training out:

{ 'default_policy': { 'learner_stats': { 'allreduce_latency': 0.0,
                                         'cur_kl_coeff': 1.5187499999999998,
                                         'cur_lr': 1e-05,
                                         'entropy': 1.361237645149231,
                                         'entropy_coeff': 0.005,
                                         'kl': 0.01160605251789093,
                                         'policy_loss': -0.22666943073272705,
                                         'total_loss': -0.20780888199806213,
                                         'vf_explained_var': np.ndarray((1,), dtype=float32, min=0.992, max=0.992, mean=0.992),
                                         'vf_loss': 0.00804006215184927}}}

2020-09-21 16:35:34,232	DEBUG sgd.py:120 -- 20 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.10893913265317678, 'p

custom_metrics: {}
date: 2020-09-21_16-35-38
done: false
episode_len_mean: 2542.41
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.68216554193559
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 148
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.4167843386530876
      entropy_coeff: 0.005
      kl: 0.015496968408115208
      policy_loss: -0.14832434011623263
      total_loss: -0.12319652084261179
      vf_explained_var: 0.9909154176712036
      vf_loss: 0.008675725432112813
  num_steps_sampled: 298000
  num_steps_trained: 298000
iterations_since_restore: 298
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.554545454545455
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.94

2020-09-21 16:35:42,212	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.12752854847349226, 'policy_loss': 0.001131995813921094, 'vf_loss': 0.13227231381461024, 'vf_explained_var': 0.8930063, 'kl': 0.0005729673736789564, 'entropy': 1.349191576242447, 'entropy_coeff': 0.005}
2020-09-21 16:35:42,667	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.03292494732886553, 'policy_loss': -0.02213003288488835, 'vf_loss': 0.05507949716411531, 'vf_explained_var': 0.95845973, 'kl': 0.0044350377065711655, 'entropy': 1.3520462214946747, 'entropy_coeff': 0.005}
2020-09-21 16:35:43,158	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.015559801133349538, 'policy_loss': -0.044927197333890945, 'vf_loss': 0.028115174849517643, 'vf_explained_var': 0.9780947, 'kl': 0.005286403058562428, 'entropy': 1.3553

2020-09-21 16:35:53,589	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.10625958489254117, 'policy_loss': -0.13013116957154125, 'vf_loss': 0.006165925587993115, 'vf_explained_var': 0.99466765, 'kl': 0.016112509823869914, 'entropy': 1.353042557835579, 'entropy_coeff': 0.005}
2020-09-21 16:35:54,042	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.10702398465946317, 'policy_loss': -0.13038744498044252, 'vf_loss': 0.00596219117869623, 'vf_explained_var': 0.9950197, 'kl': 0.015932952461298555, 'entropy': 1.3593807145953178, 'entropy_coeff': 0.005}
2020-09-21 16:35:54,528	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.1043259653961286, 'policy_loss': -0.12842310452833772, 'vf_loss': 0.005985984782455489, 'vf_explained_var': 0.99503976, 'kl': 0.016368731332477182, 'entropy': 1.34

custom_metrics: {}
date: 2020-09-21_16-35-56
done: false
episode_len_mean: 2542.41
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.68216554193559
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 148
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.352754846215248
      entropy_coeff: 0.005
      kl: 0.017128583160229027
      policy_loss: -0.13776258868165314
      total_loss: -0.11265594197902828
      vf_explained_var: 0.9952270984649658
      vf_loss: 0.005856386589584872
  num_steps_sampled: 299000
  num_steps_trained: 299000
iterations_since_restore: 299
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.239130434782609
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.944

[2m[36m(pid=24699)[0m 2020-09-21 16:35:56,209	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 13761.815764030402,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 1609},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-3.29, max=3.512, mean=0.303),
[2m[36m(pid=24699)[0m                                   'prev_action': 15,
[2m[36m(pid=24699)

2020-09-21 16:35:59,084	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.11827473004814237, 'policy_loss': -0.0021785589633509517, 'vf_loss': 0.12614465970546007, 'vf_explained_var': 0.92917264, 'kl': 0.0007296266673555429, 'entropy': 1.3598983138799667, 'entropy_coeff': 0.005}
2020-09-21 16:35:59,537	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.056382606038823724, 'policy_loss': -0.028844963700976223, 'vf_loss': 0.08611520542763174, 'vf_explained_var': 0.95290405, 'kl': 0.0038676260228385217, 'entropy': 1.3523187413811684, 'entropy_coeff': 0.005}
2020-09-21 16:36:00,025	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.0022199939703568816, 'policy_loss': -0.06253479653969407, 'vf_loss': 0.06239630957134068, 'vf_explained_var': 0.96316755, 'kl': 0.0060129026242066175, 'entropy': 

2020-09-21 16:36:10,486	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.06876003043726087, 'policy_loss': -0.13137494423426688, 'vf_loss': 0.04922397469636053, 'vf_explained_var': 0.9702696, 'kl': 0.01326812501065433, 'entropy': 1.352006234228611, 'entropy_coeff': 0.005}
2020-09-21 16:36:10,942	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.06465652422048151, 'policy_loss': -0.12628193874843419, 'vf_loss': 0.047749456483870745, 'vf_explained_var': 0.97028244, 'kl': 0.013598383928183466, 'entropy': 1.3553184792399406, 'entropy_coeff': 0.005}
2020-09-21 16:36:11,431	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.07149518572259694, 'policy_loss': -0.13414012326393276, 'vf_loss': 0.048606815515086055, 'vf_explained_var': 0.97039944, 'kl': 0.01370503578800708, 'entropy': 1.355

custom_metrics: {}
date: 2020-09-21_16-36-12
done: false
episode_len_mean: 2584.32
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.60210032744972
episode_reward_min: -32.22698139089825
episodes_this_iter: 1
episodes_total: 149
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.3571973592042923
      entropy_coeff: 0.005
      kl: 0.0138854700489901
      policy_loss: -0.13291823444887996
      total_loss: -0.06987256847787648
      vf_explained_var: 0.9698161482810974
      vf_loss: 0.04874310130253434
  num_steps_sampled: 300000
  num_steps_trained: 300000
iterations_since_restore: 300
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.331818181818182
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.94499

[2m[36m(pid=24699)[0m 2020-09-21 16:36:13,113	INFO rollout_worker.py:561 -- Generating sample batch of size 200
2020-09-21 16:36:16,042	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.1217529441928491, 'policy_loss': 0.0015284988330677152, 'vf_loss': 0.12688412121497095, 'vf_explained_var': 0.90811354, 'kl': 0.00035572805912176264, 'entropy': 1.4399882182478905, 'entropy_coeff': 0.005}
2020-09-21 16:36:16,494	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.016398738604038954, 'policy_loss': -0.024579004908446223, 'vf_loss': 0.04394703765865415, 'vf_explained_var': 0.9686386, 'kl': 0.002786390199617017, 'entropy': 1.4402242377400398, 'entropy_coeff': 0.005}
2020-09-21 16:36:16,987	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.033838729228591546, 'policy_loss': -0.05371658556

2020-09-21 16:36:27,440	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.1195944540668279, 'policy_loss': -0.13491830928251147, 'vf_loss': 0.001156300353613915, 'vf_explained_var': 0.9990985, 'kl': 0.014046937576495111, 'entropy': 1.433246649801731, 'entropy_coeff': 0.005}
2020-09-21 16:36:27,898	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.12094171531498432, 'policy_loss': -0.13637743960134685, 'vf_loss': 0.0011272093252046034, 'vf_explained_var': 0.99915576, 'kl': 0.014142213040031493, 'entropy': 1.4339946657419205, 'entropy_coeff': 0.005}
2020-09-21 16:36:28,388	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.13108383805956692, 'policy_loss': -0.14665344927925617, 'vf_loss': 0.0010461235906404909, 'vf_explained_var': 0.999198, 'kl': 0.01426582521526143, 'entropy': 1.42

custom_metrics: {}
date: 2020-09-21_16-36-29
done: false
episode_len_mean: 2584.32
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.602100327449726
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 149
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.4346099272370338
      entropy_coeff: 0.005
      kl: 0.014677466475404799
      policy_loss: -0.13725629798136652
      total_loss: -0.1211641615955159
      vf_explained_var: 0.9992655515670776
      vf_loss: 0.0009737877262523398
  num_steps_sampled: 301000
  num_steps_trained: 301000
iterations_since_restore: 301
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.322727272727272
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.9

2020-09-21 16:36:33,274	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.15220071375370026, 'policy_loss': -0.0035834094742313027, 'vf_loss': 0.16167781362310052, 'vf_explained_var': 0.9717482, 'kl': 0.0004660362397114759, 'entropy': 1.3202942833304405, 'entropy_coeff': 0.005}
2020-09-21 16:36:33,761	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.021369787165895104, 'policy_loss': -0.016405447269789875, 'vf_loss': 0.04145721951499581, 'vf_explained_var': 0.99272394, 'kl': 0.001935698172019329, 'entropy': 1.324366308748722, 'entropy_coeff': 0.005}
2020-09-21 16:36:34,214	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.01601155602838844, 'policy_loss': -0.03629931341856718, 'vf_loss': 0.021173235028982162, 'vf_explained_var': 0.99618834, 'kl': 0.00376082792354282, 'entropy': 1.319

2020-09-21 16:36:39,941	DEBUG sgd.py:120 -- 14 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.06986563373357058, 'policy_loss': -0.07918939576484263, 'vf_loss': 0.0033593278349144384, 'vf_explained_var': 0.99941915, 'kl': 0.008271216182038188, 'entropy': 1.319494679570198, 'entropy_coeff': 0.005}
2020-09-21 16:36:40,392	DEBUG sgd.py:120 -- 15 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.07439939572941512, 'policy_loss': -0.08411888568662107, 'vf_loss': 0.003240044752601534, 'vf_explained_var': 0.99945164, 'kl': 0.008625431481050327, 'entropy': 1.3240841180086136, 'entropy_coeff': 0.005}
2020-09-21 16:36:40,878	DEBUG sgd.py:120 -- 16 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.07088292145635933, 'policy_loss': -0.08050997043028474, 'vf_loss': 0.003097108557994943, 'vf_explained_var': 0.99946004, 'kl': 0.008658626349642873, 'entropy': 

custom_metrics: {}
date: 2020-09-21_16-36-47
done: false
episode_len_mean: 2584.32
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.602100327449726
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 149
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.3267767056822777
      entropy_coeff: 0.005
      kl: 0.010560464404989034
      policy_loss: -0.0927569130435586
      total_loss: -0.08105883427197114
      vf_explained_var: 0.9995850324630737
      vf_loss: 0.0022932644715183415
  num_steps_sampled: 302000
  num_steps_trained: 302000
iterations_since_restore: 302
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.286363636363635
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.9

2020-09-21 16:36:50,074	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.24506674334406853, 'policy_loss': 0.002639970858581364, 'vf_loss': 0.2485202457755804, 'vf_explained_var': 0.9233911, 'kl': 0.0003981091384428481, 'entropy': 1.3396213501691818, 'entropy_coeff': 0.005}
2020-09-21 16:36:50,560	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.03383938904153183, 'policy_loss': -0.03143239312339574, 'vf_loss': 0.06783243943937123, 'vf_explained_var': 0.9785278, 'kl': 0.0027545112388907, 'entropy': 1.3488144874572754, 'entropy_coeff': 0.005}
2020-09-21 16:36:51,012	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.011977334506809711, 'policy_loss': -0.04546887363540009, 'vf_loss': 0.03298899217043072, 'vf_explained_var': 0.9899993, 'kl': 0.004770704705151729, 'entropy': 1.3485909327

2020-09-21 16:37:01,519	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.10277634486556053, 'policy_loss': -0.1160272634588182, 'vf_loss': 0.0022117518019513227, 'vf_explained_var': 0.9992604, 'kl': 0.01174288525362499, 'entropy': 1.3590669855475426, 'entropy_coeff': 0.005}
2020-09-21 16:37:02,019	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.09834579291054979, 'policy_loss': -0.11254570260643959, 'vf_loss': 0.0021741779091826174, 'vf_explained_var': 0.99925447, 'kl': 0.012376967759337276, 'entropy': 1.3543576896190643, 'entropy_coeff': 0.005}
2020-09-21 16:37:02,472	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.09889243022189476, 'policy_loss': -0.1127348612062633, 'vf_loss': 0.0020459341212699655, 'vf_explained_var': 0.999306, 'kl': 0.012251907493919134, 'entropy': 1.3

custom_metrics: {}
date: 2020-09-21_16-37-04
done: false
episode_len_mean: 2584.32
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.602100327449726
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 149
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.3635284677147865
      entropy_coeff: 0.005
      kl: 0.012639664055313915
      policy_loss: -0.11722845252370462
      total_loss: -0.10302141623105854
      vf_explained_var: 0.9993100166320801
      vf_loss: 0.0018281941811437719
  num_steps_sampled: 303000
  num_steps_trained: 303000
iterations_since_restore: 303
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 6.3
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.94499098212821

2020-09-21 16:37:07,448	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.14380161243025213, 'policy_loss': 0.002356953453272581, 'vf_loss': 0.1475340761244297, 'vf_explained_var': 0.9066962, 'kl': 0.0003994696322487945, 'entropy': 1.3392215371131897, 'entropy_coeff': 0.005}
2020-09-21 16:37:07,942	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.006836399785242975, 'policy_loss': -0.03349790582433343, 'vf_loss': 0.041968495468609035, 'vf_explained_var': 0.9729598, 'kl': 0.0032832367505761795, 'entropy': 1.3241204842925072, 'entropy_coeff': 0.005}
2020-09-21 16:37:08,388	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.028946425416506827, 'policy_loss': -0.04902923805639148, 'vf_loss': 0.018559115123935044, 'vf_explained_var': 0.987152, 'kl': 0.005400606285547838, 'entropy': 1.33569

2020-09-21 16:37:18,864	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.10804801748599857, 'policy_loss': -0.12239676876924932, 'vf_loss': 0.0009289682238886598, 'vf_explained_var': 0.9993181, 'kl': 0.013210364035330713, 'entropy': 1.3286905884742737, 'entropy_coeff': 0.005}
2020-09-21 16:37:19,353	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.11111903912387788, 'policy_loss': -0.12601060676388443, 'vf_loss': 0.0009037273630383424, 'vf_explained_var': 0.99936825, 'kl': 0.01357317587826401, 'entropy': 1.3252850323915482, 'entropy_coeff': 0.005}
2020-09-21 16:37:19,809	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.11700125510105863, 'policy_loss': -0.1318441336043179, 'vf_loss': 0.0008718864046386443, 'vf_explained_var': 0.99939895, 'kl': 0.01355996634811163, 'entropy': 1

custom_metrics: {}
date: 2020-09-21_16-37-21
done: false
episode_len_mean: 2584.32
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.602100327449726
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 149
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.3256130889058113
      entropy_coeff: 0.005
      kl: 0.014013049309141934
      policy_loss: -0.14002327853813767
      total_loss: -0.1246074684895575
      vf_explained_var: 0.9994716644287109
      vf_loss: 0.0007615618305862881
  num_steps_sampled: 304000
  num_steps_trained: 304000
iterations_since_restore: 304
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.454545454545454
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.9

[2m[36m(pid=24699)[0m 2020-09-21 16:37:21,523	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 11114.59,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 2609},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-2.242, max=1.412, mean=0.294),
[2m[36m(pid=24699)[0m                                   'prev_action': 20,
[2m[36m(pid=24699)[0m      

2020-09-21 16:37:24,675	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.10698307259008288, 'policy_loss': 0.005055540939792991, 'vf_loss': 0.10622108564712107, 'vf_explained_var': 0.93508613, 'kl': 0.0015420194464902115, 'entropy': 1.3270993158221245, 'entropy_coeff': 0.005}
2020-09-21 16:37:25,159	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.019670462352223694, 'policy_loss': -0.022329691681079566, 'vf_loss': 0.04377821774687618, 'vf_explained_var': 0.9735889, 'kl': 0.003293701578513719, 'entropy': 1.3560752719640732, 'entropy_coeff': 0.005}
2020-09-21 16:37:25,613	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.007290527049917728, 'policy_loss': -0.03311882296111435, 'vf_loss': 0.02445853961398825, 'vf_explained_var': 0.9852563, 'kl': 0.005333566296030767, 'entropy': 1.3461

2020-09-21 16:37:34,310	DEBUG rollout_worker.py:760 -- Training out:

{ 'default_policy': { 'learner_stats': { 'allreduce_latency': 0.0,
                                         'cur_kl_coeff': 1.5187499999999998,
                                         'cur_lr': 1e-05,
                                         'entropy': 1.4576833248138428,
                                         'entropy_coeff': 0.005,
                                         'kl': 0.010885869152843952,
                                         'policy_loss': -0.2639927864074707,
                                         'total_loss': -0.24986374378204346,
                                         'vf_explained_var': np.ndarray((1,), dtype=float32, min=0.997, max=0.997, mean=0.997),
                                         'vf_loss': 0.004884617868810892}}}

2020-09-21 16:37:34,650	DEBUG sgd.py:120 -- 21 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.10424820584012195, 

custom_metrics: {}
date: 2020-09-21_16-37-38
done: false
episode_len_mean: 2584.32
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.602100327449726
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 149
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.3520660027861595
      entropy_coeff: 0.005
      kl: 0.015489280165638775
      policy_loss: -0.12959811463952065
      total_loss: -0.10903962631709874
      vf_explained_var: 0.9976926445960999
      vf_loss: 0.003794466974795796
  num_steps_sampled: 305000
  num_steps_trained: 305000
iterations_since_restore: 305
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.490909090909091
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.9

[2m[36m(pid=24699)[0m 2020-09-21 16:37:38,765	INFO rollout_worker.py:561 -- Generating sample batch of size 200
2020-09-21 16:37:42,118	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.05642637575510889, 'policy_loss': -0.008035765727981925, 'vf_loss': 0.06880733091384172, 'vf_explained_var': 0.94405043, 'kl': 0.0015252866256065811, 'entropy': 1.3323429301381111, 'entropy_coeff': 0.005}
2020-09-21 16:37:42,606	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.0029289539670571685, 'policy_loss': -0.02814470825251192, 'vf_loss': 0.024681969662196934, 'vf_explained_var': 0.9784394, 'kl': 0.004686497952206992, 'entropy': 1.316767930984497, 'entropy_coeff': 0.005}
2020-09-21 16:37:43,092	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.03212798875756562, 'policy_loss': -0.04793101863

2020-09-21 16:37:53,532	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.11347710667178035, 'policy_loss': -0.12797697447240353, 'vf_loss': 0.001315902092756005, 'vf_explained_var': 0.9988986, 'kl': 0.013036539021413773, 'entropy': 1.3230552822351456, 'entropy_coeff': 0.005}
2020-09-21 16:37:54,019	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.1062339274212718, 'policy_loss': -0.12117763562127948, 'vf_loss': 0.0013310874055605382, 'vf_explained_var': 0.9989123, 'kl': 0.013327629654668272, 'entropy': 1.3257429525256157, 'entropy_coeff': 0.005}
2020-09-21 16:37:54,505	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.11341063352301717, 'policy_loss': -0.1284141016076319, 'vf_loss': 0.0012154051655670628, 'vf_explained_var': 0.99892545, 'kl': 0.013418316550087184, 'entropy': 1.

custom_metrics: {}
date: 2020-09-21_16-37-55
done: false
episode_len_mean: 2584.32
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.602100327449726
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 149
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.3175735101103783
      entropy_coeff: 0.005
      kl: 0.013435362780001014
      policy_loss: -0.13885924604255706
      total_loss: -0.12395113485399634
      vf_explained_var: 0.9990501403808594
      vf_loss: 0.0010910230594163295
  num_steps_sampled: 306000
  num_steps_trained: 306000
iterations_since_restore: 306
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.636363636363637
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.

2020-09-21 16:37:59,559	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.1914026637095958, 'policy_loss': 0.0004188854363746941, 'vf_loss': 0.1959719555452466, 'vf_explained_var': 0.8747324, 'kl': 0.001123243614030378, 'entropy': 1.3388192728161812, 'entropy_coeff': 0.005}
2020-09-21 16:38:00,048	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.008976659912150353, 'policy_loss': -0.03379387327004224, 'vf_loss': 0.04542467463761568, 'vf_explained_var': 0.97092766, 'kl': 0.00265826637041755, 'entropy': 1.3382758647203445, 'entropy_coeff': 0.005}
2020-09-21 16:38:00,506	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.03857623878866434, 'policy_loss': -0.05774439498782158, 'vf_loss': 0.018781664839480072, 'vf_explained_var': 0.98763084, 'kl': 0.004671227288781665, 'entropy': 1.3415870

2020-09-21 16:38:11,029	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.10971839469857514, 'policy_loss': -0.11936459736898541, 'vf_loss': 0.0012602947463165037, 'vf_explained_var': 0.99913263, 'kl': 0.009928321320330724, 'entropy': 1.3385458439588547, 'entropy_coeff': 0.005}
2020-09-21 16:38:11,515	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.10456901416182518, 'policy_loss': -0.11483364831656218, 'vf_loss': 0.001196469285787316, 'vf_explained_var': 0.99918556, 'kl': 0.0103817006747704, 'entropy': 1.339807741343975, 'entropy_coeff': 0.005}
2020-09-21 16:38:11,969	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.10596599581185728, 'policy_loss': -0.11588698136620224, 'vf_loss': 0.0011210897646378726, 'vf_explained_var': 0.9991995, 'kl': 0.01021627223235555, 'entropy': 1.3

custom_metrics: {}
date: 2020-09-21_16-38-13
done: false
episode_len_mean: 2584.32
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.602100327449726
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 149
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.3380838558077812
      entropy_coeff: 0.005
      kl: 0.011104380013421178
      policy_loss: -0.11688756995135918
      total_loss: -0.10562965623103082
      vf_explained_var: 0.9992413520812988
      vf_loss: 0.001083552804630017
  num_steps_sampled: 307000
  num_steps_trained: 307000
iterations_since_restore: 307
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.386363636363638
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.9

2020-09-21 16:38:17,218	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.0812786330934614, 'policy_loss': -0.0010765932966023684, 'vf_loss': 0.08748724847100675, 'vf_explained_var': 0.93181664, 'kl': 0.0006869730509528882, 'entropy': 1.2350737303495407, 'entropy_coeff': 0.005}
2020-09-21 16:38:17,707	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.005792680429294705, 'policy_loss': -0.03353492799215019, 'vf_loss': 0.02912374516017735, 'vf_explained_var': 0.9753662, 'kl': 0.0031204136757878587, 'entropy': 1.2241249158978462, 'entropy_coeff': 0.005}
2020-09-21 16:38:18,159	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.029428760521113873, 'policy_loss': -0.045644018915481865, 'vf_loss': 0.014297571004135534, 'vf_explained_var': 0.9876344, 'kl': 0.005305247788783163, 'entropy': 1.

2020-09-21 16:38:28,592	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.118383071385324, 'policy_loss': -0.13524758745916188, 'vf_loss': 0.0011540847190190107, 'vf_explained_var': 0.99899185, 'kl': 0.014401700347661972, 'entropy': 1.2324295565485954, 'entropy_coeff': 0.005}
2020-09-21 16:38:29,079	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.12010425026528537, 'policy_loss': -0.13736008724663407, 'vf_loss': 0.001102338603232056, 'vf_explained_var': 0.99900675, 'kl': 0.014694984012749046, 'entropy': 1.2329007983207703, 'entropy_coeff': 0.005}
2020-09-21 16:38:29,535	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.12354126636637375, 'policy_loss': -0.1402937730308622, 'vf_loss': 0.0010508920822758228, 'vf_explained_var': 0.9989984, 'kl': 0.014389769756235182, 'entropy': 1.

custom_metrics: {}
date: 2020-09-21_16-38-31
done: false
episode_len_mean: 2584.32
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.602100327449726
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 149
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.2337881103157997
      entropy_coeff: 0.005
      kl: 0.014796361734624952
      policy_loss: -0.13275999948382378
      total_loss: -0.115458081709221
      vf_explained_var: 0.9991375207901001
      vf_loss: 0.000998883795546135
  num_steps_sampled: 308000
  num_steps_trained: 308000
iterations_since_restore: 308
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.482608695652174
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.944

2020-09-21 16:38:34,324	INFO rollout_worker.py:736 -- Training on concatenated sample batches:

{ 'count': 64,
  'policy_batches': { 'default_policy': { 'data': { 'action_dist_inputs': np.ndarray((64, 21), dtype=float32, min=-7.158, max=9.572, mean=-0.257),
                                                    'action_logp': np.ndarray((64,), dtype=float32, min=-9.372, max=-0.005, mean=-1.446),
                                                    'action_prob': np.ndarray((64,), dtype=float32, min=0.0, max=0.995, mean=0.465),
                                                    'actions': np.ndarray((64,), dtype=int64, min=0.0, max=20.0, mean=5.984),
                                                    'advantages': np.ndarray((64,), dtype=float32, min=-2.577, max=1.581, mean=-0.196),
                                                    'agent_index': np.ndarray((64,), dtype=int64, min=0.0, max=0.0, mean=0.0),
                                                    'dones': np.ndarray((64,), dty

2020-09-21 16:38:41,426	DEBUG sgd.py:120 -- 14 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.09457927430048585, 'policy_loss': -0.10540320002473891, 'vf_loss': 0.002349811744352337, 'vf_explained_var': 0.9981394, 'kl': 0.009989867045078427, 'entropy': 1.3395989462733269, 'entropy_coeff': 0.005}
2020-09-21 16:38:41,920	DEBUG sgd.py:120 -- 15 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.09540215693414211, 'policy_loss': -0.106428217375651, 'vf_loss': 0.0021916876576142386, 'vf_explained_var': 0.9982635, 'kl': 0.01021283894078806, 'entropy': 1.3352769240736961, 'entropy_coeff': 0.005}
2020-09-21 16:38:42,375	DEBUG sgd.py:120 -- 16 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.10059013968566433, 'policy_loss': -0.11171677726088092, 'vf_loss': 0.0021467569677042775, 'vf_explained_var': 0.9983076, 'kl': 0.010344322770833969, 'entropy': 1.34

custom_metrics: {}
date: 2020-09-21_16-38-48
done: false
episode_len_mean: 2628.75
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.53371009288972
episode_reward_min: -32.22698139089825
episodes_this_iter: 1
episodes_total: 150
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.34759521484375
      entropy_coeff: 0.005
      kl: 0.012869447702541947
      policy_loss: -0.12538604135625064
      total_loss: -0.1114309980548569
      vf_explained_var: 0.9990910887718201
      vf_loss: 0.0011475517057988327
  num_steps_sampled: 309000
  num_steps_trained: 309000
iterations_since_restore: 309
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.368181818181818
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.9449

[2m[36m(pid=24699)[0m 2020-09-21 16:38:48,859	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 12752.024773492198,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 3609},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-3.529, max=3.21, mean=0.617),
[2m[36m(pid=24699)[0m                                   'prev_action': 3,
[2m[36m(pid=24699)[

2020-09-21 16:38:52,144	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.2150640757754445, 'policy_loss': -0.0012283429969102144, 'vf_loss': 0.21977344807237387, 'vf_explained_var': 0.95459855, 'kl': 0.0019264016905176096, 'entropy': 1.2813512310385704, 'entropy_coeff': 0.005}
2020-09-21 16:38:52,623	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.04166449484182522, 'policy_loss': -0.028801863896660507, 'vf_loss': 0.06799501995556056, 'vf_explained_var': 0.9859027, 'kl': 0.005845389765454456, 'entropy': 1.2812686786055565, 'entropy_coeff': 0.005}
2020-09-21 16:38:53,071	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.01392351125832647, 'policy_loss': -0.04688421433093026, 'vf_loss': 0.031046425574459136, 'vf_explained_var': 0.9929081, 'kl': 0.005485697271069512, 'entropy': 1.2834

2020-09-21 16:39:03,510	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.08696306741330773, 'policy_loss': -0.10869115492096171, 'vf_loss': 0.008797949492873158, 'vf_explained_var': 0.9979212, 'kl': 0.01276583003345877, 'entropy': 1.2915940135717392, 'entropy_coeff': 0.005}
2020-09-21 16:39:03,993	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.08428498893044889, 'policy_loss': -0.106026383000426, 'vf_loss': 0.009017091480927775, 'vf_explained_var': 0.99807143, 'kl': 0.012630176730453968, 'entropy': 1.2915542721748352, 'entropy_coeff': 0.005}
2020-09-21 16:39:04,446	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.08628210343886167, 'policy_loss': -0.10820592264644802, 'vf_loss': 0.008959374434198253, 'vf_explained_var': 0.99796927, 'kl': 0.01279150228947401, 'entropy': 1.292

custom_metrics: {}
date: 2020-09-21_16-39-05
done: false
episode_len_mean: 2628.75
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.53371009288972
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 150
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.2888612374663353
      entropy_coeff: 0.005
      kl: 0.013001738640014082
      policy_loss: -0.11695777624845505
      total_loss: -0.0949091725051403
      vf_explained_var: 0.9978983998298645
      vf_loss: 0.008746522682486102
  num_steps_sampled: 310000
  num_steps_trained: 310000
iterations_since_restore: 310
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.290909090909091
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.944

[2m[36m(pid=24699)[0m 2020-09-21 16:39:06,196	INFO rollout_worker.py:561 -- Generating sample batch of size 200
2020-09-21 16:39:09,514	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.12812975700944662, 'policy_loss': -0.0009153910214081407, 'vf_loss': 0.13399939890950918, 'vf_explained_var': 0.97436035, 'kl': 0.0010110246818585966, 'entropy': 1.2979478016495705, 'entropy_coeff': 0.005}
2020-09-21 16:39:10,001	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.01627661066595465, 'policy_loss': -0.02956696436740458, 'vf_loss': 0.046480990247800946, 'vf_explained_var': 0.9908612, 'kl': 0.0038623635555268265, 'entropy': 1.300675630569458, 'entropy_coeff': 0.005}
2020-09-21 16:39:10,489	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.033080369990784675, 'policy_loss': -0.06315829756

2020-09-21 16:39:20,959	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.11528211238328367, 'policy_loss': -0.13654471893096343, 'vf_loss': 0.0076865781302331015, 'vf_explained_var': 0.9983833, 'kl': 0.013165088836103678, 'entropy': 1.2836886569857597, 'entropy_coeff': 0.005}
2020-09-21 16:39:21,441	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.11644180427538231, 'policy_loss': -0.1386802267516032, 'vf_loss': 0.00789046857971698, 'vf_explained_var': 0.99832994, 'kl': 0.01367652224143967, 'entropy': 1.2846540361642838, 'entropy_coeff': 0.005}
2020-09-21 16:39:21,931	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.11705270898528397, 'policy_loss': -0.1391156476456672, 'vf_loss': 0.007609144609887153, 'vf_explained_var': 0.99831545, 'kl': 0.013740654452703893, 'entropy': 1.28

custom_metrics: {}
date: 2020-09-21_16-39-23
done: false
episode_len_mean: 2628.75
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.53371009288972
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 150
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.276493489742279
      entropy_coeff: 0.005
      kl: 0.0139032420120202
      policy_loss: -0.13770450907759368
      total_loss: -0.11575684859417379
      vf_explained_var: 0.9984414577484131
      vf_loss: 0.007214576515252702
  num_steps_sampled: 311000
  num_steps_trained: 311000
iterations_since_restore: 311
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.443478260869565
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.94499

2020-09-21 16:39:27,197	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.09480441291816533, 'policy_loss': 0.0004387632943689823, 'vf_loss': 0.09938250295817852, 'vf_explained_var': 0.95306355, 'kl': 0.0013596195754433105, 'entropy': 1.4163545593619347, 'entropy_coeff': 0.005}
2020-09-21 16:39:27,684	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.014030435588210821, 'policy_loss': -0.02237275312654674, 'vf_loss': 0.03869489603675902, 'vf_explained_var': 0.98184496, 'kl': 0.003152804565615952, 'entropy': 1.416004717350006, 'entropy_coeff': 0.005}
2020-09-21 16:39:28,136	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.025784222641959786, 'policy_loss': -0.045746192103251815, 'vf_loss': 0.01852408959530294, 'vf_explained_var': 0.9907029, 'kl': 0.005552851129323244, 'entropy': 1.399

2020-09-21 16:39:34,812	DEBUG sgd.py:120 -- 16 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.10195550252683461, 'policy_loss': -0.1183544669765979, 'vf_loss': 0.0040655346238054335, 'vf_explained_var': 0.9979746, 'kl': 0.012752724520396441, 'entropy': 1.4069546237587929, 'entropy_coeff': 0.005}
2020-09-21 16:39:35,309	DEBUG sgd.py:120 -- 17 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.10402711003553122, 'policy_loss': -0.12099272490013391, 'vf_loss': 0.003988225318607874, 'vf_explained_var': 0.9979651, 'kl': 0.013198808708693832, 'entropy': 1.4136593416333199, 'entropy_coeff': 0.005}
2020-09-21 16:39:35,763	DEBUG sgd.py:120 -- 18 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.10094405588461086, 'policy_loss': -0.1179842070559971, 'vf_loss': 0.003823577018920332, 'vf_explained_var': 0.9979688, 'kl': 0.013315622636582702, 'entropy': 1.40

custom_metrics: {}
date: 2020-09-21_16-39-41
done: false
episode_len_mean: 2628.75
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.53371009288972
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 150
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.404704973101616
      entropy_coeff: 0.005
      kl: 0.014919774606823921
      policy_loss: -0.1395804932108149
      total_loss: -0.12060750497039407
      vf_explained_var: 0.9982600808143616
      vf_loss: 0.0033371062745573
  num_steps_sampled: 312000
  num_steps_trained: 312000
iterations_since_restore: 312
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.4818181818181815
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.94499

2020-09-21 16:39:44,816	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.12194102816283703, 'policy_loss': 0.0022080211783759296, 'vf_loss': 0.12404796108603477, 'vf_explained_var': 0.9468348, 'kl': 0.001543944357257132, 'entropy': 1.3319652825593948, 'entropy_coeff': 0.005}
2020-09-21 16:39:45,305	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.015848935872782022, 'policy_loss': -0.02771722455509007, 'vf_loss': 0.043797262478619814, 'vf_explained_var': 0.9819974, 'kl': 0.004190534658846445, 'entropy': 1.3190955072641373, 'entropy_coeff': 0.005}
2020-09-21 16:39:45,754	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.02472330955788493, 'policy_loss': -0.047769110184162855, 'vf_loss': 0.01948592404369265, 'vf_explained_var': 0.99186903, 'kl': 0.0067559495801106095, 'entropy': 1.340

2020-09-21 16:39:56,215	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.1217480746563524, 'policy_loss': -0.14094387041404843, 'vf_loss': 0.0026868604181800038, 'vf_explained_var': 0.99874014, 'kl': 0.015277217666152865, 'entropy': 1.3386655449867249, 'entropy_coeff': 0.005}
2020-09-21 16:39:56,709	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.12488884455524385, 'policy_loss': -0.1442332116421312, 'vf_loss': 0.0026498777369852178, 'vf_explained_var': 0.99878293, 'kl': 0.015393123379908502, 'entropy': 1.3367617204785347, 'entropy_coeff': 0.005}
2020-09-21 16:39:57,165	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.12443411804269999, 'policy_loss': -0.14399703062372282, 'vf_loss': 0.002620841631141957, 'vf_explained_var': 0.99876195, 'kl': 0.015543122659437358, 'entropy': 

custom_metrics: {}
date: 2020-09-21_16-39-58
done: false
episode_len_mean: 2628.75
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.53371009288972
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 150
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.3398357257246971
      entropy_coeff: 0.005
      kl: 0.017079166136682034
      policy_loss: -0.1547173392609693
      total_loss: -0.13297574897296727
      vf_explained_var: 0.9988516569137573
      vf_loss: 0.00250178069472895
  num_steps_sampled: 313000
  num_steps_trained: 313000
iterations_since_restore: 313
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.469565217391304
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.9449

2020-09-21 16:40:02,579	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.11180604330729693, 'policy_loss': 0.00674084760248661, 'vf_loss': 0.11103937192820013, 'vf_explained_var': 0.9421345, 'kl': 0.0004675276028840436, 'entropy': 1.3368463590741158, 'entropy_coeff': 0.005}
2020-09-21 16:40:03,067	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.013596963952295482, 'policy_loss': -0.02224549639504403, 'vf_loss': 0.03792467433959246, 'vf_explained_var': 0.9784606, 'kl': 0.003016420188941993, 'entropy': 1.3326809406280518, 'entropy_coeff': 0.005}
2020-09-21 16:40:03,519	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.022701653419062495, 'policy_loss': -0.041926472214981914, 'vf_loss': 0.018520344223361462, 'vf_explained_var': 0.9896958, 'kl': 0.004849497665418312, 'entropy': 1.33214

2020-09-21 16:40:13,982	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.10495820746291429, 'policy_loss': -0.11986592778703198, 'vf_loss': 0.0025232094922102988, 'vf_explained_var': 0.99859154, 'kl': 0.012517914699856192, 'entropy': 1.3254142925143242, 'entropy_coeff': 0.005}
2020-09-21 16:40:14,469	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.10189635929418728, 'policy_loss': -0.11776985676260665, 'vf_loss': 0.002396908043010626, 'vf_explained_var': 0.99856603, 'kl': 0.013263585104141384, 'entropy': 1.3334957212209702, 'entropy_coeff': 0.005}
2020-09-21 16:40:14,925	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.11262116022408009, 'policy_loss': -0.12827758118510246, 'vf_loss': 0.0023979053876246326, 'vf_explained_var': 0.99868906, 'kl': 0.013099221454467624, 'entropy'

custom_metrics: {}
date: 2020-09-21_16-40-16
done: false
episode_len_mean: 2628.75
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.53371009288972
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 150
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.328705906867981
      entropy_coeff: 0.005
      kl: 0.013648782973177731
      policy_loss: -0.12524517916608602
      total_loss: -0.10889557795599103
      vf_explained_var: 0.9987514019012451
      vf_loss: 0.002264037997520063
  num_steps_sampled: 314000
  num_steps_trained: 314000
iterations_since_restore: 314
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.38695652173913
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.9449

[2m[36m(pid=24699)[0m 2020-09-21 16:40:16,667	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 12357.31,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 4609},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-15.329, max=7.99, mean=0.446),
[2m[36m(pid=24699)[0m                                   'prev_action': 12,
[2m[36m(pid=24699)[0m      

2020-09-21 16:40:20,580	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.099770073662512, 'policy_loss': 0.0009110569662880152, 'vf_loss': 0.10426900652237236, 'vf_explained_var': 0.93787336, 'kl': 0.0010174526126187078, 'entropy': 1.3910485431551933, 'entropy_coeff': 0.005}
2020-09-21 16:40:21,073	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.014181320671923459, 'policy_loss': -0.023229562328197062, 'vf_loss': 0.03901071019936353, 'vf_explained_var': 0.9764253, 'kl': 0.0035028773709200323, 'entropy': 1.3839651346206665, 'entropy_coeff': 0.005}
2020-09-21 16:40:21,524	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.02100977214286104, 'policy_loss': -0.043783713364973664, 'vf_loss': 0.0223581301397644, 'vf_explained_var': 0.98634464, 'kl': 0.004832430757232942, 'entropy': 1.3846

2020-09-21 16:40:32,006	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.1057467256905511, 'policy_loss': -0.1288882583612576, 'vf_loss': 0.009525547327939421, 'vf_explained_var': 0.99425936, 'kl': 0.013458495552185923, 'entropy': 1.3648225665092468, 'entropy_coeff': 0.005}
2020-09-21 16:40:32,489	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.10307976545300335, 'policy_loss': -0.1268212057184428, 'vf_loss': 0.009330576722277328, 'vf_explained_var': 0.9945208, 'kl': 0.013996343943290412, 'entropy': 1.3692165315151215, 'entropy_coeff': 0.005}
2020-09-21 16:40:32,939	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.1073990588192828, 'policy_loss': -0.13138301495928317, 'vf_loss': 0.009472388584981672, 'vf_explained_var': 0.99432635, 'kl': 0.014038236171472818, 'entropy': 1.361

custom_metrics: {}
date: 2020-09-21_16-40-34
done: false
episode_len_mean: 2628.75
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.53371009288972
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 150
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.3690373599529266
      entropy_coeff: 0.005
      kl: 0.014539655938278884
      policy_loss: -0.12493456155061722
      total_loss: -0.10055605077650398
      vf_explained_var: 0.9944559335708618
      vf_loss: 0.009141597503912635
  num_steps_sampled: 315000
  num_steps_trained: 315000
iterations_since_restore: 315
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.534782608695653
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.94

[2m[36m(pid=24699)[0m 2020-09-21 16:40:34,669	INFO rollout_worker.py:561 -- Generating sample batch of size 200
2020-09-21 16:40:38,181	DEBUG rollout_worker.py:760 -- Training out:

{ 'default_policy': { 'learner_stats': { 'allreduce_latency': 0.0,
                                         'cur_kl_coeff': 1.5187499999999998,
                                         'cur_lr': 1e-05,
                                         'entropy': 1.3024792671203613,
                                         'entropy_coeff': 0.005,
                                         'kl': -7.3510030063062e-08,
                                         'policy_loss': 0.08729670941829681,
                                         'total_loss': 0.19187122583389282,
                                         'vf_explained_var': np.ndarray((1,), dtype=float32, min=0.868, max=0.868, mean=0.868),
                                         'vf_loss': 0.11108702421188354}}}

2020-09-21 16:40:38,186	INFO rollout_worker.py:736

2020-09-21 16:40:44,781	DEBUG sgd.py:120 -- 13 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.07900008384604007, 'policy_loss': -0.0899842819198966, 'vf_loss': 0.0043495845166035, 'vf_explained_var': 0.9904686, 'kl': 0.0086788164335303, 'entropy': 1.3092677146196365, 'entropy_coeff': 0.005}
2020-09-21 16:40:45,270	DEBUG sgd.py:120 -- 14 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.08612703927792609, 'policy_loss': -0.0981231884798035, 'vf_loss': 0.004305770198698156, 'vf_explained_var': 0.99147546, 'kl': 0.009388283709995449, 'entropy': 1.3136151507496834, 'entropy_coeff': 0.005}
2020-09-21 16:40:45,723	DEBUG sgd.py:120 -- 15 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.0819721152074635, 'policy_loss': -0.09442394005600363, 'vf_loss': 0.004507603385718539, 'vf_explained_var': 0.9910271, 'kl': 0.009530791838187724, 'entropy': 1.3061330

custom_metrics: {}
date: 2020-09-21_16-40-52
done: false
episode_len_mean: 2628.75
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.53371009288972
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 150
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.3064423948526382
      entropy_coeff: 0.005
      kl: 0.012192419671919197
      policy_loss: -0.11474479944445193
      total_loss: -0.09921561495866627
      vf_explained_var: 0.9922748804092407
      vf_loss: 0.0035441612926661037
  num_steps_sampled: 316000
  num_steps_trained: 316000
iterations_since_restore: 316
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.517391304347826
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.9

2020-09-21 16:40:55,999	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.2131241757888347, 'policy_loss': 0.00019411381799727678, 'vf_loss': 0.2186404657550156, 'vf_explained_var': 0.94041926, 'kl': 0.00048432532734166145, 'entropy': 1.289195328950882, 'entropy_coeff': 0.005}
2020-09-21 16:40:56,460	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.01831228449009359, 'policy_loss': -0.033371126279234886, 'vf_loss': 0.05579405603930354, 'vf_explained_var': 0.98424655, 'kl': 0.001540141209261492, 'entropy': 1.289946936070919, 'entropy_coeff': 0.005}
2020-09-21 16:40:56,943	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.022917577996850014, 'policy_loss': -0.04832358815474436, 'vf_loss': 0.026434380793944, 'vf_explained_var': 0.99126637, 'kl': 0.0035756989746005274, 'entropy': 1.29179

2020-09-21 16:41:07,409	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.09438777691684663, 'policy_loss': -0.10694556823000312, 'vf_loss': 0.0019624631386250257, 'vf_explained_var': 0.9993144, 'kl': 0.011228386865695938, 'entropy': 1.2915573939681053, 'entropy_coeff': 0.005}
2020-09-21 16:41:07,864	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.09859639487694949, 'policy_loss': -0.11107881390489638, 'vf_loss': 0.001927065044583287, 'vf_explained_var': 0.999416, 'kl': 0.011212590557988733, 'entropy': 1.294755071401596, 'entropy_coeff': 0.005}
2020-09-21 16:41:08,352	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.09530890500172973, 'policy_loss': -0.10819260275457054, 'vf_loss': 0.0018329825397813693, 'vf_explained_var': 0.9994392, 'kl': 0.01153080040239729, 'entropy': 1.29

custom_metrics: {}
date: 2020-09-21_16-41-09
done: false
episode_len_mean: 2702.71
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.477569586921287
episode_reward_min: -32.22698139089825
episodes_this_iter: 1
episodes_total: 151
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.2971329763531685
      entropy_coeff: 0.005
      kl: 0.011280534439720213
      policy_loss: -0.10406161518767476
      total_loss: -0.09167213586624712
      vf_explained_var: 0.9994051456451416
      vf_loss: 0.0017428309802198783
  num_steps_sampled: 317000
  num_steps_trained: 317000
iterations_since_restore: 317
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.804347826086956
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.

2020-09-21 16:41:13,258	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.1205122055253014, 'policy_loss': -0.0030585152562707663, 'vf_loss': 0.12767624738626182, 'vf_explained_var': 0.9458153, 'kl': 0.0014006461899404576, 'entropy': 1.2465522289276123, 'entropy_coeff': 0.005}
2020-09-21 16:41:13,749	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.009511203737929463, 'policy_loss': -0.025875847786664963, 'vf_loss': 0.03868650330696255, 'vf_explained_var': 0.9843123, 'kl': 0.0019083934676018544, 'entropy': 1.2395630031824112, 'entropy_coeff': 0.005}
2020-09-21 16:41:14,201	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.026944284327328205, 'policy_loss': -0.04154260776704177, 'vf_loss': 0.015731941937701777, 'vf_explained_var': 0.99351454, 'kl': 0.003355616092449054, 'entropy': 1.2

2020-09-21 16:41:24,725	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.10719435644568875, 'policy_loss': -0.12048317049629986, 'vf_loss': 0.0014058003507670946, 'vf_explained_var': 0.99937636, 'kl': 0.011896686046384275, 'entropy': 1.2370154187083244, 'entropy_coeff': 0.005}
2020-09-21 16:41:25,209	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.10743342561181635, 'policy_loss': -0.12142564717214555, 'vf_loss': 0.0014439980805036612, 'vf_explained_var': 0.9992928, 'kl': 0.012316402746364474, 'entropy': 1.2314613908529282, 'entropy_coeff': 0.005}
2020-09-21 16:41:25,654	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.11427591368556023, 'policy_loss': -0.12810373050160706, 'vf_loss': 0.0013530824799090624, 'vf_explained_var': 0.99940896, 'kl': 0.01228682475630194, 'entropy':

custom_metrics: {}
date: 2020-09-21_16-41-27
done: false
episode_len_mean: 2702.71
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.477569586921287
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 151
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.2373932003974915
      entropy_coeff: 0.005
      kl: 0.012521743308752775
      policy_loss: -0.12702633836306632
      total_loss: -0.11289084900636226
      vf_explained_var: 0.9993833303451538
      vf_loss: 0.001305059362493921
  num_steps_sampled: 318000
  num_steps_trained: 318000
iterations_since_restore: 318
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.4772727272727275
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.

2020-09-21 16:41:30,644	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.27715484786313027, 'policy_loss': 0.0018978072330355644, 'vf_loss': 0.2799611850641668, 'vf_explained_var': 0.9593202, 'kl': 0.0010535196569021466, 'entropy': 1.2608354464173317, 'entropy_coeff': 0.005}
2020-09-21 16:41:31,131	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.054344821139238775, 'policy_loss': -0.022819495759904385, 'vf_loss': 0.07888411171734333, 'vf_explained_var': 0.9885364, 'kl': 0.0030482974252663553, 'entropy': 1.2698813080787659, 'entropy_coeff': 0.005}
2020-09-21 16:41:31,577	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.009540827595628798, 'policy_loss': -0.03251379879657179, 'vf_loss': 0.04120132757816464, 'vf_explained_var': 0.9940132, 'kl': 0.004708594176918268, 'entropy': 1.25957

2020-09-21 16:41:38,235	DEBUG rollout_worker.py:760 -- Training out:

{ 'default_policy': { 'learner_stats': { 'allreduce_latency': 0.0,
                                         'cur_kl_coeff': 1.5187499999999998,
                                         'cur_lr': 1e-05,
                                         'entropy': 1.1042096614837646,
                                         'entropy_coeff': 0.005,
                                         'kl': 0.0077848173677921295,
                                         'policy_loss': -0.04784141108393669,
                                         'total_loss': -0.03556133806705475,
                                         'vf_explained_var': np.ndarray((1,), dtype=float32, min=0.999, max=0.999, mean=0.999),
                                         'vf_loss': 0.005977911874651909}}}

2020-09-21 16:41:38,259	DEBUG sgd.py:120 -- 16 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.08433958329260349

custom_metrics: {}
date: 2020-09-21_16-41-44
done: false
episode_len_mean: 2702.71
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.477569586921287
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 151
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.265924870967865
      entropy_coeff: 0.005
      kl: 0.012501272896770388
      policy_loss: -0.11491848272271454
      total_loss: -0.09567550220526755
      vf_explained_var: 0.9989729523658752
      vf_loss: 0.006586291870917194
  num_steps_sampled: 319000
  num_steps_trained: 319000
iterations_since_restore: 319
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.263636363636364
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.94

[2m[36m(pid=24699)[0m 2020-09-21 16:41:44,731	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 9615.161477473734,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 5609},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-4.813, max=4.004, mean=0.565),
[2m[36m(pid=24699)[0m                                   'prev_action': 0,
[2m[36m(pid=24699)[

2020-09-21 16:41:48,217	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.1101173497736454, 'policy_loss': 0.0032906964188441634, 'vf_loss': 0.10941635561175644, 'vf_explained_var': 0.9372662, 'kl': 0.002697651702730064, 'entropy': 1.3373505100607872, 'entropy_coeff': 0.005}
2020-09-21 16:41:48,701	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.012794557260349393, 'policy_loss': -0.026462988927960396, 'vf_loss': 0.03723447653464973, 'vf_explained_var': 0.9781921, 'kl': 0.005761830398114398, 'entropy': 1.3455415666103363, 'entropy_coeff': 0.005}
2020-09-21 16:41:49,158	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.016656797961331904, 'policy_loss': -0.04538647481240332, 'vf_loss': 0.023543373972643167, 'vf_explained_var': 0.98724353, 'kl': 0.007825790351489559, 'entropy': 1.3398

2020-09-21 16:41:59,636	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.1017212268197909, 'policy_loss': -0.116467016749084, 'vf_loss': 0.002998017371282913, 'vf_explained_var': 0.99804825, 'kl': 0.01212060614489019, 'entropy': 1.3320794105529785, 'entropy_coeff': 0.005}
2020-09-21 16:42:00,130	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.11025492148473859, 'policy_loss': -0.12493796751368791, 'vf_loss': 0.0028272897689021192, 'vf_explained_var': 0.9981303, 'kl': 0.01217429363168776, 'entropy': 1.3267919644713402, 'entropy_coeff': 0.005}
2020-09-21 16:42:00,589	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.09792070160619915, 'policy_loss': -0.1129833257291466, 'vf_loss': 0.0028018820157740265, 'vf_explained_var': 0.9982878, 'kl': 0.012463402352295816, 'entropy': 1.3336

custom_metrics: {}
date: 2020-09-21_16-42-02
done: false
episode_len_mean: 2702.71
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.477569586921287
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 151
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.3309861868619919
      entropy_coeff: 0.005
      kl: 0.01312213292112574
      policy_loss: -0.1182013259967789
      total_loss: -0.10217913123778999
      vf_explained_var: 0.9983279705047607
      vf_loss: 0.002747890663158614
  num_steps_sampled: 320000
  num_steps_trained: 320000
iterations_since_restore: 320
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 6.352173913043477
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.944

[2m[36m(pid=24699)[0m 2020-09-21 16:42:02,290	INFO rollout_worker.py:561 -- Generating sample batch of size 200
2020-09-21 16:42:05,828	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.12088569125626236, 'policy_loss': 0.002913136500865221, 'vf_loss': 0.12218209216371179, 'vf_explained_var': 0.9531691, 'kl': 0.0016671764996110117, 'entropy': 1.3483126014471054, 'entropy_coeff': 0.005}
2020-09-21 16:42:06,323	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.0005372188170440495, 'policy_loss': -0.03576689210603945, 'vf_loss': 0.03486828051973134, 'vf_explained_var': 0.9859163, 'kl': 0.005358311056625098, 'entropy': 1.3404217213392258, 'entropy_coeff': 0.005}
2020-09-21 16:42:06,814	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.03460934036411345, 'policy_loss': -0.05193699907977

2020-09-21 16:42:17,279	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.10436837119050324, 'policy_loss': -0.11766057903878391, 'vf_loss': 0.001069058333087014, 'vf_explained_var': 0.9995238, 'kl': 0.012502219818998128, 'entropy': 1.3529199287295341, 'entropy_coeff': 0.005}
2020-09-21 16:42:17,768	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.10811026746523567, 'policy_loss': -0.12177342976792715, 'vf_loss': 0.0009858235098363366, 'vf_explained_var': 0.9995458, 'kl': 0.012778491247445345, 'entropy': 1.3459986448287964, 'entropy_coeff': 0.005}
2020-09-21 16:42:18,254	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.11011048610089347, 'policy_loss': -0.12400642188731581, 'vf_loss': 0.001049112786859041, 'vf_explained_var': 0.99954015, 'kl': 0.012903621071018279, 'entropy': 1

custom_metrics: {}
date: 2020-09-21_16-42-19
done: false
episode_len_mean: 2702.71
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.477569586921287
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 151
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.3522957637906075
      entropy_coeff: 0.005
      kl: 0.013081327313557267
      policy_loss: -0.13154878001660109
      total_loss: -0.11752210697159171
      vf_explained_var: 0.9995847344398499
      vf_loss: 0.0009208836854668334
  num_steps_sampled: 321000
  num_steps_trained: 321000
iterations_since_restore: 321
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.5608695652173905
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0

2020-09-21 16:42:23,461	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.10837411135435104, 'policy_loss': 0.0006949553498998284, 'vf_loss': 0.11132505163550377, 'vf_explained_var': 0.94560885, 'kl': 0.0020574988336187205, 'entropy': 1.3541447147727013, 'entropy_coeff': 0.005}
2020-09-21 16:42:23,950	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.013972874148748815, 'policy_loss': -0.02118266059551388, 'vf_loss': 0.036455484572798014, 'vf_explained_var': 0.9817416, 'kl': 0.003647518082289025, 'entropy': 1.3679241761565208, 'entropy_coeff': 0.005}
2020-09-21 16:42:24,405	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.025597622501663864, 'policy_loss': -0.04994733270723373, 'vf_loss': 0.0221029223757796, 'vf_explained_var': 0.98897624, 'kl': 0.005939816473983228, 'entropy': 1.354

2020-09-21 16:42:34,846	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.10134697530884296, 'policy_loss': -0.12384777329862118, 'vf_loss': 0.009140859627223108, 'vf_explained_var': 0.9951937, 'kl': 0.013284512562677264, 'entropy': 1.3631819784641266, 'entropy_coeff': 0.005}
2020-09-21 16:42:35,337	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.1085089819971472, 'policy_loss': -0.1318290164344944, 'vf_loss': 0.008710237380000763, 'vf_explained_var': 0.9954659, 'kl': 0.01409327395958826, 'entropy': 1.358872152864933, 'entropy_coeff': 0.005}
2020-09-21 16:42:35,785	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.10715849895495921, 'policy_loss': -0.13077316188719124, 'vf_loss': 0.008756564202485606, 'vf_explained_var': 0.9954826, 'kl': 0.014260336523875594, 'entropy': 1.35995

custom_metrics: {}
date: 2020-09-21_16-42-37
done: false
episode_len_mean: 2702.71
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.477569586921287
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 151
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.3620300069451332
      entropy_coeff: 0.005
      kl: 0.014231572684366256
      policy_loss: -0.13710095989517868
      total_loss: -0.11367053957656026
      vf_explained_var: 0.9953197240829468
      vf_loss: 0.008626372553408146
  num_steps_sampled: 322000
  num_steps_trained: 322000
iterations_since_restore: 322
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.5636363636363635
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.

2020-09-21 16:42:41,253	DEBUG rollout_worker.py:760 -- Training out:

{ 'default_policy': { 'learner_stats': { 'allreduce_latency': 0.0,
                                         'cur_kl_coeff': 1.5187499999999998,
                                         'cur_lr': 1e-05,
                                         'entropy': 1.28767991065979,
                                         'entropy_coeff': 0.005,
                                         'kl': 6.024243326407941e-09,
                                         'policy_loss': -0.1969650834798813,
                                         'total_loss': -0.08354528248310089,
                                         'vf_explained_var': np.ndarray((1,), dtype=float32, min=0.919, max=0.919, mean=0.919),
                                         'vf_loss': 0.11985816806554794}}}

2020-09-21 16:42:41,257	INFO rollout_worker.py:736 -- Training on concatenated sample batches:

{ 'count': 64,
  'policy_batches': { 'default_policy': { 'data': { 'a

2020-09-21 16:42:48,295	DEBUG sgd.py:120 -- 14 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.0988134506624192, 'policy_loss': -0.1107999193482101, 'vf_loss': 0.0015490585283259861, 'vf_explained_var': 0.99878377, 'kl': 0.011212267621885985, 'entropy': 1.3182445988059044, 'entropy_coeff': 0.005}
2020-09-21 16:42:48,749	DEBUG sgd.py:120 -- 15 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.10338713508099318, 'policy_loss': -0.11563042877241969, 'vf_loss': 0.0014206787236616947, 'vf_explained_var': 0.99885714, 'kl': 0.01147680205758661, 'entropy': 1.3215569481253624, 'entropy_coeff': 0.005}
2020-09-21 16:42:49,236	DEBUG sgd.py:120 -- 16 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.10785829345695674, 'policy_loss': -0.12017636594828218, 'vf_loss': 0.00136708627542248, 'vf_explained_var': 0.9988718, 'kl': 0.011577378259971738, 'entropy': 1.3

custom_metrics: {}
date: 2020-09-21_16-42-55
done: false
episode_len_mean: 2702.71
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.477569586921287
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 151
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.3249007686972618
      entropy_coeff: 0.005
      kl: 0.01398514152970165
      policy_loss: -0.13919055717997253
      total_loss: -0.12385655613616109
      vf_explained_var: 0.999389111995697
      vf_loss: 0.0007185709673649399
  num_steps_sampled: 323000
  num_steps_trained: 323000
iterations_since_restore: 323
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.4173913043478255
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.9

2020-09-21 16:42:59,075	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.08882001624442637, 'policy_loss': 0.0036093288799747825, 'vf_loss': 0.09072630922310054, 'vf_explained_var': 0.95633423, 'kl': 0.001105247689524469, 'entropy': 1.438841961324215, 'entropy_coeff': 0.005}
2020-09-21 16:42:59,531	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.0012202659854665399, 'policy_loss': -0.029224223806522787, 'vf_loss': 0.03228586947079748, 'vf_explained_var': 0.98388636, 'kl': 0.003518609795719385, 'entropy': 1.4370529875159264, 'entropy_coeff': 0.005}
2020-09-21 16:43:00,020	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.04017406888306141, 'policy_loss': -0.057956183911301196, 'vf_loss': 0.017426895210519433, 'vf_explained_var': 0.99123776, 'kl': 0.004945085893268697, 'entropy': 1.4

2020-09-21 16:43:10,491	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.11582581396214664, 'policy_loss': -0.13127353903837502, 'vf_loss': 0.0032966633480100427, 'vf_explained_var': 0.99799174, 'kl': 0.012714420503471047, 'entropy': 1.4317935854196548, 'entropy_coeff': 0.005}
2020-09-21 16:43:10,939	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.11909680522512645, 'policy_loss': -0.13469190755859017, 'vf_loss': 0.0031686895417806227, 'vf_explained_var': 0.99813926, 'kl': 0.012876802124083042, 'entropy': 1.4260473921895027, 'entropy_coeff': 0.005}
2020-09-21 16:43:11,429	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.1253190750721842, 'policy_loss': -0.14141089329496026, 'vf_loss': 0.003375551337740035, 'vf_explained_var': 0.9978352, 'kl': 0.013082093675620854, 'entropy': 

custom_metrics: {}
date: 2020-09-21_16-43-12
done: false
episode_len_mean: 2759.08
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.43210861728846
episode_reward_min: -32.22698139089825
episodes_this_iter: 1
episodes_total: 152
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.4279362335801125
      entropy_coeff: 0.005
      kl: 0.01353025482967496
      policy_loss: -0.13735321292188019
      total_loss: -0.12094187259208411
      vf_explained_var: 0.9982179403305054
      vf_loss: 0.0030019537152838893
  num_steps_sampled: 324000
  num_steps_trained: 324000
iterations_since_restore: 324
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.252173913043479
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.94

[2m[36m(pid=24699)[0m 2020-09-21 16:43:13,135	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 6531.2,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 6609},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-2.068, max=5.175, mean=0.377),
[2m[36m(pid=24699)[0m                                   'prev_action': 0,
[2m[36m(pid=24699)[0m         

2020-09-21 16:43:16,675	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.10343522363109514, 'policy_loss': -0.0014710455434396863, 'vf_loss': 0.11002972046844661, 'vf_explained_var': 0.9650955, 'kl': 0.001031312297796294, 'entropy': 1.3379516825079918, 'entropy_coeff': 0.005}
2020-09-21 16:43:17,176	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.002051018876954913, 'policy_loss': -0.03389320650603622, 'vf_loss': 0.03542989573907107, 'vf_explained_var': 0.98899496, 'kl': 0.004701079407823272, 'entropy': 1.325087197124958, 'entropy_coeff': 0.005}
2020-09-21 16:43:17,631	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.02911112818401307, 'policy_loss': -0.04420573078095913, 'vf_loss': 0.013986149278935045, 'vf_explained_var': 0.995919, 'kl': 0.005087761004688218, 'entropy': 1.323716

2020-09-21 16:43:28,069	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.11200756509788334, 'policy_loss': -0.12530414736829698, 'vf_loss': 0.0006210099109011935, 'vf_explained_var': 0.9998158, 'kl': 0.012693617085460573, 'entropy': 1.3205719962716103, 'entropy_coeff': 0.005}
2020-09-21 16:43:28,556	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.1245269172359258, 'policy_loss': -0.1381110700313002, 'vf_loss': 0.0006070140425435966, 'vf_explained_var': 0.9998282, 'kl': 0.012890096521005034, 'entropy': 1.3199399262666702, 'entropy_coeff': 0.005}
2020-09-21 16:43:29,010	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.12143537570955232, 'policy_loss': -0.13526864326559007, 'vf_loss': 0.0005882317218492972, 'vf_explained_var': 0.9998393, 'kl': 0.013049434404820204, 'entropy': 1.

custom_metrics: {}
date: 2020-09-21_16-43-30
done: false
episode_len_mean: 2759.08
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.432108617288453
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 152
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.3147810474038124
      entropy_coeff: 0.005
      kl: 0.013846391695551574
      policy_loss: -0.13928493298590183
      total_loss: -0.12436175730545074
      vf_explained_var: 0.9998660087585449
      vf_loss: 0.00046787314386165235
  num_steps_sampled: 325000
  num_steps_trained: 325000
iterations_since_restore: 325
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.322727272727274
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0

[2m[36m(pid=24699)[0m 2020-09-21 16:43:30,733	INFO rollout_worker.py:561 -- Generating sample batch of size 200
2020-09-21 16:43:34,173	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.07185660605318844, 'policy_loss': 9.904650505632162e-05, 'vf_loss': 0.07752056163735688, 'vf_explained_var': 0.9745995, 'kl': 0.000551771612125232, 'entropy': 1.320201337337494, 'entropy_coeff': 0.005}
2020-09-21 16:43:34,665	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.00047104619443416595, 'policy_loss': -0.026505618705414236, 'vf_loss': 0.028402391239069402, 'vf_explained_var': 0.9897329, 'kl': 0.0027966545458184555, 'entropy': 1.3230473846197128, 'entropy_coeff': 0.005}
2020-09-21 16:43:35,126	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.028122440387960523, 'policy_loss': -0.040516148

2020-09-21 16:43:41,314	DEBUG sgd.py:120 -- 15 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.09439943672623485, 'policy_loss': -0.10518364183371887, 'vf_loss': 0.0010498999399715103, 'vf_explained_var': 0.9996319, 'kl': 0.010754208109574392, 'entropy': 1.319728635251522, 'entropy_coeff': 0.005}
2020-09-21 16:43:41,797	DEBUG sgd.py:120 -- 16 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.107475018594414, 'policy_loss': -0.11759808217175305, 'vf_loss': 0.0009538963859085925, 'vf_explained_var': 0.99962246, 'kl': 0.010369895870098844, 'entropy': 1.3160213977098465, 'entropy_coeff': 0.005}
2020-09-21 16:43:42,290	DEBUG sgd.py:120 -- 17 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.10584075818769634, 'policy_loss': -0.11671323445625603, 'vf_loss': 0.0008844207186484709, 'vf_explained_var': 0.9996753, 'kl': 0.010918382700765505, 'entropy': 1.

custom_metrics: {}
date: 2020-09-21_16-43-48
done: false
episode_len_mean: 2759.08
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.432108617288453
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 152
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.311718761920929
      entropy_coeff: 0.005
      kl: 0.012894740561023355
      policy_loss: -0.13293074467219412
      total_loss: -0.11943333595991135
      vf_explained_var: 0.9998365044593811
      vf_loss: 0.0004721088844235055
  num_steps_sampled: 326000
  num_steps_trained: 326000
iterations_since_restore: 326
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.478260869565218
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.9

2020-09-21 16:43:51,712	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.1812686319462955, 'policy_loss': 0.0010594221530482173, 'vf_loss': 0.18540104338899255, 'vf_explained_var': 0.94461375, 'kl': 0.0009615437043373198, 'entropy': 1.3304363116621971, 'entropy_coeff': 0.005}
2020-09-21 16:43:52,202	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.018499739468097687, 'policy_loss': -0.025405463995411992, 'vf_loss': 0.046418718760833144, 'vf_explained_var': 0.9863609, 'kl': 0.0027270098798908293, 'entropy': 1.3310336768627167, 'entropy_coeff': 0.005}
2020-09-21 16:43:52,691	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.02687671547755599, 'policy_loss': -0.04595710965804756, 'vf_loss': 0.01879688975168392, 'vf_explained_var': 0.99453133, 'kl': 0.004566737959976308, 'entropy': 1.33

2020-09-21 16:44:03,126	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.1006873520091176, 'policy_loss': -0.11306829913519323, 'vf_loss': 0.000560251288334257, 'vf_explained_var': 0.999799, 'kl': 0.012124869041144848, 'entropy': 1.318789429962635, 'entropy_coeff': 0.005}
2020-09-21 16:44:03,617	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.10470699449069798, 'policy_loss': -0.11794677434954792, 'vf_loss': 0.0005241686540102819, 'vf_explained_var': 0.9998158, 'kl': 0.012716447294224054, 'entropy': 1.319499358534813, 'entropy_coeff': 0.005}
2020-09-21 16:44:04,103	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.10760856862179935, 'policy_loss': -0.12119385437108576, 'vf_loss': 0.00048275242261297535, 'vf_explained_var': 0.99984217, 'kl': 0.012970131821930408, 'entropy': 1.3

custom_metrics: {}
date: 2020-09-21_16-44-05
done: false
episode_len_mean: 2759.08
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.432108617288453
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 152
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.3212002217769623
      entropy_coeff: 0.005
      kl: 0.013046254811342806
      policy_loss: -0.11812286428175867
      total_loss: -0.10447841905988753
      vf_explained_var: 0.9998509883880615
      vf_loss: 0.00043644531615427695
  num_steps_sampled: 327000
  num_steps_trained: 327000
iterations_since_restore: 327
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.331818181818182
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0

2020-09-21 16:44:09,506	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.17278484208509326, 'policy_loss': 0.003375191939994693, 'vf_loss': 0.1722791539505124, 'vf_explained_var': 0.961724, 'kl': 0.0022871433098177185, 'entropy': 1.268620491027832, 'entropy_coeff': 0.005}
2020-09-21 16:44:09,992	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.0240612291963771, 'policy_loss': -0.030564271612092853, 'vf_loss': 0.05700293509289622, 'vf_explained_var': 0.9877256, 'kl': 0.0025873592094285414, 'entropy': 1.2613982632756233, 'entropy_coeff': 0.005}
2020-09-21 16:44:10,443	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.01892865973059088, 'policy_loss': -0.043267646338790655, 'vf_loss': 0.02385717339348048, 'vf_explained_var': 0.9937606, 'kl': 0.00446946248121094, 'entropy': 1.2612371891

2020-09-21 16:44:20,900	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.08769531268626451, 'policy_loss': -0.10078833013540134, 'vf_loss': 0.0007896337137935916, 'vf_explained_var': 0.99980676, 'kl': 0.012259236304089427, 'entropy': 1.2630668804049492, 'entropy_coeff': 0.005}
2020-09-21 16:44:21,387	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.09780726954340935, 'policy_loss': -0.11084508872590959, 'vf_loss': 0.0007638415117980912, 'vf_explained_var': 0.9998207, 'kl': 0.012257157068233937, 'entropy': 1.2683148011565208, 'entropy_coeff': 0.005}
2020-09-21 16:44:21,838	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.09639298147521913, 'policy_loss': -0.10887933673802763, 'vf_loss': 0.0007245056822284823, 'vf_explained_var': 0.99981666, 'kl': 0.011938434792682528, 'entropy'

custom_metrics: {}
date: 2020-09-21_16-44-23
done: false
episode_len_mean: 2759.08
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.432108617288453
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 152
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.2644984796643257
      entropy_coeff: 0.005
      kl: 0.012742506805807352
      policy_loss: -0.1118006082251668
      total_loss: -0.09816172136925161
      vf_explained_var: 0.9998270273208618
      vf_loss: 0.000608698002906749
  num_steps_sampled: 328000
  num_steps_trained: 328000
iterations_since_restore: 328
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.413043478260869
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.94

2020-09-21 16:44:27,708	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.11557243461720645, 'policy_loss': -0.0040306944865733385, 'vf_loss': 0.12104984419420362, 'vf_explained_var': 0.91798455, 'kl': 0.003383473661249692, 'entropy': 1.3170738816261292, 'entropy_coeff': 0.005}
2020-09-21 16:44:28,197	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.018234662828035653, 'policy_loss': -0.032627111999318004, 'vf_loss': 0.047595608863048255, 'vf_explained_var': 0.96490055, 'kl': 0.006438151409383863, 'entropy': 1.302355483174324, 'entropy_coeff': 0.005}
2020-09-21 16:44:28,654	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.01585534866899252, 'policy_loss': -0.04366563574876636, 'vf_loss': 0.02288463240256533, 'vf_explained_var': 0.9824032, 'kl': 0.007571354333776981, 'entropy': 1.314

2020-09-21 16:44:39,126	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.1018115512561053, 'policy_loss': -0.11528002005070448, 'vf_loss': 0.000988572803180432, 'vf_explained_var': 0.9992507, 'kl': 0.012615309737157077, 'entropy': 1.33592090010643, 'entropy_coeff': 0.005}
2020-09-21 16:44:39,615	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.10248838388361037, 'policy_loss': -0.11756711086491123, 'vf_loss': 0.0008958394864748698, 'vf_explained_var': 0.9993459, 'kl': 0.013710510218515992, 'entropy': 1.3279900625348091, 'entropy_coeff': 0.005}
2020-09-21 16:44:40,065	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.10350005910731852, 'policy_loss': -0.11880941246636212, 'vf_loss': 0.0008473456146020908, 'vf_explained_var': 0.99936795, 'kl': 0.013871239614672959, 'entropy': 1.3

custom_metrics: {}
date: 2020-09-21_16-44-41
done: false
episode_len_mean: 2759.08
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.432108617288453
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 152
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.3236062228679657
      entropy_coeff: 0.005
      kl: 0.013440934533718973
      policy_loss: -0.12554916192311794
      total_loss: -0.11094503849744797
      vf_explained_var: 0.9994063973426819
      vf_loss: 0.0008087305795925204
  num_steps_sampled: 329000
  num_steps_trained: 329000
iterations_since_restore: 329
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.517391304347826
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.

[2m[36m(pid=24699)[0m 2020-09-21 16:44:41,818	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 5378.329396587207,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 7609},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-1.944, max=2.608, mean=0.434),
[2m[36m(pid=24699)[0m                                   'prev_action': 0,
[2m[36m(pid=24699)[

2020-09-21 16:44:45,772	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.09530648798681796, 'policy_loss': 0.0013580608647316694, 'vf_loss': 0.10010287258774042, 'vf_explained_var': 0.8428866, 'kl': 0.0005076326028596867, 'entropy': 1.3850838020443916, 'entropy_coeff': 0.005}
2020-09-21 16:44:46,268	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.008803393924608827, 'policy_loss': -0.030505678558256477, 'vf_loss': 0.04063831374514848, 'vf_explained_var': 0.9361422, 'kl': 0.00366652767843334, 'entropy': 1.3795562162995338, 'entropy_coeff': 0.005}
2020-09-21 16:44:46,732	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.032365974970161915, 'policy_loss': -0.051757479552179575, 'vf_loss': 0.01745930645847693, 'vf_explained_var': 0.9710779, 'kl': 0.00580286918557249, 'entropy': 1.37618

2020-09-21 16:44:57,176	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.11626149923540652, 'policy_loss': -0.13087918260134757, 'vf_loss': 0.000737849641154753, 'vf_explained_var': 0.99867815, 'kl': 0.013713920139707625, 'entropy': 1.389635093510151, 'entropy_coeff': 0.005}
2020-09-21 16:44:57,666	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.11895692901453003, 'policy_loss': -0.13372700824402273, 'vf_loss': 0.0007322440378629835, 'vf_explained_var': 0.99865216, 'kl': 0.013783429516479373, 'entropy': 1.3791504949331284, 'entropy_coeff': 0.005}
2020-09-21 16:44:58,119	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.115317338029854, 'policy_loss': -0.13054470962379128, 'vf_loss': 0.0007207260532595683, 'vf_explained_var': 0.9987957, 'kl': 0.014100694912485778, 'entropy': 1.

custom_metrics: {}
date: 2020-09-21_16-44-59
done: false
episode_len_mean: 2759.08
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.432108617288453
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 152
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.3801089450716972
      entropy_coeff: 0.005
      kl: 0.0143723200308159
      policy_loss: -0.13635588926263154
      total_loss: -0.12083553383126855
      vf_explained_var: 0.9989439249038696
      vf_loss: 0.0005929420549364295
  num_steps_sampled: 330000
  num_steps_trained: 330000
iterations_since_restore: 330
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.586956521739131
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.94

[2m[36m(pid=24699)[0m 2020-09-21 16:44:59,844	INFO rollout_worker.py:561 -- Generating sample batch of size 200
2020-09-21 16:45:02,704	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.09883438516408205, 'policy_loss': 0.0007324169855564833, 'vf_loss': 0.10329574509523809, 'vf_explained_var': 0.92923146, 'kl': 0.0010455695405209298, 'entropy': 1.3563485369086266, 'entropy_coeff': 0.005}
2020-09-21 16:45:03,189	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.0009304669220000505, 'policy_loss': -0.03199796564877033, 'vf_loss': 0.03279890422709286, 'vf_explained_var': 0.9747263, 'kl': 0.004597086313879117, 'entropy': 1.3704601302742958, 'entropy_coeff': 0.005}
2020-09-21 16:45:03,641	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.027264798991382122, 'policy_loss': -0.04193097545

2020-09-21 16:45:14,032	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.11459096020553261, 'policy_loss': -0.1271864166483283, 'vf_loss': 0.0004482752137846546, 'vf_explained_var': 0.99964345, 'kl': 0.01247537910239771, 'entropy': 1.3599609360098839, 'entropy_coeff': 0.005}
2020-09-21 16:45:14,516	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.12431053398177028, 'policy_loss': -0.13755227718502283, 'vf_loss': 0.0004218409831082681, 'vf_explained_var': 0.99965084, 'kl': 0.01292524195741862, 'entropy': 1.3620609790086746, 'entropy_coeff': 0.005}
2020-09-21 16:45:14,969	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.1152708792797057, 'policy_loss': -0.12823754886630923, 'vf_loss': 0.00040010077464103233, 'vf_explained_var': 0.99967015, 'kl': 0.012763511156663299, 'entropy': 

custom_metrics: {}
date: 2020-09-21_16-45-16
done: false
episode_len_mean: 2830.24
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.385875232516483
episode_reward_min: -32.22698139089825
episodes_this_iter: 1
episodes_total: 153
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.3579348921775818
      entropy_coeff: 0.005
      kl: 0.013658228155691177
      policy_loss: -0.135652941535227
      total_loss: -0.12137431220617145
      vf_explained_var: 0.9997406601905823
      vf_loss: 0.0003248780940339202
  num_steps_sampled: 331000
  num_steps_trained: 331000
iterations_since_restore: 331
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.3500000000000005
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.9

2020-09-21 16:45:20,006	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.0683518098667264, 'policy_loss': 0.004263924900442362, 'vf_loss': 0.07026783528272063, 'vf_explained_var': 0.97963905, 'kl': 0.00043645417139859255, 'entropy': 1.3685626536607742, 'entropy_coeff': 0.005}
2020-09-21 16:45:20,497	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.00459681823849678, 'policy_loss': -0.014568852202501148, 'vf_loss': 0.022521331324242055, 'vf_explained_var': 0.9939269, 'kl': 0.002259844412037637, 'entropy': 1.3575582206249237, 'entropy_coeff': 0.005}
2020-09-21 16:45:20,955	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.022737521503586322, 'policy_loss': -0.030409852450247854, 'vf_loss': 0.009193202538881451, 'vf_explained_var': 0.99724114, 'kl': 0.003482625979813747, 'entropy': 1.3

2020-09-21 16:45:31,425	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.11360310716554523, 'policy_loss': -0.12639902136288583, 'vf_loss': 0.0004763391989399679, 'vf_explained_var': 0.99985003, 'kl': 0.01256296393694356, 'entropy': 1.352085880935192, 'entropy_coeff': 0.005}
2020-09-21 16:45:31,920	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.11710220179520547, 'policy_loss': -0.12974577001295984, 'vf_loss': 0.00045969858547323383, 'vf_explained_var': 0.99985844, 'kl': 0.012493539601564407, 'entropy': 1.3581385388970375, 'entropy_coeff': 0.005}
2020-09-21 16:45:32,370	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.11444982583634555, 'policy_loss': -0.12769215332809836, 'vf_loss': 0.000442343914983212, 'vf_explained_var': 0.999861, 'kl': 0.012885180534794927, 'entropy': 1

custom_metrics: {}
date: 2020-09-21_16-45-33
done: false
episode_len_mean: 2830.24
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.385875232516483
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 153
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.3473670706152916
      entropy_coeff: 0.005
      kl: 0.013750438287388533
      policy_loss: -0.1348917332943529
      total_loss: -0.12037627818062901
      vf_explained_var: 0.9998859763145447
      vf_loss: 0.00036881049345538486
  num_steps_sampled: 332000
  num_steps_trained: 332000
iterations_since_restore: 332
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.413636363636363
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.

2020-09-21 16:45:37,293	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.15877638664096594, 'policy_loss': -0.00371169246500358, 'vf_loss': 0.1687908279709518, 'vf_explained_var': 0.96842074, 'kl': 0.0005886097896732334, 'entropy': 1.439339466392994, 'entropy_coeff': 0.005}
2020-09-21 16:45:37,781	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.0071514289593324065, 'policy_loss': -0.0360830893041566, 'vf_loss': 0.043562585837207735, 'vf_explained_var': 0.99089646, 'kl': 0.004511555482167751, 'entropy': 1.4359987452626228, 'entropy_coeff': 0.005}
2020-09-21 16:45:38,235	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.03937060828320682, 'policy_loss': -0.061603638576343656, 'vf_loss': 0.020722671411931515, 'vf_explained_var': 0.99591166, 'kl': 0.005715635779779404, 'entropy': 1.434

2020-09-21 16:45:43,909	DEBUG sgd.py:120 -- 14 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.11214124038815498, 'policy_loss': -0.12278467626310885, 'vf_loss': 0.0009783580499060918, 'vf_explained_var': 0.9997558, 'kl': 0.011116159614175558, 'entropy': 1.4435168355703354, 'entropy_coeff': 0.005}
2020-09-21 16:45:44,361	DEBUG sgd.py:120 -- 15 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.11281277006492019, 'policy_loss': -0.1237589952070266, 'vf_loss': 0.0009098192676901817, 'vf_explained_var': 0.99978805, 'kl': 0.011347610270604491, 'entropy': 1.4395551607012749, 'entropy_coeff': 0.005}
2020-09-21 16:45:44,845	DEBUG sgd.py:120 -- 16 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.11323311924934387, 'policy_loss': -0.12417657123296522, 'vf_loss': 0.0008106991808745079, 'vf_explained_var': 0.9998083, 'kl': 0.01140773028600961, 'entropy': 1

custom_metrics: {}
date: 2020-09-21_16-45-51
done: false
episode_len_mean: 2830.24
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.385875232516483
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 153
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.4356516748666763
      entropy_coeff: 0.005
      kl: 0.013619320234283805
      policy_loss: -0.1422575480537489
      total_loss: -0.12838404171634465
      vf_explained_var: 0.999904215335846
      vf_loss: 0.0003674237941595493
  num_steps_sampled: 333000
  num_steps_trained: 333000
iterations_since_restore: 333
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.49090909090909
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.944

2020-09-21 16:45:54,730	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.05001847678795457, 'policy_loss': -0.002964884275570512, 'vf_loss': 0.05871100374497473, 'vf_explained_var': 0.97047305, 'kl': 0.0007114238404080098, 'entropy': 1.3616256937384605, 'entropy_coeff': 0.005}
2020-09-21 16:45:55,225	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.009837895631790161, 'policy_loss': -0.02473250776529312, 'vf_loss': 0.017106173443607986, 'vf_explained_var': 0.9913255, 'kl': 0.0030220328844734468, 'entropy': 1.360253468155861, 'entropy_coeff': 0.005}
2020-09-21 16:45:55,670	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.029616851126775146, 'policy_loss': -0.03873684839345515, 'vf_loss': 0.008319557411596179, 'vf_explained_var': 0.9957745, 'kl': 0.0050211507914355025, 'entropy': 1.

2020-09-21 16:46:06,104	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.11522495211102068, 'policy_loss': -0.12888945080339909, 'vf_loss': 0.0004474800280149793, 'vf_explained_var': 0.99975896, 'kl': 0.013149327540304512, 'entropy': 1.3507044985890388, 'entropy_coeff': 0.005}
2020-09-21 16:46:06,598	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.11346507631242275, 'policy_loss': -0.12703398475423455, 'vf_loss': 0.00041670934297144413, 'vf_explained_var': 0.9997685, 'kl': 0.013113969645928591, 'entropy': 1.3529293611645699, 'entropy_coeff': 0.005}
2020-09-21 16:46:07,043	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.12039191607618704, 'policy_loss': -0.134702080686111, 'vf_loss': 0.00039806908353057224, 'vf_explained_var': 0.9997804, 'kl': 0.013616089592687786, 'entropy':

custom_metrics: {}
date: 2020-09-21_16-46-08
done: false
episode_len_mean: 2830.24
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.385875232516483
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 153
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.3526760041713715
      entropy_coeff: 0.005
      kl: 0.013694177323486656
      policy_loss: -0.12702247558627278
      total_loss: -0.11264276225119829
      vf_explained_var: 0.9998084306716919
      vf_loss: 0.000345053119417571
  num_steps_sampled: 334000
  num_steps_trained: 334000
iterations_since_restore: 334
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.50909090909091
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.94

[2m[36m(pid=24699)[0m 2020-09-21 16:46:08,782	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 12134.99290931054,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 780},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-2.447, max=1.762, mean=0.211),
[2m[36m(pid=24699)[0m                                   'prev_action': 10,
[2m[36m(pid=24699)[

2020-09-21 16:46:12,174	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.0905801624758169, 'policy_loss': 0.0028672628104686737, 'vf_loss': 0.09313414106145501, 'vf_explained_var': 0.97076875, 'kl': 0.0006056289802511827, 'entropy': 1.2682074680924416, 'entropy_coeff': 0.005}
2020-09-21 16:46:12,669	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.011269577778875828, 'policy_loss': -0.01883859105873853, 'vf_loss': 0.03129725845064968, 'vf_explained_var': 0.99003977, 'kl': 0.0033945496470551006, 'entropy': 1.2689131125807762, 'entropy_coeff': 0.005}
2020-09-21 16:46:13,122	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.021501554176211357, 'policy_loss': -0.03465433942619711, 'vf_loss': 0.012469696084735915, 'vf_explained_var': 0.996098, 'kl': 0.004666993991122581, 'entropy': 1.280

2020-09-21 16:46:23,588	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.09611305384896696, 'policy_loss': -0.10816418123431504, 'vf_loss': 0.0004972645110683516, 'vf_explained_var': 0.999841, 'kl': 0.011767679417971522, 'entropy': 1.2636608257889748, 'entropy_coeff': 0.005}
2020-09-21 16:46:24,078	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.1112184087978676, 'policy_loss': -0.12372059794142842, 'vf_loss': 0.00047962137796275783, 'vf_explained_var': 0.9998567, 'kl': 0.012087993382010609, 'entropy': 1.267215184867382, 'entropy_coeff': 0.005}
2020-09-21 16:46:24,536	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.10852638515643775, 'policy_loss': -0.12076870747841895, 'vf_loss': 0.0005585509170487057, 'vf_explained_var': 0.99984765, 'kl': 0.011840101913549006, 'entropy': 1

custom_metrics: {}
date: 2020-09-21_16-46-26
done: false
episode_len_mean: 2830.24
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.385875232516483
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 153
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.266374133527279
      entropy_coeff: 0.005
      kl: 0.012031517340801656
      policy_loss: -0.1141658938722685
      total_loss: -0.10183993715327233
      vf_explained_var: 0.9998685717582703
      vf_loss: 0.00038496373599627987
  num_steps_sampled: 335000
  num_steps_trained: 335000
iterations_since_restore: 335
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.40909090909091
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.94

[2m[36m(pid=24699)[0m 2020-09-21 16:46:26,247	INFO rollout_worker.py:561 -- Generating sample batch of size 200
2020-09-21 16:46:29,974	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.16033437242731452, 'policy_loss': -0.00043653184548020363, 'vf_loss': 0.16404266096651554, 'vf_explained_var': 0.9638541, 'kl': 0.001555817169838214, 'entropy': 1.1269310638308525, 'entropy_coeff': 0.005}
2020-09-21 16:46:30,462	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.043055100773926824, 'policy_loss': -0.017274614539928734, 'vf_loss': 0.06163034145720303, 'vf_explained_var': 0.98593307, 'kl': 0.002787206816719845, 'entropy': 1.1067403741180897, 'entropy_coeff': 0.005}
2020-09-21 16:46:30,912	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.008981508668512106, 'policy_loss': -0.0381058749

2020-09-21 16:46:41,316	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.09187654801644385, 'policy_loss': -0.10296151577495039, 'vf_loss': 0.0007258082623593509, 'vf_explained_var': 0.9998065, 'kl': 0.0104832902434282, 'entropy': 1.112465463578701, 'entropy_coeff': 0.005}
2020-09-21 16:46:41,442	INFO rollout_worker.py:736 -- Training on concatenated sample batches:

{ 'count': 64,
  'policy_batches': { 'default_policy': { 'data': { 'action_dist_inputs': np.ndarray((64, 21), dtype=float32, min=-7.525, max=8.53, mean=-0.281),
                                                    'action_logp': np.ndarray((64,), dtype=float32, min=-6.059, max=-0.01, mean=-0.983),
                                                    'action_prob': np.ndarray((64,), dtype=float32, min=0.002, max=0.99, mean=0.546),
                                                    'actions': np.ndarray((64,), dtype=int64, min=0.0, max=20.0, mean=4.172),
 

custom_metrics: {}
date: 2020-09-21_16-46-43
done: false
episode_len_mean: 2830.24
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.385875232516483
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 153
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.1179802119731903
      entropy_coeff: 0.005
      kl: 0.011444048024713993
      policy_loss: -0.10476906481198967
      total_loss: -0.09242150210775435
      vf_explained_var: 0.9998518228530884
      vf_loss: 0.0005568186279560905
  num_steps_sampled: 336000
  num_steps_trained: 336000
iterations_since_restore: 336
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.478260869565217
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.

2020-09-21 16:46:47,747	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.07203360740095377, 'policy_loss': 0.0008058432722464204, 'vf_loss': 0.0762663574423641, 'vf_explained_var': 0.9523392, 'kl': 0.0010345572033338968, 'entropy': 1.3219652026891708, 'entropy_coeff': 0.005}
2020-09-21 16:46:48,233	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.004553509701509029, 'policy_loss': -0.029101261927280575, 'vf_loss': 0.024752555356826633, 'vf_explained_var': 0.985057, 'kl': 0.00423079525353387, 'entropy': 1.3260646015405655, 'entropy_coeff': 0.005}
2020-09-21 16:46:48,678	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.03393598133698106, 'policy_loss': -0.04789797600824386, 'vf_loss': 0.010096486163092777, 'vf_explained_var': 0.9934935, 'kl': 0.00695061290753074, 'entropy': 1.338145

2020-09-21 16:46:59,127	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.11054059909656644, 'policy_loss': -0.12441860046237707, 'vf_loss': 0.000536405776074389, 'vf_explained_var': 0.9996532, 'kl': 0.013176122505683452, 'entropy': 1.333927370607853, 'entropy_coeff': 0.005}
2020-09-21 16:46:59,627	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.10614286045893095, 'policy_loss': -0.12046345847193152, 'vf_loss': 0.0005128736611368367, 'vf_explained_var': 0.99968064, 'kl': 0.013466555043123662, 'entropy': 1.328921139240265, 'entropy_coeff': 0.005}
2020-09-21 16:47:00,083	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.10312144143972546, 'policy_loss': -0.11816213058773428, 'vf_loss': 0.000489095564262243, 'vf_explained_var': 0.9996525, 'kl': 0.013950106804259121, 'entropy': 1.3

custom_metrics: {}
date: 2020-09-21_16-47-01
done: false
episode_len_mean: 2830.24
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.385875232516483
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 153
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.3262852430343628
      entropy_coeff: 0.005
      kl: 0.013930074812378734
      policy_loss: -0.13150075532030314
      total_loss: -0.11655821651220322
      vf_explained_var: 0.9997199773788452
      vf_loss: 0.00041766809226828627
  num_steps_sampled: 337000
  num_steps_trained: 337000
iterations_since_restore: 337
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 6.317391304347825
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0

2020-09-21 16:47:05,488	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.14029407897032797, 'policy_loss': 0.0025958624901250005, 'vf_loss': 0.14316262607462704, 'vf_explained_var': 0.89517605, 'kl': 0.0008589857996605943, 'entropy': 1.353798747062683, 'entropy_coeff': 0.005}
2020-09-21 16:47:05,977	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.013146560871973634, 'policy_loss': -0.0387360806344077, 'vf_loss': 0.05108158348593861, 'vf_explained_var': 0.9616018, 'kl': 0.004924159817164764, 'entropy': 1.3355027437210083, 'entropy_coeff': 0.005}
2020-09-21 16:47:06,432	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.033722001942805946, 'policy_loss': -0.06021231599152088, 'vf_loss': 0.02319074876140803, 'vf_explained_var': 0.98344076, 'kl': 0.00658134080003947, 'entropy': 1.339169

2020-09-21 16:47:16,880	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.10164888168219477, 'policy_loss': -0.11495176248718053, 'vf_loss': 0.0007591726152895717, 'vf_explained_var': 0.99938405, 'kl': 0.012671679025515914, 'entropy': 1.3402792736887932, 'entropy_coeff': 0.005}
2020-09-21 16:47:17,368	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.10124335007276386, 'policy_loss': -0.11335750822763657, 'vf_loss': 0.0006887747767905239, 'vf_explained_var': 0.9994103, 'kl': 0.011933646979741752, 'entropy': 1.3397692665457726, 'entropy_coeff': 0.005}
2020-09-21 16:47:17,819	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.1116252396022901, 'policy_loss': -0.1238524803193286, 'vf_loss': 0.0006751194123353343, 'vf_explained_var': 0.99939466, 'kl': 0.012028360390104353, 'entropy': 

custom_metrics: {}
date: 2020-09-21_16-47-19
done: false
episode_len_mean: 2830.24
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.385875232516483
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 153
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.340085193514824
      entropy_coeff: 0.005
      kl: 0.012009955593384802
      policy_loss: -0.11907025403343141
      total_loss: -0.10694019764196128
      vf_explained_var: 0.9994807839393616
      vf_loss: 0.0005903632099943934
  num_steps_sampled: 338000
  num_steps_trained: 338000
iterations_since_restore: 338
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.6
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.944990982128217

2020-09-21 16:47:23,466	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.10994582995772362, 'policy_loss': 0.0007456650491803885, 'vf_loss': 0.11441987985745072, 'vf_explained_var': 0.7829708, 'kl': 0.001053430069733713, 'entropy': 1.3639229536056519, 'entropy_coeff': 0.005}
2020-09-21 16:47:23,958	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.0015214538434520364, 'policy_loss': -0.03495853627100587, 'vf_loss': 0.036149275954812765, 'vf_explained_var': 0.92898583, 'kl': 0.004692405404057354, 'entropy': 1.359174981713295, 'entropy_coeff': 0.005}
2020-09-21 16:47:24,412	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.03811594471335411, 'policy_loss': -0.058944484451785684, 'vf_loss': 0.018718750507105142, 'vf_explained_var': 0.962667, 'kl': 0.005915151385124773, 'entropy': 1.3747

2020-09-21 16:47:34,855	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.13158968242350966, 'policy_loss': -0.14807584625668824, 'vf_loss': 0.0005645977198582841, 'vf_explained_var': 0.9988296, 'kl': 0.014978861436247826, 'entropy': 1.365516945719719, 'entropy_coeff': 0.005}
2020-09-21 16:47:35,344	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.13024878746364266, 'policy_loss': -0.1473770448938012, 'vf_loss': 0.0005425765539257554, 'vf_explained_var': 0.998881, 'kl': 0.0154164619743824, 'entropy': 1.3656142354011536, 'entropy_coeff': 0.005}
2020-09-21 16:47:35,797	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.13545363023877144, 'policy_loss': -0.15231061726808548, 'vf_loss': 0.0005004043287044624, 'vf_explained_var': 0.9989048, 'kl': 0.015289875504095107, 'entropy': 1.372

custom_metrics: {}
date: 2020-09-21_16-47-37
done: false
episode_len_mean: 2830.24
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.385875232516483
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 153
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.3697727471590042
      entropy_coeff: 0.005
      kl: 0.015472562634386122
      policy_loss: -0.16346282372251153
      total_loss: -0.14636764931492507
      vf_explained_var: 0.9990846514701843
      vf_loss: 0.0004450842498044949
  num_steps_sampled: 339000
  num_steps_trained: 339000
iterations_since_restore: 339
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.534782608695652
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.

[2m[36m(pid=24699)[0m 2020-09-21 16:47:37,542	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 13556.41578213961,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 1780},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-2.453, max=1.951, mean=0.237),
[2m[36m(pid=24699)[0m                                   'prev_action': 5,
[2m[36m(pid=24699)[

2020-09-21 16:47:41,483	INFO rollout_worker.py:736 -- Training on concatenated sample batches:

{ 'count': 64,
  'policy_batches': { 'default_policy': { 'data': { 'action_dist_inputs': np.ndarray((64, 21), dtype=float32, min=-6.454, max=7.27, mean=-0.304),
                                                    'action_logp': np.ndarray((64,), dtype=float32, min=-7.666, max=-0.044, mean=-1.425),
                                                    'action_prob': np.ndarray((64,), dtype=float32, min=0.0, max=0.957, mean=0.444),
                                                    'actions': np.ndarray((64,), dtype=int64, min=0.0, max=20.0, mean=6.375),
                                                    'advantages': np.ndarray((64,), dtype=float32, min=-2.406, max=2.257, mean=-0.075),
                                                    'agent_index': np.ndarray((64,), dtype=int64, min=0.0, max=0.0, mean=0.0),
                                                    'dones': np.ndarray((64,), dtyp

2020-09-21 16:47:48,491	DEBUG sgd.py:120 -- 14 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.10519172926433384, 'policy_loss': -0.11860825517214835, 'vf_loss': 0.0012406148780428339, 'vf_explained_var': 0.9993954, 'kl': 0.012276917695999146, 'entropy': 1.29393120855093, 'entropy_coeff': 0.005}
2020-09-21 16:47:48,944	DEBUG sgd.py:120 -- 15 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.11204858776181936, 'policy_loss': -0.12556111859157681, 'vf_loss': 0.0010949195602734108, 'vf_explained_var': 0.99944735, 'kl': 0.012453373055905104, 'entropy': 1.2991907075047493, 'entropy_coeff': 0.005}
2020-09-21 16:47:49,430	DEBUG sgd.py:120 -- 16 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.1125990953296423, 'policy_loss': -0.12655083020217717, 'vf_loss': 0.0010099342252942733, 'vf_explained_var': 0.99950206, 'kl': 0.012798722949810326, 'entropy': 1

custom_metrics: {}
date: 2020-09-21_16-47-55
done: false
episode_len_mean: 2830.24
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.385875232516483
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 153
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.298705317080021
      entropy_coeff: 0.005
      kl: 0.014709645824041218
      policy_loss: -0.1428947674576193
      total_loss: -0.1265811885241419
      vf_explained_var: 0.9997536540031433
      vf_loss: 0.00046683557229698636
  num_steps_sampled: 340000
  num_steps_trained: 340000
iterations_since_restore: 340
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.721739130434783
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.94

[2m[36m(pid=24699)[0m 2020-09-21 16:47:55,900	INFO rollout_worker.py:561 -- Generating sample batch of size 200
2020-09-21 16:47:59,496	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.12354666006285697, 'policy_loss': 0.0021843280410394073, 'vf_loss': 0.12645509745925665, 'vf_explained_var': 0.91786516, 'kl': 0.0011557478829954115, 'entropy': 1.3696118220686913, 'entropy_coeff': 0.005}
2020-09-21 16:47:59,982	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.016426708782091737, 'policy_loss': -0.02579124120529741, 'vf_loss': 0.04514123115222901, 'vf_explained_var': 0.9696207, 'kl': 0.002590089658042416, 'entropy': 1.3713961094617844, 'entropy_coeff': 0.005}
2020-09-21 16:48:00,438	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.013793182675726712, 'policy_loss': -0.034271746408

2020-09-21 16:48:10,926	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.10956515884026885, 'policy_loss': -0.12214874394703656, 'vf_loss': 0.001183076066809008, 'vf_explained_var': 0.9993159, 'kl': 0.012064628943335265, 'entropy': 1.3845297768712044, 'entropy_coeff': 0.005}
2020-09-21 16:48:11,417	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.10697294701822102, 'policy_loss': -0.11993262986652553, 'vf_loss': 0.0010415093274787068, 'vf_explained_var': 0.99933875, 'kl': 0.012394783436320722, 'entropy': 1.3812788054347038, 'entropy_coeff': 0.005}
2020-09-21 16:48:11,864	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.11201377084944397, 'policy_loss': -0.1250277750659734, 'vf_loss': 0.0011177038941241335, 'vf_explained_var': 0.9993908, 'kl': 0.01236847200198099, 'entropy': 1.

custom_metrics: {}
date: 2020-09-21_16-48-13
done: false
episode_len_mean: 2906.6
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.337342470895308
episode_reward_min: -32.22698139089825
episodes_this_iter: 1
episodes_total: 154
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.3779789954423904
      entropy_coeff: 0.005
      kl: 0.012610255682375282
      policy_loss: -0.1344756004982628
      total_loss: -0.12144353077746928
      vf_explained_var: 0.9994503259658813
      vf_loss: 0.0007701364629610907
  num_steps_sampled: 341000
  num_steps_trained: 341000
iterations_since_restore: 341
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.765217391304348
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.94

2020-09-21 16:48:16,834	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.10270256246440113, 'policy_loss': 0.0026381012285128236, 'vf_loss': 0.10522212646901608, 'vf_explained_var': 0.95254827, 'kl': 0.0007071132001606473, 'entropy': 1.2463193833827972, 'entropy_coeff': 0.005}
2020-09-21 16:48:17,323	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.0068424768396653235, 'policy_loss': -0.023889060539659113, 'vf_loss': 0.031871568877249956, 'vf_explained_var': 0.9854127, 'kl': 0.0033433288626838475, 'entropy': 1.2435421273112297, 'entropy_coeff': 0.005}
2020-09-21 16:48:17,769	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.028271158575080335, 'policy_loss': -0.04437191435135901, 'vf_loss': 0.014127538132015616, 'vf_explained_var': 0.994179, 'kl': 0.005399872025009245, 'entropy': 1.

2020-09-21 16:48:28,213	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.10469531337730587, 'policy_loss': -0.11965745757333934, 'vf_loss': 0.0004464600506253191, 'vf_explained_var': 0.99978507, 'kl': 0.013664639089256525, 'entropy': 1.2474958971142769, 'entropy_coeff': 0.005}
2020-09-21 16:48:28,698	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.11142707709223032, 'policy_loss': -0.12639343133196235, 'vf_loss': 0.0004248268214723794, 'vf_explained_var': 0.9997993, 'kl': 0.013693191867787391, 'entropy': 1.2509999796748161, 'entropy_coeff': 0.005}
2020-09-21 16:48:29,144	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.10578790854196995, 'policy_loss': -0.12145402887836099, 'vf_loss': 0.00042652722186176106, 'vf_explained_var': 0.999804, 'kl': 0.014138640428427607, 'entropy':

custom_metrics: {}
date: 2020-09-21_16-48-30
done: false
episode_len_mean: 2906.6
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.337342470895308
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 154
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.2484239861369133
      entropy_coeff: 0.005
      kl: 0.014478307508397847
      policy_loss: -0.12321342254290357
      total_loss: -0.10713369771838188
      vf_explained_var: 0.9998397827148438
      vf_loss: 0.0003329103983560344
  num_steps_sampled: 342000
  num_steps_trained: 342000
iterations_since_restore: 342
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.336363636363636
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.9

2020-09-21 16:48:34,191	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.11685558059252799, 'policy_loss': -0.0012514982518041506, 'vf_loss': 0.12344921752810478, 'vf_explained_var': 0.98313767, 'kl': 0.0005196758531187395, 'entropy': 1.2262792587280273, 'entropy_coeff': 0.005}
2020-09-21 16:48:34,680	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.005061153555288911, 'policy_loss': -0.03707313654012978, 'vf_loss': 0.032482882612384856, 'vf_explained_var': 0.99616426, 'kl': 0.003711461744387634, 'entropy': 1.2215379998087883, 'entropy_coeff': 0.005}
2020-09-21 16:48:35,132	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.047628800268284976, 'policy_loss': -0.06418299535289407, 'vf_loss': 0.012861246766988188, 'vf_explained_var': 0.9982488, 'kl': 0.00645165191963315, 'entropy': 1.

2020-09-21 16:48:41,554	DEBUG rollout_worker.py:760 -- Training out:

{ 'default_policy': { 'learner_stats': { 'allreduce_latency': 0.0,
                                         'cur_kl_coeff': 1.5187499999999998,
                                         'cur_lr': 1e-05,
                                         'entropy': 1.1973674297332764,
                                         'entropy_coeff': 0.005,
                                         'kl': 0.013716664165258408,
                                         'policy_loss': 0.0030463188886642456,
                                         'total_loss': 0.0187855027616024,
                                         'vf_explained_var': np.ndarray((1,), dtype=float32, min=1.0, max=1.0, mean=1.0),
                                         'vf_loss': 0.0008938348037190735}}}

2020-09-21 16:48:41,801	DEBUG sgd.py:120 -- 16 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.10148015758022666, 'poli

custom_metrics: {}
date: 2020-09-21_16-48-48
done: false
episode_len_mean: 2906.6
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.337342470895308
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 154
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.231572948396206
      entropy_coeff: 0.005
      kl: 0.015038929006550461
      policy_loss: -0.13137115561403334
      total_loss: -0.11437521013431251
      vf_explained_var: 0.9999585151672363
      vf_loss: 0.00031344208400696516
  num_steps_sampled: 343000
  num_steps_trained: 343000
iterations_since_restore: 343
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.513636363636365
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.9

2020-09-21 16:48:51,759	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.14920779585372657, 'policy_loss': 0.002777908928692341, 'vf_loss': 0.15152484783902764, 'vf_explained_var': 0.9644789, 'kl': 0.0008954743302235224, 'entropy': 1.2909917533397675, 'entropy_coeff': 0.005}
2020-09-21 16:48:52,251	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.023167309002019465, 'policy_loss': -0.024432120728306472, 'vf_loss': 0.04809156060218811, 'vf_explained_var': 0.9868404, 'kl': 0.003973531289375387, 'entropy': 1.3053857386112213, 'entropy_coeff': 0.005}
2020-09-21 16:48:52,701	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.020444035006221384, 'policy_loss': -0.04184482293203473, 'vf_loss': 0.020217889628838748, 'vf_explained_var': 0.9954659, 'kl': 0.005069121296401136, 'entropy': 1.3031

2020-09-21 16:49:03,113	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.10499464138410985, 'policy_loss': -0.11861658655107021, 'vf_loss': 0.0005217474335950101, 'vf_explained_var': 0.9998293, 'kl': 0.012897340318886563, 'entropy': 1.2975276708602905, 'entropy_coeff': 0.005}
2020-09-21 16:49:03,595	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.10245990881230682, 'policy_loss': -0.11624522955389693, 'vf_loss': 0.0005077464520581998, 'vf_explained_var': 0.9998615, 'kl': 0.013021260965615511, 'entropy': 1.2996927052736282, 'entropy_coeff': 0.005}
2020-09-21 16:49:04,053	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.11332414706703275, 'policy_loss': -0.1273283816408366, 'vf_loss': 0.0005026567814638838, 'vf_explained_var': 0.99986804, 'kl': 0.013144064985681325, 'entropy': 

custom_metrics: {}
date: 2020-09-21_16-49-05
done: false
episode_len_mean: 2906.6
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.337342470895308
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 154
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.301017887890339
      entropy_coeff: 0.005
      kl: 0.01353547052713111
      policy_loss: -0.12610896315891296
      total_loss: -0.111639175680466
      vf_explained_var: 0.9998892545700073
      vf_loss: 0.0004178853050689213
  num_steps_sampled: 344000
  num_steps_trained: 344000
iterations_since_restore: 344
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.395652173913043
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.94499

[2m[36m(pid=24699)[0m 2020-09-21 16:49:06,098	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 11752.49747213967,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 2780},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-2.795, max=2.143, mean=0.254),
[2m[36m(pid=24699)[0m                                   'prev_action': 6,
[2m[36m(pid=24699)[

2020-09-21 16:49:09,406	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.08542392286472023, 'policy_loss': 0.0008392416639253497, 'vf_loss': 0.08905079774558544, 'vf_explained_var': 0.9318719, 'kl': 0.001205513140607406, 'entropy': 1.2593968883156776, 'entropy_coeff': 0.005}
2020-09-21 16:49:09,862	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.0019214170170016587, 'policy_loss': -0.03572461858857423, 'vf_loss': 0.03281696402700618, 'vf_explained_var': 0.9757629, 'kl': 0.004773694992763922, 'entropy': 1.252761222422123, 'entropy_coeff': 0.005}
2020-09-21 16:49:10,351	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.036363940802402794, 'policy_loss': -0.05116007407195866, 'vf_loss': 0.012361809494905174, 'vf_explained_var': 0.990302, 'kl': 0.005790960989543237, 'entropy': 1.27213

2020-09-21 16:49:21,142	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.11152536841109395, 'policy_loss': -0.12566842732485384, 'vf_loss': 0.0006018143594701542, 'vf_explained_var': 0.99948525, 'kl': 0.013032826653216034, 'entropy': 1.2504713088274002, 'entropy_coeff': 0.005}
2020-09-21 16:49:21,634	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.1104786959476769, 'policy_loss': -0.12494234112091362, 'vf_loss': 0.0005786094989161938, 'vf_explained_var': 0.9995699, 'kl': 0.013250581920146942, 'entropy': 1.2478569075465202, 'entropy_coeff': 0.005}
2020-09-21 16:49:22,119	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.10466360137797892, 'policy_loss': -0.1197355471085757, 'vf_loss': 0.0005713051723432727, 'vf_explained_var': 0.99957466, 'kl': 0.013679569645319134, 'entropy': 

custom_metrics: {}
date: 2020-09-21_16-49-23
done: false
episode_len_mean: 2906.6
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.337342470895308
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 154
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.2486346811056137
      entropy_coeff: 0.005
      kl: 0.0137777995551005
      policy_loss: -0.1324112459551543
      total_loss: -0.11729031533468515
      vf_explained_var: 0.9996384978294373
      vf_loss: 0.00043907464350922965
  num_steps_sampled: 345000
  num_steps_trained: 345000
iterations_since_restore: 345
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.41304347826087
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.9449

[2m[36m(pid=24699)[0m 2020-09-21 16:49:23,819	INFO rollout_worker.py:561 -- Generating sample batch of size 200
2020-09-21 16:49:27,872	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.10168304183753207, 'policy_loss': 0.0002174723194912076, 'vf_loss': 0.10656388709321618, 'vf_explained_var': 0.9457483, 'kl': 0.0011345338720045728, 'entropy': 1.3642791658639908, 'entropy_coeff': 0.005}
2020-09-21 16:49:28,361	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.0030045293387956917, 'policy_loss': -0.03654438885860145, 'vf_loss': 0.03811048669740558, 'vf_explained_var': 0.9807174, 'kl': 0.005372724204789847, 'entropy': 1.3442799225449562, 'entropy_coeff': 0.005}
2020-09-21 16:49:28,815	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.03629731142427772, 'policy_loss': -0.0562560823746

2020-09-21 16:49:39,245	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.11799732269719243, 'policy_loss': -0.13299768138676882, 'vf_loss': 0.00063760416196601, 'vf_explained_var': 0.99965286, 'kl': 0.0138767987373285, 'entropy': 1.3425262719392776, 'entropy_coeff': 0.005}
2020-09-21 16:49:39,737	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.11832102411426604, 'policy_loss': -0.1334032934391871, 'vf_loss': 0.0006097634195612045, 'vf_explained_var': 0.9996689, 'kl': 0.013958406401798129, 'entropy': 1.3453636541962624, 'entropy_coeff': 0.005}
2020-09-21 16:49:40,188	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.11752215144224465, 'policy_loss': -0.13290739082731307, 'vf_loss': 0.0005944014174019685, 'vf_explained_var': 0.9996951, 'kl': 0.01415333355544135, 'entropy': 1.340

custom_metrics: {}
date: 2020-09-21_16-49-41
done: false
episode_len_mean: 2906.6
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.337342470895308
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 154
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.341880850493908
      entropy_coeff: 0.005
      kl: 0.01445446047000587
      policy_loss: -0.1341189278755337
      total_loss: -0.11836407118244097
      vf_explained_var: 0.999736487865448
      vf_loss: 0.0005115557469252963
  num_steps_sampled: 346000
  num_steps_trained: 346000
iterations_since_restore: 346
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.408695652173913
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.94499

2020-09-21 16:49:45,608	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.07223463139962405, 'policy_loss': -0.0020815590396523476, 'vf_loss': 0.07837152201682329, 'vf_explained_var': 0.9629298, 'kl': 0.0014450276882631563, 'entropy': 1.2499921545386314, 'entropy_coeff': 0.005}
2020-09-21 16:49:46,095	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.0034039082238450646, 'policy_loss': -0.02071451465599239, 'vf_loss': 0.025737154355738312, 'vf_explained_var': 0.98782444, 'kl': 0.003039243609237019, 'entropy': 1.2469149231910706, 'entropy_coeff': 0.005}
2020-09-21 16:49:46,553	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.029804702615365386, 'policy_loss': -0.04273885581642389, 'vf_loss': 0.011291838512988761, 'vf_explained_var': 0.9948471, 'kl': 0.0052025197801413015, 'entropy': 1

2020-09-21 16:49:57,053	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.11194687627721578, 'policy_loss': -0.12581786327064037, 'vf_loss': 0.0005094020161777735, 'vf_explained_var': 0.9997345, 'kl': 0.01290030445670709, 'entropy': 1.246149830520153, 'entropy_coeff': 0.005}
2020-09-21 16:49:57,538	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.11465448781382293, 'policy_loss': -0.12885238835588098, 'vf_loss': 0.0004891402004432166, 'vf_explained_var': 0.99975634, 'kl': 0.013122926058713347, 'entropy': 1.2443355843424797, 'entropy_coeff': 0.005}
2020-09-21 16:49:57,996	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.11708940553944558, 'policy_loss': -0.1309482865035534, 'vf_loss': 0.0004619812698365422, 'vf_explained_var': 0.9997684, 'kl': 0.012931383913382888, 'entropy': 1.

custom_metrics: {}
date: 2020-09-21_16-49-59
done: false
episode_len_mean: 2906.6
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.337342470895308
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 154
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.2478686571121216
      entropy_coeff: 0.005
      kl: 0.01416419114684686
      policy_loss: -0.13768844376318157
      total_loss: -0.12196137965656817
      vf_explained_var: 0.9997851252555847
      vf_loss: 0.00045454290011548437
  num_steps_sampled: 347000
  num_steps_trained: 347000
iterations_since_restore: 347
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.521739130434783
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.9

2020-09-21 16:50:02,725	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.1013488641474396, 'policy_loss': -0.0014638430438935757, 'vf_loss': 0.10794018255546689, 'vf_explained_var': 0.91297203, 'kl': 0.0007864221509703562, 'entropy': 1.2643723338842392, 'entropy_coeff': 0.005}
2020-09-21 16:50:03,182	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.00268241815501824, 'policy_loss': -0.028839262668043375, 'vf_loss': 0.03349620138760656, 'vf_explained_var': 0.9720468, 'kl': 0.002841276211256627, 'entropy': 1.2579423487186432, 'entropy_coeff': 0.005}
2020-09-21 16:50:03,666	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.03747102851048112, 'policy_loss': -0.052530525950714946, 'vf_loss': 0.0128405106079299, 'vf_explained_var': 0.98859173, 'kl': 0.005574423557845876, 'entropy': 1.2494

2020-09-21 16:50:14,047	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.11616478837095201, 'policy_loss': -0.13148544286377728, 'vf_loss': 0.00047922976591507904, 'vf_explained_var': 0.9995694, 'kl': 0.013913378061261028, 'entropy': 1.2579042315483093, 'entropy_coeff': 0.005}
2020-09-21 16:50:14,533	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.11287967686075717, 'policy_loss': -0.12990335549693555, 'vf_loss': 0.0005210754370637005, 'vf_explained_var': 0.9995526, 'kl': 0.014991421427112073, 'entropy': 1.2531220242381096, 'entropy_coeff': 0.005}
2020-09-21 16:50:14,987	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.11114650685340166, 'policy_loss': -0.12702925503253937, 'vf_loss': 0.0004574212580337189, 'vf_explained_var': 0.999598, 'kl': 0.014289876038674265, 'entropy': 

custom_metrics: {}
date: 2020-09-21_16-50-16
done: false
episode_len_mean: 2977.36
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.285571719115524
episode_reward_min: -32.22698139089825
episodes_this_iter: 1
episodes_total: 155
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.2549470737576485
      entropy_coeff: 0.005
      kl: 0.014791946799959987
      policy_loss: -0.13206584262661636
      total_loss: -0.11547894851537421
      vf_explained_var: 0.9996566772460938
      vf_loss: 0.00039636094152228907
  num_steps_sampled: 348000
  num_steps_trained: 348000
iterations_since_restore: 348
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.385714285714286
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000002
  vram_util_percent0: 0

2020-09-21 16:50:20,226	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.06543170133954845, 'policy_loss': 0.0018067251658067107, 'vf_loss': 0.06916383444331586, 'vf_explained_var': 0.9731964, 'kl': 0.0005394103121099114, 'entropy': 1.2716175094246864, 'entropy_coeff': 0.005}
2020-09-21 16:50:20,711	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.0017741477931849658, 'policy_loss': -0.021620225277729332, 'vf_loss': 0.02218715992057696, 'vf_explained_var': 0.99086857, 'kl': 0.002641754224896431, 'entropy': 1.2706500813364983, 'entropy_coeff': 0.005}
2020-09-21 16:50:21,167	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.03023856447543949, 'policy_loss': -0.042600062559358776, 'vf_loss': 0.010642550827469677, 'vf_explained_var': 0.99533147, 'kl': 0.005344386794604361, 'entropy': 1

2020-09-21 16:50:31,640	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.10536510311067104, 'policy_loss': -0.11831284617073834, 'vf_loss': 0.00038002635301381815, 'vf_explained_var': 0.99982834, 'kl': 0.012459871708415449, 'entropy': 1.271141916513443, 'entropy_coeff': 0.005}
2020-09-21 16:50:32,140	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.10797736374661326, 'policy_loss': -0.12092754640616477, 'vf_loss': 0.00036631398143072147, 'vf_explained_var': 0.99984133, 'kl': 0.012494625756517053, 'entropy': 1.2784691825509071, 'entropy_coeff': 0.005}
2020-09-21 16:50:32,594	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.1127839736873284, 'policy_loss': -0.12603990687057376, 'vf_loss': 0.0003386399366718251, 'vf_explained_var': 0.9998501, 'kl': 0.012692387565039098, 'entropy'

custom_metrics: {}
date: 2020-09-21_16-50-34
done: false
episode_len_mean: 2977.36
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.285571719115527
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 155
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.2709597274661064
      entropy_coeff: 0.005
      kl: 0.01262971421238035
      policy_loss: -0.12675252882763743
      total_loss: -0.1136266787070781
      vf_explained_var: 0.9998729825019836
      vf_loss: 0.0002992632680616225
  num_steps_sampled: 349000
  num_steps_trained: 349000
iterations_since_restore: 349
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.447826086956522
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.94

[2m[36m(pid=24699)[0m 2020-09-21 16:50:34,308	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 11524.88,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 3780},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-3.939, max=4.465, mean=0.584),
[2m[36m(pid=24699)[0m                                   'prev_action': 0,
[2m[36m(pid=24699)[0m       

2020-09-21 16:50:37,546	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.10200062370859087, 'policy_loss': -0.0009381897980347276, 'vf_loss': 0.1057732398621738, 'vf_explained_var': 0.9871239, 'kl': 0.002018786321052332, 'entropy': 1.1800919398665428, 'entropy_coeff': 0.005}
2020-09-21 16:50:38,037	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.0035359024768695235, 'policy_loss': -0.03188651741947979, 'vf_loss': 0.027452344947960228, 'vf_explained_var': 0.99634314, 'kl': 0.004428883126820438, 'entropy': 1.1656178832054138, 'entropy_coeff': 0.005}
2020-09-21 16:50:38,483	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.044066932052373886, 'policy_loss': -0.05976784561062232, 'vf_loss': 0.012121288804337382, 'vf_explained_var': 0.9984715, 'kl': 0.006192660017404705, 'entropy': 1.1

2020-09-21 16:50:44,191	DEBUG sgd.py:120 -- 14 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.1034680181182921, 'policy_loss': -0.1163065581349656, 'vf_loss': 0.0006699338809994515, 'vf_explained_var': 0.99990743, 'kl': 0.01188028123578988, 'entropy': 1.1749128177762032, 'entropy_coeff': 0.005}
2020-09-21 16:50:44,635	DEBUG sgd.py:120 -- 15 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.1041706363321282, 'policy_loss': -0.11701091333816294, 'vf_loss': 0.0006535140510095516, 'vf_explained_var': 0.99991626, 'kl': 0.011882472841534764, 'entropy': 1.171948529779911, 'entropy_coeff': 0.005}
2020-09-21 16:50:45,119	DEBUG sgd.py:120 -- 16 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.10701176291331649, 'policy_loss': -0.12079897220246494, 'vf_loss': 0.0005670151513186283, 'vf_explained_var': 0.99992573, 'kl': 0.012573309184517711, 'entropy': 1.

custom_metrics: {}
date: 2020-09-21_16-50-51
done: false
episode_len_mean: 2977.36
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.285571719115527
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 155
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.1735426858067513
      entropy_coeff: 0.005
      kl: 0.015341898135375232
      policy_loss: -0.14177137578371912
      total_loss: -0.1241100647021085
      vf_explained_var: 0.9999680519104004
      vf_loss: 0.0002285169011884136
  num_steps_sampled: 350000
  num_steps_trained: 350000
iterations_since_restore: 350
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.3500000000000005
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.

[2m[36m(pid=24699)[0m 2020-09-21 16:50:51,522	INFO rollout_worker.py:561 -- Generating sample batch of size 200
2020-09-21 16:50:54,811	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.0755433269077912, 'policy_loss': 0.0032089827582240105, 'vf_loss': 0.07709037256427109, 'vf_explained_var': 0.96138453, 'kl': 0.0012779455819290214, 'entropy': 1.339381717145443, 'entropy_coeff': 0.005}
2020-09-21 16:50:55,300	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.009442263399250805, 'policy_loss': -0.036158662871457636, 'vf_loss': 0.025094218377489597, 'vf_explained_var': 0.9882766, 'kl': 0.005510544098797254, 'entropy': 1.3493920341134071, 'entropy_coeff': 0.005}
2020-09-21 16:50:55,752	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.039177788072265685, 'policy_loss': -0.05260935565

2020-09-21 16:51:06,232	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.12424411065876484, 'policy_loss': -0.1385096344165504, 'vf_loss': 0.0003629120410550968, 'vf_explained_var': 0.9997866, 'kl': 0.013501543726306409, 'entropy': 1.3205723017454147, 'entropy_coeff': 0.005}
2020-09-21 16:51:06,727	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.1288229781202972, 'policy_loss': -0.14330418745521456, 'vf_loss': 0.00033316562257823534, 'vf_explained_var': 0.99980545, 'kl': 0.013695543981157243, 'entropy': 1.3304125890135765, 'entropy_coeff': 0.005}
2020-09-21 16:51:07,181	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.1316835898323916, 'policy_loss': -0.14659300504717976, 'vf_loss': 0.00031573951582686277, 'vf_explained_var': 0.9998169, 'kl': 0.013944103149697185, 'entropy': 

custom_metrics: {}
date: 2020-09-21_16-51-08
done: false
episode_len_mean: 2977.36
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.285571719115527
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 155
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.3275600373744965
      entropy_coeff: 0.005
      kl: 0.014068525459151715
      policy_loss: -0.14850238896906376
      total_loss: -0.1334901382215321
      vf_explained_var: 0.9998360872268677
      vf_loss: 0.00028347099942038767
  num_steps_sampled: 351000
  num_steps_trained: 351000
iterations_since_restore: 351
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.550000000000001
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.

2020-09-21 16:51:12,335	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.0729403899749741, 'policy_loss': -0.0030073942616581917, 'vf_loss': 0.08095072000287473, 'vf_explained_var': 0.9622489, 'kl': 0.0007938444851158799, 'entropy': 1.2417177557945251, 'entropy_coeff': 0.005}
2020-09-21 16:51:12,821	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.004811661347048357, 'policy_loss': -0.03232404624577612, 'vf_loss': 0.02676299843005836, 'vf_explained_var': 0.9879284, 'kl': 0.004547696953522973, 'entropy': 1.2314854711294174, 'entropy_coeff': 0.005}
2020-09-21 16:51:13,269	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.03801395252230577, 'policy_loss': -0.05311315867584199, 'vf_loss': 0.011568349844310433, 'vf_explained_var': 0.99446994, 'kl': 0.006378880701959133, 'entropy': 1.231

2020-09-21 16:51:23,714	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.12406122172251344, 'policy_loss': -0.1406148222158663, 'vf_loss': 0.00035441654108581133, 'vf_explained_var': 0.9998112, 'kl': 0.014768415538128465, 'entropy': 1.246068499982357, 'entropy_coeff': 0.005}
2020-09-21 16:51:24,168	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.11424371067550965, 'policy_loss': -0.1312833713600412, 'vf_loss': 0.0003918129277735716, 'vf_explained_var': 0.9998176, 'kl': 0.015066480031237006, 'entropy': 1.246872253715992, 'entropy_coeff': 0.005}
2020-09-21 16:51:24,645	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.11702868877910078, 'policy_loss': -0.13414821680635214, 'vf_loss': 0.00033523476849950384, 'vf_explained_var': 0.99982595, 'kl': 0.015169177670031786, 'entropy': 1

custom_metrics: {}
date: 2020-09-21_16-51-26
done: false
episode_len_mean: 2977.36
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.285571719115527
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 155
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.2477966994047165
      entropy_coeff: 0.005
      kl: 0.01593644655076787
      policy_loss: -0.13547032233327627
      total_loss: -0.11724104201130103
      vf_explained_var: 0.9998577833175659
      vf_loss: 0.0002647832452566945
  num_steps_sampled: 352000
  num_steps_trained: 352000
iterations_since_restore: 352
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.634782608695653
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.9

2020-09-21 16:51:30,136	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.064876175718382, 'policy_loss': 0.0034028280060738325, 'vf_loss': 0.06488944205921143, 'vf_explained_var': 0.96159977, 'kl': 0.0017481303187998343, 'entropy': 1.214213639497757, 'entropy_coeff': 0.005}
2020-09-21 16:51:30,625	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.0011417681816965342, 'policy_loss': -0.018610223662108183, 'vf_loss': 0.020249503490049392, 'vf_explained_var': 0.9872292, 'kl': 0.003666003336547874, 'entropy': 1.2130491733551025, 'entropy_coeff': 0.005}
2020-09-21 16:51:31,082	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.027033564940211363, 'policy_loss': -0.03695762960705906, 'vf_loss': 0.009287527034757659, 'vf_explained_var': 0.99449885, 'kl': 0.004425616905791685, 'entropy': 1.21

2020-09-21 16:51:41,542	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.09936239448143169, 'policy_loss': -0.11104088218417019, 'vf_loss': 0.000469397989945719, 'vf_explained_var': 0.999678, 'kl': 0.011387467297026888, 'entropy': 1.2171251401305199, 'entropy_coeff': 0.005}
2020-09-21 16:51:41,667	INFO rollout_worker.py:736 -- Training on concatenated sample batches:

{ 'count': 64,
  'policy_batches': { 'default_policy': { 'data': { 'action_dist_inputs': np.ndarray((64, 21), dtype=float32, min=-8.203, max=8.969, mean=-0.35),
                                                    'action_logp': np.ndarray((64,), dtype=float32, min=-5.505, max=-0.009, mean=-1.063),
                                                    'action_prob': np.ndarray((64,), dtype=float32, min=0.004, max=0.991, mean=0.512),
                                                    'actions': np.ndarray((64,), dtype=int64, min=0.0, max=20.0, mean=5.297)

custom_metrics: {}
date: 2020-09-21_16-51-43
done: false
episode_len_mean: 2977.36
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.285571719115527
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 155
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.2230919003486633
      entropy_coeff: 0.005
      kl: 0.011981456249486655
      policy_loss: -0.1071777418255806
      total_loss: -0.09465236216783524
      vf_explained_var: 0.9997324347496033
      vf_loss: 0.00044400135993782897
  num_steps_sampled: 353000
  num_steps_trained: 353000
iterations_since_restore: 353
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.4391304347826095
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0

2020-09-21 16:51:47,960	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.10585146408993751, 'policy_loss': 0.0035352373961359262, 'vf_loss': 0.10643816459923983, 'vf_explained_var': 0.9180765, 'kl': 0.0014511488835717845, 'entropy': 1.265174113214016, 'entropy_coeff': 0.005}
2020-09-21 16:51:48,447	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.005311342771165073, 'policy_loss': -0.02578759111929685, 'vf_loss': 0.03197002212982625, 'vf_explained_var': 0.97490096, 'kl': 0.0035481902596075088, 'entropy': 1.251980982720852, 'entropy_coeff': 0.005}
2020-09-21 16:51:48,897	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.041885064681991935, 'policy_loss': -0.05937552801333368, 'vf_loss': 0.01317719678627327, 'vf_explained_var': 0.99087024, 'kl': 0.006945885776076466, 'entropy': 1.2471

2020-09-21 16:51:59,536	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.12322540720924735, 'policy_loss': -0.1375955599360168, 'vf_loss': 0.0004511082843237091, 'vf_explained_var': 0.99959564, 'kl': 0.01332980819279328, 'entropy': 1.2651207372546196, 'entropy_coeff': 0.005}
2020-09-21 16:52:00,031	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.12312410376034677, 'policy_loss': -0.13769148848950863, 'vf_loss': 0.0004230709218973061, 'vf_explained_var': 0.9996097, 'kl': 0.013469537836499512, 'entropy': 1.262507826089859, 'entropy_coeff': 0.005}
2020-09-21 16:52:00,477	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.12261373875662684, 'policy_loss': -0.13792625325731933, 'vf_loss': 0.00040649767106515355, 'vf_explained_var': 0.9996417, 'kl': 0.013958027004264295, 'entropy': 1

custom_metrics: {}
date: 2020-09-21_16-52-02
done: false
episode_len_mean: 2977.36
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.285571719115527
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 155
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.266507737338543
      entropy_coeff: 0.005
      kl: 0.01446396391838789
      policy_loss: -0.1343197305686772
      total_loss: -0.11836438300088048
      vf_explained_var: 0.999705970287323
      vf_loss: 0.00032074532555270707
  num_steps_sampled: 354000
  num_steps_trained: 354000
iterations_since_restore: 354
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 6.27391304347826
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.830434782608696
  vram_util_percent0: 0.9449

[2m[36m(pid=24699)[0m 2020-09-21 16:52:02,259	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 9983.321032584037,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 4780},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-10.617, max=7.139, mean=0.477),
[2m[36m(pid=24699)[0m                                   'prev_action': 14,
[2m[36m(pid=24699)

2020-09-21 16:52:06,174	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.1164444147143513, 'policy_loss': 0.007178498664870858, 'vf_loss': 0.11398174287751317, 'vf_explained_var': 0.8721968, 'kl': 0.001122535502035893, 'entropy': 1.2841343060135841, 'entropy_coeff': 0.005}
2020-09-21 16:52:06,670	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.010492085013538599, 'policy_loss': -0.027540573792066425, 'vf_loss': 0.03674950113054365, 'vf_explained_var': 0.9577341, 'kl': 0.005060529787442647, 'entropy': 1.2805031538009644, 'entropy_coeff': 0.005}
2020-09-21 16:52:07,128	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.02754842839203775, 'policy_loss': -0.04853476939024404, 'vf_loss': 0.016709381889086217, 'vf_explained_var': 0.98098695, 'kl': 0.007035197340883315, 'entropy': 1.281549

2020-09-21 16:52:17,596	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.10884824313689023, 'policy_loss': -0.12288385457395634, 'vf_loss': 0.0005047569120506523, 'vf_explained_var': 0.9993837, 'kl': 0.013185129035264254, 'entropy': 1.2988119721412659, 'entropy_coeff': 0.005}
2020-09-21 16:52:18,082	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.11815060512162745, 'policy_loss': -0.1320673384470865, 'vf_loss': 0.0004761274776683422, 'vf_explained_var': 0.9994167, 'kl': 0.013096788083203137, 'entropy': 1.2900273203849792, 'entropy_coeff': 0.005}
2020-09-21 16:52:18,533	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.11402067926246673, 'policy_loss': -0.12875943607650697, 'vf_loss': 0.00043552055649342947, 'vf_explained_var': 0.9994271, 'kl': 0.013686914287973195, 'entropy': 

custom_metrics: {}
date: 2020-09-21_16-52-20
done: false
episode_len_mean: 2977.36
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.285571719115527
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 155
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.2966273576021194
      entropy_coeff: 0.005
      kl: 0.014021608338225633
      policy_loss: -0.1328583470894955
      total_loss: -0.11766954598715529
      vf_explained_var: 0.9995225667953491
      vf_loss: 0.00037662138765881537
  num_steps_sampled: 355000
  num_steps_trained: 355000
iterations_since_restore: 355
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.521739130434782
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.860869565217394
  vram_util_percent0: 0.

[2m[36m(pid=24699)[0m 2020-09-21 16:52:20,265	INFO rollout_worker.py:561 -- Generating sample batch of size 200
2020-09-21 16:52:24,208	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.03906405786983669, 'policy_loss': -0.003627529484219849, 'vf_loss': 0.04787714290432632, 'vf_explained_var': 0.93460685, 'kl': 0.0008816092868209613, 'entropy': 1.3048997595906258, 'entropy_coeff': 0.005}
2020-09-21 16:52:24,692	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.0009927559876814485, 'policy_loss': -0.01698066940298304, 'vf_loss': 0.01743456581607461, 'vf_explained_var': 0.97518694, 'kl': 0.003380286507308483, 'entropy': 1.316093921661377, 'entropy_coeff': 0.005}
2020-09-21 16:52:25,150	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.03270597325172275, 'policy_loss': -0.04496544483

2020-09-21 16:52:35,566	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.11751572473440319, 'policy_loss': -0.1302279862575233, 'vf_loss': 0.00047039140008564573, 'vf_explained_var': 0.9993653, 'kl': 0.012361064786091447, 'entropy': 1.3062986508011818, 'entropy_coeff': 0.005}
2020-09-21 16:52:36,051	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.11122879694448784, 'policy_loss': -0.12413716746959835, 'vf_loss': 0.0004220808968966594, 'vf_explained_var': 0.99939084, 'kl': 0.012522308388724923, 'entropy': 1.3063920065760612, 'entropy_coeff': 0.005}
2020-09-21 16:52:36,502	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.1142520431894809, 'policy_loss': -0.12758766976185143, 'vf_loss': 0.00040068317139230203, 'vf_explained_var': 0.999425, 'kl': 0.012815147580113262, 'entropy': 

custom_metrics: {}
date: 2020-09-21_16-52-38
done: false
episode_len_mean: 2977.36
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.285571719115527
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 155
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.308548964560032
      entropy_coeff: 0.005
      kl: 0.013285672350320965
      policy_loss: -0.13241829653270543
      total_loss: -0.11846012808382511
      vf_explained_var: 0.9995183944702148
      vf_loss: 0.0003232917679270031
  num_steps_sampled: 356000
  num_steps_trained: 356000
iterations_since_restore: 356
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.613043478260869
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0.9

2020-09-21 16:52:41,767	DEBUG rollout_worker.py:760 -- Training out:

{ 'default_policy': { 'learner_stats': { 'allreduce_latency': 0.0,
                                         'cur_kl_coeff': 1.5187499999999998,
                                         'cur_lr': 1e-05,
                                         'entropy': 1.244917392730713,
                                         'entropy_coeff': 0.005,
                                         'kl': 3.7609254377457546e-08,
                                         'policy_loss': 0.012562712654471397,
                                         'total_loss': 0.17289653420448303,
                                         'vf_explained_var': np.ndarray((1,), dtype=float32, min=0.948, max=0.948, mean=0.948),
                                         'vf_loss': 0.16655832529067993}}}

2020-09-21 16:52:41,771	INFO rollout_worker.py:736 -- Training on concatenated sample batches:

{ 'count': 64,
  'policy_batches': { 'default_policy': { 'data': { 

2020-09-21 16:52:48,856	DEBUG sgd.py:120 -- 14 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.09389868576545268, 'policy_loss': -0.10465057869441807, 'vf_loss': 0.0006923983055457938, 'vf_explained_var': 0.99969566, 'kl': 0.01070922875078395, 'entropy': 1.241029568016529, 'entropy_coeff': 0.005}
2020-09-21 16:52:49,308	DEBUG sgd.py:120 -- 15 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.09061295731225982, 'policy_loss': -0.10070097871357575, 'vf_loss': 0.0006075203564250842, 'vf_explained_var': 0.9997317, 'kl': 0.010316205967683345, 'entropy': 1.2374467253684998, 'entropy_coeff': 0.005}
2020-09-21 16:52:49,793	DEBUG sgd.py:120 -- 16 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.10255187132861465, 'policy_loss': -0.11341045296285301, 'vf_loss': 0.0005891717610211344, 'vf_explained_var': 0.9997325, 'kl': 0.010852893523406237, 'entropy': 1

custom_metrics: {}
date: 2020-09-21_16-52-56
done: false
episode_len_mean: 2977.36
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.285571719115527
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 155
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.2373361140489578
      entropy_coeff: 0.005
      kl: 0.012886049400549382
      policy_loss: -0.12534633255563676
      total_loss: -0.11165777873247862
      vf_explained_var: 0.9998661279678345
      vf_loss: 0.00030454555053438526
  num_steps_sampled: 357000
  num_steps_trained: 357000
iterations_since_restore: 357
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.569565217391304
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0

2020-09-21 16:53:00,499	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.046399633632972836, 'policy_loss': -0.005328669212758541, 'vf_loss': 0.056088638259097934, 'vf_explained_var': 0.9352397, 'kl': 0.0011479365320187507, 'entropy': 1.2207532674074173, 'entropy_coeff': 0.005}
2020-09-21 16:53:00,990	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.005120075191371143, 'policy_loss': -0.02241797384340316, 'vf_loss': 0.02613882371224463, 'vf_explained_var': 0.9747596, 'kl': 0.004875597485806793, 'entropy': 1.2011171653866768, 'entropy_coeff': 0.005}
2020-09-21 16:53:01,432	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.02568964345846325, 'policy_loss': -0.04269353952258825, 'vf_loss': 0.014084313588682562, 'vf_explained_var': 0.9858697, 'kl': 0.00590143384761177, 'entropy': 1.2086

2020-09-21 16:53:11,863	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.11397172685246915, 'policy_loss': -0.12935219053179026, 'vf_loss': 0.0005300965567585081, 'vf_explained_var': 0.9993974, 'kl': 0.01374153804499656, 'entropy': 1.2039165943861008, 'entropy_coeff': 0.005}
2020-09-21 16:53:12,352	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.11145544075407088, 'policy_loss': -0.12773126893443987, 'vf_loss': 0.0005035869771745638, 'vf_explained_var': 0.9993911, 'kl': 0.01433946896577254, 'entropy': 1.2011655196547508, 'entropy_coeff': 0.005}
2020-09-21 16:53:12,806	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.11708438186906278, 'policy_loss': -0.13298147358000278, 'vf_loss': 0.0005235023163550068, 'vf_explained_var': 0.99941194, 'kl': 0.014095611521042883, 'entropy': 1

custom_metrics: {}
date: 2020-09-21_16-53-14
done: false
episode_len_mean: 2977.36
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.285571719115527
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 155
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.2076553851366043
      entropy_coeff: 0.005
      kl: 0.014383519650436938
      policy_loss: -0.13211790309287608
      total_loss: -0.11589940532576293
      vf_explained_var: 0.9995028376579285
      vf_loss: 0.00041180557491315994
  num_steps_sampled: 358000
  num_steps_trained: 358000
iterations_since_restore: 358
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.539130434782609
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.800000000000004
  vram_util_percent0: 0

2020-09-21 16:53:18,645	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.057664434192702174, 'policy_loss': -0.002128895604982972, 'vf_loss': 0.06362139130942523, 'vf_explained_var': 0.90347767, 'kl': 0.0015077087268275946, 'entropy': 1.2235786616802216, 'entropy_coeff': 0.005}
2020-09-21 16:53:19,134	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.007410907885059714, 'policy_loss': -0.03419281728565693, 'vf_loss': 0.02394089865265414, 'vf_explained_var': 0.9656069, 'kl': 0.005846265994478017, 'entropy': 1.20760178565979, 'entropy_coeff': 0.005}
2020-09-21 16:53:19,587	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.02370701788458973, 'policy_loss': -0.041492928052321076, 'vf_loss': 0.012326796248089522, 'vf_explained_var': 0.98243666, 'kl': 0.0075870356522500515, 'entropy': 1.2

2020-09-21 16:53:30,019	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.09768937935587019, 'policy_loss': -0.11172371380962431, 'vf_loss': 0.0007536435223300941, 'vf_explained_var': 0.9987223, 'kl': 0.012770584842655808, 'entropy': 1.222926527261734, 'entropy_coeff': 0.005}
2020-09-21 16:53:30,508	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.10413429408799857, 'policy_loss': -0.11786734289489686, 'vf_loss': 0.0007039604770398, 'vf_explained_var': 0.99879575, 'kl': 0.012598775268997997, 'entropy': 1.2210589945316315, 'entropy_coeff': 0.005}
2020-09-21 16:53:30,963	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.10388890746980906, 'policy_loss': -0.11838588234968483, 'vf_loss': 0.0007235652810777538, 'vf_explained_var': 0.9988556, 'kl': 0.013083583617117256, 'entropy': 1.2

custom_metrics: {}
date: 2020-09-21_16-53-32
done: false
episode_len_mean: 2977.36
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.285571719115527
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 155
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.2191507145762444
      entropy_coeff: 0.005
      kl: 0.013269455113913864
      policy_loss: -0.11982038011774421
      total_loss: -0.105155986151658
      vf_explained_var: 0.9989455342292786
      vf_loss: 0.0006071572242944967
  num_steps_sampled: 359000
  num_steps_trained: 359000
iterations_since_restore: 359
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.660869565217392
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.81739130434783
  vram_util_percent0: 0.944

[2m[36m(pid=24699)[0m 2020-09-21 16:53:32,714	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 9049.296378600677,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 5780},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-3.045, max=3.289, mean=0.536),
[2m[36m(pid=24699)[0m                                   'prev_action': 12,
[2m[36m(pid=24699)

2020-09-21 16:53:36,880	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.1133664547232911, 'policy_loss': 0.0006246560660656542, 'vf_loss': 0.11756796529516578, 'vf_explained_var': 0.97590804, 'kl': 0.0006202157576852274, 'entropy': 1.1536227762699127, 'entropy_coeff': 0.005}
2020-09-21 16:53:37,367	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.012689588707871735, 'policy_loss': -0.019421727629378438, 'vf_loss': 0.03342686407268047, 'vf_explained_var': 0.9926622, 'kl': 0.0029222536977613345, 'entropy': 1.1507453471422195, 'entropy_coeff': 0.005}
2020-09-21 16:53:37,819	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.016926801297813654, 'policy_loss': -0.03339137276634574, 'vf_loss': 0.017154650704469532, 'vf_explained_var': 0.9960346, 'kl': 0.0032947600993793458, 'entropy': 1.1

2020-09-21 16:53:43,502	DEBUG sgd.py:120 -- 14 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.0722242483170703, 'policy_loss': -0.08245593751780689, 'vf_loss': 0.0016550289292354137, 'vf_explained_var': 0.99956346, 'kl': 0.009403029718669131, 'entropy': 1.140837900340557, 'entropy_coeff': 0.005}
2020-09-21 16:53:43,957	DEBUG sgd.py:120 -- 15 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.07451472291722894, 'policy_loss': -0.08500927034765482, 'vf_loss': 0.001512348375399597, 'vf_explained_var': 0.9996016, 'kl': 0.009649188432376832, 'entropy': 1.1345003098249435, 'entropy_coeff': 0.005}
2020-09-21 16:53:44,442	DEBUG sgd.py:120 -- 16 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.07730561727657914, 'policy_loss': -0.08726709883194417, 'vf_loss': 0.0013726629076700192, 'vf_explained_var': 0.9996559, 'kl': 0.009415882610483095, 'entropy': 1.

custom_metrics: {}
date: 2020-09-21_16-53-50
done: false
episode_len_mean: 2977.36
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.285571719115527
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 155
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.144933469593525
      entropy_coeff: 0.005
      kl: 0.011612437723670155
      policy_loss: -0.09880953782703727
      total_loss: -0.0861844044411555
      vf_explained_var: 0.9998208284378052
      vf_loss: 0.0007134163788578007
  num_steps_sampled: 360000
  num_steps_trained: 360000
iterations_since_restore: 360
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.660869565217391
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.895652173913044
  vram_util_percent0: 0.94

[2m[36m(pid=24699)[0m 2020-09-21 16:53:50,885	INFO rollout_worker.py:561 -- Generating sample batch of size 200
2020-09-21 16:53:55,047	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.09414590749656782, 'policy_loss': 0.0014635712141171098, 'vf_loss': 0.0971682914532721, 'vf_explained_var': 0.94228315, 'kl': 0.0011616432987267555, 'entropy': 1.2500417828559875, 'entropy_coeff': 0.005}
2020-09-21 16:53:55,537	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.01047211152035743, 'policy_loss': -0.02249849820509553, 'vf_loss': 0.03276264085434377, 'vf_explained_var': 0.98044443, 'kl': 0.004213156571495347, 'entropy': 1.2381512597203255, 'entropy_coeff': 0.005}
2020-09-21 16:53:55,982	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.030866381421219558, 'policy_loss': -0.0508480654098

2020-09-21 16:54:06,463	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.10976445901906118, 'policy_loss': -0.12487411068286747, 'vf_loss': 0.0007633089699083939, 'vf_explained_var': 0.99944866, 'kl': 0.013634231989271939, 'entropy': 1.2721311151981354, 'entropy_coeff': 0.005}
2020-09-21 16:54:06,955	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.11119810468517244, 'policy_loss': -0.12663184106349945, 'vf_loss': 0.000750439750845544, 'vf_explained_var': 0.99947315, 'kl': 0.013840649626217782, 'entropy': 1.2674370184540749, 'entropy_coeff': 0.005}
2020-09-21 16:54:07,407	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.11121420678682625, 'policy_loss': -0.12652005115523934, 'vf_loss': 0.0006891316679684678, 'vf_explained_var': 0.99950457, 'kl': 0.01379212411120534, 'entropy':

custom_metrics: {}
date: 2020-09-21_16-54-08
done: false
episode_len_mean: 2977.36
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.285571719115527
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 155
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.263807788491249
      entropy_coeff: 0.005
      kl: 0.014218396041542292
      policy_loss: -0.13772874977439642
      total_loss: -0.12185202981345356
      vf_explained_var: 0.9996023178100586
      vf_loss: 0.0006015761573507916
  num_steps_sampled: 361000
  num_steps_trained: 361000
iterations_since_restore: 361
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.673913043478261
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.9
  vram_util_percent0: 0.944990982128217

2020-09-21 16:54:13,579	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.09271775546949357, 'policy_loss': -0.0013934136950410903, 'vf_loss': 0.09892222355119884, 'vf_explained_var': 0.8727865, 'kl': 0.001009501937924595, 'entropy': 1.2688463106751442, 'entropy_coeff': 0.005}
2020-09-21 16:54:14,065	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.004947449720930308, 'policy_loss': -0.032232987810857594, 'vf_loss': 0.03770200873259455, 'vf_explained_var': 0.95263946, 'kl': 0.00381459372874815, 'entropy': 1.2629981264472008, 'entropy_coeff': 0.005}
2020-09-21 16:54:14,522	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.028530632553156465, 'policy_loss': -0.05108103578095324, 'vf_loss': 0.017928494547959417, 'vf_explained_var': 0.9753095, 'kl': 0.007178902567829937, 'entropy': 1.256

2020-09-21 16:54:24,951	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.11034706357168034, 'policy_loss': -0.12752108881250024, 'vf_loss': 0.001026685433316743, 'vf_explained_var': 0.998497, 'kl': 0.01476876885863021, 'entropy': 1.2565447762608528, 'entropy_coeff': 0.005}
2020-09-21 16:54:25,437	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.11215609102509916, 'policy_loss': -0.1292435850482434, 'vf_loss': 0.001031748022796819, 'vf_explained_var': 0.99854094, 'kl': 0.014713765413034707, 'entropy': 1.2581562250852585, 'entropy_coeff': 0.005}
2020-09-21 16:54:25,892	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.1136037833057344, 'policy_loss': -0.13059446564875543, 'vf_loss': 0.0009546401488478296, 'vf_explained_var': 0.9985771, 'kl': 0.014705820300150663, 'entropy': 1.259

custom_metrics: {}
date: 2020-09-21_16-54-27
done: false
episode_len_mean: 2977.36
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.285571719115527
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 155
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.2553967610001564
      entropy_coeff: 0.005
      kl: 0.01571137143764645
      policy_loss: -0.1330937820021063
      total_loss: -0.11469768546521664
      vf_explained_var: 0.9988036751747131
      vf_loss: 0.0008114285083138384
  num_steps_sampled: 362000
  num_steps_trained: 362000
iterations_since_restore: 362
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.721739130434782
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.9
  vram_util_percent0: 0.9449909821282177

2020-09-21 16:54:32,003	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.09916217054706067, 'policy_loss': 0.005519327358342707, 'vf_loss': 0.09774389956146479, 'vf_explained_var': 0.835633, 'kl': 0.001554810051036859, 'entropy': 1.2924847975373268, 'entropy_coeff': 0.005}
2020-09-21 16:54:32,489	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.004469383391551673, 'policy_loss': -0.03388021944556385, 'vf_loss': 0.030128823360428214, 'vf_explained_var': 0.9404304, 'kl': 0.003732956523890607, 'entropy': 1.2774832248687744, 'entropy_coeff': 0.005}
2020-09-21 16:54:32,943	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.03174827294424176, 'policy_loss': -0.04902315454091877, 'vf_loss': 0.015501688118092716, 'vf_explained_var': 0.9688201, 'kl': 0.005401230140705593, 'entropy': 1.285983

2020-09-21 16:54:41,857	DEBUG rollout_worker.py:760 -- Training out:

{ 'default_policy': { 'learner_stats': { 'allreduce_latency': 0.0,
                                         'cur_kl_coeff': 1.5187499999999998,
                                         'cur_lr': 1e-05,
                                         'entropy': 1.1988227367401123,
                                         'entropy_coeff': 0.005,
                                         'kl': 0.010671952739357948,
                                         'policy_loss': -0.013067412190139294,
                                         'total_loss': -0.0017984479200094938,
                                         'vf_explained_var': np.ndarray((1,), dtype=float32, min=0.998, max=0.998, mean=0.998),
                                         'vf_loss': 0.0010550444712862372}}}

2020-09-21 16:54:41,971	DEBUG sgd.py:120 -- 21 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.10896651474467

custom_metrics: {}
date: 2020-09-21_16-54-45
done: false
episode_len_mean: 2977.36
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.285571719115527
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 155
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.2973295897245407
      entropy_coeff: 0.005
      kl: 0.015080610872246325
      policy_loss: -0.13816388649865985
      total_loss: -0.12094153522048146
      vf_explained_var: 0.998386561870575
      vf_loss: 0.0008053183992160484
  num_steps_sampled: 363000
  num_steps_trained: 363000
iterations_since_restore: 363
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 6.0
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.9
  vram_util_percent0: 0.9449909821282177
  vram_util_

2020-09-21 16:54:50,871	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.11117968813050538, 'policy_loss': 0.000998225761577487, 'vf_loss': 0.11517888912931085, 'vf_explained_var': 0.8645071, 'kl': 0.0006242162040410171, 'entropy': 1.1890899613499641, 'entropy_coeff': 0.005}
2020-09-21 16:54:51,357	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.016048075631260872, 'policy_loss': -0.029344859474804252, 'vf_loss': 0.04656483395956457, 'vf_explained_var': 0.9465784, 'kl': 0.0031419206570717506, 'entropy': 1.18873780220747, 'entropy_coeff': 0.005}
2020-09-21 16:54:51,805	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.012983440246898681, 'policy_loss': -0.04213725100271404, 'vf_loss': 0.028552034753374755, 'vf_explained_var': 0.9697119, 'kl': 0.004293793987017125, 'entropy': 1.18388

2020-09-21 16:55:02,247	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.10995580547023565, 'policy_loss': -0.12653675395995378, 'vf_loss': 0.0014415304212889168, 'vf_explained_var': 0.99819416, 'kl': 0.01385464024497196, 'entropy': 1.1804632171988487, 'entropy_coeff': 0.005}
2020-09-21 16:55:02,691	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.10417964810039848, 'policy_loss': -0.1204212317825295, 'vf_loss': 0.001286507525946945, 'vf_explained_var': 0.99824864, 'kl': 0.01372614735737443, 'entropy': 1.1783010065555573, 'entropy_coeff': 0.005}
2020-09-21 16:55:03,184	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.10119762230897322, 'policy_loss': -0.1173220130149275, 'vf_loss': 0.0011877060605911538, 'vf_explained_var': 0.99845576, 'kl': 0.013702125870622694, 'entropy': 1.

custom_metrics: {}
date: 2020-09-21_16-55-04
done: false
episode_len_mean: 2977.36
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.285571719115527
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 155
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.1735422909259796
      entropy_coeff: 0.005
      kl: 0.01456300204154104
      policy_loss: -0.12705500167794526
      total_loss: -0.109800795908086
      vf_explained_var: 0.9986803531646729
      vf_loss: 0.0010043577531178016
  num_steps_sampled: 364000
  num_steps_trained: 364000
iterations_since_restore: 364
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.8
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.9
  vram_util_percent0: 0.9449909821282177
  vram_util_pe

[2m[36m(pid=24699)[0m 2020-09-21 16:55:04,888	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 6460.781709948884,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 6780},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-3.214, max=1.532, mean=0.375),
[2m[36m(pid=24699)[0m                                   'prev_action': 10,
[2m[36m(pid=24699)

2020-09-21 16:55:09,416	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.13093370641581714, 'policy_loss': 0.001031869149301201, 'vf_loss': 0.13472542096860707, 'vf_explained_var': 0.90295786, 'kl': 0.0008876524729695134, 'entropy': 1.234342411160469, 'entropy_coeff': 0.005}
2020-09-21 16:55:09,924	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.0192364735994488, 'policy_loss': -0.029419898346532136, 'vf_loss': 0.049237599363550544, 'vf_explained_var': 0.96115166, 'kl': 0.0036775102489627898, 'entropy': 1.2332878932356834, 'entropy_coeff': 0.005}
2020-09-21 16:55:10,371	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.02798630716279149, 'policy_loss': -0.05747013876680285, 'vf_loss': 0.027624589973129332, 'vf_explained_var': 0.97881436, 'kl': 0.005252679999102838, 'entropy': 1.223

2020-09-21 16:55:20,832	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.11160843126708642, 'policy_loss': -0.1252392183523625, 'vf_loss': 0.001228534343681531, 'vf_explained_var': 0.9989487, 'kl': 0.012221997312735766, 'entropy': 1.231978952884674, 'entropy_coeff': 0.005}
2020-09-21 16:55:21,323	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.10300041676964611, 'policy_loss': -0.11601678421720862, 'vf_loss': 0.001183807420602534, 'vf_explained_var': 0.99896145, 'kl': 0.011844654043670744, 'entropy': 1.2313020005822182, 'entropy_coeff': 0.005}
2020-09-21 16:55:21,776	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.1091951088164933, 'policy_loss': -0.12225817993748933, 'vf_loss': 0.001103092003177153, 'vf_explained_var': 0.9990492, 'kl': 0.011933743313420564, 'entropy': 1.232

custom_metrics: {}
date: 2020-09-21_16-55-23
done: false
episode_len_mean: 3057.57
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.28605513928838
episode_reward_min: -32.22698139089825
episodes_this_iter: 1
episodes_total: 156
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.2340950146317482
      entropy_coeff: 0.005
      kl: 0.01243152969982475
      policy_loss: -0.12048977991798893
      total_loss: -0.10683289298322052
      vf_explained_var: 0.9992154240608215
      vf_loss: 0.0009469835167692509
  num_steps_sampled: 365000
  num_steps_trained: 365000
iterations_since_restore: 365
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.695652173913044
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.9
  vram_util_percent0: 0.9449909821282177

[2m[36m(pid=24699)[0m 2020-09-21 16:55:23,883	INFO rollout_worker.py:561 -- Generating sample batch of size 200
2020-09-21 16:55:27,064	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.1799711249768734, 'policy_loss': -0.005925698671489954, 'vf_loss': 0.18863933021202683, 'vf_explained_var': 0.91415155, 'kl': 0.0019312823165641424, 'entropy': 1.1351266205310822, 'entropy_coeff': 0.005}
2020-09-21 16:55:27,520	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.05457077664323151, 'policy_loss': -0.03028730813821312, 'vf_loss': 0.08510680729523301, 'vf_explained_var': 0.96064705, 'kl': 0.003586149148759432, 'entropy': 1.1390377283096313, 'entropy_coeff': 0.005}
2020-09-21 16:55:28,007	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.014209400513209403, 'policy_loss': -0.04795673041371

2020-09-21 16:55:38,469	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.07844441832276061, 'policy_loss': -0.1085542639484629, 'vf_loss': 0.016376355102693196, 'vf_explained_var': 0.99180144, 'kl': 0.012760525045450777, 'entropy': 1.1293121464550495, 'entropy_coeff': 0.005}
2020-09-21 16:55:38,924	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.0883907206589356, 'policy_loss': -0.11764502816367894, 'vf_loss': 0.015438223323144484, 'vf_explained_var': 0.9918165, 'kl': 0.012795491551514715, 'entropy': 1.1234128922224045, 'entropy_coeff': 0.005}
2020-09-21 16:55:39,409	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.07116204942576587, 'policy_loss': -0.10484357469249517, 'vf_loss': 0.020029147483000997, 'vf_explained_var': 0.9908383, 'kl': 0.012707516842056066, 'entropy': 1.12

custom_metrics: {}
date: 2020-09-21_16-55-40
done: false
episode_len_mean: 3150.03
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.21111348876701
episode_reward_min: -32.22698139089825
episodes_this_iter: 1
episodes_total: 157
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.1250854060053825
      entropy_coeff: 0.005
      kl: 0.013267502537928522
      policy_loss: -0.11369345732964575
      total_loss: -0.08640802907757461
      vf_explained_var: 0.993882417678833
      vf_loss: 0.012760833924403414
  num_steps_sampled: 366000
  num_steps_trained: 366000
iterations_since_restore: 366
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.439130434782609
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.9
  vram_util_percent0: 0.9449909821282177


2020-09-21 16:55:43,949	DEBUG rollout_worker.py:760 -- Training out:

{ 'default_policy': { 'learner_stats': { 'allreduce_latency': 0.0,
                                         'cur_kl_coeff': 1.5187499999999998,
                                         'cur_lr': 1e-05,
                                         'entropy': 1.2238887548446655,
                                         'entropy_coeff': 0.005,
                                         'kl': -8.923430883100991e-09,
                                         'policy_loss': 0.055306341499090195,
                                         'total_loss': 0.22708077728748322,
                                         'vf_explained_var': np.ndarray((1,), dtype=float32, min=0.971, max=0.971, mean=0.971),
                                         'vf_loss': 0.17789387702941895}}}

2020-09-21 16:55:43,954	INFO rollout_worker.py:736 -- Training on concatenated sample batches:

{ 'count': 64,
  'policy_batches': { 'default_policy': { 'data': {

2020-09-21 16:55:51,045	DEBUG sgd.py:120 -- 14 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.08884616248542443, 'policy_loss': -0.1023481012089178, 'vf_loss': 0.0038731049717171118, 'vf_explained_var': 0.9993935, 'kl': 0.010528016311582178, 'entropy': 1.2721186429262161, 'entropy_coeff': 0.005}
2020-09-21 16:55:51,531	DEBUG sgd.py:120 -- 15 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.08542806282639503, 'policy_loss': -0.09912112983874977, 'vf_loss': 0.0037637369823642075, 'vf_explained_var': 0.9994236, 'kl': 0.010747416090453044, 'entropy': 1.2786613628268242, 'entropy_coeff': 0.005}
2020-09-21 16:55:51,990	DEBUG sgd.py:120 -- 16 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.08064046499202959, 'policy_loss': -0.09451191819971427, 'vf_loss': 0.004098412915482186, 'vf_explained_var': 0.9993879, 'kl': 0.01063027436612174, 'entropy': 1.2

custom_metrics: {}
date: 2020-09-21_16-55-58
done: false
episode_len_mean: 3150.03
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.211113488767015
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 157
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.2655041590332985
      entropy_coeff: 0.005
      kl: 0.012451086717192084
      policy_loss: -0.11450867078383453
      total_loss: -0.09868102718610317
      vf_explained_var: 0.99950110912323
      vf_loss: 0.003245078245527111
  num_steps_sampled: 367000
  num_steps_trained: 367000
iterations_since_restore: 367
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.422727272727272
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.9
  vram_util_percent0: 0.9449909821282177


2020-09-21 16:56:01,775	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.25216855434700847, 'policy_loss': 0.004511204548180103, 'vf_loss': 0.25181463663466275, 'vf_explained_var': 0.9761051, 'kl': 0.0013042709296903765, 'entropy': 1.2276296392083168, 'entropy_coeff': 0.005}
2020-09-21 16:56:02,270	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.03361208364367485, 'policy_loss': -0.021002044260967523, 'vf_loss': 0.05645125266164541, 'vf_explained_var': 0.9939123, 'kl': 0.0027513215463841334, 'entropy': 1.2031382322311401, 'entropy_coeff': 0.005}
2020-09-21 16:56:02,759	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.014217121060937643, 'policy_loss': -0.04079057835042477, 'vf_loss': 0.02551807218696922, 'vf_explained_var': 0.99715704, 'kl': 0.004671875722124241, 'entropy': 1.2080

2020-09-21 16:56:13,171	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.08307310030795634, 'policy_loss': -0.104901748301927, 'vf_loss': 0.009101813455345109, 'vf_explained_var': 0.99894166, 'kl': 0.012357704632449895, 'entropy': 1.2082868069410324, 'entropy_coeff': 0.005}
2020-09-21 16:56:13,656	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.08618924813345075, 'policy_loss': -0.10839321906678379, 'vf_loss': 0.009212977267452516, 'vf_explained_var': 0.9989114, 'kl': 0.012523186975158751, 'entropy': 1.2057180143892765, 'entropy_coeff': 0.005}
2020-09-21 16:56:14,109	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.08682894718367606, 'policy_loss': -0.10890199930872768, 'vf_loss': 0.008818392860121094, 'vf_explained_var': 0.99895895, 'kl': 0.01269874011632055, 'entropy': 1.20

custom_metrics: {}
date: 2020-09-21_16-56-15
done: false
episode_len_mean: 3150.03
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.211113488767015
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 157
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.2039933279156685
      entropy_coeff: 0.005
      kl: 0.01290299737593159
      policy_loss: -0.10881020175293088
      total_loss: -0.08646166522521526
      vf_explained_var: 0.9989780187606812
      vf_loss: 0.008772073982981965
  num_steps_sampled: 368000
  num_steps_trained: 368000
iterations_since_restore: 368
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.259090909090909
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.9
  vram_util_percent0: 0.9449909821282177

2020-09-21 16:56:19,370	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.09972388250753284, 'policy_loss': 0.0003941252361983061, 'vf_loss': 0.1024304274469614, 'vf_explained_var': 0.96246886, 'kl': 0.002041648610454083, 'entropy': 1.2402840480208397, 'entropy_coeff': 0.005}
2020-09-21 16:56:19,858	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.007848811568692327, 'policy_loss': -0.030460299807600677, 'vf_loss': 0.037493617506697774, 'vf_explained_var': 0.98500293, 'kl': 0.0045975946268299595, 'entropy': 1.233419381082058, 'entropy_coeff': 0.005}
2020-09-21 16:56:20,311	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.02477839356288314, 'policy_loss': -0.050246115075424314, 'vf_loss': 0.021843687165528536, 'vf_explained_var': 0.9916166, 'kl': 0.006453500973293558, 'entropy': 1.23

2020-09-21 16:56:30,773	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.09961636294610798, 'policy_loss': -0.11405446368735284, 'vf_loss': 0.0014601630900870077, 'vf_explained_var': 0.99941176, 'kl': 0.012585828662849963, 'entropy': 1.2273562997579575, 'entropy_coeff': 0.005}
2020-09-21 16:56:31,267	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.09188782540149987, 'policy_loss': -0.10665099625475705, 'vf_loss': 0.001455165540392045, 'vf_explained_var': 0.999414, 'kl': 0.012809253123123199, 'entropy': 1.2292094007134438, 'entropy_coeff': 0.005}
2020-09-21 16:56:31,726	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.09790801873896271, 'policy_loss': -0.11291529063601047, 'vf_loss': 0.0014742758721695282, 'vf_explained_var': 0.9994088, 'kl': 0.012930838507600129, 'entropy': 1

custom_metrics: {}
date: 2020-09-21_16-56-33
done: false
episode_len_mean: 3150.03
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.211113488767015
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 157
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.2301634550094604
      entropy_coeff: 0.005
      kl: 0.012834515218855813
      policy_loss: -0.11817814747337252
      total_loss: -0.1035448715556413
      vf_explained_var: 0.9994778037071228
      vf_loss: 0.0012916695450257976
  num_steps_sampled: 369000
  num_steps_trained: 369000
iterations_since_restore: 369
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.48695652173913
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.9
  vram_util_percent0: 0.9449909821282177

[2m[36m(pid=24699)[0m 2020-09-21 16:56:33,471	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 5598.44,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 7780},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-1.3, max=1.688, mean=0.457),
[2m[36m(pid=24699)[0m                                   'prev_action': 12,
[2m[36m(pid=24699)[0m         

2020-09-21 16:56:36,957	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.15399057418107986, 'policy_loss': -0.0011457304935902357, 'vf_loss': 0.15976335760205984, 'vf_explained_var': 0.9698349, 'kl': 0.0011229947695458598, 'entropy': 1.2665189504623413, 'entropy_coeff': 0.005}
2020-09-21 16:56:37,449	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.01693707733647898, 'policy_loss': -0.04316574468975887, 'vf_loss': 0.05798761313781142, 'vf_explained_var': 0.98875064, 'kl': 0.005568174383370206, 'entropy': 1.2682903036475182, 'entropy_coeff': 0.005}
2020-09-21 16:56:37,904	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.023747520754113793, 'policy_loss': -0.06237719126511365, 'vf_loss': 0.032456577639095485, 'vf_explained_var': 0.9939442, 'kl': 0.008277195447590202, 'entropy': 1.279

2020-09-21 16:56:44,086	DEBUG sgd.py:120 -- 15 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.09636124479584396, 'policy_loss': -0.11528718285262585, 'vf_loss': 0.005612102439044975, 'vf_explained_var': 0.9988518, 'kl': 0.012986569723580033, 'entropy': 1.2819045186042786, 'entropy_coeff': 0.005}
2020-09-21 16:56:44,570	DEBUG sgd.py:120 -- 16 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.10002362472005188, 'policy_loss': -0.11905759128421778, 'vf_loss': 0.005471633194247261, 'vf_explained_var': 0.99888647, 'kl': 0.013177169545087963, 'entropy': 1.2900991067290306, 'entropy_coeff': 0.005}
2020-09-21 16:56:45,061	DEBUG sgd.py:120 -- 17 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.10242312744958326, 'policy_loss': -0.12103369156830013, 'vf_loss': 0.0051842151951859705, 'vf_explained_var': 0.99894357, 'kl': 0.01307589141651988, 'entropy': 1

custom_metrics: {}
date: 2020-09-21_16-56-50
done: false
episode_len_mean: 3150.03
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.211113488767015
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 157
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.288718320429325
      entropy_coeff: 0.005
      kl: 0.014952605939470232
      policy_loss: -0.14490296016447246
      total_loss: -0.12374253268353641
      vf_explained_var: 0.9989525079727173
      vf_loss: 0.0048947490358841605
  num_steps_sampled: 370000
  num_steps_trained: 370000
iterations_since_restore: 370
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.439130434782608
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.9
  vram_util_percent0: 0.944990982128217

[2m[36m(pid=24699)[0m 2020-09-21 16:56:50,992	INFO rollout_worker.py:561 -- Generating sample batch of size 200
2020-09-21 16:56:54,223	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.08603760378900915, 'policy_loss': 0.0039005252765491605, 'vf_loss': 0.08573149377480149, 'vf_explained_var': 0.98225564, 'kl': 0.0013384630591934954, 'entropy': 1.1254415810108185, 'entropy_coeff': 0.005}
2020-09-21 16:56:54,713	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.011043464648537338, 'policy_loss': -0.021636566321831197, 'vf_loss': 0.0345357668120414, 'vf_explained_var': 0.9926615, 'kl': 0.0024289903230965137, 'entropy': 1.108953382819891, 'entropy_coeff': 0.005}
2020-09-21 16:56:55,166	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.016472169576445594, 'policy_loss': -0.038097877288

2020-09-21 16:57:05,696	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.08230399299645796, 'policy_loss': -0.10427963430993259, 'vf_loss': 0.009454509592615068, 'vf_explained_var': 0.9978606, 'kl': 0.011943118064664304, 'entropy': 1.1234966218471527, 'entropy_coeff': 0.005}
2020-09-21 16:57:06,191	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.07721333066001534, 'policy_loss': -0.09942743164720014, 'vf_loss': 0.009436195279704407, 'vf_explained_var': 0.9977726, 'kl': 0.01210382257704623, 'entropy': 1.1209543570876122, 'entropy_coeff': 0.005}
2020-09-21 16:57:06,648	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.08335590770002455, 'policy_loss': -0.10578133852686733, 'vf_loss': 0.009601596975699067, 'vf_explained_var': 0.99778277, 'kl': 0.012140734354034066, 'entropy': 1.1

custom_metrics: {}
date: 2020-09-21_16-57-08
done: false
episode_len_mean: 3217.9
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.15425534425736
episode_reward_min: -32.22698139089825
episodes_this_iter: 1
episodes_total: 158
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.1184128448367119
      entropy_coeff: 0.005
      kl: 0.01280222786590457
      policy_loss: -0.10430270817596465
      total_loss: -0.0810354589484632
      vf_explained_var: 0.9978968501091003
      vf_loss: 0.009415930137038231
  num_steps_sampled: 371000
  num_steps_trained: 371000
iterations_since_restore: 371
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 6.295454545454546
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.9
  vram_util_percent0: 0.9449909821282177
  

2020-09-21 16:57:11,121	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.11155603569932282, 'policy_loss': 0.0019926142413169146, 'vf_loss': 0.11220451164990664, 'vf_explained_var': 0.9505408, 'kl': 0.002627569597323798, 'entropy': 1.3263421580195427, 'entropy_coeff': 0.005}
2020-09-21 16:57:11,614	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.013012256938964128, 'policy_loss': -0.026136809028685093, 'vf_loss': 0.04017020028550178, 'vf_explained_var': 0.980459, 'kl': 0.003703965383465402, 'entropy': 1.3293065279722214, 'entropy_coeff': 0.005}
2020-09-21 16:57:12,065	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.018461044877767563, 'policy_loss': -0.041830350644886494, 'vf_loss': 0.02197829558281228, 'vf_explained_var': 0.9883871, 'kl': 0.005305975952069275, 'entropy': 1.33348

2020-09-21 16:57:22,547	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.10457149392459542, 'policy_loss': -0.1256867798510939, 'vf_loss': 0.0074981759826187044, 'vf_explained_var': 0.99637985, 'kl': 0.013358562253415585, 'entropy': 1.3342411443591118, 'entropy_coeff': 0.005}
2020-09-21 16:57:23,034	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.11531376652419567, 'policy_loss': -0.1368551456835121, 'vf_loss': 0.007062199365464039, 'vf_explained_var': 0.9964368, 'kl': 0.013903783285059035, 'entropy': 1.3274387791752815, 'entropy_coeff': 0.005}
2020-09-21 16:57:23,492	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.11452463455498219, 'policy_loss': -0.13623879104852676, 'vf_loss': 0.006920724670635536, 'vf_explained_var': 0.9964416, 'kl': 0.014123640139587224, 'entropy': 1.3

custom_metrics: {}
date: 2020-09-21_16-57-24
done: false
episode_len_mean: 3217.9
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.15425534425737
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 158
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.3317292854189873
      entropy_coeff: 0.005
      kl: 0.014324775314889848
      policy_loss: -0.1358727322658524
      total_loss: -0.11361143854446709
      vf_explained_var: 0.9963696002960205
      vf_loss: 0.007164193157223053
  num_steps_sampled: 372000
  num_steps_trained: 372000
iterations_since_restore: 372
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.277272727272727
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.9
  vram_util_percent0: 0.9449909821282177
 

2020-09-21 16:57:28,506	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.1039919478353113, 'policy_loss': 0.0072872573509812355, 'vf_loss': 0.10193016938865185, 'vf_explained_var': 0.984774, 'kl': 0.0005711629792852779, 'entropy': 1.2185861244797707, 'entropy_coeff': 0.005}
2020-09-21 16:57:28,989	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.015009251015726477, 'policy_loss': -0.02358119178097695, 'vf_loss': 0.03831771691329777, 'vf_explained_var': 0.9934986, 'kl': 0.004122547979932278, 'entropy': 1.197679027915001, 'entropy_coeff': 0.005}
2020-09-21 16:57:29,438	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.019448999490123242, 'policy_loss': -0.04348865640349686, 'vf_loss': 0.022032517998013645, 'vf_explained_var': 0.99628264, 'kl': 0.005271226720651612, 'entropy': 1.199707

2020-09-21 16:57:39,926	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.0872050478355959, 'policy_loss': -0.10510014474857599, 'vf_loss': 0.0061750591557938606, 'vf_explained_var': 0.99895686, 'kl': 0.011694516753777862, 'entropy': 1.2082020491361618, 'entropy_coeff': 0.005}
2020-09-21 16:57:40,417	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.09005505125969648, 'policy_loss': -0.10770189668983221, 'vf_loss': 0.005813077885250095, 'vf_explained_var': 0.9990688, 'kl': 0.011774066311772913, 'entropy': 1.2096198052167892, 'entropy_coeff': 0.005}
2020-09-21 16:57:40,869	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.09526160231325775, 'policy_loss': -0.11281912820413709, 'vf_loss': 0.005739171610912308, 'vf_explained_var': 0.99908304, 'kl': 0.011745682160835713, 'entropy': 1

custom_metrics: {}
date: 2020-09-21_16-57-42
done: false
episode_len_mean: 3217.9
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.15425534425737
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 158
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.2138976380228996
      entropy_coeff: 0.005
      kl: 0.011868715286254883
      policy_loss: -0.12067841365933418
      total_loss: -0.10294129722751677
      vf_explained_var: 0.9990578889846802
      vf_loss: 0.005780990628409199
  num_steps_sampled: 373000
  num_steps_trained: 373000
iterations_since_restore: 373
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.254545454545454
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.9
  vram_util_percent0: 0.9449909821282177


2020-09-21 16:57:45,423	DEBUG rollout_worker.py:760 -- Training out:

{ 'default_policy': { 'learner_stats': { 'allreduce_latency': 0.0,
                                         'cur_kl_coeff': 1.5187499999999998,
                                         'cur_lr': 1e-05,
                                         'entropy': 1.374706745147705,
                                         'entropy_coeff': 0.005,
                                         'kl': -6.416504660933242e-09,
                                         'policy_loss': 0.25002148747444153,
                                         'total_loss': 0.42552652955055237,
                                         'vf_explained_var': np.ndarray((1,), dtype=float32, min=0.939, max=0.939, mean=0.939),
                                         'vf_loss': 0.18237857520580292}}}

2020-09-21 16:57:45,427	INFO rollout_worker.py:736 -- Training on concatenated sample batches:

{ 'count': 40,
  'policy_batches': { 'default_policy': { 'data': { '

2020-09-21 16:57:52,515	DEBUG sgd.py:120 -- 14 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.10298201948171481, 'policy_loss': -0.11626877239905298, 'vf_loss': 0.00218032393604517, 'vf_explained_var': 0.9991797, 'kl': 0.011510585900396109, 'entropy': 1.2750552594661713, 'entropy_coeff': 0.005}
2020-09-21 16:57:53,005	DEBUG sgd.py:120 -- 15 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.09842239104909822, 'policy_loss': -0.11173243759549223, 'vf_loss': 0.0020985744122299366, 'vf_explained_var': 0.99929523, 'kl': 0.011576311546377838, 'entropy': 1.2740106955170631, 'entropy_coeff': 0.005}
2020-09-21 16:57:53,461	DEBUG sgd.py:120 -- 16 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.10712960548698902, 'policy_loss': -0.12105984869413078, 'vf_loss': 0.002026723715971457, 'vf_explained_var': 0.99923694, 'kl': 0.012048996693920344, 'entropy': 1

custom_metrics: {}
date: 2020-09-21_16-57-59
done: false
episode_len_mean: 3217.9
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.15425534425737
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 158
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.2762939184904099
      entropy_coeff: 0.005
      kl: 0.014380719047039747
      policy_loss: -0.12976476759649813
      total_loss: -0.11268500331789255
      vf_explained_var: 0.9994539618492126
      vf_loss: 0.0016205157735385
  num_steps_sampled: 374000
  num_steps_trained: 374000
iterations_since_restore: 374
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.25909090909091
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.9
  vram_util_percent0: 0.9449909821282177
  v

[2m[36m(pid=24699)[0m 2020-09-21 16:57:59,904	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 13569.702193101642,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 674},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-1.276, max=1.923, mean=0.321),
[2m[36m(pid=24699)[0m                                   'prev_action': 2,
[2m[36m(pid=24699)[

2020-09-21 16:58:03,170	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.09529003885108978, 'policy_loss': -0.0008051344775594771, 'vf_loss': 0.10093495855107903, 'vf_explained_var': 0.9555812, 'kl': 0.0007020047707305688, 'entropy': 1.1811908334493637, 'entropy_coeff': 0.005}
2020-09-21 16:58:03,656	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.017339999903924763, 'policy_loss': -0.016816260176710784, 'vf_loss': 0.03622032422572374, 'vf_explained_var': 0.9835967, 'kl': 0.0025171682573272847, 'entropy': 1.1774026826024055, 'entropy_coeff': 0.005}
2020-09-21 16:58:04,112	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.01882091024890542, 'policy_loss': -0.03771835647057742, 'vf_loss': 0.017587625654414296, 'vf_explained_var': 0.99145675, 'kl': 0.004744385587400757, 'entropy': 1.1

2020-09-21 16:58:14,584	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.1031376423779875, 'policy_loss': -0.12002339190803468, 'vf_loss': 0.00279539553594077, 'vf_explained_var': 0.99867415, 'kl': 0.013153129199054092, 'entropy': 1.177192434668541, 'entropy_coeff': 0.005}
2020-09-21 16:58:15,078	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.10707320389337838, 'policy_loss': -0.12359436054248363, 'vf_loss': 0.002651711904036347, 'vf_explained_var': 0.9986712, 'kl': 0.012989156413823366, 'entropy': 1.1715681105852127, 'entropy_coeff': 0.005}
2020-09-21 16:58:15,531	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.10143993439851329, 'policy_loss': -0.11843585129827261, 'vf_loss': 0.002678578119230224, 'vf_explained_var': 0.9986795, 'kl': 0.013290767790749669, 'entropy': 1.173

custom_metrics: {}
date: 2020-09-21_16-58-17
done: false
episode_len_mean: 3217.9
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.15425534425737
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 158
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.1685200072824955
      entropy_coeff: 0.005
      kl: 0.013710866856854409
      policy_loss: -0.1271312553435564
      total_loss: -0.10948584764264524
      vf_explained_var: 0.998701810836792
      vf_loss: 0.0026646328551578335
  num_steps_sampled: 375000
  num_steps_trained: 375000
iterations_since_restore: 375
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.3
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.9
  vram_util_percent0: 0.9449909821282177
  vram_util_per

[2m[36m(pid=24699)[0m 2020-09-21 16:58:17,269	INFO rollout_worker.py:561 -- Generating sample batch of size 200
2020-09-21 16:58:20,725	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.1140602242667228, 'policy_loss': 0.01140287343878299, 'vf_loss': 0.1074505029246211, 'vf_explained_var': 0.9384284, 'kl': 0.000739806761262729, 'entropy': 1.1833464875817299, 'entropy_coeff': 0.005}
2020-09-21 16:58:21,220	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.017792688740883023, 'policy_loss': -0.020663684175815433, 'vf_loss': 0.03820744389668107, 'vf_explained_var': 0.97717553, 'kl': 0.003982743481174111, 'entropy': 1.159972496330738, 'entropy_coeff': 0.005}
2020-09-21 16:58:21,671	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.014780511613935232, 'policy_loss': -0.03613600530661642

2020-09-21 16:58:32,167	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.09351904725190252, 'policy_loss': -0.10941235173959285, 'vf_loss': 0.004365465691080317, 'vf_explained_var': 0.99709487, 'kl': 0.011410527164116502, 'entropy': 1.1603788509964943, 'entropy_coeff': 0.005}
2020-09-21 16:58:32,657	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.09440071962308139, 'policy_loss': -0.11117960955016315, 'vf_loss': 0.004262235932401381, 'vf_explained_var': 0.9970318, 'kl': 0.012078692438080907, 'entropy': 1.16557215154171, 'entropy_coeff': 0.005}
2020-09-21 16:58:33,109	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.09789624728728086, 'policy_loss': -0.11493218422401696, 'vf_loss': 0.0042171108070760965, 'vf_explained_var': 0.9970984, 'kl': 0.012251003354322165, 'entropy': 1.1

custom_metrics: {}
date: 2020-09-21_16-58-34
done: false
episode_len_mean: 3217.9
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.15425534425737
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 158
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.1562388949096203
      entropy_coeff: 0.005
      kl: 0.012461410544347018
      policy_loss: -0.11128109431592748
      total_loss: -0.09387233876623213
      vf_explained_var: 0.9971061944961548
      vf_loss: 0.004264187169610523
  num_steps_sampled: 376000
  num_steps_trained: 376000
iterations_since_restore: 376
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.473913043478262
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.9
  vram_util_percent0: 0.9449909821282177


2020-09-21 16:58:38,377	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.06416935124434531, 'policy_loss': 0.0021012845681980252, 'vf_loss': 0.06443747028242797, 'vf_explained_var': 0.8269577, 'kl': 0.0022317893964571844, 'entropy': 1.1517863050103188, 'entropy_coeff': 0.005}
2020-09-21 16:58:38,860	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.0074390912195667624, 'policy_loss': -0.016537602292373776, 'vf_loss': 0.02424595842603594, 'vf_explained_var': 0.93226, 'kl': 0.0036101900041103363, 'entropy': 1.150448851287365, 'entropy_coeff': 0.005}
2020-09-21 16:58:39,306	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.017059440724551678, 'policy_loss': -0.032551979820709676, 'vf_loss': 0.011860488157253712, 'vf_explained_var': 0.96680236, 'kl': 0.006186908925883472, 'entropy': 1.15

2020-09-21 16:58:45,474	DEBUG sgd.py:120 -- 15 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.07642588205635548, 'policy_loss': -0.08817580225877464, 'vf_loss': 0.002874062236514874, 'vf_explained_var': 0.99150455, 'kl': 0.009673768829088658, 'entropy': 1.1632368117570877, 'entropy_coeff': 0.005}
2020-09-21 16:58:45,957	DEBUG sgd.py:120 -- 16 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.08155737293418497, 'policy_loss': -0.09347424318548292, 'vf_loss': 0.00285084979259409, 'vf_explained_var': 0.990802, 'kl': 0.009749405202455819, 'entropy': 1.1481769010424614, 'entropy_coeff': 0.005}
2020-09-21 16:58:46,443	DEBUG sgd.py:120 -- 17 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.07948662992566824, 'policy_loss': -0.09225733135826886, 'vf_loss': 0.0028125390454079024, 'vf_explained_var': 0.991729, 'kl': 0.010374327015597373, 'entropy': 1.15

custom_metrics: {}
date: 2020-09-21_16-58-52
done: false
episode_len_mean: 3217.9
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.15425534425737
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 158
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.159013569355011
      entropy_coeff: 0.005
      kl: 0.01289577130228281
      policy_loss: -0.11089382728096098
      total_loss: -0.09475172433303669
      vf_explained_var: 0.9931595325469971
      vf_loss: 0.002351728100620676
  num_steps_sampled: 377000
  num_steps_trained: 377000
iterations_since_restore: 377
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.521739130434784
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.9
  vram_util_percent0: 0.9449909821282177
  

2020-09-21 16:58:56,033	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.03967880451818928, 'policy_loss': -0.0016449615359306335, 'vf_loss': 0.04515460517723113, 'vf_explained_var': 0.95793235, 'kl': 0.0012891085206125918, 'entropy': 1.1577333062887192, 'entropy_coeff': 0.005}
2020-09-21 16:58:56,515	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.001463904744014144, 'policy_loss': -0.018531815614551306, 'vf_loss': 0.01646066061221063, 'vf_explained_var': 0.98577696, 'kl': 0.006116555057815276, 'entropy': 1.1508916318416595, 'entropy_coeff': 0.005}
2020-09-21 16:58:56,967	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.020419424050487578, 'policy_loss': -0.031093719298951328, 'vf_loss': 0.007773568038828671, 'vf_explained_var': 0.99295384, 'kl': 0.0056804232590366155, 'entropy':

2020-09-21 16:59:07,437	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.09599856333807111, 'policy_loss': -0.11110353376716375, 'vf_loss': 0.0006885062157380162, 'vf_explained_var': 0.9993692, 'kl': 0.013278648781124502, 'entropy': 1.1500968411564827, 'entropy_coeff': 0.005}
2020-09-21 16:59:07,926	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.09071214342839085, 'policy_loss': -0.10686657571932301, 'vf_loss': 0.0006629866838920861, 'vf_explained_var': 0.9993519, 'kl': 0.013976536749396473, 'entropy': 1.1470819190144539, 'entropy_coeff': 0.005}
2020-09-21 16:59:08,372	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.08999108173884451, 'policy_loss': -0.10672044917009771, 'vf_loss': 0.0006744737838744186, 'vf_explained_var': 0.99936295, 'kl': 0.014370583114214242, 'entropy':

custom_metrics: {}
date: 2020-09-21_16-59-09
done: false
episode_len_mean: 3217.9
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.15425534425737
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 158
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.14769047498703
      entropy_coeff: 0.005
      kl: 0.013910552195739001
      policy_loss: -0.11376143270172179
      total_loss: -0.09775462478864938
      vf_explained_var: 0.9993937015533447
      vf_loss: 0.0006186168266140157
  num_steps_sampled: 378000
  num_steps_trained: 378000
iterations_since_restore: 378
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.321739130434783
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.9
  vram_util_percent0: 0.9449909821282177
 

2020-09-21 16:59:13,547	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.0630049203755334, 'policy_loss': -0.0029439766658470035, 'vf_loss': 0.0692588861566037, 'vf_explained_var': 0.90240514, 'kl': 0.0018470475733924907, 'entropy': 1.2230377569794655, 'entropy_coeff': 0.005}
2020-09-21 16:59:14,031	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.010751642868854105, 'policy_loss': -0.016264938632957637, 'vf_loss': 0.026919934316538274, 'vf_explained_var': 0.9643086, 'kl': 0.004142569596297108, 'entropy': 1.2389753013849258, 'entropy_coeff': 0.005}
2020-09-21 16:59:14,485	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.031214516144245863, 'policy_loss': -0.0463703841669485, 'vf_loss': 0.014673116384074092, 'vf_explained_var': 0.9802598, 'kl': 0.0044045319518772885, 'entropy': 1.24

2020-09-21 16:59:24,964	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.10598370246589184, 'policy_loss': -0.12493518646806479, 'vf_loss': 0.005107442368171178, 'vf_explained_var': 0.9923668, 'kl': 0.013138492649886757, 'entropy': 1.2220080345869064, 'entropy_coeff': 0.005}
2020-09-21 16:59:25,456	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.09924678783863783, 'policy_loss': -0.11887987714726478, 'vf_loss': 0.005079211943666451, 'vf_explained_var': 0.9918718, 'kl': 0.013628873566631228, 'entropy': 1.22899579256773, 'entropy_coeff': 0.005}
2020-09-21 16:59:25,903	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.10561360488645732, 'policy_loss': -0.1251167261507362, 'vf_loss': 0.005363697127904743, 'vf_explained_var': 0.992164, 'kl': 0.013354743598029017, 'entropy': 1.22861

custom_metrics: {}
date: 2020-09-21_16-59-27
done: false
episode_len_mean: 3217.9
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.15425534425737
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 158
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.2289707660675049
      entropy_coeff: 0.005
      kl: 0.014802379417233169
      policy_loss: -0.12896737188566476
      total_loss: -0.10732317215297371
      vf_explained_var: 0.9926239252090454
      vf_loss: 0.00530793919460848
  num_steps_sampled: 379000
  num_steps_trained: 379000
iterations_since_restore: 379
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.463636363636363
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.9
  vram_util_percent0: 0.9449909821282177
 

[2m[36m(pid=24699)[0m 2020-09-21 16:59:27,638	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 15644.355755896911,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 1674},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-6.365, max=2.659, mean=0.265),
[2m[36m(pid=24699)[0m                                   'prev_action': 14,
[2m[36m(pid=24699)

2020-09-21 16:59:31,016	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.06486597622279078, 'policy_loss': -0.004575746250338852, 'vf_loss': 0.07337736245244741, 'vf_explained_var': 0.9414059, 'kl': 0.0013449821030718234, 'entropy': 1.1956665441393852, 'entropy_coeff': 0.005}
2020-09-21 16:59:31,469	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.019031546893529594, 'policy_loss': -0.014481650781817734, 'vf_loss': 0.030600082711316645, 'vf_explained_var': 0.97473425, 'kl': 0.005915672823903151, 'entropy': 1.2142616584897041, 'entropy_coeff': 0.005}
2020-09-21 16:59:31,964	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.02127450768603012, 'policy_loss': -0.04216843476751819, 'vf_loss': 0.018934913678094745, 'vf_explained_var': 0.9846198, 'kl': 0.005259389930870384, 'entropy': 1.20

2020-09-21 16:59:42,396	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.08926945924758911, 'policy_loss': -0.1085453771520406, 'vf_loss': 0.007950897714181338, 'vf_explained_var': 0.9930426, 'kl': 0.0113963604089804, 'entropy': 1.1966406777501106, 'entropy_coeff': 0.005}
2020-09-21 16:59:42,851	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.0876075190026313, 'policy_loss': -0.10783804254606366, 'vf_loss': 0.00812921308533987, 'vf_explained_var': 0.9926486, 'kl': 0.011896590585820377, 'entropy': 1.193327620625496, 'entropy_coeff': 0.005}
2020-09-21 16:59:43,334	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.08675334765575826, 'policy_loss': -0.10775486170314252, 'vf_loss': 0.008220451149099972, 'vf_explained_var': 0.9923643, 'kl': 0.012378121668007225, 'entropy': 1.2036414

custom_metrics: {}
date: 2020-09-21_16-59-44
done: false
episode_len_mean: 3217.9
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.15425534425737
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 158
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.1977953761816025
      entropy_coeff: 0.005
      kl: 0.012887045042589307
      policy_loss: -0.11317885317839682
      total_loss: -0.09111274051247165
      vf_explained_var: 0.9925516247749329
      vf_loss: 0.008482889970764518
  num_steps_sampled: 380000
  num_steps_trained: 380000
iterations_since_restore: 380
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.517391304347826
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.9
  vram_util_percent0: 0.9449909821282177


[2m[36m(pid=24699)[0m 2020-09-21 16:59:45,412	INFO rollout_worker.py:561 -- Generating sample batch of size 200
2020-09-21 16:59:49,002	DEBUG rollout_worker.py:760 -- Training out:

{ 'default_policy': { 'learner_stats': { 'allreduce_latency': 0.0,
                                         'cur_kl_coeff': 1.5187499999999998,
                                         'cur_lr': 1e-05,
                                         'entropy': 1.2715469598770142,
                                         'entropy_coeff': 0.005,
                                         'kl': -1.640209212894206e-08,
                                         'policy_loss': 0.17249393463134766,
                                         'total_loss': 0.2555316388607025,
                                         'vf_explained_var': np.ndarray((1,), dtype=float32, min=0.791, max=0.791, mean=0.791),
                                         'vf_loss': 0.08939547836780548}}}

2020-09-21 16:59:49,006	INFO rollout_worker.py:73

2020-09-21 16:59:55,593	DEBUG sgd.py:120 -- 13 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.07974163652397692, 'policy_loss': -0.09141611517407, 'vf_loss': 0.0023189727071439847, 'vf_explained_var': 0.99450487, 'kl': 0.010090740310261026, 'entropy': 1.1939615830779076, 'entropy_coeff': 0.005}
2020-09-21 16:59:56,074	DEBUG sgd.py:120 -- 14 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.0806115428913472, 'policy_loss': -0.09159651241498068, 'vf_loss': 0.0022695017250953242, 'vf_explained_var': 0.99461704, 'kl': 0.009669195453170687, 'entropy': 1.1939237415790558, 'entropy_coeff': 0.005}
2020-09-21 16:59:56,519	DEBUG sgd.py:120 -- 15 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.0826665055938065, 'policy_loss': -0.09393208264373243, 'vf_loss': 0.002265025606902782, 'vf_explained_var': 0.99471945, 'kl': 0.009870086738374084, 'entropy': 1.1

custom_metrics: {}
date: 2020-09-21_17-00-03
done: false
episode_len_mean: 3217.9
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.15425534425737
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 158
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.1987542510032654
      entropy_coeff: 0.005
      kl: 0.013753685401752591
      policy_loss: -0.11582685762550682
      total_loss: -0.09899939756724052
      vf_explained_var: 0.9958157539367676
      vf_loss: 0.001932819235662464
  num_steps_sampled: 381000
  num_steps_trained: 381000
iterations_since_restore: 381
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.536363636363636
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.9
  vram_util_percent0: 0.9449909821282177


2020-09-21 17:00:07,236	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.08571242488687858, 'policy_loss': 0.0024431785568594933, 'vf_loss': 0.08676245785318315, 'vf_explained_var': 0.94369143, 'kl': 0.001212684893748206, 'entropy': 1.0669959224760532, 'entropy_coeff': 0.005}
2020-09-21 17:00:07,727	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.018953602761030197, 'policy_loss': -0.026089358143508434, 'vf_loss': 0.0457541320938617, 'vf_explained_var': 0.9702748, 'kl': 0.003050790895940736, 'entropy': 1.0689118281006813, 'entropy_coeff': 0.005}
2020-09-21 17:00:08,184	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.0020354138687253, 'policy_loss': -0.037778821075335145, 'vf_loss': 0.03495520120486617, 'vf_explained_var': 0.9765705, 'kl': 0.004039736406411976, 'entropy': 1.069428

2020-09-21 17:00:18,622	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.05992636573500931, 'policy_loss': -0.09067536052316427, 'vf_loss': 0.01986588421277702, 'vf_explained_var': 0.98620933, 'kl': 0.010692643467336893, 'entropy': 1.0712675340473652, 'entropy_coeff': 0.005}
2020-09-21 17:00:19,113	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.06327816774137318, 'policy_loss': -0.09474250767379999, 'vf_loss': 0.019771134248003364, 'vf_explained_var': 0.9862183, 'kl': 0.011225320602534339, 'entropy': 1.0710503086447716, 'entropy_coeff': 0.005}
2020-09-21 17:00:19,565	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.05931920243892819, 'policy_loss': -0.09184090758208185, 'vf_loss': 0.02060228679329157, 'vf_explained_var': 0.9857611, 'kl': 0.011379514646250755, 'entropy': 1.07

custom_metrics: {}
date: 2020-09-21_17-00-21
done: false
episode_len_mean: 3217.9
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.15425534425737
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 158
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.0693176835775375
      entropy_coeff: 0.005
      kl: 0.011491760960780084
      policy_loss: -0.10294403787702322
      total_loss: -0.07125212729442865
      vf_explained_var: 0.9863374829292297
      vf_loss: 0.01958539057523012
  num_steps_sampled: 382000
  num_steps_trained: 382000
iterations_since_restore: 382
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.539130434782609
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.9
  vram_util_percent0: 0.9449909821282177
 

2020-09-21 17:00:25,244	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.06198428641073406, 'policy_loss': -0.004289680975489318, 'vf_loss': 0.07034072210080922, 'vf_explained_var': 0.92673665, 'kl': 0.0008589052661647223, 'entropy': 1.0742434673011303, 'entropy_coeff': 0.005}
2020-09-21 17:00:25,728	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.010358305764384568, 'policy_loss': -0.02705170528497547, 'vf_loss': 0.03403370117302984, 'vf_explained_var': 0.9634816, 'kl': 0.005813753581605852, 'entropy': 1.0906647816300392, 'entropy_coeff': 0.005}
2020-09-21 17:00:26,172	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.019200153183192015, 'policy_loss': -0.038082587299868464, 'vf_loss': 0.017448750673793256, 'vf_explained_var': 0.9805314, 'kl': 0.004531119033345021, 'entropy': 1.08

2020-09-21 17:00:36,640	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.09612191584892571, 'policy_loss': -0.11097434419207275, 'vf_loss': 0.002953616523882374, 'vf_explained_var': 0.99639606, 'kl': 0.011389486549887806, 'entropy': 1.0797948352992535, 'entropy_coeff': 0.005}
2020-09-21 17:00:37,131	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.09828859963454306, 'policy_loss': -0.11371565039735287, 'vf_loss': 0.0029713581898249686, 'vf_explained_var': 0.9963435, 'kl': 0.0117591992020607, 'entropy': 1.0807173326611519, 'entropy_coeff': 0.005}
2020-09-21 17:00:37,590	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.10179094428895041, 'policy_loss': -0.11688044818583876, 'vf_loss': 0.0029769852772005834, 'vf_explained_var': 0.9963659, 'kl': 0.011544500855961815, 'entropy': 1.

custom_metrics: {}
date: 2020-09-21_17-00-39
done: false
episode_len_mean: 3217.9
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.15425534425737
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 158
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.0826738476753235
      entropy_coeff: 0.005
      kl: 0.012623042799532413
      policy_loss: -0.117669771425426
      total_loss: -0.10089928773231804
      vf_explained_var: 0.9965206384658813
      vf_loss: 0.003012606597621925
  num_steps_sampled: 383000
  num_steps_trained: 383000
iterations_since_restore: 383
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.508695652173913
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.9
  vram_util_percent0: 0.9449909821282177
  

2020-09-21 17:00:43,286	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.10442574723856524, 'policy_loss': -0.003952561586629599, 'vf_loss': 0.11028687213547528, 'vf_explained_var': 0.9604574, 'kl': 0.0020919091125667366, 'entropy': 1.0171292126178741, 'entropy_coeff': 0.005}
2020-09-21 17:00:43,773	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.015006706584244967, 'policy_loss': -0.025153592112474144, 'vf_loss': 0.037440357031300664, 'vf_explained_var': 0.9862602, 'kl': 0.005167591109056957, 'entropy': 1.0256668664515018, 'entropy_coeff': 0.005}
2020-09-21 17:00:44,224	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.015461171278730035, 'policy_loss': -0.03917070385068655, 'vf_loss': 0.019856197759509087, 'vf_explained_var': 0.99282455, 'kl': 0.005926517507759854, 'entropy': 1.0

2020-09-21 17:00:49,967	DEBUG sgd.py:120 -- 14 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.06980326934717596, 'policy_loss': -0.08387011755257845, 'vf_loss': 0.002582903813163284, 'vf_explained_var': 0.9988801, 'kl': 0.0109170256764628, 'entropy': 1.0192576572299004, 'entropy_coeff': 0.005}
2020-09-21 17:00:50,453	DEBUG sgd.py:120 -- 15 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.07593964249826968, 'policy_loss': -0.08946076349820942, 'vf_loss': 0.0023836352847865783, 'vf_explained_var': 0.99899876, 'kl': 0.010683084896299988, 'entropy': 1.0174889340996742, 'entropy_coeff': 0.005}
2020-09-21 17:00:50,911	DEBUG sgd.py:120 -- 16 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.07812823681160808, 'policy_loss': -0.0920266512548551, 'vf_loss': 0.0023448217325494625, 'vf_explained_var': 0.998996, 'kl': 0.010955877281958237, 'entropy': 1.01

custom_metrics: {}
date: 2020-09-21_17-00-57
done: false
episode_len_mean: 3217.9
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.15425534425737
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 158
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.0177407003939152
      entropy_coeff: 0.005
      kl: 0.01376167667331174
      policy_loss: -0.11271535977721214
      total_loss: -0.09548640833236277
      vf_explained_var: 0.9994128942489624
      vf_loss: 0.001417105522705242
  num_steps_sampled: 384000
  num_steps_trained: 384000
iterations_since_restore: 384
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.482608695652174
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.9
  vram_util_percent0: 0.9449909821282177
 

[2m[36m(pid=24699)[0m 2020-09-21 17:00:57,379	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 15737.114164315948,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 2674},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-1.506, max=5.626, mean=0.437),
[2m[36m(pid=24699)[0m                                   'prev_action': 3,
[2m[36m(pid=24699)

2020-09-21 17:01:01,406	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.08449005032889545, 'policy_loss': 0.0030334527837112546, 'vf_loss': 0.08462210115976632, 'vf_explained_var': 0.9852694, 'kl': 0.0014336086951256544, 'entropy': 1.0685597658157349, 'entropy_coeff': 0.005}
2020-09-21 17:01:01,912	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.012120200379285961, 'policy_loss': -0.027471087407320738, 'vf_loss': 0.03811473923269659, 'vf_explained_var': 0.9934186, 'kl': 0.004488524355110712, 'entropy': 1.0680781118571758, 'entropy_coeff': 0.005}
2020-09-21 17:01:02,363	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.012455016491003335, 'policy_loss': -0.03673922209418379, 'vf_loss': 0.022374070598743856, 'vf_explained_var': 0.9955808, 'kl': 0.0047393658314831555, 'entropy': 1.05

2020-09-21 17:01:12,830	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.07522772857919335, 'policy_loss': -0.09822837798856199, 'vf_loss': 0.009388248814502731, 'vf_explained_var': 0.99829435, 'kl': 0.012487571686506271, 'entropy': 1.0706191025674343, 'entropy_coeff': 0.005}
2020-09-21 17:01:13,320	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.07605793536640704, 'policy_loss': -0.09949222323484719, 'vf_loss': 0.009314889437519014, 'vf_explained_var': 0.99833643, 'kl': 0.01283068343764171, 'entropy': 1.0734392143785954, 'entropy_coeff': 0.005}
2020-09-21 17:01:13,772	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.0811916571110487, 'policy_loss': -0.10473476815968752, 'vf_loss': 0.009112428553635255, 'vf_explained_var': 0.998304, 'kl': 0.013031079375650734, 'entropy': 1.07

custom_metrics: {}
date: 2020-09-21_17-01-15
done: false
episode_len_mean: 3217.9
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.15425534425737
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 158
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.0750436075031757
      entropy_coeff: 0.005
      kl: 0.013318676908966154
      policy_loss: -0.10297278326470405
      total_loss: -0.07931907649617642
      vf_explained_var: 0.9983516931533813
      vf_loss: 0.008801184070762247
  num_steps_sampled: 385000
  num_steps_trained: 385000
iterations_since_restore: 385
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.626086956521738
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.9
  vram_util_percent0: 0.9449909821282177


[2m[36m(pid=24699)[0m 2020-09-21 17:01:15,514	INFO rollout_worker.py:561 -- Generating sample batch of size 200
2020-09-21 17:01:19,322	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.09388455003499985, 'policy_loss': 0.0013515592436306179, 'vf_loss': 0.09598303423263133, 'vf_explained_var': 0.95933354, 'kl': 0.0013988419161109489, 'entropy': 1.1149062812328339, 'entropy_coeff': 0.005}
2020-09-21 17:01:19,810	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.0017908297013491392, 'policy_loss': -0.038233583443798125, 'vf_loss': 0.03463636222295463, 'vf_explained_var': 0.98448646, 'kl': 0.004852169353398494, 'entropy': 1.1125681027770042, 'entropy_coeff': 0.005}
2020-09-21 17:01:20,268	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.031925130751915276, 'policy_loss': -0.05529555

2020-09-21 17:01:30,718	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.1012251788051799, 'policy_loss': -0.12024864472914487, 'vf_loss': 0.0013865367691323627, 'vf_explained_var': 0.9993105, 'kl': 0.015321138896979392, 'entropy': 1.1264088526368141, 'entropy_coeff': 0.005}
2020-09-21 17:01:31,207	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.10422459023538977, 'policy_loss': -0.12380712328013033, 'vf_loss': 0.0013544376342906617, 'vf_explained_var': 0.9993235, 'kl': 0.015699578099884093, 'entropy': 1.1231281347572803, 'entropy_coeff': 0.005}
2020-09-21 17:01:31,663	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.10365689639002085, 'policy_loss': -0.12370613822713494, 'vf_loss': 0.0013804435802740045, 'vf_explained_var': 0.99934226, 'kl': 0.01599277456989512, 'entropy': 1

custom_metrics: {}
date: 2020-09-21_17-01-33
done: false
episode_len_mean: 3299.09
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.11209836323707
episode_reward_min: -32.22698139089825
episodes_this_iter: 1
episodes_total: 159
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.1227283775806427
      entropy_coeff: 0.005
      kl: 0.01544446760090068
      policy_loss: -0.1337374347494915
      total_loss: -0.11457250104285777
      vf_explained_var: 0.9993693828582764
      vf_loss: 0.001322284726484213
  num_steps_sampled: 386000
  num_steps_trained: 386000
iterations_since_restore: 386
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.48695652173913
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.9
  vram_util_percent0: 0.9449909821282177
  

2020-09-21 17:01:36,846	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.10298910993151367, 'policy_loss': 0.00042757182382047176, 'vf_loss': 0.10777624952606857, 'vf_explained_var': 0.9631814, 'kl': 0.0005725206644880965, 'entropy': 1.2168458104133606, 'entropy_coeff': 0.005}
2020-09-21 17:01:37,339	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.003238284436520189, 'policy_loss': -0.037698834552429616, 'vf_loss': 0.04105869575869292, 'vf_explained_var': 0.9868392, 'kl': 0.003925792756490409, 'entropy': 1.216774806380272, 'entropy_coeff': 0.005}
2020-09-21 17:01:37,789	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.03099409129936248, 'policy_loss': -0.05380857951240614, 'vf_loss': 0.019258012413047254, 'vf_explained_var': 0.9940104, 'kl': 0.006364039320033044, 'entropy': 1.2217

2020-09-21 17:01:48,246	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.1019977864343673, 'policy_loss': -0.1200723092770204, 'vf_loss': 0.0031514345755567774, 'vf_explained_var': 0.9988941, 'kl': 0.01384298806078732, 'entropy': 1.220190353691578, 'entropy_coeff': 0.005}
2020-09-21 17:01:48,733	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.10180737439077348, 'policy_loss': -0.12036948828608729, 'vf_loss': 0.003169888092088513, 'vf_explained_var': 0.99888635, 'kl': 0.014112902747001499, 'entropy': 1.208349995315075, 'entropy_coeff': 0.005}
2020-09-21 17:01:49,078	INFO rollout_worker.py:736 -- Training on concatenated sample batches:

{ 'count': 64,
  'policy_batches': { 'default_policy': { 'data': { 'action_dist_inputs': np.ndarray((64, 21), dtype=float32, min=-7.848, max=8.667, mean=-0.294),
                                                   

custom_metrics: {}
date: 2020-09-21_17-01-50
done: false
episode_len_mean: 3299.09
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.112098363237074
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 159
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.218368299305439
      entropy_coeff: 0.005
      kl: 0.014441645704209805
      policy_loss: -0.12756516423542053
      total_loss: -0.10877970838919282
      vf_explained_var: 0.9989542365074158
      vf_loss: 0.002944056632259162
  num_steps_sampled: 387000
  num_steps_trained: 387000
iterations_since_restore: 387
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.495652173913043
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.9
  vram_util_percent0: 0.9449909821282177

2020-09-21 17:01:55,040	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.11788986704777926, 'policy_loss': -0.002481819479726255, 'vf_loss': 0.12427398608997464, 'vf_explained_var': 0.98184377, 'kl': 0.0011946925806629038, 'entropy': 1.143347404897213, 'entropy_coeff': 0.005}
2020-09-21 17:01:55,528	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.007187881041318178, 'policy_loss': -0.030289063754025847, 'vf_loss': 0.03866144048515707, 'vf_explained_var': 0.99421054, 'kl': 0.003005002588906791, 'entropy': 1.1496684551239014, 'entropy_coeff': 0.005}
2020-09-21 17:01:55,981	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.02872951334575191, 'policy_loss': -0.05423281528055668, 'vf_loss': 0.02274989488068968, 'vf_explained_var': 0.9964576, 'kl': 0.005612923036096618, 'entropy': 1.1542

2020-09-21 17:02:06,511	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.09151812584605068, 'policy_loss': -0.11613915371708572, 'vf_loss': 0.011073792789829895, 'vf_explained_var': 0.99821377, 'kl': 0.012703557207714766, 'entropy': 1.1492590680718422, 'entropy_coeff': 0.005}
2020-09-21 17:02:07,002	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.09571913865511306, 'policy_loss': -0.12080889334902167, 'vf_loss': 0.011581136001041159, 'vf_explained_var': 0.99828804, 'kl': 0.012693020689766854, 'entropy': 1.1537795513868332, 'entropy_coeff': 0.005}
2020-09-21 17:02:07,457	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.10463854053523391, 'policy_loss': -0.1304767319234088, 'vf_loss': 0.011833991447929293, 'vf_explained_var': 0.9982052, 'kl': 0.013004445529077202, 'entropy': 1.

custom_metrics: {}
date: 2020-09-21_17-02-08
done: false
episode_len_mean: 3299.09
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.112098363237074
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 159
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.1500118747353554
      entropy_coeff: 0.005
      kl: 0.013501449255272746
      policy_loss: -0.12511729402467608
      total_loss: -0.09946019225753844
      vf_explained_var: 0.9982345104217529
      vf_loss: 0.010901831818046048
  num_steps_sampled: 388000
  num_steps_trained: 388000
iterations_since_restore: 388
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 6.304347826086956
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.9
  vram_util_percent0: 0.944990982128217

2020-09-21 17:02:12,704	DEBUG sgd.py:120 -- 0 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': 0.06297133438056335, 'policy_loss': -0.0014859427465125918, 'vf_loss': 0.0688271860126406, 'vf_explained_var': 0.9785248, 'kl': 0.0010421769283222027, 'entropy': 1.1905440017580986, 'entropy_coeff': 0.005}
2020-09-21 17:02:13,205	DEBUG sgd.py:120 -- 1 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.006372558418661356, 'policy_loss': -0.0317188526969403, 'vf_loss': 0.023210754327010363, 'vf_explained_var': 0.99214935, 'kl': 0.00529358240601141, 'entropy': 1.1808177903294563, 'entropy_coeff': 0.005}
2020-09-21 17:02:13,656	DEBUG sgd.py:120 -- 2 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.035724662127904594, 'policy_loss': -0.05194421943087946, 'vf_loss': 0.013416120869806036, 'vf_explained_var': 0.9954375, 'kl': 0.005733613157644868, 'entropy': 1.180

2020-09-21 17:02:24,514	DEBUG sgd.py:120 -- 24 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.1094979173271895, 'policy_loss': -0.1297079537762329, 'vf_loss': 0.0048969930212479085, 'vf_explained_var': 0.9982286, 'kl': 0.013983084762003273, 'entropy': 1.1847530379891396, 'entropy_coeff': 0.005}
2020-09-21 17:02:25,002	DEBUG sgd.py:120 -- 25 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.11527465842664242, 'policy_loss': -0.1357661560177803, 'vf_loss': 0.0050379146850900725, 'vf_explained_var': 0.99820316, 'kl': 0.014087261515669525, 'entropy': 1.1882888600230217, 'entropy_coeff': 0.005}
2020-09-21 17:02:25,485	DEBUG sgd.py:120 -- 26 {'allreduce_latency': 0.0, 'cur_kl_coeff': 1.5187499999999998, 'cur_lr': 1e-05, 'total_loss': -0.10975859424797818, 'policy_loss': -0.1303483173251152, 'vf_loss': 0.004883372414042242, 'vf_explained_var': 0.99821323, 'kl': 0.014236470276955515, 'entropy': 1.1

custom_metrics: {}
date: 2020-09-21_17-02-26
done: false
episode_len_mean: 3299.09
episode_reward_max: -21.771561900678247
episode_reward_mean: -28.112098363237074
episode_reward_min: -32.22698139089825
episodes_this_iter: 0
episodes_total: 159
experiment_id: 0c7219fa5ef6496d9c3ae96482abee7c
hostname: gra852
info:
  learner:
    default_policy:
      allreduce_latency: 0.0
      cur_kl_coeff: 1.5187499999999998
      cur_lr: 1.0e-05
      entropy: 1.1848045513033867
      entropy_coeff: 0.005
      kl: 0.014548459846992046
      policy_loss: -0.1333144437521696
      total_loss: -0.11227653233800083
      vf_explained_var: 0.9982156753540039
      vf_loss: 0.004866471659624949
  num_steps_sampled: 389000
  num_steps_trained: 389000
iterations_since_restore: 389
node_ip: 10.29.85.26
num_healthy_workers: 5
off_policy_estimator: {}
perf:
  cpu_util_percent: 5.4125000000000005
  gpu_util_percent0: 0.0
  gpu_util_percent1: 0.0
  ram_util_percent: 11.9
  vram_util_percent0: 0.944990982128217

[2m[36m(pid=24699)[0m 2020-09-21 17:02:27,187	INFO sampler.py:882 -- Inputs to compute_actions():
[2m[36m(pid=24699)[0m 
[2m[36m(pid=24699)[0m { 'default_policy': [ { 'data': { 'agent_id': 'agent0',
[2m[36m(pid=24699)[0m                                   'env_id': 0,
[2m[36m(pid=24699)[0m                                   'info': { 'broker': <tensortrade.oms.orders.broker.Broker object at 0x2b27cab2e978>,
[2m[36m(pid=24699)[0m                                             'net_worth': 17881.731146677746,
[2m[36m(pid=24699)[0m                                             'portfolio': <tensortrade.oms.wallets.portfolio.Portfolio object at 0x2b27cab2e6a0>,
[2m[36m(pid=24699)[0m                                             'step': 3674},
[2m[36m(pid=24699)[0m                                   'obs': np.ndarray((50, 78), dtype=float32, min=-20.069, max=8.121, mean=0.468),
[2m[36m(pid=24699)[0m                                   'prev_action': 13,
[2m[36m(pid=24699

In [None]:
import random
from ray.tune.schedulers import PopulationBasedTraining

In [None]:
# Postprocess the perturbed config to ensure it's still valid
def explore(config):
    """Clamp a perturbed hyperparameter config back to usable values.

    The dict is modified in place and the very same object is returned.

    Args:
        config (dict): Must contain ``train_batch_size``,
            ``sgd_minibatch_size`` and ``num_sgd_iter``.

    Returns:
        dict: ``config`` itself, where ``train_batch_size`` is at least
        twice ``sgd_minibatch_size`` and ``num_sgd_iter`` is at least 1.
    """
    # Collect enough timesteps per batch to actually run SGD on them.
    config["train_batch_size"] = max(
        config["train_batch_size"], config["sgd_minibatch_size"] * 2
    )
    # Always perform at least one SGD iteration.
    config["num_sgd_iter"] = max(config["num_sgd_iter"], 1)
    return config


# Population Based Training scheduler. It is handed to the `tune.run(...)` call
# that follows in this cell via `scheduler=pbt`.
# NOTE(review): the stdlib `random` module used by the mutation callables below
# is not seeded anywhere in this cell, so sampled values differ between runs.
pbt = PopulationBasedTraining(
    # NOTE(review): presumably `perturbation_interval` is measured in units of
    # `time_attr`, i.e. every 100 s of total trial time — confirm in Ray docs.
    time_attr="time_total_s",
    # Objective: maximise the reported `episode_reward_mean`.
    metric="episode_reward_mean",
    mode="max",
    perturbation_interval=100,
    # NOTE(review): presumably the probability that a hyperparameter is re-drawn
    # from its entry in `hyperparam_mutations` instead of being perturbed from
    # its current value — confirm against the installed Ray version.
    resample_probability=0.25,
    # Specifies the mutations of these hyperparams
    # Callables draw a fresh value through `random` every time they are invoked
    # (`random.randint` bounds are inclusive); `lr` is instead given as an
    # explicit list of candidate values.
    hyperparam_mutations={
        "lambda": lambda: random.uniform(0.9, 1.0),
        "clip_param": lambda: random.uniform(0.01, 0.5),
        "lr": [1e-2, 1e-3, 5e-4, 1e-4, 5e-5, 1e-5],
        "num_sgd_iter": lambda: random.randint(1, 40),
        "sgd_minibatch_size": lambda: random.randint(32, 128),
        "train_batch_size": lambda: random.randint(1000, 5000),
    },
    # `explore` (defined earlier in this cell) is supplied as the hook that
    # post-processes a perturbed config so it stays valid.
    custom_explore_fn=explore,
)

# Launch a PPO experiment named "show_me_the_money_test" under the `pbt`
# scheduler built above. The return value of `tune.run` is not assigned here.
# NOTE(review): no `stop` argument is passed, so presumably the run continues
# until it is interrupted manually — confirm this is intended.
tune.run(
    "PPO",
    name="show_me_the_money_test",
    scheduler=pbt,
    # NOTE(review): with `num_samples` left commented out this presumably starts
    # a single trial; PBT is designed around a population of trials, so there
    # may be nothing to exploit/explore — confirm before relying on the results.
    #     num_samples=8,
    checkpoint_at_end=True,
    config={
        # The environment is referenced by name only; presumably "TradingEnv"
        # is registered with Tune earlier in the notebook, where `env_config`
        # is also expected to be defined.
        "env": "TradingEnv",
        "env_config": env_config,
        "framework": "tf",
        # NOTE(review): DEBUG presumably yields very verbose output similar to
        # the per-SGD-iteration `sgd.py` lines stored in this notebook's cell
        # outputs; consider a quieter level for routine runs.
        "log_level": "DEBUG",
        "kl_coeff": 1.0,
        #         "num_workers": 8,
        "num_gpus": 2,
        #         "model": {
        #             "free_log_std": True
        #         },
        # These params are tuned from a fixed starting value.
        "lambda": 0.95,
        "clip_param": 0.2,
        "lr": 1e-4,
        # These params start off randomly drawn from a set.
        # Each `tune.sample_from` wraps a lambda that ignores `spec` and picks
        # one of the listed values with `random.choice`.
        "num_sgd_iter": tune.sample_from(lambda spec: random.choice([10, 20, 30])),
        "sgd_minibatch_size": tune.sample_from(
            lambda spec: random.choice([32, 128])
        ),
        "train_batch_size": tune.sample_from(
            lambda spec: random.choice([2000, 4000])
        ),
    },
)

In [None]:
# PPO experiment without an explicit scheduler argument; the object returned by
# `tune.run` is kept in `analysis` for later inspection.
analysis = tune.run(
    "PPO",
    # NOTE(review): presumably a trial stops once its reported
    # `episode_reward_mean` reaches 50000. Outputs stored in this notebook show
    # values around -28, so this threshold may never be hit — confirm the
    # reward scale.
    stop={"episode_reward_mean": 50000},
    config={
        # The environment is referenced by name only; presumably "TradingEnv"
        # is registered with Tune earlier in the notebook, where `env_config`
        # is also expected to be defined.
        "env": "TradingEnv",
        "env_config": env_config,
        # NOTE(review): DEBUG presumably yields very verbose output; consider a
        # quieter level for routine runs.
        "log_level": "DEBUG",
        "framework": "tf",
        #         "ignore_worker_failures": True,
        #         "num_workers": 4,
        "num_gpus": 2,
        
        # Fixed PPO hyperparameters for this run.
        "lambda": 0.95,
        "clip_param": 0.2,
        "lr": 1e-4,
        # These params start off randomly drawn from a set.
        # Each `tune.sample_from` wraps a lambda that ignores `spec` and picks
        # one of the listed values with the (unseeded) stdlib `random.choice`.
        "num_sgd_iter": tune.sample_from(lambda spec: random.choice([10, 20, 30])),
        "sgd_minibatch_size": tune.sample_from(
            lambda spec: random.choice([32, 128, 512, 2048])
        ),
        "train_batch_size": tune.sample_from(
            lambda spec: random.choice([10000, 20000, 40000])
        ),
        # Disabled alternatives kept for reference. Re-enabling the "lr" or
        # "lambda" entries below would override the active values set above,
        # because a later duplicate key wins in a dict literal.
        #         "clip_rewards": True,
        #         "lr": 1e-6,
        #         "lr_schedule": [
        # #             [0, 1e-1],
        # #             [int(1e2), 1e-2],
        # #             [int(1e3), 1e-3],
        #             [int(1e4), 1e-4],
        #             [int(1e5), 1e-5],
        #             [int(1e6), 1e-6],
        #             [int(1e7), 1e-7],
        #         ],
        #         "gamma": 0,
        #         "observation_filter": "MeanStdFilter",
        #         "lambda": 0.72,
        #         "vf_loss_coeff": 0.5,
        #         "entropy_coeff": 0.01,
    },
    #     local_dir="./ray",
    #     resources_per_trial={"cpu": 4, "gpu": 1},
    # Save a checkpoint when the trial finishes.
    checkpoint_at_end=True,
)