In [1]:
import sys
import os
from baselines import logger, bench
from baselines.common import set_global_seeds
from baselines.common.cmd_util import make_atari_env, atari_arg_parser
from baselines.common.atari_wrappers import wrap_deepmind, make_atari
from baselines.common.vec_env.vec_frame_stack import VecFrameStack
from baselines.common.vec_env.dummy_vec_env import DummyVecEnv
from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv
from baselines.common.vec_env.vec_normalize import VecNormalize, VecNormalizeTest
from baselines.ppo2 import ppo2
#from baselines.ppo2.policies import CnnPolicy, LstmPolicy, LnLstmPolicy, MlpPolicy
import multiprocessing
import tensorflow as tf
import pandas as pd
import numpy as np
import gym

from src.common.environments import ESTradingEnv

import matplotlib as mpl
%matplotlib

  from ._conv import register_converters as _register_converters


Using matplotlib backend: Qt5Agg




In [2]:
#Register trading environment with OpenAI Gym
from gym.envs.registration import register
register(id='tradingenv-v0',
         entry_point='src.common.environments:ESTradingEnv_v2',)

In [3]:
from baselines.a2c.utils import fc, conv_to_fc, lstm, batch_to_seq, seq_to_batch
from baselines.common.distributions import make_pdtype
from baselines.common.input import observation_input

def ortho_init(scale=1.0):
    def _ortho_init(shape, dtype, partition_info=None):
        #lasagne ortho init for tf
        shape = tuple(shape)
        if len(shape) == 2:
            flat_shape = shape
        elif len(shape) > 2: # assumes NHWC
            flat_shape = (np.prod(shape[:-1]), shape[-1])
        else:
            raise NotImplementedError
        a = np.random.normal(0.0, 1.0, flat_shape)
        u, _, v = np.linalg.svd(a, full_matrices=False)
        q = u if u.shape == flat_shape else v # pick the one with the correct shape
        q = q.reshape(shape)
        return (scale * q[:shape[0], :shape[1]]).astype(np.float32)
    return _ortho_init

def conv(x, scope, *, nf, rf, stride, pad='VALID', init_scale=1.0, one_dim_bias=False):    
    channel_ax = 2
    strides = [1, stride, 1]
    bshape = [1, 1, nf]
    
    nin = x.get_shape()[channel_ax].value
    wshape = [rf, nin, nf]
    with tf.variable_scope(scope):
        w = tf.get_variable("w", wshape, initializer=ortho_init(init_scale))
        b = tf.get_variable("b", bshape, initializer=tf.constant_initializer(0.0))
        return b + tf.nn.conv1d(x, w, stride=stride, padding=pad)


def cnn(X, is_training, **conv_kwargs):
    activ = tf.nn.relu
    h = activ(conv(X, 'c1', nf=16, rf=4, stride=1, init_scale=np.sqrt(2),
                   **conv_kwargs))
    h = tf.layers.dropout(h, rate=0.5, training=is_training)
    h2 = activ(conv(h, 'c2', nf=8, rf=4, stride=1, init_scale=np.sqrt(2), **conv_kwargs))
    h2 = tf.layers.dropout(h2, rate=0.5, training=is_training)
    h3 = conv_to_fc(h2)
    return activ(fc(h3, 'fc1', nh=32, init_scale=np.sqrt(2)))


class CnnPolicy(object):

    def __init__(self, sess, ob_space, ac_space, nbatch, nsteps, reuse=False, **conv_kwargs): #pylint: disable=W0613
        if nsteps == 1:
            self._is_training = False
        else:
            self._is_training = True
            
        self.pdtype = make_pdtype(ac_space)
        X, processed_x = observation_input(ob_space, nbatch)
        with tf.variable_scope("model", reuse=reuse):
            h = cnn(processed_x, self._is_training, **conv_kwargs)
            #activ = tf.nn.relu
            #print(processed_x.get_shape())
            #h = activ(fc(processed_x, 'fc1', nh=512, init_scale=np.sqrt(2)))
            vf = fc(h, 'v', 1)[:,0]
            self.pd, self.pi = self.pdtype.pdfromlatent(h, init_scale=0.01)

        a0 = self.pd.sample()
        neglogp0 = self.pd.neglogp(a0)
        self.initial_state = None

        def step(ob, *_args, **_kwargs):
            a, v, neglogp = sess.run([a0, vf, neglogp0], {X:ob})
            return a, v, self.initial_state, neglogp

        def value(ob, *_args, **_kwargs):
            return sess.run(vf, {X:ob})

        self.X = X
        self.vf = vf
        self.step = step
        self.value = value
        
class LstmPolicy(object):

    def __init__(self, sess, ob_space, ac_space, nbatch, nsteps, nlstm=256, reuse=False):
        if nsteps == 1:
            self._is_training = False
        else:
            self._is_training = True
            
        nenv = nbatch // nsteps
        self.pdtype = make_pdtype(ac_space)
        X, processed_x = observation_input(ob_space, nbatch)

        M = tf.placeholder(tf.float32, [nbatch]) #mask (done t-1)
        S = tf.placeholder(tf.float32, [nenv, nlstm*2]) #states
        with tf.variable_scope("model", reuse=reuse):
            h = cnn(processed_x, self._is_training)
            xs = batch_to_seq(h, nenv, nsteps)
            ms = batch_to_seq(M, nenv, nsteps)
            h5, snew = lstm(xs, ms, S, 'lstm1', nh=nlstm)
            h5 = tf.layers.dropout(h5, rate=0.5, training=self._is_training)
            h5 = seq_to_batch(h5)
            vf = fc(h5, 'v', 1)
            self.pd, self.pi = self.pdtype.pdfromlatent(h5)

        v0 = vf[:, 0]
        a0 = self.pd.sample()
        neglogp0 = self.pd.neglogp(a0)
        self.initial_state = np.zeros((nenv, nlstm*2), dtype=np.float32)

        def step(ob, state, mask):
            return sess.run([a0, v0, snew, neglogp0], {X:ob, S:state, M:mask})

        def value(ob, state, mask):
            return sess.run(v0, {X:ob, S:state, M:mask})

        self.X = X
        self.M = M
        self.S = S
        self.vf = vf
        self.step = step
        self.value = value

In [4]:
def train(env_id, num_timesteps, seed, policy):

    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin': ncpu //= 2
    config = tf.ConfigProto(allow_soft_placement=True,
                            intra_op_parallelism_threads=ncpu,
                            inter_op_parallelism_threads=ncpu)
    config.gpu_options.allow_growth = True #pylint: disable=E1101
    tf.Session(config=config).__enter__()
    
    def make_env(rank):
        def _thunk():
            env = gym.make(env_id)
            env.seed(seed + rank)
            env = bench.Monitor(env, logger.get_dir() and os.path.join(logger.get_dir(), str(rank)))
            return env
        return _thunk
    

    #env = DummyVecEnv([make_env])
    env = SubprocVecEnv([make_env(i) for i in range(32)])
    env = VecNormalize(env, ob=True)
    set_global_seeds(seed)

    policy = LstmPolicy
    ppo2.learn(policy=policy, env=env, nsteps=128, nminibatches=4,
        lam=0.95, gamma=0.99, noptepochs=25, log_interval=1,
        ent_coef=.01,
        lr=lambda f: f * 2.5e-4,
        cliprange=0.1,
        total_timesteps=int(num_timesteps * 1.1))

In [5]:
logger.configure()

train('tradingenv-v0', num_timesteps=1e7, seed=0, policy='cnn')

Logging to /tmp/openai-2018-05-30-18-51-01-525742
Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See @{tf.nn.softmax_cross_entropy_with_logits_v2}.

Instructions for updating:
keep_dims is deprecated, use keepdims instead
Instructions for updating:
keep_dims is deprecated, use keepdims instead
--------------------------------------
| approxkl           | 0.000938044   |
| clipfrac           | 0.021708984   |
| eplenmean          | 96.5          |
| eprewmean          | -523          |
| explained_variance | -0.297        |
| fps                | 638           |
| nupdates           | 1             |
| policy_entropy     | 1.0976362     |
| policy_loss        | -0.0011973628 |
| serial_timesteps   | 128           |
| time_elapsed       | 6.41          |
| total_timesteps    | 4096          |
| value_loss         | 0.6793031     |
--------------------------------------
Saving to /tmp/openai-2018-

--------------------------------------
| approxkl           | 0.0020687236  |
| clipfrac           | 0.07727539    |
| eplenmean          | 204           |
| eprewmean          | -525          |
| explained_variance | 0.0157        |
| fps                | 768           |
| nupdates           | 14            |
| policy_entropy     | 1.0594614     |
| policy_loss        | -0.0006887268 |
| serial_timesteps   | 1792          |
| time_elapsed       | 74.8          |
| total_timesteps    | 57344         |
| value_loss         | 0.05905364    |
--------------------------------------
---------------------------------------
| approxkl           | 0.0018217039   |
| clipfrac           | 0.059023436    |
| eplenmean          | 221            |
| eprewmean          | -524           |
| explained_variance | 0.00348        |
| fps                | 775            |
| nupdates           | 15             |
| policy_entropy     | 1.0548551      |
| policy_loss        | -0.00042630904 |
| serial_timest

---------------------------------------
| approxkl           | 0.0017026842   |
| clipfrac           | 0.047060546    |
| eplenmean          | 249            |
| eprewmean          | -524           |
| explained_variance | 0.00729        |
| fps                | 792            |
| nupdates           | 28             |
| policy_entropy     | 1.0131191      |
| policy_loss        | -0.00018720294 |
| serial_timesteps   | 3584           |
| time_elapsed       | 147            |
| total_timesteps    | 114688         |
| value_loss         | 0.059608735    |
---------------------------------------
--------------------------------------
| approxkl           | 0.0020274615  |
| clipfrac           | 0.081318356   |
| eplenmean          | 241           |
| eprewmean          | -524          |
| explained_variance | 0.0169        |
| fps                | 770           |
| nupdates           | 29            |
| policy_entropy     | 1.0051094     |
| policy_loss        | -0.0011492603 |
| serial_t

-------------------------------------
| approxkl           | 0.001771481  |
| clipfrac           | 0.056962892  |
| eplenmean          | 257          |
| eprewmean          | -526         |
| explained_variance | 0.0319       |
| fps                | 769          |
| nupdates           | 43           |
| policy_entropy     | 0.99385065   |
| policy_loss        | 0.0006580582 |
| serial_timesteps   | 5504         |
| time_elapsed       | 225          |
| total_timesteps    | 176128       |
| value_loss         | 0.062310237  |
-------------------------------------
-------------------------------------
| approxkl           | 0.0021229656 |
| clipfrac           | 0.08518555   |
| eplenmean          | 268          |
| eprewmean          | -527         |
| explained_variance | -0.014       |
| fps                | 757          |
| nupdates           | 44           |
| policy_entropy     | 0.9960192    |
| policy_loss        | -0.000622154 |
| serial_timesteps   | 5632         |
| time_elaps

---------------------------------------
| approxkl           | 0.0021399246   |
| clipfrac           | 0.078525394    |
| eplenmean          | 268            |
| eprewmean          | -527           |
| explained_variance | 0.00465        |
| fps                | 781            |
| nupdates           | 57             |
| policy_entropy     | 0.9422295      |
| policy_loss        | -0.00026629522 |
| serial_timesteps   | 7296           |
| time_elapsed       | 300            |
| total_timesteps    | 233472         |
| value_loss         | 0.0663784      |
---------------------------------------
---------------------------------------
| approxkl           | 0.001646881    |
| clipfrac           | 0.0525         |
| eplenmean          | 286            |
| eprewmean          | -526           |
| explained_variance | 0.00973        |
| fps                | 803            |
| nupdates           | 58             |
| policy_entropy     | 0.9511803      |
| policy_loss        | -0.00019617962 |


--------------------------------------
| approxkl           | 0.0021243433  |
| clipfrac           | 0.09387695    |
| eplenmean          | 308           |
| eprewmean          | -524          |
| explained_variance | -0.002        |
| fps                | 769           |
| nupdates           | 71            |
| policy_entropy     | 0.9062597     |
| policy_loss        | -0.0017363387 |
| serial_timesteps   | 9088          |
| time_elapsed       | 374           |
| total_timesteps    | 290816        |
| value_loss         | 0.054917336   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0015753494  |
| clipfrac           | 0.060371093   |
| eplenmean          | 299           |
| eprewmean          | -524          |
| explained_variance | 0.0223        |
| fps                | 768           |
| nupdates           | 72            |
| policy_entropy     | 0.9155499     |
| policy_loss        | -0.0011191005 |
| serial_timesteps   | 92

--------------------------------------
| approxkl           | 0.0021050107  |
| clipfrac           | 0.09226563    |
| eplenmean          | 292           |
| eprewmean          | -526          |
| explained_variance | 0.0178        |
| fps                | 762           |
| nupdates           | 86            |
| policy_entropy     | 0.8760307     |
| policy_loss        | -0.0014870034 |
| serial_timesteps   | 11008         |
| time_elapsed       | 456           |
| total_timesteps    | 352256        |
| value_loss         | 0.059986245   |
--------------------------------------
--------------------------------------
| approxkl           | 0.001937505   |
| clipfrac           | 0.08116211    |
| eplenmean          | 298           |
| eprewmean          | -526          |
| explained_variance | -0.0172       |
| fps                | 768           |
| nupdates           | 87            |
| policy_entropy     | 0.87602156    |
| policy_loss        | -0.0012307339 |
| serial_timesteps   | 11

--------------------------------------
| approxkl           | 0.0023364231  |
| clipfrac           | 0.093222655   |
| eplenmean          | 327           |
| eprewmean          | -522          |
| explained_variance | -0.0121       |
| fps                | 801           |
| nupdates           | 100           |
| policy_entropy     | 0.83252954    |
| policy_loss        | -0.0011271144 |
| serial_timesteps   | 12800         |
| time_elapsed       | 529           |
| total_timesteps    | 409600        |
| value_loss         | 0.047131386   |
--------------------------------------
Saving to /tmp/openai-2018-05-30-18-51-01-525742/checkpoints/00100
--------------------------------------
| approxkl           | 0.0017587335  |
| clipfrac           | 0.06936523    |
| eplenmean          | 339           |
| eprewmean          | -521          |
| explained_variance | -0.00265      |
| fps                | 810           |
| nupdates           | 101           |
| policy_entropy     | 0.8483824    

--------------------------------------
| approxkl           | 0.0025070894  |
| clipfrac           | 0.08545899    |
| eplenmean          | 399           |
| eprewmean          | -520          |
| explained_variance | 0.02          |
| fps                | 782           |
| nupdates           | 114           |
| policy_entropy     | 0.7859747     |
| policy_loss        | -0.0007392218 |
| serial_timesteps   | 14592         |
| time_elapsed       | 601           |
| total_timesteps    | 466944        |
| value_loss         | 0.05083756    |
--------------------------------------
-------------------------------------
| approxkl           | 0.00267712   |
| clipfrac           | 0.07284179   |
| eplenmean          | 390          |
| eprewmean          | -520         |
| explained_variance | -0.00054     |
| fps                | 790          |
| nupdates           | 115          |
| policy_entropy     | 0.77721566   |
| policy_loss        | 0.0005522459 |
| serial_timesteps   | 14720       

-------------------------------------
| approxkl           | 0.002399855  |
| clipfrac           | 0.0740332    |
| eplenmean          | 453          |
| eprewmean          | -524         |
| explained_variance | -0.00933     |
| fps                | 809          |
| nupdates           | 129          |
| policy_entropy     | 0.7076103    |
| policy_loss        | 0.0011984243 |
| serial_timesteps   | 16512        |
| time_elapsed       | 678          |
| total_timesteps    | 528384       |
| value_loss         | 0.050340164  |
-------------------------------------
--------------------------------------
| approxkl           | 0.0021491812  |
| clipfrac           | 0.08640625    |
| eplenmean          | 457           |
| eprewmean          | -525          |
| explained_variance | -0.000679     |
| fps                | 787           |
| nupdates           | 130           |
| policy_entropy     | 0.7190512     |
| policy_loss        | -0.0006105107 |
| serial_timesteps   | 16640         |
|

---------------------------------------
| approxkl           | 0.002404625    |
| clipfrac           | 0.0680664      |
| eplenmean          | 481            |
| eprewmean          | -525           |
| explained_variance | 0.0119         |
| fps                | 826            |
| nupdates           | 144            |
| policy_entropy     | 0.67105556     |
| policy_loss        | -0.00040197998 |
| serial_timesteps   | 18432          |
| time_elapsed       | 755            |
| total_timesteps    | 589824         |
| value_loss         | 0.043346677    |
---------------------------------------
--------------------------------------
| approxkl           | 0.0021023406  |
| clipfrac           | 0.057265624   |
| eplenmean          | 484           |
| eprewmean          | -525          |
| explained_variance | 0.00156       |
| fps                | 810           |
| nupdates           | 145           |
| policy_entropy     | 0.6767914     |
| policy_loss        | 0.00068886013 |
| serial_t

--------------------------------------
| approxkl           | 0.0015773268  |
| clipfrac           | 0.044853516   |
| eplenmean          | 487           |
| eprewmean          | -525          |
| explained_variance | -0.00302      |
| fps                | 792           |
| nupdates           | 158           |
| policy_entropy     | 0.6928168     |
| policy_loss        | 0.00032712994 |
| serial_timesteps   | 20224         |
| time_elapsed       | 827           |
| total_timesteps    | 647168        |
| value_loss         | 0.039100684   |
--------------------------------------
-------------------------------------
| approxkl           | 0.0020513255 |
| clipfrac           | 0.06442383   |
| eplenmean          | 505          |
| eprewmean          | -525         |
| explained_variance | 0.0202       |
| fps                | 785          |
| nupdates           | 159          |
| policy_entropy     | 0.6970375    |
| policy_loss        | 8.220803e-05 |
| serial_timesteps   | 20352       

--------------------------------------
| approxkl           | 0.0027143168  |
| clipfrac           | 0.07834961    |
| eplenmean          | 487           |
| eprewmean          | -523          |
| explained_variance | 0.0317        |
| fps                | 797           |
| nupdates           | 173           |
| policy_entropy     | 0.6880352     |
| policy_loss        | 0.00064717024 |
| serial_timesteps   | 22144         |
| time_elapsed       | 906           |
| total_timesteps    | 708608        |
| value_loss         | 0.042340267   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0026997342  |
| clipfrac           | 0.08771484    |
| eplenmean          | 493           |
| eprewmean          | -522          |
| explained_variance | -0.00197      |
| fps                | 790           |
| nupdates           | 174           |
| policy_entropy     | 0.69645333    |
| policy_loss        | 0.00027138914 |
| serial_timesteps   | 22

-------------------------------------
| approxkl           | 0.0021203295 |
| clipfrac           | 0.07782227   |
| eplenmean          | 529          |
| eprewmean          | -517         |
| explained_variance | 0.0119       |
| fps                | 803          |
| nupdates           | 188          |
| policy_entropy     | 0.639648     |
| policy_loss        | 0.0009596015 |
| serial_timesteps   | 24064        |
| time_elapsed       | 983          |
| total_timesteps    | 770048       |
| value_loss         | 0.038399767  |
-------------------------------------
-------------------------------------
| approxkl           | 0.0025805696 |
| clipfrac           | 0.075234376  |
| eplenmean          | 509          |
| eprewmean          | -520         |
| explained_variance | 0.0795       |
| fps                | 802          |
| nupdates           | 189          |
| policy_entropy     | 0.64777815   |
| policy_loss        | 0.0010843733 |
| serial_timesteps   | 24192        |
| time_elaps

--------------------------------------
| approxkl           | 0.002188067   |
| clipfrac           | 0.057519533   |
| eplenmean          | 539           |
| eprewmean          | -522          |
| explained_variance | 0.0601        |
| fps                | 796           |
| nupdates           | 203           |
| policy_entropy     | 0.6355654     |
| policy_loss        | 0.00091036054 |
| serial_timesteps   | 25984         |
| time_elapsed       | 1.06e+03      |
| total_timesteps    | 831488        |
| value_loss         | 0.03587338    |
--------------------------------------
--------------------------------------
| approxkl           | 0.0028420344  |
| clipfrac           | 0.10044922    |
| eplenmean          | 550           |
| eprewmean          | -522          |
| explained_variance | 7.22e-05      |
| fps                | 810           |
| nupdates           | 204           |
| policy_entropy     | 0.6302279     |
| policy_loss        | -0.0014715551 |
| serial_timesteps   | 26

---------------------------------------
| approxkl           | 0.002559701    |
| clipfrac           | 0.094033204    |
| eplenmean          | 449            |
| eprewmean          | -521           |
| explained_variance | 0.0986         |
| fps                | 807            |
| nupdates           | 218            |
| policy_entropy     | 0.706497       |
| policy_loss        | -0.00052280154 |
| serial_timesteps   | 27904          |
| time_elapsed       | 1.14e+03       |
| total_timesteps    | 892928         |
| value_loss         | 0.04696936     |
---------------------------------------
---------------------------------------
| approxkl           | 0.0024499062   |
| clipfrac           | 0.09441406     |
| eplenmean          | 449            |
| eprewmean          | -520           |
| explained_variance | 0.0787         |
| fps                | 797            |
| nupdates           | 219            |
| policy_entropy     | 0.70970756     |
| policy_loss        | -0.00077866385 |


--------------------------------------
| approxkl           | 0.0022622035  |
| clipfrac           | 0.07342774    |
| eplenmean          | 539           |
| eprewmean          | -522          |
| explained_variance | 0.188         |
| fps                | 782           |
| nupdates           | 232           |
| policy_entropy     | 0.6486958     |
| policy_loss        | -0.0013029826 |
| serial_timesteps   | 29696         |
| time_elapsed       | 1.21e+03      |
| total_timesteps    | 950272        |
| value_loss         | 0.04642616    |
--------------------------------------
---------------------------------------
| approxkl           | 0.0024972411   |
| clipfrac           | 0.06258789     |
| eplenmean          | 537            |
| eprewmean          | -523           |
| explained_variance | 0.173          |
| fps                | 814            |
| nupdates           | 233            |
| policy_entropy     | 0.657351       |
| policy_loss        | -0.00062014355 |
| serial_timest

--------------------------------------
| approxkl           | 0.0022015988  |
| clipfrac           | 0.05246094    |
| eplenmean          | 539           |
| eprewmean          | -521          |
| explained_variance | 0.0302        |
| fps                | 798           |
| nupdates           | 247           |
| policy_entropy     | 0.5902773     |
| policy_loss        | 0.00027255065 |
| serial_timesteps   | 31616         |
| time_elapsed       | 1.29e+03      |
| total_timesteps    | 1011712       |
| value_loss         | 0.03751612    |
--------------------------------------
--------------------------------------
| approxkl           | 0.0018836994  |
| clipfrac           | 0.076992184   |
| eplenmean          | 523           |
| eprewmean          | -520          |
| explained_variance | 0.00193       |
| fps                | 791           |
| nupdates           | 248           |
| policy_entropy     | 0.5662656     |
| policy_loss        | -0.0013566989 |
| serial_timesteps   | 31

-------------------------------------
| approxkl           | 0.001923746  |
| clipfrac           | 0.048125     |
| eplenmean          | 602          |
| eprewmean          | -509         |
| explained_variance | 0.323        |
| fps                | 774          |
| nupdates           | 261          |
| policy_entropy     | 0.36985517   |
| policy_loss        | 0.0017611287 |
| serial_timesteps   | 33408        |
| time_elapsed       | 1.36e+03     |
| total_timesteps    | 1069056      |
| value_loss         | 0.025531422  |
-------------------------------------
---------------------------------------
| approxkl           | 0.0022462008   |
| clipfrac           | 0.05586914     |
| eplenmean          | 586            |
| eprewmean          | -513           |
| explained_variance | 0.264          |
| fps                | 778            |
| nupdates           | 262            |
| policy_entropy     | 0.3446965      |
| policy_loss        | -0.00026495018 |
| serial_timesteps   | 33536  

-------------------------------------
| approxkl           | 0.002359964  |
| clipfrac           | 0.051445313  |
| eplenmean          | 868          |
| eprewmean          | -465         |
| explained_variance | 0.421        |
| fps                | 772          |
| nupdates           | 276          |
| policy_entropy     | 0.18528007   |
| policy_loss        | 0.0016081429 |
| serial_timesteps   | 35328        |
| time_elapsed       | 1.44e+03     |
| total_timesteps    | 1130496      |
| value_loss         | 0.01396079   |
-------------------------------------
-------------------------------------
| approxkl           | 0.0025938756 |
| clipfrac           | 0.053544924  |
| eplenmean          | 906          |
| eprewmean          | -460         |
| explained_variance | 0.503        |
| fps                | 774          |
| nupdates           | 277          |
| policy_entropy     | 0.23322228   |
| policy_loss        | 0.0019097801 |
| serial_timesteps   | 35456        |
| time_elaps

-------------------------------------
| approxkl           | 0.0016112528 |
| clipfrac           | 0.034589842  |
| eplenmean          | 1.1e+03      |
| eprewmean          | -382         |
| explained_variance | 0.838        |
| fps                | 783          |
| nupdates           | 291          |
| policy_entropy     | 0.15185836   |
| policy_loss        | 0.0017028883 |
| serial_timesteps   | 37248        |
| time_elapsed       | 1.52e+03     |
| total_timesteps    | 1191936      |
| value_loss         | 0.006575544  |
-------------------------------------
-------------------------------------
| approxkl           | 0.002149597  |
| clipfrac           | 0.038955078  |
| eplenmean          | 1.11e+03     |
| eprewmean          | -369         |
| explained_variance | 0.801        |
| fps                | 779          |
| nupdates           | 292          |
| policy_entropy     | 0.14604262   |
| policy_loss        | 0.0023313502 |
| serial_timesteps   | 37376        |
| time_elaps

-------------------------------------
| approxkl           | 0.0057594087 |
| clipfrac           | 0.049160156  |
| eplenmean          | 1.09e+03     |
| eprewmean          | -314         |
| explained_variance | 0.708        |
| fps                | 819          |
| nupdates           | 306          |
| policy_entropy     | 0.14161749   |
| policy_loss        | 0.0047335993 |
| serial_timesteps   | 39168        |
| time_elapsed       | 1.59e+03     |
| total_timesteps    | 1253376      |
| value_loss         | 0.01274979   |
-------------------------------------
-------------------------------------
| approxkl           | 0.0038531665 |
| clipfrac           | 0.04680664   |
| eplenmean          | 1.11e+03     |
| eprewmean          | -310         |
| explained_variance | 0.744        |
| fps                | 793          |
| nupdates           | 307          |
| policy_entropy     | 0.12869808   |
| policy_loss        | 0.0034570452 |
| serial_timesteps   | 39296        |
| time_elaps

--------------------------------------
| approxkl           | 0.0014544576  |
| clipfrac           | 0.020371094   |
| eplenmean          | 1.21e+03      |
| eprewmean          | -263          |
| explained_variance | 0.685         |
| fps                | 797           |
| nupdates           | 321           |
| policy_entropy     | 0.065936305   |
| policy_loss        | 0.00021587909 |
| serial_timesteps   | 41088         |
| time_elapsed       | 1.67e+03      |
| total_timesteps    | 1314816       |
| value_loss         | 0.008343423   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0026812924  |
| clipfrac           | 0.015146485   |
| eplenmean          | 1.24e+03      |
| eprewmean          | -257          |
| explained_variance | 0.646         |
| fps                | 774           |
| nupdates           | 322           |
| policy_entropy     | 0.044650435   |
| policy_loss        | 0.00022161927 |
| serial_timesteps   | 41

-------------------------------------
| approxkl           | 0.0028901258 |
| clipfrac           | 0.030859375  |
| eplenmean          | 1.37e+03     |
| eprewmean          | -190         |
| explained_variance | 0.618        |
| fps                | 805          |
| nupdates           | 336          |
| policy_entropy     | 0.09054452   |
| policy_loss        | 0.0025107178 |
| serial_timesteps   | 43008        |
| time_elapsed       | 1.75e+03     |
| total_timesteps    | 1376256      |
| value_loss         | 0.0069135334 |
-------------------------------------
-------------------------------------
| approxkl           | 0.0021955213 |
| clipfrac           | 0.026875     |
| eplenmean          | 1.37e+03     |
| eprewmean          | -193         |
| explained_variance | 0.641        |
| fps                | 792          |
| nupdates           | 337          |
| policy_entropy     | 0.09074192   |
| policy_loss        | 0.0018093501 |
| serial_timesteps   | 43136        |
| time_elaps

Saving to /tmp/openai-2018-05-30-18-51-01-525742/checkpoints/00350
--------------------------------------
| approxkl           | 0.001528675   |
| clipfrac           | 0.020595703   |
| eplenmean          | 1.44e+03      |
| eprewmean          | -166          |
| explained_variance | 0.688         |
| fps                | 780           |
| nupdates           | 351           |
| policy_entropy     | 0.07396571    |
| policy_loss        | 0.00068316073 |
| serial_timesteps   | 44928         |
| time_elapsed       | 1.82e+03      |
| total_timesteps    | 1437696       |
| value_loss         | 0.0033499552  |
--------------------------------------
--------------------------------------
| approxkl           | 0.0006645601  |
| clipfrac           | 0.013349609   |
| eplenmean          | 1.44e+03      |
| eprewmean          | -166          |
| explained_variance | 0.614         |
| fps                | 817           |
| nupdates           | 352           |
| policy_entropy     | 0.067986265  

-------------------------------------
| approxkl           | 0.0031372248 |
| clipfrac           | 0.022509765  |
| eplenmean          | 1.51e+03     |
| eprewmean          | -121         |
| explained_variance | 0.219        |
| fps                | 788          |
| nupdates           | 365          |
| policy_entropy     | 0.06169549   |
| policy_loss        | 0.0010849036 |
| serial_timesteps   | 46720        |
| time_elapsed       | 1.9e+03      |
| total_timesteps    | 1495040      |
| value_loss         | 0.005851086  |
-------------------------------------
-------------------------------------
| approxkl           | 0.0027843085 |
| clipfrac           | 0.02227539   |
| eplenmean          | 1.52e+03     |
| eprewmean          | -117         |
| explained_variance | 0.239        |
| fps                | 784          |
| nupdates           | 366          |
| policy_entropy     | 0.07409358   |
| policy_loss        | 0.0038968986 |
| serial_timesteps   | 46848        |
| time_elaps

-------------------------------------
| approxkl           | 0.0016358325 |
| clipfrac           | 0.022304688  |
| eplenmean          | 1.54e+03     |
| eprewmean          | -91.7        |
| explained_variance | 0.583        |
| fps                | 805          |
| nupdates           | 380          |
| policy_entropy     | 0.050991673  |
| policy_loss        | 0.003674606  |
| serial_timesteps   | 48640        |
| time_elapsed       | 1.97e+03     |
| total_timesteps    | 1556480      |
| value_loss         | 0.0028365748 |
-------------------------------------
-------------------------------------
| approxkl           | 0.0020958525 |
| clipfrac           | 0.024667969  |
| eplenmean          | 1.54e+03     |
| eprewmean          | -90.3        |
| explained_variance | 0.508        |
| fps                | 798          |
| nupdates           | 381          |
| policy_entropy     | 0.058380302  |
| policy_loss        | 0.0031480205 |
| serial_timesteps   | 48768        |
| time_elaps

-------------------------------------
| approxkl           | 0.0013924534 |
| clipfrac           | 0.025771484  |
| eplenmean          | 1.43e+03     |
| eprewmean          | -127         |
| explained_variance | 0.629        |
| fps                | 799          |
| nupdates           | 395          |
| policy_entropy     | 0.114399     |
| policy_loss        | 0.0012014625 |
| serial_timesteps   | 50560        |
| time_elapsed       | 2.05e+03     |
| total_timesteps    | 1617920      |
| value_loss         | 0.008679434  |
-------------------------------------
-------------------------------------
| approxkl           | 0.002015101  |
| clipfrac           | 0.023515625  |
| eplenmean          | 1.43e+03     |
| eprewmean          | -125         |
| explained_variance | 0.607        |
| fps                | 804          |
| nupdates           | 396          |
| policy_entropy     | 0.10669228   |
| policy_loss        | 0.0008575686 |
| serial_timesteps   | 50688        |
| time_elaps

--------------------------------------
| approxkl           | 0.0008213371  |
| clipfrac           | 0.009257812   |
| eplenmean          | 1.39e+03      |
| eprewmean          | -152          |
| explained_variance | 0.752         |
| fps                | 808           |
| nupdates           | 410           |
| policy_entropy     | 0.035808913   |
| policy_loss        | -0.0007924499 |
| serial_timesteps   | 52480         |
| time_elapsed       | 2.13e+03      |
| total_timesteps    | 1679360       |
| value_loss         | 0.002584138   |
--------------------------------------
-------------------------------------
| approxkl           | 0.0012293607 |
| clipfrac           | 0.01336914   |
| eplenmean          | 1.39e+03     |
| eprewmean          | -148         |
| explained_variance | 0.7          |
| fps                | 790          |
| nupdates           | 411          |
| policy_entropy     | 0.038257968  |
| policy_loss        | 0.0005801654 |
| serial_timesteps   | 52608       

---------------------------------------
| approxkl           | 5.7548652e-05  |
| clipfrac           | 0.00025390624  |
| eplenmean          | 1.46e+03       |
| eprewmean          | -131           |
| explained_variance | 0.949          |
| fps                | 792            |
| nupdates           | 425            |
| policy_entropy     | 0.004673503    |
| policy_loss        | -0.00013074404 |
| serial_timesteps   | 54400          |
| time_elapsed       | 2.21e+03       |
| total_timesteps    | 1740800        |
| value_loss         | 0.000120907294 |
---------------------------------------
---------------------------------------
| approxkl           | 0.00012910004  |
| clipfrac           | 0.00041015624  |
| eplenmean          | 1.46e+03       |
| eprewmean          | -132           |
| explained_variance | 0.848          |
| fps                | 796            |
| nupdates           | 426            |
| policy_entropy     | 0.0044963085   |
| policy_loss        | -0.00021995649 |


---------------------------------------
| approxkl           | 1.1562405e-08  |
| clipfrac           | 0.0            |
| eplenmean          | 1.54e+03       |
| eprewmean          | -86.2          |
| explained_variance | 0.903          |
| fps                | 782            |
| nupdates           | 439            |
| policy_entropy     | 0.0011681684   |
| policy_loss        | -1.2342548e-05 |
| serial_timesteps   | 56192          |
| time_elapsed       | 2.28e+03       |
| total_timesteps    | 1798144        |
| value_loss         | 5.9739566e-05  |
---------------------------------------
--------------------------------------
| approxkl           | 3.3216104e-08 |
| clipfrac           | 0.0           |
| eplenmean          | 1.55e+03      |
| eprewmean          | -80.7         |
| explained_variance | 0.848         |
| fps                | 797           |
| nupdates           | 440           |
| policy_entropy     | 0.001214812   |
| policy_loss        | -2.578028e-05 |
| serial_t

--------------------------------------
| approxkl           | 9.029709e-06  |
| clipfrac           | 0.00041015624 |
| eplenmean          | 1.62e+03      |
| eprewmean          | -31.7         |
| explained_variance | 0.144         |
| fps                | 797           |
| nupdates           | 453           |
| policy_entropy     | 0.0018147385  |
| policy_loss        | -0.0003903818 |
| serial_timesteps   | 57984         |
| time_elapsed       | 2.35e+03      |
| total_timesteps    | 1855488       |
| value_loss         | 0.0002228673  |
--------------------------------------
---------------------------------------
| approxkl           | 1.6675399e-09  |
| clipfrac           | 0.0            |
| eplenmean          | 1.62e+03       |
| eprewmean          | -30.3          |
| explained_variance | 0.932          |
| fps                | 834            |
| nupdates           | 454            |
| policy_entropy     | 0.0016320347   |
| policy_loss        | -2.5608717e-06 |
| serial_timest

--------------------------------------
| approxkl           | 2.4741214e-05 |
| clipfrac           | 0.00044921876 |
| eplenmean          | 1.62e+03      |
| eprewmean          | -5.2          |
| explained_variance | 0.0457        |
| fps                | 800           |
| nupdates           | 467           |
| policy_entropy     | 0.0028335296  |
| policy_loss        | -0.0004045259 |
| serial_timesteps   | 59776         |
| time_elapsed       | 2.42e+03      |
| total_timesteps    | 1912832       |
| value_loss         | 0.0001681596  |
--------------------------------------
--------------------------------------
| approxkl           | 5.23042e-07   |
| clipfrac           | 0.0           |
| eplenmean          | 1.62e+03      |
| eprewmean          | -6.9          |
| explained_variance | 0.887         |
| fps                | 800           |
| nupdates           | 468           |
| policy_entropy     | 0.002781651   |
| policy_loss        | 2.4813013e-05 |
| serial_timesteps   | 59

--------------------------------------
| approxkl           | 1.4855905e-07 |
| clipfrac           | 0.0           |
| eplenmean          | 1.62e+03      |
| eprewmean          | -5.96         |
| explained_variance | 0.362         |
| fps                | 829           |
| nupdates           | 481           |
| policy_entropy     | 0.0016464718  |
| policy_loss        | 4.155395e-06  |
| serial_timesteps   | 61568         |
| time_elapsed       | 2.49e+03      |
| total_timesteps    | 1970176       |
| value_loss         | 9.794059e-07  |
--------------------------------------
--------------------------------------
| approxkl           | 4.4766175e-06 |
| clipfrac           | 0.00022460938 |
| eplenmean          | 1.62e+03      |
| eprewmean          | -5.96         |
| explained_variance | -0.00261      |
| fps                | 833           |
| nupdates           | 482           |
| policy_entropy     | 0.0017041374  |
| policy_loss        | -0.0003718837 |
| serial_timesteps   | 61

--------------------------------------
| approxkl           | 5.5176442e-06 |
| clipfrac           | 0.00022460938 |
| eplenmean          | 1.62e+03      |
| eprewmean          | -7.5          |
| explained_variance | -0.0196       |
| fps                | 791           |
| nupdates           | 495           |
| policy_entropy     | 0.003230439   |
| policy_loss        | -0.0004308977 |
| serial_timesteps   | 63360         |
| time_elapsed       | 2.56e+03      |
| total_timesteps    | 2027520       |
| value_loss         | 0.00016262635 |
--------------------------------------
-------------------------------------
| approxkl           | 8.080558e-06 |
| clipfrac           | 0.0005175781 |
| eplenmean          | 1.62e+03     |
| eprewmean          | -7.04        |
| explained_variance | -0.00447     |
| fps                | 814          |
| nupdates           | 496          |
| policy_entropy     | 0.0036200879 |
| policy_loss        | -0.00028005  |
| serial_timesteps   | 63488       

-------------------------------------
| approxkl           | 0.003404873  |
| clipfrac           | 0.0053710938 |
| eplenmean          | 1.62e+03     |
| eprewmean          | -4.96        |
| explained_variance | -0.0866      |
| fps                | 799          |
| nupdates           | 509          |
| policy_entropy     | 0.027784934  |
| policy_loss        | 0.0008340336 |
| serial_timesteps   | 65152        |
| time_elapsed       | 2.63e+03     |
| total_timesteps    | 2084864      |
| value_loss         | 0.0020513323 |
-------------------------------------
--------------------------------------
| approxkl           | 0.0026242172  |
| clipfrac           | 0.004628906   |
| eplenmean          | 1.62e+03      |
| eprewmean          | -3.96         |
| explained_variance | 0.272         |
| fps                | 798           |
| nupdates           | 510           |
| policy_entropy     | 0.014723658   |
| policy_loss        | -0.0013554661 |
| serial_timesteps   | 65280         |
|

---------------------------------------
| approxkl           | 5.4500557e-11  |
| clipfrac           | 0.0            |
| eplenmean          | 1.62e+03       |
| eprewmean          | -7.32          |
| explained_variance | 0.961          |
| fps                | 780            |
| nupdates           | 523            |
| policy_entropy     | 0.00093978347  |
| policy_loss        | -1.1770055e-06 |
| serial_timesteps   | 66944          |
| time_elapsed       | 2.7e+03        |
| total_timesteps    | 2142208        |
| value_loss         | 4.8100554e-07  |
---------------------------------------
---------------------------------------
| approxkl           | 1.04462064e-10 |
| clipfrac           | 0.0            |
| eplenmean          | 1.62e+03       |
| eprewmean          | -7.32          |
| explained_variance | 0.963          |
| fps                | 782            |
| nupdates           | 524            |
| policy_entropy     | 0.000872485    |
| policy_loss        | -1.2968947e-06 |


------------------------------------
| approxkl           | 0.009077554 |
| clipfrac           | 0.10553711  |
| eplenmean          | 1.62e+03    |
| eprewmean          | -14.6       |
| explained_variance | 0.0807      |
| fps                | 798         |
| nupdates           | 537         |
| policy_entropy     | 0.15619573  |
| policy_loss        | 0.012158603 |
| serial_timesteps   | 68736       |
| time_elapsed       | 2.78e+03    |
| total_timesteps    | 2199552     |
| value_loss         | 0.017861327 |
------------------------------------
------------------------------------
| approxkl           | 0.00582828  |
| clipfrac           | 0.07745117  |
| eplenmean          | 1.6e+03     |
| eprewmean          | -32.1       |
| explained_variance | 0.205       |
| fps                | 785         |
| nupdates           | 538         |
| policy_entropy     | 0.12822196  |
| policy_loss        | 0.010180689 |
| serial_timesteps   | 68864       |
| time_elapsed       | 2.78e+03    |
|

-------------------------------------
| approxkl           | 0.0035291659 |
| clipfrac           | 0.037578125  |
| eplenmean          | 1.53e+03     |
| eprewmean          | -87.6        |
| explained_variance | 0.232        |
| fps                | 796          |
| nupdates           | 552          |
| policy_entropy     | 0.11754408   |
| policy_loss        | 0.004212859  |
| serial_timesteps   | 70656        |
| time_elapsed       | 2.85e+03     |
| total_timesteps    | 2260992      |
| value_loss         | 0.009554637  |
-------------------------------------
-------------------------------------
| approxkl           | 0.0032446764 |
| clipfrac           | 0.026376953  |
| eplenmean          | 1.53e+03     |
| eprewmean          | -88.1        |
| explained_variance | 0.226        |
| fps                | 825          |
| nupdates           | 553          |
| policy_entropy     | 0.10074826   |
| policy_loss        | 0.0024590718 |
| serial_timesteps   | 70784        |
| time_elaps

--------------------------------------
| approxkl           | 0.00078487286 |
| clipfrac           | 0.00546875    |
| eplenmean          | 1.53e+03      |
| eprewmean          | -126          |
| explained_variance | 0.0991        |
| fps                | 807           |
| nupdates           | 567           |
| policy_entropy     | 0.022190077   |
| policy_loss        | 6.99896e-05   |
| serial_timesteps   | 72576         |
| time_elapsed       | 2.93e+03      |
| total_timesteps    | 2322432       |
| value_loss         | 0.0007497322  |
--------------------------------------
--------------------------------------
| approxkl           | 0.0009275068  |
| clipfrac           | 0.0042382814  |
| eplenmean          | 1.53e+03      |
| eprewmean          | -126          |
| explained_variance | 0.00127       |
| fps                | 801           |
| nupdates           | 568           |
| policy_entropy     | 0.023332871   |
| policy_loss        | 0.0021716265  |
| serial_timesteps   | 72

---------------------------------------
| approxkl           | 0.0021221468   |
| clipfrac           | 0.005654297    |
| eplenmean          | 1.57e+03       |
| eprewmean          | -94.2          |
| explained_variance | 0.242          |
| fps                | 789            |
| nupdates           | 582            |
| policy_entropy     | 0.020754041    |
| policy_loss        | -0.00042831883 |
| serial_timesteps   | 74496          |
| time_elapsed       | 3e+03          |
| total_timesteps    | 2383872        |
| value_loss         | 0.00089501846  |
---------------------------------------
--------------------------------------
| approxkl           | 0.0013022062  |
| clipfrac           | 0.0059179687  |
| eplenmean          | 1.57e+03      |
| eprewmean          | -90.2         |
| explained_variance | -0.224        |
| fps                | 785           |
| nupdates           | 583           |
| policy_entropy     | 0.02354965    |
| policy_loss        | -0.0008943753 |
| serial_t

---------------------------------------
| approxkl           | 0.000120428485 |
| clipfrac           | 0.00203125     |
| eplenmean          | 1.62e+03       |
| eprewmean          | -40.8          |
| explained_variance | -0.258         |
| fps                | 791            |
| nupdates           | 596            |
| policy_entropy     | 0.00974149     |
| policy_loss        | -0.00014175453 |
| serial_timesteps   | 76288          |
| time_elapsed       | 3.08e+03       |
| total_timesteps    | 2441216        |
| value_loss         | 0.00024434886  |
---------------------------------------
--------------------------------------
| approxkl           | 0.00035865974 |
| clipfrac           | 0.0033203126  |
| eplenmean          | 1.62e+03      |
| eprewmean          | -40.6         |
| explained_variance | -0.126        |
| fps                | 815           |
| nupdates           | 597           |
| policy_entropy     | 0.0113354055  |
| policy_loss        | 0.00019203284 |
| serial_t

--------------------------------------
| approxkl           | 0.0002860251  |
| clipfrac           | 0.002109375   |
| eplenmean          | 1.62e+03      |
| eprewmean          | -22           |
| explained_variance | -0.0309       |
| fps                | 801           |
| nupdates           | 610           |
| policy_entropy     | 0.004693937   |
| policy_loss        | -0.0006966466 |
| serial_timesteps   | 78080         |
| time_elapsed       | 3.15e+03      |
| total_timesteps    | 2498560       |
| value_loss         | 0.00032112817 |
--------------------------------------
--------------------------------------
| approxkl           | 0.00023741374 |
| clipfrac           | 0.0026464844  |
| eplenmean          | 1.62e+03      |
| eprewmean          | -21.9         |
| explained_variance | -0.0929       |
| fps                | 786           |
| nupdates           | 611           |
| policy_entropy     | 0.0048317574  |
| policy_loss        | -0.0010260113 |
| serial_timesteps   | 78

---------------------------------------
| approxkl           | 0.0013586428   |
| clipfrac           | 0.0080664065   |
| eplenmean          | 1.62e+03       |
| eprewmean          | -7.42          |
| explained_variance | 0.101          |
| fps                | 820            |
| nupdates           | 624            |
| policy_entropy     | 0.021548035    |
| policy_loss        | -0.00013639237 |
| serial_timesteps   | 79872          |
| time_elapsed       | 3.22e+03       |
| total_timesteps    | 2555904        |
| value_loss         | 0.0017298886   |
---------------------------------------
--------------------------------------
| approxkl           | 0.0007954093  |
| clipfrac           | 0.009580079   |
| eplenmean          | 1.62e+03      |
| eprewmean          | -4.5          |
| explained_variance | 0.182         |
| fps                | 804           |
| nupdates           | 625           |
| policy_entropy     | 0.027885545   |
| policy_loss        | 0.0008450152  |
| serial_t

---------------------------------------
| approxkl           | 3.367607e-06   |
| clipfrac           | 0.000107421874 |
| eplenmean          | 1.62e+03       |
| eprewmean          | 9.38           |
| explained_variance | -0.381         |
| fps                | 779            |
| nupdates           | 639            |
| policy_entropy     | 0.0019114972   |
| policy_loss        | 0.0001239491   |
| serial_timesteps   | 81792          |
| time_elapsed       | 3.3e+03        |
| total_timesteps    | 2617344        |
| value_loss         | 8.498656e-06   |
---------------------------------------
--------------------------------------
| approxkl           | 0.00018065644 |
| clipfrac           | 0.0014941406  |
| eplenmean          | 1.62e+03      |
| eprewmean          | 9.38          |
| explained_variance | -0.236        |
| fps                | 779           |
| nupdates           | 640           |
| policy_entropy     | 0.004429927   |
| policy_loss        | 0.00020760513 |
| serial_t

--------------------------------------
| approxkl           | 0.00050480285 |
| clipfrac           | 0.0030859376  |
| eplenmean          | 1.62e+03      |
| eprewmean          | 19.2          |
| explained_variance | 0.509         |
| fps                | 792           |
| nupdates           | 653           |
| policy_entropy     | 0.004938921   |
| policy_loss        | 0.002111361   |
| serial_timesteps   | 83584         |
| time_elapsed       | 3.37e+03      |
| total_timesteps    | 2674688       |
| value_loss         | 0.0013045628  |
--------------------------------------
---------------------------------------
| approxkl           | 0.0002845774   |
| clipfrac           | 0.0014160157   |
| eplenmean          | 1.62e+03       |
| eprewmean          | 17.8           |
| explained_variance | -0.0466        |
| fps                | 803            |
| nupdates           | 654            |
| policy_entropy     | 0.007347722    |
| policy_loss        | -0.00036372236 |
| serial_timest

-------------------------------------
| approxkl           | 0.0029853068 |
| clipfrac           | 0.013017578  |
| eplenmean          | 1.62e+03     |
| eprewmean          | 23           |
| explained_variance | 0.21         |
| fps                | 777          |
| nupdates           | 668          |
| policy_entropy     | 0.03824328   |
| policy_loss        | 0.0020117993 |
| serial_timesteps   | 85504        |
| time_elapsed       | 3.45e+03     |
| total_timesteps    | 2736128      |
| value_loss         | 0.0036428485 |
-------------------------------------
---------------------------------------
| approxkl           | 0.0026834684   |
| clipfrac           | 0.009589843    |
| eplenmean          | 1.62e+03       |
| eprewmean          | 20.2           |
| explained_variance | 0.348          |
| fps                | 752            |
| nupdates           | 669            |
| policy_entropy     | 0.02891353     |
| policy_loss        | -4.3753764e-05 |
| serial_timesteps   | 85632  

-------------------------------------
| approxkl           | 0.0013704537 |
| clipfrac           | 0.011894532  |
| eplenmean          | 1.62e+03     |
| eprewmean          | 18.8         |
| explained_variance | 0.135        |
| fps                | 805          |
| nupdates           | 683          |
| policy_entropy     | 0.02533743   |
| policy_loss        | 0.0023803452 |
| serial_timesteps   | 87424        |
| time_elapsed       | 3.52e+03     |
| total_timesteps    | 2797568      |
| value_loss         | 0.0011478583 |
-------------------------------------
-------------------------------------
| approxkl           | 0.0025386862 |
| clipfrac           | 0.015400391  |
| eplenmean          | 1.62e+03     |
| eprewmean          | 19.2         |
| explained_variance | 0.215        |
| fps                | 825          |
| nupdates           | 684          |
| policy_entropy     | 0.02712774   |
| policy_loss        | 0.006319783  |
| serial_timesteps   | 87552        |
| time_elaps

--------------------------------------
| approxkl           | 0.0034966199  |
| clipfrac           | 0.0029101563  |
| eplenmean          | 1.62e+03      |
| eprewmean          | 22.6          |
| explained_variance | -0.0387       |
| fps                | 808           |
| nupdates           | 698           |
| policy_entropy     | 0.01307482    |
| policy_loss        | 0.00031372678 |
| serial_timesteps   | 89344         |
| time_elapsed       | 3.6e+03       |
| total_timesteps    | 2859008       |
| value_loss         | 0.0004356748  |
--------------------------------------
--------------------------------------
| approxkl           | 0.00080688874 |
| clipfrac           | 0.0068066404  |
| eplenmean          | 1.62e+03      |
| eprewmean          | 20.8          |
| explained_variance | -0.183        |
| fps                | 801           |
| nupdates           | 699           |
| policy_entropy     | 0.012693261   |
| policy_loss        | -5.463351e-05 |
| serial_timesteps   | 89

--------------------------------------
| approxkl           | 0.00040084997 |
| clipfrac           | 0.0027246093  |
| eplenmean          | 1.62e+03      |
| eprewmean          | 18.7          |
| explained_variance | -0.134        |
| fps                | 781           |
| nupdates           | 712           |
| policy_entropy     | 0.0073409383  |
| policy_loss        | 0.0020627186  |
| serial_timesteps   | 91136         |
| time_elapsed       | 3.67e+03      |
| total_timesteps    | 2916352       |
| value_loss         | 0.00028166387 |
--------------------------------------
--------------------------------------
| approxkl           | 0.0040453183  |
| clipfrac           | 0.0052539064  |
| eplenmean          | 1.62e+03      |
| eprewmean          | 21.1          |
| explained_variance | 0.435         |
| fps                | 788           |
| nupdates           | 713           |
| policy_entropy     | 0.007173745   |
| policy_loss        | 0.00048793296 |
| serial_timesteps   | 91

--------------------------------------
| approxkl           | 0.00032029423 |
| clipfrac           | 0.0015332032  |
| eplenmean          | 1.62e+03      |
| eprewmean          | 22            |
| explained_variance | 0.535         |
| fps                | 787           |
| nupdates           | 727           |
| policy_entropy     | 0.0075830654  |
| policy_loss        | 0.00021496952 |
| serial_timesteps   | 93056         |
| time_elapsed       | 3.75e+03      |
| total_timesteps    | 2977792       |
| value_loss         | 0.0017291047  |
--------------------------------------
-------------------------------------
| approxkl           | 0.0011764815 |
| clipfrac           | 0.0056933593 |
| eplenmean          | 1.62e+03     |
| eprewmean          | 18.7         |
| explained_variance | -0.565       |
| fps                | 812          |
| nupdates           | 728          |
| policy_entropy     | 0.011947551  |
| policy_loss        | 0.0034423675 |
| serial_timesteps   | 93184       

-------------------------------------
| approxkl           | 0.0026695684 |
| clipfrac           | 0.009824219  |
| eplenmean          | 1.62e+03     |
| eprewmean          | 26.6         |
| explained_variance | 0.374        |
| fps                | 818          |
| nupdates           | 742          |
| policy_entropy     | 0.026629344  |
| policy_loss        | 0.0023354772 |
| serial_timesteps   | 94976        |
| time_elapsed       | 3.82e+03     |
| total_timesteps    | 3039232      |
| value_loss         | 0.0038274112 |
-------------------------------------
--------------------------------------
| approxkl           | 0.0014947255  |
| clipfrac           | 0.008056641   |
| eplenmean          | 1.62e+03      |
| eprewmean          | 23.4          |
| explained_variance | 0.159         |
| fps                | 813           |
| nupdates           | 743           |
| policy_entropy     | 0.025852408   |
| policy_loss        | 0.00030218513 |
| serial_timesteps   | 95104         |
|

--------------------------------------
| approxkl           | 0.0038013344  |
| clipfrac           | 0.019101562   |
| eplenmean          | 1.62e+03      |
| eprewmean          | 16.1          |
| explained_variance | 0.131         |
| fps                | 772           |
| nupdates           | 757           |
| policy_entropy     | 0.06490457    |
| policy_loss        | -0.0001336431 |
| serial_timesteps   | 96896         |
| time_elapsed       | 3.9e+03       |
| total_timesteps    | 3100672       |
| value_loss         | 0.0034150789  |
--------------------------------------
--------------------------------------
| approxkl           | 0.0039880974  |
| clipfrac           | 0.022861328   |
| eplenmean          | 1.62e+03      |
| eprewmean          | 21.3          |
| explained_variance | 0.117         |
| fps                | 768           |
| nupdates           | 758           |
| policy_entropy     | 0.07648236    |
| policy_loss        | 0.00094349525 |
| serial_timesteps   | 97

--------------------------------------
| approxkl           | 0.004797687   |
| clipfrac           | 0.01850586    |
| eplenmean          | 1.61e+03      |
| eprewmean          | -8.44         |
| explained_variance | 0.451         |
| fps                | 800           |
| nupdates           | 772           |
| policy_entropy     | 0.04741771    |
| policy_loss        | 0.00085551105 |
| serial_timesteps   | 98816         |
| time_elapsed       | 3.98e+03      |
| total_timesteps    | 3162112       |
| value_loss         | 0.0041048583  |
--------------------------------------
------------------------------------
| approxkl           | 0.003925385 |
| clipfrac           | 0.015742188 |
| eplenmean          | 1.61e+03    |
| eprewmean          | -10         |
| explained_variance | 0.506       |
| fps                | 801         |
| nupdates           | 773         |
| policy_entropy     | 0.05016545  |
| policy_loss        | 0.002509298 |
| serial_timesteps   | 98944       |
| time_e

-------------------------------------
| approxkl           | 0.0040793475 |
| clipfrac           | 0.010371094  |
| eplenmean          | 1.61e+03     |
| eprewmean          | 4.1          |
| explained_variance | 0.487        |
| fps                | 806          |
| nupdates           | 787          |
| policy_entropy     | 0.039211992  |
| policy_loss        | 0.005118847  |
| serial_timesteps   | 100736       |
| time_elapsed       | 4.05e+03     |
| total_timesteps    | 3223552      |
| value_loss         | 0.0062823957 |
-------------------------------------
-------------------------------------
| approxkl           | 0.0036329683 |
| clipfrac           | 0.011904296  |
| eplenmean          | 1.61e+03     |
| eprewmean          | -1.04        |
| explained_variance | 0.436        |
| fps                | 803          |
| nupdates           | 788          |
| policy_entropy     | 0.036629237  |
| policy_loss        | 0.0036023674 |
| serial_timesteps   | 100864       |
| time_elaps

--------------------------------------
| approxkl           | 0.0008222887  |
| clipfrac           | 0.004658203   |
| eplenmean          | 1.61e+03      |
| eprewmean          | 27.2          |
| explained_variance | -0.0658       |
| fps                | 810           |
| nupdates           | 802           |
| policy_entropy     | 0.025723593   |
| policy_loss        | 0.00064730743 |
| serial_timesteps   | 102656        |
| time_elapsed       | 4.13e+03      |
| total_timesteps    | 3284992       |
| value_loss         | 0.0026939062  |
--------------------------------------
-------------------------------------
| approxkl           | 0.0027831274 |
| clipfrac           | 0.010664063  |
| eplenmean          | 1.61e+03     |
| eprewmean          | 19.8         |
| explained_variance | 0.494        |
| fps                | 821          |
| nupdates           | 803          |
| policy_entropy     | 0.028728018  |
| policy_loss        | 0.0014889942 |
| serial_timesteps   | 102784      

---------------------------------------
| approxkl           | 0.00073598383  |
| clipfrac           | 0.0061914064   |
| eplenmean          | 1.62e+03       |
| eprewmean          | 37.9           |
| explained_variance | 0.32           |
| fps                | 773            |
| nupdates           | 817            |
| policy_entropy     | 0.0290102      |
| policy_loss        | -0.00033152467 |
| serial_timesteps   | 104576         |
| time_elapsed       | 4.21e+03       |
| total_timesteps    | 3346432        |
| value_loss         | 0.001466788    |
---------------------------------------
--------------------------------------
| approxkl           | 0.00180711    |
| clipfrac           | 0.0067382813  |
| eplenmean          | 1.62e+03      |
| eprewmean          | 37.9          |
| explained_variance | 0.594         |
| fps                | 801           |
| nupdates           | 818           |
| policy_entropy     | 0.03934411    |
| policy_loss        | 0.00015988693 |
| serial_t

--------------------------------------
| approxkl           | 0.00031135886 |
| clipfrac           | 0.0030957032  |
| eplenmean          | 1.62e+03      |
| eprewmean          | 48.7          |
| explained_variance | 0.863         |
| fps                | 804           |
| nupdates           | 832           |
| policy_entropy     | 0.025446313   |
| policy_loss        | 2.6818472e-05 |
| serial_timesteps   | 106496        |
| time_elapsed       | 4.28e+03      |
| total_timesteps    | 3407872       |
| value_loss         | 0.0010781456  |
--------------------------------------
-------------------------------------
| approxkl           | 0.001928898  |
| clipfrac           | 0.007548828  |
| eplenmean          | 1.62e+03     |
| eprewmean          | 48.3         |
| explained_variance | 0.67         |
| fps                | 797          |
| nupdates           | 833          |
| policy_entropy     | 0.03393703   |
| policy_loss        | 0.0009257978 |
| serial_timesteps   | 106624      

-------------------------------------
| approxkl           | 0.0038576955 |
| clipfrac           | 0.004921875  |
| eplenmean          | 1.62e+03     |
| eprewmean          | 32.2         |
| explained_variance | 0.0579       |
| fps                | 813          |
| nupdates           | 847          |
| policy_entropy     | 0.021734416  |
| policy_loss        | 0.0009897156 |
| serial_timesteps   | 108416       |
| time_elapsed       | 4.36e+03     |
| total_timesteps    | 3469312      |
| value_loss         | 0.0033031711 |
-------------------------------------
-------------------------------------
| approxkl           | 0.002440068  |
| clipfrac           | 0.007861328  |
| eplenmean          | 1.62e+03     |
| eprewmean          | 23.8         |
| explained_variance | 0.309        |
| fps                | 798          |
| nupdates           | 848          |
| policy_entropy     | 0.020611655  |
| policy_loss        | 0.0008066757 |
| serial_timesteps   | 108544       |
| time_elaps

-------------------------------------
| approxkl           | 0.002943768  |
| clipfrac           | 0.008671875  |
| eplenmean          | 1.62e+03     |
| eprewmean          | 5.44         |
| explained_variance | 0.114        |
| fps                | 780          |
| nupdates           | 862          |
| policy_entropy     | 0.023476202  |
| policy_loss        | 0.0014704105 |
| serial_timesteps   | 110336       |
| time_elapsed       | 4.43e+03     |
| total_timesteps    | 3530752      |
| value_loss         | 0.0025579021 |
-------------------------------------
--------------------------------------
| approxkl           | 0.003582952   |
| clipfrac           | 0.004824219   |
| eplenmean          | 1.62e+03      |
| eprewmean          | 3.3           |
| explained_variance | 0.707         |
| fps                | 798           |
| nupdates           | 863           |
| policy_entropy     | 0.02680907    |
| policy_loss        | 0.00050096767 |
| serial_timesteps   | 110464        |
|

-------------------------------------
| approxkl           | 0.002051309  |
| clipfrac           | 0.0100195315 |
| eplenmean          | 1.62e+03     |
| eprewmean          | 20.1         |
| explained_variance | 0.65         |
| fps                | 803          |
| nupdates           | 877          |
| policy_entropy     | 0.0318363    |
| policy_loss        | 0.0015107217 |
| serial_timesteps   | 112256       |
| time_elapsed       | 4.51e+03     |
| total_timesteps    | 3592192      |
| value_loss         | 0.0040446534 |
-------------------------------------
-------------------------------------
| approxkl           | 0.0010324882 |
| clipfrac           | 0.006875     |
| eplenmean          | 1.62e+03     |
| eprewmean          | 22.5         |
| explained_variance | 0.581        |
| fps                | 786          |
| nupdates           | 878          |
| policy_entropy     | 0.02536049   |
| policy_loss        | 0.0005876714 |
| serial_timesteps   | 112384       |
| time_elaps

---------------------------------------
| approxkl           | 0.0004100506   |
| clipfrac           | 0.0027148437   |
| eplenmean          | 1.62e+03       |
| eprewmean          | 60.8           |
| explained_variance | 0.113          |
| fps                | 796            |
| nupdates           | 892            |
| policy_entropy     | 0.01969974     |
| policy_loss        | -0.00025807417 |
| serial_timesteps   | 114176         |
| time_elapsed       | 4.59e+03       |
| total_timesteps    | 3653632        |
| value_loss         | 0.0016751533   |
---------------------------------------
--------------------------------------
| approxkl           | 0.00027192902 |
| clipfrac           | 0.0026367188  |
| eplenmean          | 1.62e+03      |
| eprewmean          | 60.8          |
| explained_variance | 0.698         |
| fps                | 797           |
| nupdates           | 893           |
| policy_entropy     | 0.022069165   |
| policy_loss        | 3.127152e-05  |
| serial_t

-------------------------------------
| approxkl           | 0.0010418597 |
| clipfrac           | 0.005390625  |
| eplenmean          | 1.62e+03     |
| eprewmean          | 68           |
| explained_variance | 0.87         |
| fps                | 813          |
| nupdates           | 906          |
| policy_entropy     | 0.015908172  |
| policy_loss        | 0.0017363341 |
| serial_timesteps   | 115968       |
| time_elapsed       | 4.66e+03     |
| total_timesteps    | 3710976      |
| value_loss         | 0.0041138157 |
-------------------------------------
-------------------------------------
| approxkl           | 0.0009389345 |
| clipfrac           | 0.0029101563 |
| eplenmean          | 1.62e+03     |
| eprewmean          | 67.4         |
| explained_variance | 0.91         |
| fps                | 784          |
| nupdates           | 907          |
| policy_entropy     | 0.010143544  |
| policy_loss        | 0.0008294162 |
| serial_timesteps   | 116096       |
| time_elaps

--------------------------------------
| approxkl           | 0.00093358837 |
| clipfrac           | 0.0023632813  |
| eplenmean          | 1.62e+03      |
| eprewmean          | 80.5          |
| explained_variance | 0.832         |
| fps                | 798           |
| nupdates           | 921           |
| policy_entropy     | 0.020423807   |
| policy_loss        | 0.0012267963  |
| serial_timesteps   | 117888        |
| time_elapsed       | 4.74e+03      |
| total_timesteps    | 3772416       |
| value_loss         | 0.0040857987  |
--------------------------------------
--------------------------------------
| approxkl           | 0.00070416665 |
| clipfrac           | 0.0040527345  |
| eplenmean          | 1.62e+03      |
| eprewmean          | 77            |
| explained_variance | 0.837         |
| fps                | 808           |
| nupdates           | 922           |
| policy_entropy     | 0.01973526    |
| policy_loss        | -8.703355e-05 |
| serial_timesteps   | 11

---------------------------------------
| approxkl           | 0.0001163818   |
| clipfrac           | 0.00083007815  |
| eplenmean          | 1.62e+03       |
| eprewmean          | 58.6           |
| explained_variance | -0.00309       |
| fps                | 790            |
| nupdates           | 936            |
| policy_entropy     | 0.010932676    |
| policy_loss        | -0.00026524748 |
| serial_timesteps   | 119808         |
| time_elapsed       | 4.81e+03       |
| total_timesteps    | 3833856        |
| value_loss         | 0.00067068974  |
---------------------------------------
---------------------------------------
| approxkl           | 0.000120873185 |
| clipfrac           | 0.00140625     |
| eplenmean          | 1.62e+03       |
| eprewmean          | 59.2           |
| explained_variance | 0.543          |
| fps                | 784            |
| nupdates           | 937            |
| policy_entropy     | 0.012977146    |
| policy_loss        | -0.00010100077 |


Saving to /tmp/openai-2018-05-30-18-51-01-525742/checkpoints/00950
-------------------------------------
| approxkl           | 0.0009973377 |
| clipfrac           | 0.004189453  |
| eplenmean          | 1.62e+03     |
| eprewmean          | 57.5         |
| explained_variance | 0.877        |
| fps                | 804          |
| nupdates           | 951          |
| policy_entropy     | 0.015158207  |
| policy_loss        | 0.0011928346 |
| serial_timesteps   | 121728       |
| time_elapsed       | 4.89e+03     |
| total_timesteps    | 3895296      |
| value_loss         | 0.002830927  |
-------------------------------------
-------------------------------------
| approxkl           | 0.0012599904 |
| clipfrac           | 0.0046484377 |
| eplenmean          | 1.62e+03     |
| eprewmean          | 48.2         |
| explained_variance | 0.897        |
| fps                | 806          |
| nupdates           | 952          |
| policy_entropy     | 0.019297173  |
| policy_loss        

--------------------------------------
| approxkl           | 0.0006468378  |
| clipfrac           | 0.0032910155  |
| eplenmean          | 1.62e+03      |
| eprewmean          | 38.8          |
| explained_variance | 0.934         |
| fps                | 804           |
| nupdates           | 966           |
| policy_entropy     | 0.02061271    |
| policy_loss        | 0.00018141791 |
| serial_timesteps   | 123648        |
| time_elapsed       | 4.97e+03      |
| total_timesteps    | 3956736       |
| value_loss         | 0.0017203358  |
--------------------------------------
--------------------------------------
| approxkl           | 0.00087885035 |
| clipfrac           | 0.004091797   |
| eplenmean          | 1.62e+03      |
| eprewmean          | 40.6          |
| explained_variance | 0.862         |
| fps                | 820           |
| nupdates           | 967           |
| policy_entropy     | 0.021720454   |
| policy_loss        | 0.001415864   |
| serial_timesteps   | 12

-------------------------------------
| approxkl           | 0.0016517981 |
| clipfrac           | 0.008847657  |
| eplenmean          | 1.62e+03     |
| eprewmean          | 76.2         |
| explained_variance | 0.883        |
| fps                | 779          |
| nupdates           | 981          |
| policy_entropy     | 0.030947855  |
| policy_loss        | 0.001643023  |
| serial_timesteps   | 125568       |
| time_elapsed       | 5.05e+03     |
| total_timesteps    | 4018176      |
| value_loss         | 0.004167121  |
-------------------------------------
-------------------------------------
| approxkl           | 0.0023086437 |
| clipfrac           | 0.007255859  |
| eplenmean          | 1.62e+03     |
| eprewmean          | 71.9         |
| explained_variance | 0.791        |
| fps                | 809          |
| nupdates           | 982          |
| policy_entropy     | 0.030225726  |
| policy_loss        | 0.0005409771 |
| serial_timesteps   | 125696       |
| time_elaps

-------------------------------------
| approxkl           | 0.0022886957 |
| clipfrac           | 0.011923828  |
| eplenmean          | 1.62e+03     |
| eprewmean          | 112          |
| explained_variance | 0.532        |
| fps                | 802          |
| nupdates           | 996          |
| policy_entropy     | 0.033293806  |
| policy_loss        | 0.0010787564 |
| serial_timesteps   | 127488       |
| time_elapsed       | 5.12e+03     |
| total_timesteps    | 4079616      |
| value_loss         | 0.0113587305 |
-------------------------------------
-------------------------------------
| approxkl           | 0.001595463  |
| clipfrac           | 0.012216797  |
| eplenmean          | 1.62e+03     |
| eprewmean          | 112          |
| explained_variance | 0.869        |
| fps                | 803          |
| nupdates           | 997          |
| policy_entropy     | 0.040904272  |
| policy_loss        | 0.0025996224 |
| serial_timesteps   | 127616       |
| time_elaps

--------------------------------------
| approxkl           | 0.0015613869  |
| clipfrac           | 0.007050781   |
| eplenmean          | 1.62e+03      |
| eprewmean          | 89.2          |
| explained_variance | 0.78          |
| fps                | 827           |
| nupdates           | 1011          |
| policy_entropy     | 0.032269873   |
| policy_loss        | -0.0006500472 |
| serial_timesteps   | 129408        |
| time_elapsed       | 5.2e+03       |
| total_timesteps    | 4141056       |
| value_loss         | 0.0039280276  |
--------------------------------------
-------------------------------------
| approxkl           | 0.0026005553 |
| clipfrac           | 0.016425781  |
| eplenmean          | 1.62e+03     |
| eprewmean          | 96.9         |
| explained_variance | 0.865        |
| fps                | 790          |
| nupdates           | 1012         |
| policy_entropy     | 0.042130277  |
| policy_loss        | 0.0037298976 |
| serial_timesteps   | 129536      

-------------------------------------
| approxkl           | 0.0014129898 |
| clipfrac           | 0.010996094  |
| eplenmean          | 1.62e+03     |
| eprewmean          | 80.5         |
| explained_variance | 0.84         |
| fps                | 820          |
| nupdates           | 1026         |
| policy_entropy     | 0.049278315  |
| policy_loss        | 0.0015122336 |
| serial_timesteps   | 131328       |
| time_elapsed       | 5.28e+03     |
| total_timesteps    | 4202496      |
| value_loss         | 0.007883677  |
-------------------------------------
--------------------------------------
| approxkl           | 0.0006750945  |
| clipfrac           | 0.0072167967  |
| eplenmean          | 1.62e+03      |
| eprewmean          | 82.6          |
| explained_variance | 0.123         |
| fps                | 789           |
| nupdates           | 1027          |
| policy_entropy     | 0.047601294   |
| policy_loss        | 0.00041708368 |
| serial_timesteps   | 131456        |
|

-------------------------------------
| approxkl           | 0.0018998646 |
| clipfrac           | 0.007158203  |
| eplenmean          | 1.62e+03     |
| eprewmean          | 73.6         |
| explained_variance | 0.946        |
| fps                | 774          |
| nupdates           | 1041         |
| policy_entropy     | 0.025953755  |
| policy_loss        | 0.0009848379 |
| serial_timesteps   | 133248       |
| time_elapsed       | 5.35e+03     |
| total_timesteps    | 4263936      |
| value_loss         | 0.00380642   |
-------------------------------------
--------------------------------------
| approxkl           | 0.0006454017  |
| clipfrac           | 0.0041503906  |
| eplenmean          | 1.62e+03      |
| eprewmean          | 73.6          |
| explained_variance | 0.943         |
| fps                | 804           |
| nupdates           | 1042          |
| policy_entropy     | 0.017664116   |
| policy_loss        | 0.00095419807 |
| serial_timesteps   | 133376        |
|

-------------------------------------
| approxkl           | 0.0011974884 |
| clipfrac           | 0.0070214844 |
| eplenmean          | 1.62e+03     |
| eprewmean          | 68.6         |
| explained_variance | 0.89         |
| fps                | 798          |
| nupdates           | 1056         |
| policy_entropy     | 0.03246871   |
| policy_loss        | 0.0014217524 |
| serial_timesteps   | 135168       |
| time_elapsed       | 5.43e+03     |
| total_timesteps    | 4325376      |
| value_loss         | 0.008243366  |
-------------------------------------
-------------------------------------
| approxkl           | 0.0016922988 |
| clipfrac           | 0.0073242188 |
| eplenmean          | 1.62e+03     |
| eprewmean          | 62.5         |
| explained_variance | 0.957        |
| fps                | 802          |
| nupdates           | 1057         |
| policy_entropy     | 0.03321161   |
| policy_loss        | 0.001425807  |
| serial_timesteps   | 135296       |
| time_elaps

-------------------------------------
| approxkl           | 0.0018532594 |
| clipfrac           | 0.013017578  |
| eplenmean          | 1.62e+03     |
| eprewmean          | 45.2         |
| explained_variance | 0.838        |
| fps                | 778          |
| nupdates           | 1071         |
| policy_entropy     | 0.033485435  |
| policy_loss        | 0.0019809876 |
| serial_timesteps   | 137088       |
| time_elapsed       | 5.51e+03     |
| total_timesteps    | 4386816      |
| value_loss         | 0.007959519  |
-------------------------------------
--------------------------------------
| approxkl           | 0.0013586008  |
| clipfrac           | 0.0093652345  |
| eplenmean          | 1.62e+03      |
| eprewmean          | 46.8          |
| explained_variance | 0.634         |
| fps                | 781           |
| nupdates           | 1072          |
| policy_entropy     | 0.03454877    |
| policy_loss        | 0.00061589276 |
| serial_timesteps   | 137216        |
|

---------------------------------------
| approxkl           | 0.0009481004   |
| clipfrac           | 0.012753907    |
| eplenmean          | 1.62e+03       |
| eprewmean          | 55.4           |
| explained_variance | 0.53           |
| fps                | 712            |
| nupdates           | 1086           |
| policy_entropy     | 0.03595843     |
| policy_loss        | -0.00024030778 |
| serial_timesteps   | 139008         |
| time_elapsed       | 5.59e+03       |
| total_timesteps    | 4448256        |
| value_loss         | 0.0027897777   |
---------------------------------------
-------------------------------------
| approxkl           | 0.0040761344 |
| clipfrac           | 0.017607423  |
| eplenmean          | 1.62e+03     |
| eprewmean          | 55.2         |
| explained_variance | 0.803        |
| fps                | 727          |
| nupdates           | 1087         |
| policy_entropy     | 0.033853184  |
| policy_loss        | 0.001185445  |
| serial_timesteps  

Saving to /tmp/openai-2018-05-30-18-51-01-525742/checkpoints/01100
--------------------------------------
| approxkl           | 0.0017485692  |
| clipfrac           | 0.0057226564  |
| eplenmean          | 1.62e+03      |
| eprewmean          | 61.1          |
| explained_variance | 0.916         |
| fps                | 794           |
| nupdates           | 1101          |
| policy_entropy     | 0.026790783   |
| policy_loss        | 0.00044782396 |
| serial_timesteps   | 140928        |
| time_elapsed       | 5.67e+03      |
| total_timesteps    | 4509696       |
| value_loss         | 0.004336658   |
--------------------------------------
-------------------------------------
| approxkl           | 0.0010387463 |
| clipfrac           | 0.0043261717 |
| eplenmean          | 1.62e+03     |
| eprewmean          | 66.4         |
| explained_variance | 0.0974       |
| fps                | 780          |
| nupdates           | 1102         |
| policy_entropy     | 0.025499897  |
| poli

-------------------------------------
| approxkl           | 0.0021521472 |
| clipfrac           | 0.011083985  |
| eplenmean          | 1.62e+03     |
| eprewmean          | 105          |
| explained_variance | 0.831        |
| fps                | 825          |
| nupdates           | 1116         |
| policy_entropy     | 0.029639294  |
| policy_loss        | 0.0016286586 |
| serial_timesteps   | 142848       |
| time_elapsed       | 5.75e+03     |
| total_timesteps    | 4571136      |
| value_loss         | 0.006571646  |
-------------------------------------
-------------------------------------
| approxkl           | 0.0023788211 |
| clipfrac           | 0.014833984  |
| eplenmean          | 1.62e+03     |
| eprewmean          | 107          |
| explained_variance | 0.891        |
| fps                | 791          |
| nupdates           | 1117         |
| policy_entropy     | 0.043110333  |
| policy_loss        | 0.0016797364 |
| serial_timesteps   | 142976       |
| time_elaps

-------------------------------------
| approxkl           | 0.0021646074 |
| clipfrac           | 0.0073828124 |
| eplenmean          | 1.62e+03     |
| eprewmean          | 103          |
| explained_variance | 0.964        |
| fps                | 773          |
| nupdates           | 1131         |
| policy_entropy     | 0.027949147  |
| policy_loss        | 0.0014751973 |
| serial_timesteps   | 144768       |
| time_elapsed       | 5.82e+03     |
| total_timesteps    | 4632576      |
| value_loss         | 0.0065376866 |
-------------------------------------
---------------------------------------
| approxkl           | 0.0004979124   |
| clipfrac           | 0.0035839844   |
| eplenmean          | 1.62e+03       |
| eprewmean          | 110            |
| explained_variance | 0.0801         |
| fps                | 799            |
| nupdates           | 1132           |
| policy_entropy     | 0.02703933     |
| policy_loss        | -0.00016973968 |
| serial_timesteps   | 144896 

--------------------------------------
| approxkl           | 8.0099e-05    |
| clipfrac           | 0.00071289064 |
| eplenmean          | 1.62e+03      |
| eprewmean          | 98.1          |
| explained_variance | 0.25          |
| fps                | 793           |
| nupdates           | 1146          |
| policy_entropy     | 0.010119685   |
| policy_loss        | -1.662567e-05 |
| serial_timesteps   | 146688        |
| time_elapsed       | 5.9e+03       |
| total_timesteps    | 4694016       |
| value_loss         | 0.0010353116  |
--------------------------------------
-------------------------------------
| approxkl           | 0.0002178807 |
| clipfrac           | 0.0020214843 |
| eplenmean          | 1.62e+03     |
| eprewmean          | 95.9         |
| explained_variance | 0.912        |
| fps                | 792          |
| nupdates           | 1147         |
| policy_entropy     | 0.0105719995 |
| policy_loss        | -0.000273711 |
| serial_timesteps   | 146816      

---------------------------------------
| approxkl           | 8.42232e-05    |
| clipfrac           | 0.00045898437  |
| eplenmean          | 1.62e+03       |
| eprewmean          | 96.1           |
| explained_variance | 0.869          |
| fps                | 793            |
| nupdates           | 1160           |
| policy_entropy     | 0.0057152575   |
| policy_loss        | -0.00025261618 |
| serial_timesteps   | 148480         |
| time_elapsed       | 5.97e+03       |
| total_timesteps    | 4751360        |
| value_loss         | 0.00065559184  |
---------------------------------------
--------------------------------------
| approxkl           | 0.0007200062  |
| clipfrac           | 0.0014453125  |
| eplenmean          | 1.62e+03      |
| eprewmean          | 96.1          |
| explained_variance | 0.858         |
| fps                | 787           |
| nupdates           | 1161          |
| policy_entropy     | 0.005640543   |
| policy_loss        | 0.00046789553 |
| serial_t

--------------------------------------
| approxkl           | 7.226346e-05  |
| clipfrac           | 0.00021484375 |
| eplenmean          | 1.62e+03      |
| eprewmean          | 74.3          |
| explained_variance | 0.00693       |
| fps                | 806           |
| nupdates           | 1175          |
| policy_entropy     | 0.004244433   |
| policy_loss        | 1.3734447e-05 |
| serial_timesteps   | 150400        |
| time_elapsed       | 6.05e+03      |
| total_timesteps    | 4812800       |
| value_loss         | 6.710594e-05  |
--------------------------------------
---------------------------------------
| approxkl           | 0.0007688178   |
| clipfrac           | 0.0008984375   |
| eplenmean          | 1.62e+03       |
| eprewmean          | 79.3           |
| explained_variance | 0.0428         |
| fps                | 770            |
| nupdates           | 1176           |
| policy_entropy     | 0.003923074    |
| policy_loss        | -1.0157022e-05 |
| serial_timest

--------------------------------------
| approxkl           | 0.00017560896 |
| clipfrac           | 9.765625e-06  |
| eplenmean          | 1.62e+03      |
| eprewmean          | 71.6          |
| explained_variance | -0.365        |
| fps                | 800           |
| nupdates           | 1189          |
| policy_entropy     | 0.0011878118  |
| policy_loss        | 6.57748e-06   |
| serial_timesteps   | 152192        |
| time_elapsed       | 6.12e+03      |
| total_timesteps    | 4870144       |
| value_loss         | 8.819196e-05  |
--------------------------------------
--------------------------------------
| approxkl           | 6.2490244e-07 |
| clipfrac           | 9.765625e-06  |
| eplenmean          | 1.62e+03      |
| eprewmean          | 73.2          |
| explained_variance | 0.133         |
| fps                | 808           |
| nupdates           | 1190          |
| policy_entropy     | 0.0013205372  |
| policy_loss        | 2.0830228e-06 |
| serial_timesteps   | 15

-------------------------------------
| approxkl           | 0.0019301623 |
| clipfrac           | 0.0016699219 |
| eplenmean          | 1.62e+03     |
| eprewmean          | 93.8         |
| explained_variance | 0.948        |
| fps                | 775          |
| nupdates           | 1204         |
| policy_entropy     | 0.0021170983 |
| policy_loss        | 0.0004196131 |
| serial_timesteps   | 154112       |
| time_elapsed       | 6.2e+03      |
| total_timesteps    | 4931584      |
| value_loss         | 0.0073325145 |
-------------------------------------
--------------------------------------
| approxkl           | 0.0019688464  |
| clipfrac           | 0.0008203125  |
| eplenmean          | 1.62e+03      |
| eprewmean          | 100           |
| explained_variance | 0.971         |
| fps                | 778           |
| nupdates           | 1205          |
| policy_entropy     | 0.00136581    |
| policy_loss        | -0.0009276412 |
| serial_timesteps   | 154240        |
|

-------------------------------------
| approxkl           | 0.0012227722 |
| clipfrac           | 0.0010351562 |
| eplenmean          | 1.62e+03     |
| eprewmean          | 104          |
| explained_variance | 0.916        |
| fps                | 815          |
| nupdates           | 1219         |
| policy_entropy     | 0.001366513  |
| policy_loss        | 5.502753e-05 |
| serial_timesteps   | 156032       |
| time_elapsed       | 6.27e+03     |
| total_timesteps    | 4993024      |
| value_loss         | 0.0052271592 |
-------------------------------------
-------------------------------------
| approxkl           | 0.0013773058 |
| clipfrac           | 0.0009667969 |
| eplenmean          | 1.62e+03     |
| eprewmean          | 110          |
| explained_variance | 0.969        |
| fps                | 808          |
| nupdates           | 1220         |
| policy_entropy     | 0.0016330256 |
| policy_loss        | 0.0007677227 |
| serial_timesteps   | 156160       |
| time_elaps

--------------------------------------
| approxkl           | 0.00087829883 |
| clipfrac           | 0.00171875    |
| eplenmean          | 1.62e+03      |
| eprewmean          | 136           |
| explained_variance | 0.943         |
| fps                | 801           |
| nupdates           | 1234          |
| policy_entropy     | 0.004537126   |
| policy_loss        | 0.00073414715 |
| serial_timesteps   | 157952        |
| time_elapsed       | 6.35e+03      |
| total_timesteps    | 5054464       |
| value_loss         | 0.0059028035  |
--------------------------------------
--------------------------------------
| approxkl           | 0.00039779753 |
| clipfrac           | 0.0009960937  |
| eplenmean          | 1.62e+03      |
| eprewmean          | 136           |
| explained_variance | 0.975         |
| fps                | 830           |
| nupdates           | 1235          |
| policy_entropy     | 0.0039026556  |
| policy_loss        | 0.00038090712 |
| serial_timesteps   | 15

--------------------------------------
| approxkl           | 0.00026313067 |
| clipfrac           | 0.0012695312  |
| eplenmean          | 1.62e+03      |
| eprewmean          | 113           |
| explained_variance | 0.949         |
| fps                | 789           |
| nupdates           | 1249          |
| policy_entropy     | 0.003643802   |
| policy_loss        | -3.780181e-05 |
| serial_timesteps   | 159872        |
| time_elapsed       | 6.43e+03      |
| total_timesteps    | 5115904       |
| value_loss         | 0.005434017   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0026740935  |
| clipfrac           | 0.003251953   |
| eplenmean          | 1.62e+03      |
| eprewmean          | 113           |
| explained_variance | 0.925         |
| fps                | 783           |
| nupdates           | 1250          |
| policy_entropy     | 0.00462191    |
| policy_loss        | 0.00039668527 |
| serial_timesteps   | 16

-------------------------------------
| approxkl           | 0.00148729   |
| clipfrac           | 0.0010253906 |
| eplenmean          | 1.62e+03     |
| eprewmean          | 130          |
| explained_variance | 0.88         |
| fps                | 820          |
| nupdates           | 1264         |
| policy_entropy     | 0.0020217188 |
| policy_loss        | 0.007818293  |
| serial_timesteps   | 161792       |
| time_elapsed       | 6.51e+03     |
| total_timesteps    | 5177344      |
| value_loss         | 0.0012903856 |
-------------------------------------
--------------------------------------
| approxkl           | 3.877235e-08  |
| clipfrac           | 0.0           |
| eplenmean          | 1.62e+03      |
| eprewmean          | 138           |
| explained_variance | 0.439         |
| fps                | 817           |
| nupdates           | 1265          |
| policy_entropy     | 0.0014647143  |
| policy_loss        | 8.402176e-06  |
| serial_timesteps   | 161920        |
|

---------------------------------------
| approxkl           | 5.0027844e-05  |
| clipfrac           | 0.00028320312  |
| eplenmean          | 1.62e+03       |
| eprewmean          | 156            |
| explained_variance | 0.799          |
| fps                | 803            |
| nupdates           | 1279           |
| policy_entropy     | 0.0028103618   |
| policy_loss        | -2.4404108e-05 |
| serial_timesteps   | 163712         |
| time_elapsed       | 6.58e+03       |
| total_timesteps    | 5238784        |
| value_loss         | 0.0010805003   |
---------------------------------------
--------------------------------------
| approxkl           | 0.00030616263 |
| clipfrac           | 0.0010449219  |
| eplenmean          | 1.62e+03      |
| eprewmean          | 158           |
| explained_variance | 0.867         |
| fps                | 824           |
| nupdates           | 1280          |
| policy_entropy     | 0.0026429966  |
| policy_loss        | 0.00012405669 |
| serial_t

--------------------------------------
| approxkl           | 3.448737e-08  |
| clipfrac           | 0.0           |
| eplenmean          | 1.62e+03      |
| eprewmean          | 157           |
| explained_variance | 0.803         |
| fps                | 811           |
| nupdates           | 1293          |
| policy_entropy     | 0.0016401536  |
| policy_loss        | 1.7387723e-05 |
| serial_timesteps   | 165504        |
| time_elapsed       | 6.66e+03      |
| total_timesteps    | 5296128       |
| value_loss         | 9.285967e-05  |
--------------------------------------
---------------------------------------
| approxkl           | 0.0008728919   |
| clipfrac           | 0.00041015624  |
| eplenmean          | 1.62e+03       |
| eprewmean          | 157            |
| explained_variance | 0.979          |
| fps                | 811            |
| nupdates           | 1294           |
| policy_entropy     | 0.002073669    |
| policy_loss        | -0.00028473546 |
| serial_timest

-------------------------------------
| approxkl           | 0.0010162961 |
| clipfrac           | 0.0012304687 |
| eplenmean          | 1.62e+03     |
| eprewmean          | 135          |
| explained_variance | 0.93         |
| fps                | 803          |
| nupdates           | 1307         |
| policy_entropy     | 0.002704113  |
| policy_loss        | 0.0009121769 |
| serial_timesteps   | 167296       |
| time_elapsed       | 6.73e+03     |
| total_timesteps    | 5353472      |
| value_loss         | 0.008295932  |
-------------------------------------
--------------------------------------
| approxkl           | 0.00085236575 |
| clipfrac           | 0.0012304687  |
| eplenmean          | 1.62e+03      |
| eprewmean          | 135           |
| explained_variance | 0.935         |
| fps                | 827           |
| nupdates           | 1308          |
| policy_entropy     | 0.0029115358  |
| policy_loss        | 0.00031669735 |
| serial_timesteps   | 167424        |
|

-------------------------------------
| approxkl           | 0.0049190866 |
| clipfrac           | 0.0016796875 |
| eplenmean          | 1.62e+03     |
| eprewmean          | 129          |
| explained_variance | 0.94         |
| fps                | 817          |
| nupdates           | 1321         |
| policy_entropy     | 0.002406631  |
| policy_loss        | 0.0018293335 |
| serial_timesteps   | 169088       |
| time_elapsed       | 6.8e+03      |
| total_timesteps    | 5410816      |
| value_loss         | 0.0055171265 |
-------------------------------------
-------------------------------------
| approxkl           | 0.003148456  |
| clipfrac           | 0.0021484375 |
| eplenmean          | 1.62e+03     |
| eprewmean          | 122          |
| explained_variance | 0.927        |
| fps                | 816          |
| nupdates           | 1322         |
| policy_entropy     | 0.0026847245 |
| policy_loss        | 0.0012893829 |
| serial_timesteps   | 169216       |
| time_elaps

---------------------------------------
| approxkl           | 0.00022238832  |
| clipfrac           | 0.00024414062  |
| eplenmean          | 1.62e+03       |
| eprewmean          | 121            |
| explained_variance | 0.159          |
| fps                | 799            |
| nupdates           | 1335           |
| policy_entropy     | 0.0018569011   |
| policy_loss        | -9.6768046e-05 |
| serial_timesteps   | 170880         |
| time_elapsed       | 6.87e+03       |
| total_timesteps    | 5468160        |
| value_loss         | 0.00042202524  |
---------------------------------------
---------------------------------------
| approxkl           | 8.063516e-05   |
| clipfrac           | 0.00044921876  |
| eplenmean          | 1.62e+03       |
| eprewmean          | 109            |
| explained_variance | 0.97           |
| fps                | 812            |
| nupdates           | 1336           |
| policy_entropy     | 0.0016472306   |
| policy_loss        | -0.00036966905 |


--------------------------------------
| approxkl           | 0.004411205   |
| clipfrac           | 0.005869141   |
| eplenmean          | 1.62e+03      |
| eprewmean          | 114           |
| explained_variance | 0.919         |
| fps                | 815           |
| nupdates           | 1349          |
| policy_entropy     | 0.0069276197  |
| policy_loss        | 0.00018068984 |
| serial_timesteps   | 172672        |
| time_elapsed       | 6.94e+03      |
| total_timesteps    | 5525504       |
| value_loss         | 0.008355269   |
--------------------------------------
--------------------------------------
| approxkl           | 0.00095847226 |
| clipfrac           | 0.0006347656  |
| eplenmean          | 1.62e+03      |
| eprewmean          | 121           |
| explained_variance | 0.96          |
| fps                | 818           |
| nupdates           | 1350          |
| policy_entropy     | 0.0029286293  |
| policy_loss        | 0.00047662426 |
| serial_timesteps   | 17

-------------------------------------
| approxkl           | 0.0011404798 |
| clipfrac           | 0.00078125   |
| eplenmean          | 1.62e+03     |
| eprewmean          | 109          |
| explained_variance | 0.979        |
| fps                | 799          |
| nupdates           | 1363         |
| policy_entropy     | 0.003664696  |
| policy_loss        | 0.0003884838 |
| serial_timesteps   | 174464       |
| time_elapsed       | 7.01e+03     |
| total_timesteps    | 5582848      |
| value_loss         | 0.0012971398 |
-------------------------------------
---------------------------------------
| approxkl           | 0.00012828477  |
| clipfrac           | 0.000234375    |
| eplenmean          | 1.62e+03       |
| eprewmean          | 109            |
| explained_variance | 0.0797         |
| fps                | 799            |
| nupdates           | 1364           |
| policy_entropy     | 0.0028780424   |
| policy_loss        | -0.00030468727 |
| serial_timesteps   | 174592 

--------------------------------------
| approxkl           | 0.00046421925 |
| clipfrac           | 0.00171875    |
| eplenmean          | 1.62e+03      |
| eprewmean          | 94.7          |
| explained_variance | 0.987         |
| fps                | 808           |
| nupdates           | 1377          |
| policy_entropy     | 0.004693382   |
| policy_loss        | 0.00037599177 |
| serial_timesteps   | 176256        |
| time_elapsed       | 7.09e+03      |
| total_timesteps    | 5640192       |
| value_loss         | 0.0012336138  |
--------------------------------------
--------------------------------------
| approxkl           | 0.0017921274  |
| clipfrac           | 0.002705078   |
| eplenmean          | 1.62e+03      |
| eprewmean          | 94.7          |
| explained_variance | 0.424         |
| fps                | 802           |
| nupdates           | 1378          |
| policy_entropy     | 0.005574022   |
| policy_loss        | 0.0005839895  |
| serial_timesteps   | 17

-------------------------------------
| approxkl           | 0.0041797315 |
| clipfrac           | 0.0056933593 |
| eplenmean          | 1.62e+03     |
| eprewmean          | 104          |
| explained_variance | 0.912        |
| fps                | 779          |
| nupdates           | 1392         |
| policy_entropy     | 0.012437     |
| policy_loss        | 0.0014876644 |
| serial_timesteps   | 178176       |
| time_elapsed       | 7.16e+03     |
| total_timesteps    | 5701632      |
| value_loss         | 0.012773491  |
-------------------------------------
--------------------------------------
| approxkl           | 0.0009115713  |
| clipfrac           | 0.00069335935 |
| eplenmean          | 1.62e+03      |
| eprewmean          | 97.7          |
| explained_variance | 0.991         |
| fps                | 780           |
| nupdates           | 1393          |
| policy_entropy     | 0.008237571   |
| policy_loss        | 0.0008886271  |
| serial_timesteps   | 178304        |
|

--------------------------------------
| approxkl           | 0.0011796817  |
| clipfrac           | 0.0027636718  |
| eplenmean          | 1.62e+03      |
| eprewmean          | 126           |
| explained_variance | 0.904         |
| fps                | 806           |
| nupdates           | 1407          |
| policy_entropy     | 0.009300449   |
| policy_loss        | 1.8832718e-05 |
| serial_timesteps   | 180096        |
| time_elapsed       | 7.24e+03      |
| total_timesteps    | 5763072       |
| value_loss         | 0.015370014   |
--------------------------------------
-------------------------------------
| approxkl           | 0.0036308742 |
| clipfrac           | 0.002421875  |
| eplenmean          | 1.62e+03     |
| eprewmean          | 126          |
| explained_variance | 0.983        |
| fps                | 790          |
| nupdates           | 1408         |
| policy_entropy     | 0.008333216  |
| policy_loss        | 0.009269047  |
| serial_timesteps   | 180224      

---------------------------------------
| approxkl           | 6.1511586e-05  |
| clipfrac           | 0.0007714844   |
| eplenmean          | 1.62e+03       |
| eprewmean          | 154            |
| explained_variance | 0.97           |
| fps                | 794            |
| nupdates           | 1422           |
| policy_entropy     | 0.007633748    |
| policy_loss        | -0.00016177291 |
| serial_timesteps   | 182016         |
| time_elapsed       | 7.32e+03       |
| total_timesteps    | 5824512        |
| value_loss         | 0.0007241442   |
---------------------------------------
---------------------------------------
| approxkl           | 0.0009947676   |
| clipfrac           | 0.0024511719   |
| eplenmean          | 1.62e+03       |
| eprewmean          | 153            |
| explained_variance | 0.683          |
| fps                | 783            |
| nupdates           | 1423           |
| policy_entropy     | 0.009813066    |
| policy_loss        | -0.00021135222 |


-------------------------------------
| approxkl           | 0.0033230477 |
| clipfrac           | 0.008779297  |
| eplenmean          | 1.62e+03     |
| eprewmean          | 130          |
| explained_variance | 0.905        |
| fps                | 802          |
| nupdates           | 1436         |
| policy_entropy     | 0.025634248  |
| policy_loss        | 0.0019153634 |
| serial_timesteps   | 183808       |
| time_elapsed       | 7.39e+03     |
| total_timesteps    | 5881856      |
| value_loss         | 0.008459733  |
-------------------------------------
-------------------------------------
| approxkl           | 0.0023242733 |
| clipfrac           | 0.004931641  |
| eplenmean          | 1.62e+03     |
| eprewmean          | 134          |
| explained_variance | 0.913        |
| fps                | 783          |
| nupdates           | 1437         |
| policy_entropy     | 0.02060742   |
| policy_loss        | 0.0019882948 |
| serial_timesteps   | 183936       |
| time_elaps

Saving to /tmp/openai-2018-05-30-18-51-01-525742/checkpoints/01450
--------------------------------------
| approxkl           | 0.002721052   |
| clipfrac           | 0.0046875     |
| eplenmean          | 1.62e+03      |
| eprewmean          | 119           |
| explained_variance | 0.975         |
| fps                | 803           |
| nupdates           | 1451          |
| policy_entropy     | 0.02307649    |
| policy_loss        | 0.00080963294 |
| serial_timesteps   | 185728        |
| time_elapsed       | 7.47e+03      |
| total_timesteps    | 5943296       |
| value_loss         | 0.006913964   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0004952109  |
| clipfrac           | 0.0031640625  |
| eplenmean          | 1.62e+03      |
| eprewmean          | 112           |
| explained_variance | 0.936         |
| fps                | 809           |
| nupdates           | 1452          |
| policy_entropy     | 0.019245356  

-------------------------------------
| approxkl           | 0.0017106715 |
| clipfrac           | 0.0053222654 |
| eplenmean          | 1.62e+03     |
| eprewmean          | 132          |
| explained_variance | 0.923        |
| fps                | 775          |
| nupdates           | 1466         |
| policy_entropy     | 0.025398457  |
| policy_loss        | 0.0012913174 |
| serial_timesteps   | 187648       |
| time_elapsed       | 7.55e+03     |
| total_timesteps    | 6004736      |
| value_loss         | 0.0069961287 |
-------------------------------------
--------------------------------------
| approxkl           | 0.0016401581  |
| clipfrac           | 0.0051660156  |
| eplenmean          | 1.62e+03      |
| eprewmean          | 124           |
| explained_variance | 0.868         |
| fps                | 783           |
| nupdates           | 1467          |
| policy_entropy     | 0.023020105   |
| policy_loss        | -0.0006096631 |
| serial_timesteps   | 187776        |
|

-------------------------------------
| approxkl           | 0.0010392861 |
| clipfrac           | 0.0044921874 |
| eplenmean          | 1.62e+03     |
| eprewmean          | 114          |
| explained_variance | 0.0321       |
| fps                | 762          |
| nupdates           | 1481         |
| policy_entropy     | 0.025996909  |
| policy_loss        | 9.197402e-05 |
| serial_timesteps   | 189568       |
| time_elapsed       | 7.63e+03     |
| total_timesteps    | 6066176      |
| value_loss         | 0.0032555119 |
-------------------------------------
--------------------------------------
| approxkl           | 0.00033834486 |
| clipfrac           | 0.0018847656  |
| eplenmean          | 1.62e+03      |
| eprewmean          | 114           |
| explained_variance | 0.956         |
| fps                | 763           |
| nupdates           | 1482          |
| policy_entropy     | 0.020566745   |
| policy_loss        | -0.0002543523 |
| serial_timesteps   | 189696        |
|

--------------------------------------
| approxkl           | 0.0003214222  |
| clipfrac           | 0.0013867187  |
| eplenmean          | 1.62e+03      |
| eprewmean          | 111           |
| explained_variance | 0.0307        |
| fps                | 739           |
| nupdates           | 1496          |
| policy_entropy     | 0.013820388   |
| policy_loss        | -0.0002985081 |
| serial_timesteps   | 191488        |
| time_elapsed       | 7.71e+03      |
| total_timesteps    | 6127616       |
| value_loss         | 0.0009092503  |
--------------------------------------
---------------------------------------
| approxkl           | 0.00023072933  |
| clipfrac           | 0.0012988282   |
| eplenmean          | 1.62e+03       |
| eprewmean          | 109            |
| explained_variance | 0.0323         |
| fps                | 782            |
| nupdates           | 1497           |
| policy_entropy     | 0.012445704    |
| policy_loss        | -0.00021297896 |
| serial_timest

-------------------------------------
| approxkl           | 0.0027497115 |
| clipfrac           | 0.004951172  |
| eplenmean          | 1.62e+03     |
| eprewmean          | 102          |
| explained_variance | 0.937        |
| fps                | 783          |
| nupdates           | 1510         |
| policy_entropy     | 0.016905263  |
| policy_loss        | 0.002330107  |
| serial_timesteps   | 193280       |
| time_elapsed       | 7.78e+03     |
| total_timesteps    | 6184960      |
| value_loss         | 0.011826502  |
-------------------------------------
-------------------------------------
| approxkl           | 0.0015735797 |
| clipfrac           | 0.002939453  |
| eplenmean          | 1.62e+03     |
| eprewmean          | 95.9         |
| explained_variance | 0.5          |
| fps                | 781          |
| nupdates           | 1511         |
| policy_entropy     | 0.012349593  |
| policy_loss        | 4.058226e-05 |
| serial_timesteps   | 193408       |
| time_elaps

-------------------------------------
| approxkl           | 0.0010169954 |
| clipfrac           | 0.0038085938 |
| eplenmean          | 1.62e+03     |
| eprewmean          | 99           |
| explained_variance | 0.93         |
| fps                | 786          |
| nupdates           | 1525         |
| policy_entropy     | 0.011813705  |
| policy_loss        | 6.621051e-05 |
| serial_timesteps   | 195200       |
| time_elapsed       | 7.86e+03     |
| total_timesteps    | 6246400      |
| value_loss         | 0.004924426  |
-------------------------------------
-------------------------------------
| approxkl           | 0.009309239  |
| clipfrac           | 0.0033007814 |
| eplenmean          | 1.62e+03     |
| eprewmean          | 98           |
| explained_variance | 0.961        |
| fps                | 783          |
| nupdates           | 1526         |
| policy_entropy     | 0.010678842  |
| policy_loss        | 0.0038519707 |
| serial_timesteps   | 195328       |
| time_elaps

--------------------------------------
| approxkl           | 0.0010130932  |
| clipfrac           | 0.0030664064  |
| eplenmean          | 1.62e+03      |
| eprewmean          | 108           |
| explained_variance | 0.931         |
| fps                | 789           |
| nupdates           | 1540          |
| policy_entropy     | 0.007871509   |
| policy_loss        | 0.00048721794 |
| serial_timesteps   | 197120        |
| time_elapsed       | 7.94e+03      |
| total_timesteps    | 6307840       |
| value_loss         | 0.011553843   |
--------------------------------------
-------------------------------------
| approxkl           | 0.0017775157 |
| clipfrac           | 0.001875     |
| eplenmean          | 1.62e+03     |
| eprewmean          | 108          |
| explained_variance | 0.95         |
| fps                | 783          |
| nupdates           | 1541         |
| policy_entropy     | 0.007524005  |
| policy_loss        | 0.0004241787 |
| serial_timesteps   | 197248      

--------------------------------------
| approxkl           | 0.0024062255  |
| clipfrac           | 0.0033007814  |
| eplenmean          | 1.62e+03      |
| eprewmean          | 102           |
| explained_variance | 0.904         |
| fps                | 771           |
| nupdates           | 1554          |
| policy_entropy     | 0.008004552   |
| policy_loss        | 0.00096904655 |
| serial_timesteps   | 198912        |
| time_elapsed       | 8.01e+03      |
| total_timesteps    | 6365184       |
| value_loss         | 0.022172656   |
--------------------------------------
---------------------------------------
| approxkl           | 0.001437522    |
| clipfrac           | 0.0019140625   |
| eplenmean          | 1.62e+03       |
| eprewmean          | 108            |
| explained_variance | 0.961          |
| fps                | 782            |
| nupdates           | 1555           |
| policy_entropy     | 0.0064052898   |
| policy_loss        | -2.9215776e-06 |
| serial_timest

--------------------------------------
| approxkl           | 0.00042199553 |
| clipfrac           | 0.0004394531  |
| eplenmean          | 1.62e+03      |
| eprewmean          | 120           |
| explained_variance | 0.987         |
| fps                | 760           |
| nupdates           | 1568          |
| policy_entropy     | 0.0030048967  |
| policy_loss        | 0.00026721286 |
| serial_timesteps   | 200704        |
| time_elapsed       | 8.09e+03      |
| total_timesteps    | 6422528       |
| value_loss         | 0.0013633457  |
--------------------------------------
---------------------------------------
| approxkl           | 0.00019848553  |
| clipfrac           | 0.0004296875   |
| eplenmean          | 1.62e+03       |
| eprewmean          | 119            |
| explained_variance | -0.0473        |
| fps                | 801            |
| nupdates           | 1569           |
| policy_entropy     | 0.0033169675   |
| policy_loss        | -0.00013179661 |
| serial_timest

---------------------------------------
| approxkl           | 5.8736237e-08  |
| clipfrac           | 0.0            |
| eplenmean          | 1.62e+03       |
| eprewmean          | 87.3           |
| explained_variance | 0.951          |
| fps                | 788            |
| nupdates           | 1582           |
| policy_entropy     | 0.0024222154   |
| policy_loss        | -4.9918723e-05 |
| serial_timesteps   | 202496         |
| time_elapsed       | 8.16e+03       |
| total_timesteps    | 6479872        |
| value_loss         | 0.00017159533  |
---------------------------------------
--------------------------------------
| approxkl           | 0.00011764277 |
| clipfrac           | 0.00056640623 |
| eplenmean          | 1.62e+03      |
| eprewmean          | 87.3          |
| explained_variance | 0.95          |
| fps                | 782           |
| nupdates           | 1583          |
| policy_entropy     | 0.0027777073  |
| policy_loss        | 0.0006297991  |
| serial_t

--------------------------------------
| approxkl           | 0.00030903952 |
| clipfrac           | 0.0014648438  |
| eplenmean          | 1.62e+03      |
| eprewmean          | 99.3          |
| explained_variance | 0.921         |
| fps                | 775           |
| nupdates           | 1597          |
| policy_entropy     | 0.00559761    |
| policy_loss        | 0.00014920998 |
| serial_timesteps   | 204416        |
| time_elapsed       | 8.24e+03      |
| total_timesteps    | 6541312       |
| value_loss         | 0.00767417    |
--------------------------------------
--------------------------------------
| approxkl           | 0.0040053115  |
| clipfrac           | 0.0024804687  |
| eplenmean          | 1.62e+03      |
| eprewmean          | 100           |
| explained_variance | 0.962         |
| fps                | 770           |
| nupdates           | 1598          |
| policy_entropy     | 0.0061833216  |
| policy_loss        | 9.9631085e-05 |
| serial_timesteps   | 20

-------------------------------------
| approxkl           | 0.005292939  |
| clipfrac           | 0.013222656  |
| eplenmean          | 1.62e+03     |
| eprewmean          | 141          |
| explained_variance | 0.876        |
| fps                | 796          |
| nupdates           | 1611         |
| policy_entropy     | 0.04762791   |
| policy_loss        | 0.0016427765 |
| serial_timesteps   | 206208       |
| time_elapsed       | 8.31e+03     |
| total_timesteps    | 6598656      |
| value_loss         | 0.024415707  |
-------------------------------------
-------------------------------------
| approxkl           | 0.005106804  |
| clipfrac           | 0.011884766  |
| eplenmean          | 1.62e+03     |
| eprewmean          | 141          |
| explained_variance | 0.962        |
| fps                | 798          |
| nupdates           | 1612         |
| policy_entropy     | 0.046362467  |
| policy_loss        | 0.0027257106 |
| serial_timesteps   | 206336       |
| time_elaps

--------------------------------------
| approxkl           | 0.0010439856  |
| clipfrac           | 0.0026367188  |
| eplenmean          | 1.62e+03      |
| eprewmean          | 189           |
| explained_variance | 0.96          |
| fps                | 765           |
| nupdates           | 1626          |
| policy_entropy     | 0.012210211   |
| policy_loss        | 0.00030022522 |
| serial_timesteps   | 208128        |
| time_elapsed       | 8.39e+03      |
| total_timesteps    | 6660096       |
| value_loss         | 0.0059854854  |
--------------------------------------
--------------------------------------
| approxkl           | 0.00030041725 |
| clipfrac           | 0.0017480468  |
| eplenmean          | 1.62e+03      |
| eprewmean          | 195           |
| explained_variance | 0.952         |
| fps                | 784           |
| nupdates           | 1627          |
| policy_entropy     | 0.016931174   |
| policy_loss        | -0.0001231502 |
| serial_timesteps   | 20

--------------------------------------
| approxkl           | 0.0038912904  |
| clipfrac           | 0.0036328125  |
| eplenmean          | 1.62e+03      |
| eprewmean          | 168           |
| explained_variance | 0.979         |
| fps                | 813           |
| nupdates           | 1641          |
| policy_entropy     | 0.011707438   |
| policy_loss        | 0.00023698414 |
| serial_timesteps   | 210048        |
| time_elapsed       | 8.47e+03      |
| total_timesteps    | 6721536       |
| value_loss         | 0.005890706   |
--------------------------------------
-------------------------------------
| approxkl           | 0.0016251797 |
| clipfrac           | 0.002138672  |
| eplenmean          | 1.62e+03     |
| eprewmean          | 174          |
| explained_variance | 0.896        |
| fps                | 800          |
| nupdates           | 1642         |
| policy_entropy     | 0.011537033  |
| policy_loss        | 0.0005209485 |
| serial_timesteps   | 210176      

--------------------------------------
| approxkl           | 0.00036099303 |
| clipfrac           | 0.00045898437 |
| eplenmean          | 1.62e+03      |
| eprewmean          | 149           |
| explained_variance | 0.994         |
| fps                | 773           |
| nupdates           | 1655          |
| policy_entropy     | 0.0038137028  |
| policy_loss        | 0.0003903227  |
| serial_timesteps   | 211840        |
| time_elapsed       | 8.54e+03      |
| total_timesteps    | 6778880       |
| value_loss         | 0.00091142213 |
--------------------------------------
-------------------------------------
| approxkl           | 0.0073572337 |
| clipfrac           | 0.0017675781 |
| eplenmean          | 1.62e+03     |
| eprewmean          | 152          |
| explained_variance | 0.98         |
| fps                | 769          |
| nupdates           | 1656         |
| policy_entropy     | 0.0046363277 |
| policy_loss        | 5.728365e-06 |
| serial_timesteps   | 211968      

--------------------------------------
| approxkl           | 0.0006732661  |
| clipfrac           | 0.0027246093  |
| eplenmean          | 1.62e+03      |
| eprewmean          | 115           |
| explained_variance | 0.938         |
| fps                | 778           |
| nupdates           | 1670          |
| policy_entropy     | 0.0088957045  |
| policy_loss        | -8.562945e-05 |
| serial_timesteps   | 213760        |
| time_elapsed       | 8.62e+03      |
| total_timesteps    | 6840320       |
| value_loss         | 0.010011885   |
--------------------------------------
-------------------------------------
| approxkl           | 0.0031966856 |
| clipfrac           | 0.0024414062 |
| eplenmean          | 1.62e+03     |
| eprewmean          | 115          |
| explained_variance | 0.977        |
| fps                | 769          |
| nupdates           | 1671         |
| policy_entropy     | 0.008296596  |
| policy_loss        | 0.0005510263 |
| serial_timesteps   | 213888      

-------------------------------------
| approxkl           | 0.0018657598 |
| clipfrac           | 0.0043066405 |
| eplenmean          | 1.62e+03     |
| eprewmean          | 154          |
| explained_variance | 0.934        |
| fps                | 786          |
| nupdates           | 1685         |
| policy_entropy     | 0.014686722  |
| policy_loss        | 0.0004072575 |
| serial_timesteps   | 215680       |
| time_elapsed       | 8.7e+03      |
| total_timesteps    | 6901760      |
| value_loss         | 0.022052713  |
-------------------------------------
--------------------------------------
| approxkl           | 0.0024216003  |
| clipfrac           | 0.0038769532  |
| eplenmean          | 1.62e+03      |
| eprewmean          | 168           |
| explained_variance | 0.942         |
| fps                | 789           |
| nupdates           | 1686          |
| policy_entropy     | 0.013625482   |
| policy_loss        | 0.00072303385 |
| serial_timesteps   | 215808        |
|

---------------------------------------
| approxkl           | 6.355137e-05   |
| clipfrac           | 0.0005371094   |
| eplenmean          | 1.62e+03       |
| eprewmean          | 185            |
| explained_variance | 0.534          |
| fps                | 812            |
| nupdates           | 1700           |
| policy_entropy     | 0.0049377605   |
| policy_loss        | -0.00047202432 |
| serial_timesteps   | 217600         |
| time_elapsed       | 8.78e+03       |
| total_timesteps    | 6963200        |
| value_loss         | 0.0005602927   |
---------------------------------------
Saving to /tmp/openai-2018-05-30-18-51-01-525742/checkpoints/01700
--------------------------------------
| approxkl           | 0.00027725587 |
| clipfrac           | 0.00034179687 |
| eplenmean          | 1.62e+03      |
| eprewmean          | 192           |
| explained_variance | 0.854         |
| fps                | 814           |
| nupdates           | 1701          |
| policy_entropy     

--------------------------------------
| approxkl           | 0.0006471135  |
| clipfrac           | 0.00081054686 |
| eplenmean          | 1.62e+03      |
| eprewmean          | 177           |
| explained_variance | 0.964         |
| fps                | 781           |
| nupdates           | 1714          |
| policy_entropy     | 0.00557778    |
| policy_loss        | 0.00013016086 |
| serial_timesteps   | 219392        |
| time_elapsed       | 8.85e+03      |
| total_timesteps    | 7020544       |
| value_loss         | 0.0044920067  |
--------------------------------------
--------------------------------------
| approxkl           | 0.00094893    |
| clipfrac           | 0.0013574219  |
| eplenmean          | 1.62e+03      |
| eprewmean          | 166           |
| explained_variance | 0.938         |
| fps                | 781           |
| nupdates           | 1715          |
| policy_entropy     | 0.0058129304  |
| policy_loss        | -7.477605e-05 |
| serial_timesteps   | 21

---------------------------------------
| approxkl           | 4.2592153e-05  |
| clipfrac           | 0.00024414062  |
| eplenmean          | 1.62e+03       |
| eprewmean          | 128            |
| explained_variance | 0.734          |
| fps                | 778            |
| nupdates           | 1728           |
| policy_entropy     | 0.003162742    |
| policy_loss        | -2.7463653e-05 |
| serial_timesteps   | 221184         |
| time_elapsed       | 8.92e+03       |
| total_timesteps    | 7077888        |
| value_loss         | 0.0013519055   |
---------------------------------------
--------------------------------------
| approxkl           | 9.875087e-05  |
| clipfrac           | 0.00049804687 |
| eplenmean          | 1.62e+03      |
| eprewmean          | 137           |
| explained_variance | 0.911         |
| fps                | 803           |
| nupdates           | 1729          |
| policy_entropy     | 0.0035147099  |
| policy_loss        | 0.00029725538 |
| serial_t

--------------------------------------
| approxkl           | 0.0007231256  |
| clipfrac           | 0.0008691406  |
| eplenmean          | 1.62e+03      |
| eprewmean          | 90.9          |
| explained_variance | 0.0316        |
| fps                | 807           |
| nupdates           | 1742          |
| policy_entropy     | 0.0036457023  |
| policy_loss        | -3.146458e-05 |
| serial_timesteps   | 222976        |
| time_elapsed       | 8.99e+03      |
| total_timesteps    | 7135232       |
| value_loss         | 0.0005490255  |
--------------------------------------
---------------------------------------
| approxkl           | 0.00014987779  |
| clipfrac           | 0.0004394531   |
| eplenmean          | 1.62e+03       |
| eprewmean          | 91.4           |
| explained_variance | 0.184          |
| fps                | 806            |
| nupdates           | 1743           |
| policy_entropy     | 0.004155347    |
| policy_loss        | -0.00046331287 |
| serial_timest

--------------------------------------
| approxkl           | 0.0036309944  |
| clipfrac           | 0.002109375   |
| eplenmean          | 1.62e+03      |
| eprewmean          | 137           |
| explained_variance | 0.959         |
| fps                | 795           |
| nupdates           | 1756          |
| policy_entropy     | 0.0037124585  |
| policy_loss        | 0.00058761495 |
| serial_timesteps   | 224768        |
| time_elapsed       | 9.06e+03      |
| total_timesteps    | 7192576       |
| value_loss         | 0.01124874    |
--------------------------------------
-------------------------------------
| approxkl           | 0.0004925102 |
| clipfrac           | 0.0008203125 |
| eplenmean          | 1.62e+03     |
| eprewmean          | 138          |
| explained_variance | 0.96         |
| fps                | 801          |
| nupdates           | 1757         |
| policy_entropy     | 0.0025821896 |
| policy_loss        | 0.0005773743 |
| serial_timesteps   | 224896      

--------------------------------------
| approxkl           | 0.00021233155 |
| clipfrac           | 0.0012011719  |
| eplenmean          | 1.62e+03      |
| eprewmean          | 156           |
| explained_variance | 0.982         |
| fps                | 821           |
| nupdates           | 1771          |
| policy_entropy     | 0.0052455342  |
| policy_loss        | 6.021617e-05  |
| serial_timesteps   | 226688        |
| time_elapsed       | 9.14e+03      |
| total_timesteps    | 7254016       |
| value_loss         | 0.00018770361 |
--------------------------------------
--------------------------------------
| approxkl           | 0.00014768957 |
| clipfrac           | 0.000625      |
| eplenmean          | 1.62e+03      |
| eprewmean          | 150           |
| explained_variance | 0.517         |
| fps                | 811           |
| nupdates           | 1772          |
| policy_entropy     | 0.005078833   |
| policy_loss        | -9.021734e-05 |
| serial_timesteps   | 22

-------------------------------------
| approxkl           | 0.0023351847 |
| clipfrac           | 0.0048730467 |
| eplenmean          | 1.62e+03     |
| eprewmean          | 156          |
| explained_variance | 0.861        |
| fps                | 826          |
| nupdates           | 1785         |
| policy_entropy     | 0.006070708  |
| policy_loss        | 0.0009518016 |
| serial_timesteps   | 228480       |
| time_elapsed       | 9.21e+03     |
| total_timesteps    | 7311360      |
| value_loss         | 0.009702805  |
-------------------------------------
--------------------------------------
| approxkl           | 0.00094826863 |
| clipfrac           | 0.0021289063  |
| eplenmean          | 1.62e+03      |
| eprewmean          | 156           |
| explained_variance | 0.976         |
| fps                | 821           |
| nupdates           | 1786          |
| policy_entropy     | 0.0045900587  |
| policy_loss        | 0.00037730022 |
| serial_timesteps   | 228608        |
|

--------------------------------------
| approxkl           | 4.9718984e-09 |
| clipfrac           | 0.0           |
| eplenmean          | 1.62e+03      |
| eprewmean          | 133           |
| explained_variance | 0.781         |
| fps                | 804           |
| nupdates           | 1800          |
| policy_entropy     | 0.0007244774  |
| policy_loss        | 5.801647e-06  |
| serial_timesteps   | 230400        |
| time_elapsed       | 9.29e+03      |
| total_timesteps    | 7372800       |
| value_loss         | 3.9524595e-05 |
--------------------------------------
Saving to /tmp/openai-2018-05-30-18-51-01-525742/checkpoints/01800
--------------------------------------
| approxkl           | 5.6887557e-09 |
| clipfrac           | 0.0           |
| eplenmean          | 1.62e+03      |
| eprewmean          | 134           |
| explained_variance | 0.824         |
| fps                | 812           |
| nupdates           | 1801          |
| policy_entropy     | 0.0007393104 

-------------------------------------
| approxkl           | 0.0016151428 |
| clipfrac           | 0.0030371093 |
| eplenmean          | 1.62e+03     |
| eprewmean          | 101          |
| explained_variance | 0.919        |
| fps                | 789          |
| nupdates           | 1814         |
| policy_entropy     | 0.004374935  |
| policy_loss        | 0.0007352851 |
| serial_timesteps   | 232192       |
| time_elapsed       | 9.36e+03     |
| total_timesteps    | 7430144      |
| value_loss         | 0.028648276  |
-------------------------------------
--------------------------------------
| approxkl           | 0.0014578715  |
| clipfrac           | 0.001328125   |
| eplenmean          | 1.62e+03      |
| eprewmean          | 101           |
| explained_variance | 0.98          |
| fps                | 769           |
| nupdates           | 1815          |
| policy_entropy     | 0.002409365   |
| policy_loss        | 0.00035255015 |
| serial_timesteps   | 232320        |
|

---------------------------------------
| approxkl           | 0.00092428515  |
| clipfrac           | 0.0011035156   |
| eplenmean          | 1.62e+03       |
| eprewmean          | 175            |
| explained_variance | 0.989          |
| fps                | 800            |
| nupdates           | 1829           |
| policy_entropy     | 0.0037368385   |
| policy_loss        | -0.00032669996 |
| serial_timesteps   | 234112         |
| time_elapsed       | 9.43e+03       |
| total_timesteps    | 7491584        |
| value_loss         | 0.0011851129   |
---------------------------------------
-------------------------------------
| approxkl           | 0.0045879735 |
| clipfrac           | 0.0025878907 |
| eplenmean          | 1.62e+03     |
| eprewmean          | 175          |
| explained_variance | 0.944        |
| fps                | 804          |
| nupdates           | 1830         |
| policy_entropy     | 0.005231008  |
| policy_loss        | 3.23702e-05  |
| serial_timesteps  

---------------------------------------
| approxkl           | 1.24488295e-08 |
| clipfrac           | 0.0            |
| eplenmean          | 1.62e+03       |
| eprewmean          | 180            |
| explained_variance | 0.993          |
| fps                | 785            |
| nupdates           | 1843           |
| policy_entropy     | 0.00071286666  |
| policy_loss        | 5.4451184e-06  |
| serial_timesteps   | 235904         |
| time_elapsed       | 9.51e+03       |
| total_timesteps    | 7548928        |
| value_loss         | 0.00029735052  |
---------------------------------------
-------------------------------------
| approxkl           | 0.0047491924 |
| clipfrac           | 0.001171875  |
| eplenmean          | 1.62e+03     |
| eprewmean          | 191          |
| explained_variance | 0.955        |
| fps                | 771          |
| nupdates           | 1844         |
| policy_entropy     | 0.0011983045 |
| policy_loss        | 0.0007130877 |
| serial_timesteps  

--------------------------------------
| approxkl           | 3.091915e-10  |
| clipfrac           | 0.0           |
| eplenmean          | 1.62e+03      |
| eprewmean          | 196           |
| explained_variance | 0.939         |
| fps                | 824           |
| nupdates           | 1857          |
| policy_entropy     | 0.00024832942 |
| policy_loss        | 1.140706e-06  |
| serial_timesteps   | 237696        |
| time_elapsed       | 9.58e+03      |
| total_timesteps    | 7606272       |
| value_loss         | 3.651036e-05  |
--------------------------------------
--------------------------------------
| approxkl           | 5.7463967e-10 |
| clipfrac           | 0.0           |
| eplenmean          | 1.62e+03      |
| eprewmean          | 196           |
| explained_variance | 0.938         |
| fps                | 833           |
| nupdates           | 1858          |
| policy_entropy     | 0.00024797954 |
| policy_loss        | 4.5868287e-06 |
| serial_timesteps   | 23

--------------------------------------
| approxkl           | 0.0017949136  |
| clipfrac           | 0.0032421874  |
| eplenmean          | 1.62e+03      |
| eprewmean          | 149           |
| explained_variance | 0.919         |
| fps                | 825           |
| nupdates           | 1872          |
| policy_entropy     | 0.0030976424  |
| policy_loss        | 0.00047342683 |
| serial_timesteps   | 239616        |
| time_elapsed       | 9.65e+03      |
| total_timesteps    | 7667712       |
| value_loss         | 0.021130772   |
--------------------------------------
-------------------------------------
| approxkl           | 0.0023733568 |
| clipfrac           | 0.00125      |
| eplenmean          | 1.62e+03     |
| eprewmean          | 145          |
| explained_variance | 0.997        |
| fps                | 812          |
| nupdates           | 1873         |
| policy_entropy     | 0.0012906498 |
| policy_loss        | 0.0010390696 |
| serial_timesteps   | 239744      

--------------------------------------
| approxkl           | 0.00084648957 |
| clipfrac           | 0.0009375     |
| eplenmean          | 1.62e+03      |
| eprewmean          | 125           |
| explained_variance | 0.976         |
| fps                | 802           |
| nupdates           | 1886          |
| policy_entropy     | 0.0009261252  |
| policy_loss        | 0.00035398884 |
| serial_timesteps   | 241408        |
| time_elapsed       | 9.72e+03      |
| total_timesteps    | 7725056       |
| value_loss         | 0.003701804   |
--------------------------------------
--------------------------------------
| approxkl           | 2.0973225e-07 |
| clipfrac           | 1.953125e-05  |
| eplenmean          | 1.62e+03      |
| eprewmean          | 125           |
| explained_variance | 0.821         |
| fps                | 770           |
| nupdates           | 1887          |
| policy_entropy     | 0.00019478213 |
| policy_loss        | 1.5161221e-06 |
| serial_timesteps   | 24

Saving to /tmp/openai-2018-05-30-18-51-01-525742/checkpoints/01900
--------------------------------------
| approxkl           | 2.360681e-08  |
| clipfrac           | 0.0           |
| eplenmean          | 1.62e+03      |
| eprewmean          | 156           |
| explained_variance | 0.99          |
| fps                | 811           |
| nupdates           | 1901          |
| policy_entropy     | 0.00014662006 |
| policy_loss        | 1.896416e-06  |
| serial_timesteps   | 243328        |
| time_elapsed       | 9.8e+03       |
| total_timesteps    | 7786496       |
| value_loss         | 0.00034576628 |
--------------------------------------
--------------------------------------
| approxkl           | 0.001588396   |
| clipfrac           | 0.0016503907  |
| eplenmean          | 1.62e+03      |
| eprewmean          | 149           |
| explained_variance | 0.977         |
| fps                | 804           |
| nupdates           | 1902          |
| policy_entropy     | 0.00096609874

--------------------------------------
| approxkl           | 0.000711005   |
| clipfrac           | 0.0011230469  |
| eplenmean          | 1.62e+03      |
| eprewmean          | 137           |
| explained_variance | 0.963         |
| fps                | 821           |
| nupdates           | 1915          |
| policy_entropy     | 0.0011305171  |
| policy_loss        | 0.00076327025 |
| serial_timesteps   | 245120        |
| time_elapsed       | 9.87e+03      |
| total_timesteps    | 7843840       |
| value_loss         | 0.008017908   |
--------------------------------------
--------------------------------------
| approxkl           | 0.00049166055 |
| clipfrac           | 0.0013671875  |
| eplenmean          | 1.62e+03      |
| eprewmean          | 137           |
| explained_variance | 0.941         |
| fps                | 826           |
| nupdates           | 1916          |
| policy_entropy     | 0.0014305532  |
| policy_loss        | 0.00033210625 |
| serial_timesteps   | 24

--------------------------------------
| approxkl           | 0.00014247006 |
| clipfrac           | 0.00012695312 |
| eplenmean          | 1.62e+03      |
| eprewmean          | 130           |
| explained_variance | 0.996         |
| fps                | 813           |
| nupdates           | 1930          |
| policy_entropy     | 0.00028657232 |
| policy_loss        | 0.00032572247 |
| serial_timesteps   | 247040        |
| time_elapsed       | 9.95e+03      |
| total_timesteps    | 7905280       |
| value_loss         | 0.0004881107  |
--------------------------------------
--------------------------------------
| approxkl           | 4.4462327e-09 |
| clipfrac           | 0.0           |
| eplenmean          | 1.62e+03      |
| eprewmean          | 119           |
| explained_variance | 0.993         |
| fps                | 833           |
| nupdates           | 1931          |
| policy_entropy     | 0.00010816125 |
| policy_loss        | 3.6424246e-06 |
| serial_timesteps   | 24

--------------------------------------
| approxkl           | 3.819811e-11  |
| clipfrac           | 0.0           |
| eplenmean          | 1.62e+03      |
| eprewmean          | 124           |
| explained_variance | 0.934         |
| fps                | 808           |
| nupdates           | 1945          |
| policy_entropy     | 7.8371464e-05 |
| policy_loss        | 3.129756e-07  |
| serial_timesteps   | 248960        |
| time_elapsed       | 1e+04         |
| total_timesteps    | 7966720       |
| value_loss         | 3.5166093e-05 |
--------------------------------------
---------------------------------------
| approxkl           | 5.762507e-05   |
| clipfrac           | 0.0005371094   |
| eplenmean          | 1.62e+03       |
| eprewmean          | 124            |
| explained_variance | 0.956          |
| fps                | 835            |
| nupdates           | 1946           |
| policy_entropy     | 0.00076627143  |
| policy_loss        | -0.00021359744 |
| serial_timest

--------------------------------------
| approxkl           | 0.00059496297 |
| clipfrac           | 0.00034179687 |
| eplenmean          | 1.62e+03      |
| eprewmean          | 116           |
| explained_variance | 0.981         |
| fps                | 826           |
| nupdates           | 1959          |
| policy_entropy     | 0.00063936604 |
| policy_loss        | 7.318477e-05  |
| serial_timesteps   | 250752        |
| time_elapsed       | 1.01e+04      |
| total_timesteps    | 8024064       |
| value_loss         | 0.003544384   |
--------------------------------------
-------------------------------------
| approxkl           | 0.001328811  |
| clipfrac           | 0.0012792968 |
| eplenmean          | 1.62e+03     |
| eprewmean          | 116          |
| explained_variance | 0.959        |
| fps                | 814          |
| nupdates           | 1960         |
| policy_entropy     | 0.0010770834 |
| policy_loss        | 0.000167473  |
| serial_timesteps   | 250880      

--------------------------------------
| approxkl           | 0.0018730458  |
| clipfrac           | 0.0012597656  |
| eplenmean          | 1.62e+03      |
| eprewmean          | 164           |
| explained_variance | 0.863         |
| fps                | 811           |
| nupdates           | 1974          |
| policy_entropy     | 0.0011908999  |
| policy_loss        | 0.00011210978 |
| serial_timesteps   | 252672        |
| time_elapsed       | 1.02e+04      |
| total_timesteps    | 8085504       |
| value_loss         | 0.010442691   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0019790388  |
| clipfrac           | 0.002314453   |
| eplenmean          | 1.62e+03      |
| eprewmean          | 162           |
| explained_variance | 0.973         |
| fps                | 821           |
| nupdates           | 1975          |
| policy_entropy     | 0.0020227015  |
| policy_loss        | 0.00026094593 |
| serial_timesteps   | 25

---------------------------------------
| approxkl           | 0.002128423    |
| clipfrac           | 0.005          |
| eplenmean          | 1.62e+03       |
| eprewmean          | 164            |
| explained_variance | 0.905          |
| fps                | 804            |
| nupdates           | 1988           |
| policy_entropy     | 0.0043225256   |
| policy_loss        | -0.00054099684 |
| serial_timesteps   | 254464         |
| time_elapsed       | 1.02e+04       |
| total_timesteps    | 8142848        |
| value_loss         | 0.020767862    |
---------------------------------------
--------------------------------------
| approxkl           | 0.00027731503 |
| clipfrac           | 0.00080078124 |
| eplenmean          | 1.62e+03      |
| eprewmean          | 170           |
| explained_variance | 0.998         |
| fps                | 801           |
| nupdates           | 1989          |
| policy_entropy     | 0.0011349221  |
| policy_loss        | 0.00010977875 |
| serial_t

--------------------------------------
| approxkl           | 0.00063804514 |
| clipfrac           | 0.00109375    |
| eplenmean          | 1.62e+03      |
| eprewmean          | 197           |
| explained_variance | 0.963         |
| fps                | 830           |
| nupdates           | 2002          |
| policy_entropy     | 0.0015547925  |
| policy_loss        | 0.00011300003 |
| serial_timesteps   | 256256        |
| time_elapsed       | 1.03e+04      |
| total_timesteps    | 8200192       |
| value_loss         | 0.011993884   |
--------------------------------------
---------------------------------------
| approxkl           | 0.0006679493   |
| clipfrac           | 0.0007324219   |
| eplenmean          | 1.62e+03       |
| eprewmean          | 187            |
| explained_variance | 0.892          |
| fps                | 782            |
| nupdates           | 2003           |
| policy_entropy     | 0.0014930613   |
| policy_loss        | -3.7305708e-06 |
| serial_timest

--------------------------------------
| approxkl           | 2.1652609e-06 |
| clipfrac           | 0.0001953125  |
| eplenmean          | 1.62e+03      |
| eprewmean          | 175           |
| explained_variance | 0.858         |
| fps                | 798           |
| nupdates           | 2016          |
| policy_entropy     | 8.482361e-05  |
| policy_loss        | -9.379498e-06 |
| serial_timesteps   | 258048        |
| time_elapsed       | 1.04e+04      |
| total_timesteps    | 8257536       |
| value_loss         | 5.7880334e-05 |
--------------------------------------
---------------------------------------
| approxkl           | 0.0016128941   |
| clipfrac           | 0.0012402344   |
| eplenmean          | 1.62e+03       |
| eprewmean          | 170            |
| explained_variance | 0.924          |
| fps                | 824            |
| nupdates           | 2017           |
| policy_entropy     | 0.0011709108   |
| policy_loss        | -8.9287925e-05 |
| serial_timest

--------------------------------------
| approxkl           | 0.00016167611 |
| clipfrac           | 0.00081054686 |
| eplenmean          | 1.62e+03      |
| eprewmean          | 167           |
| explained_variance | 0.93          |
| fps                | 804           |
| nupdates           | 2030          |
| policy_entropy     | 0.0009495028  |
| policy_loss        | -7.142029e-05 |
| serial_timesteps   | 259840        |
| time_elapsed       | 1.05e+04      |
| total_timesteps    | 8314880       |
| value_loss         | 0.007645944   |
--------------------------------------
--------------------------------------
| approxkl           | 0.00039231856 |
| clipfrac           | 0.0012988282  |
| eplenmean          | 1.62e+03      |
| eprewmean          | 159           |
| explained_variance | 0.981         |
| fps                | 790           |
| nupdates           | 2031          |
| policy_entropy     | 0.0014196183  |
| policy_loss        | 0.000171893   |
| serial_timesteps   | 25

---------------------------------------
| approxkl           | 6.370313e-06   |
| clipfrac           | 7.8125e-05     |
| eplenmean          | 1.62e+03       |
| eprewmean          | 150            |
| explained_variance | 0.801          |
| fps                | 798            |
| nupdates           | 2044           |
| policy_entropy     | 0.00039654676  |
| policy_loss        | -2.4469098e-05 |
| serial_timesteps   | 261632         |
| time_elapsed       | 1.05e+04       |
| total_timesteps    | 8372224        |
| value_loss         | 0.00030306994  |
---------------------------------------
--------------------------------------
| approxkl           | 0.00026292622 |
| clipfrac           | 0.000234375   |
| eplenmean          | 1.62e+03      |
| eprewmean          | 156           |
| explained_variance | 0.993         |
| fps                | 808           |
| nupdates           | 2045          |
| policy_entropy     | 0.00047038333 |
| policy_loss        | 0.00010134753 |
| serial_t

-------------------------------------
| approxkl           | 0.005826462  |
| clipfrac           | 0.0061132815 |
| eplenmean          | 1.62e+03     |
| eprewmean          | 131          |
| explained_variance | 0.897        |
| fps                | 799          |
| nupdates           | 2058         |
| policy_entropy     | 0.006449968  |
| policy_loss        | 8.396739e-05 |
| serial_timesteps   | 263424       |
| time_elapsed       | 1.06e+04     |
| total_timesteps    | 8429568      |
| value_loss         | 0.019727007  |
-------------------------------------
-------------------------------------
| approxkl           | 0.0032479248 |
| clipfrac           | 0.003925781  |
| eplenmean          | 1.62e+03     |
| eprewmean          | 137          |
| explained_variance | 0.965        |
| fps                | 797          |
| nupdates           | 2059         |
| policy_entropy     | 0.0047921096 |
| policy_loss        | -5.66782e-05 |
| serial_timesteps   | 263552       |
| time_elaps

--------------------------------------
| approxkl           | 0.0038677827  |
| clipfrac           | 0.004003906   |
| eplenmean          | 1.62e+03      |
| eprewmean          | 129           |
| explained_variance | 0.969         |
| fps                | 814           |
| nupdates           | 2073          |
| policy_entropy     | 0.0037082213  |
| policy_loss        | 0.00054327067 |
| serial_timesteps   | 265344        |
| time_elapsed       | 1.07e+04      |
| total_timesteps    | 8491008       |
| value_loss         | 0.009114113   |
--------------------------------------
-------------------------------------
| approxkl           | 0.008388459  |
| clipfrac           | 0.004716797  |
| eplenmean          | 1.62e+03     |
| eprewmean          | 129          |
| explained_variance | 0.98         |
| fps                | 790          |
| nupdates           | 2074         |
| policy_entropy     | 0.0036615492 |
| policy_loss        | -0.003604979 |
| serial_timesteps   | 265472      

-------------------------------------
| approxkl           | 0.001954401  |
| clipfrac           | 0.0033398438 |
| eplenmean          | 1.62e+03     |
| eprewmean          | 134          |
| explained_variance | 0.948        |
| fps                | 797          |
| nupdates           | 2088         |
| policy_entropy     | 0.00331949   |
| policy_loss        | 0.0004144334 |
| serial_timesteps   | 267264       |
| time_elapsed       | 1.08e+04     |
| total_timesteps    | 8552448      |
| value_loss         | 0.007456792  |
-------------------------------------
--------------------------------------
| approxkl           | 0.0006606658  |
| clipfrac           | 0.0016796875  |
| eplenmean          | 1.62e+03      |
| eprewmean          | 140           |
| explained_variance | 0.953         |
| fps                | 783           |
| nupdates           | 2089          |
| policy_entropy     | 0.0021475886  |
| policy_loss        | 0.00082574843 |
| serial_timesteps   | 267392        |
|

--------------------------------------
| approxkl           | 9.050856e-10  |
| clipfrac           | 0.0           |
| eplenmean          | 1.62e+03      |
| eprewmean          | 137           |
| explained_variance | 0.94          |
| fps                | 789           |
| nupdates           | 2102          |
| policy_entropy     | 0.00032178848 |
| policy_loss        | 5.705756e-06  |
| serial_timesteps   | 269056        |
| time_elapsed       | 1.08e+04      |
| total_timesteps    | 8609792       |
| value_loss         | 7.9337675e-05 |
--------------------------------------
---------------------------------------
| approxkl           | 0.00033509597  |
| clipfrac           | 0.0044335937   |
| eplenmean          | 1.62e+03       |
| eprewmean          | 142            |
| explained_variance | 0.888          |
| fps                | 798            |
| nupdates           | 2103           |
| policy_entropy     | 0.0052567683   |
| policy_loss        | -0.00011707502 |
| serial_timest

--------------------------------------
| approxkl           | 0.0007967271  |
| clipfrac           | 0.0046191406  |
| eplenmean          | 1.62e+03      |
| eprewmean          | 127           |
| explained_variance | 0.845         |
| fps                | 809           |
| nupdates           | 2117          |
| policy_entropy     | 0.006556811   |
| policy_loss        | 0.00017510806 |
| serial_timesteps   | 270976        |
| time_elapsed       | 1.09e+04      |
| total_timesteps    | 8671232       |
| value_loss         | 0.007456833   |
--------------------------------------
--------------------------------------
| approxkl           | 0.0010993041  |
| clipfrac           | 0.004296875   |
| eplenmean          | 1.62e+03      |
| eprewmean          | 123           |
| explained_variance | 0.868         |
| fps                | 812           |
| nupdates           | 2118          |
| policy_entropy     | 0.006758673   |
| policy_loss        | 0.00055271166 |
| serial_timesteps   | 27

--------------------------------------
| approxkl           | 0.0007087501  |
| clipfrac           | 0.004462891   |
| eplenmean          | 1.62e+03      |
| eprewmean          | 150           |
| explained_variance | 0.871         |
| fps                | 793           |
| nupdates           | 2132          |
| policy_entropy     | 0.00936084    |
| policy_loss        | 0.00087756006 |
| serial_timesteps   | 272896        |
| time_elapsed       | 1.1e+04       |
| total_timesteps    | 8732672       |
| value_loss         | 0.02908843    |
--------------------------------------
--------------------------------------
| approxkl           | 0.002945824   |
| clipfrac           | 0.008476563   |
| eplenmean          | 1.62e+03      |
| eprewmean          | 152           |
| explained_variance | 0.91          |
| fps                | 807           |
| nupdates           | 2133          |
| policy_entropy     | 0.007490117   |
| policy_loss        | -0.0017518662 |
| serial_timesteps   | 27

--------------------------------------
| approxkl           | 0.0016128983  |
| clipfrac           | 0.009609375   |
| eplenmean          | 1.62e+03      |
| eprewmean          | 161           |
| explained_variance | 0.859         |
| fps                | 818           |
| nupdates           | 2147          |
| policy_entropy     | 0.0104552945  |
| policy_loss        | 0.00074964686 |
| serial_timesteps   | 274816        |
| time_elapsed       | 1.11e+04      |
| total_timesteps    | 8794112       |
| value_loss         | 0.045819238   |
--------------------------------------
--------------------------------------
| approxkl           | 0.002144739   |
| clipfrac           | 0.0021875     |
| eplenmean          | 1.62e+03      |
| eprewmean          | 161           |
| explained_variance | 0.999         |
| fps                | 810           |
| nupdates           | 2148          |
| policy_entropy     | 0.0035768014  |
| policy_loss        | 0.00020674721 |
| serial_timesteps   | 27

Process Process-18:
Process Process-26:
Process Process-4:
Process Process-22:
Process Process-1:
Process Process-21:
Process Process-8:
Process Process-27:
Process Process-13:
Process Process-14:
Process Process-3:
Process Process-6:
Process Process-5:
Process Process-15:
Process Process-11:
Process Process-30:
Process Process-32:
Process Process-31:
Traceback (most recent call last):
Process Process-28:
Traceback (most recent call last):
Traceback (most recent call last):
Process Process-20:
Process Process-29:
Process Process-16:
Process Process-9:
Process Process-7:
Process Process-2:
Traceback (most recent call last):
Traceback (most recent call last):
Process Process-17:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/ubuntu/src/anaconda3/envs/tensorflow/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
Traceback (most recent call last):
Traceb

  File "/home/ubuntu/src/anaconda3/envs/tensorflow/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/ubuntu/repos/baselines/baselines/common/vec_env/subproc_vec_env.py", line 10, in worker
    cmd, data = remote.recv()
  File "/home/ubuntu/src/anaconda3/envs/tensorflow/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/ubuntu/src/anaconda3/envs/tensorflow/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/ubuntu/repos/baselines/baselines/common/vec_env/subproc_vec_env.py", line 10, in worker
    cmd, data = remote.recv()
  File "/home/ubuntu/src/anaconda3/envs/tensorflow/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/ubuntu/src/anaconda3/envs/tensorflow/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  

  File "/home/ubuntu/src/anaconda3/envs/tensorflow/lib/python3.6/multiprocessing/connection.py", line 407, in _recv_bytes
    buf = self._recv(4)
  File "/home/ubuntu/repos/baselines/baselines/common/vec_env/subproc_vec_env.py", line 10, in worker
    cmd, data = remote.recv()
  File "/home/ubuntu/src/anaconda3/envs/tensorflow/lib/python3.6/multiprocessing/connection.py", line 250, in recv
    buf = self._recv_bytes()
  File "/home/ubuntu/src/anaconda3/envs/tensorflow/lib/python3.6/multiprocessing/connection.py", line 250, in recv
    buf = self._recv_bytes()
  File "/home/ubuntu/projects/algotrading/notebooks/src/common/environments.py", line 207, in step
    position=self._position)
  File "/home/ubuntu/src/anaconda3/envs/tensorflow/lib/python3.6/multiprocessing/connection.py", line 407, in _recv_bytes
    buf = self._recv(4)
  File "/home/ubuntu/src/anaconda3/envs/tensorflow/lib/python3.6/multiprocessing/connection.py", line 407, in _recv_bytes
    buf = self._recv(4)
  File "/home/

  File "/home/ubuntu/src/anaconda3/envs/tensorflow/lib/python3.6/multiprocessing/connection.py", line 250, in recv
    buf = self._recv_bytes()
  File "/home/ubuntu/src/anaconda3/envs/tensorflow/lib/python3.6/multiprocessing/connection.py", line 407, in _recv_bytes
    buf = self._recv(4)
  File "/home/ubuntu/src/anaconda3/envs/tensorflow/lib/python3.6/multiprocessing/connection.py", line 407, in _recv_bytes
    buf = self._recv(4)
  File "/home/ubuntu/src/anaconda3/envs/tensorflow/lib/python3.6/multiprocessing/connection.py", line 379, in _recv
    chunk = read(handle, remaining)
  File "/home/ubuntu/src/anaconda3/envs/tensorflow/lib/python3.6/multiprocessing/connection.py", line 407, in _recv_bytes
    buf = self._recv(4)
KeyboardInterrupt
KeyboardInterrupt
KeyboardInterrupt
  File "/home/ubuntu/src/anaconda3/envs/tensorflow/lib/python3.6/site-packages/pandas/core/indexing.py", line 1325, in __getitem__
    return self._getitem_tuple(key)
  File "/home/ubuntu/src/anaconda3/envs/tenso

KeyboardInterrupt: 

  File "/home/ubuntu/src/anaconda3/envs/tensorflow/lib/python3.6/multiprocessing/connection.py", line 379, in _recv
    chunk = read(handle, remaining)
  File "/home/ubuntu/src/anaconda3/envs/tensorflow/lib/python3.6/multiprocessing/connection.py", line 379, in _recv
    chunk = read(handle, remaining)
KeyboardInterrupt
  File "/home/ubuntu/src/anaconda3/envs/tensorflow/lib/python3.6/multiprocessing/connection.py", line 379, in _recv
    chunk = read(handle, remaining)
  File "/home/ubuntu/src/anaconda3/envs/tensorflow/lib/python3.6/multiprocessing/connection.py", line 379, in _recv
    chunk = read(handle, remaining)
KeyboardInterrupt
KeyboardInterrupt
KeyboardInterrupt
KeyboardInterrupt
KeyboardInterrupt
KeyboardInterrupt
KeyboardInterrupt
KeyboardInterrupt
KeyboardInterrupt
KeyboardInterrupt
KeyboardInterrupt
KeyboardInterrupt
KeyboardInterrupt
KeyboardInterrupt
  File "/home/ubuntu/src/anaconda3/envs/tensorflow/lib/python3.6/site-packages/pandas/core/indexing.py", line 189, in _has

In [6]:
load_path = '/tmp/openai-2018-05-29-15-08-04-310254/checkpoints/00100'
logger.get_dir()

'/tmp/openai-2018-05-29-15-08-04-310254'

In [4]:
def test(env_id, load_path, seed):

    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin': ncpu //= 2
    config = tf.ConfigProto(allow_soft_placement=True,
                            intra_op_parallelism_threads=ncpu,
                            inter_op_parallelism_threads=ncpu)
    config.gpu_options.allow_growth = True #pylint: disable=E1101
    tf.Session(config=config).__enter__()
    
    def make_env():
        env = gym.make(env_id)
        return env
    
    
    env = DummyVecEnv([make_env])
    running_mean = np.load('{}/mean.npy'.format(load_path))
    running_var = np.load('{}/var.npy'.format(load_path))
    env = VecNormalizeTest(env, running_mean, running_var)
    set_global_seeds(seed)

    policy = CnnPolicy
    ob_space = env.venv.observation_space
    ac_space = env.venv.action_space
    nbatch_act = 1 * 1
    nbatch_train = 32 * 1
    nsteps = 128
    ent_coef = .01
    vf_coef = 0.5
    max_grad_norm = 0.5
    make_model = lambda : ppo2.Model(policy=policy, ob_space=ob_space, ac_space=ac_space, 
                                nbatch_act=nbatch_act, nbatch_train=nbatch_train, nsteps=nsteps, 
                                ent_coef=ent_coef, vf_coef=vf_coef, max_grad_norm=max_grad_norm)
    model = make_model()
    model.load(load_path + '/checkpoints/02150')
    
    obs = env.reset()
    while True:
        actions, _, _, _, = model.step(obs)
        obs[:], _, _, _, = env.step(actions)
        env.venv.render()

In [None]:
load_path = '../data/processed'
load_path = '/tmp/openai-2018-05-30-18-51-01-525742'
test('tradingenv-v0', load_path, seed=0)

Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See @{tf.nn.softmax_cross_entropy_with_logits_v2}.

Instructions for updating:
keep_dims is deprecated, use keepdims instead
Instructions for updating:
keep_dims is deprecated, use keepdims instead


In [5]:
def make_env():
        env = gym.make('tradingenv-v0')
        return env
env = DummyVecEnv([make_env])
load_path = '/tmp/openai-2018-05-29-15-08-04-310254'
running_mean = np.load('{}/mean.npy'.format(load_path))
running_var = np.load('{}/var.npy'.format(load_path))
env = VecNormalizeTest(env, running_mean, running_var)

In [3]:
import gym
e = gym.make('tradingenv-v5')

In [13]:
e.step(1)

(array([[ 2.44675000e+03,  2.44800000e+03,  2.44675000e+03,
          2.44725000e+03, -8.43391446e-01, -5.37299608e-01,
          0.00000000e+00],
        [ 2.44725000e+03,  2.44800000e+03,  2.44675000e+03,
          2.44700000e+03, -8.54911871e-01, -5.18773258e-01,
          0.00000000e+00],
        [ 2.44700000e+03,  2.44775000e+03,  2.44675000e+03,
          2.44700000e+03, -8.66025404e-01, -5.00000000e-01,
          0.00000000e+00],
        [ 2.44725000e+03,  2.44875000e+03,  2.44700000e+03,
          2.44775000e+03, -8.76726756e-01, -4.80988769e-01,
          0.00000000e+00],
        [ 2.44800000e+03,  2.44925000e+03,  2.44775000e+03,
          2.44850000e+03, -8.87010833e-01, -4.61748613e-01,
          0.00000000e+00],
        [ 2.44850000e+03,  2.44950000e+03,  2.44850000e+03,
          2.44900000e+03, -8.96872742e-01, -4.42288690e-01,
          0.00000000e+00],
        [ 2.44900000e+03,  2.44950000e+03,  2.44850000e+03,
          2.44850000e+03, -9.06307787e-01, -4.22618262e-01