In [1]:
import sys
import os
from baselines import logger, bench
from baselines.common import set_global_seeds
from baselines.common.cmd_util import make_atari_env, atari_arg_parser
from baselines.common.atari_wrappers import wrap_deepmind, make_atari
from baselines.common.vec_env.vec_frame_stack import VecFrameStack
from baselines.common.vec_env.dummy_vec_env import DummyVecEnv
from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv
from baselines.common.vec_env.vec_normalize import VecNormalize, VecNormalizeTest
from baselines.ppo2 import ppo2
#from baselines.ppo2.policies import CnnPolicy, LstmPolicy, LnLstmPolicy, MlpPolicy
import multiprocessing
import tensorflow as tf
import pandas as pd
import numpy as np
import gym

from src.common.environments import ESTradingEnv

import matplotlib as mpl
%matplotlib

  from ._conv import register_converters as _register_converters


Using matplotlib backend: Qt5Agg




In [2]:
#Register trading environment with OpenAI Gym
from gym.envs.registration import register
register(id='tradingenv-v0',
         entry_point='src.common.environments:ESTradingEnv_v2',)

In [3]:
from baselines.a2c.utils import fc, conv_to_fc, lstm, batch_to_seq, seq_to_batch
from baselines.common.distributions import make_pdtype
from baselines.common.input import observation_input

def ortho_init(scale=1.0):
    def _ortho_init(shape, dtype, partition_info=None):
        #lasagne ortho init for tf
        shape = tuple(shape)
        if len(shape) == 2:
            flat_shape = shape
        elif len(shape) > 2: # assumes NHWC
            flat_shape = (np.prod(shape[:-1]), shape[-1])
        else:
            raise NotImplementedError
        a = np.random.normal(0.0, 1.0, flat_shape)
        u, _, v = np.linalg.svd(a, full_matrices=False)
        q = u if u.shape == flat_shape else v # pick the one with the correct shape
        q = q.reshape(shape)
        return (scale * q[:shape[0], :shape[1]]).astype(np.float32)
    return _ortho_init

def conv(x, scope, *, nf, rf, stride, pad='VALID', init_scale=1.0, one_dim_bias=False):    
    channel_ax = 2
    strides = [1, stride, 1]
    bshape = [1, 1, nf]
    
    nin = x.get_shape()[channel_ax].value
    wshape = [rf, nin, nf]
    with tf.variable_scope(scope):
        w = tf.get_variable("w", wshape, initializer=ortho_init(init_scale))
        b = tf.get_variable("b", bshape, initializer=tf.constant_initializer(0.0))
        return b + tf.nn.conv1d(x, w, stride=stride, padding=pad)


def cnn(X, is_training, **conv_kwargs):
    activ = tf.nn.relu
    h = activ(conv(X, 'c1', nf=16, rf=4, stride=1, init_scale=np.sqrt(2),
                   **conv_kwargs))
    #h = tf.layers.dropout(h, rate=0.5, training=is_training)
    h2 = activ(conv(h, 'c2', nf=8, rf=4, stride=1, init_scale=np.sqrt(2), **conv_kwargs))
    #h2 = tf.layers.dropout(h2, rate=0.5, training=is_training)
    h3 = conv_to_fc(h2)
    return activ(fc(h3, 'fc1', nh=64, init_scale=np.sqrt(2)))


class CnnPolicy(object):

    def __init__(self, sess, ob_space, ac_space, nbatch, nsteps, reuse=False, **conv_kwargs): #pylint: disable=W0613
        if nsteps == 1:
            self._is_training = False
        else:
            self._is_training = True
            
        self.pdtype = make_pdtype(ac_space)
        X, processed_x = observation_input(ob_space, nbatch)
        with tf.variable_scope("model", reuse=reuse):
            h = cnn(processed_x, self._is_training, **conv_kwargs)
            #activ = tf.nn.relu
            #print(processed_x.get_shape())
            #h = activ(fc(processed_x, 'fc1', nh=512, init_scale=np.sqrt(2)))
            vf = fc(h, 'v', 1)[:,0]
            self.pd, self.pi = self.pdtype.pdfromlatent(h, init_scale=0.01)

        a0 = self.pd.sample()
        neglogp0 = self.pd.neglogp(a0)
        self.initial_state = None

        def step(ob, *_args, **_kwargs):
            a, v, neglogp = sess.run([a0, vf, neglogp0], {X:ob})
            return a, v, self.initial_state, neglogp

        def value(ob, *_args, **_kwargs):
            return sess.run(vf, {X:ob})

        self.X = X
        self.vf = vf
        self.step = step
        self.value = value
        
class LstmPolicy(object):

    def __init__(self, sess, ob_space, ac_space, nbatch, nsteps, nlstm=32, reuse=False):
        if nsteps == 1:
            self._is_training = False
        else:
            self._is_training = True
            
        nenv = nbatch // nsteps
        self.pdtype = make_pdtype(ac_space)
        X, processed_x = observation_input(ob_space, nbatch)

        M = tf.placeholder(tf.float32, [nbatch]) #mask (done t-1)
        S = tf.placeholder(tf.float32, [nenv, nlstm*2]) #states
        with tf.variable_scope("model", reuse=reuse):
            h = cnn(X, self._is_training)
            #h = processed_x
            #h = tf.layers.dropout(h, rate=0.5, training=self._is_training)
            xs = batch_to_seq(h, nenv, nsteps)
            ms = batch_to_seq(M, nenv, nsteps)
            h5, snew = lstm(xs, ms, S, 'lstm1', nh=nlstm)
            #h5 = tf.layers.dropout(h5, rate=0.5, training=self._is_training)
            h5 = seq_to_batch(h5)
            vf = fc(h5, 'v', 1)
            self.pd, self.pi = self.pdtype.pdfromlatent(h5)

        v0 = vf[:, 0]
        a0 = self.pd.sample()
        neglogp0 = self.pd.neglogp(a0)
        self.initial_state = np.zeros((nenv, nlstm*2), dtype=np.float32)

        def step(ob, state, mask):
            a, v, s, neglogp = sess.run([a0, v0, snew, neglogp0], {X:ob, S:state, M:mask})
            return a, v, s, neglogp

        def value(ob, state, mask):
            return sess.run(v0, {X:ob, S:state, M:mask})

        self.X = X
        self.M = M
        self.S = S
        self.vf = vf
        self.step = step
        self.value = value

In [4]:
def train(env_id, num_timesteps, seed, policy):

    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin': ncpu //= 2
    config = tf.ConfigProto(allow_soft_placement=True,
                            intra_op_parallelism_threads=ncpu,
                            inter_op_parallelism_threads=ncpu)
    config.gpu_options.allow_growth = True #pylint: disable=E1101
    tf.Session(config=config).__enter__()
    
    def make_env(rank):
        def _thunk():
            env = gym.make(env_id)
            env.seed(seed + rank)
            env = bench.Monitor(env, logger.get_dir() and os.path.join(logger.get_dir(), str(rank)))
            return env
        return _thunk
    

    #env = DummyVecEnv([make_env])
    env = SubprocVecEnv([make_env(i) for i in range(32)])
    env = VecNormalize(env, ob=True)
    set_global_seeds(seed)

    policy = LstmPolicy
    ppo2.learn(policy=policy, env=env, nsteps=256, nminibatches=32,
        lam=0.95, gamma=0.99, noptepochs=20, log_interval=1,
        ent_coef=.01,
        lr=lambda f: f * 2.5e-3,
        cliprange=0.1,
        total_timesteps=num_timesteps)

In [None]:
logger.configure()

train('tradingenv-v0', num_timesteps=1e7, seed=0, policy='cnn')

Logging to /tmp/openai-2018-08-23-09-38-32-256117
Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See @{tf.nn.softmax_cross_entropy_with_logits_v2}.

Instructions for updating:
keep_dims is deprecated, use keepdims instead
Instructions for updating:
keep_dims is deprecated, use keepdims instead
-------------------------------------
| approxkl           | 0.011441799  |
| clipfrac           | 0.4671448    |
| eplenmean          | 111          |
| eprewmean          | -7.79e+03    |
| explained_variance | 0.0407       |
| fps                | 95           |
| nupdates           | 1            |
| policy_entropy     | 1.0606921    |
| policy_loss        | -0.029196804 |
| serial_timesteps   | 256          |
| time_elapsed       | 86           |
| total_timesteps    | 8192         |
| value_loss         | 0.4977464    |
-------------------------------------
Saving to /tmp/openai-2018-08-23-09-38-32-

------------------------------------
| approxkl           | 0.23909214  |
| clipfrac           | 0.73638916  |
| eplenmean          | 174         |
| eprewmean          | -7.61e+03   |
| explained_variance | 0.00378     |
| fps                | 116         |
| nupdates           | 14          |
| policy_entropy     | 0.70232886  |
| policy_loss        | -0.04372043 |
| serial_timesteps   | 3584        |
| time_elapsed       | 998         |
| total_timesteps    | 114688      |
| value_loss         | 0.0414134   |
------------------------------------
------------------------------------
| approxkl           | 0.25558668  |
| clipfrac           | 0.7400757   |
| eplenmean          | 182         |
| eprewmean          | -7.43e+03   |
| explained_variance | 0.0842      |
| fps                | 117         |
| nupdates           | 15          |
| policy_entropy     | 0.6939071   |
| policy_loss        | -0.04488061 |
| serial_timesteps   | 3840        |
| time_elapsed       | 1.07e+03    |
|

-------------------------------------
| approxkl           | 0.37088498   |
| clipfrac           | 0.5581726    |
| eplenmean          | 222          |
| eprewmean          | -3.62e+03    |
| explained_variance | -0.00105     |
| fps                | 115          |
| nupdates           | 29           |
| policy_entropy     | 0.48334938   |
| policy_loss        | -0.027583372 |
| serial_timesteps   | 7424         |
| time_elapsed       | 2.05e+03     |
| total_timesteps    | 237568       |
| value_loss         | 0.008417767  |
-------------------------------------
-------------------------------------
| approxkl           | 0.3326018    |
| clipfrac           | 0.5780029    |
| eplenmean          | 224          |
| eprewmean          | -3.25e+03    |
| explained_variance | 0.0267       |
| fps                | 114          |
| nupdates           | 30           |
| policy_entropy     | 0.50313294   |
| policy_loss        | -0.028612971 |
| serial_timesteps   | 7680         |
| time_elaps

-------------------------------------
| approxkl           | 0.33654916   |
| clipfrac           | 0.5216858    |
| eplenmean          | 191          |
| eprewmean          | -1.07e+03    |
| explained_variance | -0.0363      |
| fps                | 114          |
| nupdates           | 44           |
| policy_entropy     | 0.4288748    |
| policy_loss        | -0.021984074 |
| serial_timesteps   | 11264        |
| time_elapsed       | 3.12e+03     |
| total_timesteps    | 360448       |
| value_loss         | 0.0016876959 |
-------------------------------------
-------------------------------------
| approxkl           | 0.34646285   |
| clipfrac           | 0.5214294    |
| eplenmean          | 185          |
| eprewmean          | -1.13e+03    |
| explained_variance | -0.00877     |
| fps                | 116          |
| nupdates           | 45           |
| policy_entropy     | 0.4236408    |
| policy_loss        | -0.019619785 |
| serial_timesteps   | 11520        |
| time_elaps

--------------------------------------
| approxkl           | 0.30827639    |
| clipfrac           | 0.4874939     |
| eplenmean          | 189           |
| eprewmean          | -710          |
| explained_variance | -0.0334       |
| fps                | 113           |
| nupdates           | 59            |
| policy_entropy     | 0.41527873    |
| policy_loss        | -0.01925296   |
| serial_timesteps   | 15104         |
| time_elapsed       | 4.19e+03      |
| total_timesteps    | 483328        |
| value_loss         | 0.00066393265 |
--------------------------------------
--------------------------------------
| approxkl           | 0.35026246    |
| clipfrac           | 0.48726195    |
| eplenmean          | 180           |
| eprewmean          | -714          |
| explained_variance | -0.00929      |
| fps                | 114           |
| nupdates           | 60            |
| policy_entropy     | 0.40082288    |
| policy_loss        | -0.01931334   |
| serial_timesteps   | 15

--------------------------------------
| approxkl           | 0.2925734     |
| clipfrac           | 0.45576173    |
| eplenmean          | 197           |
| eprewmean          | -650          |
| explained_variance | -0.0241       |
| fps                | 116           |
| nupdates           | 74            |
| policy_entropy     | 0.40361413    |
| policy_loss        | -0.010740976  |
| serial_timesteps   | 18944         |
| time_elapsed       | 5.24e+03      |
| total_timesteps    | 606208        |
| value_loss         | 0.00062768784 |
--------------------------------------
--------------------------------------
| approxkl           | 0.29800183    |
| clipfrac           | 0.45007935    |
| eplenmean          | 191           |
| eprewmean          | -667          |
| explained_variance | -0.00265      |
| fps                | 115           |
| nupdates           | 75            |
| policy_entropy     | 0.40503287    |
| policy_loss        | -0.010104721  |
| serial_timesteps   | 19

--------------------------------------
| approxkl           | 0.34625626    |
| clipfrac           | 0.44055787    |
| eplenmean          | 212           |
| eprewmean          | -644          |
| explained_variance | -0.00234      |
| fps                | 116           |
| nupdates           | 89            |
| policy_entropy     | 0.3878376     |
| policy_loss        | 0.008123795   |
| serial_timesteps   | 22784         |
| time_elapsed       | 6.35e+03      |
| total_timesteps    | 729088        |
| value_loss         | 0.00083881366 |
--------------------------------------
-------------------------------------
| approxkl           | 0.32334447   |
| clipfrac           | 0.41744384   |
| eplenmean          | 211          |
| eprewmean          | -696         |
| explained_variance | -0.0198      |
| fps                | 116          |
| nupdates           | 90           |
| policy_entropy     | 0.36794543   |
| policy_loss        | -0.004556219 |
| serial_timesteps   | 23040       

--------------------------------------
| approxkl           | 0.2779444     |
| clipfrac           | 0.37817383    |
| eplenmean          | 256           |
| eprewmean          | -829          |
| explained_variance | -0.00688      |
| fps                | 116           |
| nupdates           | 104           |
| policy_entropy     | 0.38849872    |
| policy_loss        | 0.0031945365  |
| serial_timesteps   | 26624         |
| time_elapsed       | 7.43e+03      |
| total_timesteps    | 851968        |
| value_loss         | 0.00093118986 |
--------------------------------------
-------------------------------------
| approxkl           | 0.27176362   |
| clipfrac           | 0.37860718   |
| eplenmean          | 250          |
| eprewmean          | -810         |
| explained_variance | -0.00227     |
| fps                | 116          |
| nupdates           | 105          |
| policy_entropy     | 0.38882083   |
| policy_loss        | 0.0020063366 |
| serial_timesteps   | 26880       

-------------------------------------
| approxkl           | 0.27008742   |
| clipfrac           | 0.36870727   |
| eplenmean          | 247          |
| eprewmean          | -718         |
| explained_variance | -0.0103      |
| fps                | 116          |
| nupdates           | 119          |
| policy_entropy     | 0.39962026   |
| policy_loss        | 0.014785191  |
| serial_timesteps   | 30464        |
| time_elapsed       | 8.49e+03     |
| total_timesteps    | 974848       |
| value_loss         | 0.0005491647 |
-------------------------------------
-------------------------------------
| approxkl           | 0.27652186   |
| clipfrac           | 0.35318604   |
| eplenmean          | 240          |
| eprewmean          | -658         |
| explained_variance | -0.0138      |
| fps                | 116          |
| nupdates           | 120          |
| policy_entropy     | 0.39967683   |
| policy_loss        | 0.012609281  |
| serial_timesteps   | 30720        |
| time_elaps

--------------------------------------
| approxkl           | 0.26215583    |
| clipfrac           | 0.32763672    |
| eplenmean          | 248           |
| eprewmean          | -773          |
| explained_variance | -0.00959      |
| fps                | 115           |
| nupdates           | 134           |
| policy_entropy     | 0.40287262    |
| policy_loss        | 0.014954612   |
| serial_timesteps   | 34304         |
| time_elapsed       | 9.55e+03      |
| total_timesteps    | 1097728       |
| value_loss         | 0.00096559775 |
--------------------------------------
-------------------------------------
| approxkl           | 0.25041917   |
| clipfrac           | 0.32698363   |
| eplenmean          | 242          |
| eprewmean          | -824         |
| explained_variance | -0.000742    |
| fps                | 115          |
| nupdates           | 135          |
| policy_entropy     | 0.39302546   |
| policy_loss        | 0.01840101   |
| serial_timesteps   | 34560       

-------------------------------------
| approxkl           | 0.20023231   |
| clipfrac           | 0.31350097   |
| eplenmean          | 208          |
| eprewmean          | -652         |
| explained_variance | -0.0032      |
| fps                | 111          |
| nupdates           | 149          |
| policy_entropy     | 0.4246059    |
| policy_loss        | 0.018125357  |
| serial_timesteps   | 38144        |
| time_elapsed       | 1.06e+04     |
| total_timesteps    | 1220608      |
| value_loss         | 0.0007612314 |
-------------------------------------
--------------------------------------
| approxkl           | 0.17985429    |
| clipfrac           | 0.28812867    |
| eplenmean          | 217           |
| eprewmean          | -611          |
| explained_variance | -0.0349       |
| fps                | 112           |
| nupdates           | 150           |
| policy_entropy     | 0.41894594    |
| policy_loss        | 0.017257283   |
| serial_timesteps   | 38400         |
|

--------------------------------------
| approxkl           | 0.16293663    |
| clipfrac           | 0.2581543     |
| eplenmean          | 300           |
| eprewmean          | -678          |
| explained_variance | -0.00457      |
| fps                | 114           |
| nupdates           | 164           |
| policy_entropy     | 0.3960106     |
| policy_loss        | 0.020130368   |
| serial_timesteps   | 41984         |
| time_elapsed       | 1.17e+04      |
| total_timesteps    | 1343488       |
| value_loss         | 0.00050905603 |
--------------------------------------
-------------------------------------
| approxkl           | 0.1716952    |
| clipfrac           | 0.2484375    |
| eplenmean          | 327          |
| eprewmean          | -694         |
| explained_variance | -0.00324     |
| fps                | 114          |
| nupdates           | 165          |
| policy_entropy     | 0.39657122   |
| policy_loss        | 0.021217793  |
| serial_timesteps   | 42240       

--------------------------------------
| approxkl           | 0.13379385    |
| clipfrac           | 0.23132935    |
| eplenmean          | 294           |
| eprewmean          | -590          |
| explained_variance | -0.00534      |
| fps                | 115           |
| nupdates           | 179           |
| policy_entropy     | 0.41577226    |
| policy_loss        | 0.015183747   |
| serial_timesteps   | 45824         |
| time_elapsed       | 1.28e+04      |
| total_timesteps    | 1466368       |
| value_loss         | 0.00051767414 |
--------------------------------------
--------------------------------------
| approxkl           | 0.120860755   |
| clipfrac           | 0.20528564    |
| eplenmean          | 305           |
| eprewmean          | -575          |
| explained_variance | -0.000486     |
| fps                | 114           |
| nupdates           | 180           |
| policy_entropy     | 0.4151381     |
| policy_loss        | 0.011518435   |
| serial_timesteps   | 46

--------------------------------------
| approxkl           | 0.07722126    |
| clipfrac           | 0.17411499    |
| eplenmean          | 381           |
| eprewmean          | -556          |
| explained_variance | -0.00302      |
| fps                | 116           |
| nupdates           | 194           |
| policy_entropy     | 0.36981362    |
| policy_loss        | 0.018029744   |
| serial_timesteps   | 49664         |
| time_elapsed       | 1.38e+04      |
| total_timesteps    | 1589248       |
| value_loss         | 0.00026478496 |
--------------------------------------
--------------------------------------
| approxkl           | 0.08604763    |
| clipfrac           | 0.1861084     |
| eplenmean          | 395           |
| eprewmean          | -551          |
| explained_variance | -0.00191      |
| fps                | 116           |
| nupdates           | 195           |
| policy_entropy     | 0.37991506    |
| policy_loss        | 0.018640323   |
| serial_timesteps   | 49

--------------------------------------
| approxkl           | 0.041152652   |
| clipfrac           | 0.16106567    |
| eplenmean          | 570           |
| eprewmean          | -575          |
| explained_variance | -0.0047       |
| fps                | 118           |
| nupdates           | 208           |
| policy_entropy     | 0.4055203     |
| policy_loss        | 0.009551396   |
| serial_timesteps   | 53248         |
| time_elapsed       | 1.48e+04      |
| total_timesteps    | 1703936       |
| value_loss         | 0.00022479973 |
--------------------------------------
-------------------------------------
| approxkl           | 0.04944966   |
| clipfrac           | 0.17566529   |
| eplenmean          | 580          |
| eprewmean          | -589         |
| explained_variance | -5.98e-05    |
| fps                | 117          |
| nupdates           | 209          |
| policy_entropy     | 0.38379395   |
| policy_loss        | 0.00653994   |
| serial_timesteps   | 53504       

--------------------------------------
| approxkl           | 0.03522451    |
| clipfrac           | 0.14156494    |
| eplenmean          | 616           |
| eprewmean          | -774          |
| explained_variance | 0.000718      |
| fps                | 117           |
| nupdates           | 223           |
| policy_entropy     | 0.336901      |
| policy_loss        | 0.0069047017  |
| serial_timesteps   | 57088         |
| time_elapsed       | 1.59e+04      |
| total_timesteps    | 1826816       |
| value_loss         | 0.00047612324 |
--------------------------------------
-------------------------------------
| approxkl           | 0.036399093  |
| clipfrac           | 0.14934692   |
| eplenmean          | 625          |
| eprewmean          | -774         |
| explained_variance | -0.00583     |
| fps                | 117          |
| nupdates           | 224          |
| policy_entropy     | 0.34635362   |
| policy_loss        | 0.0098046055 |
| serial_timesteps   | 57344       

--------------------------------------
| approxkl           | 0.015640631   |
| clipfrac           | 0.07723999    |
| eplenmean          | 729           |
| eprewmean          | -746          |
| explained_variance | 0.00324       |
| fps                | 115           |
| nupdates           | 238           |
| policy_entropy     | 0.27450433    |
| policy_loss        | 0.004870998   |
| serial_timesteps   | 60928         |
| time_elapsed       | 1.69e+04      |
| total_timesteps    | 1949696       |
| value_loss         | 0.00024645307 |
--------------------------------------
--------------------------------------
| approxkl           | 0.0188407     |
| clipfrac           | 0.10305786    |
| eplenmean          | 729           |
| eprewmean          | -742          |
| explained_variance | 0.000371      |
| fps                | 116           |
| nupdates           | 239           |
| policy_entropy     | 0.27086946    |
| policy_loss        | 0.00790711    |
| serial_timesteps   | 61

--------------------------------------
| approxkl           | 0.006477218   |
| clipfrac           | 0.03927002    |
| eplenmean          | 1.08e+03      |
| eprewmean          | -836          |
| explained_variance | 0.000397      |
| fps                | 116           |
| nupdates           | 252           |
| policy_entropy     | 0.16753235    |
| policy_loss        | 0.00081611593 |
| serial_timesteps   | 64512         |
| time_elapsed       | 1.79e+04      |
| total_timesteps    | 2064384       |
| value_loss         | 0.0006786418  |
--------------------------------------
------------------------------------
| approxkl           | 0.00674096  |
| clipfrac           | 0.037866212 |
| eplenmean          | 1.11e+03    |
| eprewmean          | -867        |
| explained_variance | -0.000932   |
| fps                | 116         |
| nupdates           | 253         |
| policy_entropy     | 0.16367915  |
| policy_loss        | 0.006456802 |
| serial_timesteps   | 64768       |
| time_e

--------------------------------------
| approxkl           | 0.009079081   |
| clipfrac           | 0.037738036   |
| eplenmean          | 1.43e+03      |
| eprewmean          | -720          |
| explained_variance | -0.014        |
| fps                | 111           |
| nupdates           | 267           |
| policy_entropy     | 0.112006806   |
| policy_loss        | 0.0025593457  |
| serial_timesteps   | 68352         |
| time_elapsed       | 1.9e+04       |
| total_timesteps    | 2187264       |
| value_loss         | 6.4867185e-05 |
--------------------------------------
--------------------------------------
| approxkl           | 0.010542205   |
| clipfrac           | 0.030639648   |
| eplenmean          | 1.44e+03      |
| eprewmean          | -704          |
| explained_variance | 0.000436      |
| fps                | 111           |
| nupdates           | 268           |
| policy_entropy     | 0.09405193    |
| policy_loss        | 0.0027720924  |
| serial_timesteps   | 68

-------------------------------------
| approxkl           | 0.0058611026 |
| clipfrac           | 0.0060180663 |
| eplenmean          | 1.55e+03     |
| eprewmean          | -405         |
| explained_variance | -0.025       |
| fps                | 119          |
| nupdates           | 282          |
| policy_entropy     | 0.043878745  |
| policy_loss        | 0.0009113254 |
| serial_timesteps   | 72192        |
| time_elapsed       | 2e+04        |
| total_timesteps    | 2310144      |
| value_loss         | 2.444927e-05 |
-------------------------------------
--------------------------------------
| approxkl           | 0.0021471495  |
| clipfrac           | 0.007525635   |
| eplenmean          | 1.56e+03      |
| eprewmean          | -372          |
| explained_variance | -0.012        |
| fps                | 119           |
| nupdates           | 283           |
| policy_entropy     | 0.047786046   |
| policy_loss        | 0.00096926314 |
| serial_timesteps   | 72448         |
|

---------------------------------------
| approxkl           | 0.010015601    |
| clipfrac           | 0.007562256    |
| eplenmean          | 1.61e+03       |
| eprewmean          | -124           |
| explained_variance | 0.0135         |
| fps                | 119            |
| nupdates           | 296            |
| policy_entropy     | 0.021760236    |
| policy_loss        | -0.00067492016 |
| serial_timesteps   | 75776          |
| time_elapsed       | 2.1e+04        |
| total_timesteps    | 2424832        |
| value_loss         | 3.0454912e-05  |
---------------------------------------
--------------------------------------
| approxkl           | 0.0025952999  |
| clipfrac           | 0.0025146485  |
| eplenmean          | 1.61e+03      |
| eprewmean          | -118          |
| explained_variance | 0.175         |
| fps                | 119           |
| nupdates           | 297           |
| policy_entropy     | 0.01772022    |
| policy_loss        | 0.000770936   |
| serial_t

-------------------------------------
| approxkl           | 0.019179447  |
| clipfrac           | 0.05217285   |
| eplenmean          | 1.53e+03     |
| eprewmean          | -547         |
| explained_variance | 0.282        |
| fps                | 119          |
| nupdates           | 310          |
| policy_entropy     | 0.14548141   |
| policy_loss        | 0.006505166  |
| serial_timesteps   | 79360        |
| time_elapsed       | 2.19e+04     |
| total_timesteps    | 2539520      |
| value_loss         | 0.0004571952 |
-------------------------------------
-------------------------------------
| approxkl           | 0.01461754   |
| clipfrac           | 0.050360106  |
| eplenmean          | 1.49e+03     |
| eprewmean          | -582         |
| explained_variance | 0.207        |
| fps                | 119          |
| nupdates           | 311          |
| policy_entropy     | 0.11983138   |
| policy_loss        | 0.0077887103 |
| serial_timesteps   | 79616        |
| time_elaps

---------------------------------------
| approxkl           | 4.9732334e-05  |
| clipfrac           | 0.0017028808   |
| eplenmean          | 1.5e+03        |
| eprewmean          | -473           |
| explained_variance | -0.0101        |
| fps                | 115            |
| nupdates           | 325            |
| policy_entropy     | 0.017725809    |
| policy_loss        | -0.00046811887 |
| serial_timesteps   | 83200          |
| time_elapsed       | 2.3e+04        |
| total_timesteps    | 2662400        |
| value_loss         | 1.0599049e-05  |
---------------------------------------
--------------------------------------
| approxkl           | 0.0002117691  |
| clipfrac           | 0.0014770508  |
| eplenmean          | 1.5e+03       |
| eprewmean          | -459          |
| explained_variance | 0.124         |
| fps                | 115           |
| nupdates           | 326           |
| policy_entropy     | 0.01873957    |
| policy_loss        | 0.00030365802 |
| serial_t

---------------------------------------
| approxkl           | 0.0003359412   |
| clipfrac           | 0.002935791    |
| eplenmean          | 1.62e+03       |
| eprewmean          | -82.8          |
| explained_variance | -0.00857       |
| fps                | 112            |
| nupdates           | 339            |
| policy_entropy     | 0.031028843    |
| policy_loss        | -0.0001661126  |
| serial_timesteps   | 86784          |
| time_elapsed       | 2.4e+04        |
| total_timesteps    | 2777088        |
| value_loss         | 1.40735665e-05 |
---------------------------------------
--------------------------------------
| approxkl           | 0.0001435821  |
| clipfrac           | 0.0018249511  |
| eplenmean          | 1.62e+03      |
| eprewmean          | -75.7         |
| explained_variance | -0.0277       |
| fps                | 112           |
| nupdates           | 340           |
| policy_entropy     | 0.03494332    |
| policy_loss        | -0.0005771908 |
| serial_t

--------------------------------------
| approxkl           | 5.549369e-05  |
| clipfrac           | 0.0011901855  |
| eplenmean          | 1.62e+03      |
| eprewmean          | -54           |
| explained_variance | 0.111         |
| fps                | 116           |
| nupdates           | 353           |
| policy_entropy     | 0.022335028   |
| policy_loss        | 0.0006051959  |
| serial_timesteps   | 90368         |
| time_elapsed       | 2.5e+04       |
| total_timesteps    | 2891776       |
| value_loss         | 1.1171396e-05 |
--------------------------------------
---------------------------------------
| approxkl           | 2.8782199e-05  |
| clipfrac           | 0.0017028808   |
| eplenmean          | 1.62e+03       |
| eprewmean          | -53.2          |
| explained_variance | -0.0171        |
| fps                | 117            |
| nupdates           | 354            |
| policy_entropy     | 0.018565362    |
| policy_loss        | -0.00014776606 |
| serial_timest

--------------------------------------
| approxkl           | 7.60328e-06   |
| clipfrac           | 0.0004394531  |
| eplenmean          | 1.62e+03      |
| eprewmean          | -37.4         |
| explained_variance | 0.0521        |
| fps                | 111           |
| nupdates           | 367           |
| policy_entropy     | 0.012525119   |
| policy_loss        | 5.3547476e-05 |
| serial_timesteps   | 93952         |
| time_elapsed       | 2.59e+04      |
| total_timesteps    | 3006464       |
| value_loss         | 3.8291464e-06 |
--------------------------------------
--------------------------------------
| approxkl           | 1.0013906e-05 |
| clipfrac           | 0.00072631834 |
| eplenmean          | 1.62e+03      |
| eprewmean          | -34.4         |
| explained_variance | 0.0284        |
| fps                | 108           |
| nupdates           | 368           |
| policy_entropy     | 0.0113944495  |
| policy_loss        | -8.953396e-05 |
| serial_timesteps   | 94

--------------------------------------
| approxkl           | 5.8405894e-05 |
| clipfrac           | 0.0009460449  |
| eplenmean          | 1.62e+03      |
| eprewmean          | -27.8         |
| explained_variance | 0.174         |
| fps                | 119           |
| nupdates           | 381           |
| policy_entropy     | 0.015666787   |
| policy_loss        | 0.00021050908 |
| serial_timesteps   | 97536         |
| time_elapsed       | 2.69e+04      |
| total_timesteps    | 3121152       |
| value_loss         | 6.5554195e-06 |
--------------------------------------
--------------------------------------
| approxkl           | 2.4854608e-05 |
| clipfrac           | 0.0014526367  |
| eplenmean          | 1.62e+03      |
| eprewmean          | -27.3         |
| explained_variance | 0.151         |
| fps                | 119           |
| nupdates           | 382           |
| policy_entropy     | 0.016268928   |
| policy_loss        | -0.000161609  |
| serial_timesteps   | 97

---------------------------------------
| approxkl           | 5.285763e-05   |
| clipfrac           | 0.0010986328   |
| eplenmean          | 1.62e+03       |
| eprewmean          | -21.7          |
| explained_variance | -0.00178       |
| fps                | 120            |
| nupdates           | 395            |
| policy_entropy     | 0.011146776    |
| policy_loss        | -6.9723355e-05 |
| serial_timesteps   | 101120         |
| time_elapsed       | 2.79e+04       |
| total_timesteps    | 3235840        |
| value_loss         | 4.411201e-06   |
---------------------------------------
---------------------------------------
| approxkl           | 1.0779136e-05  |
| clipfrac           | 0.00045776367  |
| eplenmean          | 1.62e+03       |
| eprewmean          | -21.2          |
| explained_variance | 0.847          |
| fps                | 120            |
| nupdates           | 396            |
| policy_entropy     | 0.011558358    |
| policy_loss        | -1.7953233e-05 |


--------------------------------------
| approxkl           | 2.5026495e-05 |
| clipfrac           | 0.000579834   |
| eplenmean          | 1.62e+03      |
| eprewmean          | -15.4         |
| explained_variance | 0.05          |
| fps                | 117           |
| nupdates           | 409           |
| policy_entropy     | 0.016680779   |
| policy_loss        | 1.974299e-06  |
| serial_timesteps   | 104704        |
| time_elapsed       | 2.89e+04      |
| total_timesteps    | 3350528       |
| value_loss         | 7.072305e-06  |
--------------------------------------
--------------------------------------
| approxkl           | 1.2275309e-05 |
| clipfrac           | 0.0009399414  |
| eplenmean          | 1.62e+03      |
| eprewmean          | -14.4         |
| explained_variance | 0.183         |
| fps                | 115           |
| nupdates           | 410           |
| policy_entropy     | 0.016087372   |
| policy_loss        | 1.4090236e-05 |
| serial_timesteps   | 10

---------------------------------------
| approxkl           | 6.0068898e-05  |
| clipfrac           | 0.0007568359   |
| eplenmean          | 1.62e+03       |
| eprewmean          | -14.9          |
| explained_variance | 0.365          |
| fps                | 117            |
| nupdates           | 423            |
| policy_entropy     | 0.009358342    |
| policy_loss        | -0.00013395523 |
| serial_timesteps   | 108288         |
| time_elapsed       | 2.98e+04       |
| total_timesteps    | 3465216        |
| value_loss         | 2.1225464e-06  |
---------------------------------------
---------------------------------------
| approxkl           | 3.535932e-05   |
| clipfrac           | 0.0011901855   |
| eplenmean          | 1.62e+03       |
| eprewmean          | -16.2          |
| explained_variance | 0.267          |
| fps                | 112            |
| nupdates           | 424            |
| policy_entropy     | 0.009220996    |
| policy_loss        | -0.00016258565 |


---------------------------------------
| approxkl           | 1.3172797e-05  |
| clipfrac           | 0.0008605957   |
| eplenmean          | 1.41e+03       |
| eprewmean          | -153           |
| explained_variance | 0.151          |
| fps                | 115            |
| nupdates           | 437            |
| policy_entropy     | 0.024307093    |
| policy_loss        | -2.0614725e-05 |
| serial_timesteps   | 111872         |
| time_elapsed       | 3.09e+04       |
| total_timesteps    | 3579904        |
| value_loss         | 1.1191603e-05  |
---------------------------------------
--------------------------------------
| approxkl           | 1.4449894e-05 |
| clipfrac           | 0.00025634764 |
| eplenmean          | 1.41e+03      |
| eprewmean          | -153          |
| explained_variance | 0.0813        |
| fps                | 117           |
| nupdates           | 438           |
| policy_entropy     | 0.026295578   |
| policy_loss        | -5.385176e-05 |
| serial_t

Saving to /tmp/openai-2018-08-23-09-38-32-256117/checkpoints/00450
--------------------------------------
| approxkl           | 0.00028399267 |
| clipfrac           | 0.024176026   |
| eplenmean          | 1.37e+03      |
| eprewmean          | -569          |
| explained_variance | 0.00594       |
| fps                | 114           |
| nupdates           | 451           |
| policy_entropy     | 0.18129039    |
| policy_loss        | -0.002014993  |
| serial_timesteps   | 115456        |
| time_elapsed       | 3.18e+04      |
| total_timesteps    | 3694592       |
| value_loss         | 0.00093381107 |
--------------------------------------
--------------------------------------
| approxkl           | 0.00024747092 |
| clipfrac           | 0.020965576   |
| eplenmean          | 1.39e+03      |
| eprewmean          | -660          |
| explained_variance | 0.015         |
| fps                | 113           |
| nupdates           | 452           |
| policy_entropy     | 0.16511077   

---------------------------------------
| approxkl           | 4.665378e-05   |
| clipfrac           | 0.003076172    |
| eplenmean          | 1.41e+03       |
| eprewmean          | -873           |
| explained_variance | -0.00731       |
| fps                | 119            |
| nupdates           | 465            |
| policy_entropy     | 0.05845332     |
| policy_loss        | -0.0001222732  |
| serial_timesteps   | 119040         |
| time_elapsed       | 3.28e+04       |
| total_timesteps    | 3809280        |
| value_loss         | 0.000120297365 |
---------------------------------------
---------------------------------------
| approxkl           | 6.1759754e-05  |
| clipfrac           | 0.005609131    |
| eplenmean          | 1.42e+03       |
| eprewmean          | -814           |
| explained_variance | -0.0104        |
| fps                | 118            |
| nupdates           | 466            |
| policy_entropy     | 0.05211549     |
| policy_loss        | -0.00059911166 |


---------------------------------------
| approxkl           | 1.3940147e-05  |
| clipfrac           | 0.0006713867   |
| eplenmean          | 1.62e+03       |
| eprewmean          | -218           |
| explained_variance | -0.00146       |
| fps                | 119            |
| nupdates           | 479            |
| policy_entropy     | 0.019103063    |
| policy_loss        | -2.3748658e-05 |
| serial_timesteps   | 122624         |
| time_elapsed       | 3.38e+04       |
| total_timesteps    | 3923968        |
| value_loss         | 1.1009957e-05  |
---------------------------------------
---------------------------------------
| approxkl           | 3.4248194e-05  |
| clipfrac           | 0.0013671875   |
| eplenmean          | 1.62e+03       |
| eprewmean          | -215           |
| explained_variance | -0.000304      |
| fps                | 119            |
| nupdates           | 480            |
| policy_entropy     | 0.023016617    |
| policy_loss        | -6.7343294e-06 |


--------------------------------------
| approxkl           | 3.172084e-05  |
| clipfrac           | 0.0016784668  |
| eplenmean          | 1.62e+03      |
| eprewmean          | -45.1         |
| explained_variance | -5.27e-05     |
| fps                | 119           |
| nupdates           | 493           |
| policy_entropy     | 0.008898585   |
| policy_loss        | -0.000297537  |
| serial_timesteps   | 126208        |
| time_elapsed       | 3.47e+04      |
| total_timesteps    | 4038656       |
| value_loss         | 1.1182918e-05 |
--------------------------------------
--------------------------------------
| approxkl           | 6.2228705e-06 |
| clipfrac           | 0.0003173828  |
| eplenmean          | 1.62e+03      |
| eprewmean          | -38.9         |
| explained_variance | -0.00163      |
| fps                | 118           |
| nupdates           | 494           |
| policy_entropy     | 0.008614486   |
| policy_loss        | 5.0602997e-05 |
| serial_timesteps   | 12

--------------------------------------
| approxkl           | 2.182236e-05  |
| clipfrac           | 0.0012512207  |
| eplenmean          | 1.62e+03      |
| eprewmean          | -22.4         |
| explained_variance | -2.1e-05      |
| fps                | 119           |
| nupdates           | 507           |
| policy_entropy     | 0.009907685   |
| policy_loss        | -4.365175e-05 |
| serial_timesteps   | 129792        |
| time_elapsed       | 3.57e+04      |
| total_timesteps    | 4153344       |
| value_loss         | 9.006333e-06  |
--------------------------------------
---------------------------------------
| approxkl           | 2.1269841e-05  |
| clipfrac           | 0.0012512207   |
| eplenmean          | 1.62e+03       |
| eprewmean          | -20            |
| explained_variance | -0.00133       |
| fps                | 118            |
| nupdates           | 508            |
| policy_entropy     | 0.008609946    |
| policy_loss        | -0.00038868783 |
| serial_timest

--------------------------------------
| approxkl           | 3.9600745e-05 |
| clipfrac           | 0.0008239746  |
| eplenmean          | 1.62e+03      |
| eprewmean          | -16           |
| explained_variance | -0.00329      |
| fps                | 120           |
| nupdates           | 521           |
| policy_entropy     | 0.00453064    |
| policy_loss        | -5.36254e-05  |
| serial_timesteps   | 133376        |
| time_elapsed       | 3.67e+04      |
| total_timesteps    | 4268032       |
| value_loss         | 8.364744e-06  |
--------------------------------------
--------------------------------------
| approxkl           | 6.3089146e-06 |
| clipfrac           | 0.0003173828  |
| eplenmean          | 1.62e+03      |
| eprewmean          | -18.6         |
| explained_variance | -0.000164     |
| fps                | 119           |
| nupdates           | 522           |
| policy_entropy     | 0.0042639445  |
| policy_loss        | -9.352218e-05 |
| serial_timesteps   | 13

---------------------------------------
| approxkl           | 1.0294303e-05  |
| clipfrac           | 0.00053100585  |
| eplenmean          | 1.62e+03       |
| eprewmean          | -14.2          |
| explained_variance | -6.08e-05      |
| fps                | 117            |
| nupdates           | 535            |
| policy_entropy     | 0.0066556833   |
| policy_loss        | -5.8997335e-05 |
| serial_timesteps   | 136960         |
| time_elapsed       | 3.76e+04       |
| total_timesteps    | 4382720        |
| value_loss         | 6.218446e-06   |
---------------------------------------
---------------------------------------
| approxkl           | 2.7977233e-06  |
| clipfrac           | 0.00025634764  |
| eplenmean          | 1.62e+03       |
| eprewmean          | -15.2          |
| explained_variance | -0.00565       |
| fps                | 117            |
| nupdates           | 536            |
| policy_entropy     | 0.006400682    |
| policy_loss        | -1.0516518e-05 |


---------------------------------------
| approxkl           | 1.07404785e-05 |
| clipfrac           | 0.0006591797   |
| eplenmean          | 1.62e+03       |
| eprewmean          | -12.5          |
| explained_variance | 0.00413        |
| fps                | 120            |
| nupdates           | 549            |
| policy_entropy     | 0.007333099    |
| policy_loss        | -1.3089797e-06 |
| serial_timesteps   | 140544         |
| time_elapsed       | 3.86e+04       |
| total_timesteps    | 4497408        |
| value_loss         | 5.204732e-06   |
---------------------------------------
---------------------------------------
| approxkl           | 2.2418844e-05  |
| clipfrac           | 0.00080566405  |
| eplenmean          | 1.62e+03       |
| eprewmean          | -13.6          |
| explained_variance | -0.0435        |
| fps                | 120            |
| nupdates           | 550            |
| policy_entropy     | 0.00658111     |
| policy_loss        | -7.3306226e-05 |


--------------------------------------
| approxkl           | 5.152565e-06  |
| clipfrac           | 0.00034179687 |
| eplenmean          | 1.62e+03      |
| eprewmean          | -12.3         |
| explained_variance | 0.0273        |
| fps                | 118           |
| nupdates           | 563           |
| policy_entropy     | 0.005907209   |
| policy_loss        | 7.792487e-06  |
| serial_timesteps   | 144128        |
| time_elapsed       | 3.96e+04      |
| total_timesteps    | 4612096       |
| value_loss         | 5.2182977e-06 |
--------------------------------------
---------------------------------------
| approxkl           | 3.310001e-06   |
| clipfrac           | 0.00040283203  |
| eplenmean          | 1.62e+03       |
| eprewmean          | -11.6          |
| explained_variance | -0.0382        |
| fps                | 117            |
| nupdates           | 564            |
| policy_entropy     | 0.0054592183   |
| policy_loss        | -1.8041732e-05 |
| serial_timest

---------------------------------------
| approxkl           | 1.9877032e-05  |
| clipfrac           | 0.0005615234   |
| eplenmean          | 1.62e+03       |
| eprewmean          | -8.06          |
| explained_variance | -5.66e-05      |
| fps                | 120            |
| nupdates           | 577            |
| policy_entropy     | 0.008518682    |
| policy_loss        | -0.00010576112 |
| serial_timesteps   | 147712         |
| time_elapsed       | 4.06e+04       |
| total_timesteps    | 4726784        |
| value_loss         | 3.3058798e-06  |
---------------------------------------
--------------------------------------
| approxkl           | 5.123343e-06  |
| clipfrac           | 0.0005065918  |
| eplenmean          | 1.62e+03      |
| eprewmean          | -8.06         |
| explained_variance | -0.0263       |
| fps                | 119           |
| nupdates           | 578           |
| policy_entropy     | 0.009722715   |
| policy_loss        | -5.275884e-06 |
| serial_t

---------------------------------------
| approxkl           | 3.802295e-05   |
| clipfrac           | 0.0022399903   |
| eplenmean          | 1.62e+03       |
| eprewmean          | -10.2          |
| explained_variance | -1.73e-05      |
| fps                | 119            |
| nupdates           | 591            |
| policy_entropy     | 0.017836282    |
| policy_loss        | -0.00031269604 |
| serial_timesteps   | 151296         |
| time_elapsed       | 4.15e+04       |
| total_timesteps    | 4841472        |
| value_loss         | 0.00011559846  |
---------------------------------------
---------------------------------------
| approxkl           | 2.8691411e-05  |
| clipfrac           | 0.0021118163   |
| eplenmean          | 1.62e+03       |
| eprewmean          | -11.4          |
| explained_variance | 8.28e-05       |
| fps                | 119            |
| nupdates           | 592            |
| policy_entropy     | 0.016305197    |
| policy_loss        | -0.00032289122 |


--------------------------------------
| approxkl           | 1.2500304e-05 |
| clipfrac           | 0.00087280275 |
| eplenmean          | 1.62e+03      |
| eprewmean          | -20.5         |
| explained_variance | -9.66e-06     |
| fps                | 119           |
| nupdates           | 605           |
| policy_entropy     | 0.010021543   |
| policy_loss        | -0.0001946056 |
| serial_timesteps   | 154880        |
| time_elapsed       | 4.25e+04      |
| total_timesteps    | 4956160       |
| value_loss         | 6.282474e-06  |
--------------------------------------
---------------------------------------
| approxkl           | 7.392685e-06   |
| clipfrac           | 0.00035400392  |
| eplenmean          | 1.62e+03       |
| eprewmean          | -20.1          |
| explained_variance | -0.0508        |
| fps                | 119            |
| nupdates           | 606            |
| policy_entropy     | 0.009489736    |
| policy_loss        | -6.3369414e-05 |
| serial_timest

--------------------------------------
| approxkl           | 8.96655e-06   |
| clipfrac           | 0.0006713867  |
| eplenmean          | 1.62e+03      |
| eprewmean          | -14.6         |
| explained_variance | -1.07e-06     |
| fps                | 118           |
| nupdates           | 619           |
| policy_entropy     | 0.012469567   |
| policy_loss        | -9.407087e-05 |
| serial_timesteps   | 158464        |
| time_elapsed       | 4.35e+04      |
| total_timesteps    | 5070848       |
| value_loss         | 3.9600327e-06 |
--------------------------------------
---------------------------------------
| approxkl           | 1.181609e-05   |
| clipfrac           | 0.0010009765   |
| eplenmean          | 1.62e+03       |
| eprewmean          | -13            |
| explained_variance | 2.68e-06       |
| fps                | 117            |
| nupdates           | 620            |
| policy_entropy     | 0.014045291    |
| policy_loss        | -1.3317529e-05 |
| serial_timest

---------------------------------------
| approxkl           | 1.4407326e-05  |
| clipfrac           | 0.0008972168   |
| eplenmean          | 1.62e+03       |
| eprewmean          | -15.5          |
| explained_variance | 9.54e-07       |
| fps                | 111            |
| nupdates           | 633            |
| policy_entropy     | 0.014109393    |
| policy_loss        | -0.00011630046 |
| serial_timesteps   | 162048         |
| time_elapsed       | 4.45e+04       |
| total_timesteps    | 5185536        |
| value_loss         | 1.1187307e-05  |
---------------------------------------
--------------------------------------
| approxkl           | 8.908262e-06  |
| clipfrac           | 0.00072631834 |
| eplenmean          | 1.62e+03      |
| eprewmean          | -13.5         |
| explained_variance | -0.0193       |
| fps                | 120           |
| nupdates           | 634           |
| policy_entropy     | 0.013891863   |
| policy_loss        | 1.0929254e-05 |
| serial_t

--------------------------------------
| approxkl           | 1.8007235e-05 |
| clipfrac           | 0.0012634278  |
| eplenmean          | 1.62e+03      |
| eprewmean          | -10.1         |
| explained_variance | -0.00206      |
| fps                | 120           |
| nupdates           | 647           |
| policy_entropy     | 0.01130431    |
| policy_loss        | -9.198174e-05 |
| serial_timesteps   | 165632        |
| time_elapsed       | 4.54e+04      |
| total_timesteps    | 5300224       |
| value_loss         | 1.1064324e-05 |
--------------------------------------
---------------------------------------
| approxkl           | 1.730146e-05   |
| clipfrac           | 0.00079956057  |
| eplenmean          | 1.62e+03       |
| eprewmean          | -8.18          |
| explained_variance | -6.13e-05      |
| fps                | 118            |
| nupdates           | 648            |
| policy_entropy     | 0.013326784    |
| policy_loss        | -0.00019409985 |
| serial_timest

--------------------------------------
| approxkl           | 9.2172496e-07 |
| clipfrac           | 4.8828126e-05 |
| eplenmean          | 1.62e+03      |
| eprewmean          | -10.8         |
| explained_variance | -0.00108      |
| fps                | 121           |
| nupdates           | 661           |
| policy_entropy     | 0.0068066106  |
| policy_loss        | -6.998959e-06 |
| serial_timesteps   | 169216        |
| time_elapsed       | 4.64e+04      |
| total_timesteps    | 5414912       |
| value_loss         | 7.063305e-08  |
--------------------------------------
---------------------------------------
| approxkl           | 1.1533497e-05  |
| clipfrac           | 0.0005615234   |
| eplenmean          | 1.62e+03       |
| eprewmean          | -10.8          |
| explained_variance | 0.0263         |
| fps                | 120            |
| nupdates           | 662            |
| policy_entropy     | 0.006934003    |
| policy_loss        | -0.00018072085 |
| serial_timest

--------------------------------------
| approxkl           | 8.581973e-06  |
| clipfrac           | 0.00044555665 |
| eplenmean          | 1.62e+03      |
| eprewmean          | -10.6         |
| explained_variance | 2.8e-06       |
| fps                | 120           |
| nupdates           | 675           |
| policy_entropy     | 0.017441455   |
| policy_loss        | 5.6316137e-05 |
| serial_timesteps   | 172800        |
| time_elapsed       | 4.74e+04      |
| total_timesteps    | 5529600       |
| value_loss         | 1.955383e-05  |
--------------------------------------
---------------------------------------
| approxkl           | 1.274944e-05   |
| clipfrac           | 0.00084838865  |
| eplenmean          | 1.62e+03       |
| eprewmean          | -10.6          |
| explained_variance | 9.54e-07       |
| fps                | 119            |
| nupdates           | 676            |
| policy_entropy     | 0.017065229    |
| policy_loss        | -5.0823328e-05 |
| serial_timest

---------------------------------------
| approxkl           | 2.6261117e-05  |
| clipfrac           | 0.0010803223   |
| eplenmean          | 1.62e+03       |
| eprewmean          | -9.52          |
| explained_variance | 5.82e-05       |
| fps                | 120            |
| nupdates           | 689            |
| policy_entropy     | 0.009556281    |
| policy_loss        | -0.0004219741  |
| serial_timesteps   | 176384         |
| time_elapsed       | 4.83e+04       |
| total_timesteps    | 5644288        |
| value_loss         | 1.29769305e-05 |
---------------------------------------
--------------------------------------
| approxkl           | 9.148086e-06  |
| clipfrac           | 0.0007141113  |
| eplenmean          | 1.62e+03      |
| eprewmean          | -12.2         |
| explained_variance | -0.00176      |
| fps                | 121           |
| nupdates           | 690           |
| policy_entropy     | 0.011165331   |
| policy_loss        | 2.5479525e-05 |
| serial_t

--------------------------------------
| approxkl           | 1.2263047e-05 |
| clipfrac           | 0.0004272461  |
| eplenmean          | 1.62e+03      |
| eprewmean          | -17.4         |
| explained_variance | -3.1e-06      |
| fps                | 116           |
| nupdates           | 703           |
| policy_entropy     | 0.007895035   |
| policy_loss        | -7.604631e-05 |
| serial_timesteps   | 179968        |
| time_elapsed       | 4.93e+04      |
| total_timesteps    | 5758976       |
| value_loss         | 1.4730192e-06 |
--------------------------------------
---------------------------------------
| approxkl           | 1.8864583e-05  |
| clipfrac           | 0.0008972168   |
| eplenmean          | 1.62e+03       |
| eprewmean          | -15.7          |
| explained_variance | -0.000415      |
| fps                | 117            |
| nupdates           | 704            |
| policy_entropy     | 0.0074727847   |
| policy_loss        | -0.00018788931 |
| serial_timest

---------------------------------------
| approxkl           | 9.024052e-06   |
| clipfrac           | 0.0008911133   |
| eplenmean          | 1.62e+03       |
| eprewmean          | -8.1           |
| explained_variance | 0.00785        |
| fps                | 112            |
| nupdates           | 717            |
| policy_entropy     | 0.009663525    |
| policy_loss        | -0.00017798762 |
| serial_timesteps   | 183552         |
| time_elapsed       | 5.03e+04       |
| total_timesteps    | 5873664        |
| value_loss         | 5.0945596e-06  |
---------------------------------------
--------------------------------------
| approxkl           | 1.2391925e-05 |
| clipfrac           | 0.00084838865 |
| eplenmean          | 1.62e+03      |
| eprewmean          | -8.64         |
| explained_variance | -0.00156      |
| fps                | 109           |
| nupdates           | 718           |
| policy_entropy     | 0.00904512    |
| policy_loss        | 1.3376062e-06 |
| serial_t

---------------------------------------
| approxkl           | 1.1899794e-05  |
| clipfrac           | 0.00095214846  |
| eplenmean          | 1.62e+03       |
| eprewmean          | -10.2          |
| explained_variance | 0.016          |
| fps                | 115            |
| nupdates           | 731            |
| policy_entropy     | 0.010772943    |
| policy_loss        | -7.9808924e-05 |
| serial_timesteps   | 187136         |
| time_elapsed       | 5.13e+04       |
| total_timesteps    | 5988352        |
| value_loss         | 9.621978e-06   |
---------------------------------------
---------------------------------------
| approxkl           | 6.926316e-06   |
| clipfrac           | 0.00057373045  |
| eplenmean          | 1.62e+03       |
| eprewmean          | -9.18          |
| explained_variance | -8.82e-06      |
| fps                | 115            |
| nupdates           | 732            |
| policy_entropy     | 0.010135362    |
| policy_loss        | -1.7895782e-05 |


--------------------------------------
| approxkl           | 1.2310731e-05 |
| clipfrac           | 0.0006896973  |
| eplenmean          | 1.62e+03      |
| eprewmean          | -12.9         |
| explained_variance | -0.000894     |
| fps                | 120           |
| nupdates           | 745           |
| policy_entropy     | 0.00935677    |
| policy_loss        | -9.811347e-05 |
| serial_timesteps   | 190720        |
| time_elapsed       | 5.23e+04      |
| total_timesteps    | 6103040       |
| value_loss         | 6.5643444e-06 |
--------------------------------------
---------------------------------------
| approxkl           | 1.4833482e-05  |
| clipfrac           | 0.0013549805   |
| eplenmean          | 1.62e+03       |
| eprewmean          | -13            |
| explained_variance | -0.00246       |
| fps                | 118            |
| nupdates           | 746            |
| policy_entropy     | 0.008550572    |
| policy_loss        | -0.00018156474 |
| serial_timest

--------------------------------------
| approxkl           | 1.2943774e-05 |
| clipfrac           | 0.0011047364  |
| eplenmean          | 1.62e+03      |
| eprewmean          | -14.8         |
| explained_variance | 0.00638       |
| fps                | 122           |
| nupdates           | 759           |
| policy_entropy     | 0.009702372   |
| policy_loss        | -7.633456e-05 |
| serial_timesteps   | 194304        |
| time_elapsed       | 5.33e+04      |
| total_timesteps    | 6217728       |
| value_loss         | 2.0250753e-05 |
--------------------------------------
---------------------------------------
| approxkl           | 7.779485e-06   |
| clipfrac           | 0.0006469727   |
| eplenmean          | 1.62e+03       |
| eprewmean          | -14.3          |
| explained_variance | -0.0111        |
| fps                | 119            |
| nupdates           | 760            |
| policy_entropy     | 0.009740356    |
| policy_loss        | -0.00010185887 |
| serial_timest

In [6]:
load_path = '/tmp/openai-2018-05-29-15-08-04-310254/checkpoints/00100'
logger.get_dir()

'/tmp/openai-2018-05-29-15-08-04-310254'

In [4]:
def test(env_id, load_path, seed):

    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin': ncpu //= 2
    config = tf.ConfigProto(allow_soft_placement=True,
                            intra_op_parallelism_threads=ncpu,
                            inter_op_parallelism_threads=ncpu)
    config.gpu_options.allow_growth = True #pylint: disable=E1101
    tf.Session(config=config).__enter__()
    
    def make_env():
        env = gym.make(env_id)
        return env
    
    
    env = DummyVecEnv([make_env])
    running_mean = np.load('{}/mean.npy'.format(load_path))
    running_var = np.load('{}/var.npy'.format(load_path))
    env = VecNormalizeTest(env, running_mean, running_var)
    set_global_seeds(seed)

    policy = LstmPolicy
    ob_space = env.venv.observation_space
    ac_space = env.venv.action_space
    nbatch_act = 1 * 1
    nbatch_train = 32 * 8
    nsteps = 256
    ent_coef = .01
    vf_coef = 0.5
    max_grad_norm = 0.5
    make_model = lambda : ppo2.Model(policy=policy, ob_space=ob_space, ac_space=ac_space, 
                                nbatch_act=nbatch_act, nbatch_train=nbatch_train, nsteps=nsteps, 
                                ent_coef=ent_coef, vf_coef=vf_coef, max_grad_norm=max_grad_norm)
    model = make_model()
    model.load(load_path + '/checkpoints/01200')
    
    obs = env.reset()
    states = model.initial_state
    dones = [False for _ in range(1)]
    while True:
        actions, _, states, _, = model.step(obs, states, dones)
        obs[:], _, dones, _, = env.step(actions)
        env.venv.render()

In [None]:
load_path = '../data/processed'
load_path = '/tmp/openai-2018-06-22-12-45-22-090687'
test('tradingenv-v0', load_path, seed=0)

Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See @{tf.nn.softmax_cross_entropy_with_logits_v2}.

Instructions for updating:
keep_dims is deprecated, use keepdims instead
Instructions for updating:
keep_dims is deprecated, use keepdims instead


In [5]:
def make_env():
        env = gym.make('tradingenv-v0')
        return env
env = DummyVecEnv([make_env])
load_path = '/tmp/openai-2018-05-29-15-08-04-310254'
running_mean = np.load('{}/mean.npy'.format(load_path))
running_var = np.load('{}/var.npy'.format(load_path))
env = VecNormalizeTest(env, running_mean, running_var)

In [3]:
import gym
e = gym.make('tradingenv-v5')

In [13]:
e.step(1)

(array([[ 2.44675000e+03,  2.44800000e+03,  2.44675000e+03,
          2.44725000e+03, -8.43391446e-01, -5.37299608e-01,
          0.00000000e+00],
        [ 2.44725000e+03,  2.44800000e+03,  2.44675000e+03,
          2.44700000e+03, -8.54911871e-01, -5.18773258e-01,
          0.00000000e+00],
        [ 2.44700000e+03,  2.44775000e+03,  2.44675000e+03,
          2.44700000e+03, -8.66025404e-01, -5.00000000e-01,
          0.00000000e+00],
        [ 2.44725000e+03,  2.44875000e+03,  2.44700000e+03,
          2.44775000e+03, -8.76726756e-01, -4.80988769e-01,
          0.00000000e+00],
        [ 2.44800000e+03,  2.44925000e+03,  2.44775000e+03,
          2.44850000e+03, -8.87010833e-01, -4.61748613e-01,
          0.00000000e+00],
        [ 2.44850000e+03,  2.44950000e+03,  2.44850000e+03,
          2.44900000e+03, -8.96872742e-01, -4.42288690e-01,
          0.00000000e+00],
        [ 2.44900000e+03,  2.44950000e+03,  2.44850000e+03,
          2.44850000e+03, -9.06307787e-01, -4.22618262e-01