In [8]:
#!/usr/bin/env python3
import numpy as np
import gym
import os
from pathlib import Path

import matplotlib.pyplot as plt
import time
from IPython import display
%matplotlib notebook

from stable_baselines.common.cmd_util import mujoco_arg_parser
from stable_baselines import bench, logger
from stable_baselines.common import set_global_seeds
from stable_baselines.common.vec_env.vec_normalize import VecNormalize
from stable_baselines.ppo2 import PPO2
from stable_baselines.ppo1 import PPO1
from stable_baselines.common.policies import MlpPolicy, MlpLstmPolicy
from stable_baselines.common.vec_env.dummy_vec_env import DummyVecEnv
from stable_baselines.common.vec_env.subproc_vec_env import SubprocVecEnv
from stable_baselines.results_plotter import load_results, ts2xy

# Current user's home directory as a plain string; log paths are built from it.
home = str(Path("~").expanduser())

def train(env_id, num_timesteps, seed):
    """
    Train a PPO1 agent with an MLP policy on a single Gym environment.

    Relies on two notebook-level globals that must exist before the call:
    ``log_dir`` (directory handed to the Monitor wrapper and used as the
    TensorBoard log location) and ``n_steps`` (counter of callback
    invocations, incremented here).

    The model is saved to ``model_<env_id>`` (relative to the current working
    directory) on every 100th callback invocation and once more when training
    finishes.

    :param env_id: (str) the environment id string
    :param num_timesteps: (int) the number of timesteps to run
    :param seed: (int) Used to seed the global random generators and the environment.
    :return: (PPO1, DummyVecEnv) the trained model and the vectorized environment
    """
    # Seed the global RNGs before anything that may draw from them is built.
    set_global_seeds(seed)

    def make_env(_env_id):
        env_out = gym.make(_env_id)
        # Seed the environment itself; the global seeds alone do not reach it.
        env_out.seed(seed)
        env_out = bench.Monitor(env_out, log_dir, allow_early_resets=True)
        return env_out

    env = DummyVecEnv([lambda: make_env(env_id)])

    def callback(_locals, _globals):
        global n_steps
        print("Step:", n_steps)

        # Periodic checkpoint: every 100th invocation.
        if (n_steps + 1) % 100 == 0:
            _locals['self'].save("model_{}".format(env_id))
        n_steps += 1
        # Return True to keep training: Stable Baselines releases that check
        # the callback result stop the learn loop when it is False.
        return True

    policy = MlpPolicy
    model = PPO1(policy=policy, env=env, verbose=1, tensorboard_log=log_dir)
    model.learn(total_timesteps=num_timesteps, callback=callback)
    model.save("model_{}".format(env_id))

    return model, env

In [9]:
# Run configuration.
env_id = 'CartPole-v1'
num_timesteps = 20000000
seed = 343
# Module-level training state; train()'s callback increments n_steps via `global`.
best_mean_reward, n_steps = -np.inf, 0

# Create the root log directory up front so that listing it cannot fail on a
# machine where no run has been logged yet.
base_dir = os.path.join(home, 'ppo_logs')
os.makedirs(base_dir, exist_ok=True)

# Number the run after the count of earlier runs for this environment, then
# skip any index that already exists on disk (possible when older runs were
# deleted) so two runs never share one log directory.
prev = [f for f in os.listdir(base_dir) if env_id in f]
run_index = len(prev)
log_dir = os.path.join(base_dir, '{}-{}'.format(env_id, run_index))
while os.path.exists(log_dir):
    run_index += 1
    log_dir = os.path.join(base_dir, '{}-{}'.format(env_id, run_index))
os.makedirs(log_dir, exist_ok=True)

print('Logging to {}'.format(log_dir))

logger.configure()
model, env = train(env_id, num_timesteps, seed)

Logging to /home/ubuntu/ppo_logs/CartPole-v1-2
Logging to /tmp/openai-2018-12-02-20-14-22-023037
Step: 0
********** Iteration 0 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00221 |      -0.00693 |     103.78012 |      9.82e-05 |       0.69305
     -0.01107 |      -0.00692 |     102.36276 |       0.00103 |       0.69214
     -0.01956 |      -0.00690 |     100.92700 |       0.00368 |       0.68954
     -0.02522 |      -0.00685 |      99.28327 |       0.00821 |       0.68513
Evaluating losses...
     -0.02643 |      -0.00682 |      98.18385 |       0.01164 |       0.68183
-----------------------------------
| EpLenMean       | 27.1          |
| EpRewMean       | 27.1          |
| EpThisIter      | 9             |
| EpisodesSoFar   | 9             |
| TimeElapsed     | 0.778         |
| TimestepsSoFar  | 256           |
| ev_tdlam_before | 0.000964      |
| loss_ent        | 0.6818348     |
| loss_kl         | 0.0116381645

Step: 8
********** Iteration 8 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00054 |      -0.00590 |     120.70531 |       0.00061 |       0.58961
     -0.00494 |      -0.00591 |     112.07675 |       0.00291 |       0.59109
     -0.00661 |      -0.00593 |     103.75373 |       0.00396 |       0.59348
     -0.00916 |      -0.00595 |      96.83098 |       0.00454 |       0.59502
Evaluating losses...
     -0.00964 |      -0.00596 |      92.51924 |       0.00531 |       0.59583
----------------------------------
| EpLenMean       | 44.9         |
| EpRewMean       | 44.9         |
| EpThisIter      | 2            |
| EpisodesSoFar   | 50           |
| TimeElapsed     | 4.28         |
| TimestepsSoFar  | 2594         |
| ev_tdlam_before | 0.0995       |
| loss_ent        | 0.595829     |
| loss_kl         | 0.0053133285 |
| loss_pol_entpen | -0.00595829  |
| loss_pol_surr   | -0.00964312  |
| loss_vf_loss    | 92.51924     

Step: 16
********** Iteration 16 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
    -8.95e-05 |      -0.00582 |     117.74938 |       0.00031 |       0.58205
     -0.00946 |      -0.00575 |     115.13451 |       0.00266 |       0.57517
     -0.00977 |      -0.00568 |     112.27246 |       0.00637 |       0.56847
     -0.00947 |      -0.00566 |     109.66964 |       0.00736 |       0.56605
Evaluating losses...
     -0.01035 |      -0.00567 |     107.64496 |       0.00644 |       0.56693
-----------------------------------
| EpLenMean       | 68.5          |
| EpRewMean       | 68.5          |
| EpThisIter      | 2             |
| EpisodesSoFar   | 63            |
| TimeElapsed     | 7.79          |
| TimestepsSoFar  | 5599          |
| ev_tdlam_before | 0.019         |
| loss_ent        | 0.5669255     |
| loss_kl         | 0.0064437045  |
| loss_pol_entpen | -0.0056692553 |
| loss_pol_surr   | -0.010354795  |
| loss_vf_loss    |

Step: 24
********** Iteration 24 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
    -6.68e-05 |      -0.00557 |     142.90269 |       0.00012 |       0.55696
     -0.01066 |      -0.00547 |     134.71417 |       0.00432 |       0.54717
     -0.00777 |      -0.00540 |     131.29736 |       0.00937 |       0.54029
     -0.01238 |      -0.00544 |     131.80032 |       0.00437 |       0.54438
Evaluating losses...
     -0.01102 |      -0.00549 |     128.94067 |       0.00145 |       0.54905
----------------------------------
| EpLenMean       | 87.3         |
| EpRewMean       | 87.3         |
| EpThisIter      | 3            |
| EpisodesSoFar   | 73           |
| TimeElapsed     | 11.3         |
| TimestepsSoFar  | 8406         |
| ev_tdlam_before | 0.111        |
| loss_ent        | 0.5490523    |
| loss_kl         | 0.0014500191 |
| loss_pol_entpen | -0.005490523 |
| loss_pol_surr   | -0.011015154 |
| loss_vf_loss    | 128.94067  

Step: 32
********** Iteration 32 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00204 |      -0.00601 |      58.93188 |       0.00019 |       0.60054
     -0.00081 |      -0.00605 |      56.20024 |       0.00070 |       0.60504
     -0.00059 |      -0.00608 |      54.44357 |       0.00148 |       0.60822
     -0.00302 |      -0.00612 |      49.62776 |       0.00187 |       0.61158
Evaluating losses...
     -0.00422 |      -0.00614 |      46.98482 |       0.00217 |       0.61429
----------------------------------
| EpLenMean       | 94           |
| EpRewMean       | 94           |
| EpThisIter      | 1            |
| EpisodesSoFar   | 88           |
| TimeElapsed     | 14.8         |
| TimestepsSoFar  | 10994        |
| ev_tdlam_before | 0.849        |
| loss_ent        | 0.61428624   |
| loss_kl         | 0.0021662754 |
| loss_pol_entpen | -0.006142862 |
| loss_pol_surr   | -0.004221812 |
| loss_vf_loss    | 46.98482   

Step: 40
********** Iteration 40 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00068 |      -0.00566 |     153.62685 |      7.55e-05 |       0.56581
     -0.00395 |      -0.00559 |     152.85251 |       0.00067 |       0.55890
     -0.00542 |      -0.00547 |     151.77979 |       0.00284 |       0.54734
     -0.00444 |      -0.00537 |     150.92450 |       0.00581 |       0.53708
Evaluating losses...
     -0.00435 |      -0.00535 |     150.35777 |       0.00630 |       0.53519
-----------------------------------
| EpLenMean       | 105           |
| EpRewMean       | 105           |
| EpThisIter      | 2             |
| EpisodesSoFar   | 100           |
| TimeElapsed     | 18.3          |
| TimestepsSoFar  | 14241         |
| ev_tdlam_before | 0.428         |
| loss_ent        | 0.53518564    |
| loss_kl         | 0.0062977998  |
| loss_pol_entpen | -0.0053518564 |
| loss_pol_surr   | -0.0043482836 |
| loss_vf_loss    |

Step: 48
********** Iteration 48 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00039 |      -0.00580 |     119.42769 |      4.28e-05 |       0.57963
     -0.00183 |      -0.00581 |     118.32246 |       0.00047 |       0.58131
     -0.00255 |      -0.00584 |     116.83781 |       0.00194 |       0.58407
     -0.00365 |      -0.00587 |     115.33183 |       0.00503 |       0.58662
Evaluating losses...
     -0.00507 |      -0.00587 |     114.45201 |       0.00694 |       0.58750
----------------------------------
| EpLenMean       | 132          |
| EpRewMean       | 132          |
| EpThisIter      | 1            |
| EpisodesSoFar   | 108          |
| TimeElapsed     | 21.8         |
| TimestepsSoFar  | 17513        |
| ev_tdlam_before | -0.0018      |
| loss_ent        | 0.5874988    |
| loss_kl         | 0.006943562  |
| loss_pol_entpen | -0.005874988 |
| loss_pol_surr   | -0.005073786 |
| loss_vf_loss    | 114.45201  

Step: 56
********** Iteration 56 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00043 |      -0.00594 |     183.90462 |      4.78e-05 |       0.59387
     -0.00270 |      -0.00587 |     183.90079 |       0.00041 |       0.58727
     -0.00760 |      -0.00575 |     183.91869 |       0.00167 |       0.57456
     -0.00783 |      -0.00558 |     183.93884 |       0.00498 |       0.55762
Evaluating losses...
     -0.00785 |      -0.00549 |     183.89696 |       0.00739 |       0.54919
-----------------------------------
| EpLenMean       | 158           |
| EpRewMean       | 158           |
| EpThisIter      | 1             |
| EpisodesSoFar   | 118           |
| TimeElapsed     | 25.3          |
| TimestepsSoFar  | 20929         |
| ev_tdlam_before | 0.000238      |
| loss_ent        | 0.54918593    |
| loss_kl         | 0.007389982   |
| loss_pol_entpen | -0.0054918593 |
| loss_pol_surr   | -0.007851478  |
| loss_vf_loss    |

Step: 64
********** Iteration 64 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00069 |      -0.00588 |     125.87257 |      4.52e-05 |       0.58754
     -0.00306 |      -0.00585 |     124.88932 |       0.00042 |       0.58478
     -0.00490 |      -0.00581 |     123.66988 |       0.00142 |       0.58054
     -0.00424 |      -0.00574 |     122.84009 |       0.00369 |       0.57402
Evaluating losses...
     -0.00417 |      -0.00572 |     122.19365 |       0.00486 |       0.57193
-----------------------------------
| EpLenMean       | 192           |
| EpRewMean       | 192           |
| EpThisIter      | 1             |
| EpisodesSoFar   | 126           |
| TimeElapsed     | 28.8          |
| TimestepsSoFar  | 25267         |
| ev_tdlam_before | 0.00435       |
| loss_ent        | 0.5719305     |
| loss_kl         | 0.004862964   |
| loss_pol_entpen | -0.0057193055 |
| loss_pol_surr   | -0.004172478  |
| loss_vf_loss    |

Step: 72
********** Iteration 72 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00123 |      -0.00566 |     215.86584 |       0.00026 |       0.56563
     -0.00243 |      -0.00566 |     216.01935 |       0.00199 |       0.56582
     -0.00202 |      -0.00563 |     216.05797 |       0.00686 |       0.56280
     -0.00361 |      -0.00562 |     216.07822 |       0.00766 |       0.56214
Evaluating losses...
     -0.00520 |      -0.00564 |     216.08179 |       0.00424 |       0.56379
-----------------------------------
| EpLenMean       | 221           |
| EpRewMean       | 221           |
| EpThisIter      | 1             |
| EpisodesSoFar   | 135           |
| TimeElapsed     | 32.3          |
| TimestepsSoFar  | 29464         |
| ev_tdlam_before | -0.000581     |
| loss_ent        | 0.56379056    |
| loss_kl         | 0.004237589   |
| loss_pol_entpen | -0.0056379056 |
| loss_pol_surr   | -0.00520155   |
| loss_vf_loss    |

Step: 80
********** Iteration 80 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00089 |      -0.00609 |     128.22760 |      9.37e-05 |       0.60896
      0.00032 |      -0.00612 |     128.06253 |       0.00040 |       0.61219
      0.00037 |      -0.00615 |     127.57549 |       0.00069 |       0.61503
     9.23e-05 |      -0.00618 |     126.75041 |       0.00104 |       0.61847
Evaluating losses...
     -0.00080 |      -0.00620 |     126.21159 |       0.00126 |       0.62018
-----------------------------------
| EpLenMean       | 244           |
| EpRewMean       | 244           |
| EpThisIter      | 1             |
| EpisodesSoFar   | 143           |
| TimeElapsed     | 35.8          |
| TimestepsSoFar  | 32694         |
| ev_tdlam_before | -0.00033      |
| loss_ent        | 0.62018085    |
| loss_kl         | 0.0012636746  |
| loss_pol_entpen | -0.0062018083 |
| loss_pol_surr   | -0.00080337   |
| loss_vf_loss    |

Step: 88
********** Iteration 88 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00036 |      -0.00611 |     213.61249 |      4.83e-05 |       0.61114
      0.00085 |      -0.00608 |     213.88017 |       0.00025 |       0.60838
      0.00085 |      -0.00608 |     213.86844 |       0.00042 |       0.60805
     2.00e-05 |      -0.00610 |     213.70992 |       0.00038 |       0.61035
Evaluating losses...
     -0.00048 |      -0.00612 |     213.57170 |       0.00031 |       0.61249
------------------------------------
| EpLenMean       | 268            |
| EpRewMean       | 268            |
| EpThisIter      | 1              |
| EpisodesSoFar   | 151            |
| TimeElapsed     | 39.3           |
| TimestepsSoFar  | 36682          |
| ev_tdlam_before | 0.000442       |
| loss_ent        | 0.6124891      |
| loss_kl         | 0.00031301397  |
| loss_pol_entpen | -0.006124891   |
| loss_pol_surr   | -0.00048273243 |
| loss_

Step: 96
********** Iteration 96 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00040 |      -0.00617 |      71.70907 |      1.39e-05 |       0.61719
     -0.00119 |      -0.00622 |      61.02293 |       0.00028 |       0.62206
     -0.00288 |      -0.00630 |      61.35752 |       0.00120 |       0.62958
     -0.00457 |      -0.00637 |      61.99464 |       0.00279 |       0.63692
Evaluating losses...
     -0.00546 |      -0.00641 |      59.50675 |       0.00391 |       0.64092
-----------------------------------
| EpLenMean       | 279           |
| EpRewMean       | 279           |
| EpThisIter      | 1             |
| EpisodesSoFar   | 160           |
| TimeElapsed     | 42.8          |
| TimestepsSoFar  | 39493         |
| ev_tdlam_before | 0.805         |
| loss_ent        | 0.64092076    |
| loss_kl         | 0.003905091   |
| loss_pol_entpen | -0.006409208  |
| loss_pol_surr   | -0.0054557733 |
| loss_vf_loss    |

Step: 104
********** Iteration 104 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00107 |      -0.00563 |     146.79243 |       0.00034 |       0.56253
     -0.01312 |      -0.00562 |     154.16602 |       0.00344 |       0.56171
     -0.01374 |      -0.00559 |     150.67307 |       0.00839 |       0.55880
     -0.01237 |      -0.00560 |     141.56412 |       0.00949 |       0.55964
Evaluating losses...
     -0.01345 |      -0.00562 |     135.75604 |       0.00841 |       0.56193
----------------------------------
| EpLenMean       | 283          |
| EpRewMean       | 283          |
| EpThisIter      | 1            |
| EpisodesSoFar   | 170          |
| TimeElapsed     | 46.3         |
| TimestepsSoFar  | 42680        |
| ev_tdlam_before | -0.184       |
| loss_ent        | 0.5619254    |
| loss_kl         | 0.008411527  |
| loss_pol_entpen | -0.005619254 |
| loss_pol_surr   | -0.013446134 |
| loss_vf_loss    | 135.75604

Step: 112
********** Iteration 112 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00227 |      -0.00555 |      52.89030 |       0.00023 |       0.55462
      0.00111 |      -0.00551 |      47.76324 |       0.00184 |       0.55147
     -0.00176 |      -0.00548 |      42.67278 |       0.00262 |       0.54819
     -0.00078 |      -0.00548 |      38.93789 |       0.00090 |       0.54834
Evaluating losses...
     -0.00428 |      -0.00547 |      37.05059 |       0.00054 |       0.54655
-----------------------------------
| EpLenMean       | 305           |
| EpRewMean       | 305           |
| EpThisIter      | 1             |
| EpisodesSoFar   | 178           |
| TimeElapsed     | 49.8          |
| TimestepsSoFar  | 46449         |
| ev_tdlam_before | 0.788         |
| loss_ent        | 0.54655015    |
| loss_kl         | 0.0005381567  |
| loss_pol_entpen | -0.005465501  |
| loss_pol_surr   | -0.0042752326 |
| loss_vf_loss   

Step: 120
********** Iteration 120 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00090 |      -0.00572 |     227.05083 |      3.61e-05 |       0.57224
     -0.00570 |      -0.00569 |     215.58878 |       0.00177 |       0.56895
     -0.00991 |      -0.00561 |     200.30670 |       0.00970 |       0.56132
     -0.00866 |      -0.00552 |     195.38959 |       0.01735 |       0.55209
Evaluating losses...
     -0.00902 |      -0.00550 |     195.75937 |       0.01671 |       0.55029
----------------------------------
| EpLenMean       | 320          |
| EpRewMean       | 320          |
| EpThisIter      | 1            |
| EpisodesSoFar   | 186          |
| TimeElapsed     | 53.3         |
| TimestepsSoFar  | 49958        |
| ev_tdlam_before | -0.892       |
| loss_ent        | 0.55029434   |
| loss_kl         | 0.016705958  |
| loss_pol_entpen | -0.005502944 |
| loss_pol_surr   | -0.009020707 |
| loss_vf_loss    | 195.75937

Step: 128
********** Iteration 128 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00115 |      -0.00505 |     246.23495 |      9.96e-05 |       0.50519
     -0.00637 |      -0.00509 |     246.09067 |       0.00234 |       0.50912
     -0.00636 |      -0.00510 |     245.48277 |       0.00861 |       0.50953
     -0.00563 |      -0.00508 |     244.70180 |       0.00993 |       0.50808
Evaluating losses...
     -0.00768 |      -0.00507 |     244.28571 |       0.00721 |       0.50659
-----------------------------------
| EpLenMean       | 341           |
| EpRewMean       | 341           |
| EpThisIter      | 1             |
| EpisodesSoFar   | 194           |
| TimeElapsed     | 56.8          |
| TimestepsSoFar  | 53494         |
| ev_tdlam_before | -0.000152     |
| loss_ent        | 0.50659484    |
| loss_kl         | 0.0072102062  |
| loss_pol_entpen | -0.0050659482 |
| loss_pol_surr   | -0.0076786177 |
| loss_vf_loss   

Step: 136
********** Iteration 136 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00012 |      -0.00570 |     124.28745 |       0.00104 |       0.56987
     -0.00205 |      -0.00569 |     120.69942 |       0.00623 |       0.56886
    -9.42e-05 |      -0.00568 |     114.34421 |       0.01267 |       0.56808
     -0.00065 |      -0.00575 |     109.01025 |       0.01027 |       0.57481
Evaluating losses...
     -0.00321 |      -0.00580 |     107.34850 |       0.00633 |       0.58026
-----------------------------------
| EpLenMean       | 354           |
| EpRewMean       | 354           |
| EpThisIter      | 1             |
| EpisodesSoFar   | 202           |
| TimeElapsed     | 60.3          |
| TimestepsSoFar  | 57271         |
| ev_tdlam_before | -0.037        |
| loss_ent        | 0.58025575    |
| loss_kl         | 0.0063288906  |
| loss_pol_entpen | -0.005802557  |
| loss_pol_surr   | -0.0032104254 |
| loss_vf_loss   

Step: 144
********** Iteration 144 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00060 |      -0.00490 |     124.36416 |      1.40e-05 |       0.49049
     -0.00037 |      -0.00494 |     123.40759 |      6.95e-05 |       0.49394
    -1.90e-05 |      -0.00497 |     121.06400 |       0.00047 |       0.49708
     -0.00106 |      -0.00500 |     118.08235 |       0.00152 |       0.49973
Evaluating losses...
     -0.00287 |      -0.00502 |     116.60378 |       0.00229 |       0.50208
-----------------------------------
| EpLenMean       | 362           |
| EpRewMean       | 362           |
| EpThisIter      | 1             |
| EpisodesSoFar   | 210           |
| TimeElapsed     | 63.8          |
| TimestepsSoFar  | 61748         |
| ev_tdlam_before | 0.134         |
| loss_ent        | 0.5020751     |
| loss_kl         | 0.0022900072  |
| loss_pol_entpen | -0.005020751  |
| loss_pol_surr   | -0.0028710216 |
| loss_vf_loss   

Step: 152
********** Iteration 152 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00152 |      -0.00542 |     158.45555 |       0.00013 |       0.54178
     -0.00397 |      -0.00555 |     156.34544 |       0.00126 |       0.55489
     -0.00625 |      -0.00569 |     151.57674 |       0.00347 |       0.56909
     -0.00702 |      -0.00579 |     146.16643 |       0.00545 |       0.57911
Evaluating losses...
     -0.00727 |      -0.00584 |     142.84680 |       0.00659 |       0.58395
-----------------------------------
| EpLenMean       | 373           |
| EpRewMean       | 373           |
| EpThisIter      | 1             |
| EpisodesSoFar   | 218           |
| TimeElapsed     | 67.3          |
| TimestepsSoFar  | 65920         |
| ev_tdlam_before | -0.287        |
| loss_ent        | 0.5839505     |
| loss_kl         | 0.0065917214  |
| loss_pol_entpen | -0.005839505  |
| loss_pol_surr   | -0.0072727427 |
| loss_vf_loss   

Step: 160
********** Iteration 160 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00041 |      -0.00555 |     166.95671 |      1.11e-05 |       0.55536
     -0.00144 |      -0.00555 |     162.48055 |       0.00027 |       0.55527
     -0.00802 |      -0.00552 |     155.48193 |       0.00196 |       0.55182
     -0.00937 |      -0.00546 |     150.10466 |       0.00563 |       0.54573
Evaluating losses...
     -0.00834 |      -0.00542 |     146.19269 |       0.00798 |       0.54240
-----------------------------------
| EpLenMean       | 370           |
| EpRewMean       | 370           |
| EpThisIter      | 1             |
| EpisodesSoFar   | 226           |
| TimeElapsed     | 70.8          |
| TimestepsSoFar  | 69970         |
| ev_tdlam_before | 0.391         |
| loss_ent        | 0.5424043     |
| loss_kl         | 0.007984099   |
| loss_pol_entpen | -0.0054240427 |
| loss_pol_surr   | -0.008339269  |
| loss_vf_loss   

Step: 168
********** Iteration 168 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00106 |      -0.00576 |      47.64787 |       0.00016 |       0.57622
     -0.00354 |      -0.00576 |      46.41585 |       0.00274 |       0.57567
     -0.00952 |      -0.00577 |      45.26003 |       0.01150 |       0.57705
     -0.00850 |      -0.00580 |      42.25557 |       0.01792 |       0.58045
Evaluating losses...
     -0.01044 |      -0.00584 |      40.47276 |       0.01706 |       0.58412
-----------------------------------
| EpLenMean       | 348           |
| EpRewMean       | 348           |
| EpThisIter      | 1             |
| EpisodesSoFar   | 239           |
| TimeElapsed     | 74.3          |
| TimestepsSoFar  | 73237         |
| ev_tdlam_before | 0.878         |
| loss_ent        | 0.5841166     |
| loss_kl         | 0.017055262   |
| loss_pol_entpen | -0.0058411653 |
| loss_pol_surr   | -0.010443307  |
| loss_vf_loss   

Step: 176
********** Iteration 176 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00169 |      -0.00602 |      53.08752 |       0.00033 |       0.60151
     -0.00627 |      -0.00606 |      46.65898 |       0.00211 |       0.60556
     -0.00804 |      -0.00606 |      42.93070 |       0.00588 |       0.60563
     -0.00618 |      -0.00605 |      39.97493 |       0.01046 |       0.60459
Evaluating losses...
     -0.00570 |      -0.00605 |      38.23185 |       0.01176 |       0.60465
-----------------------------------
| EpLenMean       | 346           |
| EpRewMean       | 346           |
| EpThisIter      | 1             |
| EpisodesSoFar   | 248           |
| TimeElapsed     | 77.8          |
| TimestepsSoFar  | 76775         |
| ev_tdlam_before | 0.792         |
| loss_ent        | 0.6046534     |
| loss_kl         | 0.011764506   |
| loss_pol_entpen | -0.0060465345 |
| loss_pol_surr   | -0.005704552  |
| loss_vf_loss   

Step: 184
********** Iteration 184 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00066 |      -0.00597 |     154.81541 |      6.90e-05 |       0.59710
     -0.00075 |      -0.00604 |     154.39673 |       0.00049 |       0.60364
     -0.00703 |      -0.00608 |     152.31081 |       0.00155 |       0.60752
     -0.01263 |      -0.00611 |     149.53778 |       0.00412 |       0.61063
Evaluating losses...
     -0.01431 |      -0.00610 |     147.52536 |       0.00695 |       0.61036
-----------------------------------
| EpLenMean       | 340           |
| EpRewMean       | 340           |
| EpThisIter      | 1             |
| EpisodesSoFar   | 257           |
| TimeElapsed     | 81.3          |
| TimestepsSoFar  | 79977         |
| ev_tdlam_before | 0.508         |
| loss_ent        | 0.6103573     |
| loss_kl         | 0.006950784   |
| loss_pol_entpen | -0.0061035724 |
| loss_pol_surr   | -0.014305182  |
| loss_vf_loss   

Step: 192
********** Iteration 192 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
    -3.15e-05 |      -0.00603 |     244.27596 |      7.08e-06 |       0.60334
     -0.00097 |      -0.00598 |     234.43736 |       0.00024 |       0.59807
     -0.00352 |      -0.00590 |     222.15198 |       0.00097 |       0.59034
     -0.00286 |      -0.00586 |     211.06400 |       0.00174 |       0.58585
Evaluating losses...
     -0.00447 |      -0.00586 |     204.65758 |       0.00207 |       0.58557
-----------------------------------
| EpLenMean       | 339           |
| EpRewMean       | 339           |
| EpThisIter      | 2             |
| EpisodesSoFar   | 267           |
| TimeElapsed     | 84.8          |
| TimestepsSoFar  | 82925         |
| ev_tdlam_before | 0.389         |
| loss_ent        | 0.58557105    |
| loss_kl         | 0.0020709343  |
| loss_pol_entpen | -0.0058557102 |
| loss_pol_surr   | -0.0044748764 |
| loss_vf_loss   

Step: 200
********** Iteration 200 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00026 |      -0.00618 |     142.66135 |      2.31e-05 |       0.61781
     -0.00171 |      -0.00620 |     144.29388 |       0.00045 |       0.62048
     -0.00519 |      -0.00623 |     144.52142 |       0.00202 |       0.62348
     -0.00779 |      -0.00626 |     143.63152 |       0.00531 |       0.62619
Evaluating losses...
     -0.00769 |      -0.00627 |     142.86841 |       0.00809 |       0.62655
-----------------------------------
| EpLenMean       | 334           |
| EpRewMean       | 334           |
| EpThisIter      | 1             |
| EpisodesSoFar   | 276           |
| TimeElapsed     | 88.3          |
| TimestepsSoFar  | 86059         |
| ev_tdlam_before | -0.0169       |
| loss_ent        | 0.62655133    |
| loss_kl         | 0.008085132   |
| loss_pol_entpen | -0.0062655127 |
| loss_pol_surr   | -0.007689014  |
| loss_vf_loss   

Step: 208
********** Iteration 208 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00144 |      -0.00561 |     102.72577 |       0.00015 |       0.56091
     -0.00590 |      -0.00558 |     102.06850 |       0.00278 |       0.55777
     -0.00902 |      -0.00557 |     101.01096 |       0.00839 |       0.55715
     -0.01066 |      -0.00565 |      99.84624 |       0.00935 |       0.56519
Evaluating losses...
     -0.01284 |      -0.00573 |      99.06232 |       0.00756 |       0.57301
----------------------------------
| EpLenMean       | 335          |
| EpRewMean       | 335          |
| EpThisIter      | 1            |
| EpisodesSoFar   | 285          |
| TimeElapsed     | 91.8         |
| TimestepsSoFar  | 89521        |
| ev_tdlam_before | 0.357        |
| loss_ent        | 0.5730073    |
| loss_kl         | 0.00756029   |
| loss_pol_entpen | -0.005730073 |
| loss_pol_surr   | -0.012839105 |
| loss_vf_loss    | 99.06232 

Step: 216
********** Iteration 216 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00015 |      -0.00592 |     235.36446 |       0.00013 |       0.59181
     -0.00216 |      -0.00589 |     234.69583 |       0.00091 |       0.58873
     -0.00376 |      -0.00582 |     233.26538 |       0.00267 |       0.58248
     -0.00433 |      -0.00576 |     231.61179 |       0.00526 |       0.57569
Evaluating losses...
     -0.00460 |      -0.00573 |     230.60393 |       0.00643 |       0.57255
-----------------------------------
| EpLenMean       | 337           |
| EpRewMean       | 337           |
| EpThisIter      | 1             |
| EpisodesSoFar   | 293           |
| TimeElapsed     | 95.3          |
| TimestepsSoFar  | 93739         |
| ev_tdlam_before | 0.0125        |
| loss_ent        | 0.57255256    |
| loss_kl         | 0.0064258077  |
| loss_pol_entpen | -0.0057255253 |
| loss_pol_surr   | -0.004602898  |
| loss_vf_loss   

Step: 224
********** Iteration 224 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00019 |      -0.00579 |     136.49608 |      7.47e-05 |       0.57934
      0.00074 |      -0.00583 |     136.92032 |       0.00047 |       0.58260
     -0.00060 |      -0.00584 |     136.66159 |       0.00101 |       0.58412
     -0.00298 |      -0.00583 |     135.94963 |       0.00242 |       0.58305
Evaluating losses...
     -0.00410 |      -0.00581 |     135.43343 |       0.00406 |       0.58108
----------------------------------
| EpLenMean       | 335          |
| EpRewMean       | 335          |
| EpThisIter      | 1            |
| EpisodesSoFar   | 301          |
| TimeElapsed     | 98.8         |
| TimestepsSoFar  | 97742        |
| ev_tdlam_before | -0.0117      |
| loss_ent        | 0.58107793   |
| loss_kl         | 0.004063444  |
| loss_pol_entpen | -0.005810779 |
| loss_pol_surr   | -0.004095588 |
| loss_vf_loss    | 135.43343

Step: 232
********** Iteration 232 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00015 |      -0.00604 |     242.31039 |      5.16e-06 |       0.60373
     2.24e-06 |      -0.00608 |     242.13889 |       0.00026 |       0.60770
     -0.00180 |      -0.00613 |     241.52200 |       0.00128 |       0.61274
     -0.00181 |      -0.00615 |     240.87093 |       0.00260 |       0.61548
Evaluating losses...
     -0.00235 |      -0.00616 |     240.32365 |       0.00301 |       0.61620
-----------------------------------
| EpLenMean       | 336           |
| EpRewMean       | 336           |
| EpThisIter      | 1             |
| EpisodesSoFar   | 309           |
| TimeElapsed     | 102           |
| TimestepsSoFar  | 102258        |
| ev_tdlam_before | -0.0622       |
| loss_ent        | 0.61620486    |
| loss_kl         | 0.0030060033  |
| loss_pol_entpen | -0.006162049  |
| loss_pol_surr   | -0.0023526996 |
| loss_vf_loss   

Step: 240
********** Iteration 240 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00045 |      -0.00596 |     285.88794 |      5.49e-05 |       0.59566
      0.00042 |      -0.00595 |     277.22113 |       0.00031 |       0.59542
     -0.00206 |      -0.00592 |     261.23721 |       0.00073 |       0.59214
     -0.00365 |      -0.00586 |     246.86081 |       0.00128 |       0.58622
Evaluating losses...
     -0.00357 |      -0.00582 |     237.31110 |       0.00182 |       0.58187
-----------------------------------
| EpLenMean       | 333           |
| EpRewMean       | 333           |
| EpThisIter      | 2             |
| EpisodesSoFar   | 318           |
| TimeElapsed     | 106           |
| TimestepsSoFar  | 106474        |
| ev_tdlam_before | 0.262         |
| loss_ent        | 0.581866      |
| loss_kl         | 0.0018239466  |
| loss_pol_entpen | -0.00581866   |
| loss_pol_surr   | -0.0035714982 |
| loss_vf_loss   

Step: 248
********** Iteration 248 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00019 |      -0.00622 |     144.78816 |      9.46e-05 |       0.62161
    -4.07e-05 |      -0.00622 |     144.80557 |       0.00051 |       0.62182
      0.00032 |      -0.00623 |     144.55138 |       0.00119 |       0.62258
     -0.00024 |      -0.00624 |     144.19028 |       0.00195 |       0.62442
Evaluating losses...
     -0.00041 |      -0.00626 |     143.93932 |       0.00239 |       0.62608
-----------------------------------
| EpLenMean       | 319           |
| EpRewMean       | 319           |
| EpThisIter      | 1             |
| EpisodesSoFar   | 328           |
| TimeElapsed     | 109           |
| TimestepsSoFar  | 109957        |
| ev_tdlam_before | 0.0142        |
| loss_ent        | 0.6260813     |
| loss_kl         | 0.0023942688  |
| loss_pol_entpen | -0.006260813  |
| loss_pol_surr   | -0.0004136404 |
| loss_vf_loss   

Step: 256
********** Iteration 256 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00243 |      -0.00565 |      48.10685 |       0.00021 |       0.56533
     -0.01328 |      -0.00558 |      30.37234 |       0.00426 |       0.55784
     -0.01433 |      -0.00546 |      27.24405 |       0.01565 |       0.54553
     -0.01439 |      -0.00537 |      28.58874 |       0.01804 |       0.53653
Evaluating losses...
     -0.01864 |      -0.00533 |      26.79497 |       0.01455 |       0.53278
-----------------------------------
| EpLenMean       | 334           |
| EpRewMean       | 334           |
| EpThisIter      | 1             |
| EpisodesSoFar   | 336           |
| TimeElapsed     | 113           |
| TimestepsSoFar  | 113529        |
| ev_tdlam_before | 0.875         |
| loss_ent        | 0.5327811     |
| loss_kl         | 0.014553198   |
| loss_pol_entpen | -0.0053278115 |
| loss_pol_surr   | -0.01863756   |
| loss_vf_loss   

Step: 264
********** Iteration 264 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00155 |      -0.00577 |     213.24275 |       0.00014 |       0.57679
     -0.00828 |      -0.00568 |     199.58440 |       0.00500 |       0.56833
     -0.00963 |      -0.00554 |     176.94913 |       0.01624 |       0.55400
     -0.01154 |      -0.00546 |     164.78943 |       0.01460 |       0.54567
Evaluating losses...
     -0.01225 |      -0.00545 |     163.54431 |       0.00903 |       0.54479
----------------------------------
| EpLenMean       | 348          |
| EpRewMean       | 348          |
| EpThisIter      | 1            |
| EpisodesSoFar   | 344          |
| TimeElapsed     | 116          |
| TimestepsSoFar  | 117957       |
| ev_tdlam_before | -1.28        |
| loss_ent        | 0.5447911    |
| loss_kl         | 0.009034709  |
| loss_pol_entpen | -0.005447911 |
| loss_pol_surr   | -0.012250373 |
| loss_vf_loss    | 163.54431

Step: 272
********** Iteration 272 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00053 |      -0.00486 |     256.35229 |      3.16e-05 |       0.48580
     -0.00163 |      -0.00480 |     255.72601 |       0.00036 |       0.48040
     -0.00609 |      -0.00477 |     254.46741 |       0.00193 |       0.47676
     -0.00696 |      -0.00475 |     253.34351 |       0.00492 |       0.47477
Evaluating losses...
     -0.00638 |      -0.00474 |     252.37517 |       0.00616 |       0.47384
----------------------------------
| EpLenMean       | 356          |
| EpRewMean       | 356          |
| EpThisIter      | 1            |
| EpisodesSoFar   | 352          |
| TimeElapsed     | 120          |
| TimestepsSoFar  | 121269       |
| ev_tdlam_before | -0.0183      |
| loss_ent        | 0.47384244   |
| loss_kl         | 0.0061565847 |
| loss_pol_entpen | -0.004738424 |
| loss_pol_surr   | -0.006376598 |
| loss_vf_loss    | 252.37517

Step: 280
********** Iteration 280 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00016 |      -0.00432 |     237.19171 |      1.45e-05 |       0.43224
     -0.00204 |      -0.00433 |     237.13026 |       0.00080 |       0.43303
     -0.00533 |      -0.00434 |     236.55717 |       0.00361 |       0.43449
     -0.00421 |      -0.00435 |     235.91582 |       0.00728 |       0.43544
Evaluating losses...
     -0.00391 |      -0.00436 |     235.26814 |       0.00806 |       0.43578
-----------------------------------
| EpLenMean       | 365           |
| EpRewMean       | 365           |
| EpThisIter      | 1             |
| EpisodesSoFar   | 360           |
| TimeElapsed     | 123           |
| TimestepsSoFar  | 124965        |
| ev_tdlam_before | 0.0188        |
| loss_ent        | 0.43577802    |
| loss_kl         | 0.008064286   |
| loss_pol_entpen | -0.0043577803 |
| loss_pol_surr   | -0.0039118715 |
| loss_vf_loss   

Step: 288
********** Iteration 288 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00168 |      -0.00477 |     220.82407 |       0.00026 |       0.47659
     -0.01856 |      -0.00478 |     220.06802 |       0.00425 |       0.47836
     -0.01444 |      -0.00475 |     218.17673 |       0.01459 |       0.47512
     -0.01533 |      -0.00474 |     214.64545 |       0.01425 |       0.47434
Evaluating losses...
     -0.01945 |      -0.00475 |     212.88435 |       0.00883 |       0.47473
-----------------------------------
| EpLenMean       | 384           |
| EpRewMean       | 384           |
| EpThisIter      | 1             |
| EpisodesSoFar   | 368           |
| TimeElapsed     | 127           |
| TimestepsSoFar  | 129045        |
| ev_tdlam_before | 0.0663        |
| loss_ent        | 0.47473297    |
| loss_kl         | 0.008828187   |
| loss_pol_entpen | -0.0047473297 |
| loss_pol_surr   | -0.019445982  |
| loss_vf_loss   

Step: 296
********** Iteration 296 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00109 |      -0.00491 |     233.88994 |       0.00011 |       0.49095
      0.00112 |      -0.00487 |     233.68010 |       0.00062 |       0.48750
     -0.00032 |      -0.00484 |     232.91304 |       0.00095 |       0.48415
     -0.00103 |      -0.00481 |     231.55103 |       0.00094 |       0.48114
Evaluating losses...
     -0.00164 |      -0.00479 |     230.92349 |       0.00078 |       0.47942
-----------------------------------
| EpLenMean       | 399           |
| EpRewMean       | 399           |
| EpThisIter      | 1             |
| EpisodesSoFar   | 376           |
| TimeElapsed     | 130           |
| TimestepsSoFar  | 133509        |
| ev_tdlam_before | 0.0256        |
| loss_ent        | 0.479423      |
| loss_kl         | 0.00077664596 |
| loss_pol_entpen | -0.00479423   |
| loss_pol_surr   | -0.0016384795 |
| loss_vf_loss   

Step: 304
********** Iteration 304 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00015 |      -0.00508 |     161.03378 |      5.13e-05 |       0.50833
      0.00291 |      -0.00506 |     159.11752 |       0.00114 |       0.50636
     -0.00178 |      -0.00501 |     156.16809 |       0.00238 |       0.50122
     -0.00270 |      -0.00492 |     152.61932 |       0.00227 |       0.49164
Evaluating losses...
     -0.00478 |      -0.00485 |     150.31918 |       0.00247 |       0.48485
-----------------------------------
| EpLenMean       | 412           |
| EpRewMean       | 412           |
| EpThisIter      | 1             |
| EpisodesSoFar   | 384           |
| TimeElapsed     | 134           |
| TimestepsSoFar  | 138357        |
| ev_tdlam_before | 0.107         |
| loss_ent        | 0.48485023    |
| loss_kl         | 0.0024723453  |
| loss_pol_entpen | -0.0048485026 |
| loss_pol_surr   | -0.004782403  |
| loss_vf_loss   

Step: 312
********** Iteration 312 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00038 |      -0.00458 |     127.48827 |      2.56e-05 |       0.45814
     -0.00462 |      -0.00465 |     127.67589 |       0.00185 |       0.46547
     -0.00850 |      -0.00472 |     127.38056 |       0.00939 |       0.47244
     -0.00793 |      -0.00478 |     126.94155 |       0.01283 |       0.47759
Evaluating losses...
     -0.00910 |      -0.00480 |     126.34674 |       0.01001 |       0.48042
----------------------------------
| EpLenMean       | 410          |
| EpRewMean       | 410          |
| EpThisIter      | 1            |
| EpisodesSoFar   | 392          |
| TimeElapsed     | 137          |
| TimestepsSoFar  | 141589       |
| ev_tdlam_before | 0.119        |
| loss_ent        | 0.4804197    |
| loss_kl         | 0.01001373   |
| loss_pol_entpen | -0.004804197 |
| loss_pol_surr   | -0.009102507 |
| loss_vf_loss    | 126.34674

Step: 320
********** Iteration 320 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00089 |      -0.00495 |     241.61868 |      4.87e-05 |       0.49490
      0.00154 |      -0.00493 |     242.19072 |       0.00019 |       0.49278
      0.00043 |      -0.00489 |     240.76437 |       0.00018 |       0.48922
     -0.00248 |      -0.00484 |     237.50529 |       0.00090 |       0.48359
Evaluating losses...
     -0.00370 |      -0.00480 |     235.93956 |       0.00177 |       0.48004
-----------------------------------
| EpLenMean       | 410           |
| EpRewMean       | 410           |
| EpThisIter      | 1             |
| EpisodesSoFar   | 400           |
| TimeElapsed     | 141           |
| TimestepsSoFar  | 145017        |
| ev_tdlam_before | 0.0378        |
| loss_ent        | 0.4800392     |
| loss_kl         | 0.0017681118  |
| loss_pol_entpen | -0.0048003923 |
| loss_pol_surr   | -0.003699921  |
| loss_vf_loss   

Step: 328
********** Iteration 328 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     9.98e-05 |      -0.00477 |     235.02742 |      3.98e-05 |       0.47736
     -0.00043 |      -0.00479 |     235.27919 |       0.00033 |       0.47894
     -0.00343 |      -0.00484 |     234.73285 |       0.00115 |       0.48393
     -0.00511 |      -0.00490 |     233.79037 |       0.00284 |       0.49021
Evaluating losses...
     -0.00723 |      -0.00494 |     232.49417 |       0.00440 |       0.49424
-----------------------------------
| EpLenMean       | 406           |
| EpRewMean       | 406           |
| EpThisIter      | 1             |
| EpisodesSoFar   | 408           |
| TimeElapsed     | 144           |
| TimestepsSoFar  | 148641        |
| ev_tdlam_before | 0.0176        |
| loss_ent        | 0.4942351     |
| loss_kl         | 0.004398997   |
| loss_pol_entpen | -0.004942351  |
| loss_pol_surr   | -0.0072274245 |
| loss_vf_loss   

Step: 336
********** Iteration 336 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00092 |      -0.00502 |     238.29828 |      1.33e-05 |       0.50164
     -0.00217 |      -0.00506 |     238.29030 |       0.00044 |       0.50573
     -0.01064 |      -0.00514 |     237.82860 |       0.00360 |       0.51369
     -0.01325 |      -0.00516 |     237.03468 |       0.00960 |       0.51596
Evaluating losses...
     -0.01217 |      -0.00515 |     236.61200 |       0.01320 |       0.51540
----------------------------------
| EpLenMean       | 406          |
| EpRewMean       | 406          |
| EpThisIter      | 1            |
| EpisodesSoFar   | 416          |
| TimeElapsed     | 148          |
| TimestepsSoFar  | 152649       |
| ev_tdlam_before | -0.00521     |
| loss_ent        | 0.5153962    |
| loss_kl         | 0.013202256  |
| loss_pol_entpen | -0.005153962 |
| loss_pol_surr   | -0.012165448 |
| loss_vf_loss    | 236.612  

Step: 344
********** Iteration 344 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00057 |      -0.00499 |     233.96173 |      7.55e-05 |       0.49913
     -0.00201 |      -0.00507 |     234.00983 |       0.00084 |       0.50675
     -0.00385 |      -0.00516 |     233.68503 |       0.00259 |       0.51611
     -0.00496 |      -0.00525 |     233.11661 |       0.00464 |       0.52493
Evaluating losses...
     -0.00533 |      -0.00530 |     232.69571 |       0.00565 |       0.52971
-----------------------------------
| EpLenMean       | 420           |
| EpRewMean       | 420           |
| EpThisIter      | 1             |
| EpisodesSoFar   | 424           |
| TimeElapsed     | 151           |
| TimestepsSoFar  | 157041        |
| ev_tdlam_before | 0.00321       |
| loss_ent        | 0.5297129     |
| loss_kl         | 0.005646614   |
| loss_pol_entpen | -0.005297129  |
| loss_pol_surr   | -0.0053332373 |
| loss_vf_loss   

Step: 352
********** Iteration 352 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00073 |      -0.00501 |     220.84973 |      4.83e-05 |       0.50089
     -0.00556 |      -0.00494 |     220.23903 |       0.00116 |       0.49397
     -0.00673 |      -0.00485 |     219.45767 |       0.00377 |       0.48523
     -0.00697 |      -0.00482 |     218.42493 |       0.00522 |       0.48238
Evaluating losses...
     -0.00718 |      -0.00483 |     217.44366 |       0.00515 |       0.48301
----------------------------------
| EpLenMean       | 432          |
| EpRewMean       | 432          |
| EpThisIter      | 1            |
| EpisodesSoFar   | 432          |
| TimeElapsed     | 155          |
| TimestepsSoFar  | 161817       |
| ev_tdlam_before | 0.0382       |
| loss_ent        | 0.4830074    |
| loss_kl         | 0.005150743  |
| loss_pol_entpen | -0.004830074 |
| loss_pol_surr   | -0.007180663 |
| loss_vf_loss    | 217.44366

Step: 360
********** Iteration 360 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00011 |      -0.00511 |     224.89206 |      4.55e-06 |       0.51086
     -0.00034 |      -0.00510 |     225.31845 |       0.00011 |       0.50993
     -0.00051 |      -0.00507 |     222.73164 |       0.00055 |       0.50689
     -0.00171 |      -0.00505 |     219.23636 |       0.00115 |       0.50497
Evaluating losses...
     -0.00222 |      -0.00504 |     217.31171 |       0.00158 |       0.50367
-----------------------------------
| EpLenMean       | 433           |
| EpRewMean       | 433           |
| EpThisIter      | 1             |
| EpisodesSoFar   | 441           |
| TimeElapsed     | 158           |
| TimestepsSoFar  | 166147        |
| ev_tdlam_before | 0.0972        |
| loss_ent        | 0.5036688     |
| loss_kl         | 0.001583278   |
| loss_pol_entpen | -0.0050366875 |
| loss_pol_surr   | -0.0022248216 |
| loss_vf_loss   

Step: 368
********** Iteration 368 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
    -2.05e-06 |      -0.00465 |     240.04405 |      4.01e-05 |       0.46495
     -0.00138 |      -0.00462 |     240.16412 |       0.00040 |       0.46238
     -0.00294 |      -0.00463 |     239.07053 |       0.00115 |       0.46286
     -0.00502 |      -0.00463 |     236.90115 |       0.00240 |       0.46271
Evaluating losses...
     -0.00586 |      -0.00462 |     235.71300 |       0.00331 |       0.46242
----------------------------------
| EpLenMean       | 431          |
| EpRewMean       | 431          |
| EpThisIter      | 1            |
| EpisodesSoFar   | 449          |
| TimeElapsed     | 162          |
| TimestepsSoFar  | 170020       |
| ev_tdlam_before | 0.00924      |
| loss_ent        | 0.46242243   |
| loss_kl         | 0.0033098517 |
| loss_pol_entpen | -0.004624224 |
| loss_pol_surr   | -0.0058605   |
| loss_vf_loss    | 235.713  

Step: 376
********** Iteration 376 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00055 |      -0.00486 |     246.00426 |       0.00013 |       0.48564
     -0.00337 |      -0.00481 |     245.64725 |       0.00160 |       0.48066
     -0.00543 |      -0.00471 |     244.78314 |       0.00445 |       0.47103
     -0.00471 |      -0.00462 |     243.10304 |       0.00650 |       0.46238
Evaluating losses...
     -0.00549 |      -0.00459 |     242.28871 |       0.00642 |       0.45874
-----------------------------------
| EpLenMean       | 436           |
| EpRewMean       | 436           |
| EpThisIter      | 1             |
| EpisodesSoFar   | 457           |
| TimeElapsed     | 165           |
| TimestepsSoFar  | 174580        |
| ev_tdlam_before | -0.0316       |
| loss_ent        | 0.45874423    |
| loss_kl         | 0.0064188857  |
| loss_pol_entpen | -0.0045874426 |
| loss_pol_surr   | -0.005489614  |
| loss_vf_loss   

Step: 384
********** Iteration 384 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00198 |      -0.00510 |     139.36032 |       0.00015 |       0.51026
     -0.00785 |      -0.00503 |     138.96883 |       0.00282 |       0.50294
     -0.00926 |      -0.00492 |     138.36356 |       0.01021 |       0.49234
     -0.00763 |      -0.00488 |     137.64169 |       0.01611 |       0.48767
Evaluating losses...
     -0.00747 |      -0.00486 |     137.15698 |       0.01762 |       0.48613
-----------------------------------
| EpLenMean       | 438           |
| EpRewMean       | 438           |
| EpThisIter      | 1             |
| EpisodesSoFar   | 465           |
| TimeElapsed     | 169           |
| TimestepsSoFar  | 179024        |
| ev_tdlam_before | 0.0172        |
| loss_ent        | 0.48613358    |
| loss_kl         | 0.017616905   |
| loss_pol_entpen | -0.0048613357 |
| loss_pol_surr   | -0.007470038  |
| loss_vf_loss   

Step: 392
********** Iteration 392 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00025 |      -0.00501 |     137.41940 |      9.02e-05 |       0.50064
     -0.00168 |      -0.00498 |     137.58034 |       0.00055 |       0.49810
     -0.00346 |      -0.00495 |     137.39751 |       0.00157 |       0.49453
     -0.00591 |      -0.00490 |     136.89780 |       0.00332 |       0.49016
Evaluating losses...
     -0.00672 |      -0.00488 |     136.49002 |       0.00450 |       0.48770
----------------------------------
| EpLenMean       | 433          |
| EpRewMean       | 433          |
| EpThisIter      | 1            |
| EpisodesSoFar   | 473          |
| TimeElapsed     | 172          |
| TimestepsSoFar  | 182352       |
| ev_tdlam_before | 0.0386       |
| loss_ent        | 0.48769975   |
| loss_kl         | 0.004503842  |
| loss_pol_entpen | -0.004876998 |
| loss_pol_surr   | -0.006721901 |
| loss_vf_loss    | 136.49002

Step: 400
********** Iteration 400 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00040 |      -0.00436 |     235.08424 |      7.73e-06 |       0.43626
     -0.00044 |      -0.00437 |     233.69417 |      2.59e-05 |       0.43729
     -0.00159 |      -0.00438 |     232.33946 |       0.00086 |       0.43804
     -0.00446 |      -0.00439 |     231.04077 |       0.00385 |       0.43852
Evaluating losses...
     -0.00598 |      -0.00438 |     230.34068 |       0.00673 |       0.43845
----------------------------------
| EpLenMean       | 430          |
| EpRewMean       | 430          |
| EpThisIter      | 1            |
| EpisodesSoFar   | 481          |
| TimeElapsed     | 176          |
| TimestepsSoFar  | 186826       |
| ev_tdlam_before | 0.0352       |
| loss_ent        | 0.4384501    |
| loss_kl         | 0.0067333654 |
| loss_pol_entpen | -0.004384501 |
| loss_pol_surr   | -0.005980963 |
| loss_vf_loss    | 230.34068

Step: 408
********** Iteration 408 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00043 |      -0.00459 |     149.37241 |      3.18e-05 |       0.45910
     -0.00081 |      -0.00454 |     148.81889 |       0.00045 |       0.45380
     -0.00634 |      -0.00446 |     147.25089 |       0.00205 |       0.44630
     -0.00681 |      -0.00440 |     145.34439 |       0.00372 |       0.43954
Evaluating losses...
     -0.00716 |      -0.00436 |     144.08093 |       0.00440 |       0.43564
-----------------------------------
| EpLenMean       | 428           |
| EpRewMean       | 428           |
| EpThisIter      | 1             |
| EpisodesSoFar   | 489           |
| TimeElapsed     | 179           |
| TimestepsSoFar  | 190438        |
| ev_tdlam_before | 0.00455       |
| loss_ent        | 0.43564057    |
| loss_kl         | 0.0043988135  |
| loss_pol_entpen | -0.0043564057 |
| loss_pol_surr   | -0.007158786  |
| loss_vf_loss   

Step: 416
********** Iteration 416 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00673 |      -0.00465 |     359.16580 |       0.00180 |       0.46460
     -0.01429 |      -0.00444 |     339.79178 |       0.01507 |       0.44449
     -0.01150 |      -0.00437 |     312.82224 |       0.02171 |       0.43737
     -0.01396 |      -0.00445 |     287.42914 |       0.01345 |       0.44527
Evaluating losses...
     -0.01515 |      -0.00451 |     273.33719 |       0.00721 |       0.45129
----------------------------------
| EpLenMean       | 425          |
| EpRewMean       | 425          |
| EpThisIter      | 2            |
| EpisodesSoFar   | 498          |
| TimeElapsed     | 183          |
| TimestepsSoFar  | 193934       |
| ev_tdlam_before | -0.265       |
| loss_ent        | 0.4512907    |
| loss_kl         | 0.007212884  |
| loss_pol_entpen | -0.004512907 |
| loss_pol_surr   | -0.015153943 |
| loss_vf_loss    | 273.3372 

Step: 424
********** Iteration 424 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00197 |      -0.00431 |     115.22884 |      9.73e-05 |       0.43087
      0.00150 |      -0.00432 |     114.70568 |       0.00026 |       0.43157
     -0.00031 |      -0.00436 |     114.19485 |       0.00034 |       0.43553
     -0.00210 |      -0.00442 |     113.55362 |       0.00152 |       0.44188
Evaluating losses...
     -0.00238 |      -0.00446 |     113.04796 |       0.00252 |       0.44636
-----------------------------------
| EpLenMean       | 394           |
| EpRewMean       | 394           |
| EpThisIter      | 3             |
| EpisodesSoFar   | 512           |
| TimeElapsed     | 186           |
| TimestepsSoFar  | 197029        |
| ev_tdlam_before | 0.704         |
| loss_ent        | 0.44636142    |
| loss_kl         | 0.002520198   |
| loss_pol_entpen | -0.0044636144 |
| loss_pol_surr   | -0.002383206  |
| loss_vf_loss   

Step: 432
********** Iteration 432 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00029 |      -0.00504 |      94.74863 |      2.89e-05 |       0.50424
     -0.00231 |      -0.00507 |      94.60267 |       0.00025 |       0.50676
     -0.00573 |      -0.00506 |      94.11064 |       0.00089 |       0.50622
     -0.00780 |      -0.00504 |      93.42341 |       0.00269 |       0.50450
Evaluating losses...
     -0.00966 |      -0.00502 |      93.10374 |       0.00448 |       0.50230
-----------------------------------
| EpLenMean       | 336           |
| EpRewMean       | 336           |
| EpThisIter      | 2             |
| EpisodesSoFar   | 529           |
| TimeElapsed     | 190           |
| TimestepsSoFar  | 199325        |
| ev_tdlam_before | 0.701         |
| loss_ent        | 0.5023027     |
| loss_kl         | 0.0044813156  |
| loss_pol_entpen | -0.0050230273 |
| loss_pol_surr   | -0.009660227  |
| loss_vf_loss   

Step: 440
********** Iteration 440 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00082 |      -0.00474 |     177.99173 |       0.00013 |       0.47370
     -0.00377 |      -0.00461 |     148.19420 |       0.00091 |       0.46138
     -0.00927 |      -0.00446 |     137.11479 |       0.00241 |       0.44603
     -0.01191 |      -0.00435 |     135.85571 |       0.00425 |       0.43470
Evaluating losses...
     -0.01336 |      -0.00429 |     136.27914 |       0.00710 |       0.42931
-----------------------------------
| EpLenMean       | 309           |
| EpRewMean       | 309           |
| EpThisIter      | 1             |
| EpisodesSoFar   | 540           |
| TimeElapsed     | 194           |
| TimestepsSoFar  | 202161        |
| ev_tdlam_before | -0.259        |
| loss_ent        | 0.42931455    |
| loss_kl         | 0.007100356   |
| loss_pol_entpen | -0.0042931456 |
| loss_pol_surr   | -0.013359245  |
| loss_vf_loss   

Step: 448
********** Iteration 448 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00083 |      -0.00370 |     110.83458 |      2.83e-05 |       0.37030
     -0.00100 |      -0.00372 |      99.22030 |      7.91e-05 |       0.37216
     -0.00721 |      -0.00375 |      92.70750 |       0.00439 |       0.37492
     -0.00632 |      -0.00376 |      92.50214 |       0.01747 |       0.37570
Evaluating losses...
     -0.00689 |      -0.00375 |      90.26091 |       0.02100 |       0.37541
-----------------------------------
| EpLenMean       | 286           |
| EpRewMean       | 286           |
| EpThisIter      | 1             |
| EpisodesSoFar   | 551           |
| TimeElapsed     | 197           |
| TimestepsSoFar  | 204616        |
| ev_tdlam_before | 0.517         |
| loss_ent        | 0.37540784    |
| loss_kl         | 0.020999245   |
| loss_pol_entpen | -0.0037540784 |
| loss_pol_surr   | -0.0068929587 |
| loss_vf_loss   

Step: 456
********** Iteration 456 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00045 |      -0.00415 |     126.16525 |      5.30e-05 |       0.41452
     -0.00466 |      -0.00418 |     125.19417 |       0.00075 |       0.41799
     -0.00652 |      -0.00422 |     122.73956 |       0.00269 |       0.42151
     -0.00528 |      -0.00423 |     119.56635 |       0.00425 |       0.42269
Evaluating losses...
     -0.00552 |      -0.00423 |     117.43899 |       0.00413 |       0.42342
----------------------------------
| EpLenMean       | 282          |
| EpRewMean       | 282          |
| EpThisIter      | 1            |
| EpisodesSoFar   | 559          |
| TimeElapsed     | 201          |
| TimestepsSoFar  | 208725       |
| ev_tdlam_before | 0.145        |
| loss_ent        | 0.42341536   |
| loss_kl         | 0.0041276217 |
| loss_pol_entpen | -0.004234154 |
| loss_pol_surr   | -0.005518215 |
| loss_vf_loss    | 117.43899

Step: 464
********** Iteration 464 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
    -1.43e-05 |      -0.00445 |     256.18433 |      2.18e-05 |       0.44460
     -0.00053 |      -0.00449 |     251.03706 |       0.00013 |       0.44933
     -0.00151 |      -0.00455 |     245.01390 |       0.00035 |       0.45459
     -0.00398 |      -0.00459 |     239.14363 |       0.00093 |       0.45933
Evaluating losses...
     -0.00503 |      -0.00462 |     235.95270 |       0.00154 |       0.46192
-----------------------------------
| EpLenMean       | 284           |
| EpRewMean       | 284           |
| EpThisIter      | 1             |
| EpisodesSoFar   | 567           |
| TimeElapsed     | 204           |
| TimestepsSoFar  | 213193        |
| ev_tdlam_before | -0.075        |
| loss_ent        | 0.46191758    |
| loss_kl         | 0.0015396934  |
| loss_pol_entpen | -0.0046191756 |
| loss_pol_surr   | -0.0050313156 |
| loss_vf_loss   

Step: 472
********** Iteration 472 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00022 |      -0.00341 |     297.42603 |      2.66e-05 |       0.34136
     -0.00102 |      -0.00336 |     294.63327 |       0.00037 |       0.33637
     -0.00561 |      -0.00333 |     289.74902 |       0.00149 |       0.33279
     -0.00603 |      -0.00330 |     283.87650 |       0.00459 |       0.33047
Evaluating losses...
     -0.00816 |      -0.00328 |     280.67719 |       0.00719 |       0.32848
-----------------------------------
| EpLenMean       | 285           |
| EpRewMean       | 285           |
| EpThisIter      | 1             |
| EpisodesSoFar   | 575           |
| TimeElapsed     | 208           |
| TimestepsSoFar  | 216545        |
| ev_tdlam_before | -0.362        |
| loss_ent        | 0.3284778     |
| loss_kl         | 0.0071915463  |
| loss_pol_entpen | -0.003284778  |
| loss_pol_surr   | -0.0081607215 |
| loss_vf_loss   

Step: 480
********** Iteration 480 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00017 |      -0.00307 |     238.42889 |      7.51e-05 |       0.30665
     -0.00235 |      -0.00312 |     238.07553 |       0.00092 |       0.31156
     -0.00470 |      -0.00317 |     236.45074 |       0.00351 |       0.31667
     -0.00845 |      -0.00321 |     234.97295 |       0.00640 |       0.32109
Evaluating losses...
     -0.00874 |      -0.00323 |     233.59482 |       0.00715 |       0.32290
-----------------------------------
| EpLenMean       | 280           |
| EpRewMean       | 280           |
| EpThisIter      | 1             |
| EpisodesSoFar   | 583           |
| TimeElapsed     | 211           |
| TimestepsSoFar  | 220281        |
| ev_tdlam_before | -0.0102       |
| loss_ent        | 0.32290006    |
| loss_kl         | 0.0071541774  |
| loss_pol_entpen | -0.0032290004 |
| loss_pol_surr   | -0.008743129  |
| loss_vf_loss   

Step: 488
********** Iteration 488 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     2.12e-05 |      -0.00327 |     259.39496 |       0.00022 |       0.32747
     4.09e-05 |      -0.00326 |     257.57590 |       0.00223 |       0.32579
     -0.00135 |      -0.00323 |     253.70544 |       0.00509 |       0.32277
     -0.00082 |      -0.00321 |     248.07758 |       0.00596 |       0.32150
Evaluating losses...
     -0.00183 |      -0.00321 |     244.14993 |       0.00545 |       0.32130
-----------------------------------
| EpLenMean       | 284           |
| EpRewMean       | 284           |
| EpThisIter      | 1             |
| EpisodesSoFar   | 591           |
| TimeElapsed     | 215           |
| TimestepsSoFar  | 224401        |
| ev_tdlam_before | -0.151        |
| loss_ent        | 0.32130095    |
| loss_kl         | 0.0054477025  |
| loss_pol_entpen | -0.0032130096 |
| loss_pol_surr   | -0.0018285722 |
| loss_vf_loss   

Step: 496
********** Iteration 496 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00062 |      -0.00323 |     230.97643 |      5.63e-05 |       0.32321
     -0.00237 |      -0.00323 |     229.88043 |       0.00079 |       0.32299
     -0.00268 |      -0.00325 |     227.42586 |       0.00315 |       0.32495
     -0.00392 |      -0.00326 |     224.65669 |       0.00600 |       0.32640
Evaluating losses...
     -0.00409 |      -0.00328 |     222.64732 |       0.00705 |       0.32757
-----------------------------------
| EpLenMean       | 294           |
| EpRewMean       | 294           |
| EpThisIter      | 1             |
| EpisodesSoFar   | 599           |
| TimeElapsed     | 218           |
| TimestepsSoFar  | 228905        |
| ev_tdlam_before | 0.0482        |
| loss_ent        | 0.32757312    |
| loss_kl         | 0.0070484164  |
| loss_pol_entpen | -0.0032757309 |
| loss_pol_surr   | -0.004086159  |
| loss_vf_loss   

Step: 504
********** Iteration 504 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00075 |      -0.00293 |     130.92575 |      7.25e-05 |       0.29322
     -0.00266 |      -0.00300 |     128.14722 |       0.00101 |       0.30003
     -0.00590 |      -0.00308 |     125.03458 |       0.00317 |       0.30769
     -0.00747 |      -0.00315 |     121.85224 |       0.00543 |       0.31493
Evaluating losses...
     -0.00751 |      -0.00319 |     119.84571 |       0.00672 |       0.31881
----------------------------------
| EpLenMean       | 312          |
| EpRewMean       | 312          |
| EpThisIter      | 1            |
| EpisodesSoFar   | 607          |
| TimeElapsed     | 222          |
| TimestepsSoFar  | 233356       |
| ev_tdlam_before | 0.0818       |
| loss_ent        | 0.3188129    |
| loss_kl         | 0.006715959  |
| loss_pol_entpen | -0.003188129 |
| loss_pol_surr   | -0.007506624 |
| loss_vf_loss    | 119.84571

Step: 512
********** Iteration 512 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00628 |      -0.00382 |      36.51417 |       0.00032 |       0.38196
     -0.01386 |      -0.00371 |      30.98662 |       0.00331 |       0.37077
     -0.01789 |      -0.00362 |      26.82569 |       0.01043 |       0.36174
     -0.01733 |      -0.00356 |      23.04643 |       0.01549 |       0.35557
Evaluating losses...
     -0.01748 |      -0.00353 |      21.35115 |       0.01624 |       0.35281
-----------------------------------
| EpLenMean       | 334           |
| EpRewMean       | 334           |
| EpThisIter      | 1             |
| EpisodesSoFar   | 615           |
| TimeElapsed     | 225           |
| TimestepsSoFar  | 236962        |
| ev_tdlam_before | 0.905         |
| loss_ent        | 0.3528096     |
| loss_kl         | 0.016238434   |
| loss_pol_entpen | -0.0035280962 |
| loss_pol_surr   | -0.017478619  |
| loss_vf_loss   

Step: 520
********** Iteration 520 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00085 |      -0.00409 |     161.22449 |      3.57e-05 |       0.40939
     -0.00359 |      -0.00413 |     158.94963 |       0.00054 |       0.41346
     -0.00854 |      -0.00418 |     155.01123 |       0.00239 |       0.41829
     -0.00842 |      -0.00424 |     150.62634 |       0.00553 |       0.42441
Evaluating losses...
     -0.00852 |      -0.00429 |     147.72076 |       0.00701 |       0.42940
-----------------------------------
| EpLenMean       | 360           |
| EpRewMean       | 360           |
| EpThisIter      | 1             |
| EpisodesSoFar   | 623           |
| TimeElapsed     | 229           |
| TimestepsSoFar  | 240978        |
| ev_tdlam_before | -0.193        |
| loss_ent        | 0.42939782    |
| loss_kl         | 0.007013948   |
| loss_pol_entpen | -0.004293978  |
| loss_pol_surr   | -0.0085246675 |
| loss_vf_loss   

Step: 528
********** Iteration 528 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00034 |      -0.00499 |     128.78015 |       0.00011 |       0.49920
     -0.00630 |      -0.00503 |     128.73869 |       0.00204 |       0.50305
     -0.00960 |      -0.00503 |     127.10673 |       0.01018 |       0.50269
     -0.00824 |      -0.00508 |     124.65993 |       0.01423 |       0.50788
Evaluating losses...
     -0.01047 |      -0.00515 |     123.05902 |       0.01147 |       0.51479
----------------------------------
| EpLenMean       | 386          |
| EpRewMean       | 386          |
| EpThisIter      | 1            |
| EpisodesSoFar   | 631          |
| TimeElapsed     | 232          |
| TimestepsSoFar  | 245378       |
| ev_tdlam_before | 0.126        |
| loss_ent        | 0.51478887   |
| loss_kl         | 0.011468051  |
| loss_pol_entpen | -0.005147889 |
| loss_pol_surr   | -0.01047021  |
| loss_vf_loss    | 123.05902

Step: 536
********** Iteration 536 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
    -5.18e-05 |      -0.00557 |     137.00751 |      8.66e-05 |       0.55741
     -0.00139 |      -0.00551 |     134.87988 |       0.00068 |       0.55067
     -0.00536 |      -0.00547 |     131.48700 |       0.00156 |       0.54701
     -0.00927 |      -0.00548 |     129.01729 |       0.00258 |       0.54824
Evaluating losses...
     -0.01050 |      -0.00553 |     127.67105 |       0.00325 |       0.55267
-----------------------------------
| EpLenMean       | 412           |
| EpRewMean       | 412           |
| EpThisIter      | 1             |
| EpisodesSoFar   | 639           |
| TimeElapsed     | 236           |
| TimestepsSoFar  | 250162        |
| ev_tdlam_before | 0.0497        |
| loss_ent        | 0.5526662     |
| loss_kl         | 0.003254778   |
| loss_pol_entpen | -0.0055266623 |
| loss_pol_surr   | -0.010498413  |
| loss_vf_loss   

Step: 544
********** Iteration 544 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00052 |      -0.00524 |     125.06010 |       0.00017 |       0.52431
     -0.00099 |      -0.00527 |     122.69417 |       0.00101 |       0.52729
     -0.00289 |      -0.00530 |     120.03176 |       0.00197 |       0.53001
     -0.00298 |      -0.00529 |     117.13763 |       0.00390 |       0.52918
Evaluating losses...
     -0.00503 |      -0.00528 |     115.57481 |       0.00547 |       0.52778
-----------------------------------
| EpLenMean       | 428           |
| EpRewMean       | 428           |
| EpThisIter      | 1             |
| EpisodesSoFar   | 647           |
| TimeElapsed     | 239           |
| TimestepsSoFar  | 254430        |
| ev_tdlam_before | 0.183         |
| loss_ent        | 0.52777815    |
| loss_kl         | 0.0054707373  |
| loss_pol_entpen | -0.0052777817 |
| loss_pol_surr   | -0.005034417  |
| loss_vf_loss   

Step: 552
********** Iteration 552 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
    -5.98e-05 |      -0.00533 |     155.75052 |      2.67e-05 |       0.53320
     -0.00206 |      -0.00530 |     133.75517 |       0.00020 |       0.52970
     -0.00417 |      -0.00524 |     114.19614 |       0.00061 |       0.52428
     -0.00546 |      -0.00519 |     101.58242 |       0.00117 |       0.51880
Evaluating losses...
     -0.00635 |      -0.00517 |      96.78391 |       0.00145 |       0.51728
-----------------------------------
| EpLenMean       | 435           |
| EpRewMean       | 435           |
| EpThisIter      | 1             |
| EpisodesSoFar   | 656           |
| TimeElapsed     | 243           |
| TimestepsSoFar  | 258599        |
| ev_tdlam_before | -0.138        |
| loss_ent        | 0.51728487    |
| loss_kl         | 0.0014542964  |
| loss_pol_entpen | -0.0051728482 |
| loss_pol_surr   | -0.006347643  |
| loss_vf_loss   

Step: 560
********** Iteration 560 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00134 |      -0.00528 |     223.71716 |       0.00012 |       0.52804
     -0.00493 |      -0.00540 |     222.51678 |       0.00133 |       0.54045
     -0.00659 |      -0.00553 |     221.31792 |       0.00354 |       0.55257
     -0.00630 |      -0.00562 |     219.82274 |       0.00584 |       0.56180
Evaluating losses...
     -0.00676 |      -0.00566 |     218.90260 |       0.00702 |       0.56619
-----------------------------------
| EpLenMean       | 432           |
| EpRewMean       | 432           |
| EpThisIter      | 1             |
| EpisodesSoFar   | 664           |
| TimeElapsed     | 246           |
| TimestepsSoFar  | 263096        |
| ev_tdlam_before | 0.0353        |
| loss_ent        | 0.5661887     |
| loss_kl         | 0.0070211063  |
| loss_pol_entpen | -0.005661886  |
| loss_pol_surr   | -0.0067577586 |
| loss_vf_loss   

Step: 568
********** Iteration 568 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00077 |      -0.00605 |     194.92937 |      6.59e-05 |       0.60511
      0.00146 |      -0.00606 |     193.43442 |       0.00017 |       0.60558
     -0.00247 |      -0.00597 |     191.81125 |       0.00062 |       0.59669
     -0.00508 |      -0.00584 |     189.71179 |       0.00268 |       0.58361
Evaluating losses...
     -0.00462 |      -0.00576 |     188.44283 |       0.00423 |       0.57638
-----------------------------------
| EpLenMean       | 413           |
| EpRewMean       | 413           |
| EpThisIter      | 1             |
| EpisodesSoFar   | 676           |
| TimeElapsed     | 250           |
| TimestepsSoFar  | 266510        |
| ev_tdlam_before | 0.124         |
| loss_ent        | 0.57637936    |
| loss_kl         | 0.0042336863  |
| loss_pol_entpen | -0.0057637934 |
| loss_pol_surr   | -0.0046235863 |
| loss_vf_loss   

Step: 576
********** Iteration 576 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00013 |      -0.00542 |     134.63788 |      8.79e-06 |       0.54226
     8.27e-05 |      -0.00540 |     134.33884 |      6.95e-05 |       0.54047
      0.00010 |      -0.00541 |     133.72043 |       0.00019 |       0.54112
     -0.00013 |      -0.00543 |     132.88344 |       0.00033 |       0.54314
Evaluating losses...
     -0.00043 |      -0.00544 |     132.46812 |       0.00044 |       0.54420
-----------------------------------
| EpLenMean       | 404           |
| EpRewMean       | 404           |
| EpThisIter      | 1             |
| EpisodesSoFar   | 685           |
| TimeElapsed     | 253           |
| TimestepsSoFar  | 270116        |
| ev_tdlam_before | -0.0215       |
| loss_ent        | 0.54419565    |
| loss_kl         | 0.0004378226  |
| loss_pol_entpen | -0.0054419567 |
| loss_pol_surr   | -0.0004306361 |
| loss_vf_loss   

Step: 584
********** Iteration 584 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00027 |      -0.00511 |     133.81731 |      2.67e-05 |       0.51056
     -0.00361 |      -0.00512 |     133.80879 |       0.00059 |       0.51156
     -0.00682 |      -0.00509 |     133.29555 |       0.00316 |       0.50861
     -0.01133 |      -0.00505 |     132.61522 |       0.00834 |       0.50528
Evaluating losses...
     -0.01146 |      -0.00505 |     131.95956 |       0.01134 |       0.50455
----------------------------------
| EpLenMean       | 400          |
| EpRewMean       | 400          |
| EpThisIter      | 1            |
| EpisodesSoFar   | 693          |
| TimeElapsed     | 257          |
| TimestepsSoFar  | 273564       |
| ev_tdlam_before | 0.0133       |
| loss_ent        | 0.50454617   |
| loss_kl         | 0.011340279  |
| loss_pol_entpen | -0.005045462 |
| loss_pol_surr   | -0.011463694 |
| loss_vf_loss    | 131.95956

Step: 592
********** Iteration 592 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00030 |      -0.00516 |     132.04279 |      3.22e-05 |       0.51616
      0.00043 |      -0.00518 |     132.19731 |       0.00049 |       0.51828
     -0.00202 |      -0.00523 |     131.57916 |       0.00150 |       0.52257
     -0.00293 |      -0.00526 |     130.43842 |       0.00282 |       0.52592
Evaluating losses...
     -0.00323 |      -0.00528 |     129.51602 |       0.00370 |       0.52826
-----------------------------------
| EpLenMean       | 396           |
| EpRewMean       | 396           |
| EpThisIter      | 1             |
| EpisodesSoFar   | 701           |
| TimeElapsed     | 260           |
| TimestepsSoFar  | 277396        |
| ev_tdlam_before | 0.0343        |
| loss_ent        | 0.5282619     |
| loss_kl         | 0.0037032983  |
| loss_pol_entpen | -0.005282619  |
| loss_pol_surr   | -0.0032297503 |
| loss_vf_loss   

Step: 600
********** Iteration 600 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     1.66e-05 |      -0.00554 |     134.98386 |      1.74e-05 |       0.55377
     -0.00067 |      -0.00559 |     135.27893 |       0.00028 |       0.55902
     -0.00418 |      -0.00570 |     135.03690 |       0.00123 |       0.57002
     -0.00608 |      -0.00583 |     134.49585 |       0.00312 |       0.58292
Evaluating losses...
     -0.00722 |      -0.00591 |     133.96205 |       0.00478 |       0.59121
----------------------------------
| EpLenMean       | 397          |
| EpRewMean       | 397          |
| EpThisIter      | 1            |
| EpisodesSoFar   | 709          |
| TimeElapsed     | 264          |
| TimestepsSoFar  | 281612       |
| ev_tdlam_before | 0.0146       |
| loss_ent        | 0.5912117    |
| loss_kl         | 0.004781627  |
| loss_pol_entpen | -0.005912117 |
| loss_pol_surr   | -0.007219229 |
| loss_vf_loss    | 133.96205

Step: 608
********** Iteration 608 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00451 |      -0.00594 |     484.10464 |       0.00019 |       0.59368
     -0.01984 |      -0.00564 |     470.73285 |       0.00781 |       0.56358
     -0.01301 |      -0.00540 |     447.44839 |       0.02027 |       0.53980
     -0.01461 |      -0.00545 |     423.40073 |       0.01836 |       0.54532
Evaluating losses...
     -0.01919 |      -0.00558 |     409.05023 |       0.01236 |       0.55832
----------------------------------
| EpLenMean       | 384          |
| EpRewMean       | 384          |
| EpThisIter      | 4            |
| EpisodesSoFar   | 721          |
| TimeElapsed     | 267          |
| TimestepsSoFar  | 285528       |
| ev_tdlam_before | 0.0309       |
| loss_ent        | 0.5583166    |
| loss_kl         | 0.012363771  |
| loss_pol_entpen | -0.005583166 |
| loss_pol_surr   | -0.01919382  |
| loss_vf_loss    | 409.05023

Step: 616
********** Iteration 616 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00042 |      -0.00628 |     214.69681 |      6.38e-05 |       0.62760
     -0.00093 |      -0.00631 |     214.07135 |       0.00053 |       0.63108
     -0.00228 |      -0.00634 |     212.96089 |       0.00162 |       0.63379
     -0.00314 |      -0.00637 |     211.71696 |       0.00341 |       0.63683
Evaluating losses...
     -0.00340 |      -0.00638 |     210.66449 |       0.00471 |       0.63818
-----------------------------------
| EpLenMean       | 369           |
| EpRewMean       | 369           |
| EpThisIter      | 1             |
| EpisodesSoFar   | 730           |
| TimeElapsed     | 271           |
| TimestepsSoFar  | 289108        |
| ev_tdlam_before | -0.0105       |
| loss_ent        | 0.6381824     |
| loss_kl         | 0.0047079953  |
| loss_pol_entpen | -0.0063818237 |
| loss_pol_surr   | -0.0034018122 |
| loss_vf_loss   

Step: 624
********** Iteration 624 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00047 |      -0.00593 |     210.10822 |      3.37e-05 |       0.59321
     -0.00030 |      -0.00587 |     209.74048 |       0.00025 |       0.58744
     -0.00287 |      -0.00578 |     208.98743 |       0.00120 |       0.57820
     -0.00605 |      -0.00570 |     207.73627 |       0.00275 |       0.56958
Evaluating losses...
     -0.00683 |      -0.00564 |     206.78537 |       0.00412 |       0.56411
-----------------------------------
| EpLenMean       | 343           |
| EpRewMean       | 343           |
| EpThisIter      | 1             |
| EpisodesSoFar   | 742           |
| TimeElapsed     | 274           |
| TimestepsSoFar  | 292722        |
| ev_tdlam_before | -0.0855       |
| loss_ent        | 0.5641078     |
| loss_kl         | 0.004116824   |
| loss_pol_entpen | -0.0056410776 |
| loss_pol_surr   | -0.006833839  |
| loss_vf_loss   

Step: 632
********** Iteration 632 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00051 |      -0.00486 |     217.70573 |      5.32e-05 |       0.48567
      0.00059 |      -0.00479 |     217.86945 |       0.00036 |       0.47887
     -0.00280 |      -0.00477 |     217.90280 |       0.00134 |       0.47683
     -0.00527 |      -0.00478 |     217.76990 |       0.00378 |       0.47839
Evaluating losses...
     -0.00868 |      -0.00480 |     217.65399 |       0.00668 |       0.47965
-----------------------------------
| EpLenMean       | 345           |
| EpRewMean       | 345           |
| EpThisIter      | 1             |
| EpisodesSoFar   | 750           |
| TimeElapsed     | 278           |
| TimestepsSoFar  | 297282        |
| ev_tdlam_before | 0.0151        |
| loss_ent        | 0.47965324    |
| loss_kl         | 0.0066763735  |
| loss_pol_entpen | -0.0047965324 |
| loss_pol_surr   | -0.008682933  |
| loss_vf_loss   

Step: 640
********** Iteration 640 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00099 |      -0.00526 |     144.82210 |       0.00014 |       0.52575
     -0.00428 |      -0.00533 |     142.97270 |       0.00131 |       0.53310
     -0.00579 |      -0.00542 |     140.03311 |       0.00386 |       0.54197
     -0.00563 |      -0.00550 |     137.31235 |       0.00730 |       0.54974
Evaluating losses...
     -0.00663 |      -0.00554 |     135.55333 |       0.00935 |       0.55413
-----------------------------------
| EpLenMean       | 358           |
| EpRewMean       | 358           |
| EpThisIter      | 1             |
| EpisodesSoFar   | 758           |
| TimeElapsed     | 281           |
| TimestepsSoFar  | 301726        |
| ev_tdlam_before | -0.0803       |
| loss_ent        | 0.5541278     |
| loss_kl         | 0.009350184   |
| loss_pol_entpen | -0.005541278  |
| loss_pol_surr   | -0.0066288263 |
| loss_vf_loss   

Step: 648
********** Iteration 648 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
    -7.41e-05 |      -0.00554 |     133.50777 |       0.00013 |       0.55380
     -0.00348 |      -0.00550 |     133.30060 |       0.00097 |       0.54951
     -0.00698 |      -0.00544 |     132.04074 |       0.00357 |       0.54354
     -0.00794 |      -0.00538 |     130.38631 |       0.00855 |       0.53829
Evaluating losses...
     -0.00696 |      -0.00536 |     129.16281 |       0.01113 |       0.53612
-----------------------------------
| EpLenMean       | 350           |
| EpRewMean       | 350           |
| EpThisIter      | 1             |
| EpisodesSoFar   | 766           |
| TimeElapsed     | 285           |
| TimestepsSoFar  | 305166        |
| ev_tdlam_before | 0.0599        |
| loss_ent        | 0.5361185     |
| loss_kl         | 0.011134826   |
| loss_pol_entpen | -0.0053611845 |
| loss_pol_surr   | -0.006959429  |
| loss_vf_loss   

Step: 656
********** Iteration 656 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     9.85e-05 |      -0.00578 |     122.99979 |      6.96e-06 |       0.57816
     -0.00055 |      -0.00574 |     123.20426 |       0.00024 |       0.57371
     -0.00400 |      -0.00569 |     122.61702 |       0.00115 |       0.56875
     -0.00736 |      -0.00565 |     121.40935 |       0.00290 |       0.56459
Evaluating losses...
     -0.00892 |      -0.00563 |     120.11848 |       0.00437 |       0.56340
-----------------------------------
| EpLenMean       | 361           |
| EpRewMean       | 361           |
| EpThisIter      | 1             |
| EpisodesSoFar   | 778           |
| TimeElapsed     | 288           |
| TimestepsSoFar  | 309255        |
| ev_tdlam_before | 0.083         |
| loss_ent        | 0.56340057    |
| loss_kl         | 0.0043706037  |
| loss_pol_entpen | -0.0056340056 |
| loss_pol_surr   | -0.0089244135 |
| loss_vf_loss   

Step: 664
********** Iteration 664 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00064 |      -0.00551 |     132.10390 |      3.47e-05 |       0.55057
      0.00043 |      -0.00552 |     132.03540 |      9.73e-05 |       0.55214
      0.00029 |      -0.00553 |     131.44765 |      7.81e-05 |       0.55282
    -9.58e-05 |      -0.00553 |     130.73955 |      4.36e-05 |       0.55251
Evaluating losses...
     -0.00035 |      -0.00552 |     130.10962 |      5.19e-05 |       0.55219
------------------------------------
| EpLenMean       | 368            |
| EpRewMean       | 368            |
| EpThisIter      | 1              |
| EpisodesSoFar   | 786            |
| TimeElapsed     | 292            |
| TimestepsSoFar  | 313751         |
| ev_tdlam_before | -0.0207        |
| loss_ent        | 0.55219066     |
| loss_kl         | 5.190629e-05   |
| loss_pol_entpen | -0.005521907   |
| loss_pol_surr   | -0.00035412982 |
| los

Step: 672
********** Iteration 672 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00013 |      -0.00539 |     204.43823 |      2.42e-06 |       0.53870
     -0.00041 |      -0.00540 |     204.26947 |       0.00014 |       0.54005
     -0.00121 |      -0.00543 |     203.93353 |       0.00048 |       0.54256
     -0.00207 |      -0.00547 |     203.49571 |       0.00111 |       0.54662
Evaluating losses...
     -0.00232 |      -0.00548 |     203.26657 |       0.00171 |       0.54848
-----------------------------------
| EpLenMean       | 375           |
| EpRewMean       | 375           |
| EpThisIter      | 1             |
| EpisodesSoFar   | 794           |
| TimeElapsed     | 295           |
| TimestepsSoFar  | 318631        |
| ev_tdlam_before | -0.0106       |
| loss_ent        | 0.5484792     |
| loss_kl         | 0.0017108876  |
| loss_pol_entpen | -0.0054847915 |
| loss_pol_surr   | -0.0023181867 |
| loss_vf_loss   

Step: 680
********** Iteration 680 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00015 |      -0.00583 |     219.25169 |      7.62e-05 |       0.58317
     -0.00513 |      -0.00573 |     218.21564 |       0.00090 |       0.57269
     -0.00975 |      -0.00557 |     216.83353 |       0.00341 |       0.55740
     -0.01138 |      -0.00543 |     215.20746 |       0.00727 |       0.54339
Evaluating losses...
     -0.01156 |      -0.00538 |     214.55441 |       0.00929 |       0.53757
----------------------------------
| EpLenMean       | 364          |
| EpRewMean       | 364          |
| EpThisIter      | 1            |
| EpisodesSoFar   | 803          |
| TimeElapsed     | 299          |
| TimestepsSoFar  | 321933       |
| ev_tdlam_before | 0.00495      |
| loss_ent        | 0.53757364   |
| loss_kl         | 0.009288194  |
| loss_pol_entpen | -0.005375736 |
| loss_pol_surr   | -0.011559766 |
| loss_vf_loss    | 214.55441

KeyboardInterrupt: 