In [1]:
!pip install gym_super_mario_bros==7.4.0 nes_py

!pip install torch torchvision torchaudio

!pip install stable-baselines3[extra]

%load_ext tensorboard

Defaulting to user installation because normal site-packages is not writeable
Collecting gym_super_mario_bros==7.4.0
  Downloading gym_super_mario_bros-7.4.0-py3-none-any.whl (199 kB)
Collecting nes_py
  Downloading nes_py-8.2.1.tar.gz (77 kB)
Collecting gym>=0.17.2
  Downloading gym-0.26.2.tar.gz (721 kB)
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
    Preparing wheel metadata: started
    Preparing wheel metadata: finished with status 'done'
Collecting pyglet<=1.5.21,>=1.4.0
  Downloading pyglet-1.5.21-py3-none-any.whl (1.1 MB)
Collecting gym-notices>=0.0.4
  Downloading gym_notices-0.0.8-py3-none-any.whl (3.0 kB)
Building wheels for collected packages: nes-py, gym
  Building wheel for nes-py (setup.py): started
  Building wheel for nes-py (setup.py): finished with status 'done'
  Created wheel for nes-py: filename

In [2]:
import gym_super_mario_bros

from nes_py.wrappers import JoypadSpace

from gym_super_mario_bros.actions import SIMPLE_MOVEMENT

import os

from stable_baselines3 import PPO
from stable_baselines3 import DQN
from stable_baselines3 import A2C

from stable_baselines3.common.callbacks import BaseCallback

In [3]:
# Base environment setup
env = gym_super_mario_bros.make('SuperMarioBros2-v0')
# Simplify controls
env = JoypadSpace(env, SIMPLE_MOVEMENT)

In [4]:
class TrainAndLoggingCallback(BaseCallback):

    def __init__(self, check_freq, save_path, verbose=1):
        super(TrainAndLoggingCallback, self).__init__(verbose)
        self.check_freq = check_freq
        self.save_path = save_path

    def _init_callback(self):
        if self.save_path is not None:
            os.makedirs(self.save_path, exist_ok=True)
    
    def _on_step(self):
        if self.n_calls % self.check_freq == 0:
            model_path = os.path.join(self.save_path, 'best_model_{}'.format(self.n_calls))
            self.model.save(model_path)

        return True

In [5]:
CHECKPOINT_DIR = './train/'
LOG_DIR = './logs/'

In [6]:
%tensorboard --logdir logs

In [7]:
callback = TrainAndLoggingCallback(check_freq=10000, save_path=CHECKPOINT_DIR)

In [8]:
model = A2C('MlpPolicy', env, verbose=1, tensorboard_log=LOG_DIR, learning_rate=0.001, n_steps=256, seed=10)

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.


In [9]:
import time

In [None]:
start_time = time.time()
model.learn(total_timesteps=1000000, callback=callback)
training_time = time.time() - start_time

Logging to ./logs/PPO_1
----------------------------
| time/              |     |
|    fps             | 28  |
|    iterations      | 1   |
|    time_elapsed    | 8   |
|    total_timesteps | 256 |
----------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 44          |
|    iterations           | 2           |
|    time_elapsed         | 11          |
|    total_timesteps      | 512         |
| train/                  |             |
|    approx_kl            | 0.052752715 |
|    clip_fraction        | 0.491       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.89       |
|    explained_variance   | -0.00534    |
|    learning_rate        | 0.001       |
|    loss                 | 12.3        |
|    n_updates            | 10          |
|    policy_gradient_loss | 0.0397      |
|    value_loss           | 201         |
-----------------------------------------
-----------------

------------------------------------------
| time/                   |              |
|    fps                  | 88           |
|    iterations           | 13           |
|    time_elapsed         | 37           |
|    total_timesteps      | 3328         |
| train/                  |              |
|    approx_kl            | 0.0155687025 |
|    clip_fraction        | 0.191        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.612       |
|    explained_variance   | -0.108       |
|    learning_rate        | 0.001        |
|    loss                 | 0.0156       |
|    n_updates            | 120          |
|    policy_gradient_loss | 0.0129       |
|    value_loss           | 0.178        |
------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 87          |
|    iterations           | 14          |
|    time_elapsed         | 40          |
|    total_times

-----------------------------------------
| time/                   |             |
|    fps                  | 85          |
|    iterations           | 24          |
|    time_elapsed         | 72          |
|    total_timesteps      | 6144        |
| train/                  |             |
|    approx_kl            | 0.024056206 |
|    clip_fraction        | 0.24        |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.19       |
|    explained_variance   | 0.312       |
|    learning_rate        | 0.001       |
|    loss                 | -0.00897    |
|    n_updates            | 230         |
|    policy_gradient_loss | -0.0249     |
|    value_loss           | 0.0712      |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 84         |
|    iterations           | 25         |
|    time_elapsed         | 75         |
|    total_timesteps      | 6400       

-----------------------------------------
| time/                   |             |
|    fps                  | 45          |
|    iterations           | 35          |
|    time_elapsed         | 199         |
|    total_timesteps      | 8960        |
| train/                  |             |
|    approx_kl            | 0.027120464 |
|    clip_fraction        | 0.278       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.26       |
|    explained_variance   | -0.00249    |
|    learning_rate        | 0.001       |
|    loss                 | 0.0127      |
|    n_updates            | 340         |
|    policy_gradient_loss | -0.0315     |
|    value_loss           | 0.17        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 43          |
|    iterations           | 36          |
|    time_elapsed         | 213         |
|    total_timesteps      | 9216  

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 5.28e+03  |
|    ep_rew_mean          | 838       |
| time/                   |           |
|    fps                  | 34        |
|    iterations           | 46        |
|    time_elapsed         | 345       |
|    total_timesteps      | 11776     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -1.62e-14 |
|    explained_variance   | -0.188    |
|    learning_rate        | 0.001     |
|    loss                 | 0.439     |
|    n_updates            | 450       |
|    policy_gradient_loss | 1.93e-08  |
|    value_loss           | 2.54      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 5.28e+03  |
|    ep_rew_mean          | 838       |


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5.28e+03    |
|    ep_rew_mean          | 838         |
| time/                   |             |
|    fps                  | 29          |
|    iterations           | 56          |
|    time_elapsed         | 483         |
|    total_timesteps      | 14336       |
| train/                  |             |
|    approx_kl            | 0.010437658 |
|    clip_fraction        | 0.0473      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.287      |
|    explained_variance   | -0.0358     |
|    learning_rate        | 0.001       |
|    loss                 | 0.0454      |
|    n_updates            | 550         |
|    policy_gradient_loss | -0.00195    |
|    value_loss           | 0.226       |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5.28e+

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 5.28e+03   |
|    ep_rew_mean          | 838        |
| time/                   |            |
|    fps                  | 27         |
|    iterations           | 66         |
|    time_elapsed         | 613        |
|    total_timesteps      | 16896      |
| train/                  |            |
|    approx_kl            | 0.06696649 |
|    clip_fraction        | 0.334      |
|    clip_range           | 0.2        |
|    entropy_loss         | -1.18      |
|    explained_variance   | -0.214     |
|    learning_rate        | 0.001      |
|    loss                 | -0.0589    |
|    n_updates            | 650        |
|    policy_gradient_loss | -0.0413    |
|    value_loss           | 0.0944     |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 5.28e+03   |
|    ep_rew_mean

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 5.28e+03   |
|    ep_rew_mean          | 838        |
| time/                   |            |
|    fps                  | 25         |
|    iterations           | 76         |
|    time_elapsed         | 749        |
|    total_timesteps      | 19456      |
| train/                  |            |
|    approx_kl            | 0.05615893 |
|    clip_fraction        | 0.32       |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.99      |
|    explained_variance   | -0.14      |
|    learning_rate        | 0.001      |
|    loss                 | -0.0336    |
|    n_updates            | 750        |
|    policy_gradient_loss | -0.0418    |
|    value_loss           | 0.141      |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5.28e+03    |
|    ep_rew_m

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 6.87e+03   |
|    ep_rew_mean          | 905        |
| time/                   |            |
|    fps                  | 24         |
|    iterations           | 86         |
|    time_elapsed         | 885        |
|    total_timesteps      | 22016      |
| train/                  |            |
|    approx_kl            | 0.06839679 |
|    clip_fraction        | 0.152      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.479     |
|    explained_variance   | -0.0283    |
|    learning_rate        | 0.001      |
|    loss                 | 0.00214    |
|    n_updates            | 850        |
|    policy_gradient_loss | -0.0188    |
|    value_loss           | 0.123      |
----------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 6.87e+03  |
|    ep_rew_mean   

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 6.87e+03    |
|    ep_rew_mean          | 905         |
| time/                   |             |
|    fps                  | 23          |
|    iterations           | 96          |
|    time_elapsed         | 1030        |
|    total_timesteps      | 24576       |
| train/                  |             |
|    approx_kl            | 0.074769005 |
|    clip_fraction        | 0.355       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.11       |
|    explained_variance   | -0.39       |
|    learning_rate        | 0.001       |
|    loss                 | -0.0707     |
|    n_updates            | 950         |
|    policy_gradient_loss | -0.0631     |
|    value_loss           | 0.0884      |
-----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 6.87e+03

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 6.87e+03   |
|    ep_rew_mean          | 905        |
| time/                   |            |
|    fps                  | 23         |
|    iterations           | 106        |
|    time_elapsed         | 1167       |
|    total_timesteps      | 27136      |
| train/                  |            |
|    approx_kl            | 0.09602174 |
|    clip_fraction        | 0.463      |
|    clip_range           | 0.2        |
|    entropy_loss         | -1.12      |
|    explained_variance   | -0.302     |
|    learning_rate        | 0.001      |
|    loss                 | -0.0385    |
|    n_updates            | 1050       |
|    policy_gradient_loss | -0.0605    |
|    value_loss           | 0.0984     |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 6.87e+03    |
|    ep_rew_m

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 6.87e+03   |
|    ep_rew_mean          | 905        |
| time/                   |            |
|    fps                  | 22         |
|    iterations           | 116        |
|    time_elapsed         | 1303       |
|    total_timesteps      | 29696      |
| train/                  |            |
|    approx_kl            | 0.10528921 |
|    clip_fraction        | 0.502      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.939     |
|    explained_variance   | -0.137     |
|    learning_rate        | 0.001      |
|    loss                 | -0.0403    |
|    n_updates            | 1150       |
|    policy_gradient_loss | -0.0439    |
|    value_loss           | 0.203      |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 6.87e+03   |
|    ep_rew_mean

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 6.87e+03   |
|    ep_rew_mean          | 905        |
| time/                   |            |
|    fps                  | 22         |
|    iterations           | 126        |
|    time_elapsed         | 1436       |
|    total_timesteps      | 32256      |
| train/                  |            |
|    approx_kl            | 0.08466415 |
|    clip_fraction        | 0.394      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.861     |
|    explained_variance   | -0.204     |
|    learning_rate        | 0.001      |
|    loss                 | -0.00419   |
|    n_updates            | 1250       |
|    policy_gradient_loss | -0.0552    |
|    value_loss           | 0.167      |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 6.87e+03   |
|    ep_rew_mean

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 6.87e+03   |
|    ep_rew_mean          | 905        |
| time/                   |            |
|    fps                  | 22         |
|    iterations           | 136        |
|    time_elapsed         | 1575       |
|    total_timesteps      | 34816      |
| train/                  |            |
|    approx_kl            | 0.14379299 |
|    clip_fraction        | 0.455      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.921     |
|    explained_variance   | -0.267     |
|    learning_rate        | 0.001      |
|    loss                 | -0.0723    |
|    n_updates            | 1350       |
|    policy_gradient_loss | -0.0563    |
|    value_loss           | 0.0749     |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 6.87e+03   |
|    ep_rew_mean

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 7.37e+03  |
|    ep_rew_mean          | 1.01e+03  |
| time/                   |           |
|    fps                  | 21        |
|    iterations           | 146       |
|    time_elapsed         | 1717      |
|    total_timesteps      | 37376     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -1.32e-34 |
|    explained_variance   | 0.877     |
|    learning_rate        | 0.001     |
|    loss                 | 21.1      |
|    n_updates            | 1450      |
|    policy_gradient_loss | -1.1e-08  |
|    value_loss           | 81.6      |
---------------------------------------
--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 6.25e+03 |
|    ep_rew_mean          | 1.06e+03 |
| ti

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 4.38e+03  |
|    ep_rew_mean          | 1.15e+03  |
| time/                   |           |
|    fps                  | 21        |
|    iterations           | 156       |
|    time_elapsed         | 1857      |
|    total_timesteps      | 39936     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | 0         |
|    explained_variance   | 0.726     |
|    learning_rate        | 0.001     |
|    loss                 | 11.9      |
|    n_updates            | 1550      |
|    policy_gradient_loss | -2.33e-11 |
|    value_loss           | 127       |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 4e+03     |
|    ep_rew_mean          | 1.17e+03  |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 3.23e+03  |
|    ep_rew_mean          | 1.2e+03   |
| time/                   |           |
|    fps                  | 21        |
|    iterations           | 166       |
|    time_elapsed         | 1988      |
|    total_timesteps      | 42496     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | 0         |
|    explained_variance   | 0.602     |
|    learning_rate        | 0.001     |
|    loss                 | 11.9      |
|    n_updates            | 1650      |
|    policy_gradient_loss | -9.31e-10 |
|    value_loss           | 181       |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 3.04e+03  |
|    ep_rew_mean          | 1.21e+03  |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.62e+03  |
|    ep_rew_mean          | 1.23e+03  |
| time/                   |           |
|    fps                  | 21        |
|    iterations           | 176       |
|    time_elapsed         | 2124      |
|    total_timesteps      | 45056     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | 0         |
|    explained_variance   | 0.713     |
|    learning_rate        | 0.001     |
|    loss                 | 31.9      |
|    n_updates            | 1750      |
|    policy_gradient_loss | -2.58e-09 |
|    value_loss           | 175       |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.51e+03  |
|    ep_rew_mean          | 1.24e+03  |


--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 2.24e+03 |
|    ep_rew_mean          | 1.25e+03 |
| time/                   |          |
|    fps                  | 21       |
|    iterations           | 186      |
|    time_elapsed         | 2263     |
|    total_timesteps      | 47616    |
| train/                  |          |
|    approx_kl            | 0.0      |
|    clip_fraction        | 0        |
|    clip_range           | 0.2      |
|    entropy_loss         | 0        |
|    explained_variance   | 0.81     |
|    learning_rate        | 0.001    |
|    loss                 | 29.2     |
|    n_updates            | 1850     |
|    policy_gradient_loss | -2.7e-09 |
|    value_loss           | 99.2     |
--------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 2.17e+03  |
|    ep_rew_mean          | 1.25e+03  |
| time/              

--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 1.98e+03 |
|    ep_rew_mean          | 1.26e+03 |
| time/                   |          |
|    fps                  | 20       |
|    iterations           | 196      |
|    time_elapsed         | 2397     |
|    total_timesteps      | 50176    |
| train/                  |          |
|    approx_kl            | 0.0      |
|    clip_fraction        | 0        |
|    clip_range           | 0.2      |
|    entropy_loss         | 0        |
|    explained_variance   | 0.718    |
|    learning_rate        | 0.001    |
|    loss                 | 16.7     |
|    n_updates            | 1950     |
|    policy_gradient_loss | 3.52e-09 |
|    value_loss           | 146      |
--------------------------------------
--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 1.93e+03 |
|    ep_rew_mean          | 1.26e+03 |
| time/                  

--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 1.8e+03  |
|    ep_rew_mean          | 1.27e+03 |
| time/                   |          |
|    fps                  | 20       |
|    iterations           | 206      |
|    time_elapsed         | 2535     |
|    total_timesteps      | 52736    |
| train/                  |          |
|    approx_kl            | 0.0      |
|    clip_fraction        | 0        |
|    clip_range           | 0.2      |
|    entropy_loss         | 0        |
|    explained_variance   | 0.697    |
|    learning_rate        | 0.001    |
|    loss                 | 16.3     |
|    n_updates            | 2050     |
|    policy_gradient_loss | 8.15e-10 |
|    value_loss           | 180      |
--------------------------------------
--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 1.76e+03 |
|    ep_rew_mean          | 1.27e+03 |
| time/                  

--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 1.66e+03 |
|    ep_rew_mean          | 1.28e+03 |
| time/                   |          |
|    fps                  | 20       |
|    iterations           | 216      |
|    time_elapsed         | 2676     |
|    total_timesteps      | 55296    |
| train/                  |          |
|    approx_kl            | 0.0      |
|    clip_fraction        | 0        |
|    clip_range           | 0.2      |
|    entropy_loss         | 0        |
|    explained_variance   | 0.653    |
|    learning_rate        | 0.001    |
|    loss                 | 7.93     |
|    n_updates            | 2150     |
|    policy_gradient_loss | 3.93e-09 |
|    value_loss           | 158      |
--------------------------------------
--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 1.63e+03 |
|    ep_rew_mean          | 1.28e+03 |
| time/                  

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1.55e+03  |
|    ep_rew_mean          | 1.28e+03  |
| time/                   |           |
|    fps                  | 20        |
|    iterations           | 226       |
|    time_elapsed         | 2813      |
|    total_timesteps      | 57856     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | 0         |
|    explained_variance   | 0.688     |
|    learning_rate        | 0.001     |
|    loss                 | 23.3      |
|    n_updates            | 2250      |
|    policy_gradient_loss | -1.49e-09 |
|    value_loss           | 112       |
---------------------------------------
--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 1.52e+03 |
|    ep_rew_mean          | 1.28e+03 |
| ti

--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 1.46e+03 |
|    ep_rew_mean          | 1.28e+03 |
| time/                   |          |
|    fps                  | 20       |
|    iterations           | 236      |
|    time_elapsed         | 2950     |
|    total_timesteps      | 60416    |
| train/                  |          |
|    approx_kl            | 0.0      |
|    clip_fraction        | 0        |
|    clip_range           | 0.2      |
|    entropy_loss         | 0        |
|    explained_variance   | 0.753    |
|    learning_rate        | 0.001    |
|    loss                 | 40.4     |
|    n_updates            | 2350     |
|    policy_gradient_loss | 2.33e-10 |
|    value_loss           | 113      |
--------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1.44e+03  |
|    ep_rew_mean          | 1.29e+03  |
| time/              

--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 1.37e+03 |
|    ep_rew_mean          | 1.29e+03 |
| time/                   |          |
|    fps                  | 20       |
|    iterations           | 246      |
|    time_elapsed         | 3087     |
|    total_timesteps      | 62976    |
| train/                  |          |
|    approx_kl            | 0.0      |
|    clip_fraction        | 0        |
|    clip_range           | 0.2      |
|    entropy_loss         | 0        |
|    explained_variance   | 0.877    |
|    learning_rate        | 0.001    |
|    loss                 | 22       |
|    n_updates            | 2450     |
|    policy_gradient_loss | 7.45e-10 |
|    value_loss           | 66.9     |
--------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1.37e+03  |
|    ep_rew_mean          | 1.29e+03  |
| time/              

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1.31e+03  |
|    ep_rew_mean          | 1.29e+03  |
| time/                   |           |
|    fps                  | 20        |
|    iterations           | 256       |
|    time_elapsed         | 3223      |
|    total_timesteps      | 65536     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | 0         |
|    explained_variance   | 0.854     |
|    learning_rate        | 0.001     |
|    loss                 | 68.4      |
|    n_updates            | 2550      |
|    policy_gradient_loss | -1.77e-09 |
|    value_loss           | 118       |
---------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.31e+03     |
|    ep_rew_mean          | 1.2

--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 1.26e+03 |
|    ep_rew_mean          | 1.29e+03 |
| time/                   |          |
|    fps                  | 20       |
|    iterations           | 266      |
|    time_elapsed         | 3356     |
|    total_timesteps      | 68096    |
| train/                  |          |
|    approx_kl            | 0.0      |
|    clip_fraction        | 0        |
|    clip_range           | 0.2      |
|    entropy_loss         | 0        |
|    explained_variance   | 0.911    |
|    learning_rate        | 0.001    |
|    loss                 | 9        |
|    n_updates            | 2650     |
|    policy_gradient_loss | 1.51e-09 |
|    value_loss           | 86.5     |
--------------------------------------
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.26e+03      |
|    ep_rew_mean          | 1.29e+03      |
| tim

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1.22e+03  |
|    ep_rew_mean          | 1.3e+03   |
| time/                   |           |
|    fps                  | 20        |
|    iterations           | 276       |
|    time_elapsed         | 3491      |
|    total_timesteps      | 70656     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | 0         |
|    explained_variance   | 0.409     |
|    learning_rate        | 0.001     |
|    loss                 | 95.6      |
|    n_updates            | 2750      |
|    policy_gradient_loss | -6.52e-10 |
|    value_loss           | 337       |
---------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.22e+03     |
|    ep_rew_mean          | 1.3

--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 1.18e+03 |
|    ep_rew_mean          | 1.3e+03  |
| time/                   |          |
|    fps                  | 20       |
|    iterations           | 286      |
|    time_elapsed         | 3628     |
|    total_timesteps      | 73216    |
| train/                  |          |
|    approx_kl            | 0.0      |
|    clip_fraction        | 0        |
|    clip_range           | 0.2      |
|    entropy_loss         | 0        |
|    explained_variance   | 0.496    |
|    learning_rate        | 0.001    |
|    loss                 | 94.4     |
|    n_updates            | 2850     |
|    policy_gradient_loss | 3.63e-09 |
|    value_loss           | 352      |
--------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.18e+03    |
|    ep_rew_mean          | 1.3e+03     |
| time/      

--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 1.15e+03 |
|    ep_rew_mean          | 1.3e+03  |
| time/                   |          |
|    fps                  | 20       |
|    iterations           | 296      |
|    time_elapsed         | 3769     |
|    total_timesteps      | 75776    |
| train/                  |          |
|    approx_kl            | 0.0      |
|    clip_fraction        | 0        |
|    clip_range           | 0.2      |
|    entropy_loss         | 0        |
|    explained_variance   | 0.651    |
|    learning_rate        | 0.001    |
|    loss                 | 57.3     |
|    n_updates            | 2950     |
|    policy_gradient_loss | 1e-09    |
|    value_loss           | 228      |
--------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.15e+03     |
|    ep_rew_mean          | 1.3e+03      |
| time/  

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1.12e+03  |
|    ep_rew_mean          | 1.3e+03   |
| time/                   |           |
|    fps                  | 20        |
|    iterations           | 306       |
|    time_elapsed         | 3895      |
|    total_timesteps      | 78336     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | 0         |
|    explained_variance   | 0.628     |
|    learning_rate        | 0.001     |
|    loss                 | 14        |
|    n_updates            | 3050      |
|    policy_gradient_loss | -5.12e-09 |
|    value_loss           | 188       |
---------------------------------------
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.12e+03      |
|    ep_rew_mean          | 

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1.09e+03  |
|    ep_rew_mean          | 1.3e+03   |
| time/                   |           |
|    fps                  | 20        |
|    iterations           | 316       |
|    time_elapsed         | 4034      |
|    total_timesteps      | 80896     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | 0         |
|    explained_variance   | 0.758     |
|    learning_rate        | 0.001     |
|    loss                 | 90.5      |
|    n_updates            | 3150      |
|    policy_gradient_loss | -5.96e-09 |
|    value_loss           | 155       |
---------------------------------------
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 1.09e+03      |
|    ep_rew_mean          | 

--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 1.07e+03 |
|    ep_rew_mean          | 1.3e+03  |
| time/                   |          |
|    fps                  | 20       |
|    iterations           | 326      |
|    time_elapsed         | 4170     |
|    total_timesteps      | 83456    |
| train/                  |          |
|    approx_kl            | 0.0      |
|    clip_fraction        | 0        |
|    clip_range           | 0.2      |
|    entropy_loss         | 0        |
|    explained_variance   | 0.765    |
|    learning_rate        | 0.001    |
|    loss                 | 19.7     |
|    n_updates            | 3250     |
|    policy_gradient_loss | 7.45e-10 |
|    value_loss           | 155      |
--------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.07e+03     |
|    ep_rew_mean          | 1.3e+03      |
| time/  

--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 1.05e+03 |
|    ep_rew_mean          | 1.3e+03  |
| time/                   |          |
|    fps                  | 19       |
|    iterations           | 336      |
|    time_elapsed         | 4307     |
|    total_timesteps      | 86016    |
| train/                  |          |
|    approx_kl            | 0.0      |
|    clip_fraction        | 0        |
|    clip_range           | 0.2      |
|    entropy_loss         | 0        |
|    explained_variance   | 0.647    |
|    learning_rate        | 0.001    |
|    loss                 | 48.3     |
|    n_updates            | 3350     |
|    policy_gradient_loss | 4.19e-10 |
|    value_loss           | 149      |
--------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.05e+03    |
|    ep_rew_mean          | 1.3e+03     |
| time/      

--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 1.03e+03 |
|    ep_rew_mean          | 1.3e+03  |
| time/                   |          |
|    fps                  | 19       |
|    iterations           | 346      |
|    time_elapsed         | 4445     |
|    total_timesteps      | 88576    |
| train/                  |          |
|    approx_kl            | 0.0      |
|    clip_fraction        | 0        |
|    clip_range           | 0.2      |
|    entropy_loss         | 0        |
|    explained_variance   | 0.763    |
|    learning_rate        | 0.001    |
|    loss                 | 30.5     |
|    n_updates            | 3450     |
|    policy_gradient_loss | 6.05e-10 |
|    value_loss           | 106      |
--------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1.03e+03  |
|    ep_rew_mean          | 1.3e+03   |
| time/              

--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 1.01e+03 |
|    ep_rew_mean          | 1.31e+03 |
| time/                   |          |
|    fps                  | 19       |
|    iterations           | 356      |
|    time_elapsed         | 4580     |
|    total_timesteps      | 91136    |
| train/                  |          |
|    approx_kl            | 0.0      |
|    clip_fraction        | 0        |
|    clip_range           | 0.2      |
|    entropy_loss         | 0        |
|    explained_variance   | 0.707    |
|    learning_rate        | 0.001    |
|    loss                 | 33.5     |
|    n_updates            | 3550     |
|    policy_gradient_loss | 6.98e-10 |
|    value_loss           | 123      |
--------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.01e+03   |
|    ep_rew_mean          | 1.31e+03   |
| time/          

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 995       |
|    ep_rew_mean          | 1.31e+03  |
| time/                   |           |
|    fps                  | 19        |
|    iterations           | 366       |
|    time_elapsed         | 4721      |
|    total_timesteps      | 93696     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | 0         |
|    explained_variance   | 0.738     |
|    learning_rate        | 0.001     |
|    loss                 | 34.1      |
|    n_updates            | 3650      |
|    policy_gradient_loss | -7.45e-10 |
|    value_loss           | 113       |
---------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 995         |
|    ep_rew_mean          | 1.31e+

--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 981      |
|    ep_rew_mean          | 1.31e+03 |
| time/                   |          |
|    fps                  | 19       |
|    iterations           | 376      |
|    time_elapsed         | 4859     |
|    total_timesteps      | 96256    |
| train/                  |          |
|    approx_kl            | 0.0      |
|    clip_fraction        | 0        |
|    clip_range           | 0.2      |
|    entropy_loss         | 0        |
|    explained_variance   | 0.656    |
|    learning_rate        | 0.001    |
|    loss                 | 58.3     |
|    n_updates            | 3750     |
|    policy_gradient_loss | 4.19e-10 |
|    value_loss           | 148      |
--------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 981          |
|    ep_rew_mean          | 1.31e+03     |
| time/  

--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 881      |
|    ep_rew_mean          | 1.32e+03 |
| time/                   |          |
|    fps                  | 19       |
|    iterations           | 386      |
|    time_elapsed         | 4994     |
|    total_timesteps      | 98816    |
| train/                  |          |
|    approx_kl            | 0.0      |
|    clip_fraction        | 0        |
|    clip_range           | 0.2      |
|    entropy_loss         | 0        |
|    explained_variance   | 0.668    |
|    learning_rate        | 0.001    |
|    loss                 | 30.7     |
|    n_updates            | 3850     |
|    policy_gradient_loss | 1.96e-09 |
|    value_loss           | 134      |
--------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 881          |
|    ep_rew_mean          | 1.32e+03     |
| time/  

--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 637      |
|    ep_rew_mean          | 1.32e+03 |
| time/                   |          |
|    fps                  | 19       |
|    iterations           | 396      |
|    time_elapsed         | 5130     |
|    total_timesteps      | 101376   |
| train/                  |          |
|    approx_kl            | 0.0      |
|    clip_fraction        | 0        |
|    clip_range           | 0.2      |
|    entropy_loss         | 0        |
|    explained_variance   | 0.688    |
|    learning_rate        | 0.001    |
|    loss                 | 6.43     |
|    n_updates            | 3950     |
|    policy_gradient_loss | 9.31e-11 |
|    value_loss           | 92.6     |
--------------------------------------
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 637           |
|    ep_rew_mean          | 1.32e+03      |
| tim

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 637       |
|    ep_rew_mean          | 1.32e+03  |
| time/                   |           |
|    fps                  | 19        |
|    iterations           | 406       |
|    time_elapsed         | 5259      |
|    total_timesteps      | 103936    |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | 0         |
|    explained_variance   | 0.63      |
|    learning_rate        | 0.001     |
|    loss                 | 18.3      |
|    n_updates            | 4050      |
|    policy_gradient_loss | -4.12e-09 |
|    value_loss           | 212       |
---------------------------------------
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 637           |
|    ep_rew_mean          | 

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 637       |
|    ep_rew_mean          | 1.32e+03  |
| time/                   |           |
|    fps                  | 19        |
|    iterations           | 416       |
|    time_elapsed         | 5394      |
|    total_timesteps      | 106496    |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | 0         |
|    explained_variance   | 0.747     |
|    learning_rate        | 0.001     |
|    loss                 | 45        |
|    n_updates            | 4150      |
|    policy_gradient_loss | -2.28e-09 |
|    value_loss           | 147       |
---------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 637         |
|    ep_rew_mean          | 1.32e+

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 637       |
|    ep_rew_mean          | 1.32e+03  |
| time/                   |           |
|    fps                  | 19        |
|    iterations           | 426       |
|    time_elapsed         | 5532      |
|    total_timesteps      | 109056    |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | 0         |
|    explained_variance   | 0.719     |
|    learning_rate        | 0.001     |
|    loss                 | 65.9      |
|    n_updates            | 4250      |
|    policy_gradient_loss | -5.45e-09 |
|    value_loss           | 208       |
---------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 637         |
|    ep_rew_mean          | 1.32e+

--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 637      |
|    ep_rew_mean          | 1.32e+03 |
| time/                   |          |
|    fps                  | 19       |
|    iterations           | 436      |
|    time_elapsed         | 5672     |
|    total_timesteps      | 111616   |
| train/                  |          |
|    approx_kl            | 0.0      |
|    clip_fraction        | 0        |
|    clip_range           | 0.2      |
|    entropy_loss         | 0        |
|    explained_variance   | 0.739    |
|    learning_rate        | 0.001    |
|    loss                 | 76.7     |
|    n_updates            | 4350     |
|    policy_gradient_loss | 1.37e-09 |
|    value_loss           | 271      |
--------------------------------------
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 637           |
|    ep_rew_mean          | 1.32e+03      |
| tim

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 637       |
|    ep_rew_mean          | 1.32e+03  |
| time/                   |           |
|    fps                  | 19        |
|    iterations           | 446       |
|    time_elapsed         | 5813      |
|    total_timesteps      | 114176    |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | 0         |
|    explained_variance   | 0.786     |
|    learning_rate        | 0.001     |
|    loss                 | 78.1      |
|    n_updates            | 4450      |
|    policy_gradient_loss | -3.38e-10 |
|    value_loss           | 170       |
---------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 637         |
|    ep_rew_mean          | 1.32e+

-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 637           |
|    ep_rew_mean          | 1.32e+03      |
| time/                   |               |
|    fps                  | 19            |
|    iterations           | 456           |
|    time_elapsed         | 5949          |
|    total_timesteps      | 116736        |
| train/                  |               |
|    approx_kl            | 5.9232116e-07 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.000343     |
|    explained_variance   | 0.753         |
|    learning_rate        | 0.001         |
|    loss                 | 309           |
|    n_updates            | 4550          |
|    policy_gradient_loss | 2.2e-05       |
|    value_loss           | 682           |
-------------------------------------------
--------------------------------------
| rollout/                |          

-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 637           |
|    ep_rew_mean          | 1.32e+03      |
| time/                   |               |
|    fps                  | 19            |
|    iterations           | 466           |
|    time_elapsed         | 6082          |
|    total_timesteps      | 119296        |
| train/                  |               |
|    approx_kl            | 1.2596138e-07 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.000541     |
|    explained_variance   | 0.763         |
|    learning_rate        | 0.001         |
|    loss                 | 362           |
|    n_updates            | 4650          |
|    policy_gradient_loss | 2.91e-05      |
|    value_loss           | 672           |
-------------------------------------------
---------------------------------------
| rollout/                |         

-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 637           |
|    ep_rew_mean          | 1.32e+03      |
| time/                   |               |
|    fps                  | 19            |
|    iterations           | 476           |
|    time_elapsed         | 6214          |
|    total_timesteps      | 121856        |
| train/                  |               |
|    approx_kl            | 1.9092113e-08 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.000212     |
|    explained_variance   | 0.8           |
|    learning_rate        | 0.001         |
|    loss                 | 341           |
|    n_updates            | 4750          |
|    policy_gradient_loss | 2.14e-06      |
|    value_loss           | 573           |
-------------------------------------------
---------------------------------------
| rollout/                |         

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 637         |
|    ep_rew_mean          | 1.32e+03    |
| time/                   |             |
|    fps                  | 19          |
|    iterations           | 486         |
|    time_elapsed         | 6346        |
|    total_timesteps      | 124416      |
| train/                  |             |
|    approx_kl            | 0.021555042 |
|    clip_fraction        | 0.00273     |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.00113    |
|    explained_variance   | 0.79        |
|    learning_rate        | 0.001       |
|    loss                 | 259         |
|    n_updates            | 4850        |
|    policy_gradient_loss | 0.00116     |
|    value_loss           | 393         |
-----------------------------------------
--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 637      |
|

-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 637           |
|    ep_rew_mean          | 1.32e+03      |
| time/                   |               |
|    fps                  | 19            |
|    iterations           | 496           |
|    time_elapsed         | 6491          |
|    total_timesteps      | 126976        |
| train/                  |               |
|    approx_kl            | 7.0780516e-08 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.000295     |
|    explained_variance   | 0.866         |
|    learning_rate        | 0.001         |
|    loss                 | 65.7          |
|    n_updates            | 4950          |
|    policy_gradient_loss | -2.52e-05     |
|    value_loss           | 325           |
-------------------------------------------
---------------------------------------
| rollout/                |         

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 637       |
|    ep_rew_mean          | 1.32e+03  |
| time/                   |           |
|    fps                  | 19        |
|    iterations           | 506       |
|    time_elapsed         | 6626      |
|    total_timesteps      | 129536    |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -1.9e-18  |
|    explained_variance   | 0.818     |
|    learning_rate        | 0.001     |
|    loss                 | 177       |
|    n_updates            | 5050      |
|    policy_gradient_loss | -2.61e-09 |
|    value_loss           | 464       |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 637       |
|    ep_rew_mean          | 1.32e+03  |


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 637          |
|    ep_rew_mean          | 1.32e+03     |
| time/                   |              |
|    fps                  | 19           |
|    iterations           | 516          |
|    time_elapsed         | 6761         |
|    total_timesteps      | 132096       |
| train/                  |              |
|    approx_kl            | 0.0027255109 |
|    clip_fraction        | 0.000391     |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.000414    |
|    explained_variance   | 0.816        |
|    learning_rate        | 0.001        |
|    loss                 | 174          |
|    n_updates            | 5150         |
|    policy_gradient_loss | 0.000218     |
|    value_loss           | 420          |
------------------------------------------
--------------------------------------
| rollout/                |          |
|    ep_len_mean   

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 637          |
|    ep_rew_mean          | 1.32e+03     |
| time/                   |              |
|    fps                  | 19           |
|    iterations           | 526          |
|    time_elapsed         | 6895         |
|    total_timesteps      | 134656       |
| train/                  |              |
|    approx_kl            | 0.0020420854 |
|    clip_fraction        | 0.00117      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.00109     |
|    explained_variance   | 0.749        |
|    learning_rate        | 0.001        |
|    loss                 | 195          |
|    n_updates            | 5250         |
|    policy_gradient_loss | -5.94e-05    |
|    value_loss           | 510          |
------------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean 

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 637         |
|    ep_rew_mean          | 1.32e+03    |
| time/                   |             |
|    fps                  | 19          |
|    iterations           | 536         |
|    time_elapsed         | 7031        |
|    total_timesteps      | 137216      |
| train/                  |             |
|    approx_kl            | 0.015308206 |
|    clip_fraction        | 0.00313     |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.000689   |
|    explained_variance   | 0.79        |
|    learning_rate        | 0.001       |
|    loss                 | 78.9        |
|    n_updates            | 5350        |
|    policy_gradient_loss | 0.00102     |
|    value_loss           | 416         |
-----------------------------------------
--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 637      |
|

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 637        |
|    ep_rew_mean          | 1.32e+03   |
| time/                   |            |
|    fps                  | 19         |
|    iterations           | 546        |
|    time_elapsed         | 7168       |
|    total_timesteps      | 139776     |
| train/                  |            |
|    approx_kl            | 0.03385414 |
|    clip_fraction        | 0.00273    |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.000352  |
|    explained_variance   | 0.778      |
|    learning_rate        | 0.001      |
|    loss                 | 185        |
|    n_updates            | 5450       |
|    policy_gradient_loss | -0.000452  |
|    value_loss           | 408        |
----------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 637       |
|    ep_rew_mean   

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 637          |
|    ep_rew_mean          | 1.32e+03     |
| time/                   |              |
|    fps                  | 19           |
|    iterations           | 556          |
|    time_elapsed         | 7306         |
|    total_timesteps      | 142336       |
| train/                  |              |
|    approx_kl            | 0.0009105783 |
|    clip_fraction        | 0.000391     |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.000378    |
|    explained_variance   | 0.729        |
|    learning_rate        | 0.001        |
|    loss                 | 435          |
|    n_updates            | 5550         |
|    policy_gradient_loss | -0.00012     |
|    value_loss           | 597          |
------------------------------------------
--------------------------------------
| rollout/                |          |
|    ep_len_mean   

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 637        |
|    ep_rew_mean          | 1.32e+03   |
| time/                   |            |
|    fps                  | 19         |
|    iterations           | 566        |
|    time_elapsed         | 7448       |
|    total_timesteps      | 144896     |
| train/                  |            |
|    approx_kl            | 0.00231617 |
|    clip_fraction        | 0.000391   |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.000314  |
|    explained_variance   | 0.779      |
|    learning_rate        | 0.001      |
|    loss                 | 215        |
|    n_updates            | 5650       |
|    policy_gradient_loss | -6.05e-06  |
|    value_loss           | 577        |
----------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 637       |
|    ep_rew_mean   

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 637         |
|    ep_rew_mean          | 1.32e+03    |
| time/                   |             |
|    fps                  | 19          |
|    iterations           | 576         |
|    time_elapsed         | 7580        |
|    total_timesteps      | 147456      |
| train/                  |             |
|    approx_kl            | 0.028327508 |
|    clip_fraction        | 0.00195     |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.000268   |
|    explained_variance   | 0.77        |
|    learning_rate        | 0.001       |
|    loss                 | 283         |
|    n_updates            | 5750        |
|    policy_gradient_loss | -8.73e-05   |
|    value_loss           | 523         |
-----------------------------------------
--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 637      |
|

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 637       |
|    ep_rew_mean          | 1.32e+03  |
| time/                   |           |
|    fps                  | 19        |
|    iterations           | 586       |
|    time_elapsed         | 7713      |
|    total_timesteps      | 150016    |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -3.43e-09 |
|    explained_variance   | 0.787     |
|    learning_rate        | 0.001     |
|    loss                 | 111       |
|    n_updates            | 5850      |
|    policy_gradient_loss | -3.8e-09  |
|    value_loss           | 451       |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 637       |
|    ep_rew_mean          | 1.32e+03  |


-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 637           |
|    ep_rew_mean          | 1.32e+03      |
| time/                   |               |
|    fps                  | 19            |
|    iterations           | 596           |
|    time_elapsed         | 7845          |
|    total_timesteps      | 152576        |
| train/                  |               |
|    approx_kl            | 0.00015325472 |
|    clip_fraction        | 0.00313       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.00094      |
|    explained_variance   | 0.777         |
|    learning_rate        | 0.001         |
|    loss                 | 120           |
|    n_updates            | 5950          |
|    policy_gradient_loss | -0.000154     |
|    value_loss           | 565           |
-------------------------------------------
--------------------------------------
| rollout/                |          

-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 637           |
|    ep_rew_mean          | 1.32e+03      |
| time/                   |               |
|    fps                  | 19            |
|    iterations           | 606           |
|    time_elapsed         | 7981          |
|    total_timesteps      | 155136        |
| train/                  |               |
|    approx_kl            | 2.3283064e-10 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -7.39e-06     |
|    explained_variance   | 0.752         |
|    learning_rate        | 0.001         |
|    loss                 | 113           |
|    n_updates            | 6050          |
|    policy_gradient_loss | 1.76e-07      |
|    value_loss           | 614           |
-------------------------------------------
---------------------------------------
| rollout/                |         

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 637       |
|    ep_rew_mean          | 1.32e+03  |
| time/                   |           |
|    fps                  | 19        |
|    iterations           | 616       |
|    time_elapsed         | 8115      |
|    total_timesteps      | 157696    |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -1.33e-06 |
|    explained_variance   | 0.754     |
|    learning_rate        | 0.001     |
|    loss                 | 128       |
|    n_updates            | 6150      |
|    policy_gradient_loss | 5.27e-08  |
|    value_loss           | 598       |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 637       |
|    ep_rew_mean          | 1.32e+03  |


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 637         |
|    ep_rew_mean          | 1.32e+03    |
| time/                   |             |
|    fps                  | 19          |
|    iterations           | 626         |
|    time_elapsed         | 8254        |
|    total_timesteps      | 160256      |
| train/                  |             |
|    approx_kl            | 0.048354242 |
|    clip_fraction        | 0.00352     |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.000276   |
|    explained_variance   | 0.591       |
|    learning_rate        | 0.001       |
|    loss                 | 71.6        |
|    n_updates            | 6250        |
|    policy_gradient_loss | -8.78e-05   |
|    value_loss           | 742         |
-----------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 637       

--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 637      |
|    ep_rew_mean          | 1.32e+03 |
| time/                   |          |
|    fps                  | 19       |
|    iterations           | 636      |
|    time_elapsed         | 8384     |
|    total_timesteps      | 162816   |
| train/                  |          |
|    approx_kl            | 0.0      |
|    clip_fraction        | 0        |
|    clip_range           | 0.2      |
|    entropy_loss         | -3.5e-05 |
|    explained_variance   | 0.74     |
|    learning_rate        | 0.001    |
|    loss                 | 245      |
|    n_updates            | 6350     |
|    policy_gradient_loss | 1.1e-06  |
|    value_loss           | 532      |
--------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 637       |
|    ep_rew_mean          | 1.32e+03  |
| time/              

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 637       |
|    ep_rew_mean          | 1.32e+03  |
| time/                   |           |
|    fps                  | 19        |
|    iterations           | 646       |
|    time_elapsed         | 8517      |
|    total_timesteps      | 165376    |
| train/                  |           |
|    approx_kl            | 0.0485935 |
|    clip_fraction        | 0.00313   |
|    clip_range           | 0.2       |
|    entropy_loss         | -0.000117 |
|    explained_variance   | 0.763     |
|    learning_rate        | 0.001     |
|    loss                 | 202       |
|    n_updates            | 6450      |
|    policy_gradient_loss | -0.000353 |
|    value_loss           | 529       |
---------------------------------------
--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 637      |
|    ep_rew_mean          | 1.32e+03 |
| ti

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 637       |
|    ep_rew_mean          | 1.32e+03  |
| time/                   |           |
|    fps                  | 19        |
|    iterations           | 656       |
|    time_elapsed         | 8652      |
|    total_timesteps      | 167936    |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -1.99e-08 |
|    explained_variance   | 0.752     |
|    learning_rate        | 0.001     |
|    loss                 | 320       |
|    n_updates            | 6550      |
|    policy_gradient_loss | -6.05e-10 |
|    value_loss           | 405       |
---------------------------------------
--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 637      |
|    ep_rew_mean          | 1.32e+03 |
| ti

--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 637      |
|    ep_rew_mean          | 1.32e+03 |
| time/                   |          |
|    fps                  | 19       |
|    iterations           | 666      |
|    time_elapsed         | 8789     |
|    total_timesteps      | 170496   |
| train/                  |          |
|    approx_kl            | 0.0      |
|    clip_fraction        | 0        |
|    clip_range           | 0.2      |
|    entropy_loss         | 0        |
|    explained_variance   | 0.862    |
|    learning_rate        | 0.001    |
|    loss                 | 3.09     |
|    n_updates            | 6650     |
|    policy_gradient_loss | 9.03e-09 |
|    value_loss           | 28       |
--------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 637       |
|    ep_rew_mean          | 1.32e+03  |
| time/              

--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 637      |
|    ep_rew_mean          | 1.32e+03 |
| time/                   |          |
|    fps                  | 19       |
|    iterations           | 676      |
|    time_elapsed         | 8926     |
|    total_timesteps      | 173056   |
| train/                  |          |
|    approx_kl            | 0.0      |
|    clip_fraction        | 0        |
|    clip_range           | 0.2      |
|    entropy_loss         | 0        |
|    explained_variance   | 0.872    |
|    learning_rate        | 0.001    |
|    loss                 | 4.03     |
|    n_updates            | 6750     |
|    policy_gradient_loss | 2.91e-09 |
|    value_loss           | 28.5     |
--------------------------------------
--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 637      |
|    ep_rew_mean          | 1.32e+03 |
| time/                  

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 637       |
|    ep_rew_mean          | 1.32e+03  |
| time/                   |           |
|    fps                  | 19        |
|    iterations           | 686       |
|    time_elapsed         | 9059      |
|    total_timesteps      | 175616    |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | 0         |
|    explained_variance   | 0.859     |
|    learning_rate        | 0.001     |
|    loss                 | 16.7      |
|    n_updates            | 6850      |
|    policy_gradient_loss | -3.21e-09 |
|    value_loss           | 40.5      |
---------------------------------------
--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 637      |
|    ep_rew_mean          | 1.32e+03 |
| ti

--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 637      |
|    ep_rew_mean          | 1.32e+03 |
| time/                   |          |
|    fps                  | 19       |
|    iterations           | 696      |
|    time_elapsed         | 9195     |
|    total_timesteps      | 178176   |
| train/                  |          |
|    approx_kl            | 0.0      |
|    clip_fraction        | 0        |
|    clip_range           | 0.2      |
|    entropy_loss         | 0        |
|    explained_variance   | 0.846    |
|    learning_rate        | 0.001    |
|    loss                 | 25.4     |
|    n_updates            | 6950     |
|    policy_gradient_loss | 3.49e-10 |
|    value_loss           | 49.8     |
--------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 637       |
|    ep_rew_mean          | 1.32e+03  |
| time/              

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 637       |
|    ep_rew_mean          | 1.32e+03  |
| time/                   |           |
|    fps                  | 19        |
|    iterations           | 706       |
|    time_elapsed         | 9329      |
|    total_timesteps      | 180736    |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | 0         |
|    explained_variance   | 0.819     |
|    learning_rate        | 0.001     |
|    loss                 | 25.2      |
|    n_updates            | 7050      |
|    policy_gradient_loss | -1.51e-09 |
|    value_loss           | 70.9      |
---------------------------------------
--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 637      |
|    ep_rew_mean          | 1.32e+03 |
| ti

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 637       |
|    ep_rew_mean          | 1.32e+03  |
| time/                   |           |
|    fps                  | 19        |
|    iterations           | 716       |
|    time_elapsed         | 9463      |
|    total_timesteps      | 183296    |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | 0         |
|    explained_variance   | 0.837     |
|    learning_rate        | 0.001     |
|    loss                 | 13.1      |
|    n_updates            | 7150      |
|    policy_gradient_loss | -9.31e-11 |
|    value_loss           | 67.1      |
---------------------------------------
--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 637      |
|    ep_rew_mean          | 1.32e+03 |
| ti

--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 637      |
|    ep_rew_mean          | 1.32e+03 |
| time/                   |          |
|    fps                  | 19       |
|    iterations           | 726      |
|    time_elapsed         | 9599     |
|    total_timesteps      | 185856   |
| train/                  |          |
|    approx_kl            | 0.0      |
|    clip_fraction        | 0        |
|    clip_range           | 0.2      |
|    entropy_loss         | 0        |
|    explained_variance   | 0.81     |
|    learning_rate        | 0.001    |
|    loss                 | 14       |
|    n_updates            | 7250     |
|    policy_gradient_loss | 8.03e-09 |
|    value_loss           | 75.9     |
--------------------------------------
--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 637      |
|    ep_rew_mean          | 1.32e+03 |
| time/                  

--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 637      |
|    ep_rew_mean          | 1.32e+03 |
| time/                   |          |
|    fps                  | 19       |
|    iterations           | 736      |
|    time_elapsed         | 9742     |
|    total_timesteps      | 188416   |
| train/                  |          |
|    approx_kl            | 0.0      |
|    clip_fraction        | 0        |
|    clip_range           | 0.2      |
|    entropy_loss         | 0        |
|    explained_variance   | 0.742    |
|    learning_rate        | 0.001    |
|    loss                 | 15.3     |
|    n_updates            | 7350     |
|    policy_gradient_loss | 2.75e-09 |
|    value_loss           | 48.2     |
--------------------------------------
--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 637      |
|    ep_rew_mean          | 1.32e+03 |
| time/                  

--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 637      |
|    ep_rew_mean          | 1.32e+03 |
| time/                   |          |
|    fps                  | 19       |
|    iterations           | 746      |
|    time_elapsed         | 9874     |
|    total_timesteps      | 190976   |
| train/                  |          |
|    approx_kl            | 0.0      |
|    clip_fraction        | 0        |
|    clip_range           | 0.2      |
|    entropy_loss         | 0        |
|    explained_variance   | 0.746    |
|    learning_rate        | 0.001    |
|    loss                 | 6.83     |
|    n_updates            | 7450     |
|    policy_gradient_loss | 7.92e-10 |
|    value_loss           | 67.9     |
--------------------------------------
--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 637      |
|    ep_rew_mean          | 1.32e+03 |
| time/                  

--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 637      |
|    ep_rew_mean          | 1.32e+03 |
| time/                   |          |
|    fps                  | 19       |
|    iterations           | 756      |
|    time_elapsed         | 10002    |
|    total_timesteps      | 193536   |
| train/                  |          |
|    approx_kl            | 0.0      |
|    clip_fraction        | 0        |
|    clip_range           | 0.2      |
|    entropy_loss         | 0        |
|    explained_variance   | 0.825    |
|    learning_rate        | 0.001    |
|    loss                 | 25.9     |
|    n_updates            | 7550     |
|    policy_gradient_loss | 7.22e-10 |
|    value_loss           | 52.6     |
--------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 637       |
|    ep_rew_mean          | 1.32e+03  |
| time/              

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 637       |
|    ep_rew_mean          | 1.32e+03  |
| time/                   |           |
|    fps                  | 19        |
|    iterations           | 766       |
|    time_elapsed         | 10138     |
|    total_timesteps      | 196096    |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | 0         |
|    explained_variance   | 0.834     |
|    learning_rate        | 0.001     |
|    loss                 | 7.43      |
|    n_updates            | 7650      |
|    policy_gradient_loss | -6.05e-10 |
|    value_loss           | 57.6      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 637       |
|    ep_rew_mean          | 1.32e+03  |


--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 637      |
|    ep_rew_mean          | 1.32e+03 |
| time/                   |          |
|    fps                  | 19       |
|    iterations           | 776      |
|    time_elapsed         | 10270    |
|    total_timesteps      | 198656   |
| train/                  |          |
|    approx_kl            | 0.0      |
|    clip_fraction        | 0        |
|    clip_range           | 0.2      |
|    entropy_loss         | 0        |
|    explained_variance   | 0.89     |
|    learning_rate        | 0.001    |
|    loss                 | 16       |
|    n_updates            | 7750     |
|    policy_gradient_loss | 4.8e-09  |
|    value_loss           | 45.4     |
--------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 637       |
|    ep_rew_mean          | 1.32e+03  |
| time/              

--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 637      |
|    ep_rew_mean          | 1.32e+03 |
| time/                   |          |
|    fps                  | 19       |
|    iterations           | 786      |
|    time_elapsed         | 10405    |
|    total_timesteps      | 201216   |
| train/                  |          |
|    approx_kl            | 0.0      |
|    clip_fraction        | 0        |
|    clip_range           | 0.2      |
|    entropy_loss         | 0        |
|    explained_variance   | 0.9      |
|    learning_rate        | 0.001    |
|    loss                 | 15.1     |
|    n_updates            | 7850     |
|    policy_gradient_loss | 1.33e-09 |
|    value_loss           | 44.9     |
--------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 637       |
|    ep_rew_mean          | 1.32e+03  |
| time/              

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 637       |
|    ep_rew_mean          | 1.32e+03  |
| time/                   |           |
|    fps                  | 19        |
|    iterations           | 796       |
|    time_elapsed         | 10541     |
|    total_timesteps      | 203776    |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | 0         |
|    explained_variance   | 0.943     |
|    learning_rate        | 0.001     |
|    loss                 | 13.1      |
|    n_updates            | 7950      |
|    policy_gradient_loss | -2.33e-10 |
|    value_loss           | 55.2      |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 637       |
|    ep_rew_mean          | 1.32e+03  |


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 637       |
|    ep_rew_mean          | 1.32e+03  |
| time/                   |           |
|    fps                  | 19        |
|    iterations           | 806       |
|    time_elapsed         | 10675     |
|    total_timesteps      | 206336    |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | 0         |
|    explained_variance   | 0.679     |
|    learning_rate        | 0.001     |
|    loss                 | 67        |
|    n_updates            | 8050      |
|    policy_gradient_loss | -2.05e-09 |
|    value_loss           | 258       |
---------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 637       |
|    ep_rew_mean          | 1.32e+03  |


--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 637      |
|    ep_rew_mean          | 1.32e+03 |
| time/                   |          |
|    fps                  | 19       |
|    iterations           | 816      |
|    time_elapsed         | 10818    |
|    total_timesteps      | 208896   |
| train/                  |          |
|    approx_kl            | 0.0      |
|    clip_fraction        | 0        |
|    clip_range           | 0.2      |
|    entropy_loss         | 0        |
|    explained_variance   | 0.633    |
|    learning_rate        | 0.001    |
|    loss                 | 71.4     |
|    n_updates            | 8150     |
|    policy_gradient_loss | 2.96e-09 |
|    value_loss           | 223      |
--------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 637       |
|    ep_rew_mean          | 1.32e+03  |
| time/              

--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 637      |
|    ep_rew_mean          | 1.32e+03 |
| time/                   |          |
|    fps                  | 19       |
|    iterations           | 826      |
|    time_elapsed         | 10958    |
|    total_timesteps      | 211456   |
| train/                  |          |
|    approx_kl            | 0.0      |
|    clip_fraction        | 0        |
|    clip_range           | 0.2      |
|    entropy_loss         | 0        |
|    explained_variance   | 0.692    |
|    learning_rate        | 0.001    |
|    loss                 | 89.7     |
|    n_updates            | 8250     |
|    policy_gradient_loss | 1.75e-09 |
|    value_loss           | 166      |
--------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 637       |
|    ep_rew_mean          | 1.32e+03  |
| time/              

--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 637      |
|    ep_rew_mean          | 1.32e+03 |
| time/                   |          |
|    fps                  | 19       |
|    iterations           | 836      |
|    time_elapsed         | 11090    |
|    total_timesteps      | 214016   |
| train/                  |          |
|    approx_kl            | 0.0      |
|    clip_fraction        | 0        |
|    clip_range           | 0.2      |
|    entropy_loss         | 0        |
|    explained_variance   | 0.753    |
|    learning_rate        | 0.001    |
|    loss                 | 61.9     |
|    n_updates            | 8350     |
|    policy_gradient_loss | 7.92e-10 |
|    value_loss           | 160      |
--------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 637       |
|    ep_rew_mean          | 1.32e+03  |
| time/              

--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 637      |
|    ep_rew_mean          | 1.32e+03 |
| time/                   |          |
|    fps                  | 19       |
|    iterations           | 846      |
|    time_elapsed         | 11224    |
|    total_timesteps      | 216576   |
| train/                  |          |
|    approx_kl            | 0.0      |
|    clip_fraction        | 0        |
|    clip_range           | 0.2      |
|    entropy_loss         | 0        |
|    explained_variance   | 0.753    |
|    learning_rate        | 0.001    |
|    loss                 | 27.3     |
|    n_updates            | 8450     |
|    policy_gradient_loss | -1.4e-10 |
|    value_loss           | 166      |
--------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 637       |
|    ep_rew_mean          | 1.32e+03  |
| time/              

--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 637      |
|    ep_rew_mean          | 1.32e+03 |
| time/                   |          |
|    fps                  | 19       |
|    iterations           | 856      |
|    time_elapsed         | 11360    |
|    total_timesteps      | 219136   |
| train/                  |          |
|    approx_kl            | 0.0      |
|    clip_fraction        | 0        |
|    clip_range           | 0.2      |
|    entropy_loss         | 0        |
|    explained_variance   | 0.628    |
|    learning_rate        | 0.001    |
|    loss                 | 63.6     |
|    n_updates            | 8550     |
|    policy_gradient_loss | 3.05e-09 |
|    value_loss           | 196      |
--------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 637       |
|    ep_rew_mean          | 1.32e+03  |
| time/              

--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 637      |
|    ep_rew_mean          | 1.32e+03 |
| time/                   |          |
|    fps                  | 19       |
|    iterations           | 866      |
|    time_elapsed         | 11501    |
|    total_timesteps      | 221696   |
| train/                  |          |
|    approx_kl            | 0.0      |
|    clip_fraction        | 0        |
|    clip_range           | 0.2      |
|    entropy_loss         | 0        |
|    explained_variance   | 0.733    |
|    learning_rate        | 0.001    |
|    loss                 | 141      |
|    n_updates            | 8650     |
|    policy_gradient_loss | 8.15e-10 |
|    value_loss           | 193      |
--------------------------------------
--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 637      |
|    ep_rew_mean          | 1.32e+03 |
| time/                  

In [None]:
import numpy as np
import tensorflow as tf

In [None]:
obs = env.reset()
start_time = time.time()
total_rewards = 0
game_scores = []
steps_per_episode = []
episode_rewards = []
while True:
    action, states = model.predict(obs.copy())
    action = action.item()  # convert NumPy array to scalar integer
    obs, rewards, done, info = env.step((action))
    total_rewards += rewards
    episode_rewards.append(rewards)
    if done:
        game_scores.append(info['score'])
        steps_per_episode.append(len(episode_rewards))
        episode_rewards = []
        obs = env.reset()
        if len(game_scores) == 10:
            break
test_time = time.time() - start_time

In [None]:
print(f'Average steps per episode for seed 10: {sum(steps_per_episode)/len(steps_per_episode)}')
print(f'Average mean reward for seed 10: {total_rewards/len(game_scores)}')
print(f'Average game score for seed 10: {sum(game_scores)/len(game_scores)}')
print(f'Training time for seed 10: {training_time:.2f} seconds')
print(f'Test time for seed 10: {test_time:.2f} seconds')

In [None]:
model = A2C.load('./train/best_model_1000000/')