### Training an RL agent on Atari games

In [7]:
make_atari_env??

### CartPole Environment

### Multiprocessing: Unleashing the Power of Vectorized Environments

In [None]:
import gymnasium as gym

from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.utils import set_random_seed

def make_env(env_id: str, rank: int, seed: int = 0, render_mode="human"):
    """
    Utility function for multiprocessed env.

    Returns a zero-argument factory instead of an environment instance, so the
    vectorized env wrapper can create each environment itself.

    :param env_id: the environment ID
    :param rank: index of the subprocess; it is added to ``seed`` so that each
        environment is reset with a different seed
    :param seed: the initial seed for RNG
    :param render_mode: render mode forwarded to ``gym.make``. Defaults to
        ``"human"`` (the value that used to be hard-coded); pass ``None`` to
        create the environment without on-screen rendering.
    :return: a callable that builds, seeds and returns the environment
    """
    def _init():
        env = gym.make(env_id, render_mode=render_mode)
        # Seeding happens through the first reset() call.
        env.reset(seed=seed + rank)
        return env
    # Runs when make_env() is called (not when _init() is called), i.e. in the
    # process that builds the list of factories.
    set_random_seed(seed)
    return _init

# NOTE: when this cell is moved into a standalone script, put the code below
# under `if __name__ == "__main__":` (the Stable-Baselines3 docs require this
# guard for SubprocVecEnv when processes are started with spawn/forkserver).

env_id = "CartPole-v1"
num_cpu = 4  # Number of processes to use
# Create the vectorized environment
vec_env = SubprocVecEnv([make_env(env_id, i) for i in range(num_cpu)])

# Stable Baselines provides you with make_vec_env() helper
# which does exactly the previous steps for you.
# You can choose between `DummyVecEnv` (usually faster) and `SubprocVecEnv`
# env = make_vec_env(env_id, n_envs=num_cpu, seed=0, vec_env_cls=SubprocVecEnv)

# try/finally guarantees the worker processes are shut down even if training
# is interrupted or raises; otherwise they would be left running.
try:
    model = PPO("MlpPolicy", vec_env, verbose=1)
    model.learn(total_timesteps=50_000)
    model.save("models/CartPole")

    # Roll the trained policy out for a fixed number of steps while rendering.
    obs = vec_env.reset()
    for _ in range(1000):
        action, _states = model.predict(obs)
        obs, rewards, dones, info = vec_env.step(action)
        vec_env.render()
finally:
    vec_env.close()

Using cuda device


### Callbacks: Evaluate Agent Performance

In [None]:
import os
import gymnasium as gym

from stable_baselines3 import SAC
from stable_baselines3.common.callbacks import EvalCallback
from stable_baselines3.common.env_util import make_vec_env

env_id = "Pendulum-v1"
n_training_envs = 1
n_eval_envs = 5
# Number of steps of the rendered rollout at the end of the cell. A bounded
# loop lets the cell finish, so "Restart & Run All" does not hang here.
n_playback_steps = 1000

# Create log dir where evaluation results will be saved
eval_log_dir = "./models/Pendulum_eval_logs/"
os.makedirs(eval_log_dir, exist_ok=True)
# The callback below is given eval_log_dir as best_model_save_path; the best
# checkpoint is reloaded from this path after training.
best_model_path = os.path.join(eval_log_dir, "best_model")

# Initialize a vectorized training environment with default parameters
train_env = make_vec_env(env_id, n_envs=n_training_envs, seed=0)

# Separate evaluation env, with different parameters passed via env_kwargs
# Eval environments can be vectorized to speed up evaluation.
eval_env = make_vec_env(env_id, n_envs=n_eval_envs, seed=0,
                        env_kwargs={'g':0.7})

# try/finally releases both vectorized environments (and any render window)
# even when training or playback is interrupted.
try:
    # Create callback that evaluates agent for 5 episodes every 500 training environment steps.
    # When using multiple training environments, agent will be evaluated every
    # eval_freq calls to train_env.step(), thus it will be evaluated every
    # (eval_freq * n_envs) training steps. See EvalCallback doc for more information.
    eval_callback = EvalCallback(eval_env, best_model_save_path=eval_log_dir,
                                  log_path=eval_log_dir, eval_freq=max(500 // n_training_envs, 1),
                                  n_eval_episodes=5, deterministic=True,
                                  render=False)

    model = SAC("MlpPolicy", train_env)
    # To resume from an earlier run instead of training from scratch:
    # model = SAC.load(best_model_path, env=train_env)

    model.learn(50000, callback=eval_callback)

    # Replace the final policy with the best checkpoint found during evaluation.
    model = SAC.load(best_model_path, env=train_env)

    obs = train_env.reset()
    for _ in range(n_playback_steps):
        action, _states = model.predict(obs, deterministic=False)
        obs, rewards, dones, info = train_env.step(action)
        train_env.render("human")
finally:
    train_env.close()
    eval_env.close()

Eval num_timesteps=500, episode_reward=-131.90 +/- 121.17
Episode length: 200.00 +/- 0.00
New best mean reward!


#### Pong

In [None]:
from stable_baselines3.common.env_util import make_atari_env
from stable_baselines3.common.vec_env import VecFrameStack
from stable_baselines3 import A2C
import time

import os

t0=time.time()

model_path = "models/Pong"
# Number of steps of the rendered rollout at the end of the cell. A bounded
# loop lets the cell finish, so "Restart & Run All" does not hang here.
n_playback_steps = 1000

# There already exists an environment generator
# that will make and wrap atari environments correctly.
# Here we are also multi-worker training (n_envs=4 => 4 environments)
vec_env = make_atari_env("PongNoFrameskip-v4", n_envs=4, seed=0)
# Frame-stacking with 4 frames
vec_env = VecFrameStack(vec_env, n_stack=4)

# try/finally closes the environments (and the render window) even when
# training or playback is interrupted.
try:
    # Resume from the saved checkpoint when there is one; otherwise start from
    # scratch so the cell also works on a fresh checkout. The ".zip" suffix is
    # the one stable-baselines3 appends to suffix-less paths on save.
    if os.path.exists(model_path + ".zip"):
        model = A2C.load(model_path, env=vec_env)
    else:
        model = A2C("CnnPolicy", vec_env, verbose=1)

    model.learn(total_timesteps=50_000)
    model.save(model_path)

    print(f"train time: {time.time()-t0}")

    # Watch the (stochastic) policy play for a fixed number of steps.
    obs = vec_env.reset()
    for _ in range(n_playback_steps):
        action, _states = model.predict(obs, deterministic=False)
        obs, rewards, dones, info = vec_env.step(action)
        vec_env.render("human")
finally:
    vec_env.close()

Wrapping the env in a VecTransposeImage.
------------------------------------
| time/                 |          |
|    fps                | 283      |
|    iterations         | 100      |
|    time_elapsed       | 7        |
|    total_timesteps    | 2000     |
| train/                |          |
|    entropy_loss       | -1.03    |
|    explained_variance | 0.927    |
|    learning_rate      | 0.0007   |
|    n_updates          | 6349     |
|    policy_loss        | -0.00657 |
|    value_loss         | 0.00536  |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 3.36e+03 |
|    ep_rew_mean        | -20.5    |
| time/                 |          |
|    fps                | 320      |
|    iterations         | 200      |
|    time_elapsed       | 12       |
|    total_timesteps    | 4000     |
| train/                |          |
|    entropy_loss       | -1.35    |
|    explained_variance | 0.303   

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 3.49e+03 |
|    ep_rew_mean        | -20.5    |
| time/                 |          |
|    fps                | 329      |
|    iterations         | 1500     |
|    time_elapsed       | 90       |
|    total_timesteps    | 30000    |
| train/                |          |
|    entropy_loss       | -1.5     |
|    explained_variance | 0.655    |
|    learning_rate      | 0.0007   |
|    n_updates          | 7749     |
|    policy_loss        | 0.0464   |
|    value_loss         | 0.0167   |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 3.48e+03 |
|    ep_rew_mean        | -20.5    |
| time/                 |          |
|    fps                | 329      |
|    iterations         | 1600     |
|    time_elapsed       | 97       |
|    total_timesteps    | 32000    |
| train/                |          |
|

  logger.warn(


#### Breakout

In [None]:
from stable_baselines3.common.env_util import make_atari_env
from stable_baselines3.common.vec_env import VecFrameStack
from stable_baselines3 import A2C
import time

t0=time.time()

# Number of steps of the rendered rollout at the end of the cell. A bounded
# loop lets the cell finish, so "Restart & Run All" does not hang here.
n_playback_steps = 1000

# There already exists an environment generator
# that will make and wrap atari environments correctly.
# A single environment is used here (n_envs=1); increase n_envs for
# multi-worker training.
vec_env = make_atari_env("BreakoutNoFrameskip-v4", n_envs=1, seed=0)
# Frame-stacking with 4 frames
vec_env = VecFrameStack(vec_env, n_stack=4)

# try/finally closes the environment (and the render window) even when
# training or playback is interrupted.
try:
    model = A2C("CnnPolicy", vec_env, verbose=1)
    # To continue from a previously saved model instead:
    # model = A2C.load("models/Breakout", env=vec_env)

    model.learn(total_timesteps=20_000)
    model.save("models/Breakout")

    print(f"train time: {time.time()-t0}")

    # Watch the (stochastic) policy play for a fixed number of steps.
    obs = vec_env.reset()
    for _ in range(n_playback_steps):
        action, _states = model.predict(obs, deterministic=False)
        obs, rewards, dones, info = vec_env.step(action)
        vec_env.render("human")
finally:
    vec_env.close()
    

Using cuda device
Wrapping the env in a VecTransposeImage.
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 700      |
|    ep_rew_mean        | 1        |
| time/                 |          |
|    fps                | 85       |
|    iterations         | 100      |
|    time_elapsed       | 5        |
|    total_timesteps    | 500      |
| train/                |          |
|    entropy_loss       | -1.39    |
|    explained_variance | 0.222    |
|    learning_rate      | 0.0007   |
|    n_updates          | 99       |
|    policy_loss        | 0.000157 |
|    value_loss         | 3.21e-07 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 827      |
|    ep_rew_mean        | 1.8      |
| time/                 |          |
|    fps                | 102      |
|    iterations         | 200      |
|    time_elapsed       | 9        |
|    total_times

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 751      |
|    ep_rew_mean        | 1.4      |
| time/                 |          |
|    fps                | 122      |
|    iterations         | 1400     |
|    time_elapsed       | 57       |
|    total_timesteps    | 7000     |
| train/                |          |
|    entropy_loss       | -1.38    |
|    explained_variance | 0.12     |
|    learning_rate      | 0.0007   |
|    n_updates          | 1399     |
|    policy_loss        | -0.00099 |
|    value_loss         | 2.05e-06 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 748      |
|    ep_rew_mean        | 1.39     |
| time/                 |          |
|    fps                | 122      |
|    iterations         | 1500     |
|    time_elapsed       | 61       |
|    total_timesteps    | 7500     |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 735      |
|    ep_rew_mean        | 1.31     |
| time/                 |          |
|    fps                | 124      |
|    iterations         | 2700     |
|    time_elapsed       | 108      |
|    total_timesteps    | 13500    |
| train/                |          |
|    entropy_loss       | -0.193   |
|    explained_variance | -231     |
|    learning_rate      | 0.0007   |
|    n_updates          | 2699     |
|    policy_loss        | 0.00447  |
|    value_loss         | 9.51e-05 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 728       |
|    ep_rew_mean        | 1.26      |
| time/                 |           |
|    fps                | 124       |
|    iterations         | 2800      |
|    time_elapsed       | 112       |
|    total_timesteps    | 14000     |
| train/                |    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 770      |
|    ep_rew_mean        | 1.65     |
| time/                 |          |
|    fps                | 124      |
|    iterations         | 4000     |
|    time_elapsed       | 160      |
|    total_timesteps    | 20000    |
| train/                |          |
|    entropy_loss       | -0.196   |
|    explained_variance | -8.38    |
|    learning_rate      | 0.0007   |
|    n_updates          | 3999     |
|    policy_loss        | -0.00107 |
|    value_loss         | 0.00163  |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 781      |
|    ep_rew_mean        | 1.74     |
| time/                 |          |
|    fps                | 124      |
|    iterations         | 4100     |
|    time_elapsed       | 164      |
|    total_timesteps    | 20500    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 820      |
|    ep_rew_mean        | 2.15     |
| time/                 |          |
|    fps                | 125      |
|    iterations         | 5300     |
|    time_elapsed       | 211      |
|    total_timesteps    | 26500    |
| train/                |          |
|    entropy_loss       | -0.195   |
|    explained_variance | -36.8    |
|    learning_rate      | 0.0007   |
|    n_updates          | 5299     |
|    policy_loss        | 0.000192 |
|    value_loss         | 0.000134 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 820      |
|    ep_rew_mean        | 2.15     |
| time/                 |          |
|    fps                | 125      |
|    iterations         | 5400     |
|    time_elapsed       | 215      |
|    total_timesteps    | 27000    |
| train/                |          |
|

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 886       |
|    ep_rew_mean        | 2.73      |
| time/                 |           |
|    fps                | 125       |
|    iterations         | 6600      |
|    time_elapsed       | 262       |
|    total_timesteps    | 33000     |
| train/                |           |
|    entropy_loss       | -0.117    |
|    explained_variance | -13.3     |
|    learning_rate      | 0.0007    |
|    n_updates          | 6599      |
|    policy_loss        | -0.000517 |
|    value_loss         | 0.00256   |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 895      |
|    ep_rew_mean        | 2.78     |
| time/                 |          |
|    fps                | 125      |
|    iterations         | 6700     |
|    time_elapsed       | 266      |
|    total_timesteps    | 33500    |
| train/             

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 849       |
|    ep_rew_mean        | 2.43      |
| time/                 |           |
|    fps                | 125       |
|    iterations         | 7900      |
|    time_elapsed       | 314       |
|    total_timesteps    | 39500     |
| train/                |           |
|    entropy_loss       | -0.708    |
|    explained_variance | -2.63e+03 |
|    learning_rate      | 0.0007    |
|    n_updates          | 7899      |
|    policy_loss        | -0.0901   |
|    value_loss         | 0.00306   |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 849       |
|    ep_rew_mean        | 2.43      |
| time/                 |           |
|    fps                | 125       |
|    iterations         | 8000      |
|    time_elapsed       | 318       |
|    total_timesteps    | 40000     |
| train/    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 872      |
|    ep_rew_mean        | 2.54     |
| time/                 |          |
|    fps                | 125      |
|    iterations         | 9200     |
|    time_elapsed       | 365      |
|    total_timesteps    | 46000    |
| train/                |          |
|    entropy_loss       | -0.489   |
|    explained_variance | 0.997    |
|    learning_rate      | 0.0007   |
|    n_updates          | 9199     |
|    policy_loss        | 0.00382  |
|    value_loss         | 0.00238  |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 869      |
|    ep_rew_mean        | 2.5      |
| time/                 |          |
|    fps                | 125      |
|    iterations         | 9300     |
|    time_elapsed       | 369      |
|    total_timesteps    | 46500    |
| train/                |          |
|

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 803       |
|    ep_rew_mean        | 2.05      |
| time/                 |           |
|    fps                | 125       |
|    iterations         | 10500     |
|    time_elapsed       | 417       |
|    total_timesteps    | 52500     |
| train/                |           |
|    entropy_loss       | -0.93     |
|    explained_variance | -3.1e+03  |
|    learning_rate      | 0.0007    |
|    n_updates          | 10499     |
|    policy_loss        | -0.000511 |
|    value_loss         | 0.000171  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 814      |
|    ep_rew_mean        | 2.14     |
| time/                 |          |
|    fps                | 125      |
|    iterations         | 10600    |
|    time_elapsed       | 421      |
|    total_timesteps    | 53000    |
| train/             

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 843      |
|    ep_rew_mean        | 2.4      |
| time/                 |          |
|    fps                | 125      |
|    iterations         | 11800    |
|    time_elapsed       | 468      |
|    total_timesteps    | 59000    |
| train/                |          |
|    entropy_loss       | -0.418   |
|    explained_variance | -17      |
|    learning_rate      | 0.0007   |
|    n_updates          | 11799    |
|    policy_loss        | 0.00162  |
|    value_loss         | 0.000266 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 854       |
|    ep_rew_mean        | 2.49      |
| time/                 |           |
|    fps                | 125       |
|    iterations         | 11900     |
|    time_elapsed       | 472       |
|    total_timesteps    | 59500     |
| train/                |    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 885      |
|    ep_rew_mean        | 2.8      |
| time/                 |          |
|    fps                | 125      |
|    iterations         | 13100    |
|    time_elapsed       | 520      |
|    total_timesteps    | 65500    |
| train/                |          |
|    entropy_loss       | -0.686   |
|    explained_variance | -0.0623  |
|    learning_rate      | 0.0007   |
|    n_updates          | 13099    |
|    policy_loss        | 0.000784 |
|    value_loss         | 0.000186 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 878       |
|    ep_rew_mean        | 2.76      |
| time/                 |           |
|    fps                | 125       |
|    iterations         | 13200     |
|    time_elapsed       | 524       |
|    total_timesteps    | 66000     |
| train/                |    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 857      |
|    ep_rew_mean        | 2.61     |
| time/                 |          |
|    fps                | 125      |
|    iterations         | 14400    |
|    time_elapsed       | 571      |
|    total_timesteps    | 72000    |
| train/                |          |
|    entropy_loss       | -1.06    |
|    explained_variance | 0.633    |
|    learning_rate      | 0.0007   |
|    n_updates          | 14399    |
|    policy_loss        | 0.00571  |
|    value_loss         | 8.26e-05 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 857       |
|    ep_rew_mean        | 2.61      |
| time/                 |           |
|    fps                | 125       |
|    iterations         | 14500     |
|    time_elapsed       | 575       |
|    total_timesteps    | 72500     |
| train/                |    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 844       |
|    ep_rew_mean        | 2.52      |
| time/                 |           |
|    fps                | 125       |
|    iterations         | 15700     |
|    time_elapsed       | 623       |
|    total_timesteps    | 78500     |
| train/                |           |
|    entropy_loss       | -0.734    |
|    explained_variance | -666      |
|    learning_rate      | 0.0007    |
|    n_updates          | 15699     |
|    policy_loss        | -0.000842 |
|    value_loss         | 4.98e-06  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 811       |
|    ep_rew_mean        | 2.25      |
| time/                 |           |
|    fps                | 125       |
|    iterations         | 15800     |
|    time_elapsed       | 627       |
|    total_timesteps    | 79000     |
| train/    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 760      |
|    ep_rew_mean        | 1.88     |
| time/                 |          |
|    fps                | 125      |
|    iterations         | 17000    |
|    time_elapsed       | 674      |
|    total_timesteps    | 85000    |
| train/                |          |
|    entropy_loss       | -0.455   |
|    explained_variance | -4.64    |
|    learning_rate      | 0.0007   |
|    n_updates          | 16999    |
|    policy_loss        | -0.00104 |
|    value_loss         | 7.36e-05 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 760      |
|    ep_rew_mean        | 1.88     |
| time/                 |          |
|    fps                | 125      |
|    iterations         | 17100    |
|    time_elapsed       | 678      |
|    total_timesteps    | 85500    |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 772      |
|    ep_rew_mean        | 1.96     |
| time/                 |          |
|    fps                | 125      |
|    iterations         | 18300    |
|    time_elapsed       | 726      |
|    total_timesteps    | 91500    |
| train/                |          |
|    entropy_loss       | -0.629   |
|    explained_variance | -6.35    |
|    learning_rate      | 0.0007   |
|    n_updates          | 18299    |
|    policy_loss        | 0.00182  |
|    value_loss         | 3.25e-05 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 769      |
|    ep_rew_mean        | 1.94     |
| time/                 |          |
|    fps                | 125      |
|    iterations         | 18400    |
|    time_elapsed       | 730      |
|    total_timesteps    | 92000    |
| train/                |          |
|

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 825       |
|    ep_rew_mean        | 2.39      |
| time/                 |           |
|    fps                | 125       |
|    iterations         | 19600     |
|    time_elapsed       | 777       |
|    total_timesteps    | 98000     |
| train/                |           |
|    entropy_loss       | -0.207    |
|    explained_variance | -6.58e+12 |
|    learning_rate      | 0.0007    |
|    n_updates          | 19599     |
|    policy_loss        | 0.0168    |
|    value_loss         | 0.000383  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 825      |
|    ep_rew_mean        | 2.39     |
| time/                 |          |
|    fps                | 125      |
|    iterations         | 19700    |
|    time_elapsed       | 781      |
|    total_timesteps    | 98500    |
| train/             

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 770       |
|    ep_rew_mean        | 1.94      |
| time/                 |           |
|    fps                | 125       |
|    iterations         | 20900     |
|    time_elapsed       | 829       |
|    total_timesteps    | 104500    |
| train/                |           |
|    entropy_loss       | -0.0746   |
|    explained_variance | -70.6     |
|    learning_rate      | 0.0007    |
|    n_updates          | 20899     |
|    policy_loss        | -3.67e-06 |
|    value_loss         | 1.83e-06  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 770      |
|    ep_rew_mean        | 1.94     |
| time/                 |          |
|    fps                | 125      |
|    iterations         | 21000    |
|    time_elapsed       | 833      |
|    total_timesteps    | 105000   |
| train/             

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 847       |
|    ep_rew_mean        | 2.57      |
| time/                 |           |
|    fps                | 125       |
|    iterations         | 22200     |
|    time_elapsed       | 881       |
|    total_timesteps    | 111000    |
| train/                |           |
|    entropy_loss       | -0.0744   |
|    explained_variance | -3.62e+10 |
|    learning_rate      | 0.0007    |
|    n_updates          | 22199     |
|    policy_loss        | 0.000371  |
|    value_loss         | 0.000883  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 846      |
|    ep_rew_mean        | 2.57     |
| time/                 |          |
|    fps                | 125      |
|    iterations         | 22300    |
|    time_elapsed       | 884      |
|    total_timesteps    | 111500   |
| train/             

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 797       |
|    ep_rew_mean        | 2.18      |
| time/                 |           |
|    fps                | 126       |
|    iterations         | 23500     |
|    time_elapsed       | 932       |
|    total_timesteps    | 117500    |
| train/                |           |
|    entropy_loss       | -0.218    |
|    explained_variance | -1.07e+04 |
|    learning_rate      | 0.0007    |
|    n_updates          | 23499     |
|    policy_loss        | 4.46e-05  |
|    value_loss         | 4.7e-06   |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 808      |
|    ep_rew_mean        | 2.27     |
| time/                 |          |
|    fps                | 126      |
|    iterations         | 23600    |
|    time_elapsed       | 936      |
|    total_timesteps    | 118000   |
| train/             

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 863      |
|    ep_rew_mean        | 2.72     |
| time/                 |          |
|    fps                | 126      |
|    iterations         | 24800    |
|    time_elapsed       | 983      |
|    total_timesteps    | 124000   |
| train/                |          |
|    entropy_loss       | -0.0378  |
|    explained_variance | 0.991    |
|    learning_rate      | 0.0007   |
|    n_updates          | 24799    |
|    policy_loss        | 7.57e-05 |
|    value_loss         | 0.000188 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 874       |
|    ep_rew_mean        | 2.81      |
| time/                 |           |
|    fps                | 126       |
|    iterations         | 24900     |
|    time_elapsed       | 987       |
|    total_timesteps    | 124500    |
| train/                |    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 863      |
|    ep_rew_mean        | 2.72     |
| time/                 |          |
|    fps                | 126      |
|    iterations         | 26100    |
|    time_elapsed       | 1034     |
|    total_timesteps    | 130500   |
| train/                |          |
|    entropy_loss       | -0.461   |
|    explained_variance | -0.331   |
|    learning_rate      | 0.0007   |
|    n_updates          | 26099    |
|    policy_loss        | -0.00269 |
|    value_loss         | 0.000601 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 863       |
|    ep_rew_mean        | 2.72      |
| time/                 |           |
|    fps                | 126       |
|    iterations         | 26200     |
|    time_elapsed       | 1038      |
|    total_timesteps    | 131000    |
| train/                |    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 847       |
|    ep_rew_mean        | 2.57      |
| time/                 |           |
|    fps                | 126       |
|    iterations         | 27400     |
|    time_elapsed       | 1085      |
|    total_timesteps    | 137000    |
| train/                |           |
|    entropy_loss       | -0.653    |
|    explained_variance | 0.722     |
|    learning_rate      | 0.0007    |
|    n_updates          | 27399     |
|    policy_loss        | -0.000836 |
|    value_loss         | 3.76e-05  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 858      |
|    ep_rew_mean        | 2.66     |
| time/                 |          |
|    fps                | 126      |
|    iterations         | 27500    |
|    time_elapsed       | 1089     |
|    total_timesteps    | 137500   |
| train/             

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 836       |
|    ep_rew_mean        | 2.48      |
| time/                 |           |
|    fps                | 126       |
|    iterations         | 28700     |
|    time_elapsed       | 1136      |
|    total_timesteps    | 143500    |
| train/                |           |
|    entropy_loss       | -0.479    |
|    explained_variance | -25.7     |
|    learning_rate      | 0.0007    |
|    n_updates          | 28699     |
|    policy_loss        | -0.000803 |
|    value_loss         | 1.74e-05  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 847       |
|    ep_rew_mean        | 2.57      |
| time/                 |           |
|    fps                | 126       |
|    iterations         | 28800     |
|    time_elapsed       | 1140      |
|    total_timesteps    | 144000    |
| train/    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 846      |
|    ep_rew_mean        | 2.57     |
| time/                 |          |
|    fps                | 126      |
|    iterations         | 30000    |
|    time_elapsed       | 1187     |
|    total_timesteps    | 150000   |
| train/                |          |
|    entropy_loss       | -0.259   |
|    explained_variance | 0.984    |
|    learning_rate      | 0.0007   |
|    n_updates          | 29999    |
|    policy_loss        | 0.00917  |
|    value_loss         | 0.00782  |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 835       |
|    ep_rew_mean        | 2.48      |
| time/                 |           |
|    fps                | 126       |
|    iterations         | 30100     |
|    time_elapsed       | 1191      |
|    total_timesteps    | 150500    |
| train/                |    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 881       |
|    ep_rew_mean        | 2.88      |
| time/                 |           |
|    fps                | 126       |
|    iterations         | 31300     |
|    time_elapsed       | 1239      |
|    total_timesteps    | 156500    |
| train/                |           |
|    entropy_loss       | -0.0857   |
|    explained_variance | -1.06e+03 |
|    learning_rate      | 0.0007    |
|    n_updates          | 31299     |
|    policy_loss        | -6.27e-05 |
|    value_loss         | 1.84e-05  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 859       |
|    ep_rew_mean        | 2.7       |
| time/                 |           |
|    fps                | 126       |
|    iterations         | 31400     |
|    time_elapsed       | 1243      |
|    total_timesteps    | 157000    |
| train/    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 815      |
|    ep_rew_mean        | 2.34     |
| time/                 |          |
|    fps                | 126      |
|    iterations         | 32600    |
|    time_elapsed       | 1290     |
|    total_timesteps    | 163000   |
| train/                |          |
|    entropy_loss       | -0.0953  |
|    explained_variance | 0.986    |
|    learning_rate      | 0.0007   |
|    n_updates          | 32599    |
|    policy_loss        | 2.82e-06 |
|    value_loss         | 1.49e-06 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 826      |
|    ep_rew_mean        | 2.43     |
| time/                 |          |
|    fps                | 126      |
|    iterations         | 32700    |
|    time_elapsed       | 1294     |
|    total_timesteps    | 163500   |
| train/                |          |
|

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 804       |
|    ep_rew_mean        | 2.25      |
| time/                 |           |
|    fps                | 126       |
|    iterations         | 33900     |
|    time_elapsed       | 1341      |
|    total_timesteps    | 169500    |
| train/                |           |
|    entropy_loss       | -0.0256   |
|    explained_variance | -4.36e+03 |
|    learning_rate      | 0.0007    |
|    n_updates          | 33899     |
|    policy_loss        | 3.15e-06  |
|    value_loss         | 4.18e-06  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 803       |
|    ep_rew_mean        | 2.25      |
| time/                 |           |
|    fps                | 126       |
|    iterations         | 34000     |
|    time_elapsed       | 1345      |
|    total_timesteps    | 170000    |
| train/    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 814      |
|    ep_rew_mean        | 2.34     |
| time/                 |          |
|    fps                | 126      |
|    iterations         | 35200    |
|    time_elapsed       | 1392     |
|    total_timesteps    | 176000   |
| train/                |          |
|    entropy_loss       | -0.0249  |
|    explained_variance | -151     |
|    learning_rate      | 0.0007   |
|    n_updates          | 35199    |
|    policy_loss        | 8.14e-06 |
|    value_loss         | 9.15e-06 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 814      |
|    ep_rew_mean        | 2.34     |
| time/                 |          |
|    fps                | 126      |
|    iterations         | 35300    |
|    time_elapsed       | 1396     |
|    total_timesteps    | 176500   |
| train/                |          |
|

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 825       |
|    ep_rew_mean        | 2.43      |
| time/                 |           |
|    fps                | 126       |
|    iterations         | 36500     |
|    time_elapsed       | 1444      |
|    total_timesteps    | 182500    |
| train/                |           |
|    entropy_loss       | -0.0108   |
|    explained_variance | -1.18e+03 |
|    learning_rate      | 0.0007    |
|    n_updates          | 36499     |
|    policy_loss        | -9.29e-07 |
|    value_loss         | 4.77e-06  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 825      |
|    ep_rew_mean        | 2.43     |
| time/                 |          |
|    fps                | 126      |
|    iterations         | 36600    |
|    time_elapsed       | 1448     |
|    total_timesteps    | 183000   |
| train/             

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 836      |
|    ep_rew_mean        | 2.52     |
| time/                 |          |
|    fps                | 126      |
|    iterations         | 37800    |
|    time_elapsed       | 1495     |
|    total_timesteps    | 189000   |
| train/                |          |
|    entropy_loss       | -0.0232  |
|    explained_variance | 0.905    |
|    learning_rate      | 0.0007   |
|    n_updates          | 37799    |
|    policy_loss        | -2.2e-05 |
|    value_loss         | 5.9e-05  |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 837       |
|    ep_rew_mean        | 2.52      |
| time/                 |           |
|    fps                | 126       |
|    iterations         | 37900     |
|    time_elapsed       | 1499      |
|    total_timesteps    | 189500    |
| train/                |    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 782      |
|    ep_rew_mean        | 2.07     |
| time/                 |          |
|    fps                | 126      |
|    iterations         | 39100    |
|    time_elapsed       | 1546     |
|    total_timesteps    | 195500   |
| train/                |          |
|    entropy_loss       | -0.0194  |
|    explained_variance | -239     |
|    learning_rate      | 0.0007   |
|    n_updates          | 39099    |
|    policy_loss        | 1.83e-05 |
|    value_loss         | 7.51e-05 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 782      |
|    ep_rew_mean        | 2.07     |
| time/                 |          |
|    fps                | 126      |
|    iterations         | 39200    |
|    time_elapsed       | 1550     |
|    total_timesteps    | 196000   |
| train/                |          |
|

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 793       |
|    ep_rew_mean        | 2.16      |
| time/                 |           |
|    fps                | 126       |
|    iterations         | 40400     |
|    time_elapsed       | 1597      |
|    total_timesteps    | 202000    |
| train/                |           |
|    entropy_loss       | -0.0623   |
|    explained_variance | 0.854     |
|    learning_rate      | 0.0007    |
|    n_updates          | 40399     |
|    policy_loss        | -1.65e-05 |
|    value_loss         | 2.32e-05  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 804      |
|    ep_rew_mean        | 2.25     |
| time/                 |          |
|    fps                | 126      |
|    iterations         | 40500    |
|    time_elapsed       | 1601     |
|    total_timesteps    | 202500   |
| train/             

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 836       |
|    ep_rew_mean        | 2.52      |
| time/                 |           |
|    fps                | 126       |
|    iterations         | 41700     |
|    time_elapsed       | 1648      |
|    total_timesteps    | 208500    |
| train/                |           |
|    entropy_loss       | -0.000502 |
|    explained_variance | 0.682     |
|    learning_rate      | 0.0007    |
|    n_updates          | 41699     |
|    policy_loss        | -1.43e-06 |
|    value_loss         | 0.00124   |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 847       |
|    ep_rew_mean        | 2.61      |
| time/                 |           |
|    fps                | 126       |
|    iterations         | 41800     |
|    time_elapsed       | 1652      |
|    total_timesteps    | 209000    |
| train/    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 870      |
|    ep_rew_mean        | 2.79     |
| time/                 |          |
|    fps                | 126      |
|    iterations         | 43000    |
|    time_elapsed       | 1699     |
|    total_timesteps    | 215000   |
| train/                |          |
|    entropy_loss       | -0.00479 |
|    explained_variance | -41.4    |
|    learning_rate      | 0.0007   |
|    n_updates          | 42999    |
|    policy_loss        | 3.45e-08 |
|    value_loss         | 5.87e-07 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 870      |
|    ep_rew_mean        | 2.79     |
| time/                 |          |
|    fps                | 126      |
|    iterations         | 43100    |
|    time_elapsed       | 1703     |
|    total_timesteps    | 215500   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 881      |
|    ep_rew_mean        | 2.88     |
| time/                 |          |
|    fps                | 126      |
|    iterations         | 44300    |
|    time_elapsed       | 1750     |
|    total_timesteps    | 221500   |
| train/                |          |
|    entropy_loss       | -0.00672 |
|    explained_variance | -6.47    |
|    learning_rate      | 0.0007   |
|    n_updates          | 44299    |
|    policy_loss        | 1.85e-07 |
|    value_loss         | 4.26e-07 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 870       |
|    ep_rew_mean        | 2.79      |
| time/                 |           |
|    fps                | 126       |
|    iterations         | 44400     |
|    time_elapsed       | 1754      |
|    total_timesteps    | 222000    |
| train/                |    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 794       |
|    ep_rew_mean        | 2.16      |
| time/                 |           |
|    fps                | 126       |
|    iterations         | 45600     |
|    time_elapsed       | 1801      |
|    total_timesteps    | 228000    |
| train/                |           |
|    entropy_loss       | -0.00345  |
|    explained_variance | -3.13     |
|    learning_rate      | 0.0007    |
|    n_updates          | 45599     |
|    policy_loss        | -4.32e-07 |
|    value_loss         | 3.18e-06  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 772      |
|    ep_rew_mean        | 1.98     |
| time/                 |          |
|    fps                | 126      |
|    iterations         | 45700    |
|    time_elapsed       | 1805     |
|    total_timesteps    | 228500   |
| train/             

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 826      |
|    ep_rew_mean        | 2.43     |
| time/                 |          |
|    fps                | 126      |
|    iterations         | 46900    |
|    time_elapsed       | 1852     |
|    total_timesteps    | 234500   |
| train/                |          |
|    entropy_loss       | -0.00188 |
|    explained_variance | -4.21    |
|    learning_rate      | 0.0007   |
|    n_updates          | 46899    |
|    policy_loss        | 3.08e-07 |
|    value_loss         | 5.87e-06 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 826       |
|    ep_rew_mean        | 2.43      |
| time/                 |           |
|    fps                | 126       |
|    iterations         | 47000     |
|    time_elapsed       | 1856      |
|    total_timesteps    | 235000    |
| train/                |    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 837       |
|    ep_rew_mean        | 2.52      |
| time/                 |           |
|    fps                | 126       |
|    iterations         | 48200     |
|    time_elapsed       | 1903      |
|    total_timesteps    | 241000    |
| train/                |           |
|    entropy_loss       | -0.0193   |
|    explained_variance | -12.9     |
|    learning_rate      | 0.0007    |
|    n_updates          | 48199     |
|    policy_loss        | -2.75e-05 |
|    value_loss         | 0.000133  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 825       |
|    ep_rew_mean        | 2.43      |
| time/                 |           |
|    fps                | 126       |
|    iterations         | 48300     |
|    time_elapsed       | 1907      |
|    total_timesteps    | 241500    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 814       |
|    ep_rew_mean        | 2.34      |
| time/                 |           |
|    fps                | 126       |
|    iterations         | 49500     |
|    time_elapsed       | 1954      |
|    total_timesteps    | 247500    |
| train/                |           |
|    entropy_loss       | -0.00381  |
|    explained_variance | -10.3     |
|    learning_rate      | 0.0007    |
|    n_updates          | 49499     |
|    policy_loss        | -1.02e-08 |
|    value_loss         | 4.83e-08  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 825      |
|    ep_rew_mean        | 2.43     |
| time/                 |          |
|    fps                | 126      |
|    iterations         | 49600    |
|    time_elapsed       | 1958     |
|    total_timesteps    | 248000   |
| train/             

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 759       |
|    ep_rew_mean        | 1.89      |
| time/                 |           |
|    fps                | 126       |
|    iterations         | 50800     |
|    time_elapsed       | 2006      |
|    total_timesteps    | 254000    |
| train/                |           |
|    entropy_loss       | -0.0235   |
|    explained_variance | 0.981     |
|    learning_rate      | 0.0007    |
|    n_updates          | 50799     |
|    policy_loss        | -1.81e-06 |
|    value_loss         | 3.76e-06  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 759       |
|    ep_rew_mean        | 1.89      |
| time/                 |           |
|    fps                | 126       |
|    iterations         | 50900     |
|    time_elapsed       | 2010      |
|    total_timesteps    | 254500    |
| train/    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 769      |
|    ep_rew_mean        | 1.98     |
| time/                 |          |
|    fps                | 126      |
|    iterations         | 52100    |
|    time_elapsed       | 2057     |
|    total_timesteps    | 260500   |
| train/                |          |
|    entropy_loss       | -0.0381  |
|    explained_variance | 0.926    |
|    learning_rate      | 0.0007   |
|    n_updates          | 52099    |
|    policy_loss        | 5.09e-06 |
|    value_loss         | 1.11e-05 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 780       |
|    ep_rew_mean        | 2.07      |
| time/                 |           |
|    fps                | 126       |
|    iterations         | 52200     |
|    time_elapsed       | 2061      |
|    total_timesteps    | 261000    |
| train/                |    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 802       |
|    ep_rew_mean        | 2.25      |
| time/                 |           |
|    fps                | 126       |
|    iterations         | 53400     |
|    time_elapsed       | 2108      |
|    total_timesteps    | 267000    |
| train/                |           |
|    entropy_loss       | -0.00258  |
|    explained_variance | -101      |
|    learning_rate      | 0.0007    |
|    n_updates          | 53399     |
|    policy_loss        | -5.41e-07 |
|    value_loss         | 5.38e-06  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 802      |
|    ep_rew_mean        | 2.25     |
| time/                 |          |
|    fps                | 126      |
|    iterations         | 53500    |
|    time_elapsed       | 2112     |
|    total_timesteps    | 267500   |
| train/             

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 781      |
|    ep_rew_mean        | 2.07     |
| time/                 |          |
|    fps                | 7        |
|    iterations         | 54700    |
|    time_elapsed       | 37986    |
|    total_timesteps    | 273500   |
| train/                |          |
|    entropy_loss       | -0.00334 |
|    explained_variance | -232     |
|    learning_rate      | 0.0007   |
|    n_updates          | 54699    |
|    policy_loss        | -1.6e-07 |
|    value_loss         | 2.44e-07 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 748      |
|    ep_rew_mean        | 1.8      |
| time/                 |          |
|    fps                | 7        |
|    iterations         | 54800    |
|    time_elapsed       | 37991    |
|    total_timesteps    | 274000   |
| train/                |          |
|

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 804       |
|    ep_rew_mean        | 2.25      |
| time/                 |           |
|    fps                | 7         |
|    iterations         | 56000     |
|    time_elapsed       | 38039     |
|    total_timesteps    | 280000    |
| train/                |           |
|    entropy_loss       | -0.0418   |
|    explained_variance | 0.976     |
|    learning_rate      | 0.0007    |
|    n_updates          | 55999     |
|    policy_loss        | -4.43e-06 |
|    value_loss         | 4.1e-06   |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 815      |
|    ep_rew_mean        | 2.34     |
| time/                 |          |
|    fps                | 7        |
|    iterations         | 56100    |
|    time_elapsed       | 38043    |
|    total_timesteps    | 280500   |
| train/             

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 849       |
|    ep_rew_mean        | 2.61      |
| time/                 |           |
|    fps                | 7         |
|    iterations         | 57300     |
|    time_elapsed       | 38091     |
|    total_timesteps    | 286500    |
| train/                |           |
|    entropy_loss       | -0.0105   |
|    explained_variance | -83.6     |
|    learning_rate      | 0.0007    |
|    n_updates          | 57299     |
|    policy_loss        | -2.32e-06 |
|    value_loss         | 8.31e-06  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 860       |
|    ep_rew_mean        | 2.7       |
| time/                 |           |
|    fps                | 7         |
|    iterations         | 57400     |
|    time_elapsed       | 38095     |
|    total_timesteps    | 287000    |
| train/    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 860      |
|    ep_rew_mean        | 2.7      |
| time/                 |          |
|    fps                | 7        |
|    iterations         | 58600    |
|    time_elapsed       | 38142    |
|    total_timesteps    | 293000   |
| train/                |          |
|    entropy_loss       | -0.00274 |
|    explained_variance | -63.2    |
|    learning_rate      | 0.0007   |
|    n_updates          | 58599    |
|    policy_loss        | 3.01e-07 |
|    value_loss         | 3.32e-06 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 860       |
|    ep_rew_mean        | 2.7       |
| time/                 |           |
|    fps                | 7         |
|    iterations         | 58700     |
|    time_elapsed       | 38146     |
|    total_timesteps    | 293500    |
| train/                |    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 816       |
|    ep_rew_mean        | 2.34      |
| time/                 |           |
|    fps                | 7         |
|    iterations         | 59900     |
|    time_elapsed       | 38194     |
|    total_timesteps    | 299500    |
| train/                |           |
|    entropy_loss       | -0.00337  |
|    explained_variance | -1.12     |
|    learning_rate      | 0.0007    |
|    n_updates          | 59899     |
|    policy_loss        | -1.94e-07 |
|    value_loss         | 1.51e-06  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 816      |
|    ep_rew_mean        | 2.34     |
| time/                 |          |
|    fps                | 7        |
|    iterations         | 60000    |
|    time_elapsed       | 38198    |
|    total_timesteps    | 300000   |
| train/             

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 827       |
|    ep_rew_mean        | 2.43      |
| time/                 |           |
|    fps                | 8         |
|    iterations         | 61200     |
|    time_elapsed       | 38246     |
|    total_timesteps    | 306000    |
| train/                |           |
|    entropy_loss       | -0.074    |
|    explained_variance | 0.945     |
|    learning_rate      | 0.0007    |
|    n_updates          | 61199     |
|    policy_loss        | -4.04e-05 |
|    value_loss         | 2e-05     |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 827       |
|    ep_rew_mean        | 2.43      |
| time/                 |           |
|    fps                | 8         |
|    iterations         | 61300     |
|    time_elapsed       | 38250     |
|    total_timesteps    | 306500    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 838       |
|    ep_rew_mean        | 2.52      |
| time/                 |           |
|    fps                | 8         |
|    iterations         | 62500     |
|    time_elapsed       | 38297     |
|    total_timesteps    | 312500    |
| train/                |           |
|    entropy_loss       | -0.0534   |
|    explained_variance | 0.993     |
|    learning_rate      | 0.0007    |
|    n_updates          | 62499     |
|    policy_loss        | -1.16e-05 |
|    value_loss         | 3.03e-06  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 849      |
|    ep_rew_mean        | 2.61     |
| time/                 |          |
|    fps                | 8        |
|    iterations         | 62600    |
|    time_elapsed       | 38301    |
|    total_timesteps    | 313000   |
| train/             

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 915      |
|    ep_rew_mean        | 3.15     |
| time/                 |          |
|    fps                | 8        |
|    iterations         | 63800    |
|    time_elapsed       | 38349    |
|    total_timesteps    | 319000   |
| train/                |          |
|    entropy_loss       | -0.0127  |
|    explained_variance | 1        |
|    learning_rate      | 0.0007   |
|    n_updates          | 63799    |
|    policy_loss        | 1.44e-06 |
|    value_loss         | 1.52e-05 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 904      |
|    ep_rew_mean        | 3.06     |
| time/                 |          |
|    fps                | 8        |
|    iterations         | 63900    |
|    time_elapsed       | 38353    |
|    total_timesteps    | 319500   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 826      |
|    ep_rew_mean        | 2.43     |
| time/                 |          |
|    fps                | 8        |
|    iterations         | 65100    |
|    time_elapsed       | 38401    |
|    total_timesteps    | 325500   |
| train/                |          |
|    entropy_loss       | -0.00238 |
|    explained_variance | -14.4    |
|    learning_rate      | 0.0007   |
|    n_updates          | 65099    |
|    policy_loss        | 1.49e-07 |
|    value_loss         | 6.84e-07 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 816      |
|    ep_rew_mean        | 2.34     |
| time/                 |          |
|    fps                | 8        |
|    iterations         | 65200    |
|    time_elapsed       | 38405    |
|    total_timesteps    | 326000   |
| train/                |          |
|

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 815       |
|    ep_rew_mean        | 2.34      |
| time/                 |           |
|    fps                | 8         |
|    iterations         | 66400     |
|    time_elapsed       | 38452     |
|    total_timesteps    | 332000    |
| train/                |           |
|    entropy_loss       | -0.00193  |
|    explained_variance | -0.932    |
|    learning_rate      | 0.0007    |
|    n_updates          | 66399     |
|    policy_loss        | -1.33e-07 |
|    value_loss         | 9.71e-07  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 782      |
|    ep_rew_mean        | 2.07     |
| time/                 |          |
|    fps                | 8        |
|    iterations         | 66500    |
|    time_elapsed       | 38456    |
|    total_timesteps    | 332500   |
| train/             

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 782       |
|    ep_rew_mean        | 2.07      |
| time/                 |           |
|    fps                | 8         |
|    iterations         | 67700     |
|    time_elapsed       | 38504     |
|    total_timesteps    | 338500    |
| train/                |           |
|    entropy_loss       | -0.0112   |
|    explained_variance | -37.3     |
|    learning_rate      | 0.0007    |
|    n_updates          | 67699     |
|    policy_loss        | -1.47e-06 |
|    value_loss         | 5.48e-06  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 792      |
|    ep_rew_mean        | 2.16     |
| time/                 |          |
|    fps                | 8        |
|    iterations         | 67800    |
|    time_elapsed       | 38508    |
|    total_timesteps    | 339000   |
| train/             

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 859      |
|    ep_rew_mean        | 2.7      |
| time/                 |          |
|    fps                | 8        |
|    iterations         | 69000    |
|    time_elapsed       | 38556    |
|    total_timesteps    | 345000   |
| train/                |          |
|    entropy_loss       | -0.0127  |
|    explained_variance | -0.148   |
|    learning_rate      | 0.0007   |
|    n_updates          | 68999    |
|    policy_loss        | 4.25e-07 |
|    value_loss         | 9.87e-08 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 858      |
|    ep_rew_mean        | 2.7      |
| time/                 |          |
|    fps                | 8        |
|    iterations         | 69100    |
|    time_elapsed       | 38560    |
|    total_timesteps    | 345500   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 902      |
|    ep_rew_mean        | 3.06     |
| time/                 |          |
|    fps                | 9        |
|    iterations         | 70300    |
|    time_elapsed       | 38607    |
|    total_timesteps    | 351500   |
| train/                |          |
|    entropy_loss       | -0.0108  |
|    explained_variance | -161     |
|    learning_rate      | 0.0007   |
|    n_updates          | 70299    |
|    policy_loss        | 4.22e-07 |
|    value_loss         | 3.13e-06 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 924      |
|    ep_rew_mean        | 3.24     |
| time/                 |          |
|    fps                | 9        |
|    iterations         | 70400    |
|    time_elapsed       | 38611    |
|    total_timesteps    | 352000   |
| train/                |          |
|

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 913       |
|    ep_rew_mean        | 3.15      |
| time/                 |           |
|    fps                | 9         |
|    iterations         | 71600     |
|    time_elapsed       | 38659     |
|    total_timesteps    | 358000    |
| train/                |           |
|    entropy_loss       | -0.0539   |
|    explained_variance | 0.959     |
|    learning_rate      | 0.0007    |
|    n_updates          | 71599     |
|    policy_loss        | -5.09e-06 |
|    value_loss         | 4.68e-06  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 913       |
|    ep_rew_mean        | 3.15      |
| time/                 |           |
|    fps                | 9         |
|    iterations         | 71700     |
|    time_elapsed       | 38663     |
|    total_timesteps    | 358500    |
| train/    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 914      |
|    ep_rew_mean        | 3.15     |
| time/                 |          |
|    fps                | 9        |
|    iterations         | 72900    |
|    time_elapsed       | 38710    |
|    total_timesteps    | 364500   |
| train/                |          |
|    entropy_loss       | -0.0531  |
|    explained_variance | 1        |
|    learning_rate      | 0.0007   |
|    n_updates          | 72899    |
|    policy_loss        | 1.13e-05 |
|    value_loss         | 1.19e-05 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 903       |
|    ep_rew_mean        | 3.06      |
| time/                 |           |
|    fps                | 9         |
|    iterations         | 73000     |
|    time_elapsed       | 38714     |
|    total_timesteps    | 365000    |
| train/                |    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 827      |
|    ep_rew_mean        | 2.43     |
| time/                 |          |
|    fps                | 9        |
|    iterations         | 74200    |
|    time_elapsed       | 38762    |
|    total_timesteps    | 371000   |
| train/                |          |
|    entropy_loss       | -0.00228 |
|    explained_variance | 0.00187  |
|    learning_rate      | 0.0007   |
|    n_updates          | 74199    |
|    policy_loss        | 3.15e-07 |
|    value_loss         | 2.42e-06 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 805      |
|    ep_rew_mean        | 2.25     |
| time/                 |          |
|    fps                | 9        |
|    iterations         | 74300    |
|    time_elapsed       | 38766    |
|    total_timesteps    | 371500   |
| train/                |          |
|

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 817       |
|    ep_rew_mean        | 2.34      |
| time/                 |           |
|    fps                | 9         |
|    iterations         | 75500     |
|    time_elapsed       | 38814     |
|    total_timesteps    | 377500    |
| train/                |           |
|    entropy_loss       | -0.0216   |
|    explained_variance | 0.816     |
|    learning_rate      | 0.0007    |
|    n_updates          | 75499     |
|    policy_loss        | -2.27e-05 |
|    value_loss         | 8.97e-05  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 828       |
|    ep_rew_mean        | 2.43      |
| time/                 |           |
|    fps                | 9         |
|    iterations         | 75600     |
|    time_elapsed       | 38818     |
|    total_timesteps    | 378000    |
| train/    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 816      |
|    ep_rew_mean        | 2.34     |
| time/                 |          |
|    fps                | 9        |
|    iterations         | 76800    |
|    time_elapsed       | 38866    |
|    total_timesteps    | 384000   |
| train/                |          |
|    entropy_loss       | -0.00186 |
|    explained_variance | -2.54    |
|    learning_rate      | 0.0007   |
|    n_updates          | 76799    |
|    policy_loss        | 1.64e-07 |
|    value_loss         | 1.22e-06 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 816      |
|    ep_rew_mean        | 2.34     |
| time/                 |          |
|    fps                | 9        |
|    iterations         | 76900    |
|    time_elapsed       | 38870    |
|    total_timesteps    | 384500   |
| train/                |          |
|

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 850       |
|    ep_rew_mean        | 2.61      |
| time/                 |           |
|    fps                | 10        |
|    iterations         | 78100     |
|    time_elapsed       | 38917     |
|    total_timesteps    | 390500    |
| train/                |           |
|    entropy_loss       | -0.00229  |
|    explained_variance | 0         |
|    learning_rate      | 0.0007    |
|    n_updates          | 78099     |
|    policy_loss        | -6.31e-08 |
|    value_loss         | 9.62e-08  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 860       |
|    ep_rew_mean        | 2.7       |
| time/                 |           |
|    fps                | 10        |
|    iterations         | 78200     |
|    time_elapsed       | 38921     |
|    total_timesteps    | 391000    |
| train/    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 871      |
|    ep_rew_mean        | 2.79     |
| time/                 |          |
|    fps                | 10       |
|    iterations         | 79400    |
|    time_elapsed       | 38969    |
|    total_timesteps    | 397000   |
| train/                |          |
|    entropy_loss       | -0.0398  |
|    explained_variance | 0.979    |
|    learning_rate      | 0.0007   |
|    n_updates          | 79399    |
|    policy_loss        | 0.00473  |
|    value_loss         | 3.72e-06 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 882      |
|    ep_rew_mean        | 2.88     |
| time/                 |          |
|    fps                | 10       |
|    iterations         | 79500    |
|    time_elapsed       | 38973    |
|    total_timesteps    | 397500   |
| train/                |          |
|

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 827       |
|    ep_rew_mean        | 2.43      |
| time/                 |           |
|    fps                | 10        |
|    iterations         | 80700     |
|    time_elapsed       | 39020     |
|    total_timesteps    | 403500    |
| train/                |           |
|    entropy_loss       | -0.035    |
|    explained_variance | 0.737     |
|    learning_rate      | 0.0007    |
|    n_updates          | 80699     |
|    policy_loss        | -4.04e-05 |
|    value_loss         | 0.000119  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 816      |
|    ep_rew_mean        | 2.34     |
| time/                 |          |
|    fps                | 10       |
|    iterations         | 80800    |
|    time_elapsed       | 39024    |
|    total_timesteps    | 404000   |
| train/             

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 771       |
|    ep_rew_mean        | 1.98      |
| time/                 |           |
|    fps                | 10        |
|    iterations         | 82000     |
|    time_elapsed       | 39071     |
|    total_timesteps    | 410000    |
| train/                |           |
|    entropy_loss       | -0.00241  |
|    explained_variance | 0.00944   |
|    learning_rate      | 0.0007    |
|    n_updates          | 81999     |
|    policy_loss        | -9.13e-08 |
|    value_loss         | 1.82e-07  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 760       |
|    ep_rew_mean        | 1.89      |
| time/                 |           |
|    fps                | 10        |
|    iterations         | 82100     |
|    time_elapsed       | 39075     |
|    total_timesteps    | 410500    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 737       |
|    ep_rew_mean        | 1.71      |
| time/                 |           |
|    fps                | 10        |
|    iterations         | 83300     |
|    time_elapsed       | 39122     |
|    total_timesteps    | 416500    |
| train/                |           |
|    entropy_loss       | -0.0118   |
|    explained_variance | -26.3     |
|    learning_rate      | 0.0007    |
|    n_updates          | 83299     |
|    policy_loss        | -3.84e-08 |
|    value_loss         | 2.97e-06  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 748       |
|    ep_rew_mean        | 1.8       |
| time/                 |           |
|    fps                | 10        |
|    iterations         | 83400     |
|    time_elapsed       | 39126     |
|    total_timesteps    | 417000    |
| train/    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 803      |
|    ep_rew_mean        | 2.25     |
| time/                 |          |
|    fps                | 10       |
|    iterations         | 84600    |
|    time_elapsed       | 39173    |
|    total_timesteps    | 423000   |
| train/                |          |
|    entropy_loss       | -0.0439  |
|    explained_variance | 1        |
|    learning_rate      | 0.0007   |
|    n_updates          | 84599    |
|    policy_loss        | 5.26e-06 |
|    value_loss         | 1.27e-05 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 825      |
|    ep_rew_mean        | 2.43     |
| time/                 |          |
|    fps                | 10       |
|    iterations         | 84700    |
|    time_elapsed       | 39177    |
|    total_timesteps    | 423500   |
| train/                |          |
|

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 791       |
|    ep_rew_mean        | 2.16      |
| time/                 |           |
|    fps                | 10        |
|    iterations         | 85900     |
|    time_elapsed       | 39224     |
|    total_timesteps    | 429500    |
| train/                |           |
|    entropy_loss       | -0.00206  |
|    explained_variance | -56.9     |
|    learning_rate      | 0.0007    |
|    n_updates          | 85899     |
|    policy_loss        | -1.17e-06 |
|    value_loss         | 7.55e-05  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 780      |
|    ep_rew_mean        | 2.07     |
| time/                 |          |
|    fps                | 10       |
|    iterations         | 86000    |
|    time_elapsed       | 39228    |
|    total_timesteps    | 430000   |
| train/             

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 747      |
|    ep_rew_mean        | 1.8      |
| time/                 |          |
|    fps                | 11       |
|    iterations         | 87200    |
|    time_elapsed       | 39275    |
|    total_timesteps    | 436000   |
| train/                |          |
|    entropy_loss       | -0.0546  |
|    explained_variance | 0.917    |
|    learning_rate      | 0.0007   |
|    n_updates          | 87199    |
|    policy_loss        | -2.2e-05 |
|    value_loss         | 1.49e-05 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 758       |
|    ep_rew_mean        | 1.89      |
| time/                 |           |
|    fps                | 11        |
|    iterations         | 87300     |
|    time_elapsed       | 39279     |
|    total_timesteps    | 436500    |
| train/                |    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 727      |
|    ep_rew_mean        | 1.62     |
| time/                 |          |
|    fps                | 11       |
|    iterations         | 88500    |
|    time_elapsed       | 39326    |
|    total_timesteps    | 442500   |
| train/                |          |
|    entropy_loss       | -0.0249  |
|    explained_variance | -0.268   |
|    learning_rate      | 0.0007   |
|    n_updates          | 88499    |
|    policy_loss        | -1e-05   |
|    value_loss         | 1.06e-05 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 738      |
|    ep_rew_mean        | 1.71     |
| time/                 |          |
|    fps                | 11       |
|    iterations         | 88600    |
|    time_elapsed       | 39330    |
|    total_timesteps    | 443000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 782      |
|    ep_rew_mean        | 2.07     |
| time/                 |          |
|    fps                | 11       |
|    iterations         | 89800    |
|    time_elapsed       | 39377    |
|    total_timesteps    | 449000   |
| train/                |          |
|    entropy_loss       | -0.00312 |
|    explained_variance | -214     |
|    learning_rate      | 0.0007   |
|    n_updates          | 89799    |
|    policy_loss        | 7.74e-09 |
|    value_loss         | 1.86e-09 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 782       |
|    ep_rew_mean        | 2.07      |
| time/                 |           |
|    fps                | 11        |
|    iterations         | 89900     |
|    time_elapsed       | 39381     |
|    total_timesteps    | 449500    |
| train/                |    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 815      |
|    ep_rew_mean        | 2.34     |
| time/                 |          |
|    fps                | 11       |
|    iterations         | 91100    |
|    time_elapsed       | 39428    |
|    total_timesteps    | 455500   |
| train/                |          |
|    entropy_loss       | -0.00185 |
|    explained_variance | 0        |
|    learning_rate      | 0.0007   |
|    n_updates          | 91099    |
|    policy_loss        | 6.86e-09 |
|    value_loss         | 1.88e-09 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 804       |
|    ep_rew_mean        | 2.25      |
| time/                 |           |
|    fps                | 11        |
|    iterations         | 91200     |
|    time_elapsed       | 39432     |
|    total_timesteps    | 456000    |
| train/                |    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 837      |
|    ep_rew_mean        | 2.52     |
| time/                 |          |
|    fps                | 11       |
|    iterations         | 92400    |
|    time_elapsed       | 39479    |
|    total_timesteps    | 462000   |
| train/                |          |
|    entropy_loss       | -0.00153 |
|    explained_variance | 0.417    |
|    learning_rate      | 0.0007   |
|    n_updates          | 92399    |
|    policy_loss        | 2.39e-08 |
|    value_loss         | 3.4e-08  |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 825       |
|    ep_rew_mean        | 2.43      |
| time/                 |           |
|    fps                | 11        |
|    iterations         | 92500     |
|    time_elapsed       | 39483     |
|    total_timesteps    | 462500    |
| train/                |    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 848      |
|    ep_rew_mean        | 2.61     |
| time/                 |          |
|    fps                | 11       |
|    iterations         | 93700    |
|    time_elapsed       | 39530    |
|    total_timesteps    | 468500   |
| train/                |          |
|    entropy_loss       | -0.00121 |
|    explained_variance | -0.25    |
|    learning_rate      | 0.0007   |
|    n_updates          | 93699    |
|    policy_loss        | 3.73e-07 |
|    value_loss         | 1.16e-05 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 848       |
|    ep_rew_mean        | 2.61      |
| time/                 |           |
|    fps                | 11        |
|    iterations         | 93800     |
|    time_elapsed       | 39534     |
|    total_timesteps    | 469000    |
| train/                |    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 869      |
|    ep_rew_mean        | 2.79     |
| time/                 |          |
|    fps                | 12       |
|    iterations         | 95000    |
|    time_elapsed       | 39581    |
|    total_timesteps    | 475000   |
| train/                |          |
|    entropy_loss       | -0.00586 |
|    explained_variance | -271     |
|    learning_rate      | 0.0007   |
|    n_updates          | 94999    |
|    policy_loss        | 6.85e-07 |
|    value_loss         | 3.25e-06 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 869       |
|    ep_rew_mean        | 2.79      |
| time/                 |           |
|    fps                | 12        |
|    iterations         | 95100     |
|    time_elapsed       | 39585     |
|    total_timesteps    | 475500    |
| train/                |    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 825       |
|    ep_rew_mean        | 2.43      |
| time/                 |           |
|    fps                | 12        |
|    iterations         | 96300     |
|    time_elapsed       | 39632     |
|    total_timesteps    | 481500    |
| train/                |           |
|    entropy_loss       | -0.00124  |
|    explained_variance | -1.28e+08 |
|    learning_rate      | 0.0007    |
|    n_updates          | 96299     |
|    policy_loss        | 1.54e-07  |
|    value_loss         | 1.89e-06  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 814      |
|    ep_rew_mean        | 2.34     |
| time/                 |          |
|    fps                | 12       |
|    iterations         | 96400    |
|    time_elapsed       | 39636    |
|    total_timesteps    | 482000   |
| train/             

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 824      |
|    ep_rew_mean        | 2.43     |
| time/                 |          |
|    fps                | 12       |
|    iterations         | 97600    |
|    time_elapsed       | 39683    |
|    total_timesteps    | 488000   |
| train/                |          |
|    entropy_loss       | -0.00112 |
|    explained_variance | 0.345    |
|    learning_rate      | 0.0007   |
|    n_updates          | 97599    |
|    policy_loss        | -8.1e-08 |
|    value_loss         | 1.96e-06 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 824       |
|    ep_rew_mean        | 2.43      |
| time/                 |           |
|    fps                | 12        |
|    iterations         | 97700     |
|    time_elapsed       | 39687     |
|    total_timesteps    | 488500    |
| train/                |    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 792      |
|    ep_rew_mean        | 2.16     |
| time/                 |          |
|    fps                | 12       |
|    iterations         | 98900    |
|    time_elapsed       | 39734    |
|    total_timesteps    | 494500   |
| train/                |          |
|    entropy_loss       | -0.00668 |
|    explained_variance | 0.999    |
|    learning_rate      | 0.0007   |
|    n_updates          | 98899    |
|    policy_loss        | 2.13e-05 |
|    value_loss         | 0.000901 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 792       |
|    ep_rew_mean        | 2.16      |
| time/                 |           |
|    fps                | 12        |
|    iterations         | 99000     |
|    time_elapsed       | 39738     |
|    total_timesteps    | 495000    |
| train/                |    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 803      |
|    ep_rew_mean        | 2.25     |
| time/                 |          |
|    fps                | 12       |
|    iterations         | 100200   |
|    time_elapsed       | 39785    |
|    total_timesteps    | 501000   |
| train/                |          |
|    entropy_loss       | -0.0227  |
|    explained_variance | 0.997    |
|    learning_rate      | 0.0007   |
|    n_updates          | 100199   |
|    policy_loss        | 1.06e-05 |
|    value_loss         | 1.33e-05 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 803       |
|    ep_rew_mean        | 2.25      |
| time/                 |           |
|    fps                | 12        |
|    iterations         | 100300    |
|    time_elapsed       | 39789     |
|    total_timesteps    | 501500    |
| train/                |    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 738       |
|    ep_rew_mean        | 1.71      |
| time/                 |           |
|    fps                | 12        |
|    iterations         | 101500    |
|    time_elapsed       | 39836     |
|    total_timesteps    | 507500    |
| train/                |           |
|    entropy_loss       | -0.000626 |
|    explained_variance | 0.37      |
|    learning_rate      | 0.0007    |
|    n_updates          | 101499    |
|    policy_loss        | -3.52e-09 |
|    value_loss         | 5.44e-09  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 715       |
|    ep_rew_mean        | 1.53      |
| time/                 |           |
|    fps                | 12        |
|    iterations         | 101600    |
|    time_elapsed       | 39840     |
|    total_timesteps    | 508000    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 715       |
|    ep_rew_mean        | 1.53      |
| time/                 |           |
|    fps                | 12        |
|    iterations         | 102800    |
|    time_elapsed       | 39887     |
|    total_timesteps    | 514000    |
| train/                |           |
|    entropy_loss       | -0.000582 |
|    explained_variance | -1.81e+03 |
|    learning_rate      | 0.0007    |
|    n_updates          | 102799    |
|    policy_loss        | -9.03e-10 |
|    value_loss         | 2.72e-08  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 715      |
|    ep_rew_mean        | 1.53     |
| time/                 |          |
|    fps                | 12       |
|    iterations         | 102900   |
|    time_elapsed       | 39891    |
|    total_timesteps    | 514500   |
| train/             

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 759      |
|    ep_rew_mean        | 1.89     |
| time/                 |          |
|    fps                | 13       |
|    iterations         | 104100   |
|    time_elapsed       | 39938    |
|    total_timesteps    | 520500   |
| train/                |          |
|    entropy_loss       | -0.0102  |
|    explained_variance | 1        |
|    learning_rate      | 0.0007   |
|    n_updates          | 104099   |
|    policy_loss        | -1.3e-06 |
|    value_loss         | 1.19e-05 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 770       |
|    ep_rew_mean        | 1.98      |
| time/                 |           |
|    fps                | 13        |
|    iterations         | 104200    |
|    time_elapsed       | 39942     |
|    total_timesteps    | 521000    |
| train/                |    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 835       |
|    ep_rew_mean        | 2.52      |
| time/                 |           |
|    fps                | 13        |
|    iterations         | 105400    |
|    time_elapsed       | 39989     |
|    total_timesteps    | 527000    |
| train/                |           |
|    entropy_loss       | -0.000473 |
|    explained_variance | 0         |
|    learning_rate      | 0.0007    |
|    n_updates          | 105399    |
|    policy_loss        | 1.67e-08  |
|    value_loss         | 2.22e-07  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 836      |
|    ep_rew_mean        | 2.52     |
| time/                 |          |
|    fps                | 13       |
|    iterations         | 105500   |
|    time_elapsed       | 39993    |
|    total_timesteps    | 527500   |
| train/             

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 814       |
|    ep_rew_mean        | 2.34      |
| time/                 |           |
|    fps                | 13        |
|    iterations         | 106700    |
|    time_elapsed       | 40040     |
|    total_timesteps    | 533500    |
| train/                |           |
|    entropy_loss       | -0.00157  |
|    explained_variance | 0.99      |
|    learning_rate      | 0.0007    |
|    n_updates          | 106699    |
|    policy_loss        | -9.13e-07 |
|    value_loss         | 4.13e-05  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 825       |
|    ep_rew_mean        | 2.43      |
| time/                 |           |
|    fps                | 13        |
|    iterations         | 106800    |
|    time_elapsed       | 40044     |
|    total_timesteps    | 534000    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 792       |
|    ep_rew_mean        | 2.16      |
| time/                 |           |
|    fps                | 13        |
|    iterations         | 108000    |
|    time_elapsed       | 40091     |
|    total_timesteps    | 540000    |
| train/                |           |
|    entropy_loss       | -0.00969  |
|    explained_variance | 1         |
|    learning_rate      | 0.0007    |
|    n_updates          | 107999    |
|    policy_loss        | -1.11e-06 |
|    value_loss         | 8.48e-06  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 792       |
|    ep_rew_mean        | 2.16      |
| time/                 |           |
|    fps                | 13        |
|    iterations         | 108100    |
|    time_elapsed       | 40095     |
|    total_timesteps    | 540500    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 814       |
|    ep_rew_mean        | 2.34      |
| time/                 |           |
|    fps                | 13        |
|    iterations         | 109300    |
|    time_elapsed       | 40142     |
|    total_timesteps    | 546500    |
| train/                |           |
|    entropy_loss       | -0.000571 |
|    explained_variance | 0.0748    |
|    learning_rate      | 0.0007    |
|    n_updates          | 109299    |
|    policy_loss        | -4.92e-09 |
|    value_loss         | 1.28e-08  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 803      |
|    ep_rew_mean        | 2.25     |
| time/                 |          |
|    fps                | 13       |
|    iterations         | 109400   |
|    time_elapsed       | 40146    |
|    total_timesteps    | 547000   |
| train/             

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 870       |
|    ep_rew_mean        | 2.79      |
| time/                 |           |
|    fps                | 13        |
|    iterations         | 110600    |
|    time_elapsed       | 40621     |
|    total_timesteps    | 553000    |
| train/                |           |
|    entropy_loss       | -0.000557 |
|    explained_variance | 0         |
|    learning_rate      | 0.0007    |
|    n_updates          | 110599    |
|    policy_loss        | -8.17e-09 |
|    value_loss         | 3.79e-08  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 870      |
|    ep_rew_mean        | 2.79     |
| time/                 |          |
|    fps                | 13       |
|    iterations         | 110700   |
|    time_elapsed       | 40625    |
|    total_timesteps    | 553500   |
| train/             

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 816       |
|    ep_rew_mean        | 2.34      |
| time/                 |           |
|    fps                | 13        |
|    iterations         | 111900    |
|    time_elapsed       | 40676     |
|    total_timesteps    | 559500    |
| train/                |           |
|    entropy_loss       | -0.000478 |
|    explained_variance | 0.242     |
|    learning_rate      | 0.0007    |
|    n_updates          | 111899    |
|    policy_loss        | -2.35e-09 |
|    value_loss         | 4.78e-09  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 805       |
|    ep_rew_mean        | 2.25      |
| time/                 |           |
|    fps                | 13        |
|    iterations         | 112000    |
|    time_elapsed       | 40680     |
|    total_timesteps    | 560000    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 738       |
|    ep_rew_mean        | 1.71      |
| time/                 |           |
|    fps                | 13        |
|    iterations         | 113200    |
|    time_elapsed       | 40729     |
|    total_timesteps    | 566000    |
| train/                |           |
|    entropy_loss       | -0.0272   |
|    explained_variance | 0.703     |
|    learning_rate      | 0.0007    |
|    n_updates          | 113199    |
|    policy_loss        | -2.38e-05 |
|    value_loss         | 9.8e-05   |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 749       |
|    ep_rew_mean        | 1.8       |
| time/                 |           |
|    fps                | 13        |
|    iterations         | 113300    |
|    time_elapsed       | 40733     |
|    total_timesteps    | 566500    |
| train/    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 771      |
|    ep_rew_mean        | 1.98     |
| time/                 |          |
|    fps                | 14       |
|    iterations         | 114500   |
|    time_elapsed       | 40778    |
|    total_timesteps    | 572500   |
| train/                |          |
|    entropy_loss       | -0.0102  |
|    explained_variance | 0.952    |
|    learning_rate      | 0.0007   |
|    n_updates          | 114499   |
|    policy_loss        | 1.49e-05 |
|    value_loss         | 0.000155 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 782       |
|    ep_rew_mean        | 2.07      |
| time/                 |           |
|    fps                | 14        |
|    iterations         | 114600    |
|    time_elapsed       | 40782     |
|    total_timesteps    | 573000    |
| train/                |    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 738       |
|    ep_rew_mean        | 1.71      |
| time/                 |           |
|    fps                | 14        |
|    iterations         | 115800    |
|    time_elapsed       | 40827     |
|    total_timesteps    | 579000    |
| train/                |           |
|    entropy_loss       | -0.000502 |
|    explained_variance | 0.389     |
|    learning_rate      | 0.0007    |
|    n_updates          | 115799    |
|    policy_loss        | 1.47e-08  |
|    value_loss         | 1.45e-07  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 738      |
|    ep_rew_mean        | 1.71     |
| time/                 |          |
|    fps                | 14       |
|    iterations         | 115900   |
|    time_elapsed       | 40831    |
|    total_timesteps    | 579500   |
| train/             

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 738       |
|    ep_rew_mean        | 1.71      |
| time/                 |           |
|    fps                | 14        |
|    iterations         | 117100    |
|    time_elapsed       | 40877     |
|    total_timesteps    | 585500    |
| train/                |           |
|    entropy_loss       | -0.015    |
|    explained_variance | 0.95      |
|    learning_rate      | 0.0007    |
|    n_updates          | 117099    |
|    policy_loss        | -1.65e-07 |
|    value_loss         | 8.12e-06  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 749       |
|    ep_rew_mean        | 1.8       |
| time/                 |           |
|    fps                | 14        |
|    iterations         | 117200    |
|    time_elapsed       | 40881     |
|    total_timesteps    | 586000    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 760       |
|    ep_rew_mean        | 1.89      |
| time/                 |           |
|    fps                | 14        |
|    iterations         | 118400    |
|    time_elapsed       | 40925     |
|    total_timesteps    | 592000    |
| train/                |           |
|    entropy_loss       | -0.000526 |
|    explained_variance | -1.59e+05 |
|    learning_rate      | 0.0007    |
|    n_updates          | 118399    |
|    policy_loss        | 4.07e-09  |
|    value_loss         | 3.21e-08  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 749      |
|    ep_rew_mean        | 1.8      |
| time/                 |          |
|    fps                | 14       |
|    iterations         | 118500   |
|    time_elapsed       | 40929    |
|    total_timesteps    | 592500   |
| train/             

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 803      |
|    ep_rew_mean        | 2.25     |
| time/                 |          |
|    fps                | 14       |
|    iterations         | 119700   |
|    time_elapsed       | 40982    |
|    total_timesteps    | 598500   |
| train/                |          |
|    entropy_loss       | -0.0182  |
|    explained_variance | 0.899    |
|    learning_rate      | 0.0007   |
|    n_updates          | 119699   |
|    policy_loss        | 1.33e-05 |
|    value_loss         | 4.95e-05 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 814       |
|    ep_rew_mean        | 2.34      |
| time/                 |           |
|    fps                | 14        |
|    iterations         | 119800    |
|    time_elapsed       | 40986     |
|    total_timesteps    | 599000    |
| train/                |    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 747       |
|    ep_rew_mean        | 1.8       |
| time/                 |           |
|    fps                | 14        |
|    iterations         | 121000    |
|    time_elapsed       | 41035     |
|    total_timesteps    | 605000    |
| train/                |           |
|    entropy_loss       | -0.000606 |
|    explained_variance | -16.5     |
|    learning_rate      | 0.0007    |
|    n_updates          | 120999    |
|    policy_loss        | 7.12e-09  |
|    value_loss         | 5.54e-08  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 747       |
|    ep_rew_mean        | 1.8       |
| time/                 |           |
|    fps                | 14        |
|    iterations         | 121100    |
|    time_elapsed       | 41039     |
|    total_timesteps    | 605500    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 792       |
|    ep_rew_mean        | 2.16      |
| time/                 |           |
|    fps                | 14        |
|    iterations         | 122300    |
|    time_elapsed       | 41088     |
|    total_timesteps    | 611500    |
| train/                |           |
|    entropy_loss       | -0.000665 |
|    explained_variance | -74.9     |
|    learning_rate      | 0.0007    |
|    n_updates          | 122299    |
|    policy_loss        | 1.49e-08  |
|    value_loss         | 1.13e-07  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 781       |
|    ep_rew_mean        | 2.07      |
| time/                 |           |
|    fps                | 14        |
|    iterations         | 122400    |
|    time_elapsed       | 41092     |
|    total_timesteps    | 612000    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 748       |
|    ep_rew_mean        | 1.8       |
| time/                 |           |
|    fps                | 15        |
|    iterations         | 123600    |
|    time_elapsed       | 41139     |
|    total_timesteps    | 618000    |
| train/                |           |
|    entropy_loss       | -0.00699  |
|    explained_variance | -234      |
|    learning_rate      | 0.0007    |
|    n_updates          | 123599    |
|    policy_loss        | -9.02e-08 |
|    value_loss         | 4.92e-08  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 759       |
|    ep_rew_mean        | 1.89      |
| time/                 |           |
|    fps                | 15        |
|    iterations         | 123700    |
|    time_elapsed       | 41143     |
|    total_timesteps    | 618500    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 771       |
|    ep_rew_mean        | 1.98      |
| time/                 |           |
|    fps                | 15        |
|    iterations         | 124900    |
|    time_elapsed       | 41191     |
|    total_timesteps    | 624500    |
| train/                |           |
|    entropy_loss       | -0.000689 |
|    explained_variance | -41.2     |
|    learning_rate      | 0.0007    |
|    n_updates          | 124899    |
|    policy_loss        | -1.59e-08 |
|    value_loss         | 1.07e-07  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 782       |
|    ep_rew_mean        | 2.07      |
| time/                 |           |
|    fps                | 15        |
|    iterations         | 125000    |
|    time_elapsed       | 41195     |
|    total_timesteps    | 625000    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 804       |
|    ep_rew_mean        | 2.25      |
| time/                 |           |
|    fps                | 15        |
|    iterations         | 126200    |
|    time_elapsed       | 41243     |
|    total_timesteps    | 631000    |
| train/                |           |
|    entropy_loss       | -0.00117  |
|    explained_variance | 0.984     |
|    learning_rate      | 0.0007    |
|    n_updates          | 126199    |
|    policy_loss        | -1.61e-06 |
|    value_loss         | 0.000228  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 803       |
|    ep_rew_mean        | 2.25      |
| time/                 |           |
|    fps                | 15        |
|    iterations         | 126300    |
|    time_elapsed       | 41247     |
|    total_timesteps    | 631500    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 759       |
|    ep_rew_mean        | 1.89      |
| time/                 |           |
|    fps                | 15        |
|    iterations         | 127500    |
|    time_elapsed       | 41295     |
|    total_timesteps    | 637500    |
| train/                |           |
|    entropy_loss       | -0.00536  |
|    explained_variance | 0.949     |
|    learning_rate      | 0.0007    |
|    n_updates          | 127499    |
|    policy_loss        | -3.26e-06 |
|    value_loss         | 4.91e-05  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 759       |
|    ep_rew_mean        | 1.89      |
| time/                 |           |
|    fps                | 15        |
|    iterations         | 127600    |
|    time_elapsed       | 41299     |
|    total_timesteps    | 638000    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 815       |
|    ep_rew_mean        | 2.34      |
| time/                 |           |
|    fps                | 15        |
|    iterations         | 128800    |
|    time_elapsed       | 41347     |
|    total_timesteps    | 644000    |
| train/                |           |
|    entropy_loss       | -0.000675 |
|    explained_variance | -111      |
|    learning_rate      | 0.0007    |
|    n_updates          | 128799    |
|    policy_loss        | 9.59e-08  |
|    value_loss         | 4.4e-06   |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 816      |
|    ep_rew_mean        | 2.34     |
| time/                 |          |
|    fps                | 15       |
|    iterations         | 128900   |
|    time_elapsed       | 41351    |
|    total_timesteps    | 644500   |
| train/             

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 783       |
|    ep_rew_mean        | 2.07      |
| time/                 |           |
|    fps                | 15        |
|    iterations         | 130100    |
|    time_elapsed       | 41398     |
|    total_timesteps    | 650500    |
| train/                |           |
|    entropy_loss       | -0.000772 |
|    explained_variance | -162      |
|    learning_rate      | 0.0007    |
|    n_updates          | 130099    |
|    policy_loss        | -1.57e-09 |
|    value_loss         | 2.54e-07  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 794      |
|    ep_rew_mean        | 2.16     |
| time/                 |          |
|    fps                | 15       |
|    iterations         | 130200   |
|    time_elapsed       | 41402    |
|    total_timesteps    | 651000   |
| train/             

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 849       |
|    ep_rew_mean        | 2.61      |
| time/                 |           |
|    fps                | 15        |
|    iterations         | 131400    |
|    time_elapsed       | 41450     |
|    total_timesteps    | 657000    |
| train/                |           |
|    entropy_loss       | -0.000805 |
|    explained_variance | -3.39     |
|    learning_rate      | 0.0007    |
|    n_updates          | 131399    |
|    policy_loss        | 2e-08     |
|    value_loss         | 6.12e-07  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 838       |
|    ep_rew_mean        | 2.52      |
| time/                 |           |
|    fps                | 15        |
|    iterations         | 131500    |
|    time_elapsed       | 41454     |
|    total_timesteps    | 657500    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 835       |
|    ep_rew_mean        | 2.48      |
| time/                 |           |
|    fps                | 15        |
|    iterations         | 132700    |
|    time_elapsed       | 41501     |
|    total_timesteps    | 663500    |
| train/                |           |
|    entropy_loss       | -0.000659 |
|    explained_variance | -10.1     |
|    learning_rate      | 0.0007    |
|    n_updates          | 132699    |
|    policy_loss        | 1.7e-08   |
|    value_loss         | 1.46e-07  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 824       |
|    ep_rew_mean        | 2.39      |
| time/                 |           |
|    fps                | 15        |
|    iterations         | 132800    |
|    time_elapsed       | 41505     |
|    total_timesteps    | 664000    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 823       |
|    ep_rew_mean        | 2.39      |
| time/                 |           |
|    fps                | 16        |
|    iterations         | 134000    |
|    time_elapsed       | 41553     |
|    total_timesteps    | 670000    |
| train/                |           |
|    entropy_loss       | -0.000635 |
|    explained_variance | -14.3     |
|    learning_rate      | 0.0007    |
|    n_updates          | 133999    |
|    policy_loss        | 2.09e-08  |
|    value_loss         | 2.37e-07  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 801       |
|    ep_rew_mean        | 2.21      |
| time/                 |           |
|    fps                | 16        |
|    iterations         | 134100    |
|    time_elapsed       | 41557     |
|    total_timesteps    | 670500    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 801       |
|    ep_rew_mean        | 2.21      |
| time/                 |           |
|    fps                | 16        |
|    iterations         | 135300    |
|    time_elapsed       | 41605     |
|    total_timesteps    | 676500    |
| train/                |           |
|    entropy_loss       | -0.00404  |
|    explained_variance | 0.992     |
|    learning_rate      | 0.0007    |
|    n_updates          | 135299    |
|    policy_loss        | -3.83e-07 |
|    value_loss         | 3.94e-06  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 802      |
|    ep_rew_mean        | 2.21     |
| time/                 |          |
|    fps                | 16       |
|    iterations         | 135400   |
|    time_elapsed       | 41609    |
|    total_timesteps    | 677000   |
| train/             

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 847      |
|    ep_rew_mean        | 2.61     |
| time/                 |          |
|    fps                | 16       |
|    iterations         | 136600   |
|    time_elapsed       | 41656    |
|    total_timesteps    | 683000   |
| train/                |          |
|    entropy_loss       | -0.029   |
|    explained_variance | 0.991    |
|    learning_rate      | 0.0007   |
|    n_updates          | 136599   |
|    policy_loss        | 1.57e-05 |
|    value_loss         | 1.69e-05 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 858      |
|    ep_rew_mean        | 2.7      |
| time/                 |          |
|    fps                | 16       |
|    iterations         | 136700   |
|    time_elapsed       | 41660    |
|    total_timesteps    | 683500   |
| train/                |          |
|

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 870       |
|    ep_rew_mean        | 2.79      |
| time/                 |           |
|    fps                | 16        |
|    iterations         | 137900    |
|    time_elapsed       | 41708     |
|    total_timesteps    | 689500    |
| train/                |           |
|    entropy_loss       | -0.00931  |
|    explained_variance | 0.985     |
|    learning_rate      | 0.0007    |
|    n_updates          | 137899    |
|    policy_loss        | -2.14e-06 |
|    value_loss         | 6.7e-06   |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 881       |
|    ep_rew_mean        | 2.88      |
| time/                 |           |
|    fps                | 16        |
|    iterations         | 138000    |
|    time_elapsed       | 41712     |
|    total_timesteps    | 690000    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 891       |
|    ep_rew_mean        | 2.97      |
| time/                 |           |
|    fps                | 16        |
|    iterations         | 139200    |
|    time_elapsed       | 41759     |
|    total_timesteps    | 696000    |
| train/                |           |
|    entropy_loss       | -0.000491 |
|    explained_variance | -0.0362   |
|    learning_rate      | 0.0007    |
|    n_updates          | 139199    |
|    policy_loss        | -6.33e-08 |
|    value_loss         | 3.05e-06  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 891       |
|    ep_rew_mean        | 2.97      |
| time/                 |           |
|    fps                | 16        |
|    iterations         | 139300    |
|    time_elapsed       | 41763     |
|    total_timesteps    | 696500    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 836       |
|    ep_rew_mean        | 2.52      |
| time/                 |           |
|    fps                | 16        |
|    iterations         | 140500    |
|    time_elapsed       | 41811     |
|    total_timesteps    | 702500    |
| train/                |           |
|    entropy_loss       | -0.000488 |
|    explained_variance | 0         |
|    learning_rate      | 0.0007    |
|    n_updates          | 140499    |
|    policy_loss        | -1.31e-08 |
|    value_loss         | 1.3e-07   |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 814       |
|    ep_rew_mean        | 2.34      |
| time/                 |           |
|    fps                | 16        |
|    iterations         | 140600    |
|    time_elapsed       | 41815     |
|    total_timesteps    | 703000    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 771       |
|    ep_rew_mean        | 1.98      |
| time/                 |           |
|    fps                | 16        |
|    iterations         | 141800    |
|    time_elapsed       | 41863     |
|    total_timesteps    | 709000    |
| train/                |           |
|    entropy_loss       | -0.000562 |
|    explained_variance | -0.247    |
|    learning_rate      | 0.0007    |
|    n_updates          | 141799    |
|    policy_loss        | 2.8e-09   |
|    value_loss         | 4.37e-09  |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 760      |
|    ep_rew_mean        | 1.89     |
| time/                 |          |
|    fps                | 16       |
|    iterations         | 141900   |
|    time_elapsed       | 41867    |
|    total_timesteps    | 709500   |
| train/             

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 760      |
|    ep_rew_mean        | 1.89     |
| time/                 |          |
|    fps                | 17       |
|    iterations         | 143100   |
|    time_elapsed       | 41914    |
|    total_timesteps    | 715500   |
| train/                |          |
|    entropy_loss       | -0.00673 |
|    explained_variance | 0.996    |
|    learning_rate      | 0.0007   |
|    n_updates          | 143099   |
|    policy_loss        | 1.41e-06 |
|    value_loss         | 0.000954 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 760       |
|    ep_rew_mean        | 1.89      |
| time/                 |           |
|    fps                | 17        |
|    iterations         | 143200    |
|    time_elapsed       | 41918     |
|    total_timesteps    | 716000    |
| train/                |    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 793       |
|    ep_rew_mean        | 2.16      |
| time/                 |           |
|    fps                | 17        |
|    iterations         | 144400    |
|    time_elapsed       | 41965     |
|    total_timesteps    | 722000    |
| train/                |           |
|    entropy_loss       | -0.000505 |
|    explained_variance | 0.0274    |
|    learning_rate      | 0.0007    |
|    n_updates          | 144399    |
|    policy_loss        | -1.77e-08 |
|    value_loss         | 2.19e-07  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 793       |
|    ep_rew_mean        | 2.16      |
| time/                 |           |
|    fps                | 17        |
|    iterations         | 144500    |
|    time_elapsed       | 41969     |
|    total_timesteps    | 722500    |
| train/    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 837      |
|    ep_rew_mean        | 2.52     |
| time/                 |          |
|    fps                | 17       |
|    iterations         | 145700   |
|    time_elapsed       | 42017    |
|    total_timesteps    | 728500   |
| train/                |          |
|    entropy_loss       | -0.0139  |
|    explained_variance | 0.984    |
|    learning_rate      | 0.0007   |
|    n_updates          | 145699   |
|    policy_loss        | 1.1e-06  |
|    value_loss         | 2.96e-06 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 859       |
|    ep_rew_mean        | 2.7       |
| time/                 |           |
|    fps                | 17        |
|    iterations         | 145800    |
|    time_elapsed       | 42021     |
|    total_timesteps    | 729000    |
| train/                |    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 848      |
|    ep_rew_mean        | 2.61     |
| time/                 |          |
|    fps                | 17       |
|    iterations         | 147000   |
|    time_elapsed       | 42068    |
|    total_timesteps    | 735000   |
| train/                |          |
|    entropy_loss       | -0.00455 |
|    explained_variance | -1.46    |
|    learning_rate      | 0.0007   |
|    n_updates          | 146999   |
|    policy_loss        | 3.06e-07 |
|    value_loss         | 7.29e-07 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 858       |
|    ep_rew_mean        | 2.7       |
| time/                 |           |
|    fps                | 17        |
|    iterations         | 147100    |
|    time_elapsed       | 42072     |
|    total_timesteps    | 735500    |
| train/                |    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 837      |
|    ep_rew_mean        | 2.52     |
| time/                 |          |
|    fps                | 17       |
|    iterations         | 148300   |
|    time_elapsed       | 42120    |
|    total_timesteps    | 741500   |
| train/                |          |
|    entropy_loss       | -0.0202  |
|    explained_variance | 0.8      |
|    learning_rate      | 0.0007   |
|    n_updates          | 148299   |
|    policy_loss        | 1.71e-05 |
|    value_loss         | 7.18e-05 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 848       |
|    ep_rew_mean        | 2.61      |
| time/                 |           |
|    fps                | 17        |
|    iterations         | 148400    |
|    time_elapsed       | 42124     |
|    total_timesteps    | 742000    |
| train/                |    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 839       |
|    ep_rew_mean        | 2.52      |
| time/                 |           |
|    fps                | 17        |
|    iterations         | 149600    |
|    time_elapsed       | 42171     |
|    total_timesteps    | 748000    |
| train/                |           |
|    entropy_loss       | -0.0306   |
|    explained_variance | 0.998     |
|    learning_rate      | 0.0007    |
|    n_updates          | 149599    |
|    policy_loss        | -1.34e-06 |
|    value_loss         | 3e-07     |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 816      |
|    ep_rew_mean        | 2.34     |
| time/                 |          |
|    fps                | 17       |
|    iterations         | 149700   |
|    time_elapsed       | 42175    |
|    total_timesteps    | 748500   |
| train/             

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 816       |
|    ep_rew_mean        | 2.34      |
| time/                 |           |
|    fps                | 17        |
|    iterations         | 150900    |
|    time_elapsed       | 42223     |
|    total_timesteps    | 754500    |
| train/                |           |
|    entropy_loss       | -0.0329   |
|    explained_variance | 0.989     |
|    learning_rate      | 0.0007    |
|    n_updates          | 150899    |
|    policy_loss        | -2.08e-06 |
|    value_loss         | 1.51e-06  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 827       |
|    ep_rew_mean        | 2.43      |
| time/                 |           |
|    fps                | 17        |
|    iterations         | 151000    |
|    time_elapsed       | 42226     |
|    total_timesteps    | 755000    |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 850       |
|    ep_rew_mean        | 2.61      |
| time/                 |           |
|    fps                | 18        |
|    iterations         | 152200    |
|    time_elapsed       | 42274     |
|    total_timesteps    | 761000    |
| train/                |           |
|    entropy_loss       | -0.000994 |
|    explained_variance | -4.05e+08 |
|    learning_rate      | 0.0007    |
|    n_updates          | 152199    |
|    policy_loss        | 4.65e-07  |
|    value_loss         | 2.85e-05  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 816       |
|    ep_rew_mean        | 2.34      |
| time/                 |           |
|    fps                | 18        |
|    iterations         | 152300    |
|    time_elapsed       | 42278     |
|    total_timesteps    | 761500    |
| train/    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 750      |
|    ep_rew_mean        | 1.8      |
| time/                 |          |
|    fps                | 18       |
|    iterations         | 153500   |
|    time_elapsed       | 42325    |
|    total_timesteps    | 767500   |
| train/                |          |
|    entropy_loss       | -0.00131 |
|    explained_variance | -1.22    |
|    learning_rate      | 0.0007   |
|    n_updates          | 153499   |
|    policy_loss        | 1.37e-08 |
|    value_loss         | 1.75e-08 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 717      |
|    ep_rew_mean        | 1.53     |
| time/                 |          |
|    fps                | 18       |
|    iterations         | 153600   |
|    time_elapsed       | 42329    |
|    total_timesteps    | 768000   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 717      |
|    ep_rew_mean        | 1.53     |
| time/                 |          |
|    fps                | 18       |
|    iterations         | 154800   |
|    time_elapsed       | 42378    |
|    total_timesteps    | 774000   |
| train/                |          |
|    entropy_loss       | -0.00624 |
|    explained_variance | 0        |
|    learning_rate      | 0.0007   |
|    n_updates          | 154799   |
|    policy_loss        | 4.6e-07  |
|    value_loss         | 5.81e-07 |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 728      |
|    ep_rew_mean        | 1.62     |
| time/                 |          |
|    fps                | 18       |
|    iterations         | 154900   |
|    time_elapsed       | 42382    |
|    total_timesteps    | 774500   |
| train/                |          |
|

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 770      |
|    ep_rew_mean        | 1.98     |
| time/                 |          |
|    fps                | 18       |
|    iterations         | 156100   |
|    time_elapsed       | 42431    |
|    total_timesteps    | 780500   |
| train/                |          |
|    entropy_loss       | -0.0384  |
|    explained_variance | 0.996    |
|    learning_rate      | 0.0007   |
|    n_updates          | 156099   |
|    policy_loss        | 3.95e-06 |
|    value_loss         | 9.85e-07 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 781       |
|    ep_rew_mean        | 2.07      |
| time/                 |           |
|    fps                | 18        |
|    iterations         | 156200    |
|    time_elapsed       | 42435     |
|    total_timesteps    | 781000    |
| train/                |    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 825      |
|    ep_rew_mean        | 2.43     |
| time/                 |          |
|    fps                | 18       |
|    iterations         | 157400   |
|    time_elapsed       | 42483    |
|    total_timesteps    | 787000   |
| train/                |          |
|    entropy_loss       | -0.0129  |
|    explained_variance | 0.996    |
|    learning_rate      | 0.0007   |
|    n_updates          | 157399   |
|    policy_loss        | 7.72e-06 |
|    value_loss         | 0.00066  |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 814       |
|    ep_rew_mean        | 2.34      |
| time/                 |           |
|    fps                | 18        |
|    iterations         | 157500    |
|    time_elapsed       | 42487     |
|    total_timesteps    | 787500    |
| train/                |    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 803       |
|    ep_rew_mean        | 2.25      |
| time/                 |           |
|    fps                | 18        |
|    iterations         | 158700    |
|    time_elapsed       | 42535     |
|    total_timesteps    | 793500    |
| train/                |           |
|    entropy_loss       | -0.00112  |
|    explained_variance | -46.7     |
|    learning_rate      | 0.0007    |
|    n_updates          | 158699    |
|    policy_loss        | -4.41e-10 |
|    value_loss         | 6e-10     |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 804      |
|    ep_rew_mean        | 2.25     |
| time/                 |          |
|    fps                | 18       |
|    iterations         | 158800   |
|    time_elapsed       | 42539    |
|    total_timesteps    | 794000   |
| train/             

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 826       |
|    ep_rew_mean        | 2.43      |
| time/                 |           |
|    fps                | 18        |
|    iterations         | 160000    |
|    time_elapsed       | 42586     |
|    total_timesteps    | 800000    |
| train/                |           |
|    entropy_loss       | -0.000879 |
|    explained_variance | 0.493     |
|    learning_rate      | 0.0007    |
|    n_updates          | 159999    |
|    policy_loss        | -2.24e-09 |
|    value_loss         | 1.03e-09  |
-------------------------------------
train time: 42587.294620513916


  logger.warn(


In [None]:
from stable_baselines3.common.env_util import make_atari_env
from stable_baselines3.common.vec_env import VecFrameStack
from stable_baselines3 import A2C

# Replay the saved Breakout agent in a rendered window.
# Upper bound on rendered steps so the cell terminates on its own and the
# notebook still works under "Restart Kernel -> Run All".
N_PLAYBACK_STEPS = 5_000

vec_env = make_atari_env("BreakoutNoFrameskip-v4", n_envs=1, seed=0)
# Frame-stacking with 4 frames
# NOTE(review): presumably mirrors the wrappers used when "models/Breakout"
# was trained -- confirm against the training cell.
vec_env = VecFrameStack(vec_env, n_stack=4)

model = A2C.load("models/Breakout", env=vec_env)

try:
    obs = vec_env.reset()
    for _ in range(N_PLAYBACK_STEPS):
        # Stochastic action sampling (deterministic=False), as in the original cell.
        action, _states = model.predict(obs, deterministic=False)
        # No manual reset on `dones`: SB3 vectorized envs reset finished
        # sub-environments themselves (see the VecEnv documentation).
        obs, rewards, dones, info = vec_env.step(action)
        vec_env.render("human")
finally:
    # Always release the emulator, including when the kernel is interrupted
    # part-way through the playback.
    vec_env.close()
    

Wrapping the env in a VecTransposeImage.


  logger.warn(


In [12]:
# IPython introspection: `??` displays the source of A2C.load.
# Debugging leftover, not part of the training/playback pipeline.
A2C.load??

In [1]:
# Scratch check: 2e4 is a float literal; int() converts it to the integer 20000.
int(2e4)

20000