# dependencies
Connect to a CPU runtime if you want to use behaviour cloning.

In [1]:
# Disabled alternative: clone `imitation` and install it in editable mode
# (with the `dev` extras) from /content/imitation. Left commented out.
#!git clone http://github.com/HumanCompatibleAI/imitation
#!pip install -e "/content/imitation[dev]"

In [2]:
!pip install gymnasium[mujoco]

Collecting gymnasium[mujoco]
  Downloading gymnasium-0.29.1-py3-none-any.whl (953 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m953.9/953.9 kB[0m [31m13.0 MB/s[0m eta [36m0:00:00[0m
Collecting farama-notifications>=0.0.1 (from gymnasium[mujoco])
  Downloading Farama_Notifications-0.0.4-py3-none-any.whl (2.5 kB)
Collecting mujoco>=2.3.3 (from gymnasium[mujoco])
  Downloading mujoco-3.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (5.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.4/5.4 MB[0m [31m76.8 MB/s[0m eta [36m0:00:00[0m
Collecting glfw (from mujoco>=2.3.3->gymnasium[mujoco])
  Downloading glfw-2.7.0-py2.py27.py3.py30.py31.py32.py33.py34.py35.py36.py37.py38-none-manylinux2014_x86_64.whl (211 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.8/211.8 kB[0m [31m27.0 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: glfw, farama-notifications, gymnasium, mujoco
Successfully insta

In [3]:
!pip install imitation
!pip install shimmy
!pip install git+https://github.com/Stable-Baselines-Team/stable-baselines3-contrib

Collecting imitation
  Downloading imitation-1.0.0-py3-none-any.whl (216 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m216.4/216.4 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
Collecting seals~=0.2.1 (from imitation)
  Downloading seals-0.2.1-py3-none-any.whl (35 kB)
Collecting stable-baselines3~=2.0 (from imitation)
  Downloading stable_baselines3-2.2.1-py3-none-any.whl (181 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m181.7/181.7 kB[0m [31m22.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting sacred>=0.8.4 (from imitation)
  Downloading sacred-0.8.5-py2.py3-none-any.whl (107 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m107.9/107.9 kB[0m [31m16.2 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-sb3~=3.0 (from imitation)
  Downloading huggingface_sb3-3.0-py3-none-any.whl (9.7 kB)
Collecting optuna>=3.0.1 (from imitation)
  Downloading optuna-3.5.0-py3-none-any.whl (413 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━

# environment and expert

In [4]:
import gymnasium as gym
import imitation

In [5]:
import numpy as np
from imitation.policies.serialize import load_policy
from imitation.util.util import make_vec_env
from imitation.data.wrappers import RolloutInfoWrapper

  and should_run_async(code)


In [27]:
SEED = 42

# Vectorised seals HalfCheetah task with 8 environment copies. Every copy is
# wrapped in RolloutInfoWrapper, which is needed for computing rollouts later.
# Alternative task id kept for reference: "seals:seals/CartPole-v0".
env = make_vec_env(
    "seals:seals/HalfCheetah-v1",
    n_envs=8,
    rng=np.random.default_rng(SEED),
    post_wrappers=[lambda wrapped_env, _: RolloutInfoWrapper(wrapped_env)],
)

# Expert policy obtained through the "ppo-huggingface" loader for the same
# task, bound to the vectorised env created above.
expert = load_policy(
    "ppo-huggingface",
    venv=env,
    organization="HumanCompatibleAI",
    env_name="seals/HalfCheetah-v1",
)

In [34]:
from imitation.data import rollout

# Collect expert demonstrations by running `expert` in `env`.
# The stop condition asks for at least 1000 timesteps and at least 4 episodes.
# NOTE(review): the env has 8 parallel copies, so more than 4 trajectories may
# be returned — check len(rollouts_4) before relying on the "_4" in the name.
rollouts_4 = rollout.rollout(
    expert,
    env,
    rollout.make_sample_until(min_timesteps=1000, min_episodes=4), # timestep config: https://huggingface.co/HumanCompatibleAI/ppo-seals-HalfCheetah-v1
    rng=np.random.default_rng(SEED), # random number generator: the random state to use for sampling trajectories
)
# timestep config: https://huggingface.co/HumanCompatibleAI/ppo-seals-HalfCheetah-v1
# NOTE(review): an earlier note here said "4 trajectories given, each 1,000,000
# timesteps". The training log in this notebook reports ep_len_mean = 1e+03, so
# episodes appear to be 1,000 steps long; the 1,000,000 figure presumably refers
# to the expert's training budget on the linked model card — confirm.

# GAIL-PPO

In [35]:
from imitation.algorithms.adversarial.gail import GAIL
from imitation.rewards.reward_nets import BasicRewardNet
from imitation.util.networks import RunningNorm
from stable_baselines3 import PPO
from stable_baselines3.ppo import MlpPolicy
from stable_baselines3.common.evaluation import evaluate_policy
import sb3_contrib
from sb3_contrib import TRPO

# Generator: PPO with an MLP policy acting in the vectorised env.
# NOTE(review): the training log recorded in this notebook reports
# gen/train/learning_rate = 0.005, which does not match learning_rate=0.0005
# below. The cell was presumably edited after the logged run — confirm which
# value is intended and re-run so code and output agree.
# NOTE(review): with n_epochs=50 the recorded log shows gen/train/approx_kl
# rising from ~0.15 to above 3 and clip_fraction between ~0.6 and ~0.95 while
# ep_rew_mean stays negative; the policy updates may be too aggressive —
# consider fewer epochs and/or a lower learning rate (TODO confirm).
learner = PPO(
    env=env,
    policy=MlpPolicy,
    batch_size=64,
    ent_coef=0.0,
    learning_rate=0.0005,
    gamma=0.95,
    n_epochs=50,
    seed=SEED,
)

# Reward network handed to GAIL; built on the env's observation and action
# spaces, with RunningNorm as the input-normalisation layer.
reward_net = BasicRewardNet(
    observation_space=env.observation_space,
    action_space=env.action_space,
    normalize_input_layer=RunningNorm,
)

# GAIL trainer tying together the expert demonstrations, the PPO generator and
# the reward network.
# NOTE(review): allow_variable_horizon=True triggers the warning shown in this
# cell's output, yet the log reports a constant ep_len_mean of 1e+03; the flag
# may be unnecessary for this env — TODO confirm.
# NOTE(review): gen_train_timesteps is 1000, but the log shows
# gen/time/total_timesteps growing by 16384 per round, so each round collects
# far more than 1000 environment steps. Budget run time accordingly.
gail_trainer = GAIL(
    demonstrations=rollouts_4, # expert demo is here
    demo_batch_size=1024,
    gen_replay_buffer_capacity=512,
    n_disc_updates_per_round=8, # discriminator updates per training round
    venv=env,
    gen_algo=learner,
    reward_net=reward_net,
    allow_variable_horizon=True,
    gen_train_timesteps = 1000
)

Running with `allow_variable_horizon` set to True. Some algorithms are biased towards shorter or longer episodes, which may significantly confound results. Additionally, even unbiased algorithms can exploit the information leak from the termination condition, producing spuriously high performance. See https://imitation.readthedocs.io/en/latest/getting-started/variable-horizon.html for more information.


In [29]:
# Score the loaded expert over 20 evaluation episodes; the second return value
# is discarded.
# NOTE(review): this cell's execution count (In [29]) is lower than its
# neighbours', so the printed numbers come from an earlier kernel state.
# Re-run top to bottom before trusting them.
expert_reward, _ = evaluate_policy(
    expert, env, n_eval_episodes=20, return_episode_rewards=True
)
print("expert rewards after training:", np.mean(expert_reward), "+/-", np.std(expert_reward))

expert rewards after training: 1660.79641295 +/- 52.399360723476825


In [36]:
# Re-seed the vectorised env, then record the untrained learner's per-episode
# rewards over 20 evaluation episodes as a baseline for later comparison.
env.seed(SEED)
learner_rewards_before_training, _ = evaluate_policy(
    learner,
    env,
    n_eval_episodes=20,
    return_episode_rewards=True,
)

In [37]:
# The progress bar counts training rounds: this argument divided by
# gen_train_timesteps, i.e. 100000 / 1000 = 100 rounds (matching the "0/100"
# shown in the output), not gen_train_timesteps divided by this argument.
# NOTE(review): the log shows gen/time/total_timesteps increasing by 16384 per
# round, so the run consumes well over 100000 environment steps in total.
gail_trainer.train(100000)

round:   0%|          | 0/100 [00:00<?, ?it/s]

------------------------------------------
| raw/                        |          |
|    gen/rollout/ep_len_mean  | 1e+03    |
|    gen/rollout/ep_rew_mean  | -370     |
|    gen/time/fps             | 2695     |
|    gen/time/iterations      | 1        |
|    gen/time/time_elapsed    | 6        |
|    gen/time/total_timesteps | 16384    |
------------------------------------------
--------------------------------------------------
| raw/                                |          |
|    disc/disc_acc                    | 0.608    |
|    disc/disc_acc_expert             | 0.337    |
|    disc/disc_acc_gen                | 0.88     |
|    disc/disc_entropy                | 0.693    |
|    disc/disc_loss                   | 0.686    |
|    disc/disc_proportion_expert_pred | 0.229    |
|    disc/disc_proportion_expert_true | 0.5      |
|    disc/global_step                 | 1        |
|    disc/n_expert                    | 1.02e+03 |
|    disc/n_generated                 | 1.02e+03 |
-

round:   1%|          | 1/100 [01:02<1:42:56, 62.39s/it]

---------------------------------------------------
| raw/                               |            |
|    gen/rollout/ep_len_mean         | 1e+03      |
|    gen/rollout/ep_rew_mean         | -355       |
|    gen/rollout/ep_rew_wrapped_mean | 756        |
|    gen/time/fps                    | 2484       |
|    gen/time/iterations             | 1          |
|    gen/time/time_elapsed           | 6          |
|    gen/time/total_timesteps        | 32768      |
|    gen/train/approx_kl             | 0.14682981 |
|    gen/train/clip_fraction         | 0.606      |
|    gen/train/clip_range            | 0.2        |
|    gen/train/entropy_loss          | -8.62      |
|    gen/train/explained_variance    | -0.626     |
|    gen/train/learning_rate         | 0.005      |
|    gen/train/loss                  | 0.154      |
|    gen/train/n_updates             | 50         |
|    gen/train/policy_gradient_loss  | 0.0187     |
|    gen/train/std                   | 1.01       |
|    gen/tra

round:   2%|▏         | 2/100 [02:04<1:41:48, 62.34s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 1e+03     |
|    gen/rollout/ep_rew_mean         | -368      |
|    gen/rollout/ep_rew_wrapped_mean | 713       |
|    gen/time/fps                    | 2482      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 6         |
|    gen/time/total_timesteps        | 49152     |
|    gen/train/approx_kl             | 0.3219918 |
|    gen/train/clip_fraction         | 0.75      |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -8.68     |
|    gen/train/explained_variance    | -0.698    |
|    gen/train/learning_rate         | 0.005     |
|    gen/train/loss                  | 0.0342    |
|    gen/train/n_updates             | 100       |
|    gen/train/policy_gradient_loss  | 0.0159    |
|    gen/train/std                   | 1.03      |
|    gen/train/value_loss      

round:   3%|▎         | 3/100 [03:06<1:40:35, 62.23s/it]

---------------------------------------------------
| raw/                               |            |
|    gen/rollout/ep_len_mean         | 1e+03      |
|    gen/rollout/ep_rew_mean         | -367       |
|    gen/rollout/ep_rew_wrapped_mean | 690        |
|    gen/time/fps                    | 2408       |
|    gen/time/iterations             | 1          |
|    gen/time/time_elapsed           | 6          |
|    gen/time/total_timesteps        | 65536      |
|    gen/train/approx_kl             | 0.43231696 |
|    gen/train/clip_fraction         | 0.793      |
|    gen/train/clip_range            | 0.2        |
|    gen/train/entropy_loss          | -8.81      |
|    gen/train/explained_variance    | 0.434      |
|    gen/train/learning_rate         | 0.005      |
|    gen/train/loss                  | -0.0693    |
|    gen/train/n_updates             | 150        |
|    gen/train/policy_gradient_loss  | 0.0301     |
|    gen/train/std                   | 1.03       |
|    gen/tra

round:   4%|▍         | 4/100 [04:08<1:39:20, 62.09s/it]

---------------------------------------------------
| raw/                               |            |
|    gen/rollout/ep_len_mean         | 1e+03      |
|    gen/rollout/ep_rew_mean         | -366       |
|    gen/rollout/ep_rew_wrapped_mean | 683        |
|    gen/time/fps                    | 2499       |
|    gen/time/iterations             | 1          |
|    gen/time/time_elapsed           | 6          |
|    gen/time/total_timesteps        | 81920      |
|    gen/train/approx_kl             | 0.49590316 |
|    gen/train/clip_fraction         | 0.815      |
|    gen/train/clip_range            | 0.2        |
|    gen/train/entropy_loss          | -8.87      |
|    gen/train/explained_variance    | 0.781      |
|    gen/train/learning_rate         | 0.005      |
|    gen/train/loss                  | 0.00692    |
|    gen/train/n_updates             | 200        |
|    gen/train/policy_gradient_loss  | 0.0466     |
|    gen/train/std                   | 1.04       |
|    gen/tra

round:   5%|▌         | 5/100 [05:10<1:38:15, 62.05s/it]

---------------------------------------------------
| raw/                               |            |
|    gen/rollout/ep_len_mean         | 1e+03      |
|    gen/rollout/ep_rew_mean         | -373       |
|    gen/rollout/ep_rew_wrapped_mean | 677        |
|    gen/time/fps                    | 2675       |
|    gen/time/iterations             | 1          |
|    gen/time/time_elapsed           | 6          |
|    gen/time/total_timesteps        | 98304      |
|    gen/train/approx_kl             | 0.55193627 |
|    gen/train/clip_fraction         | 0.824      |
|    gen/train/clip_range            | 0.2        |
|    gen/train/entropy_loss          | -8.95      |
|    gen/train/explained_variance    | 0.836      |
|    gen/train/learning_rate         | 0.005      |
|    gen/train/loss                  | 0.102      |
|    gen/train/n_updates             | 250        |
|    gen/train/policy_gradient_loss  | 0.0566     |
|    gen/train/std                   | 1.08       |
|    gen/tra

round:   6%|▌         | 6/100 [06:12<1:37:02, 61.94s/it]

---------------------------------------------------
| raw/                               |            |
|    gen/rollout/ep_len_mean         | 1e+03      |
|    gen/rollout/ep_rew_mean         | -374       |
|    gen/rollout/ep_rew_wrapped_mean | 659        |
|    gen/time/fps                    | 2691       |
|    gen/time/iterations             | 1          |
|    gen/time/time_elapsed           | 6          |
|    gen/time/total_timesteps        | 114688     |
|    gen/train/approx_kl             | 0.87408113 |
|    gen/train/clip_fraction         | 0.851      |
|    gen/train/clip_range            | 0.2        |
|    gen/train/entropy_loss          | -9.1       |
|    gen/train/explained_variance    | 0.866      |
|    gen/train/learning_rate         | 0.005      |
|    gen/train/loss                  | 0.0611     |
|    gen/train/n_updates             | 300        |
|    gen/train/policy_gradient_loss  | 0.0573     |
|    gen/train/std                   | 1.07       |
|    gen/tra

round:   7%|▋         | 7/100 [07:13<1:35:49, 61.82s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 1e+03     |
|    gen/rollout/ep_rew_mean         | -377      |
|    gen/rollout/ep_rew_wrapped_mean | 629       |
|    gen/time/fps                    | 2661      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 6         |
|    gen/time/total_timesteps        | 131072    |
|    gen/train/approx_kl             | 1.0928328 |
|    gen/train/clip_fraction         | 0.874     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -9.12     |
|    gen/train/explained_variance    | 0.917     |
|    gen/train/learning_rate         | 0.005     |
|    gen/train/loss                  | 0.0684    |
|    gen/train/n_updates             | 350       |
|    gen/train/policy_gradient_loss  | 0.08      |
|    gen/train/std                   | 1.1       |
|    gen/train/value_loss      

round:   8%|▊         | 8/100 [08:15<1:34:43, 61.78s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 1e+03     |
|    gen/rollout/ep_rew_mean         | -369      |
|    gen/rollout/ep_rew_wrapped_mean | 590       |
|    gen/time/fps                    | 2435      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 6         |
|    gen/time/total_timesteps        | 147456    |
|    gen/train/approx_kl             | 1.1818163 |
|    gen/train/clip_fraction         | 0.887     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -9.28     |
|    gen/train/explained_variance    | 0.928     |
|    gen/train/learning_rate         | 0.005     |
|    gen/train/loss                  | 0.00463   |
|    gen/train/n_updates             | 400       |
|    gen/train/policy_gradient_loss  | 0.0881    |
|    gen/train/std                   | 1.11      |
|    gen/train/value_loss      

round:   9%|▉         | 9/100 [09:17<1:33:45, 61.81s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 1e+03     |
|    gen/rollout/ep_rew_mean         | -349      |
|    gen/rollout/ep_rew_wrapped_mean | 541       |
|    gen/time/fps                    | 2424      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 6         |
|    gen/time/total_timesteps        | 163840    |
|    gen/train/approx_kl             | 1.3883685 |
|    gen/train/clip_fraction         | 0.902     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -9.39     |
|    gen/train/explained_variance    | 0.918     |
|    gen/train/learning_rate         | 0.005     |
|    gen/train/loss                  | 0.0509    |
|    gen/train/n_updates             | 450       |
|    gen/train/policy_gradient_loss  | 0.0875    |
|    gen/train/std                   | 1.16      |
|    gen/train/value_loss      

round:  10%|█         | 10/100 [10:18<1:32:29, 61.66s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 1e+03     |
|    gen/rollout/ep_rew_mean         | -328      |
|    gen/rollout/ep_rew_wrapped_mean | 483       |
|    gen/time/fps                    | 2482      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 6         |
|    gen/time/total_timesteps        | 180224    |
|    gen/train/approx_kl             | 1.7315704 |
|    gen/train/clip_fraction         | 0.91      |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -9.75     |
|    gen/train/explained_variance    | 0.922     |
|    gen/train/learning_rate         | 0.005     |
|    gen/train/loss                  | 0.0165    |
|    gen/train/n_updates             | 500       |
|    gen/train/policy_gradient_loss  | 0.0895    |
|    gen/train/std                   | 1.22      |
|    gen/train/value_loss      

round:  11%|█         | 11/100 [11:19<1:31:08, 61.44s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 1e+03     |
|    gen/rollout/ep_rew_mean         | -301      |
|    gen/rollout/ep_rew_wrapped_mean | 419       |
|    gen/time/fps                    | 2480      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 6         |
|    gen/time/total_timesteps        | 196608    |
|    gen/train/approx_kl             | 2.0649948 |
|    gen/train/clip_fraction         | 0.917     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -10.1     |
|    gen/train/explained_variance    | 0.949     |
|    gen/train/learning_rate         | 0.005     |
|    gen/train/loss                  | 0.186     |
|    gen/train/n_updates             | 550       |
|    gen/train/policy_gradient_loss  | 0.0956    |
|    gen/train/std                   | 1.26      |
|    gen/train/value_loss      

round:  12%|█▏        | 12/100 [12:20<1:29:53, 61.29s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 1e+03     |
|    gen/rollout/ep_rew_mean         | -270      |
|    gen/rollout/ep_rew_wrapped_mean | 357       |
|    gen/time/fps                    | 2486      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 6         |
|    gen/time/total_timesteps        | 212992    |
|    gen/train/approx_kl             | 3.7890322 |
|    gen/train/clip_fraction         | 0.946     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -10.2     |
|    gen/train/explained_variance    | 0.961     |
|    gen/train/learning_rate         | 0.005     |
|    gen/train/loss                  | 0.118     |
|    gen/train/n_updates             | 600       |
|    gen/train/policy_gradient_loss  | 0.0977    |
|    gen/train/std                   | 1.29      |
|    gen/train/value_loss      

round:  13%|█▎        | 13/100 [13:22<1:29:00, 61.39s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 1e+03     |
|    gen/rollout/ep_rew_mean         | -251      |
|    gen/rollout/ep_rew_wrapped_mean | 301       |
|    gen/time/fps                    | 2686      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 6         |
|    gen/time/total_timesteps        | 229376    |
|    gen/train/approx_kl             | 2.8545332 |
|    gen/train/clip_fraction         | 0.936     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -10.5     |
|    gen/train/explained_variance    | 0.97      |
|    gen/train/learning_rate         | 0.005     |
|    gen/train/loss                  | 0.27      |
|    gen/train/n_updates             | 650       |
|    gen/train/policy_gradient_loss  | 0.129     |
|    gen/train/std                   | 1.33      |
|    gen/train/value_loss      

round:  14%|█▍        | 14/100 [14:23<1:27:44, 61.21s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 1e+03     |
|    gen/rollout/ep_rew_mean         | -245      |
|    gen/rollout/ep_rew_wrapped_mean | 265       |
|    gen/time/fps                    | 2726      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 6         |
|    gen/time/total_timesteps        | 245760    |
|    gen/train/approx_kl             | 1.2510793 |
|    gen/train/clip_fraction         | 0.9       |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -10.7     |
|    gen/train/explained_variance    | 0.949     |
|    gen/train/learning_rate         | 0.005     |
|    gen/train/loss                  | 0.3       |
|    gen/train/n_updates             | 700       |
|    gen/train/policy_gradient_loss  | 0.131     |
|    gen/train/std                   | 1.4       |
|    gen/train/value_loss      

round:  15%|█▌        | 15/100 [15:23<1:26:23, 60.99s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 1e+03     |
|    gen/rollout/ep_rew_mean         | -257      |
|    gen/rollout/ep_rew_wrapped_mean | 223       |
|    gen/time/fps                    | 2725      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 6         |
|    gen/time/total_timesteps        | 262144    |
|    gen/train/approx_kl             | 0.9164945 |
|    gen/train/clip_fraction         | 0.871     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -10.9     |
|    gen/train/explained_variance    | 0.815     |
|    gen/train/learning_rate         | 0.005     |
|    gen/train/loss                  | 0.129     |
|    gen/train/n_updates             | 750       |
|    gen/train/policy_gradient_loss  | 0.109     |
|    gen/train/std                   | 1.46      |
|    gen/train/value_loss      

round:  16%|█▌        | 16/100 [16:24<1:25:19, 60.94s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 1e+03     |
|    gen/rollout/ep_rew_mean         | -280      |
|    gen/rollout/ep_rew_wrapped_mean | 190       |
|    gen/time/fps                    | 2738      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 5         |
|    gen/time/total_timesteps        | 278528    |
|    gen/train/approx_kl             | 0.7929547 |
|    gen/train/clip_fraction         | 0.849     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -11.1     |
|    gen/train/explained_variance    | 0.678     |
|    gen/train/learning_rate         | 0.005     |
|    gen/train/loss                  | 0.125     |
|    gen/train/n_updates             | 800       |
|    gen/train/policy_gradient_loss  | 0.102     |
|    gen/train/std                   | 1.5       |
|    gen/train/value_loss      

round:  17%|█▋        | 17/100 [17:24<1:24:01, 60.75s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 1e+03     |
|    gen/rollout/ep_rew_mean         | -307      |
|    gen/rollout/ep_rew_wrapped_mean | 163       |
|    gen/time/fps                    | 2618      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 6         |
|    gen/time/total_timesteps        | 294912    |
|    gen/train/approx_kl             | 0.7832879 |
|    gen/train/clip_fraction         | 0.842     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -11.2     |
|    gen/train/explained_variance    | 0.794     |
|    gen/train/learning_rate         | 0.005     |
|    gen/train/loss                  | 0.136     |
|    gen/train/n_updates             | 850       |
|    gen/train/policy_gradient_loss  | 0.101     |
|    gen/train/std                   | 1.52      |
|    gen/train/value_loss      

round:  18%|█▊        | 18/100 [18:26<1:23:16, 60.93s/it]

---------------------------------------------------
| raw/                               |            |
|    gen/rollout/ep_len_mean         | 1e+03      |
|    gen/rollout/ep_rew_mean         | -336       |
|    gen/rollout/ep_rew_wrapped_mean | 133        |
|    gen/time/fps                    | 2727       |
|    gen/time/iterations             | 1          |
|    gen/time/time_elapsed           | 6          |
|    gen/time/total_timesteps        | 311296     |
|    gen/train/approx_kl             | 0.77312267 |
|    gen/train/clip_fraction         | 0.841      |
|    gen/train/clip_range            | 0.2        |
|    gen/train/entropy_loss          | -11.2      |
|    gen/train/explained_variance    | 0.892      |
|    gen/train/learning_rate         | 0.005      |
|    gen/train/loss                  | 0.129      |
|    gen/train/n_updates             | 900        |
|    gen/train/policy_gradient_loss  | 0.0894     |
|    gen/train/std                   | 1.55       |
|    gen/tra

round:  19%|█▉        | 19/100 [19:26<1:22:13, 60.90s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 1e+03     |
|    gen/rollout/ep_rew_mean         | -346      |
|    gen/rollout/ep_rew_wrapped_mean | 114       |
|    gen/time/fps                    | 2583      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 6         |
|    gen/time/total_timesteps        | 327680    |
|    gen/train/approx_kl             | 1.0307647 |
|    gen/train/clip_fraction         | 0.871     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -11.3     |
|    gen/train/explained_variance    | 0.906     |
|    gen/train/learning_rate         | 0.005     |
|    gen/train/loss                  | 0.111     |
|    gen/train/n_updates             | 950       |
|    gen/train/policy_gradient_loss  | 0.0734    |
|    gen/train/std                   | 1.59      |
|    gen/train/value_loss      

round:  20%|██        | 20/100 [20:27<1:21:06, 60.83s/it]

-------------------------------------------------
| raw/                               |          |
|    gen/rollout/ep_len_mean         | 1e+03    |
|    gen/rollout/ep_rew_mean         | -356     |
|    gen/rollout/ep_rew_wrapped_mean | 86.2     |
|    gen/time/fps                    | 2468     |
|    gen/time/iterations             | 1        |
|    gen/time/time_elapsed           | 6        |
|    gen/time/total_timesteps        | 344064   |
|    gen/train/approx_kl             | 1.105455 |
|    gen/train/clip_fraction         | 0.875    |
|    gen/train/clip_range            | 0.2      |
|    gen/train/entropy_loss          | -11.5    |
|    gen/train/explained_variance    | 0.861    |
|    gen/train/learning_rate         | 0.005    |
|    gen/train/loss                  | 0.0877   |
|    gen/train/n_updates             | 1000     |
|    gen/train/policy_gradient_loss  | 0.0844   |
|    gen/train/std                   | 1.63     |
|    gen/train/value_loss            | 0.288    |


round:  21%|██        | 21/100 [21:29<1:20:34, 61.20s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 1e+03     |
|    gen/rollout/ep_rew_mean         | -358      |
|    gen/rollout/ep_rew_wrapped_mean | 89.4      |
|    gen/time/fps                    | 2423      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 6         |
|    gen/time/total_timesteps        | 360448    |
|    gen/train/approx_kl             | 2.5204911 |
|    gen/train/clip_fraction         | 0.912     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -11.7     |
|    gen/train/explained_variance    | 0.259     |
|    gen/train/learning_rate         | 0.005     |
|    gen/train/loss                  | 0.174     |
|    gen/train/n_updates             | 1050      |
|    gen/train/policy_gradient_loss  | 0.0714    |
|    gen/train/std                   | 1.69      |
|    gen/train/value_loss      

round:  22%|██▏       | 22/100 [22:31<1:19:46, 61.36s/it]

-------------------------------------------------
| raw/                               |          |
|    gen/rollout/ep_len_mean         | 1e+03    |
|    gen/rollout/ep_rew_mean         | -352     |
|    gen/rollout/ep_rew_wrapped_mean | 105      |
|    gen/time/fps                    | 2477     |
|    gen/time/iterations             | 1        |
|    gen/time/time_elapsed           | 6        |
|    gen/time/total_timesteps        | 376832   |
|    gen/train/approx_kl             | 1.659461 |
|    gen/train/clip_fraction         | 0.901    |
|    gen/train/clip_range            | 0.2      |
|    gen/train/entropy_loss          | -12      |
|    gen/train/explained_variance    | 0.858    |
|    gen/train/learning_rate         | 0.005    |
|    gen/train/loss                  | 0.491    |
|    gen/train/n_updates             | 1100     |
|    gen/train/policy_gradient_loss  | 0.0938   |
|    gen/train/std                   | 1.77     |
|    gen/train/value_loss            | 1.09     |


round:  23%|██▎       | 23/100 [23:32<1:18:35, 61.25s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 1e+03     |
|    gen/rollout/ep_rew_mean         | -332      |
|    gen/rollout/ep_rew_wrapped_mean | 119       |
|    gen/time/fps                    | 2462      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 6         |
|    gen/time/total_timesteps        | 393216    |
|    gen/train/approx_kl             | 1.7575128 |
|    gen/train/clip_fraction         | 0.909     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -12.2     |
|    gen/train/explained_variance    | 0.921     |
|    gen/train/learning_rate         | 0.005     |
|    gen/train/loss                  | 0.579     |
|    gen/train/n_updates             | 1150      |
|    gen/train/policy_gradient_loss  | 0.101     |
|    gen/train/std                   | 1.86      |
|    gen/train/value_loss      

round:  24%|██▍       | 24/100 [24:33<1:17:38, 61.30s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 1e+03     |
|    gen/rollout/ep_rew_mean         | -308      |
|    gen/rollout/ep_rew_wrapped_mean | 134       |
|    gen/time/fps                    | 2611      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 6         |
|    gen/time/total_timesteps        | 409600    |
|    gen/train/approx_kl             | 1.2414013 |
|    gen/train/clip_fraction         | 0.877     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -12.4     |
|    gen/train/explained_variance    | 0.62      |
|    gen/train/learning_rate         | 0.005     |
|    gen/train/loss                  | 0.471     |
|    gen/train/n_updates             | 1200      |
|    gen/train/policy_gradient_loss  | 0.104     |
|    gen/train/std                   | 1.87      |
|    gen/train/value_loss      

round:  25%|██▌       | 25/100 [25:34<1:16:30, 61.20s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 1e+03     |
|    gen/rollout/ep_rew_mean         | -289      |
|    gen/rollout/ep_rew_wrapped_mean | 159       |
|    gen/time/fps                    | 2499      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 6         |
|    gen/time/total_timesteps        | 425984    |
|    gen/train/approx_kl             | 1.1035453 |
|    gen/train/clip_fraction         | 0.856     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -12.3     |
|    gen/train/explained_variance    | 0.562     |
|    gen/train/learning_rate         | 0.005     |
|    gen/train/loss                  | 0.589     |
|    gen/train/n_updates             | 1250      |
|    gen/train/policy_gradient_loss  | 0.0987    |
|    gen/train/std                   | 1.89      |
|    gen/train/value_loss      

round:  26%|██▌       | 26/100 [26:36<1:15:47, 61.45s/it]

-------------------------------------------------
| raw/                               |          |
|    gen/rollout/ep_len_mean         | 1e+03    |
|    gen/rollout/ep_rew_mean         | -252     |
|    gen/rollout/ep_rew_wrapped_mean | 184      |
|    gen/time/fps                    | 2673     |
|    gen/time/iterations             | 1        |
|    gen/time/time_elapsed           | 6        |
|    gen/time/total_timesteps        | 442368   |
|    gen/train/approx_kl             | 2.11313  |
|    gen/train/clip_fraction         | 0.899    |
|    gen/train/clip_range            | 0.2      |
|    gen/train/entropy_loss          | -12.6    |
|    gen/train/explained_variance    | 0.267    |
|    gen/train/learning_rate         | 0.005    |
|    gen/train/loss                  | 0.667    |
|    gen/train/n_updates             | 1300     |
|    gen/train/policy_gradient_loss  | 0.0993   |
|    gen/train/std                   | 1.99     |
|    gen/train/value_loss            | 2.48     |


round:  27%|██▋       | 27/100 [27:38<1:14:46, 61.46s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 1e+03     |
|    gen/rollout/ep_rew_mean         | -212      |
|    gen/rollout/ep_rew_wrapped_mean | 209       |
|    gen/time/fps                    | 2574      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 6         |
|    gen/time/total_timesteps        | 458752    |
|    gen/train/approx_kl             | 1.6690503 |
|    gen/train/clip_fraction         | 0.895     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -12.9     |
|    gen/train/explained_variance    | 0.504     |
|    gen/train/learning_rate         | 0.005     |
|    gen/train/loss                  | 0.801     |
|    gen/train/n_updates             | 1350      |
|    gen/train/policy_gradient_loss  | 0.102     |
|    gen/train/std                   | 2.05      |
|    gen/train/value_loss      

round:  28%|██▊       | 28/100 [28:40<1:13:53, 61.57s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 1e+03     |
|    gen/rollout/ep_rew_mean         | -175      |
|    gen/rollout/ep_rew_wrapped_mean | 205       |
|    gen/time/fps                    | 2464      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 6         |
|    gen/time/total_timesteps        | 475136    |
|    gen/train/approx_kl             | 1.2742622 |
|    gen/train/clip_fraction         | 0.891     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -13.1     |
|    gen/train/explained_variance    | 0.764     |
|    gen/train/learning_rate         | 0.005     |
|    gen/train/loss                  | 0.903     |
|    gen/train/n_updates             | 1400      |
|    gen/train/policy_gradient_loss  | 0.118     |
|    gen/train/std                   | 2.13      |
|    gen/train/value_loss      

round:  29%|██▉       | 29/100 [29:41<1:12:51, 61.57s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 1e+03     |
|    gen/rollout/ep_rew_mean         | -140      |
|    gen/rollout/ep_rew_wrapped_mean | 222       |
|    gen/time/fps                    | 2480      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 6         |
|    gen/time/total_timesteps        | 491520    |
|    gen/train/approx_kl             | 1.1115882 |
|    gen/train/clip_fraction         | 0.868     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -13.2     |
|    gen/train/explained_variance    | 0.846     |
|    gen/train/learning_rate         | 0.005     |
|    gen/train/loss                  | 1.06      |
|    gen/train/n_updates             | 1450      |
|    gen/train/policy_gradient_loss  | 0.0896    |
|    gen/train/std                   | 2.2       |
|    gen/train/value_loss      

round:  30%|███       | 30/100 [30:43<1:11:56, 61.67s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 1e+03     |
|    gen/rollout/ep_rew_mean         | -95.5     |
|    gen/rollout/ep_rew_wrapped_mean | 231       |
|    gen/time/fps                    | 2490      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 6         |
|    gen/time/total_timesteps        | 507904    |
|    gen/train/approx_kl             | 0.8985874 |
|    gen/train/clip_fraction         | 0.849     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -13.3     |
|    gen/train/explained_variance    | 0.771     |
|    gen/train/learning_rate         | 0.005     |
|    gen/train/loss                  | 0.717     |
|    gen/train/n_updates             | 1500      |
|    gen/train/policy_gradient_loss  | 0.104     |
|    gen/train/std                   | 2.27      |
|    gen/train/value_loss      

round:  31%|███       | 31/100 [31:45<1:10:56, 61.68s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 1e+03     |
|    gen/rollout/ep_rew_mean         | -56.6     |
|    gen/rollout/ep_rew_wrapped_mean | 223       |
|    gen/time/fps                    | 2498      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 6         |
|    gen/time/total_timesteps        | 524288    |
|    gen/train/approx_kl             | 0.6259606 |
|    gen/train/clip_fraction         | 0.806     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -13.4     |
|    gen/train/explained_variance    | 0.774     |
|    gen/train/learning_rate         | 0.005     |
|    gen/train/loss                  | 0.43      |
|    gen/train/n_updates             | 1550      |
|    gen/train/policy_gradient_loss  | 0.0822    |
|    gen/train/std                   | 2.25      |
|    gen/train/value_loss      

round:  32%|███▏      | 32/100 [32:47<1:10:02, 61.80s/it]

---------------------------------------------------
| raw/                               |            |
|    gen/rollout/ep_len_mean         | 1e+03      |
|    gen/rollout/ep_rew_mean         | -35.2      |
|    gen/rollout/ep_rew_wrapped_mean | 195        |
|    gen/time/fps                    | 2754       |
|    gen/time/iterations             | 1          |
|    gen/time/time_elapsed           | 5          |
|    gen/time/total_timesteps        | 540672     |
|    gen/train/approx_kl             | 0.55479544 |
|    gen/train/clip_fraction         | 0.802      |
|    gen/train/clip_range            | 0.2        |
|    gen/train/entropy_loss          | -13.4      |
|    gen/train/explained_variance    | 0.772      |
|    gen/train/learning_rate         | 0.005      |
|    gen/train/loss                  | 0.436      |
|    gen/train/n_updates             | 1600       |
|    gen/train/policy_gradient_loss  | 0.0806     |
|    gen/train/std                   | 2.26       |
|    gen/tra

round:  33%|███▎      | 33/100 [33:48<1:08:49, 61.63s/it]

---------------------------------------------------
| raw/                               |            |
|    gen/rollout/ep_len_mean         | 1e+03      |
|    gen/rollout/ep_rew_mean         | -14.8      |
|    gen/rollout/ep_rew_wrapped_mean | 169        |
|    gen/time/fps                    | 2718       |
|    gen/time/iterations             | 1          |
|    gen/time/time_elapsed           | 6          |
|    gen/time/total_timesteps        | 557056     |
|    gen/train/approx_kl             | 0.68165797 |
|    gen/train/clip_fraction         | 0.809      |
|    gen/train/clip_range            | 0.2        |
|    gen/train/entropy_loss          | -13.5      |
|    gen/train/explained_variance    | 0.686      |
|    gen/train/learning_rate         | 0.005      |
|    gen/train/loss                  | 0.32       |
|    gen/train/n_updates             | 1650       |
|    gen/train/policy_gradient_loss  | 0.0834     |
|    gen/train/std                   | 2.32       |
|    gen/tra

round:  34%|███▍      | 34/100 [34:49<1:07:42, 61.55s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 1e+03     |
|    gen/rollout/ep_rew_mean         | -1.2      |
|    gen/rollout/ep_rew_wrapped_mean | 150       |
|    gen/time/fps                    | 2774      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 5         |
|    gen/time/total_timesteps        | 573440    |
|    gen/train/approx_kl             | 0.5623518 |
|    gen/train/clip_fraction         | 0.804     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -13.7     |
|    gen/train/explained_variance    | 0.844     |
|    gen/train/learning_rate         | 0.005     |
|    gen/train/loss                  | 0.13      |
|    gen/train/n_updates             | 1700      |
|    gen/train/policy_gradient_loss  | 0.0691    |
|    gen/train/std                   | 2.35      |
|    gen/train/value_loss      

round:  35%|███▌      | 35/100 [35:51<1:06:43, 61.60s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 1e+03     |
|    gen/rollout/ep_rew_mean         | 26.6      |
|    gen/rollout/ep_rew_wrapped_mean | 120       |
|    gen/time/fps                    | 2602      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 6         |
|    gen/time/total_timesteps        | 589824    |
|    gen/train/approx_kl             | 0.7730118 |
|    gen/train/clip_fraction         | 0.809     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -13.6     |
|    gen/train/explained_variance    | 0.82      |
|    gen/train/learning_rate         | 0.005     |
|    gen/train/loss                  | 0.714     |
|    gen/train/n_updates             | 1750      |
|    gen/train/policy_gradient_loss  | 0.0609    |
|    gen/train/std                   | 2.43      |
|    gen/train/value_loss      

round:  36%|███▌      | 36/100 [36:53<1:05:46, 61.67s/it]

---------------------------------------------------
| raw/                               |            |
|    gen/rollout/ep_len_mean         | 1e+03      |
|    gen/rollout/ep_rew_mean         | 34.2       |
|    gen/rollout/ep_rew_wrapped_mean | 132        |
|    gen/time/fps                    | 2478       |
|    gen/time/iterations             | 1          |
|    gen/time/time_elapsed           | 6          |
|    gen/time/total_timesteps        | 606208     |
|    gen/train/approx_kl             | 0.51328266 |
|    gen/train/clip_fraction         | 0.759      |
|    gen/train/clip_range            | 0.2        |
|    gen/train/entropy_loss          | -13.8      |
|    gen/train/explained_variance    | 0.788      |
|    gen/train/learning_rate         | 0.005      |
|    gen/train/loss                  | 1.77       |
|    gen/train/n_updates             | 1800       |
|    gen/train/policy_gradient_loss  | 0.06       |
|    gen/train/std                   | 2.43       |
|    gen/tra

round:  37%|███▋      | 37/100 [37:55<1:04:56, 61.84s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 1e+03     |
|    gen/rollout/ep_rew_mean         | 51.1      |
|    gen/rollout/ep_rew_wrapped_mean | 132       |
|    gen/time/fps                    | 2458      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 6         |
|    gen/time/total_timesteps        | 622592    |
|    gen/train/approx_kl             | 0.6069723 |
|    gen/train/clip_fraction         | 0.801     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -13.9     |
|    gen/train/explained_variance    | 0.845     |
|    gen/train/learning_rate         | 0.005     |
|    gen/train/loss                  | 0.54      |
|    gen/train/n_updates             | 1850      |
|    gen/train/policy_gradient_loss  | 0.0812    |
|    gen/train/std                   | 2.52      |
|    gen/train/value_loss      

round:  38%|███▊      | 38/100 [38:57<1:03:54, 61.85s/it]

---------------------------------------------------
| raw/                               |            |
|    gen/rollout/ep_len_mean         | 1e+03      |
|    gen/rollout/ep_rew_mean         | 64.5       |
|    gen/rollout/ep_rew_wrapped_mean | 127        |
|    gen/time/fps                    | 2411       |
|    gen/time/iterations             | 1          |
|    gen/time/time_elapsed           | 6          |
|    gen/time/total_timesteps        | 638976     |
|    gen/train/approx_kl             | 0.50222653 |
|    gen/train/clip_fraction         | 0.801      |
|    gen/train/clip_range            | 0.2        |
|    gen/train/entropy_loss          | -14.2      |
|    gen/train/explained_variance    | 0.799      |
|    gen/train/learning_rate         | 0.005      |
|    gen/train/loss                  | 0.263      |
|    gen/train/n_updates             | 1900       |
|    gen/train/policy_gradient_loss  | 0.0895     |
|    gen/train/std                   | 2.64       |
|    gen/tra

round:  39%|███▉      | 39/100 [40:00<1:03:04, 62.03s/it]

---------------------------------------------------
| raw/                               |            |
|    gen/rollout/ep_len_mean         | 1e+03      |
|    gen/rollout/ep_rew_mean         | 80.9       |
|    gen/rollout/ep_rew_wrapped_mean | 124        |
|    gen/time/fps                    | 2720       |
|    gen/time/iterations             | 1          |
|    gen/time/time_elapsed           | 6          |
|    gen/time/total_timesteps        | 655360     |
|    gen/train/approx_kl             | 0.48038638 |
|    gen/train/clip_fraction         | 0.759      |
|    gen/train/clip_range            | 0.2        |
|    gen/train/entropy_loss          | -14.3      |
|    gen/train/explained_variance    | 0.851      |
|    gen/train/learning_rate         | 0.005      |
|    gen/train/loss                  | 0.611      |
|    gen/train/n_updates             | 1950       |
|    gen/train/policy_gradient_loss  | 0.0592     |
|    gen/train/std                   | 2.7        |
|    gen/tra

round:  40%|████      | 40/100 [41:01<1:01:51, 61.85s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 1e+03     |
|    gen/rollout/ep_rew_mean         | 105       |
|    gen/rollout/ep_rew_wrapped_mean | 124       |
|    gen/time/fps                    | 2696      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 6         |
|    gen/time/total_timesteps        | 671744    |
|    gen/train/approx_kl             | 0.4970357 |
|    gen/train/clip_fraction         | 0.769     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -14.4     |
|    gen/train/explained_variance    | 0.787     |
|    gen/train/learning_rate         | 0.005     |
|    gen/train/loss                  | 0.561     |
|    gen/train/n_updates             | 2000      |
|    gen/train/policy_gradient_loss  | 0.055     |
|    gen/train/std                   | 2.71      |
|    gen/train/value_loss      

round:  41%|████      | 41/100 [42:03<1:00:45, 61.79s/it]

---------------------------------------------------
| raw/                               |            |
|    gen/rollout/ep_len_mean         | 1e+03      |
|    gen/rollout/ep_rew_mean         | 131        |
|    gen/rollout/ep_rew_wrapped_mean | 127        |
|    gen/time/fps                    | 2655       |
|    gen/time/iterations             | 1          |
|    gen/time/time_elapsed           | 6          |
|    gen/time/total_timesteps        | 688128     |
|    gen/train/approx_kl             | 0.56931734 |
|    gen/train/clip_fraction         | 0.775      |
|    gen/train/clip_range            | 0.2        |
|    gen/train/entropy_loss          | -14.4      |
|    gen/train/explained_variance    | 0.832      |
|    gen/train/learning_rate         | 0.005      |
|    gen/train/loss                  | 1.12       |
|    gen/train/n_updates             | 2050       |
|    gen/train/policy_gradient_loss  | 0.0588     |
|    gen/train/std                   | 2.74       |
|    gen/tra

round:  42%|████▏     | 42/100 [43:05<59:54, 61.97s/it]  

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 1e+03     |
|    gen/rollout/ep_rew_mean         | 150       |
|    gen/rollout/ep_rew_wrapped_mean | 112       |
|    gen/time/fps                    | 2496      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 6         |
|    gen/time/total_timesteps        | 704512    |
|    gen/train/approx_kl             | 0.7077401 |
|    gen/train/clip_fraction         | 0.796     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -14.5     |
|    gen/train/explained_variance    | 0.85      |
|    gen/train/learning_rate         | 0.005     |
|    gen/train/loss                  | 0.468     |
|    gen/train/n_updates             | 2100      |
|    gen/train/policy_gradient_loss  | 0.0684    |
|    gen/train/std                   | 2.78      |
|    gen/train/value_loss      

round:  43%|████▎     | 43/100 [44:07<58:43, 61.82s/it]

-------------------------------------------------
| raw/                               |          |
|    gen/rollout/ep_len_mean         | 1e+03    |
|    gen/rollout/ep_rew_mean         | 151      |
|    gen/rollout/ep_rew_wrapped_mean | 91.7     |
|    gen/time/fps                    | 2490     |
|    gen/time/iterations             | 1        |
|    gen/time/time_elapsed           | 6        |
|    gen/time/total_timesteps        | 720896   |
|    gen/train/approx_kl             | 0.467546 |
|    gen/train/clip_fraction         | 0.76     |
|    gen/train/clip_range            | 0.2      |
|    gen/train/entropy_loss          | -14.6    |
|    gen/train/explained_variance    | 0.0309   |
|    gen/train/learning_rate         | 0.005    |
|    gen/train/loss                  | 0.242    |
|    gen/train/n_updates             | 2150     |
|    gen/train/policy_gradient_loss  | 0.064    |
|    gen/train/std                   | 2.83     |
|    gen/train/value_loss            | 0.44     |


round:  44%|████▍     | 44/100 [45:08<57:35, 61.71s/it]

---------------------------------------------------
| raw/                               |            |
|    gen/rollout/ep_len_mean         | 1e+03      |
|    gen/rollout/ep_rew_mean         | 142        |
|    gen/rollout/ep_rew_wrapped_mean | 86         |
|    gen/time/fps                    | 2462       |
|    gen/time/iterations             | 1          |
|    gen/time/time_elapsed           | 6          |
|    gen/time/total_timesteps        | 737280     |
|    gen/train/approx_kl             | 0.40079135 |
|    gen/train/clip_fraction         | 0.73       |
|    gen/train/clip_range            | 0.2        |
|    gen/train/entropy_loss          | -14.6      |
|    gen/train/explained_variance    | -0.511     |
|    gen/train/learning_rate         | 0.005      |
|    gen/train/loss                  | -0.00424   |
|    gen/train/n_updates             | 2200       |
|    gen/train/policy_gradient_loss  | 0.0397     |
|    gen/train/std                   | 2.88       |
|    gen/tra

round:  45%|████▌     | 45/100 [46:10<56:31, 61.66s/it]

---------------------------------------------------
| raw/                               |            |
|    gen/rollout/ep_len_mean         | 1e+03      |
|    gen/rollout/ep_rew_mean         | 143        |
|    gen/rollout/ep_rew_wrapped_mean | 84.9       |
|    gen/time/fps                    | 2468       |
|    gen/time/iterations             | 1          |
|    gen/time/time_elapsed           | 6          |
|    gen/time/total_timesteps        | 753664     |
|    gen/train/approx_kl             | 0.39173362 |
|    gen/train/clip_fraction         | 0.706      |
|    gen/train/clip_range            | 0.2        |
|    gen/train/entropy_loss          | -14.7      |
|    gen/train/explained_variance    | 0.325      |
|    gen/train/learning_rate         | 0.005      |
|    gen/train/loss                  | 0.171      |
|    gen/train/n_updates             | 2250       |
|    gen/train/policy_gradient_loss  | 0.047      |
|    gen/train/std                   | 2.86       |
|    gen/tra

round:  46%|████▌     | 46/100 [47:12<55:40, 61.85s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 1e+03     |
|    gen/rollout/ep_rew_mean         | 134       |
|    gen/rollout/ep_rew_wrapped_mean | 91.3      |
|    gen/time/fps                    | 2659      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 6         |
|    gen/time/total_timesteps        | 770048    |
|    gen/train/approx_kl             | 0.4690674 |
|    gen/train/clip_fraction         | 0.712     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -14.6     |
|    gen/train/explained_variance    | 0.377     |
|    gen/train/learning_rate         | 0.005     |
|    gen/train/loss                  | 0.521     |
|    gen/train/n_updates             | 2300      |
|    gen/train/policy_gradient_loss  | 0.0483    |
|    gen/train/std                   | 2.86      |
|    gen/train/value_loss      

round:  47%|████▋     | 47/100 [48:13<54:32, 61.75s/it]

---------------------------------------------------
| raw/                               |            |
|    gen/rollout/ep_len_mean         | 1e+03      |
|    gen/rollout/ep_rew_mean         | 121        |
|    gen/rollout/ep_rew_wrapped_mean | 99         |
|    gen/time/fps                    | 2701       |
|    gen/time/iterations             | 1          |
|    gen/time/time_elapsed           | 6          |
|    gen/time/total_timesteps        | 786432     |
|    gen/train/approx_kl             | 0.44119656 |
|    gen/train/clip_fraction         | 0.727      |
|    gen/train/clip_range            | 0.2        |
|    gen/train/entropy_loss          | -14.7      |
|    gen/train/explained_variance    | 0.425      |
|    gen/train/learning_rate         | 0.005      |
|    gen/train/loss                  | 0.936      |
|    gen/train/n_updates             | 2350       |
|    gen/train/policy_gradient_loss  | 0.0596     |
|    gen/train/std                   | 2.85       |
|    gen/tra

round:  48%|████▊     | 48/100 [49:15<53:24, 61.63s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 1e+03     |
|    gen/rollout/ep_rew_mean         | 124       |
|    gen/rollout/ep_rew_wrapped_mean | 105       |
|    gen/time/fps                    | 2719      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 6         |
|    gen/time/total_timesteps        | 802816    |
|    gen/train/approx_kl             | 0.4718963 |
|    gen/train/clip_fraction         | 0.725     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -14.6     |
|    gen/train/explained_variance    | 0.527     |
|    gen/train/learning_rate         | 0.005     |
|    gen/train/loss                  | 1.19      |
|    gen/train/n_updates             | 2400      |
|    gen/train/policy_gradient_loss  | 0.0569    |
|    gen/train/std                   | 2.84      |
|    gen/train/value_loss      

round:  49%|████▉     | 49/100 [50:16<52:24, 61.66s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 1e+03     |
|    gen/rollout/ep_rew_mean         | 137       |
|    gen/rollout/ep_rew_wrapped_mean | 118       |
|    gen/time/fps                    | 2598      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 6         |
|    gen/time/total_timesteps        | 819200    |
|    gen/train/approx_kl             | 0.4471837 |
|    gen/train/clip_fraction         | 0.74      |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -14.7     |
|    gen/train/explained_variance    | 0.56      |
|    gen/train/learning_rate         | 0.005     |
|    gen/train/loss                  | 0.595     |
|    gen/train/n_updates             | 2450      |
|    gen/train/policy_gradient_loss  | 0.0718    |
|    gen/train/std                   | 2.85      |
|    gen/train/value_loss      

round:  50%|█████     | 50/100 [51:18<51:19, 61.59s/it]

---------------------------------------------------
| raw/                               |            |
|    gen/rollout/ep_len_mean         | 1e+03      |
|    gen/rollout/ep_rew_mean         | 147        |
|    gen/rollout/ep_rew_wrapped_mean | 130        |
|    gen/time/fps                    | 2477       |
|    gen/time/iterations             | 1          |
|    gen/time/time_elapsed           | 6          |
|    gen/time/total_timesteps        | 835584     |
|    gen/train/approx_kl             | 0.44421306 |
|    gen/train/clip_fraction         | 0.725      |
|    gen/train/clip_range            | 0.2        |
|    gen/train/entropy_loss          | -14.7      |
|    gen/train/explained_variance    | 0.689      |
|    gen/train/learning_rate         | 0.005      |
|    gen/train/loss                  | 0.305      |
|    gen/train/n_updates             | 2500       |
|    gen/train/policy_gradient_loss  | 0.0579     |
|    gen/train/std                   | 2.84       |
|    gen/tra

round:  51%|█████     | 51/100 [52:19<50:18, 61.61s/it]

---------------------------------------------------
| raw/                               |            |
|    gen/rollout/ep_len_mean         | 1e+03      |
|    gen/rollout/ep_rew_mean         | 167        |
|    gen/rollout/ep_rew_wrapped_mean | 137        |
|    gen/time/fps                    | 2493       |
|    gen/time/iterations             | 1          |
|    gen/time/time_elapsed           | 6          |
|    gen/time/total_timesteps        | 851968     |
|    gen/train/approx_kl             | 0.49467638 |
|    gen/train/clip_fraction         | 0.738      |
|    gen/train/clip_range            | 0.2        |
|    gen/train/entropy_loss          | -14.7      |
|    gen/train/explained_variance    | 0.721      |
|    gen/train/learning_rate         | 0.005      |
|    gen/train/loss                  | 1.06       |
|    gen/train/n_updates             | 2550       |
|    gen/train/policy_gradient_loss  | 0.0586     |
|    gen/train/std                   | 2.93       |
|    gen/tra

round:  52%|█████▏    | 52/100 [53:22<49:27, 61.83s/it]

---------------------------------------------------
| raw/                               |            |
|    gen/rollout/ep_len_mean         | 1e+03      |
|    gen/rollout/ep_rew_mean         | 197        |
|    gen/rollout/ep_rew_wrapped_mean | 136        |
|    gen/time/fps                    | 2479       |
|    gen/time/iterations             | 1          |
|    gen/time/time_elapsed           | 6          |
|    gen/time/total_timesteps        | 868352     |
|    gen/train/approx_kl             | 0.48417553 |
|    gen/train/clip_fraction         | 0.722      |
|    gen/train/clip_range            | 0.2        |
|    gen/train/entropy_loss          | -14.8      |
|    gen/train/explained_variance    | 0.671      |
|    gen/train/learning_rate         | 0.005      |
|    gen/train/loss                  | 0.427      |
|    gen/train/n_updates             | 2600       |
|    gen/train/policy_gradient_loss  | 0.0576     |
|    gen/train/std                   | 2.96       |
|    gen/tra

round:  53%|█████▎    | 53/100 [54:24<48:31, 61.94s/it]

---------------------------------------------------
| raw/                               |            |
|    gen/rollout/ep_len_mean         | 1e+03      |
|    gen/rollout/ep_rew_mean         | 222        |
|    gen/rollout/ep_rew_wrapped_mean | 137        |
|    gen/time/fps                    | 2713       |
|    gen/time/iterations             | 1          |
|    gen/time/time_elapsed           | 6          |
|    gen/time/total_timesteps        | 884736     |
|    gen/train/approx_kl             | 0.48622817 |
|    gen/train/clip_fraction         | 0.715      |
|    gen/train/clip_range            | 0.2        |
|    gen/train/entropy_loss          | -14.9      |
|    gen/train/explained_variance    | 0.721      |
|    gen/train/learning_rate         | 0.005      |
|    gen/train/loss                  | 1.75       |
|    gen/train/n_updates             | 2650       |
|    gen/train/policy_gradient_loss  | 0.0531     |
|    gen/train/std                   | 2.98       |
|    gen/tra

round:  54%|█████▍    | 54/100 [55:25<47:17, 61.69s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 1e+03     |
|    gen/rollout/ep_rew_mean         | 233       |
|    gen/rollout/ep_rew_wrapped_mean | 126       |
|    gen/time/fps                    | 2748      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 5         |
|    gen/time/total_timesteps        | 901120    |
|    gen/train/approx_kl             | 0.4806345 |
|    gen/train/clip_fraction         | 0.727     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -14.9     |
|    gen/train/explained_variance    | 0.668     |
|    gen/train/learning_rate         | 0.005     |
|    gen/train/loss                  | 0.438     |
|    gen/train/n_updates             | 2700      |
|    gen/train/policy_gradient_loss  | 0.0568    |
|    gen/train/std                   | 3         |
|    gen/train/value_loss      

round:  55%|█████▌    | 55/100 [56:26<46:05, 61.45s/it]

---------------------------------------------------
| raw/                               |            |
|    gen/rollout/ep_len_mean         | 1e+03      |
|    gen/rollout/ep_rew_mean         | 249        |
|    gen/rollout/ep_rew_wrapped_mean | 118        |
|    gen/time/fps                    | 2716       |
|    gen/time/iterations             | 1          |
|    gen/time/time_elapsed           | 6          |
|    gen/time/total_timesteps        | 917504     |
|    gen/train/approx_kl             | 0.38929683 |
|    gen/train/clip_fraction         | 0.689      |
|    gen/train/clip_range            | 0.2        |
|    gen/train/entropy_loss          | -15        |
|    gen/train/explained_variance    | 0.662      |
|    gen/train/learning_rate         | 0.005      |
|    gen/train/loss                  | 0.626      |
|    gen/train/n_updates             | 2750       |
|    gen/train/policy_gradient_loss  | 0.0458     |
|    gen/train/std                   | 3.03       |
|    gen/tra

round:  56%|█████▌    | 56/100 [57:27<45:01, 61.39s/it]

---------------------------------------------------
| raw/                               |            |
|    gen/rollout/ep_len_mean         | 1e+03      |
|    gen/rollout/ep_rew_mean         | 271        |
|    gen/rollout/ep_rew_wrapped_mean | 118        |
|    gen/time/fps                    | 2723       |
|    gen/time/iterations             | 1          |
|    gen/time/time_elapsed           | 6          |
|    gen/time/total_timesteps        | 933888     |
|    gen/train/approx_kl             | 0.43007958 |
|    gen/train/clip_fraction         | 0.692      |
|    gen/train/clip_range            | 0.2        |
|    gen/train/entropy_loss          | -15        |
|    gen/train/explained_variance    | 0.731      |
|    gen/train/learning_rate         | 0.005      |
|    gen/train/loss                  | 0.439      |
|    gen/train/n_updates             | 2800       |
|    gen/train/policy_gradient_loss  | 0.0451     |
|    gen/train/std                   | 3.05       |
|    gen/tra

round:  57%|█████▋    | 57/100 [58:28<43:54, 61.26s/it]

---------------------------------------------------
| raw/                               |            |
|    gen/rollout/ep_len_mean         | 1e+03      |
|    gen/rollout/ep_rew_mean         | 284        |
|    gen/rollout/ep_rew_wrapped_mean | 116        |
|    gen/time/fps                    | 2686       |
|    gen/time/iterations             | 1          |
|    gen/time/time_elapsed           | 6          |
|    gen/time/total_timesteps        | 950272     |
|    gen/train/approx_kl             | 0.38718674 |
|    gen/train/clip_fraction         | 0.687      |
|    gen/train/clip_range            | 0.2        |
|    gen/train/entropy_loss          | -15.1      |
|    gen/train/explained_variance    | 0.698      |
|    gen/train/learning_rate         | 0.005      |
|    gen/train/loss                  | 0.448      |
|    gen/train/n_updates             | 2850       |
|    gen/train/policy_gradient_loss  | 0.0522     |
|    gen/train/std                   | 3.06       |
|    gen/tra

round:  58%|█████▊    | 58/100 [59:30<42:53, 61.27s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 1e+03     |
|    gen/rollout/ep_rew_mean         | 278       |
|    gen/rollout/ep_rew_wrapped_mean | 107       |
|    gen/time/fps                    | 2520      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 6         |
|    gen/time/total_timesteps        | 966656    |
|    gen/train/approx_kl             | 0.4350642 |
|    gen/train/clip_fraction         | 0.687     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -15.1     |
|    gen/train/explained_variance    | 0.542     |
|    gen/train/learning_rate         | 0.005     |
|    gen/train/loss                  | 0.108     |
|    gen/train/n_updates             | 2900      |
|    gen/train/policy_gradient_loss  | 0.0521    |
|    gen/train/std                   | 3.12      |
|    gen/train/value_loss      

round:  59%|█████▉    | 59/100 [1:00:31<41:51, 61.24s/it]

---------------------------------------------------
| raw/                               |            |
|    gen/rollout/ep_len_mean         | 1e+03      |
|    gen/rollout/ep_rew_mean         | 274        |
|    gen/rollout/ep_rew_wrapped_mean | 89.6       |
|    gen/time/fps                    | 2392       |
|    gen/time/iterations             | 1          |
|    gen/time/time_elapsed           | 6          |
|    gen/time/total_timesteps        | 983040     |
|    gen/train/approx_kl             | 0.40618658 |
|    gen/train/clip_fraction         | 0.679      |
|    gen/train/clip_range            | 0.2        |
|    gen/train/entropy_loss          | -15.1      |
|    gen/train/explained_variance    | 0.713      |
|    gen/train/learning_rate         | 0.005      |
|    gen/train/loss                  | 0.244      |
|    gen/train/n_updates             | 2950       |
|    gen/train/policy_gradient_loss  | 0.0399     |
|    gen/train/std                   | 3.08       |
|    gen/tra

round:  60%|██████    | 60/100 [1:01:32<40:51, 61.29s/it]

---------------------------------------------------
| raw/                               |            |
|    gen/rollout/ep_len_mean         | 1e+03      |
|    gen/rollout/ep_rew_mean         | 269        |
|    gen/rollout/ep_rew_wrapped_mean | 83         |
|    gen/time/fps                    | 2422       |
|    gen/time/iterations             | 1          |
|    gen/time/time_elapsed           | 6          |
|    gen/time/total_timesteps        | 999424     |
|    gen/train/approx_kl             | 0.46678847 |
|    gen/train/clip_fraction         | 0.673      |
|    gen/train/clip_range            | 0.2        |
|    gen/train/entropy_loss          | -15.1      |
|    gen/train/explained_variance    | 0.538      |
|    gen/train/learning_rate         | 0.005      |
|    gen/train/loss                  | 0.276      |
|    gen/train/n_updates             | 3000       |
|    gen/train/policy_gradient_loss  | 0.0386     |
|    gen/train/std                   | 3.08       |
|    gen/tra

round:  61%|██████    | 61/100 [1:02:34<40:01, 61.57s/it]

---------------------------------------------------
| raw/                               |            |
|    gen/rollout/ep_len_mean         | 1e+03      |
|    gen/rollout/ep_rew_mean         | 278        |
|    gen/rollout/ep_rew_wrapped_mean | 83         |
|    gen/time/fps                    | 2378       |
|    gen/time/iterations             | 1          |
|    gen/time/time_elapsed           | 6          |
|    gen/time/total_timesteps        | 1015808    |
|    gen/train/approx_kl             | 0.44034165 |
|    gen/train/clip_fraction         | 0.687      |
|    gen/train/clip_range            | 0.2        |
|    gen/train/entropy_loss          | -15.1      |
|    gen/train/explained_variance    | 0.589      |
|    gen/train/learning_rate         | 0.005      |
|    gen/train/loss                  | 0.342      |
|    gen/train/n_updates             | 3050       |
|    gen/train/policy_gradient_loss  | 0.0452     |
|    gen/train/std                   | 3.07       |
|    gen/tra

round:  62%|██████▏   | 62/100 [1:03:37<39:09, 61.84s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 1e+03     |
|    gen/rollout/ep_rew_mean         | 301       |
|    gen/rollout/ep_rew_wrapped_mean | 77.8      |
|    gen/time/fps                    | 2711      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 6         |
|    gen/time/total_timesteps        | 1032192   |
|    gen/train/approx_kl             | 0.3582011 |
|    gen/train/clip_fraction         | 0.675     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -15.1     |
|    gen/train/explained_variance    | 0.564     |
|    gen/train/learning_rate         | 0.005     |
|    gen/train/loss                  | 0.297     |
|    gen/train/n_updates             | 3100      |
|    gen/train/policy_gradient_loss  | 0.0485    |
|    gen/train/std                   | 3.04      |
|    gen/train/value_loss      

round:  63%|██████▎   | 63/100 [1:04:38<37:59, 61.61s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 1e+03     |
|    gen/rollout/ep_rew_mean         | 291       |
|    gen/rollout/ep_rew_wrapped_mean | 69.8      |
|    gen/time/fps                    | 2763      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 5         |
|    gen/time/total_timesteps        | 1048576   |
|    gen/train/approx_kl             | 0.5244577 |
|    gen/train/clip_fraction         | 0.684     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -15.1     |
|    gen/train/explained_variance    | 0.534     |
|    gen/train/learning_rate         | 0.005     |
|    gen/train/loss                  | 0.2       |
|    gen/train/n_updates             | 3150      |
|    gen/train/policy_gradient_loss  | 0.0515    |
|    gen/train/std                   | 3.05      |
|    gen/train/value_loss      

round:  64%|██████▍   | 64/100 [1:05:39<36:53, 61.49s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 1e+03     |
|    gen/rollout/ep_rew_mean         | 296       |
|    gen/rollout/ep_rew_wrapped_mean | 67.3      |
|    gen/time/fps                    | 2712      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 6         |
|    gen/time/total_timesteps        | 1064960   |
|    gen/train/approx_kl             | 0.5783909 |
|    gen/train/clip_fraction         | 0.691     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -15       |
|    gen/train/explained_variance    | 0.546     |
|    gen/train/learning_rate         | 0.005     |
|    gen/train/loss                  | 0.143     |
|    gen/train/n_updates             | 3200      |
|    gen/train/policy_gradient_loss  | 0.0449    |
|    gen/train/std                   | 3.11      |
|    gen/train/value_loss      

round:  65%|██████▌   | 65/100 [1:06:40<35:51, 61.46s/it]

---------------------------------------------------
| raw/                               |            |
|    gen/rollout/ep_len_mean         | 1e+03      |
|    gen/rollout/ep_rew_mean         | 313        |
|    gen/rollout/ep_rew_wrapped_mean | 67.3       |
|    gen/time/fps                    | 2651       |
|    gen/time/iterations             | 1          |
|    gen/time/time_elapsed           | 6          |
|    gen/time/total_timesteps        | 1081344    |
|    gen/train/approx_kl             | 0.50759655 |
|    gen/train/clip_fraction         | 0.693      |
|    gen/train/clip_range            | 0.2        |
|    gen/train/entropy_loss          | -15.1      |
|    gen/train/explained_variance    | 0.478      |
|    gen/train/learning_rate         | 0.005      |
|    gen/train/loss                  | 0.116      |
|    gen/train/n_updates             | 3250       |
|    gen/train/policy_gradient_loss  | 0.0418     |
|    gen/train/std                   | 3.11       |
|    gen/tra

round:  66%|██████▌   | 66/100 [1:07:42<34:53, 61.57s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 1e+03     |
|    gen/rollout/ep_rew_mean         | 322       |
|    gen/rollout/ep_rew_wrapped_mean | 68.3      |
|    gen/time/fps                    | 2466      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 6         |
|    gen/time/total_timesteps        | 1097728   |
|    gen/train/approx_kl             | 0.5562224 |
|    gen/train/clip_fraction         | 0.684     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -15.1     |
|    gen/train/explained_variance    | 0.411     |
|    gen/train/learning_rate         | 0.005     |
|    gen/train/loss                  | 0.354     |
|    gen/train/n_updates             | 3300      |
|    gen/train/policy_gradient_loss  | 0.0442    |
|    gen/train/std                   | 3.07      |
|    gen/train/value_loss      

round:  67%|██████▋   | 67/100 [1:08:44<33:48, 61.48s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 1e+03     |
|    gen/rollout/ep_rew_mean         | 324       |
|    gen/rollout/ep_rew_wrapped_mean | 70.8      |
|    gen/time/fps                    | 2505      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 6         |
|    gen/time/total_timesteps        | 1114112   |
|    gen/train/approx_kl             | 0.5771736 |
|    gen/train/clip_fraction         | 0.694     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -15.1     |
|    gen/train/explained_variance    | 0.435     |
|    gen/train/learning_rate         | 0.005     |
|    gen/train/loss                  | 1.14      |
|    gen/train/n_updates             | 3350      |
|    gen/train/policy_gradient_loss  | 0.0521    |
|    gen/train/std                   | 3.06      |
|    gen/train/value_loss      

round:  68%|██████▊   | 68/100 [1:09:46<32:52, 61.63s/it]

---------------------------------------------------
| raw/                               |            |
|    gen/rollout/ep_len_mean         | 1e+03      |
|    gen/rollout/ep_rew_mean         | 330        |
|    gen/rollout/ep_rew_wrapped_mean | 79.5       |
|    gen/time/fps                    | 2411       |
|    gen/time/iterations             | 1          |
|    gen/time/time_elapsed           | 6          |
|    gen/time/total_timesteps        | 1130496    |
|    gen/train/approx_kl             | 0.62014365 |
|    gen/train/clip_fraction         | 0.693      |
|    gen/train/clip_range            | 0.2        |
|    gen/train/entropy_loss          | -15.1      |
|    gen/train/explained_variance    | 0.58       |
|    gen/train/learning_rate         | 0.005      |
|    gen/train/loss                  | 0.803      |
|    gen/train/n_updates             | 3400       |
|    gen/train/policy_gradient_loss  | 0.0509     |
|    gen/train/std                   | 3.05       |
|    gen/tra

round:  69%|██████▉   | 69/100 [1:10:48<31:55, 61.79s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 1e+03     |
|    gen/rollout/ep_rew_mean         | 347       |
|    gen/rollout/ep_rew_wrapped_mean | 94.5      |
|    gen/time/fps                    | 2446      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 6         |
|    gen/time/total_timesteps        | 1146880   |
|    gen/train/approx_kl             | 0.6423343 |
|    gen/train/clip_fraction         | 0.711     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -15       |
|    gen/train/explained_variance    | 0.624     |
|    gen/train/learning_rate         | 0.005     |
|    gen/train/loss                  | 0.805     |
|    gen/train/n_updates             | 3450      |
|    gen/train/policy_gradient_loss  | 0.0576    |
|    gen/train/std                   | 3.1       |
|    gen/train/value_loss      

round:  70%|███████   | 70/100 [1:11:50<30:56, 61.88s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 1e+03     |
|    gen/rollout/ep_rew_mean         | 357       |
|    gen/rollout/ep_rew_wrapped_mean | 109       |
|    gen/time/fps                    | 2723      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 6         |
|    gen/time/total_timesteps        | 1163264   |
|    gen/train/approx_kl             | 0.5887209 |
|    gen/train/clip_fraction         | 0.721     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -15.1     |
|    gen/train/explained_variance    | 0.617     |
|    gen/train/learning_rate         | 0.005     |
|    gen/train/loss                  | 1.59      |
|    gen/train/n_updates             | 3500      |
|    gen/train/policy_gradient_loss  | 0.0562    |
|    gen/train/std                   | 3.13      |
|    gen/train/value_loss      

round:  71%|███████   | 71/100 [1:12:52<29:53, 61.84s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 1e+03     |
|    gen/rollout/ep_rew_mean         | 371       |
|    gen/rollout/ep_rew_wrapped_mean | 118       |
|    gen/time/fps                    | 2701      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 6         |
|    gen/time/total_timesteps        | 1179648   |
|    gen/train/approx_kl             | 0.6218439 |
|    gen/train/clip_fraction         | 0.712     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -15.2     |
|    gen/train/explained_variance    | 0.556     |
|    gen/train/learning_rate         | 0.005     |
|    gen/train/loss                  | 1.1       |
|    gen/train/n_updates             | 3550      |
|    gen/train/policy_gradient_loss  | 0.0547    |
|    gen/train/std                   | 3.14      |
|    gen/train/value_loss      

round:  72%|███████▏  | 72/100 [1:13:52<28:41, 61.49s/it]

---------------------------------------------------
| raw/                               |            |
|    gen/rollout/ep_len_mean         | 1e+03      |
|    gen/rollout/ep_rew_mean         | 377        |
|    gen/rollout/ep_rew_wrapped_mean | 114        |
|    gen/time/fps                    | 2720       |
|    gen/time/iterations             | 1          |
|    gen/time/time_elapsed           | 6          |
|    gen/time/total_timesteps        | 1196032    |
|    gen/train/approx_kl             | 0.48034668 |
|    gen/train/clip_fraction         | 0.683      |
|    gen/train/clip_range            | 0.2        |
|    gen/train/entropy_loss          | -15.2      |
|    gen/train/explained_variance    | 0.569      |
|    gen/train/learning_rate         | 0.005      |
|    gen/train/loss                  | 0.335      |
|    gen/train/n_updates             | 3600       |
|    gen/train/policy_gradient_loss  | 0.0564     |
|    gen/train/std                   | 3.11       |
|    gen/tra

round:  73%|███████▎  | 73/100 [1:14:53<27:34, 61.27s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 1e+03     |
|    gen/rollout/ep_rew_mean         | 383       |
|    gen/rollout/ep_rew_wrapped_mean | 104       |
|    gen/time/fps                    | 2750      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 5         |
|    gen/time/total_timesteps        | 1212416   |
|    gen/train/approx_kl             | 0.5114902 |
|    gen/train/clip_fraction         | 0.675     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -15.2     |
|    gen/train/explained_variance    | 0.571     |
|    gen/train/learning_rate         | 0.005     |
|    gen/train/loss                  | 0.089     |
|    gen/train/n_updates             | 3650      |
|    gen/train/policy_gradient_loss  | 0.0442    |
|    gen/train/std                   | 3.11      |
|    gen/train/value_loss      

round:  74%|███████▍  | 74/100 [1:15:54<26:29, 61.13s/it]

---------------------------------------------------
| raw/                               |            |
|    gen/rollout/ep_len_mean         | 1e+03      |
|    gen/rollout/ep_rew_mean         | 390        |
|    gen/rollout/ep_rew_wrapped_mean | 91.6       |
|    gen/time/fps                    | 2561       |
|    gen/time/iterations             | 1          |
|    gen/time/time_elapsed           | 6          |
|    gen/time/total_timesteps        | 1228800    |
|    gen/train/approx_kl             | 0.47979173 |
|    gen/train/clip_fraction         | 0.679      |
|    gen/train/clip_range            | 0.2        |
|    gen/train/entropy_loss          | -15.2      |
|    gen/train/explained_variance    | 0.623      |
|    gen/train/learning_rate         | 0.005      |
|    gen/train/loss                  | 0.206      |
|    gen/train/n_updates             | 3700       |
|    gen/train/policy_gradient_loss  | 0.0495     |
|    gen/train/std                   | 3.14       |
|    gen/tra

round:  75%|███████▌  | 75/100 [1:16:55<25:30, 61.21s/it]

---------------------------------------------------
| raw/                               |            |
|    gen/rollout/ep_len_mean         | 1e+03      |
|    gen/rollout/ep_rew_mean         | 384        |
|    gen/rollout/ep_rew_wrapped_mean | 77.7       |
|    gen/time/fps                    | 2448       |
|    gen/time/iterations             | 1          |
|    gen/time/time_elapsed           | 6          |
|    gen/time/total_timesteps        | 1245184    |
|    gen/train/approx_kl             | 0.46838346 |
|    gen/train/clip_fraction         | 0.684      |
|    gen/train/clip_range            | 0.2        |
|    gen/train/entropy_loss          | -15.2      |
|    gen/train/explained_variance    | 0.641      |
|    gen/train/learning_rate         | 0.005      |
|    gen/train/loss                  | 0.187      |
|    gen/train/n_updates             | 3750       |
|    gen/train/policy_gradient_loss  | 0.0486     |
|    gen/train/std                   | 3.14       |
|    gen/tra

round:  76%|███████▌  | 76/100 [1:17:58<24:43, 61.82s/it]

---------------------------------------------------
| raw/                               |            |
|    gen/rollout/ep_len_mean         | 1e+03      |
|    gen/rollout/ep_rew_mean         | 389        |
|    gen/rollout/ep_rew_wrapped_mean | 62.9       |
|    gen/time/fps                    | 2418       |
|    gen/time/iterations             | 1          |
|    gen/time/time_elapsed           | 6          |
|    gen/time/total_timesteps        | 1261568    |
|    gen/train/approx_kl             | 0.43682522 |
|    gen/train/clip_fraction         | 0.67       |
|    gen/train/clip_range            | 0.2        |
|    gen/train/entropy_loss          | -15.2      |
|    gen/train/explained_variance    | 0.451      |
|    gen/train/learning_rate         | 0.005      |
|    gen/train/loss                  | 0.116      |
|    gen/train/n_updates             | 3800       |
|    gen/train/policy_gradient_loss  | 0.045      |
|    gen/train/std                   | 3.12       |
|    gen/tra

round:  77%|███████▋  | 77/100 [1:19:01<23:46, 62.01s/it]

---------------------------------------------------
| raw/                               |            |
|    gen/rollout/ep_len_mean         | 1e+03      |
|    gen/rollout/ep_rew_mean         | 389        |
|    gen/rollout/ep_rew_wrapped_mean | 51         |
|    gen/time/fps                    | 2479       |
|    gen/time/iterations             | 1          |
|    gen/time/time_elapsed           | 6          |
|    gen/time/total_timesteps        | 1277952    |
|    gen/train/approx_kl             | 0.47686282 |
|    gen/train/clip_fraction         | 0.669      |
|    gen/train/clip_range            | 0.2        |
|    gen/train/entropy_loss          | -15.2      |
|    gen/train/explained_variance    | 0.513      |
|    gen/train/learning_rate         | 0.005      |
|    gen/train/loss                  | 0.127      |
|    gen/train/n_updates             | 3850       |
|    gen/train/policy_gradient_loss  | 0.0438     |
|    gen/train/std                   | 3.08       |
|    gen/tra

round:  78%|███████▊  | 78/100 [1:20:04<22:48, 62.19s/it]

---------------------------------------------------
| raw/                               |            |
|    gen/rollout/ep_len_mean         | 1e+03      |
|    gen/rollout/ep_rew_mean         | 389        |
|    gen/rollout/ep_rew_wrapped_mean | 45.9       |
|    gen/time/fps                    | 2698       |
|    gen/time/iterations             | 1          |
|    gen/time/time_elapsed           | 6          |
|    gen/time/total_timesteps        | 1294336    |
|    gen/train/approx_kl             | 0.43107992 |
|    gen/train/clip_fraction         | 0.667      |
|    gen/train/clip_range            | 0.2        |
|    gen/train/entropy_loss          | -15.2      |
|    gen/train/explained_variance    | 0.453      |
|    gen/train/learning_rate         | 0.005      |
|    gen/train/loss                  | 0.155      |
|    gen/train/n_updates             | 3900       |
|    gen/train/policy_gradient_loss  | 0.0398     |
|    gen/train/std                   | 3.06       |
|    gen/tra

round:  79%|███████▉  | 79/100 [1:21:05<21:43, 62.09s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 1e+03     |
|    gen/rollout/ep_rew_mean         | 408       |
|    gen/rollout/ep_rew_wrapped_mean | 46.6      |
|    gen/time/fps                    | 2627      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 6         |
|    gen/time/total_timesteps        | 1310720   |
|    gen/train/approx_kl             | 0.5487025 |
|    gen/train/clip_fraction         | 0.66      |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -15.1     |
|    gen/train/explained_variance    | 0.442     |
|    gen/train/learning_rate         | 0.005     |
|    gen/train/loss                  | 0.547     |
|    gen/train/n_updates             | 3950      |
|    gen/train/policy_gradient_loss  | 0.0368    |
|    gen/train/std                   | 3.05      |
|    gen/train/value_loss      

round:  80%|████████  | 80/100 [1:22:08<20:44, 62.22s/it]

---------------------------------------------------
| raw/                               |            |
|    gen/rollout/ep_len_mean         | 1e+03      |
|    gen/rollout/ep_rew_mean         | 402        |
|    gen/rollout/ep_rew_wrapped_mean | 46.9       |
|    gen/time/fps                    | 2298       |
|    gen/time/iterations             | 1          |
|    gen/time/time_elapsed           | 7          |
|    gen/time/total_timesteps        | 1327104    |
|    gen/train/approx_kl             | 0.44490063 |
|    gen/train/clip_fraction         | 0.67       |
|    gen/train/clip_range            | 0.2        |
|    gen/train/entropy_loss          | -15.1      |
|    gen/train/explained_variance    | 0.489      |
|    gen/train/learning_rate         | 0.005      |
|    gen/train/loss                  | 0.253      |
|    gen/train/n_updates             | 4000       |
|    gen/train/policy_gradient_loss  | 0.0455     |
|    gen/train/std                   | 3.07       |
|    gen/tra

round:  81%|████████  | 81/100 [1:23:12<19:51, 62.71s/it]

---------------------------------------------------
| raw/                               |            |
|    gen/rollout/ep_len_mean         | 1e+03      |
|    gen/rollout/ep_rew_mean         | 410        |
|    gen/rollout/ep_rew_wrapped_mean | 42.1       |
|    gen/time/fps                    | 2378       |
|    gen/time/iterations             | 1          |
|    gen/time/time_elapsed           | 6          |
|    gen/time/total_timesteps        | 1343488    |
|    gen/train/approx_kl             | 0.40752238 |
|    gen/train/clip_fraction         | 0.628      |
|    gen/train/clip_range            | 0.2        |
|    gen/train/entropy_loss          | -15.1      |
|    gen/train/explained_variance    | 0.195      |
|    gen/train/learning_rate         | 0.005      |
|    gen/train/loss                  | 0.125      |
|    gen/train/n_updates             | 4050       |
|    gen/train/policy_gradient_loss  | 0.045      |
|    gen/train/std                   | 3.1        |
|    gen/tra

round:  82%|████████▏ | 82/100 [1:24:16<18:54, 63.03s/it]

---------------------------------------------------
| raw/                               |            |
|    gen/rollout/ep_len_mean         | 1e+03      |
|    gen/rollout/ep_rew_mean         | 407        |
|    gen/rollout/ep_rew_wrapped_mean | 39.6       |
|    gen/time/fps                    | 2497       |
|    gen/time/iterations             | 1          |
|    gen/time/time_elapsed           | 6          |
|    gen/time/total_timesteps        | 1359872    |
|    gen/train/approx_kl             | 0.45306054 |
|    gen/train/clip_fraction         | 0.663      |
|    gen/train/clip_range            | 0.2        |
|    gen/train/entropy_loss          | -15.2      |
|    gen/train/explained_variance    | 0.366      |
|    gen/train/learning_rate         | 0.005      |
|    gen/train/loss                  | 0.11       |
|    gen/train/n_updates             | 4100       |
|    gen/train/policy_gradient_loss  | 0.0384     |
|    gen/train/std                   | 3.11       |
|    gen/tra

round:  83%|████████▎ | 83/100 [1:25:19<17:54, 63.18s/it]

---------------------------------------------------
| raw/                               |            |
|    gen/rollout/ep_len_mean         | 1e+03      |
|    gen/rollout/ep_rew_mean         | 403        |
|    gen/rollout/ep_rew_wrapped_mean | 40.2       |
|    gen/time/fps                    | 2693       |
|    gen/time/iterations             | 1          |
|    gen/time/time_elapsed           | 6          |
|    gen/time/total_timesteps        | 1376256    |
|    gen/train/approx_kl             | 0.48471665 |
|    gen/train/clip_fraction         | 0.682      |
|    gen/train/clip_range            | 0.2        |
|    gen/train/entropy_loss          | -15.2      |
|    gen/train/explained_variance    | 0.334      |
|    gen/train/learning_rate         | 0.005      |
|    gen/train/loss                  | 0.145      |
|    gen/train/n_updates             | 4150       |
|    gen/train/policy_gradient_loss  | 0.043      |
|    gen/train/std                   | 3.09       |
|    gen/tra

round:  84%|████████▍ | 84/100 [1:26:22<16:51, 63.21s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 1e+03     |
|    gen/rollout/ep_rew_mean         | 406       |
|    gen/rollout/ep_rew_wrapped_mean | 38        |
|    gen/time/fps                    | 2495      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 6         |
|    gen/time/total_timesteps        | 1392640   |
|    gen/train/approx_kl             | 0.4629935 |
|    gen/train/clip_fraction         | 0.676     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -15.2     |
|    gen/train/explained_variance    | 0.247     |
|    gen/train/learning_rate         | 0.005     |
|    gen/train/loss                  | 0.112     |
|    gen/train/n_updates             | 4200      |
|    gen/train/policy_gradient_loss  | 0.0436    |
|    gen/train/std                   | 3.12      |
|    gen/train/value_loss      

round:  85%|████████▌ | 85/100 [1:27:24<15:42, 62.81s/it]

---------------------------------------------------
| raw/                               |            |
|    gen/rollout/ep_len_mean         | 1e+03      |
|    gen/rollout/ep_rew_mean         | 393        |
|    gen/rollout/ep_rew_wrapped_mean | 34.9       |
|    gen/time/fps                    | 2485       |
|    gen/time/iterations             | 1          |
|    gen/time/time_elapsed           | 6          |
|    gen/time/total_timesteps        | 1409024    |
|    gen/train/approx_kl             | 0.49074918 |
|    gen/train/clip_fraction         | 0.674      |
|    gen/train/clip_range            | 0.2        |
|    gen/train/entropy_loss          | -15.2      |
|    gen/train/explained_variance    | 0.263      |
|    gen/train/learning_rate         | 0.005      |
|    gen/train/loss                  | 0.0615     |
|    gen/train/n_updates             | 4250       |
|    gen/train/policy_gradient_loss  | 0.0398     |
|    gen/train/std                   | 3.11       |
|    gen/tra

round:  86%|████████▌ | 86/100 [1:28:25<14:32, 62.34s/it]

---------------------------------------------------
| raw/                               |            |
|    gen/rollout/ep_len_mean         | 1e+03      |
|    gen/rollout/ep_rew_mean         | 386        |
|    gen/rollout/ep_rew_wrapped_mean | 34.4       |
|    gen/time/fps                    | 2472       |
|    gen/time/iterations             | 1          |
|    gen/time/time_elapsed           | 6          |
|    gen/time/total_timesteps        | 1425408    |
|    gen/train/approx_kl             | 0.51956785 |
|    gen/train/clip_fraction         | 0.67       |
|    gen/train/clip_range            | 0.2        |
|    gen/train/entropy_loss          | -15.2      |
|    gen/train/explained_variance    | 0.31       |
|    gen/train/learning_rate         | 0.005      |
|    gen/train/loss                  | 0.362      |
|    gen/train/n_updates             | 4300       |
|    gen/train/policy_gradient_loss  | 0.037      |
|    gen/train/std                   | 3.14       |
|    gen/tra

round:  87%|████████▋ | 87/100 [1:29:26<13:25, 61.95s/it]

---------------------------------------------------
| raw/                               |            |
|    gen/rollout/ep_len_mean         | 1e+03      |
|    gen/rollout/ep_rew_mean         | 390        |
|    gen/rollout/ep_rew_wrapped_mean | 39.9       |
|    gen/time/fps                    | 2472       |
|    gen/time/iterations             | 1          |
|    gen/time/time_elapsed           | 6          |
|    gen/time/total_timesteps        | 1441792    |
|    gen/train/approx_kl             | 0.61824507 |
|    gen/train/clip_fraction         | 0.69       |
|    gen/train/clip_range            | 0.2        |
|    gen/train/entropy_loss          | -15.2      |
|    gen/train/explained_variance    | 0.364      |
|    gen/train/learning_rate         | 0.005      |
|    gen/train/loss                  | 0.184      |
|    gen/train/n_updates             | 4350       |
|    gen/train/policy_gradient_loss  | 0.0414     |
|    gen/train/std                   | 3.16       |
|    gen/tra

round:  88%|████████▊ | 88/100 [1:30:27<12:19, 61.64s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 1e+03     |
|    gen/rollout/ep_rew_mean         | 372       |
|    gen/rollout/ep_rew_wrapped_mean | 51.9      |
|    gen/time/fps                    | 2433      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 6         |
|    gen/time/total_timesteps        | 1458176   |
|    gen/train/approx_kl             | 0.6669382 |
|    gen/train/clip_fraction         | 0.693     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -15.2     |
|    gen/train/explained_variance    | 0.426     |
|    gen/train/learning_rate         | 0.005     |
|    gen/train/loss                  | 0.443     |
|    gen/train/n_updates             | 4400      |
|    gen/train/policy_gradient_loss  | 0.047     |
|    gen/train/std                   | 3.16      |
|    gen/train/value_loss      

round:  89%|████████▉ | 89/100 [1:31:30<11:20, 61.90s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_len_mean         | 1e+03     |
|    gen/rollout/ep_rew_mean         | 378       |
|    gen/rollout/ep_rew_wrapped_mean | 58.7      |
|    gen/time/fps                    | 2712      |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 6         |
|    gen/time/total_timesteps        | 1474560   |
|    gen/train/approx_kl             | 0.6019821 |
|    gen/train/clip_fraction         | 0.7       |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -15.2     |
|    gen/train/explained_variance    | 0.529     |
|    gen/train/learning_rate         | 0.005     |
|    gen/train/loss                  | 0.496     |
|    gen/train/n_updates             | 4450      |
|    gen/train/policy_gradient_loss  | 0.0457    |
|    gen/train/std                   | 3.13      |
|    gen/train/value_loss      

round:  90%|█████████ | 90/100 [1:32:31<10:16, 61.64s/it]

---------------------------------------------------
| raw/                               |            |
|    gen/rollout/ep_len_mean         | 1e+03      |
|    gen/rollout/ep_rew_mean         | 379        |
|    gen/rollout/ep_rew_wrapped_mean | 74.9       |
|    gen/time/fps                    | 2623       |
|    gen/time/iterations             | 1          |
|    gen/time/time_elapsed           | 6          |
|    gen/time/total_timesteps        | 1490944    |
|    gen/train/approx_kl             | 0.66256833 |
|    gen/train/clip_fraction         | 0.681      |
|    gen/train/clip_range            | 0.2        |
|    gen/train/entropy_loss          | -15.1      |
|    gen/train/explained_variance    | 0.549      |
|    gen/train/learning_rate         | 0.005      |
|    gen/train/loss                  | 1.54       |
|    gen/train/n_updates             | 4500       |
|    gen/train/policy_gradient_loss  | 0.0461     |
|    gen/train/std                   | 3.11       |
|    gen/tra

round:  91%|█████████ | 91/100 [1:33:32<09:13, 61.55s/it]

---------------------------------------------------
| raw/                               |            |
|    gen/rollout/ep_len_mean         | 1e+03      |
|    gen/rollout/ep_rew_mean         | 367        |
|    gen/rollout/ep_rew_wrapped_mean | 81         |
|    gen/time/fps                    | 2752       |
|    gen/time/iterations             | 1          |
|    gen/time/time_elapsed           | 5          |
|    gen/time/total_timesteps        | 1507328    |
|    gen/train/approx_kl             | 0.63896716 |
|    gen/train/clip_fraction         | 0.697      |
|    gen/train/clip_range            | 0.2        |
|    gen/train/entropy_loss          | -15.2      |
|    gen/train/explained_variance    | 0.574      |
|    gen/train/learning_rate         | 0.005      |
|    gen/train/loss                  | 0.583      |
|    gen/train/n_updates             | 4550       |
|    gen/train/policy_gradient_loss  | 0.0551     |
|    gen/train/std                   | 3.09       |
|    gen/tra

round:  92%|█████████▏| 92/100 [1:34:33<08:10, 61.33s/it]

---------------------------------------------------
| raw/                               |            |
|    gen/rollout/ep_len_mean         | 1e+03      |
|    gen/rollout/ep_rew_mean         | 367        |
|    gen/rollout/ep_rew_wrapped_mean | 80.3       |
|    gen/time/fps                    | 2737       |
|    gen/time/iterations             | 1          |
|    gen/time/time_elapsed           | 5          |
|    gen/time/total_timesteps        | 1523712    |
|    gen/train/approx_kl             | 0.47639742 |
|    gen/train/clip_fraction         | 0.648      |
|    gen/train/clip_range            | 0.2        |
|    gen/train/entropy_loss          | -15.1      |
|    gen/train/explained_variance    | 0.469      |
|    gen/train/learning_rate         | 0.005      |
|    gen/train/loss                  | 0.356      |
|    gen/train/n_updates             | 4600       |
|    gen/train/policy_gradient_loss  | 0.0583     |
|    gen/train/std                   | 3.07       |
|    gen/tra

round:  93%|█████████▎| 93/100 [1:35:34<07:08, 61.15s/it]

---------------------------------------------------
| raw/                               |            |
|    gen/rollout/ep_len_mean         | 1e+03      |
|    gen/rollout/ep_rew_mean         | 350        |
|    gen/rollout/ep_rew_wrapped_mean | 76.9       |
|    gen/time/fps                    | 2603       |
|    gen/time/iterations             | 1          |
|    gen/time/time_elapsed           | 6          |
|    gen/time/total_timesteps        | 1540096    |
|    gen/train/approx_kl             | 0.49796915 |
|    gen/train/clip_fraction         | 0.633      |
|    gen/train/clip_range            | 0.2        |
|    gen/train/entropy_loss          | -15.1      |
|    gen/train/explained_variance    | 0.52       |
|    gen/train/learning_rate         | 0.005      |
|    gen/train/loss                  | 0.13       |
|    gen/train/n_updates             | 4650       |
|    gen/train/policy_gradient_loss  | 0.0492     |
|    gen/train/std                   | 3.07       |
|    gen/tra

round:  94%|█████████▍| 94/100 [1:36:35<06:06, 61.15s/it]

---------------------------------------------------
| raw/                               |            |
|    gen/rollout/ep_len_mean         | 1e+03      |
|    gen/rollout/ep_rew_mean         | 351        |
|    gen/rollout/ep_rew_wrapped_mean | 68.3       |
|    gen/time/fps                    | 2400       |
|    gen/time/iterations             | 1          |
|    gen/time/time_elapsed           | 6          |
|    gen/time/total_timesteps        | 1556480    |
|    gen/train/approx_kl             | 0.61663747 |
|    gen/train/clip_fraction         | 0.701      |
|    gen/train/clip_range            | 0.2        |
|    gen/train/entropy_loss          | -15.1      |
|    gen/train/explained_variance    | 0.545      |
|    gen/train/learning_rate         | 0.005      |
|    gen/train/loss                  | 0.214      |
|    gen/train/n_updates             | 4700       |
|    gen/train/policy_gradient_loss  | 0.0448     |
|    gen/train/std                   | 3.02       |
|    gen/tra

round:  95%|█████████▌| 95/100 [1:37:37<05:07, 61.43s/it]

---------------------------------------------------
| raw/                               |            |
|    gen/rollout/ep_len_mean         | 1e+03      |
|    gen/rollout/ep_rew_mean         | 350        |
|    gen/rollout/ep_rew_wrapped_mean | 62.1       |
|    gen/time/fps                    | 2429       |
|    gen/time/iterations             | 1          |
|    gen/time/time_elapsed           | 6          |
|    gen/time/total_timesteps        | 1572864    |
|    gen/train/approx_kl             | 0.74074465 |
|    gen/train/clip_fraction         | 0.705      |
|    gen/train/clip_range            | 0.2        |
|    gen/train/entropy_loss          | -15        |
|    gen/train/explained_variance    | 0.495      |
|    gen/train/learning_rate         | 0.005      |
|    gen/train/loss                  | 0.249      |
|    gen/train/n_updates             | 4750       |
|    gen/train/policy_gradient_loss  | 0.0439     |
|    gen/train/std                   | 3.04       |
|    gen/tra

round:  96%|█████████▌| 96/100 [1:38:40<04:07, 61.77s/it]

---------------------------------------------------
| raw/                               |            |
|    gen/rollout/ep_len_mean         | 1e+03      |
|    gen/rollout/ep_rew_mean         | 369        |
|    gen/rollout/ep_rew_wrapped_mean | 48.6       |
|    gen/time/fps                    | 2522       |
|    gen/time/iterations             | 1          |
|    gen/time/time_elapsed           | 6          |
|    gen/time/total_timesteps        | 1589248    |
|    gen/train/approx_kl             | 0.69457316 |
|    gen/train/clip_fraction         | 0.702      |
|    gen/train/clip_range            | 0.2        |
|    gen/train/entropy_loss          | -15.1      |
|    gen/train/explained_variance    | 0.565      |
|    gen/train/learning_rate         | 0.005      |
|    gen/train/loss                  | 0.292      |
|    gen/train/n_updates             | 4800       |
|    gen/train/policy_gradient_loss  | 0.0587     |
|    gen/train/std                   | 3          |
|    gen/tra

round:  96%|█████████▌| 96/100 [1:39:02<04:07, 61.90s/it]


KeyboardInterrupt: 

In [None]:
# Re-seed the environment with SEED before evaluating the trained learner.
env.seed(SEED)

# NOTE(review): assumes `evaluate_policy` is the Stable-Baselines3 helper, whose
# third positional parameter is `n_eval_episodes` -- confirm against the import cell.
# The call returns a pair when return_episode_rewards=True; only the first element
# is kept, the second is discarded into `_`.
learner_rewards_after_training, _ = evaluate_policy(
    learner,
    env,
    n_eval_episodes=20,
    return_episode_rewards=True,
)

In [None]:
# Report mean +/- standard deviation of the collected rewards, one line per
# training phase (before vs. after training).
for label, episode_rewards in (
    ("Rewards before training:", learner_rewards_before_training),
    ("Rewards after training:", learner_rewards_after_training),
):
    print(label, np.mean(episode_rewards), "+/-", np.std(episode_rewards))

1st round (mean +/- std): 509.06895325000005 +/- 104.65142014637341

