# Data Collection

## Vectorise Environment

In [1]:
from crowd_sim.envs.crowd_sim_sgan import CrowdSimSgan
from crowd_sim.envs.crowd_sim_sgan_apf import CrowdSimSganApf
from crowd_sim.envs.crowd_sim_no_pred import CrowdSimNoPred
import gym
import time
import numpy as np

In [2]:
from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv, VecTransposeImage
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.utils import set_random_seed
from stable_baselines3 import PPO, A2C

In [3]:
from arguments import get_args
from crowd_nav.configs.config import Config

# Project configuration object; it is handed to make_env() further down and
# applied to each simulator through env.configure().
config = Config()

In [4]:
def make_env(seed, rank, env_config, envNum=1):
    """
    Build a deferred constructor for one crowd-simulation environment.

    Nothing is instantiated by this call; the returned callable creates,
    configures and seeds the environment when it is invoked.

    :param seed: (int) the initial seed for RNG
    :param rank: (int) index of the subprocess; added to ``seed`` so that each
        environment receives a different seed
    :param env_config: configuration object passed to ``env.configure``
    :param envNum: (int) forwarded to ``env.setup`` as ``num_of_env``
    :return: zero-argument callable that returns the prepared environment
    """

    def _init():
        simulator = CrowdSimSganApf()
        simulator.configure(env_config)

        # Offset the base seed by the rank: environments sharing one seed would
        # generate the same experiences.
        env_seed = seed + rank
        simulator.seed(env_seed)
        simulator.setup(seed=env_seed, num_of_env=envNum)
        return simulator

    return _init

In [5]:
num_cpu = 1  # Number of processes to use
seed = 0  # Base seed; make_env adds each environment's rank to it

# Build one environment factory per process, run them through SubprocVecEnv,
# and wrap the result in VecTransposeImage.
venv = VecTransposeImage(
    SubprocVecEnv(
        [make_env(seed, rank, config, num_cpu) for rank in range(num_cpu)]
    )
)

In [6]:
# Optional sanity check, left disabled: reset the vectorised environment and
# look at the shape of the observation batch it returns.
# obs = venv.reset()
# obs.shape

## Collecting dataset

In [7]:
from imitation_learning import rollout
# Seed the generator with the notebook-wide `seed` (the same base seed the
# environments use) so that any sampling driven by `rng` repeats across kernel
# restarts instead of drawing fresh OS entropy on every run.
rng = np.random.default_rng(seed)

In [8]:
# Stop condition for data collection: no timestep minimum, min_episodes=60
# (the logged rollout stats report n_traj = 60).
sample_until = rollout.make_sample_until(min_timesteps=None, min_episodes=60)

# NOTE(review): policy=None — presumably the project's rollout module supplies
# its own demonstrator in that case; confirm in imitation_learning.rollout.
rollouts = rollout.rollout(
    policy=None,
    venv=venv,
    sample_until=sample_until,
    rng=rng,
    unwrap=False,
    exclude_infos=True,
    verbose=True,
)

Rollout stats: {'n_traj': 60, 'return_min': -12.255129237337709, 'return_mean': 29.088985581059507, 'return_std': 11.06672175559258, 'return_max': 48.69290168355165, 'len_min': 22, 'len_mean': 49.916666666666664, 'len_std': 17.61655629104495, 'len_max': 95}


## Save and load dataset

In [None]:
from imitation.data import serialize
# Location used by the save and load cells that follow.
# NOTE(review): the name says "100eps", but the collection cell requests
# min_episodes=60 and its logged stats report n_traj = 60 — confirm the name
# matches the data actually stored here.
dataset_path = './train/dataset/8ppl_240mapsize_100eps'

In [None]:
# Save dataset: write the collected `rollouts` to `dataset_path`.
# NOTE(review): `serialize` comes from imitation.data while `rollouts` was
# produced by the project's imitation_learning.rollout — confirm the trajectory
# objects are compatible.
serialize.save(dataset_path, rollouts)

In [None]:
# Load dataset: rebind `rollouts` to the trajectories stored at `dataset_path`,
# replacing whatever `rollouts` held before this cell ran.
rollouts = serialize.load(dataset_path)

# GAIL

In [9]:
from stable_baselines3 import PPO
from imitation.algorithms.adversarial.gail import GAIL
from imitation.rewards.reward_nets import BasicRewardNet
from imitation.util.networks import RunningNorm

In [10]:
# Generator for GAIL: a PPO agent with the CNN policy acting in `venv`.
# NOTE(review): no seed is passed, so the learner is not tied to the
# notebook-wide `seed` — confirm whether that is intended.
learner = PPO(
    policy='CnnPolicy',
    env=venv,
    learning_rate=0.0003,
    batch_size=64,
    n_epochs=10,
    ent_coef=0.0,
)

In [11]:
# Reward network handed to the GAIL trainer, built from the vectorised
# environment's observation and action spaces with RunningNorm as the
# input-normalisation layer.
reward_net = BasicRewardNet(
    observation_space=venv.observation_space,
    action_space=venv.action_space,
    normalize_input_layer=RunningNorm,
)

In [12]:
# Adversarial imitation trainer wiring together the environment, the PPO
# generator, the reward network and the collected demonstrations.
gail_trainer = GAIL(
    # environment and the two networks being trained
    venv=venv,
    gen_algo=learner,
    reward_net=reward_net,
    # expert data and discriminator schedule
    demonstrations=rollouts,
    demo_batch_size=64,
    n_disc_updates_per_round=4,
    gen_replay_buffer_capacity=1024,
    # episodes here do not all have the same length (rollout stats report
    # len_min 22, len_max 95)
    allow_variable_horizon=True,
)

Running with `allow_variable_horizon` set to True. Some algorithms are biased towards shorter or longer episodes, which may significantly confound results. Additionally, even unbiased algorithms can exploit the information leak from the termination condition, producing spuriously high performance. See https://imitation.readthedocs.io/en/latest/getting-started/variable-horizon.html for more information.


In [13]:
# Run GAIL training with a budget of 300000 timesteps (the progress bar in the
# captured output counts 146 rounds).
# NOTE(review): in the captured log, gen/train/approx_kl climbs far above 1 and
# gen/train/clip_fraction approaches 1 after the first few rounds, which points
# to very large PPO policy updates — worth investigating (e.g. PPO's target_kl
# or a smaller learning rate) before relying on this run.
gail_trainer.train(300000)

round:   0%|                                            | 0/146 [00:00<?, ?it/s]

--------------------------------------
| raw/                        |      |
|    gen/time/fps             | 37   |
|    gen/time/iterations      | 1    |
|    gen/time/time_elapsed    | 54   |
|    gen/time/total_timesteps | 2048 |
--------------------------------------
--------------------------------------------------
| raw/                                |          |
|    disc/disc_acc                    | 0.57     |
|    disc/disc_acc_expert             | 0.203    |
|    disc/disc_acc_gen                | 0.938    |
|    disc/disc_entropy                | 0.693    |
|    disc/disc_loss                   | 0.688    |
|    disc/disc_proportion_expert_pred | 0.133    |
|    disc/disc_proportion_expert_true | 0.5      |
|    disc/global_step                 | 1        |
|    disc/n_expert                    | 64       |
|    disc/n_generated                 | 64       |
--------------------------------------------------
--------------------------------------------------
| raw/       

round:   1%|▏                                 | 1/146 [01:05<2:39:04, 65.82s/it]

-----------------------------------------------------
| raw/                               |              |
|    gen/rollout/ep_rew_wrapped_mean | 80.4         |
|    gen/time/fps                    | 40           |
|    gen/time/iterations             | 1            |
|    gen/time/time_elapsed           | 50           |
|    gen/time/total_timesteps        | 4096         |
|    gen/train/approx_kl             | 0.0075228335 |
|    gen/train/clip_fraction         | 0.165        |
|    gen/train/clip_range            | 0.2          |
|    gen/train/entropy_loss          | -2.82        |
|    gen/train/explained_variance    | -0.000352    |
|    gen/train/learning_rate         | 0.0003       |
|    gen/train/loss                  | 1.46         |
|    gen/train/n_updates             | 10           |
|    gen/train/policy_gradient_loss  | 0.000405     |
|    gen/train/std                   | 0.988        |
|    gen/train/value_loss            | 6.83         |
----------------------------

round:   1%|▍                                 | 2/146 [02:07<2:32:42, 63.63s/it]

----------------------------------------------------
| raw/                               |             |
|    gen/rollout/ep_rew_wrapped_mean | 127         |
|    gen/time/fps                    | 45          |
|    gen/time/iterations             | 1           |
|    gen/time/time_elapsed           | 45          |
|    gen/time/total_timesteps        | 6144        |
|    gen/train/approx_kl             | 0.015974281 |
|    gen/train/clip_fraction         | 0.206       |
|    gen/train/clip_range            | 0.2         |
|    gen/train/entropy_loss          | -2.81       |
|    gen/train/explained_variance    | -0.0265     |
|    gen/train/learning_rate         | 0.0003      |
|    gen/train/loss                  | 28.3        |
|    gen/train/n_updates             | 20          |
|    gen/train/policy_gradient_loss  | 0.0145      |
|    gen/train/std                   | 0.986       |
|    gen/train/value_loss            | 223         |
----------------------------------------------

round:   2%|▋                                 | 3/146 [03:04<2:24:04, 60.45s/it]

----------------------------------------------------
| raw/                               |             |
|    gen/rollout/ep_rew_wrapped_mean | 159         |
|    gen/time/fps                    | 41          |
|    gen/time/iterations             | 1           |
|    gen/time/time_elapsed           | 49          |
|    gen/time/total_timesteps        | 8192        |
|    gen/train/approx_kl             | 0.013806209 |
|    gen/train/clip_fraction         | 0.241       |
|    gen/train/clip_range            | 0.2         |
|    gen/train/entropy_loss          | -2.81       |
|    gen/train/explained_variance    | 0.123       |
|    gen/train/learning_rate         | 0.0003      |
|    gen/train/loss                  | 100         |
|    gen/train/n_updates             | 30          |
|    gen/train/policy_gradient_loss  | 0.0235      |
|    gen/train/std                   | 0.986       |
|    gen/train/value_loss            | 289         |
----------------------------------------------

round:   3%|▉                                 | 4/146 [04:05<2:23:25, 60.61s/it]

---------------------------------------------------
| raw/                               |            |
|    gen/rollout/ep_rew_wrapped_mean | 294        |
|    gen/time/fps                    | 49         |
|    gen/time/iterations             | 1          |
|    gen/time/time_elapsed           | 41         |
|    gen/time/total_timesteps        | 10240      |
|    gen/train/approx_kl             | 0.02602458 |
|    gen/train/clip_fraction         | 0.309      |
|    gen/train/clip_range            | 0.2        |
|    gen/train/entropy_loss          | -2.81      |
|    gen/train/explained_variance    | 0.674      |
|    gen/train/learning_rate         | 0.0003     |
|    gen/train/loss                  | 843        |
|    gen/train/n_updates             | 40         |
|    gen/train/policy_gradient_loss  | 0.042      |
|    gen/train/std                   | 0.986      |
|    gen/train/value_loss            | 1.55e+03   |
---------------------------------------------------
------------

round:   3%|█▏                                | 5/146 [04:57<2:15:30, 57.67s/it]

---------------------------------------------------
| raw/                               |            |
|    gen/rollout/ep_rew_wrapped_mean | 253        |
|    gen/time/fps                    | 49         |
|    gen/time/iterations             | 1          |
|    gen/time/time_elapsed           | 41         |
|    gen/time/total_timesteps        | 12288      |
|    gen/train/approx_kl             | 0.04202067 |
|    gen/train/clip_fraction         | 0.304      |
|    gen/train/clip_range            | 0.2        |
|    gen/train/entropy_loss          | -2.81      |
|    gen/train/explained_variance    | 0.594      |
|    gen/train/learning_rate         | 0.0003     |
|    gen/train/loss                  | 365        |
|    gen/train/n_updates             | 50         |
|    gen/train/policy_gradient_loss  | 0.0149     |
|    gen/train/std                   | 0.986      |
|    gen/train/value_loss            | 1.52e+03   |
---------------------------------------------------
------------

round:   4%|█▍                                | 6/146 [05:50<2:10:28, 55.92s/it]

---------------------------------------------------
| raw/                               |            |
|    gen/rollout/ep_rew_wrapped_mean | 231        |
|    gen/time/fps                    | 51         |
|    gen/time/iterations             | 1          |
|    gen/time/time_elapsed           | 39         |
|    gen/time/total_timesteps        | 14336      |
|    gen/train/approx_kl             | 0.16559553 |
|    gen/train/clip_fraction         | 0.384      |
|    gen/train/clip_range            | 0.2        |
|    gen/train/entropy_loss          | -2.81      |
|    gen/train/explained_variance    | 0.565      |
|    gen/train/learning_rate         | 0.0003     |
|    gen/train/loss                  | 342        |
|    gen/train/n_updates             | 60         |
|    gen/train/policy_gradient_loss  | 0.039      |
|    gen/train/std                   | 0.987      |
|    gen/train/value_loss            | 888        |
---------------------------------------------------
------------

round:   5%|█▋                                | 7/146 [06:41<2:05:56, 54.36s/it]

---------------------------------------------------
| raw/                               |            |
|    gen/rollout/ep_rew_wrapped_mean | 252        |
|    gen/time/fps                    | 61         |
|    gen/time/iterations             | 1          |
|    gen/time/time_elapsed           | 33         |
|    gen/time/total_timesteps        | 16384      |
|    gen/train/approx_kl             | 0.09267752 |
|    gen/train/clip_fraction         | 0.459      |
|    gen/train/clip_range            | 0.2        |
|    gen/train/entropy_loss          | -2.81      |
|    gen/train/explained_variance    | 0.49       |
|    gen/train/learning_rate         | 0.0003     |
|    gen/train/loss                  | 1.16e+03   |
|    gen/train/n_updates             | 70         |
|    gen/train/policy_gradient_loss  | 0.0562     |
|    gen/train/std                   | 0.987      |
|    gen/train/value_loss            | 2.03e+03   |
---------------------------------------------------
------------

round:   5%|█▊                                | 8/146 [07:26<1:57:53, 51.26s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_rew_wrapped_mean | 247       |
|    gen/time/fps                    | 82        |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 24        |
|    gen/time/total_timesteps        | 18432     |
|    gen/train/approx_kl             | 3.1465323 |
|    gen/train/clip_fraction         | 0.839     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -2.81     |
|    gen/train/explained_variance    | 0.485     |
|    gen/train/learning_rate         | 0.0003    |
|    gen/train/loss                  | 398       |
|    gen/train/n_updates             | 80        |
|    gen/train/policy_gradient_loss  | 0.165     |
|    gen/train/std                   | 0.987     |
|    gen/train/value_loss            | 1.45e+03  |
--------------------------------------------------
-------------------------------

round:   6%|██                                | 9/146 [08:02<1:46:13, 46.52s/it]

-------------------------------------------------
| raw/                               |          |
|    gen/rollout/ep_rew_wrapped_mean | 276      |
|    gen/time/fps                    | 95       |
|    gen/time/iterations             | 1        |
|    gen/time/time_elapsed           | 21       |
|    gen/time/total_timesteps        | 20480    |
|    gen/train/approx_kl             | 85.03761 |
|    gen/train/clip_fraction         | 0.903    |
|    gen/train/clip_range            | 0.2      |
|    gen/train/entropy_loss          | -2.81    |
|    gen/train/explained_variance    | 0.319    |
|    gen/train/learning_rate         | 0.0003   |
|    gen/train/loss                  | 179      |
|    gen/train/n_updates             | 90       |
|    gen/train/policy_gradient_loss  | 0.239    |
|    gen/train/std                   | 0.987    |
|    gen/train/value_loss            | 647      |
-------------------------------------------------
--------------------------------------------------

round:   7%|██▎                              | 10/146 [08:34<1:35:47, 42.26s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_rew_wrapped_mean | 211       |
|    gen/time/fps                    | 82        |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 24        |
|    gen/time/total_timesteps        | 22528     |
|    gen/train/approx_kl             | 3.1993055 |
|    gen/train/clip_fraction         | 0.96      |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -2.81     |
|    gen/train/explained_variance    | 0.262     |
|    gen/train/learning_rate         | 0.0003    |
|    gen/train/loss                  | 632       |
|    gen/train/n_updates             | 100       |
|    gen/train/policy_gradient_loss  | 0.212     |
|    gen/train/std                   | 0.988     |
|    gen/train/value_loss            | 1.23e+03  |
--------------------------------------------------
-------------------------------

round:   8%|██▍                              | 11/146 [09:11<1:30:54, 40.40s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_rew_wrapped_mean | 157       |
|    gen/time/fps                    | 86        |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 23        |
|    gen/time/total_timesteps        | 24576     |
|    gen/train/approx_kl             | 55.555958 |
|    gen/train/clip_fraction         | 0.914     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -2.81     |
|    gen/train/explained_variance    | -0.403    |
|    gen/train/learning_rate         | 0.0003    |
|    gen/train/loss                  | 252       |
|    gen/train/n_updates             | 110       |
|    gen/train/policy_gradient_loss  | 0.228     |
|    gen/train/std                   | 0.988     |
|    gen/train/value_loss            | 807       |
--------------------------------------------------
-------------------------------

round:   8%|██▋                              | 12/146 [09:46<1:26:30, 38.73s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_rew_wrapped_mean | 154       |
|    gen/time/fps                    | 89        |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 22        |
|    gen/time/total_timesteps        | 26624     |
|    gen/train/approx_kl             | 120.69684 |
|    gen/train/clip_fraction         | 0.97      |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -2.81     |
|    gen/train/explained_variance    | 0.474     |
|    gen/train/learning_rate         | 0.0003    |
|    gen/train/loss                  | 77.6      |
|    gen/train/n_updates             | 120       |
|    gen/train/policy_gradient_loss  | 0.236     |
|    gen/train/std                   | 0.989     |
|    gen/train/value_loss            | 323       |
--------------------------------------------------
-------------------------------

round:   9%|██▉                              | 13/146 [10:20<1:22:45, 37.33s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_rew_wrapped_mean | 155       |
|    gen/time/fps                    | 99        |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 20        |
|    gen/time/total_timesteps        | 28672     |
|    gen/train/approx_kl             | 41.920227 |
|    gen/train/clip_fraction         | 0.996     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -2.82     |
|    gen/train/explained_variance    | 0.551     |
|    gen/train/learning_rate         | 0.0003    |
|    gen/train/loss                  | 69.9      |
|    gen/train/n_updates             | 130       |
|    gen/train/policy_gradient_loss  | 0.217     |
|    gen/train/std                   | 0.989     |
|    gen/train/value_loss            | 364       |
--------------------------------------------------
-------------------------------

round:  10%|███▏                             | 14/146 [10:51<1:18:25, 35.64s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_rew_wrapped_mean | 138       |
|    gen/time/fps                    | 88        |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 23        |
|    gen/time/total_timesteps        | 30720     |
|    gen/train/approx_kl             | 36.274067 |
|    gen/train/clip_fraction         | 0.989     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -2.82     |
|    gen/train/explained_variance    | 0.755     |
|    gen/train/learning_rate         | 0.0003    |
|    gen/train/loss                  | 22.4      |
|    gen/train/n_updates             | 140       |
|    gen/train/policy_gradient_loss  | 0.238     |
|    gen/train/std                   | 0.989     |
|    gen/train/value_loss            | 132       |
--------------------------------------------------
-------------------------------

round:  10%|███▍                             | 15/146 [11:26<1:16:59, 35.26s/it]

-------------------------------------------------
| raw/                               |          |
|    gen/rollout/ep_rew_wrapped_mean | 135      |
|    gen/time/fps                    | 95       |
|    gen/time/iterations             | 1        |
|    gen/time/time_elapsed           | 21       |
|    gen/time/total_timesteps        | 32768    |
|    gen/train/approx_kl             | 71.44812 |
|    gen/train/clip_fraction         | 0.993    |
|    gen/train/clip_range            | 0.2      |
|    gen/train/entropy_loss          | -2.82    |
|    gen/train/explained_variance    | 0.526    |
|    gen/train/learning_rate         | 0.0003   |
|    gen/train/loss                  | 23.9     |
|    gen/train/n_updates             | 150      |
|    gen/train/policy_gradient_loss  | 0.226    |
|    gen/train/std                   | 0.989    |
|    gen/train/value_loss            | 153      |
-------------------------------------------------
--------------------------------------------------

round:  11%|███▌                             | 16/146 [11:58<1:14:40, 34.46s/it]

-------------------------------------------------
| raw/                               |          |
|    gen/rollout/ep_rew_wrapped_mean | 105      |
|    gen/time/fps                    | 87       |
|    gen/time/iterations             | 1        |
|    gen/time/time_elapsed           | 23       |
|    gen/time/total_timesteps        | 34816    |
|    gen/train/approx_kl             | 73.4429  |
|    gen/train/clip_fraction         | 0.99     |
|    gen/train/clip_range            | 0.2      |
|    gen/train/entropy_loss          | -2.82    |
|    gen/train/explained_variance    | 0.862    |
|    gen/train/learning_rate         | 0.0003   |
|    gen/train/loss                  | 21.5     |
|    gen/train/n_updates             | 160      |
|    gen/train/policy_gradient_loss  | 0.228    |
|    gen/train/std                   | 0.991    |
|    gen/train/value_loss            | 96.5     |
-------------------------------------------------
--------------------------------------------------

round:  12%|███▊                             | 17/146 [12:33<1:14:15, 34.53s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_rew_wrapped_mean | 102       |
|    gen/time/fps                    | 84        |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 24        |
|    gen/time/total_timesteps        | 36864     |
|    gen/train/approx_kl             | 15.153274 |
|    gen/train/clip_fraction         | 0.972     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -2.83     |
|    gen/train/explained_variance    | 0.398     |
|    gen/train/learning_rate         | 0.0003    |
|    gen/train/loss                  | 8.72      |
|    gen/train/n_updates             | 170       |
|    gen/train/policy_gradient_loss  | 0.179     |
|    gen/train/std                   | 1         |
|    gen/train/value_loss            | 40.6      |
--------------------------------------------------
-------------------------------

round:  12%|████                             | 18/146 [13:09<1:14:17, 34.82s/it]

-------------------------------------------------
| raw/                               |          |
|    gen/rollout/ep_rew_wrapped_mean | 98.8     |
|    gen/time/fps                    | 97       |
|    gen/time/iterations             | 1        |
|    gen/time/time_elapsed           | 21       |
|    gen/time/total_timesteps        | 38912    |
|    gen/train/approx_kl             | 67.4337  |
|    gen/train/clip_fraction         | 0.959    |
|    gen/train/clip_range            | 0.2      |
|    gen/train/entropy_loss          | -2.86    |
|    gen/train/explained_variance    | 0.713    |
|    gen/train/learning_rate         | 0.0003   |
|    gen/train/loss                  | 16       |
|    gen/train/n_updates             | 180      |
|    gen/train/policy_gradient_loss  | 0.215    |
|    gen/train/std                   | 1.01     |
|    gen/train/value_loss            | 46.9     |
-------------------------------------------------
--------------------------------------------------

round:  13%|████▎                            | 19/146 [13:41<1:12:06, 34.07s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_rew_wrapped_mean | 82.4      |
|    gen/time/fps                    | 92        |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 22        |
|    gen/time/total_timesteps        | 40960     |
|    gen/train/approx_kl             | 28.729046 |
|    gen/train/clip_fraction         | 0.994     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -2.87     |
|    gen/train/explained_variance    | 0.758     |
|    gen/train/learning_rate         | 0.0003    |
|    gen/train/loss                  | 8.1       |
|    gen/train/n_updates             | 190       |
|    gen/train/policy_gradient_loss  | 0.184     |
|    gen/train/std                   | 1.02      |
|    gen/train/value_loss            | 32.5      |
--------------------------------------------------
-------------------------------

round:  14%|████▌                            | 20/146 [14:14<1:11:03, 33.84s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_rew_wrapped_mean | 84.9      |
|    gen/time/fps                    | 88        |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 23        |
|    gen/time/total_timesteps        | 43008     |
|    gen/train/approx_kl             | 37.377956 |
|    gen/train/clip_fraction         | 0.986     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -2.87     |
|    gen/train/explained_variance    | 0.681     |
|    gen/train/learning_rate         | 0.0003    |
|    gen/train/loss                  | 1.79      |
|    gen/train/n_updates             | 200       |
|    gen/train/policy_gradient_loss  | 0.175     |
|    gen/train/std                   | 1.02      |
|    gen/train/value_loss            | 35.3      |
--------------------------------------------------
-------------------------------

round:  14%|████▋                            | 21/146 [14:49<1:10:53, 34.03s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_rew_wrapped_mean | 81.5      |
|    gen/time/fps                    | 87        |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 23        |
|    gen/time/total_timesteps        | 45056     |
|    gen/train/approx_kl             | 10.402391 |
|    gen/train/clip_fraction         | 0.902     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -2.89     |
|    gen/train/explained_variance    | 0.887     |
|    gen/train/learning_rate         | 0.0003    |
|    gen/train/loss                  | 5.45      |
|    gen/train/n_updates             | 210       |
|    gen/train/policy_gradient_loss  | 0.373     |
|    gen/train/std                   | 1.04      |
|    gen/train/value_loss            | 21.1      |
--------------------------------------------------
-------------------------------

round:  15%|████▉                            | 22/146 [15:23<1:10:41, 34.21s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_rew_wrapped_mean | 75.4      |
|    gen/time/fps                    | 80        |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 25        |
|    gen/time/total_timesteps        | 47104     |
|    gen/train/approx_kl             | 20.525951 |
|    gen/train/clip_fraction         | 0.903     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -2.93     |
|    gen/train/explained_variance    | 0.0804    |
|    gen/train/learning_rate         | 0.0003    |
|    gen/train/loss                  | 0.675     |
|    gen/train/n_updates             | 220       |
|    gen/train/policy_gradient_loss  | 0.107     |
|    gen/train/std                   | 1.06      |
|    gen/train/value_loss            | 28.1      |
--------------------------------------------------
-------------------------------

round:  16%|█████▏                           | 23/146 [16:00<1:11:33, 34.91s/it]

-------------------------------------------------
| raw/                               |          |
|    gen/rollout/ep_rew_wrapped_mean | 70.7     |
|    gen/time/fps                    | 89       |
|    gen/time/iterations             | 1        |
|    gen/time/time_elapsed           | 22       |
|    gen/time/total_timesteps        | 49152    |
|    gen/train/approx_kl             | 4.526258 |
|    gen/train/clip_fraction         | 0.91     |
|    gen/train/clip_range            | 0.2      |
|    gen/train/entropy_loss          | -2.96    |
|    gen/train/explained_variance    | 0.53     |
|    gen/train/learning_rate         | 0.0003   |
|    gen/train/loss                  | 3.2      |
|    gen/train/n_updates             | 230      |
|    gen/train/policy_gradient_loss  | 0.178    |
|    gen/train/std                   | 1.08     |
|    gen/train/value_loss            | 24.7     |
-------------------------------------------------
--------------------------------------------------

round:  16%|█████▍                           | 24/146 [16:34<1:10:32, 34.70s/it]

-------------------------------------------------
| raw/                               |          |
|    gen/rollout/ep_rew_wrapped_mean | 76.7     |
|    gen/time/fps                    | 91       |
|    gen/time/iterations             | 1        |
|    gen/time/time_elapsed           | 22       |
|    gen/time/total_timesteps        | 51200    |
|    gen/train/approx_kl             | 35.27201 |
|    gen/train/clip_fraction         | 0.994    |
|    gen/train/clip_range            | 0.2      |
|    gen/train/entropy_loss          | -2.98    |
|    gen/train/explained_variance    | 0.468    |
|    gen/train/learning_rate         | 0.0003   |
|    gen/train/loss                  | 3.81     |
|    gen/train/n_updates             | 240      |
|    gen/train/policy_gradient_loss  | 0.273    |
|    gen/train/std                   | 1.07     |
|    gen/train/value_loss            | 62       |
-------------------------------------------------
--------------------------------------------------

round:  17%|█████▋                           | 25/146 [17:08<1:09:19, 34.38s/it]

-------------------------------------------------
| raw/                               |          |
|    gen/rollout/ep_rew_wrapped_mean | 74.3     |
|    gen/time/fps                    | 98       |
|    gen/time/iterations             | 1        |
|    gen/time/time_elapsed           | 20       |
|    gen/time/total_timesteps        | 53248    |
|    gen/train/approx_kl             | 8.726462 |
|    gen/train/clip_fraction         | 0.846    |
|    gen/train/clip_range            | 0.2      |
|    gen/train/entropy_loss          | -3       |
|    gen/train/explained_variance    | 0.704    |
|    gen/train/learning_rate         | 0.0003   |
|    gen/train/loss                  | 2.86     |
|    gen/train/n_updates             | 250      |
|    gen/train/policy_gradient_loss  | 0.103    |
|    gen/train/std                   | 1.09     |
|    gen/train/value_loss            | 14.2     |
-------------------------------------------------
--------------------------------------------------

round:  18%|█████▉                           | 26/146 [17:40<1:07:24, 33.70s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_rew_wrapped_mean | 67.6      |
|    gen/time/fps                    | 93        |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 21        |
|    gen/time/total_timesteps        | 55296     |
|    gen/train/approx_kl             | 3.8698382 |
|    gen/train/clip_fraction         | 0.905     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -3.04     |
|    gen/train/explained_variance    | 0.706     |
|    gen/train/learning_rate         | 0.0003    |
|    gen/train/loss                  | 1.09      |
|    gen/train/n_updates             | 260       |
|    gen/train/policy_gradient_loss  | 0.133     |
|    gen/train/std                   | 1.13      |
|    gen/train/value_loss            | 14.2      |
--------------------------------------------------
-------------------------------

round:  18%|██████                           | 27/146 [18:13<1:06:28, 33.52s/it]

-------------------------------------------------
| raw/                               |          |
|    gen/rollout/ep_rew_wrapped_mean | 77.3     |
|    gen/time/fps                    | 87       |
|    gen/time/iterations             | 1        |
|    gen/time/time_elapsed           | 23       |
|    gen/time/total_timesteps        | 57344    |
|    gen/train/approx_kl             | 291.7721 |
|    gen/train/clip_fraction         | 0.988    |
|    gen/train/clip_range            | 0.2      |
|    gen/train/entropy_loss          | -3.08    |
|    gen/train/explained_variance    | 0.619    |
|    gen/train/learning_rate         | 0.0003   |
|    gen/train/loss                  | 2.71     |
|    gen/train/n_updates             | 270      |
|    gen/train/policy_gradient_loss  | 0.285    |
|    gen/train/std                   | 1.13     |
|    gen/train/value_loss            | 44.5     |
-------------------------------------------------
--------------------------------------------------

round:  19%|██████▎                          | 28/146 [18:48<1:06:37, 33.88s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_rew_wrapped_mean | 78.4      |
|    gen/time/fps                    | 94        |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 21        |
|    gen/time/total_timesteps        | 59392     |
|    gen/train/approx_kl             | 86.129684 |
|    gen/train/clip_fraction         | 0.988     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -3.08     |
|    gen/train/explained_variance    | 0.645     |
|    gen/train/learning_rate         | 0.0003    |
|    gen/train/loss                  | 2.5       |
|    gen/train/n_updates             | 280       |
|    gen/train/policy_gradient_loss  | 0.15      |
|    gen/train/std                   | 1.13      |
|    gen/train/value_loss            | 45.1      |
--------------------------------------------------
-------------------------------

round:  20%|██████▌                          | 29/146 [19:21<1:05:30, 33.60s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_rew_wrapped_mean | 93.9      |
|    gen/time/fps                    | 96        |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 21        |
|    gen/time/total_timesteps        | 61440     |
|    gen/train/approx_kl             | 4.7174788 |
|    gen/train/clip_fraction         | 0.886     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -3.1      |
|    gen/train/explained_variance    | 0.596     |
|    gen/train/learning_rate         | 0.0003    |
|    gen/train/loss                  | 2.86      |
|    gen/train/n_updates             | 290       |
|    gen/train/policy_gradient_loss  | 0.366     |
|    gen/train/std                   | 1.15      |
|    gen/train/value_loss            | 64.5      |
--------------------------------------------------
-------------------------------

round:  21%|██████▊                          | 30/146 [19:53<1:04:19, 33.27s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_rew_wrapped_mean | 87.2      |
|    gen/time/fps                    | 100       |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 20        |
|    gen/time/total_timesteps        | 63488     |
|    gen/train/approx_kl             | 123.90727 |
|    gen/train/clip_fraction         | 0.986     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -3.11     |
|    gen/train/explained_variance    | 0.414     |
|    gen/train/learning_rate         | 0.0003    |
|    gen/train/loss                  | 0.674     |
|    gen/train/n_updates             | 300       |
|    gen/train/policy_gradient_loss  | 0.222     |
|    gen/train/std                   | 1.15      |
|    gen/train/value_loss            | 10.5      |
--------------------------------------------------
-------------------------------

round:  21%|███████                          | 31/146 [20:25<1:02:54, 32.82s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_rew_wrapped_mean | 79.2      |
|    gen/time/fps                    | 92        |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 22        |
|    gen/time/total_timesteps        | 65536     |
|    gen/train/approx_kl             | 27.447487 |
|    gen/train/clip_fraction         | 0.992     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -3.12     |
|    gen/train/explained_variance    | 0.812     |
|    gen/train/learning_rate         | 0.0003    |
|    gen/train/loss                  | 1.36      |
|    gen/train/n_updates             | 310       |
|    gen/train/policy_gradient_loss  | 0.171     |
|    gen/train/std                   | 1.15      |
|    gen/train/value_loss            | 14.4      |
--------------------------------------------------
-------------------------------

round:  22%|███████▏                         | 32/146 [20:58<1:02:40, 32.99s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_rew_wrapped_mean | 82.2      |
|    gen/time/fps                    | 94        |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 21        |
|    gen/time/total_timesteps        | 67584     |
|    gen/train/approx_kl             | 6.3181634 |
|    gen/train/clip_fraction         | 0.856     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -3.15     |
|    gen/train/explained_variance    | 0.834     |
|    gen/train/learning_rate         | 0.0003    |
|    gen/train/loss                  | 0.734     |
|    gen/train/n_updates             | 320       |
|    gen/train/policy_gradient_loss  | 0.135     |
|    gen/train/std                   | 1.19      |
|    gen/train/value_loss            | 17.1      |
--------------------------------------------------
-------------------------------

round:  23%|███████▍                         | 33/146 [21:31<1:02:03, 32.95s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_rew_wrapped_mean | 85.6      |
|    gen/time/fps                    | 83        |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 24        |
|    gen/time/total_timesteps        | 69632     |
|    gen/train/approx_kl             | 33.327248 |
|    gen/train/clip_fraction         | 0.991     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -3.19     |
|    gen/train/explained_variance    | 0.711     |
|    gen/train/learning_rate         | 0.0003    |
|    gen/train/loss                  | 5.21      |
|    gen/train/n_updates             | 330       |
|    gen/train/policy_gradient_loss  | 0.196     |
|    gen/train/std                   | 1.19      |
|    gen/train/value_loss            | 9.41      |
--------------------------------------------------
-------------------------------

round:  23%|███████▋                         | 34/146 [22:07<1:03:02, 33.78s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_rew_wrapped_mean | 88.8      |
|    gen/time/fps                    | 98        |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 20        |
|    gen/time/total_timesteps        | 71680     |
|    gen/train/approx_kl             | 10.930853 |
|    gen/train/clip_fraction         | 0.963     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -3.19     |
|    gen/train/explained_variance    | 0.666     |
|    gen/train/learning_rate         | 0.0003    |
|    gen/train/loss                  | 3.05      |
|    gen/train/n_updates             | 340       |
|    gen/train/policy_gradient_loss  | 0.142     |
|    gen/train/std                   | 1.2       |
|    gen/train/value_loss            | 55.4      |
--------------------------------------------------
-------------------------------

round:  24%|███████▉                         | 35/146 [22:39<1:01:34, 33.29s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_rew_wrapped_mean | 87        |
|    gen/time/fps                    | 93        |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 21        |
|    gen/time/total_timesteps        | 73728     |
|    gen/train/approx_kl             | 10.287706 |
|    gen/train/clip_fraction         | 0.878     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -3.22     |
|    gen/train/explained_variance    | 0.919     |
|    gen/train/learning_rate         | 0.0003    |
|    gen/train/loss                  | 1.58      |
|    gen/train/n_updates             | 350       |
|    gen/train/policy_gradient_loss  | 0.166     |
|    gen/train/std                   | 1.23      |
|    gen/train/value_loss            | 17.7      |
--------------------------------------------------
-------------------------------

round:  25%|████████▏                        | 36/146 [23:12<1:00:54, 33.22s/it]

-------------------------------------------------
| raw/                               |          |
|    gen/rollout/ep_rew_wrapped_mean | 68.4     |
|    gen/time/fps                    | 99       |
|    gen/time/iterations             | 1        |
|    gen/time/time_elapsed           | 20       |
|    gen/time/total_timesteps        | 75776    |
|    gen/train/approx_kl             | 4.341628 |
|    gen/train/clip_fraction         | 0.758    |
|    gen/train/clip_range            | 0.2      |
|    gen/train/entropy_loss          | -3.26    |
|    gen/train/explained_variance    | 0.878    |
|    gen/train/learning_rate         | 0.0003   |
|    gen/train/loss                  | 4.68     |
|    gen/train/n_updates             | 360      |
|    gen/train/policy_gradient_loss  | 0.0707   |
|    gen/train/std                   | 1.25     |
|    gen/train/value_loss            | 15.7     |
-------------------------------------------------
--------------------------------------------------

round:  25%|████████▊                          | 37/146 [23:44<59:36, 32.81s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_rew_wrapped_mean | 70.7      |
|    gen/time/fps                    | 88        |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 23        |
|    gen/time/total_timesteps        | 77824     |
|    gen/train/approx_kl             | 46.063892 |
|    gen/train/clip_fraction         | 0.987     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -3.29     |
|    gen/train/explained_variance    | 0.65      |
|    gen/train/learning_rate         | 0.0003    |
|    gen/train/loss                  | 5.57      |
|    gen/train/n_updates             | 370       |
|    gen/train/policy_gradient_loss  | 0.189     |
|    gen/train/std                   | 1.26      |
|    gen/train/value_loss            | 6.45      |
--------------------------------------------------
-------------------------------

round:  26%|█████████                          | 38/146 [24:18<59:58, 33.32s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_rew_wrapped_mean | 75.1      |
|    gen/time/fps                    | 88        |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 23        |
|    gen/time/total_timesteps        | 79872     |
|    gen/train/approx_kl             | 18.231052 |
|    gen/train/clip_fraction         | 0.974     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -3.3      |
|    gen/train/explained_variance    | 0.729     |
|    gen/train/learning_rate         | 0.0003    |
|    gen/train/loss                  | 1.67      |
|    gen/train/n_updates             | 380       |
|    gen/train/policy_gradient_loss  | 0.145     |
|    gen/train/std                   | 1.27      |
|    gen/train/value_loss            | 30.3      |
--------------------------------------------------
-------------------------------

round:  27%|████████▊                        | 39/146 [24:53<1:00:01, 33.66s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_rew_wrapped_mean | 68.8      |
|    gen/time/fps                    | 77        |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 26        |
|    gen/time/total_timesteps        | 81920     |
|    gen/train/approx_kl             | 1.9798772 |
|    gen/train/clip_fraction         | 0.676     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -3.33     |
|    gen/train/explained_variance    | 0.272     |
|    gen/train/learning_rate         | 0.0003    |
|    gen/train/loss                  | 0.254     |
|    gen/train/n_updates             | 390       |
|    gen/train/policy_gradient_loss  | 0.0333    |
|    gen/train/std                   | 1.27      |
|    gen/train/value_loss            | 3.55      |
--------------------------------------------------
-------------------------------

round:  27%|█████████                        | 40/146 [25:30<1:01:34, 34.86s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_rew_wrapped_mean | 60.6      |
|    gen/time/fps                    | 99        |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 20        |
|    gen/time/total_timesteps        | 83968     |
|    gen/train/approx_kl             | 0.6343028 |
|    gen/train/clip_fraction         | 0.639     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -3.31     |
|    gen/train/explained_variance    | 0.74      |
|    gen/train/learning_rate         | 0.0003    |
|    gen/train/loss                  | 0.97      |
|    gen/train/n_updates             | 400       |
|    gen/train/policy_gradient_loss  | 0.0615    |
|    gen/train/std                   | 1.27      |
|    gen/train/value_loss            | 7.79      |
--------------------------------------------------
-------------------------------

round:  28%|█████████▊                         | 41/146 [26:02<59:22, 33.93s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_rew_wrapped_mean | 56.4      |
|    gen/time/fps                    | 91        |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 22        |
|    gen/time/total_timesteps        | 86016     |
|    gen/train/approx_kl             | 30.778393 |
|    gen/train/clip_fraction         | 0.957     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -3.31     |
|    gen/train/explained_variance    | 0.742     |
|    gen/train/learning_rate         | 0.0003    |
|    gen/train/loss                  | 0.359     |
|    gen/train/n_updates             | 410       |
|    gen/train/policy_gradient_loss  | 0.0742    |
|    gen/train/std                   | 1.27      |
|    gen/train/value_loss            | 11.8      |
--------------------------------------------------
-------------------------------

round:  29%|██████████                         | 42/146 [26:36<58:41, 33.86s/it]

-------------------------------------------------
| raw/                               |          |
|    gen/rollout/ep_rew_wrapped_mean | 52.8     |
|    gen/time/fps                    | 91       |
|    gen/time/iterations             | 1        |
|    gen/time/time_elapsed           | 22       |
|    gen/time/total_timesteps        | 88064    |
|    gen/train/approx_kl             | 4.520108 |
|    gen/train/clip_fraction         | 0.794    |
|    gen/train/clip_range            | 0.2      |
|    gen/train/entropy_loss          | -3.32    |
|    gen/train/explained_variance    | 0.433    |
|    gen/train/learning_rate         | 0.0003   |
|    gen/train/loss                  | 3.24     |
|    gen/train/n_updates             | 420      |
|    gen/train/policy_gradient_loss  | 0.0984   |
|    gen/train/std                   | 1.27     |
|    gen/train/value_loss            | 6.4      |
-------------------------------------------------
--------------------------------------------------

round:  29%|██████████▎                        | 43/146 [27:10<57:58, 33.78s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_rew_wrapped_mean | 52.3      |
|    gen/time/fps                    | 100       |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 20        |
|    gen/time/total_timesteps        | 90112     |
|    gen/train/approx_kl             | 16.275944 |
|    gen/train/clip_fraction         | 0.926     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -3.32     |
|    gen/train/explained_variance    | 0.869     |
|    gen/train/learning_rate         | 0.0003    |
|    gen/train/loss                  | 5.92      |
|    gen/train/n_updates             | 430       |
|    gen/train/policy_gradient_loss  | 0.135     |
|    gen/train/std                   | 1.27      |
|    gen/train/value_loss            | 17.2      |
--------------------------------------------------
-------------------------------

round:  30%|██████████▌                        | 44/146 [27:41<56:21, 33.15s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_rew_wrapped_mean | 50.1      |
|    gen/time/fps                    | 82        |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 24        |
|    gen/time/total_timesteps        | 92160     |
|    gen/train/approx_kl             | 4.6162148 |
|    gen/train/clip_fraction         | 0.895     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -3.32     |
|    gen/train/explained_variance    | 0.747     |
|    gen/train/learning_rate         | 0.0003    |
|    gen/train/loss                  | 1.95      |
|    gen/train/n_updates             | 440       |
|    gen/train/policy_gradient_loss  | 0.182     |
|    gen/train/std                   | 1.28      |
|    gen/train/value_loss            | 9.26      |
--------------------------------------------------
-------------------------------

round:  31%|██████████▊                        | 45/146 [28:17<57:18, 34.04s/it]

---------------------------------------------------
| raw/                               |            |
|    gen/rollout/ep_rew_wrapped_mean | 52.4       |
|    gen/time/fps                    | 91         |
|    gen/time/iterations             | 1          |
|    gen/time/time_elapsed           | 22         |
|    gen/time/total_timesteps        | 94208      |
|    gen/train/approx_kl             | 0.44660452 |
|    gen/train/clip_fraction         | 0.702      |
|    gen/train/clip_range            | 0.2        |
|    gen/train/entropy_loss          | -3.34      |
|    gen/train/explained_variance    | 0.814      |
|    gen/train/learning_rate         | 0.0003     |
|    gen/train/loss                  | 2.45       |
|    gen/train/n_updates             | 450        |
|    gen/train/policy_gradient_loss  | 0.144      |
|    gen/train/std                   | 1.29       |
|    gen/train/value_loss            | 21.5       |
---------------------------------------------------
------------

round:  32%|███████████                        | 46/146 [28:51<56:29, 33.89s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_rew_wrapped_mean | 58.1      |
|    gen/time/fps                    | 89        |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 22        |
|    gen/time/total_timesteps        | 96256     |
|    gen/train/approx_kl             | 0.5244912 |
|    gen/train/clip_fraction         | 0.694     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -3.36     |
|    gen/train/explained_variance    | 0.8       |
|    gen/train/learning_rate         | 0.0003    |
|    gen/train/loss                  | 4.27      |
|    gen/train/n_updates             | 460       |
|    gen/train/policy_gradient_loss  | 0.133     |
|    gen/train/std                   | 1.3       |
|    gen/train/value_loss            | 26.3      |
--------------------------------------------------
-------------------------------

round:  32%|███████████▎                       | 47/146 [29:25<56:00, 33.94s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_rew_wrapped_mean | 59.4      |
|    gen/time/fps                    | 90        |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 22        |
|    gen/time/total_timesteps        | 98304     |
|    gen/train/approx_kl             | 3.9333467 |
|    gen/train/clip_fraction         | 0.783     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -3.36     |
|    gen/train/explained_variance    | 0.847     |
|    gen/train/learning_rate         | 0.0003    |
|    gen/train/loss                  | 13.3      |
|    gen/train/n_updates             | 470       |
|    gen/train/policy_gradient_loss  | 0.0968    |
|    gen/train/std                   | 1.3       |
|    gen/train/value_loss            | 47.1      |
--------------------------------------------------
-------------------------------

round:  33%|███████████▌                       | 48/146 [29:59<55:25, 33.94s/it]

-------------------------------------------------
| raw/                               |          |
|    gen/rollout/ep_rew_wrapped_mean | 49       |
|    gen/time/fps                    | 101      |
|    gen/time/iterations             | 1        |
|    gen/time/time_elapsed           | 20       |
|    gen/time/total_timesteps        | 100352   |
|    gen/train/approx_kl             | 5.216282 |
|    gen/train/clip_fraction         | 0.733    |
|    gen/train/clip_range            | 0.2      |
|    gen/train/entropy_loss          | -3.4     |
|    gen/train/explained_variance    | 0.798    |
|    gen/train/learning_rate         | 0.0003   |
|    gen/train/loss                  | 1.25     |
|    gen/train/n_updates             | 480      |
|    gen/train/policy_gradient_loss  | 0.137    |
|    gen/train/std                   | 1.33     |
|    gen/train/value_loss            | 6.38     |
-------------------------------------------------
--------------------------------------------------

round:  34%|███████████▋                       | 49/146 [30:30<53:39, 33.19s/it]

-------------------------------------------------
| raw/                               |          |
|    gen/rollout/ep_rew_wrapped_mean | 49.4     |
|    gen/time/fps                    | 75       |
|    gen/time/iterations             | 1        |
|    gen/time/time_elapsed           | 27       |
|    gen/time/total_timesteps        | 102400   |
|    gen/train/approx_kl             | 31.14492 |
|    gen/train/clip_fraction         | 0.911    |
|    gen/train/clip_range            | 0.2      |
|    gen/train/entropy_loss          | -3.46    |
|    gen/train/explained_variance    | 0.637    |
|    gen/train/learning_rate         | 0.0003   |
|    gen/train/loss                  | 0.25     |
|    gen/train/n_updates             | 490      |
|    gen/train/policy_gradient_loss  | 0.102    |
|    gen/train/std                   | 1.37     |
|    gen/train/value_loss            | 1        |
-------------------------------------------------
--------------------------------------------------

round:  34%|███████████▉                       | 50/146 [31:09<55:37, 34.77s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_rew_wrapped_mean | 60.1      |
|    gen/time/fps                    | 63        |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 32        |
|    gen/time/total_timesteps        | 104448    |
|    gen/train/approx_kl             | 11.414211 |
|    gen/train/clip_fraction         | 0.94      |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -3.47     |
|    gen/train/explained_variance    | 0.392     |
|    gen/train/learning_rate         | 0.0003    |
|    gen/train/loss                  | 0.879     |
|    gen/train/n_updates             | 500       |
|    gen/train/policy_gradient_loss  | 0.0883    |
|    gen/train/std                   | 1.37      |
|    gen/train/value_loss            | 29        |
--------------------------------------------------
-------------------------------

round:  35%|████████████▏                      | 51/146 [31:52<59:07, 37.34s/it]

-------------------------------------------------
| raw/                               |          |
|    gen/rollout/ep_rew_wrapped_mean | 68.5     |
|    gen/time/fps                    | 91       |
|    gen/time/iterations             | 1        |
|    gen/time/time_elapsed           | 22       |
|    gen/time/total_timesteps        | 106496   |
|    gen/train/approx_kl             | 0.33725  |
|    gen/train/clip_fraction         | 0.71     |
|    gen/train/clip_range            | 0.2      |
|    gen/train/entropy_loss          | -3.48    |
|    gen/train/explained_variance    | 0.65     |
|    gen/train/learning_rate         | 0.0003   |
|    gen/train/loss                  | 1.28     |
|    gen/train/n_updates             | 510      |
|    gen/train/policy_gradient_loss  | 0.119    |
|    gen/train/std                   | 1.38     |
|    gen/train/value_loss            | 56.6     |
-------------------------------------------------
--------------------------------------------------

round:  36%|████████████▍                      | 52/146 [32:26<56:47, 36.25s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_rew_wrapped_mean | 63        |
|    gen/time/fps                    | 84        |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 24        |
|    gen/time/total_timesteps        | 108544    |
|    gen/train/approx_kl             | 1.6724799 |
|    gen/train/clip_fraction         | 0.732     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -3.5      |
|    gen/train/explained_variance    | 0.738     |
|    gen/train/learning_rate         | 0.0003    |
|    gen/train/loss                  | 0.21      |
|    gen/train/n_updates             | 520       |
|    gen/train/policy_gradient_loss  | 0.0545    |
|    gen/train/std                   | 1.39      |
|    gen/train/value_loss            | 5.55      |
--------------------------------------------------
-------------------------------

round:  36%|████████████▋                      | 53/146 [33:01<55:48, 36.01s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_rew_wrapped_mean | 58.7      |
|    gen/time/fps                    | 94        |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 21        |
|    gen/time/total_timesteps        | 110592    |
|    gen/train/approx_kl             | 1.1709127 |
|    gen/train/clip_fraction         | 0.717     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -3.51     |
|    gen/train/explained_variance    | 0.707     |
|    gen/train/learning_rate         | 0.0003    |
|    gen/train/loss                  | 0.429     |
|    gen/train/n_updates             | 530       |
|    gen/train/policy_gradient_loss  | 0.0622    |
|    gen/train/std                   | 1.4       |
|    gen/train/value_loss            | 8.93      |
--------------------------------------------------
-------------------------------

round:  37%|████████████▉                      | 54/146 [33:34<53:45, 35.06s/it]

---------------------------------------------------
| raw/                               |            |
|    gen/rollout/ep_rew_wrapped_mean | 64.2       |
|    gen/time/fps                    | 95         |
|    gen/time/iterations             | 1          |
|    gen/time/time_elapsed           | 21         |
|    gen/time/total_timesteps        | 112640     |
|    gen/train/approx_kl             | 0.21972765 |
|    gen/train/clip_fraction         | 0.592      |
|    gen/train/clip_range            | 0.2        |
|    gen/train/entropy_loss          | -3.51      |
|    gen/train/explained_variance    | 0.855      |
|    gen/train/learning_rate         | 0.0003     |
|    gen/train/loss                  | 11.4       |
|    gen/train/n_updates             | 540        |
|    gen/train/policy_gradient_loss  | 0.0863     |
|    gen/train/std                   | 1.4        |
|    gen/train/value_loss            | 76.4       |
---------------------------------------------------
------------

round:  38%|█████████████▏                     | 55/146 [34:07<52:03, 34.32s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_rew_wrapped_mean | 70.8      |
|    gen/time/fps                    | 86        |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 23        |
|    gen/time/total_timesteps        | 114688    |
|    gen/train/approx_kl             | 0.6477883 |
|    gen/train/clip_fraction         | 0.631     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -3.52     |
|    gen/train/explained_variance    | 0.826     |
|    gen/train/learning_rate         | 0.0003    |
|    gen/train/loss                  | 5.3       |
|    gen/train/n_updates             | 550       |
|    gen/train/policy_gradient_loss  | 0.0878    |
|    gen/train/std                   | 1.41      |
|    gen/train/value_loss            | 32.8      |
--------------------------------------------------
-------------------------------

round:  38%|█████████████▍                     | 56/146 [34:42<51:44, 34.50s/it]

---------------------------------------------------
| raw/                               |            |
|    gen/rollout/ep_rew_wrapped_mean | 80.1       |
|    gen/time/fps                    | 88         |
|    gen/time/iterations             | 1          |
|    gen/time/time_elapsed           | 23         |
|    gen/time/total_timesteps        | 116736     |
|    gen/train/approx_kl             | 0.51276565 |
|    gen/train/clip_fraction         | 0.728      |
|    gen/train/clip_range            | 0.2        |
|    gen/train/entropy_loss          | -3.54      |
|    gen/train/explained_variance    | 0.655      |
|    gen/train/learning_rate         | 0.0003     |
|    gen/train/loss                  | 12.8       |
|    gen/train/n_updates             | 560        |
|    gen/train/policy_gradient_loss  | 0.14       |
|    gen/train/std                   | 1.43       |
|    gen/train/value_loss            | 34.7       |
---------------------------------------------------
------------

round:  39%|█████████████▋                     | 57/146 [35:16<51:07, 34.46s/it]

-------------------------------------------------
| raw/                               |          |
|    gen/rollout/ep_rew_wrapped_mean | 75.6     |
|    gen/time/fps                    | 103      |
|    gen/time/iterations             | 1        |
|    gen/time/time_elapsed           | 19       |
|    gen/time/total_timesteps        | 118784   |
|    gen/train/approx_kl             | 9.74083  |
|    gen/train/clip_fraction         | 0.736    |
|    gen/train/clip_range            | 0.2      |
|    gen/train/entropy_loss          | -3.56    |
|    gen/train/explained_variance    | 0.646    |
|    gen/train/learning_rate         | 0.0003   |
|    gen/train/loss                  | 12.1     |
|    gen/train/n_updates             | 570      |
|    gen/train/policy_gradient_loss  | 0.0539   |
|    gen/train/std                   | 1.43     |
|    gen/train/value_loss            | 41       |
-------------------------------------------------
--------------------------------------------------

round:  40%|█████████████▉                     | 58/146 [35:47<49:01, 33.42s/it]

-------------------------------------------------
| raw/                               |          |
|    gen/rollout/ep_rew_wrapped_mean | 68.3     |
|    gen/time/fps                    | 91       |
|    gen/time/iterations             | 1        |
|    gen/time/time_elapsed           | 22       |
|    gen/time/total_timesteps        | 120832   |
|    gen/train/approx_kl             | 9.468474 |
|    gen/train/clip_fraction         | 0.691    |
|    gen/train/clip_range            | 0.2      |
|    gen/train/entropy_loss          | -3.58    |
|    gen/train/explained_variance    | 0.746    |
|    gen/train/learning_rate         | 0.0003   |
|    gen/train/loss                  | 6.75     |
|    gen/train/n_updates             | 580      |
|    gen/train/policy_gradient_loss  | 0.0802   |
|    gen/train/std                   | 1.45     |
|    gen/train/value_loss            | 17.1     |
-------------------------------------------------
--------------------------------------------------

round:  40%|██████████████▏                    | 59/146 [36:21<48:34, 33.50s/it]

---------------------------------------------------
| raw/                               |            |
|    gen/rollout/ep_rew_wrapped_mean | 67.9       |
|    gen/time/fps                    | 100        |
|    gen/time/iterations             | 1          |
|    gen/time/time_elapsed           | 20         |
|    gen/time/total_timesteps        | 122880     |
|    gen/train/approx_kl             | 0.18224283 |
|    gen/train/clip_fraction         | 0.64       |
|    gen/train/clip_range            | 0.2        |
|    gen/train/entropy_loss          | -3.6       |
|    gen/train/explained_variance    | 0.831      |
|    gen/train/learning_rate         | 0.0003     |
|    gen/train/loss                  | 8.55       |
|    gen/train/n_updates             | 590        |
|    gen/train/policy_gradient_loss  | 0.0892     |
|    gen/train/std                   | 1.48       |
|    gen/train/value_loss            | 22.1       |
---------------------------------------------------
------------

round:  41%|██████████████▍                    | 60/146 [36:52<47:14, 32.96s/it]

---------------------------------------------------
| raw/                               |            |
|    gen/rollout/ep_rew_wrapped_mean | 73.3       |
|    gen/time/fps                    | 94         |
|    gen/time/iterations             | 1          |
|    gen/time/time_elapsed           | 21         |
|    gen/time/total_timesteps        | 124928     |
|    gen/train/approx_kl             | 0.22215417 |
|    gen/train/clip_fraction         | 0.621      |
|    gen/train/clip_range            | 0.2        |
|    gen/train/entropy_loss          | -3.63      |
|    gen/train/explained_variance    | 0.86       |
|    gen/train/learning_rate         | 0.0003     |
|    gen/train/loss                  | 5.37       |
|    gen/train/n_updates             | 600        |
|    gen/train/policy_gradient_loss  | 0.0642     |
|    gen/train/std                   | 1.5        |
|    gen/train/value_loss            | 13.1       |
---------------------------------------------------
------------

round:  42%|██████████████▌                    | 61/146 [37:25<46:40, 32.94s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_rew_wrapped_mean | 77.8      |
|    gen/time/fps                    | 99        |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 20        |
|    gen/time/total_timesteps        | 126976    |
|    gen/train/approx_kl             | 29.301003 |
|    gen/train/clip_fraction         | 0.797     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -3.67     |
|    gen/train/explained_variance    | 0.918     |
|    gen/train/learning_rate         | 0.0003    |
|    gen/train/loss                  | 4.44      |
|    gen/train/n_updates             | 610       |
|    gen/train/policy_gradient_loss  | 0.0798    |
|    gen/train/std                   | 1.53      |
|    gen/train/value_loss            | 20.4      |
--------------------------------------------------
-------------------------------

round:  42%|██████████████▊                    | 62/146 [37:57<45:39, 32.61s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_rew_wrapped_mean | 75.6      |
|    gen/time/fps                    | 107       |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 19        |
|    gen/time/total_timesteps        | 129024    |
|    gen/train/approx_kl             | 1.8958442 |
|    gen/train/clip_fraction         | 0.7       |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -3.69     |
|    gen/train/explained_variance    | 0.858     |
|    gen/train/learning_rate         | 0.0003    |
|    gen/train/loss                  | 4.3       |
|    gen/train/n_updates             | 620       |
|    gen/train/policy_gradient_loss  | 0.0544    |
|    gen/train/std                   | 1.53      |
|    gen/train/value_loss            | 45.7      |
--------------------------------------------------
-------------------------------

round:  43%|███████████████                    | 63/146 [38:27<44:08, 31.91s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_rew_wrapped_mean | 70.6      |
|    gen/time/fps                    | 100       |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 20        |
|    gen/time/total_timesteps        | 131072    |
|    gen/train/approx_kl             | 0.3566906 |
|    gen/train/clip_fraction         | 0.584     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -3.72     |
|    gen/train/explained_variance    | 0.659     |
|    gen/train/learning_rate         | 0.0003    |
|    gen/train/loss                  | 0.336     |
|    gen/train/n_updates             | 630       |
|    gen/train/policy_gradient_loss  | 0.0657    |
|    gen/train/std                   | 1.56      |
|    gen/train/value_loss            | 2.26      |
--------------------------------------------------
-------------------------------

round:  44%|███████████████▎                   | 64/146 [38:59<43:29, 31.82s/it]

-------------------------------------------------
| raw/                               |          |
|    gen/rollout/ep_rew_wrapped_mean | 79.8     |
|    gen/time/fps                    | 91       |
|    gen/time/iterations             | 1        |
|    gen/time/time_elapsed           | 22       |
|    gen/time/total_timesteps        | 133120   |
|    gen/train/approx_kl             | 12.37154 |
|    gen/train/clip_fraction         | 0.966    |
|    gen/train/clip_range            | 0.2      |
|    gen/train/entropy_loss          | -3.75    |
|    gen/train/explained_variance    | 0.427    |
|    gen/train/learning_rate         | 0.0003   |
|    gen/train/loss                  | 0.461    |
|    gen/train/n_updates             | 640      |
|    gen/train/policy_gradient_loss  | 0.117    |
|    gen/train/std                   | 1.58     |
|    gen/train/value_loss            | 14.4     |
-------------------------------------------------
--------------------------------------------------

round:  45%|███████████████▌                   | 65/146 [39:33<43:40, 32.35s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_rew_wrapped_mean | 76.6      |
|    gen/time/fps                    | 93        |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 21        |
|    gen/time/total_timesteps        | 135168    |
|    gen/train/approx_kl             | 2.5287435 |
|    gen/train/clip_fraction         | 0.858     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -3.76     |
|    gen/train/explained_variance    | 0.845     |
|    gen/train/learning_rate         | 0.0003    |
|    gen/train/loss                  | 0.597     |
|    gen/train/n_updates             | 650       |
|    gen/train/policy_gradient_loss  | 0.0792    |
|    gen/train/std                   | 1.6       |
|    gen/train/value_loss            | 16.3      |
--------------------------------------------------
-------------------------------

round:  45%|███████████████▊                   | 66/146 [40:06<43:25, 32.57s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_rew_wrapped_mean | 85.2      |
|    gen/time/fps                    | 81        |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 25        |
|    gen/time/total_timesteps        | 137216    |
|    gen/train/approx_kl             | 19.085238 |
|    gen/train/clip_fraction         | 0.852     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -3.78     |
|    gen/train/explained_variance    | 0.757     |
|    gen/train/learning_rate         | 0.0003    |
|    gen/train/loss                  | 1.56      |
|    gen/train/n_updates             | 660       |
|    gen/train/policy_gradient_loss  | 0.137     |
|    gen/train/std                   | 1.6       |
|    gen/train/value_loss            | 39.5      |
--------------------------------------------------
-------------------------------

round:  46%|████████████████                   | 67/146 [40:42<44:24, 33.72s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_rew_wrapped_mean | 100       |
|    gen/time/fps                    | 93        |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 21        |
|    gen/time/total_timesteps        | 139264    |
|    gen/train/approx_kl             | 1.1686864 |
|    gen/train/clip_fraction         | 0.753     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -3.79     |
|    gen/train/explained_variance    | 0.897     |
|    gen/train/learning_rate         | 0.0003    |
|    gen/train/loss                  | 3.49      |
|    gen/train/n_updates             | 670       |
|    gen/train/policy_gradient_loss  | 0.11      |
|    gen/train/std                   | 1.62      |
|    gen/train/value_loss            | 34.6      |
--------------------------------------------------
-------------------------------

round:  47%|████████████████▎                  | 68/146 [41:15<43:37, 33.56s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_rew_wrapped_mean | 90.8      |
|    gen/time/fps                    | 85        |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 23        |
|    gen/time/total_timesteps        | 141312    |
|    gen/train/approx_kl             | 67.377014 |
|    gen/train/clip_fraction         | 0.985     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -3.81     |
|    gen/train/explained_variance    | -1.36     |
|    gen/train/learning_rate         | 0.0003    |
|    gen/train/loss                  | 1.25      |
|    gen/train/n_updates             | 680       |
|    gen/train/policy_gradient_loss  | 0.163     |
|    gen/train/std                   | 1.63      |
|    gen/train/value_loss            | 14.3      |
--------------------------------------------------
-------------------------------

round:  47%|████████████████▌                  | 69/146 [41:50<43:40, 34.03s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_rew_wrapped_mean | 106       |
|    gen/time/fps                    | 107       |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 19        |
|    gen/time/total_timesteps        | 143360    |
|    gen/train/approx_kl             | 0.7398457 |
|    gen/train/clip_fraction         | 0.685     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -3.82     |
|    gen/train/explained_variance    | 0.789     |
|    gen/train/learning_rate         | 0.0003    |
|    gen/train/loss                  | 8.34      |
|    gen/train/n_updates             | 690       |
|    gen/train/policy_gradient_loss  | 0.234     |
|    gen/train/std                   | 1.64      |
|    gen/train/value_loss            | 99.9      |
--------------------------------------------------
-------------------------------

round:  48%|████████████████▊                  | 70/146 [42:21<41:41, 32.91s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_rew_wrapped_mean | 102       |
|    gen/time/fps                    | 95        |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 21        |
|    gen/time/total_timesteps        | 145408    |
|    gen/train/approx_kl             | 0.9184325 |
|    gen/train/clip_fraction         | 0.57      |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -3.85     |
|    gen/train/explained_variance    | 0.82      |
|    gen/train/learning_rate         | 0.0003    |
|    gen/train/loss                  | 2.06      |
|    gen/train/n_updates             | 700       |
|    gen/train/policy_gradient_loss  | 0.0437    |
|    gen/train/std                   | 1.67      |
|    gen/train/value_loss            | 3.11      |
--------------------------------------------------
-------------------------------

round:  49%|█████████████████                  | 71/146 [42:53<41:01, 32.83s/it]

---------------------------------------------------
| raw/                               |            |
|    gen/rollout/ep_rew_wrapped_mean | 99.1       |
|    gen/time/fps                    | 103        |
|    gen/time/iterations             | 1          |
|    gen/time/time_elapsed           | 19         |
|    gen/time/total_timesteps        | 147456     |
|    gen/train/approx_kl             | 0.45374468 |
|    gen/train/clip_fraction         | 0.539      |
|    gen/train/clip_range            | 0.2        |
|    gen/train/entropy_loss          | -3.88      |
|    gen/train/explained_variance    | 0.784      |
|    gen/train/learning_rate         | 0.0003     |
|    gen/train/loss                  | 0.186      |
|    gen/train/n_updates             | 710        |
|    gen/train/policy_gradient_loss  | 0.0309     |
|    gen/train/std                   | 1.68       |
|    gen/train/value_loss            | 6.48       |
---------------------------------------------------
------------

round:  49%|█████████████████▎                 | 72/146 [43:24<39:51, 32.31s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_rew_wrapped_mean | 99.7      |
|    gen/time/fps                    | 91        |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 22        |
|    gen/time/total_timesteps        | 149504    |
|    gen/train/approx_kl             | 22.326126 |
|    gen/train/clip_fraction         | 0.85      |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -3.88     |
|    gen/train/explained_variance    | 0.629     |
|    gen/train/learning_rate         | 0.0003    |
|    gen/train/loss                  | 0.994     |
|    gen/train/n_updates             | 720       |
|    gen/train/policy_gradient_loss  | 0.0857    |
|    gen/train/std                   | 1.68      |
|    gen/train/value_loss            | 20        |
--------------------------------------------------
-------------------------------

round:  50%|█████████████████▌                 | 73/146 [43:58<39:47, 32.71s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_rew_wrapped_mean | 106       |
|    gen/time/fps                    | 89        |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 22        |
|    gen/time/total_timesteps        | 151552    |
|    gen/train/approx_kl             | 14.308342 |
|    gen/train/clip_fraction         | 0.943     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -3.88     |
|    gen/train/explained_variance    | 0.751     |
|    gen/train/learning_rate         | 0.0003    |
|    gen/train/loss                  | 6.43      |
|    gen/train/n_updates             | 730       |
|    gen/train/policy_gradient_loss  | 0.0932    |
|    gen/train/std                   | 1.69      |
|    gen/train/value_loss            | 105       |
--------------------------------------------------
-------------------------------

round:  51%|█████████████████▋                 | 74/146 [44:32<39:45, 33.13s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_rew_wrapped_mean | 97.7      |
|    gen/time/fps                    | 97        |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 20        |
|    gen/time/total_timesteps        | 153600    |
|    gen/train/approx_kl             | 1.7433898 |
|    gen/train/clip_fraction         | 0.703     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -3.89     |
|    gen/train/explained_variance    | 0.506     |
|    gen/train/learning_rate         | 0.0003    |
|    gen/train/loss                  | 0.861     |
|    gen/train/n_updates             | 740       |
|    gen/train/policy_gradient_loss  | 0.0777    |
|    gen/train/std                   | 1.69      |
|    gen/train/value_loss            | 19        |
--------------------------------------------------
-------------------------------

round:  51%|█████████████████▉                 | 75/146 [45:04<38:51, 32.83s/it]

--------------------------------------------------
| raw/                               |           |
|    gen/rollout/ep_rew_wrapped_mean | 92.4      |
|    gen/time/fps                    | 96        |
|    gen/time/iterations             | 1         |
|    gen/time/time_elapsed           | 21        |
|    gen/time/total_timesteps        | 155648    |
|    gen/train/approx_kl             | 0.7234811 |
|    gen/train/clip_fraction         | 0.718     |
|    gen/train/clip_range            | 0.2       |
|    gen/train/entropy_loss          | -3.9      |
|    gen/train/explained_variance    | 0.278     |
|    gen/train/learning_rate         | 0.0003    |
|    gen/train/loss                  | 0.604     |
|    gen/train/n_updates             | 750       |
|    gen/train/policy_gradient_loss  | 0.101     |
|    gen/train/std                   | 1.71      |
|    gen/train/value_loss            | 9.76      |
--------------------------------------------------
-------------------------------

round:  52%|██████████████████▏                | 76/146 [45:37<38:12, 32.75s/it]

---------------------------------------------------
| raw/                               |            |
|    gen/rollout/ep_rew_wrapped_mean | 85.9       |
|    gen/time/fps                    | 95         |
|    gen/time/iterations             | 1          |
|    gen/time/time_elapsed           | 21         |
|    gen/time/total_timesteps        | 157696     |
|    gen/train/approx_kl             | 0.81636596 |
|    gen/train/clip_fraction         | 0.779      |
|    gen/train/clip_range            | 0.2        |
|    gen/train/entropy_loss          | -3.93      |
|    gen/train/explained_variance    | 0.492      |
|    gen/train/learning_rate         | 0.0003     |
|    gen/train/loss                  | 0.689      |
|    gen/train/n_updates             | 760        |
|    gen/train/policy_gradient_loss  | 0.0948     |
|    gen/train/std                   | 1.74       |
|    gen/train/value_loss            | 12.6       |
---------------------------------------------------


Process ForkServerProcess-1:
Traceback (most recent call last):
  File "/home/koksyuen/anaconda3/envs/rl/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/home/koksyuen/anaconda3/envs/rl/lib/python3.8/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/home/koksyuen/anaconda3/envs/rl/lib/python3.8/site-packages/stable_baselines3/common/vec_env/subproc_vec_env.py", line 28, in _worker
    cmd, data = remote.recv()
  File "/home/koksyuen/anaconda3/envs/rl/lib/python3.8/multiprocessing/connection.py", line 250, in recv
    buf = self._recv_bytes()
  File "/home/koksyuen/anaconda3/envs/rl/lib/python3.8/multiprocessing/connection.py", line 414, in _recv_bytes
    buf = self._recv(4)
  File "/home/koksyuen/anaconda3/envs/rl/lib/python3.8/multiprocessing/connection.py", line 379, in _recv
    chunk = read(handle, remaining)
KeyboardInterrupt
round:  52%|██████████████████▏                | 76/146 [46:02<

KeyboardInterrupt: 

In [None]:
# Save the learner to disk at ./train/GAIL/model1.
# NOTE(review): `learner` is defined in an earlier cell; it is presumably the
# generator policy whose `gen/train/*` metrics are logged above -- confirm.
# NOTE(review): the training output above ends with a KeyboardInterrupt at
# round 76/146, so running this cell right after that run would save a
# partially trained learner -- confirm that this is intended.
learner.save('./train/GAIL/model1')