In [1]:
import gymnasium as gym
from stable_baselines3 import A2C
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.callbacks import BaseCallback, EvalCallback

# supress warnings
import warnings
warnings.filterwarnings("ignore")


In [2]:
env = gym.make("LunarLander-v3", render_mode="human")
vec_env = make_vec_env(lambda: env, n_envs=1)

In [3]:
eval_callback = EvalCallback(
    env, 
    best_model_save_path='./logs/',
    log_path='./logs/',
    eval_freq=10000,
    deterministic=True,
    render=False
)


In [11]:
model = A2C("MlpPolicy", vec_env, verbose=1, learning_rate=0.001, gamma=0.99)
model.learn(total_timesteps=100000, progress_bar=True, callback=eval_callback)  # Train for 100,000 timesteps

Output()

Using cuda device




------------------------------------
| rollout/              |          |
|    ep_len_mean        | 142      |
|    ep_rew_mean        | -298     |
| time/                 |          |
|    fps                | 48       |
|    iterations         | 100      |
|    time_elapsed       | 10       |
|    total_timesteps    | 500      |
| train/                |          |
|    entropy_loss       | -1.06    |
|    explained_variance | -0.00131 |
|    learning_rate      | 0.001    |
|    n_updates          | 99       |
|    policy_loss        | -19.2    |
|    value_loss         | 346      |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 118      |
|    ep_rew_mean        | -404     |
| time/                 |          |
|    fps                | 48       |
|    iterations         | 200      |
|    time_elapsed       | 20       |
|    total_timesteps    | 1000     |
| train/                |          |
|    entropy_loss       | -1.32    |
|    explained_variance | 1.21e-05 |
|    learning_rate      | 0.001    |
|    n_updates          | 199      |
|    policy_loss        | -8.24    |
|    value_loss         | 64.3     |
------------------------------------


-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 124       |
|    ep_rew_mean        | -446      |
| time/                 |           |
|    fps                | 48        |
|    iterations         | 300       |
|    time_elapsed       | 31        |
|    total_timesteps    | 1500      |
| train/                |           |
|    entropy_loss       | -1.28     |
|    explained_variance | -0.000577 |
|    learning_rate      | 0.001     |
|    n_updates          | 299       |
|    policy_loss        | 1.37      |
|    value_loss         | 1.89      |
-------------------------------------


-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 128       |
|    ep_rew_mean        | -479      |
| time/                 |           |
|    fps                | 48        |
|    iterations         | 400       |
|    time_elapsed       | 41        |
|    total_timesteps    | 2000      |
| train/                |           |
|    entropy_loss       | -0.396    |
|    explained_variance | -2.26e-05 |
|    learning_rate      | 0.001     |
|    n_updates          | 399       |
|    policy_loss        | -2.24     |
|    value_loss         | 676       |
-------------------------------------


-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 121       |
|    ep_rew_mean        | -499      |
| time/                 |           |
|    fps                | 48        |
|    iterations         | 500       |
|    time_elapsed       | 52        |
|    total_timesteps    | 2500      |
| train/                |           |
|    entropy_loss       | -0.619    |
|    explained_variance | -2.44e-05 |
|    learning_rate      | 0.001     |
|    n_updates          | 499       |
|    policy_loss        | -3.19     |
|    value_loss         | 391       |
-------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 124      |
|    ep_rew_mean        | -490     |
| time/                 |          |
|    fps                | 48       |
|    iterations         | 600      |
|    time_elapsed       | 62       |
|    total_timesteps    | 3000     |
| train/                |          |
|    entropy_loss       | -1.15    |
|    explained_variance | 0.173    |
|    learning_rate      | 0.001    |
|    n_updates          | 599      |
|    policy_loss        | -12.1    |
|    value_loss         | 170      |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 133      |
|    ep_rew_mean        | -482     |
| time/                 |          |
|    fps                | 48       |
|    iterations         | 700      |
|    time_elapsed       | 72       |
|    total_timesteps    | 3500     |
| train/                |          |
|    entropy_loss       | -0.749   |
|    explained_variance | 0.000315 |
|    learning_rate      | 0.001    |
|    n_updates          | 699      |
|    policy_loss        | -6.27    |
|    value_loss         | 52.3     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 132      |
|    ep_rew_mean        | -472     |
| time/                 |          |
|    fps                | 48       |
|    iterations         | 800      |
|    time_elapsed       | 82       |
|    total_timesteps    | 4000     |
| train/                |          |
|    entropy_loss       | -0.968   |
|    explained_variance | 0.205    |
|    learning_rate      | 0.001    |
|    n_updates          | 799      |
|    policy_loss        | 3.97     |
|    value_loss         | 30.9     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 156      |
|    ep_rew_mean        | -449     |
| time/                 |          |
|    fps                | 48       |
|    iterations         | 900      |
|    time_elapsed       | 93       |
|    total_timesteps    | 4500     |
| train/                |          |
|    entropy_loss       | -0.773   |
|    explained_variance | -0.0103  |
|    learning_rate      | 0.001    |
|    n_updates          | 899      |
|    policy_loss        | 7.85     |
|    value_loss         | 160      |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 157      |
|    ep_rew_mean        | -431     |
| time/                 |          |
|    fps                | 48       |
|    iterations         | 1000     |
|    time_elapsed       | 103      |
|    total_timesteps    | 5000     |
| train/                |          |
|    entropy_loss       | -0.634   |
|    explained_variance | -0.0029  |
|    learning_rate      | 0.001    |
|    n_updates          | 999      |
|    policy_loss        | -0.573   |
|    value_loss         | 6.15     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 161      |
|    ep_rew_mean        | -414     |
| time/                 |          |
|    fps                | 48       |
|    iterations         | 1100     |
|    time_elapsed       | 113      |
|    total_timesteps    | 5500     |
| train/                |          |
|    entropy_loss       | -0.387   |
|    explained_variance | -0.00553 |
|    learning_rate      | 0.001    |
|    n_updates          | 1099     |
|    policy_loss        | 1.91     |
|    value_loss         | 27.6     |
------------------------------------


-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 164       |
|    ep_rew_mean        | -407      |
| time/                 |           |
|    fps                | 48        |
|    iterations         | 1200      |
|    time_elapsed       | 124       |
|    total_timesteps    | 6000      |
| train/                |           |
|    entropy_loss       | -0.823    |
|    explained_variance | -0.000391 |
|    learning_rate      | 0.001     |
|    n_updates          | 1199      |
|    policy_loss        | -4.66     |
|    value_loss         | 23.4      |
-------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 179      |
|    ep_rew_mean        | -404     |
| time/                 |          |
|    fps                | 48       |
|    iterations         | 1300     |
|    time_elapsed       | 134      |
|    total_timesteps    | 6500     |
| train/                |          |
|    entropy_loss       | -0.187   |
|    explained_variance | 0.00369  |
|    learning_rate      | 0.001    |
|    n_updates          | 1299     |
|    policy_loss        | -3.24    |
|    value_loss         | 10       |
------------------------------------


-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 184       |
|    ep_rew_mean        | -389      |
| time/                 |           |
|    fps                | 48        |
|    iterations         | 1400      |
|    time_elapsed       | 144       |
|    total_timesteps    | 7000      |
| train/                |           |
|    entropy_loss       | -0.805    |
|    explained_variance | -0.000173 |
|    learning_rate      | 0.001     |
|    n_updates          | 1399      |
|    policy_loss        | 1.55      |
|    value_loss         | 10.7      |
-------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 192      |
|    ep_rew_mean        | -381     |
| time/                 |          |
|    fps                | 48       |
|    iterations         | 1500     |
|    time_elapsed       | 155      |
|    total_timesteps    | 7500     |
| train/                |          |
|    entropy_loss       | -0.72    |
|    explained_variance | 0.00011  |
|    learning_rate      | 0.001    |
|    n_updates          | 1499     |
|    policy_loss        | -0.441   |
|    value_loss         | 1.34     |
------------------------------------


-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 195       |
|    ep_rew_mean        | -369      |
| time/                 |           |
|    fps                | 48        |
|    iterations         | 1600      |
|    time_elapsed       | 165       |
|    total_timesteps    | 8000      |
| train/                |           |
|    entropy_loss       | -0.0663   |
|    explained_variance | -2.29e-05 |
|    learning_rate      | 0.001     |
|    n_updates          | 1599      |
|    policy_loss        | 0.0696    |
|    value_loss         | 77.1      |
-------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 197      |
|    ep_rew_mean        | -357     |
| time/                 |          |
|    fps                | 48       |
|    iterations         | 1700     |
|    time_elapsed       | 175      |
|    total_timesteps    | 8500     |
| train/                |          |
|    entropy_loss       | -0.11    |
|    explained_variance | 7.16e-05 |
|    learning_rate      | 0.001    |
|    n_updates          | 1699     |
|    policy_loss        | 0.0278   |
|    value_loss         | 8.76     |
------------------------------------


-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 197       |
|    ep_rew_mean        | -351      |
| time/                 |           |
|    fps                | 48        |
|    iterations         | 1800      |
|    time_elapsed       | 186       |
|    total_timesteps    | 9000      |
| train/                |           |
|    entropy_loss       | -0.612    |
|    explained_variance | -5.25e-06 |
|    learning_rate      | 0.001     |
|    n_updates          | 1799      |
|    policy_loss        | 4.85      |
|    value_loss         | 96.3      |
-------------------------------------


-------------------------------------
| eval/                 |           |
|    mean_ep_length     | 505       |
|    mean_reward        | -509      |
| time/                 |           |
|    total_timesteps    | 9397      |
| train/                |           |
|    entropy_loss       | -0.422    |
|    explained_variance | -1.57e-05 |
|    learning_rate      | 0.001     |
|    n_updates          | 1879      |
|    policy_loss        | -1.08     |
|    value_loss         | 5.42      |
-------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 199      |
|    ep_rew_mean        | -340     |
| time/                 |          |
|    fps                | 38       |
|    iterations         | 1900     |
|    time_elapsed       | 248      |
|    total_timesteps    | 9500     |
| train/                |          |
|    entropy_loss       | -0.473   |
|    explained_variance | 1.32e-05 |
|    learning_rate      | 0.001    |
|    n_updates          | 1899     |
|    policy_loss        | 1.96     |
|    value_loss         | 16.4     |
------------------------------------


-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 203       |
|    ep_rew_mean        | -331      |
| time/                 |           |
|    fps                | 38        |
|    iterations         | 2000      |
|    time_elapsed       | 258       |
|    total_timesteps    | 10000     |
| train/                |           |
|    entropy_loss       | -0.248    |
|    explained_variance | -0.000107 |
|    learning_rate      | 0.001     |
|    n_updates          | 1999      |
|    policy_loss        | -1.63     |
|    value_loss         | 6.04      |
-------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 203      |
|    ep_rew_mean        | -324     |
| time/                 |          |
|    fps                | 39       |
|    iterations         | 2100     |
|    time_elapsed       | 268      |
|    total_timesteps    | 10500    |
| train/                |          |
|    entropy_loss       | -0.575   |
|    explained_variance | -0.0036  |
|    learning_rate      | 0.001    |
|    n_updates          | 2099     |
|    policy_loss        | 7.93     |
|    value_loss         | 262      |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 203      |
|    ep_rew_mean        | -316     |
| time/                 |          |
|    fps                | 39       |
|    iterations         | 2200     |
|    time_elapsed       | 279      |
|    total_timesteps    | 11000    |
| train/                |          |
|    entropy_loss       | -1.2     |
|    explained_variance | 0.0783   |
|    learning_rate      | 0.001    |
|    n_updates          | 2199     |
|    policy_loss        | 14.1     |
|    value_loss         | 160      |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 206      |
|    ep_rew_mean        | -300     |
| time/                 |          |
|    fps                | 39       |
|    iterations         | 2300     |
|    time_elapsed       | 289      |
|    total_timesteps    | 11500    |
| train/                |          |
|    entropy_loss       | -0.282   |
|    explained_variance | 0.0115   |
|    learning_rate      | 0.001    |
|    n_updates          | 2299     |
|    policy_loss        | 0.305    |
|    value_loss         | 32.9     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 208      |
|    ep_rew_mean        | -296     |
| time/                 |          |
|    fps                | 40       |
|    iterations         | 2400     |
|    time_elapsed       | 299      |
|    total_timesteps    | 12000    |
| train/                |          |
|    entropy_loss       | -0.609   |
|    explained_variance | -0.395   |
|    learning_rate      | 0.001    |
|    n_updates          | 2399     |
|    policy_loss        | 4.09     |
|    value_loss         | 40.6     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 208      |
|    ep_rew_mean        | -290     |
| time/                 |          |
|    fps                | 40       |
|    iterations         | 2500     |
|    time_elapsed       | 310      |
|    total_timesteps    | 12500    |
| train/                |          |
|    entropy_loss       | -0.00366 |
|    explained_variance | -0.00018 |
|    learning_rate      | 0.001    |
|    n_updates          | 2499     |
|    policy_loss        | 0.0119   |
|    value_loss         | 1.42e+03 |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 209      |
|    ep_rew_mean        | -285     |
| time/                 |          |
|    fps                | 40       |
|    iterations         | 2600     |
|    time_elapsed       | 320      |
|    total_timesteps    | 13000    |
| train/                |          |
|    entropy_loss       | -0.416   |
|    explained_variance | 0.115    |
|    learning_rate      | 0.001    |
|    n_updates          | 2599     |
|    policy_loss        | 0.0357   |
|    value_loss         | 0.484    |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 211      |
|    ep_rew_mean        | -284     |
| time/                 |          |
|    fps                | 40       |
|    iterations         | 2700     |
|    time_elapsed       | 330      |
|    total_timesteps    | 13500    |
| train/                |          |
|    entropy_loss       | -0.672   |
|    explained_variance | -0.00798 |
|    learning_rate      | 0.001    |
|    n_updates          | 2699     |
|    policy_loss        | -1.87    |
|    value_loss         | 24.4     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 214      |
|    ep_rew_mean        | -277     |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 2800     |
|    time_elapsed       | 341      |
|    total_timesteps    | 14000    |
| train/                |          |
|    entropy_loss       | -0.673   |
|    explained_variance | 0.0159   |
|    learning_rate      | 0.001    |
|    n_updates          | 2799     |
|    policy_loss        | 0.666    |
|    value_loss         | 3.78     |
------------------------------------


-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 216       |
|    ep_rew_mean        | -273      |
| time/                 |           |
|    fps                | 41        |
|    iterations         | 2900      |
|    time_elapsed       | 351       |
|    total_timesteps    | 14500     |
| train/                |           |
|    entropy_loss       | -0.000607 |
|    explained_variance | -0.0622   |
|    learning_rate      | 0.001     |
|    n_updates          | 2899      |
|    policy_loss        | 0.000315  |
|    value_loss         | 41.5      |
-------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 216      |
|    ep_rew_mean        | -266     |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 3000     |
|    time_elapsed       | 361      |
|    total_timesteps    | 15000    |
| train/                |          |
|    entropy_loss       | -0.67    |
|    explained_variance | -0.00325 |
|    learning_rate      | 0.001    |
|    n_updates          | 2999     |
|    policy_loss        | 2.2      |
|    value_loss         | 21       |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 217      |
|    ep_rew_mean        | -263     |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 3100     |
|    time_elapsed       | 372      |
|    total_timesteps    | 15500    |
| train/                |          |
|    entropy_loss       | -0.633   |
|    explained_variance | -0.368   |
|    learning_rate      | 0.001    |
|    n_updates          | 3099     |
|    policy_loss        | -0.395   |
|    value_loss         | 1.41     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 219      |
|    ep_rew_mean        | -263     |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 3200     |
|    time_elapsed       | 382      |
|    total_timesteps    | 16000    |
| train/                |          |
|    entropy_loss       | -0.564   |
|    explained_variance | 0.952    |
|    learning_rate      | 0.001    |
|    n_updates          | 3199     |
|    policy_loss        | 0.22     |
|    value_loss         | 4.09     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 219      |
|    ep_rew_mean        | -263     |
| time/                 |          |
|    fps                | 42       |
|    iterations         | 3300     |
|    time_elapsed       | 392      |
|    total_timesteps    | 16500    |
| train/                |          |
|    entropy_loss       | -0.478   |
|    explained_variance | -0.589   |
|    learning_rate      | 0.001    |
|    n_updates          | 3299     |
|    policy_loss        | 0.364    |
|    value_loss         | 0.61     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 230      |
|    ep_rew_mean        | -258     |
| time/                 |          |
|    fps                | 42       |
|    iterations         | 3400     |
|    time_elapsed       | 402      |
|    total_timesteps    | 17000    |
| train/                |          |
|    entropy_loss       | -0.0125  |
|    explained_variance | 0.796    |
|    learning_rate      | 0.001    |
|    n_updates          | 3399     |
|    policy_loss        | -0.0011  |
|    value_loss         | 0.938    |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 236      |
|    ep_rew_mean        | -251     |
| time/                 |          |
|    fps                | 42       |
|    iterations         | 3500     |
|    time_elapsed       | 413      |
|    total_timesteps    | 17500    |
| train/                |          |
|    entropy_loss       | -0.648   |
|    explained_variance | -1.18    |
|    learning_rate      | 0.001    |
|    n_updates          | 3499     |
|    policy_loss        | 4.88     |
|    value_loss         | 69.9     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 236      |
|    ep_rew_mean        | -251     |
| time/                 |          |
|    fps                | 42       |
|    iterations         | 3600     |
|    time_elapsed       | 423      |
|    total_timesteps    | 18000    |
| train/                |          |
|    entropy_loss       | -0.277   |
|    explained_variance | 0.0113   |
|    learning_rate      | 0.001    |
|    n_updates          | 3599     |
|    policy_loss        | 0.00125  |
|    value_loss         | 0.000454 |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 238      |
|    ep_rew_mean        | -244     |
| time/                 |          |
|    fps                | 42       |
|    iterations         | 3700     |
|    time_elapsed       | 433      |
|    total_timesteps    | 18500    |
| train/                |          |
|    entropy_loss       | -0.343   |
|    explained_variance | 0.0403   |
|    learning_rate      | 0.001    |
|    n_updates          | 3699     |
|    policy_loss        | -7.62    |
|    value_loss         | 47.9     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 237      |
|    ep_rew_mean        | -241     |
| time/                 |          |
|    fps                | 42       |
|    iterations         | 3800     |
|    time_elapsed       | 444      |
|    total_timesteps    | 19000    |
| train/                |          |
|    entropy_loss       | -0.936   |
|    explained_variance | -0.26    |
|    learning_rate      | 0.001    |
|    n_updates          | 3799     |
|    policy_loss        | 0.715    |
|    value_loss         | 1.83     |
------------------------------------


------------------------------------
| eval/                 |          |
|    mean_ep_length     | 257      |
|    mean_reward        | 9.74     |
| time/                 |          |
|    total_timesteps    | 19397    |
| train/                |          |
|    entropy_loss       | -0.599   |
|    explained_variance | -0.0284  |
|    learning_rate      | 0.001    |
|    n_updates          | 3879     |
|    policy_loss        | -0.0696  |
|    value_loss         | 0.0688   |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 237      |
|    ep_rew_mean        | -241     |
| time/                 |          |
|    fps                | 40       |
|    iterations         | 3900     |
|    time_elapsed       | 480      |
|    total_timesteps    | 19500    |
| train/                |          |
|    entropy_loss       | -0.472   |
|    explained_variance | -5.38    |
|    learning_rate      | 0.001    |
|    n_updates          | 3899     |
|    policy_loss        | -3.83    |
|    value_loss         | 425      |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 240      |
|    ep_rew_mean        | -235     |
| time/                 |          |
|    fps                | 40       |
|    iterations         | 4000     |
|    time_elapsed       | 491      |
|    total_timesteps    | 20000    |
| train/                |          |
|    entropy_loss       | -0.64    |
|    explained_variance | -0.0433  |
|    learning_rate      | 0.001    |
|    n_updates          | 3999     |
|    policy_loss        | 3.15     |
|    value_loss         | 69.7     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 240      |
|    ep_rew_mean        | -235     |
| time/                 |          |
|    fps                | 40       |
|    iterations         | 4100     |
|    time_elapsed       | 501      |
|    total_timesteps    | 20500    |
| train/                |          |
|    entropy_loss       | -0.205   |
|    explained_variance | 0.0194   |
|    learning_rate      | 0.001    |
|    n_updates          | 4099     |
|    policy_loss        | 0.000106 |
|    value_loss         | 0.000481 |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 244      |
|    ep_rew_mean        | -226     |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 4200     |
|    time_elapsed       | 511      |
|    total_timesteps    | 21000    |
| train/                |          |
|    entropy_loss       | -0.813   |
|    explained_variance | 0.427    |
|    learning_rate      | 0.001    |
|    n_updates          | 4199     |
|    policy_loss        | -1.79    |
|    value_loss         | 7.33     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 244      |
|    ep_rew_mean        | -221     |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 4300     |
|    time_elapsed       | 522      |
|    total_timesteps    | 21500    |
| train/                |          |
|    entropy_loss       | -0.452   |
|    explained_variance | -0.318   |
|    learning_rate      | 0.001    |
|    n_updates          | 4299     |
|    policy_loss        | 5.7      |
|    value_loss         | 48.3     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 239      |
|    ep_rew_mean        | -214     |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 4400     |
|    time_elapsed       | 532      |
|    total_timesteps    | 22000    |
| train/                |          |
|    entropy_loss       | -0.679   |
|    explained_variance | 0.0638   |
|    learning_rate      | 0.001    |
|    n_updates          | 4399     |
|    policy_loss        | -4.08    |
|    value_loss         | 51.5     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 240      |
|    ep_rew_mean        | -211     |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 4500     |
|    time_elapsed       | 542      |
|    total_timesteps    | 22500    |
| train/                |          |
|    entropy_loss       | -0.703   |
|    explained_variance | 0.947    |
|    learning_rate      | 0.001    |
|    n_updates          | 4499     |
|    policy_loss        | -1.01    |
|    value_loss         | 3.92     |
------------------------------------


-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 242       |
|    ep_rew_mean        | -212      |
| time/                 |           |
|    fps                | 41        |
|    iterations         | 4600      |
|    time_elapsed       | 553       |
|    total_timesteps    | 23000     |
| train/                |           |
|    entropy_loss       | -0.0396   |
|    explained_variance | -0.537    |
|    learning_rate      | 0.001     |
|    n_updates          | 4599      |
|    policy_loss        | -0.000534 |
|    value_loss         | 0.0075    |
-------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 242      |
|    ep_rew_mean        | -206     |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 4700     |
|    time_elapsed       | 563      |
|    total_timesteps    | 23500    |
| train/                |          |
|    entropy_loss       | -0.637   |
|    explained_variance | -0.0418  |
|    learning_rate      | 0.001    |
|    n_updates          | 4699     |
|    policy_loss        | 4.86     |
|    value_loss         | 88.5     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 241      |
|    ep_rew_mean        | -201     |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 4800     |
|    time_elapsed       | 573      |
|    total_timesteps    | 24000    |
| train/                |          |
|    entropy_loss       | -0.176   |
|    explained_variance | 0.645    |
|    learning_rate      | 0.001    |
|    n_updates          | 4799     |
|    policy_loss        | -0.52    |
|    value_loss         | 2.74     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 241      |
|    ep_rew_mean        | -201     |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 4900     |
|    time_elapsed       | 583      |
|    total_timesteps    | 24500    |
| train/                |          |
|    entropy_loss       | -0.382   |
|    explained_variance | -0.876   |
|    learning_rate      | 0.001    |
|    n_updates          | 4899     |
|    policy_loss        | 0.0125   |
|    value_loss         | 0.0436   |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 248      |
|    ep_rew_mean        | -191     |
| time/                 |          |
|    fps                | 42       |
|    iterations         | 5000     |
|    time_elapsed       | 594      |
|    total_timesteps    | 25000    |
| train/                |          |
|    entropy_loss       | -0.725   |
|    explained_variance | -13.9    |
|    learning_rate      | 0.001    |
|    n_updates          | 4999     |
|    policy_loss        | -3.06    |
|    value_loss         | 27.8     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 249      |
|    ep_rew_mean        | -187     |
| time/                 |          |
|    fps                | 42       |
|    iterations         | 5100     |
|    time_elapsed       | 604      |
|    total_timesteps    | 25500    |
| train/                |          |
|    entropy_loss       | -0.392   |
|    explained_variance | 0.118    |
|    learning_rate      | 0.001    |
|    n_updates          | 5099     |
|    policy_loss        | -1.81    |
|    value_loss         | 42.9     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 253      |
|    ep_rew_mean        | -178     |
| time/                 |          |
|    fps                | 42       |
|    iterations         | 5200     |
|    time_elapsed       | 614      |
|    total_timesteps    | 26000    |
| train/                |          |
|    entropy_loss       | -0.142   |
|    explained_variance | -0.206   |
|    learning_rate      | 0.001    |
|    n_updates          | 5199     |
|    policy_loss        | -0.129   |
|    value_loss         | 18.8     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 257      |
|    ep_rew_mean        | -166     |
| time/                 |          |
|    fps                | 42       |
|    iterations         | 5300     |
|    time_elapsed       | 625      |
|    total_timesteps    | 26500    |
| train/                |          |
|    entropy_loss       | -0.672   |
|    explained_variance | -3.22    |
|    learning_rate      | 0.001    |
|    n_updates          | 5299     |
|    policy_loss        | -19.8    |
|    value_loss         | 5.96e+03 |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 259      |
|    ep_rew_mean        | -159     |
| time/                 |          |
|    fps                | 42       |
|    iterations         | 5400     |
|    time_elapsed       | 635      |
|    total_timesteps    | 27000    |
| train/                |          |
|    entropy_loss       | -0.647   |
|    explained_variance | 0.995    |
|    learning_rate      | 0.001    |
|    n_updates          | 5399     |
|    policy_loss        | 0.00272  |
|    value_loss         | 0.0287   |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 261      |
|    ep_rew_mean        | -151     |
| time/                 |          |
|    fps                | 42       |
|    iterations         | 5500     |
|    time_elapsed       | 645      |
|    total_timesteps    | 27500    |
| train/                |          |
|    entropy_loss       | -0.423   |
|    explained_variance | 0.705    |
|    learning_rate      | 0.001    |
|    n_updates          | 5499     |
|    policy_loss        | -4.88    |
|    value_loss         | 94.9     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 263      |
|    ep_rew_mean        | -140     |
| time/                 |          |
|    fps                | 42       |
|    iterations         | 5600     |
|    time_elapsed       | 656      |
|    total_timesteps    | 28000    |
| train/                |          |
|    entropy_loss       | -0.656   |
|    explained_variance | -0.0265  |
|    learning_rate      | 0.001    |
|    n_updates          | 5599     |
|    policy_loss        | 5.05     |
|    value_loss         | 105      |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 265      |
|    ep_rew_mean        | -124     |
| time/                 |          |
|    fps                | 42       |
|    iterations         | 5700     |
|    time_elapsed       | 666      |
|    total_timesteps    | 28500    |
| train/                |          |
|    entropy_loss       | -0.597   |
|    explained_variance | -0.656   |
|    learning_rate      | 0.001    |
|    n_updates          | 5699     |
|    policy_loss        | 1.48     |
|    value_loss         | 5.67     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 268      |
|    ep_rew_mean        | -112     |
| time/                 |          |
|    fps                | 42       |
|    iterations         | 5800     |
|    time_elapsed       | 676      |
|    total_timesteps    | 29000    |
| train/                |          |
|    entropy_loss       | -0.535   |
|    explained_variance | 0.509    |
|    learning_rate      | 0.001    |
|    n_updates          | 5799     |
|    policy_loss        | -1.66    |
|    value_loss         | 10.7     |
------------------------------------


------------------------------------
| eval/                 |          |
|    mean_ep_length     | 355      |
|    mean_reward        | 109      |
| time/                 |          |
|    total_timesteps    | 29397    |
| train/                |          |
|    entropy_loss       | -0.621   |
|    explained_variance | -6.87    |
|    learning_rate      | 0.001    |
|    n_updates          | 5879     |
|    policy_loss        | -3.93    |
|    value_loss         | 128      |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 269      |
|    ep_rew_mean        | -101     |
| time/                 |          |
|    fps                | 40       |
|    iterations         | 5900     |
|    time_elapsed       | 723      |
|    total_timesteps    | 29500    |
| train/                |          |
|    entropy_loss       | -0.485   |
|    explained_variance | -0.0151  |
|    learning_rate      | 0.001    |
|    n_updates          | 5899     |
|    policy_loss        | 1.35     |
|    value_loss         | 23.6     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 272      |
|    ep_rew_mean        | -95      |
| time/                 |          |
|    fps                | 40       |
|    iterations         | 6000     |
|    time_elapsed       | 733      |
|    total_timesteps    | 30000    |
| train/                |          |
|    entropy_loss       | -0.627   |
|    explained_variance | -1.43    |
|    learning_rate      | 0.001    |
|    n_updates          | 5999     |
|    policy_loss        | 2.4      |
|    value_loss         | 47.1     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 272      |
|    ep_rew_mean        | -87.1    |
| time/                 |          |
|    fps                | 40       |
|    iterations         | 6100     |
|    time_elapsed       | 744      |
|    total_timesteps    | 30500    |
| train/                |          |
|    entropy_loss       | -0.192   |
|    explained_variance | -13.8    |
|    learning_rate      | 0.001    |
|    n_updates          | 6099     |
|    policy_loss        | -15.9    |
|    value_loss         | 392      |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 265      |
|    ep_rew_mean        | -85.1    |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 6200     |
|    time_elapsed       | 754      |
|    total_timesteps    | 31000    |
| train/                |          |
|    entropy_loss       | -0.561   |
|    explained_variance | 0.778    |
|    learning_rate      | 0.001    |
|    n_updates          | 6199     |
|    policy_loss        | 1.09     |
|    value_loss         | 10.7     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 264      |
|    ep_rew_mean        | -77.9    |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 6300     |
|    time_elapsed       | 765      |
|    total_timesteps    | 31500    |
| train/                |          |
|    entropy_loss       | -0.622   |
|    explained_variance | -0.107   |
|    learning_rate      | 0.001    |
|    n_updates          | 6299     |
|    policy_loss        | 2.51     |
|    value_loss         | 39.6     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 255      |
|    ep_rew_mean        | -71.3    |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 6400     |
|    time_elapsed       | 775      |
|    total_timesteps    | 32000    |
| train/                |          |
|    entropy_loss       | -0.227   |
|    explained_variance | 0.532    |
|    learning_rate      | 0.001    |
|    n_updates          | 6399     |
|    policy_loss        | 0.0549   |
|    value_loss         | 1.25     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 251      |
|    ep_rew_mean        | -65.7    |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 6500     |
|    time_elapsed       | 785      |
|    total_timesteps    | 32500    |
| train/                |          |
|    entropy_loss       | -0.637   |
|    explained_variance | 0.337    |
|    learning_rate      | 0.001    |
|    n_updates          | 6499     |
|    policy_loss        | 1.01     |
|    value_loss         | 4.23     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 252      |
|    ep_rew_mean        | -59.5    |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 6600     |
|    time_elapsed       | 796      |
|    total_timesteps    | 33000    |
| train/                |          |
|    entropy_loss       | -0.584   |
|    explained_variance | 0.588    |
|    learning_rate      | 0.001    |
|    n_updates          | 6599     |
|    policy_loss        | 0.958    |
|    value_loss         | 4.37     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 249      |
|    ep_rew_mean        | -57.8    |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 6700     |
|    time_elapsed       | 806      |
|    total_timesteps    | 33500    |
| train/                |          |
|    entropy_loss       | -0.705   |
|    explained_variance | 0.583    |
|    learning_rate      | 0.001    |
|    n_updates          | 6699     |
|    policy_loss        | -1.76    |
|    value_loss         | 13.1     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 251      |
|    ep_rew_mean        | -52.2    |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 6800     |
|    time_elapsed       | 817      |
|    total_timesteps    | 34000    |
| train/                |          |
|    entropy_loss       | -0.078   |
|    explained_variance | 0.889    |
|    learning_rate      | 0.001    |
|    n_updates          | 6799     |
|    policy_loss        | 0.0331   |
|    value_loss         | 4.56     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 248      |
|    ep_rew_mean        | -52.2    |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 6900     |
|    time_elapsed       | 827      |
|    total_timesteps    | 34500    |
| train/                |          |
|    entropy_loss       | -0.338   |
|    explained_variance | -0.293   |
|    learning_rate      | 0.001    |
|    n_updates          | 6899     |
|    policy_loss        | -3.28    |
|    value_loss         | 51.6     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 246      |
|    ep_rew_mean        | -46.5    |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 7000     |
|    time_elapsed       | 837      |
|    total_timesteps    | 35000    |
| train/                |          |
|    entropy_loss       | -0.453   |
|    explained_variance | -0.0203  |
|    learning_rate      | 0.001    |
|    n_updates          | 6999     |
|    policy_loss        | -0.316   |
|    value_loss         | 6.11     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 246      |
|    ep_rew_mean        | -45.9    |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 7100     |
|    time_elapsed       | 848      |
|    total_timesteps    | 35500    |
| train/                |          |
|    entropy_loss       | -0.614   |
|    explained_variance | 0.642    |
|    learning_rate      | 0.001    |
|    n_updates          | 7099     |
|    policy_loss        | -0.369   |
|    value_loss         | 1.41     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 248      |
|    ep_rew_mean        | -48.2    |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 7200     |
|    time_elapsed       | 858      |
|    total_timesteps    | 36000    |
| train/                |          |
|    entropy_loss       | -0.591   |
|    explained_variance | 0.962    |
|    learning_rate      | 0.001    |
|    n_updates          | 7199     |
|    policy_loss        | -0.0982  |
|    value_loss         | 0.139    |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 247      |
|    ep_rew_mean        | -45.5    |
| time/                 |          |
|    fps                | 42       |
|    iterations         | 7300     |
|    time_elapsed       | 868      |
|    total_timesteps    | 36500    |
| train/                |          |
|    entropy_loss       | -1       |
|    explained_variance | 0.591    |
|    learning_rate      | 0.001    |
|    n_updates          | 7299     |
|    policy_loss        | 1.59     |
|    value_loss         | 5.35     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 249      |
|    ep_rew_mean        | -43.5    |
| time/                 |          |
|    fps                | 42       |
|    iterations         | 7400     |
|    time_elapsed       | 879      |
|    total_timesteps    | 37000    |
| train/                |          |
|    entropy_loss       | -0.815   |
|    explained_variance | 0.878    |
|    learning_rate      | 0.001    |
|    n_updates          | 7399     |
|    policy_loss        | -2.27    |
|    value_loss         | 19.1     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 248      |
|    ep_rew_mean        | -42      |
| time/                 |          |
|    fps                | 42       |
|    iterations         | 7500     |
|    time_elapsed       | 889      |
|    total_timesteps    | 37500    |
| train/                |          |
|    entropy_loss       | -0.616   |
|    explained_variance | 0.741    |
|    learning_rate      | 0.001    |
|    n_updates          | 7499     |
|    policy_loss        | 2.13     |
|    value_loss         | 10.5     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 248      |
|    ep_rew_mean        | -36.3    |
| time/                 |          |
|    fps                | 42       |
|    iterations         | 7600     |
|    time_elapsed       | 899      |
|    total_timesteps    | 38000    |
| train/                |          |
|    entropy_loss       | -0.0987  |
|    explained_variance | 0.585    |
|    learning_rate      | 0.001    |
|    n_updates          | 7599     |
|    policy_loss        | 0.0121   |
|    value_loss         | 0.399    |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 246      |
|    ep_rew_mean        | -29.6    |
| time/                 |          |
|    fps                | 42       |
|    iterations         | 7700     |
|    time_elapsed       | 910      |
|    total_timesteps    | 38500    |
| train/                |          |
|    entropy_loss       | -0.0689  |
|    explained_variance | 0.483    |
|    learning_rate      | 0.001    |
|    n_updates          | 7699     |
|    policy_loss        | 0.0525   |
|    value_loss         | 26.5     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 244      |
|    ep_rew_mean        | -29.7    |
| time/                 |          |
|    fps                | 42       |
|    iterations         | 7800     |
|    time_elapsed       | 920      |
|    total_timesteps    | 39000    |
| train/                |          |
|    entropy_loss       | -0.0701  |
|    explained_variance | -0.0167  |
|    learning_rate      | 0.001    |
|    n_updates          | 7799     |
|    policy_loss        | 0.0152   |
|    value_loss         | 1.78     |
------------------------------------


------------------------------------
| eval/                 |          |
|    mean_ep_length     | 275      |
|    mean_reward        | 107      |
| time/                 |          |
|    total_timesteps    | 39397    |
| train/                |          |
|    entropy_loss       | -0.937   |
|    explained_variance | 0.485    |
|    learning_rate      | 0.001    |
|    n_updates          | 7879     |
|    policy_loss        | -1.49    |
|    value_loss         | 8.97     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 245      |
|    ep_rew_mean        | -26.8    |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 7900     |
|    time_elapsed       | 959      |
|    total_timesteps    | 39500    |
| train/                |          |
|    entropy_loss       | -0.549   |
|    explained_variance | 0.827    |
|    learning_rate      | 0.001    |
|    n_updates          | 7899     |
|    policy_loss        | -0.158   |
|    value_loss         | 0.395    |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 248      |
|    ep_rew_mean        | -20.9    |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 8000     |
|    time_elapsed       | 969      |
|    total_timesteps    | 40000    |
| train/                |          |
|    entropy_loss       | -0.7     |
|    explained_variance | -1.12    |
|    learning_rate      | 0.001    |
|    n_updates          | 7999     |
|    policy_loss        | 2.2      |
|    value_loss         | 6.01     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 237      |
|    ep_rew_mean        | -17      |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 8100     |
|    time_elapsed       | 979      |
|    total_timesteps    | 40500    |
| train/                |          |
|    entropy_loss       | -0.598   |
|    explained_variance | 0.57     |
|    learning_rate      | 0.001    |
|    n_updates          | 8099     |
|    policy_loss        | -41.7    |
|    value_loss         | 8.57e+03 |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 230      |
|    ep_rew_mean        | -20.5    |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 8200     |
|    time_elapsed       | 990      |
|    total_timesteps    | 41000    |
| train/                |          |
|    entropy_loss       | -0.825   |
|    explained_variance | -0.282   |
|    learning_rate      | 0.001    |
|    n_updates          | 8199     |
|    policy_loss        | -1.3     |
|    value_loss         | 39       |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 226      |
|    ep_rew_mean        | -15.5    |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 8300     |
|    time_elapsed       | 1000     |
|    total_timesteps    | 41500    |
| train/                |          |
|    entropy_loss       | -0.648   |
|    explained_variance | 0.909    |
|    learning_rate      | 0.001    |
|    n_updates          | 8299     |
|    policy_loss        | -2.38    |
|    value_loss         | 18.7     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 220      |
|    ep_rew_mean        | -12.1    |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 8400     |
|    time_elapsed       | 1010     |
|    total_timesteps    | 42000    |
| train/                |          |
|    entropy_loss       | -0.551   |
|    explained_variance | -0.13    |
|    learning_rate      | 0.001    |
|    n_updates          | 8399     |
|    policy_loss        | 0.466    |
|    value_loss         | 2.77     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 213      |
|    ep_rew_mean        | -12.1    |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 8500     |
|    time_elapsed       | 1021     |
|    total_timesteps    | 42500    |
| train/                |          |
|    entropy_loss       | -0.256   |
|    explained_variance | 0.802    |
|    learning_rate      | 0.001    |
|    n_updates          | 8499     |
|    policy_loss        | -0.0472  |
|    value_loss         | 1.03e+03 |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 213      |
|    ep_rew_mean        | -12.3    |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 8600     |
|    time_elapsed       | 1031     |
|    total_timesteps    | 43000    |
| train/                |          |
|    entropy_loss       | -0.633   |
|    explained_variance | -0.357   |
|    learning_rate      | 0.001    |
|    n_updates          | 8599     |
|    policy_loss        | -2.48    |
|    value_loss         | 9.11     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 208      |
|    ep_rew_mean        | -9.13    |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 8700     |
|    time_elapsed       | 1042     |
|    total_timesteps    | 43500    |
| train/                |          |
|    entropy_loss       | -1.02    |
|    explained_variance | 0.458    |
|    learning_rate      | 0.001    |
|    n_updates          | 8699     |
|    policy_loss        | 2.34     |
|    value_loss         | 4.21     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 201      |
|    ep_rew_mean        | -11.4    |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 8800     |
|    time_elapsed       | 1052     |
|    total_timesteps    | 44000    |
| train/                |          |
|    entropy_loss       | -0.185   |
|    explained_variance | -0.0781  |
|    learning_rate      | 0.001    |
|    n_updates          | 8799     |
|    policy_loss        | -0.972   |
|    value_loss         | 20.7     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 192      |
|    ep_rew_mean        | -15.8    |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 8900     |
|    time_elapsed       | 1062     |
|    total_timesteps    | 44500    |
| train/                |          |
|    entropy_loss       | -1.03    |
|    explained_variance | 0.754    |
|    learning_rate      | 0.001    |
|    n_updates          | 8899     |
|    policy_loss        | -4.28    |
|    value_loss         | 12.7     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 186      |
|    ep_rew_mean        | -19.4    |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 9000     |
|    time_elapsed       | 1073     |
|    total_timesteps    | 45000    |
| train/                |          |
|    entropy_loss       | -0.831   |
|    explained_variance | 0.645    |
|    learning_rate      | 0.001    |
|    n_updates          | 8999     |
|    policy_loss        | 0.699    |
|    value_loss         | 0.763    |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 177      |
|    ep_rew_mean        | -24.8    |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 9100     |
|    time_elapsed       | 1083     |
|    total_timesteps    | 45500    |
| train/                |          |
|    entropy_loss       | -1.24    |
|    explained_variance | 0.455    |
|    learning_rate      | 0.001    |
|    n_updates          | 9099     |
|    policy_loss        | 1.04     |
|    value_loss         | 1.67     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 175      |
|    ep_rew_mean        | -20.9    |
| time/                 |          |
|    fps                | 42       |
|    iterations         | 9200     |
|    time_elapsed       | 1094     |
|    total_timesteps    | 46000    |
| train/                |          |
|    entropy_loss       | -0.824   |
|    explained_variance | 0.896    |
|    learning_rate      | 0.001    |
|    n_updates          | 9199     |
|    policy_loss        | 0.693    |
|    value_loss         | 3.88     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 172      |
|    ep_rew_mean        | -17.2    |
| time/                 |          |
|    fps                | 42       |
|    iterations         | 9300     |
|    time_elapsed       | 1104     |
|    total_timesteps    | 46500    |
| train/                |          |
|    entropy_loss       | -0.561   |
|    explained_variance | 0.474    |
|    learning_rate      | 0.001    |
|    n_updates          | 9299     |
|    policy_loss        | -0.382   |
|    value_loss         | 1.85e+03 |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 168      |
|    ep_rew_mean        | -12.3    |
| time/                 |          |
|    fps                | 42       |
|    iterations         | 9400     |
|    time_elapsed       | 1115     |
|    total_timesteps    | 47000    |
| train/                |          |
|    entropy_loss       | -1.03    |
|    explained_variance | 0.446    |
|    learning_rate      | 0.001    |
|    n_updates          | 9399     |
|    policy_loss        | 3.16     |
|    value_loss         | 7.81     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 167      |
|    ep_rew_mean        | -10.1    |
| time/                 |          |
|    fps                | 42       |
|    iterations         | 9500     |
|    time_elapsed       | 1125     |
|    total_timesteps    | 47500    |
| train/                |          |
|    entropy_loss       | -0.767   |
|    explained_variance | 0.65     |
|    learning_rate      | 0.001    |
|    n_updates          | 9499     |
|    policy_loss        | 1.46     |
|    value_loss         | 1.53     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 168      |
|    ep_rew_mean        | -6.48    |
| time/                 |          |
|    fps                | 42       |
|    iterations         | 9600     |
|    time_elapsed       | 1135     |
|    total_timesteps    | 48000    |
| train/                |          |
|    entropy_loss       | -0.962   |
|    explained_variance | 0.509    |
|    learning_rate      | 0.001    |
|    n_updates          | 9599     |
|    policy_loss        | -55.9    |
|    value_loss         | 6.62e+03 |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 170      |
|    ep_rew_mean        | -3.61    |
| time/                 |          |
|    fps                | 42       |
|    iterations         | 9700     |
|    time_elapsed       | 1146     |
|    total_timesteps    | 48500    |
| train/                |          |
|    entropy_loss       | -0.678   |
|    explained_variance | -1.39    |
|    learning_rate      | 0.001    |
|    n_updates          | 9699     |
|    policy_loss        | -1.51    |
|    value_loss         | 17.4     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 174      |
|    ep_rew_mean        | 0.0494   |
| time/                 |          |
|    fps                | 42       |
|    iterations         | 9800     |
|    time_elapsed       | 1156     |
|    total_timesteps    | 49000    |
| train/                |          |
|    entropy_loss       | -0.727   |
|    explained_variance | 0.323    |
|    learning_rate      | 0.001    |
|    n_updates          | 9799     |
|    policy_loss        | 0.345    |
|    value_loss         | 6.45     |
------------------------------------


-------------------------------------
| eval/                 |           |
|    mean_ep_length     | 492       |
|    mean_reward        | -1.25e+03 |
| time/                 |           |
|    total_timesteps    | 49397     |
| train/                |           |
|    entropy_loss       | -1.14     |
|    explained_variance | -0.603    |
|    learning_rate      | 0.001     |
|    n_updates          | 9879      |
|    policy_loss        | -2.34     |
|    value_loss         | 6.42      |
-------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 174      |
|    ep_rew_mean        | 0.0494   |
| time/                 |          |
|    fps                | 40       |
|    iterations         | 9900     |
|    time_elapsed       | 1217     |
|    total_timesteps    | 49500    |
| train/                |          |
|    entropy_loss       | -0.348   |
|    explained_variance | -2.97    |
|    learning_rate      | 0.001    |
|    n_updates          | 9899     |
|    policy_loss        | -3.09    |
|    value_loss         | 16.1     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 174      |
|    ep_rew_mean        | 0.0494   |
| time/                 |          |
|    fps                | 40       |
|    iterations         | 10000    |
|    time_elapsed       | 1227     |
|    total_timesteps    | 50000    |
| train/                |          |
|    entropy_loss       | -1.06    |
|    explained_variance | -0.17    |
|    learning_rate      | 0.001    |
|    n_updates          | 9999     |
|    policy_loss        | 0.537    |
|    value_loss         | 2.09     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 190      |
|    ep_rew_mean        | -1.46    |
| time/                 |          |
|    fps                | 40       |
|    iterations         | 10100    |
|    time_elapsed       | 1237     |
|    total_timesteps    | 50500    |
| train/                |          |
|    entropy_loss       | -0.176   |
|    explained_variance | -1.19    |
|    learning_rate      | 0.001    |
|    n_updates          | 10099    |
|    policy_loss        | -2.06    |
|    value_loss         | 14.5     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 192      |
|    ep_rew_mean        | 1.83     |
| time/                 |          |
|    fps                | 40       |
|    iterations         | 10200    |
|    time_elapsed       | 1247     |
|    total_timesteps    | 51000    |
| train/                |          |
|    entropy_loss       | -0.64    |
|    explained_variance | 0.429    |
|    learning_rate      | 0.001    |
|    n_updates          | 10199    |
|    policy_loss        | -4.16    |
|    value_loss         | 21.6     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 192      |
|    ep_rew_mean        | 1.83     |
| time/                 |          |
|    fps                | 40       |
|    iterations         | 10300    |
|    time_elapsed       | 1258     |
|    total_timesteps    | 51500    |
| train/                |          |
|    entropy_loss       | -0.559   |
|    explained_variance | -0.348   |
|    learning_rate      | 0.001    |
|    n_updates          | 10299    |
|    policy_loss        | -0.956   |
|    value_loss         | 20.1     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 201      |
|    ep_rew_mean        | 1.48     |
| time/                 |          |
|    fps                | 40       |
|    iterations         | 10400    |
|    time_elapsed       | 1268     |
|    total_timesteps    | 52000    |
| train/                |          |
|    entropy_loss       | -0.872   |
|    explained_variance | -0.0114  |
|    learning_rate      | 0.001    |
|    n_updates          | 10399    |
|    policy_loss        | -0.839   |
|    value_loss         | 11.4     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 201      |
|    ep_rew_mean        | 1.48     |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 10500    |
|    time_elapsed       | 1278     |
|    total_timesteps    | 52500    |
| train/                |          |
|    entropy_loss       | -0.543   |
|    explained_variance | 0.317    |
|    learning_rate      | 0.001    |
|    n_updates          | 10499    |
|    policy_loss        | -0.0875  |
|    value_loss         | 1.98     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 210      |
|    ep_rew_mean        | -0.168   |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 10600    |
|    time_elapsed       | 1289     |
|    total_timesteps    | 53000    |
| train/                |          |
|    entropy_loss       | -0.383   |
|    explained_variance | -0.0134  |
|    learning_rate      | 0.001    |
|    n_updates          | 10599    |
|    policy_loss        | 2.15     |
|    value_loss         | 52.3     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 212      |
|    ep_rew_mean        | -3.58    |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 10700    |
|    time_elapsed       | 1299     |
|    total_timesteps    | 53500    |
| train/                |          |
|    entropy_loss       | -0.908   |
|    explained_variance | 0.372    |
|    learning_rate      | 0.001    |
|    n_updates          | 10699    |
|    policy_loss        | 0.449    |
|    value_loss         | 1.26     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 212      |
|    ep_rew_mean        | -3.58    |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 10800    |
|    time_elapsed       | 1309     |
|    total_timesteps    | 54000    |
| train/                |          |
|    entropy_loss       | -0.31    |
|    explained_variance | 0.982    |
|    learning_rate      | 0.001    |
|    n_updates          | 10799    |
|    policy_loss        | 0.464    |
|    value_loss         | 0.739    |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 221      |
|    ep_rew_mean        | -1.99    |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 10900    |
|    time_elapsed       | 1319     |
|    total_timesteps    | 54500    |
| train/                |          |
|    entropy_loss       | -0.189   |
|    explained_variance | 0.767    |
|    learning_rate      | 0.001    |
|    n_updates          | 10899    |
|    policy_loss        | -0.0245  |
|    value_loss         | 2.38     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 222      |
|    ep_rew_mean        | -1.94    |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 11000    |
|    time_elapsed       | 1330     |
|    total_timesteps    | 55000    |
| train/                |          |
|    entropy_loss       | -0.454   |
|    explained_variance | -0.259   |
|    learning_rate      | 0.001    |
|    n_updates          | 10999    |
|    policy_loss        | 0.0162   |
|    value_loss         | 3.16     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 224      |
|    ep_rew_mean        | -3.77    |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 11100    |
|    time_elapsed       | 1340     |
|    total_timesteps    | 55500    |
| train/                |          |
|    entropy_loss       | -0.258   |
|    explained_variance | 0.474    |
|    learning_rate      | 0.001    |
|    n_updates          | 11099    |
|    policy_loss        | 0.158    |
|    value_loss         | 48.5     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 224      |
|    ep_rew_mean        | -3.77    |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 11200    |
|    time_elapsed       | 1350     |
|    total_timesteps    | 56000    |
| train/                |          |
|    entropy_loss       | -0.154   |
|    explained_variance | -1.29    |
|    learning_rate      | 0.001    |
|    n_updates          | 11199    |
|    policy_loss        | 0.64     |
|    value_loss         | 213      |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 232      |
|    ep_rew_mean        | 0.38     |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 11300    |
|    time_elapsed       | 1361     |
|    total_timesteps    | 56500    |
| train/                |          |
|    entropy_loss       | -0.409   |
|    explained_variance | -0.131   |
|    learning_rate      | 0.001    |
|    n_updates          | 11299    |
|    policy_loss        | 3.64     |
|    value_loss         | 24.3     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 230      |
|    ep_rew_mean        | -4.48    |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 11400    |
|    time_elapsed       | 1371     |
|    total_timesteps    | 57000    |
| train/                |          |
|    entropy_loss       | -0.00614 |
|    explained_variance | 0.514    |
|    learning_rate      | 0.001    |
|    n_updates          | 11399    |
|    policy_loss        | 0.00629  |
|    value_loss         | 3.57e+03 |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 231      |
|    ep_rew_mean        | -3.51    |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 11500    |
|    time_elapsed       | 1381     |
|    total_timesteps    | 57500    |
| train/                |          |
|    entropy_loss       | -0.27    |
|    explained_variance | 0.769    |
|    learning_rate      | 0.001    |
|    n_updates          | 11499    |
|    policy_loss        | -0.608   |
|    value_loss         | 20       |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 233      |
|    ep_rew_mean        | -2.04    |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 11600    |
|    time_elapsed       | 1392     |
|    total_timesteps    | 58000    |
| train/                |          |
|    entropy_loss       | -0.292   |
|    explained_variance | 0.998    |
|    learning_rate      | 0.001    |
|    n_updates          | 11599    |
|    policy_loss        | 0.0798   |
|    value_loss         | 0.636    |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 233      |
|    ep_rew_mean        | -1.76    |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 11700    |
|    time_elapsed       | 1402     |
|    total_timesteps    | 58500    |
| train/                |          |
|    entropy_loss       | -0.742   |
|    explained_variance | 0.556    |
|    learning_rate      | 0.001    |
|    n_updates          | 11699    |
|    policy_loss        | 1.13     |
|    value_loss         | 1.37     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 232      |
|    ep_rew_mean        | 1.24     |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 11800    |
|    time_elapsed       | 1412     |
|    total_timesteps    | 59000    |
| train/                |          |
|    entropy_loss       | -0.278   |
|    explained_variance | -0.848   |
|    learning_rate      | 0.001    |
|    n_updates          | 11799    |
|    policy_loss        | 0.244    |
|    value_loss         | 171      |
------------------------------------


------------------------------------
| eval/                 |          |
|    mean_ep_length     | 202      |
|    mean_reward        | 148      |
| time/                 |          |
|    total_timesteps    | 59397    |
| train/                |          |
|    entropy_loss       | -0.00502 |
|    explained_variance | -0.156   |
|    learning_rate      | 0.001    |
|    n_updates          | 11879    |
|    policy_loss        | 0.000741 |
|    value_loss         | 2.45     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 231      |
|    ep_rew_mean        | 4.15     |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 11900    |
|    time_elapsed       | 1444     |
|    total_timesteps    | 59500    |
| train/                |          |
|    entropy_loss       | -0.0443  |
|    explained_variance | 0.92     |
|    learning_rate      | 0.001    |
|    n_updates          | 11899    |
|    policy_loss        | -1.28    |
|    value_loss         | 1.29e+04 |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 228      |
|    ep_rew_mean        | 5.53     |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 12000    |
|    time_elapsed       | 1454     |
|    total_timesteps    | 60000    |
| train/                |          |
|    entropy_loss       | -0.703   |
|    explained_variance | 0.838    |
|    learning_rate      | 0.001    |
|    n_updates          | 11999    |
|    policy_loss        | -1.8     |
|    value_loss         | 7.07     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 230      |
|    ep_rew_mean        | 7.61     |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 12100    |
|    time_elapsed       | 1464     |
|    total_timesteps    | 60500    |
| train/                |          |
|    entropy_loss       | -0.5     |
|    explained_variance | 0.247    |
|    learning_rate      | 0.001    |
|    n_updates          | 12099    |
|    policy_loss        | -7.18    |
|    value_loss         | 199      |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 228      |
|    ep_rew_mean        | 4.91     |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 12200    |
|    time_elapsed       | 1475     |
|    total_timesteps    | 61000    |
| train/                |          |
|    entropy_loss       | -0.514   |
|    explained_variance | -1.59    |
|    learning_rate      | 0.001    |
|    n_updates          | 12199    |
|    policy_loss        | -1.81    |
|    value_loss         | 55.4     |
------------------------------------


-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 225       |
|    ep_rew_mean        | 5.41      |
| time/                 |           |
|    fps                | 41        |
|    iterations         | 12300     |
|    time_elapsed       | 1485      |
|    total_timesteps    | 61500     |
| train/                |           |
|    entropy_loss       | -5.52e-05 |
|    explained_variance | -3        |
|    learning_rate      | 0.001     |
|    n_updates          | 12299     |
|    policy_loss        | -6.68e-05 |
|    value_loss         | 146       |
-------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 220      |
|    ep_rew_mean        | 2.61     |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 12400    |
|    time_elapsed       | 1495     |
|    total_timesteps    | 62000    |
| train/                |          |
|    entropy_loss       | -0.26    |
|    explained_variance | -0.0692  |
|    learning_rate      | 0.001    |
|    n_updates          | 12399    |
|    policy_loss        | 0.0563   |
|    value_loss         | 0.873    |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 219      |
|    ep_rew_mean        | 0.11     |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 12500    |
|    time_elapsed       | 1506     |
|    total_timesteps    | 62500    |
| train/                |          |
|    entropy_loss       | -0.634   |
|    explained_variance | -3.34    |
|    learning_rate      | 0.001    |
|    n_updates          | 12499    |
|    policy_loss        | -7.38    |
|    value_loss         | 103      |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 219      |
|    ep_rew_mean        | -0.618   |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 12600    |
|    time_elapsed       | 1516     |
|    total_timesteps    | 63000    |
| train/                |          |
|    entropy_loss       | -0.746   |
|    explained_variance | -1.3     |
|    learning_rate      | 0.001    |
|    n_updates          | 12599    |
|    policy_loss        | 0.393    |
|    value_loss         | 9.14     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 222      |
|    ep_rew_mean        | 1.96     |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 12700    |
|    time_elapsed       | 1526     |
|    total_timesteps    | 63500    |
| train/                |          |
|    entropy_loss       | -0.0691  |
|    explained_variance | 0.72     |
|    learning_rate      | 0.001    |
|    n_updates          | 12699    |
|    policy_loss        | 0.0353   |
|    value_loss         | 15.8     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 223      |
|    ep_rew_mean        | 3.63     |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 12800    |
|    time_elapsed       | 1537     |
|    total_timesteps    | 64000    |
| train/                |          |
|    entropy_loss       | -0.23    |
|    explained_variance | 0.656    |
|    learning_rate      | 0.001    |
|    n_updates          | 12799    |
|    policy_loss        | -0.103   |
|    value_loss         | 9.93     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 223      |
|    ep_rew_mean        | 2.35     |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 12900    |
|    time_elapsed       | 1547     |
|    total_timesteps    | 64500    |
| train/                |          |
|    entropy_loss       | -0.524   |
|    explained_variance | 0.885    |
|    learning_rate      | 0.001    |
|    n_updates          | 12899    |
|    policy_loss        | 0.0581   |
|    value_loss         | 0.492    |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 222      |
|    ep_rew_mean        | 1.06     |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 13000    |
|    time_elapsed       | 1558     |
|    total_timesteps    | 65000    |
| train/                |          |
|    entropy_loss       | -0.54    |
|    explained_variance | 0.449    |
|    learning_rate      | 0.001    |
|    n_updates          | 12999    |
|    policy_loss        | -1.57    |
|    value_loss         | 49       |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 222      |
|    ep_rew_mean        | 0.209    |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 13100    |
|    time_elapsed       | 1568     |
|    total_timesteps    | 65500    |
| train/                |          |
|    entropy_loss       | -0.422   |
|    explained_variance | 0.776    |
|    learning_rate      | 0.001    |
|    n_updates          | 13099    |
|    policy_loss        | 0.193    |
|    value_loss         | 0.949    |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 222      |
|    ep_rew_mean        | 0.378    |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 13200    |
|    time_elapsed       | 1578     |
|    total_timesteps    | 66000    |
| train/                |          |
|    entropy_loss       | -0.433   |
|    explained_variance | 0.691    |
|    learning_rate      | 0.001    |
|    n_updates          | 13199    |
|    policy_loss        | 0.789    |
|    value_loss         | 31.8     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 224      |
|    ep_rew_mean        | 2.13     |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 13300    |
|    time_elapsed       | 1589     |
|    total_timesteps    | 66500    |
| train/                |          |
|    entropy_loss       | -0.523   |
|    explained_variance | -6.21    |
|    learning_rate      | 0.001    |
|    n_updates          | 13299    |
|    policy_loss        | -8.04    |
|    value_loss         | 720      |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 224      |
|    ep_rew_mean        | 3.18     |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 13400    |
|    time_elapsed       | 1599     |
|    total_timesteps    | 67000    |
| train/                |          |
|    entropy_loss       | -0.222   |
|    explained_variance | 0.94     |
|    learning_rate      | 0.001    |
|    n_updates          | 13399    |
|    policy_loss        | 0.0105   |
|    value_loss         | 1.55     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 225      |
|    ep_rew_mean        | 5.55     |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 13500    |
|    time_elapsed       | 1610     |
|    total_timesteps    | 67500    |
| train/                |          |
|    entropy_loss       | -0.434   |
|    explained_variance | 0.872    |
|    learning_rate      | 0.001    |
|    n_updates          | 13499    |
|    policy_loss        | 0.281    |
|    value_loss         | 1.64     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 227      |
|    ep_rew_mean        | 7        |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 13600    |
|    time_elapsed       | 1620     |
|    total_timesteps    | 68000    |
| train/                |          |
|    entropy_loss       | -0.858   |
|    explained_variance | -12.8    |
|    learning_rate      | 0.001    |
|    n_updates          | 13599    |
|    policy_loss        | -3.88    |
|    value_loss         | 97.3     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 228      |
|    ep_rew_mean        | 8.66     |
| time/                 |          |
|    fps                | 42       |
|    iterations         | 13700    |
|    time_elapsed       | 1630     |
|    total_timesteps    | 68500    |
| train/                |          |
|    entropy_loss       | -0.388   |
|    explained_variance | 0.0257   |
|    learning_rate      | 0.001    |
|    n_updates          | 13699    |
|    policy_loss        | 0.0885   |
|    value_loss         | 1.72     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 228      |
|    ep_rew_mean        | 10       |
| time/                 |          |
|    fps                | 42       |
|    iterations         | 13800    |
|    time_elapsed       | 1641     |
|    total_timesteps    | 69000    |
| train/                |          |
|    entropy_loss       | -0.289   |
|    explained_variance | 0.885    |
|    learning_rate      | 0.001    |
|    n_updates          | 13799    |
|    policy_loss        | 0.321    |
|    value_loss         | 2.05     |
------------------------------------


------------------------------------
| eval/                 |          |
|    mean_ep_length     | 189      |
|    mean_reward        | 13.8     |
| time/                 |          |
|    total_timesteps    | 69397    |
| train/                |          |
|    entropy_loss       | -0.0915  |
|    explained_variance | -10.2    |
|    learning_rate      | 0.001    |
|    n_updates          | 13879    |
|    policy_loss        | -0.219   |
|    value_loss         | 1.37e+03 |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 228      |
|    ep_rew_mean        | 11.5     |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 13900    |
|    time_elapsed       | 1671     |
|    total_timesteps    | 69500    |
| train/                |          |
|    entropy_loss       | -0.403   |
|    explained_variance | 0.403    |
|    learning_rate      | 0.001    |
|    n_updates          | 13899    |
|    policy_loss        | -0.166   |
|    value_loss         | 7.48     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 231      |
|    ep_rew_mean        | 12.2     |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 14000    |
|    time_elapsed       | 1681     |
|    total_timesteps    | 70000    |
| train/                |          |
|    entropy_loss       | -0.285   |
|    explained_variance | 0.935    |
|    learning_rate      | 0.001    |
|    n_updates          | 13999    |
|    policy_loss        | -0.21    |
|    value_loss         | 10.4     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 230      |
|    ep_rew_mean        | 12.4     |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 14100    |
|    time_elapsed       | 1692     |
|    total_timesteps    | 70500    |
| train/                |          |
|    entropy_loss       | -0.692   |
|    explained_variance | -1.7     |
|    learning_rate      | 0.001    |
|    n_updates          | 14099    |
|    policy_loss        | 5.18     |
|    value_loss         | 81.2     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 226      |
|    ep_rew_mean        | 9.37     |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 14200    |
|    time_elapsed       | 1702     |
|    total_timesteps    | 71000    |
| train/                |          |
|    entropy_loss       | -0.539   |
|    explained_variance | 0.292    |
|    learning_rate      | 0.001    |
|    n_updates          | 14199    |
|    policy_loss        | -0.952   |
|    value_loss         | 3.98     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 196      |
|    ep_rew_mean        | 5.3      |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 14300    |
|    time_elapsed       | 1712     |
|    total_timesteps    | 71500    |
| train/                |          |
|    entropy_loss       | -0.573   |
|    explained_variance | 0.52     |
|    learning_rate      | 0.001    |
|    n_updates          | 14299    |
|    policy_loss        | -0.124   |
|    value_loss         | 3.08     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 176      |
|    ep_rew_mean        | 8.92     |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 14400    |
|    time_elapsed       | 1723     |
|    total_timesteps    | 72000    |
| train/                |          |
|    entropy_loss       | -0.369   |
|    explained_variance | -0.0398  |
|    learning_rate      | 0.001    |
|    n_updates          | 14399    |
|    policy_loss        | 3.2      |
|    value_loss         | 29.4     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 159      |
|    ep_rew_mean        | 4.33     |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 14500    |
|    time_elapsed       | 1733     |
|    total_timesteps    | 72500    |
| train/                |          |
|    entropy_loss       | -0.231   |
|    explained_variance | 0.929    |
|    learning_rate      | 0.001    |
|    n_updates          | 14499    |
|    policy_loss        | -4.19    |
|    value_loss         | 1.32e+03 |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 154      |
|    ep_rew_mean        | 7.17     |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 14600    |
|    time_elapsed       | 1744     |
|    total_timesteps    | 73000    |
| train/                |          |
|    entropy_loss       | -0.303   |
|    explained_variance | 0.748    |
|    learning_rate      | 0.001    |
|    n_updates          | 14599    |
|    policy_loss        | 1.41     |
|    value_loss         | 25.9     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 150      |
|    ep_rew_mean        | 4.74     |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 14700    |
|    time_elapsed       | 1754     |
|    total_timesteps    | 73500    |
| train/                |          |
|    entropy_loss       | -0.583   |
|    explained_variance | -25.3    |
|    learning_rate      | 0.001    |
|    n_updates          | 14699    |
|    policy_loss        | 0.374    |
|    value_loss         | 4.39     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 144      |
|    ep_rew_mean        | -6.43    |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 14800    |
|    time_elapsed       | 1764     |
|    total_timesteps    | 74000    |
| train/                |          |
|    entropy_loss       | -0.346   |
|    explained_variance | 0.879    |
|    learning_rate      | 0.001    |
|    n_updates          | 14799    |
|    policy_loss        | -0.901   |
|    value_loss         | 11.9     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 137      |
|    ep_rew_mean        | -11      |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 14900    |
|    time_elapsed       | 1775     |
|    total_timesteps    | 74500    |
| train/                |          |
|    entropy_loss       | -0.447   |
|    explained_variance | 0.99     |
|    learning_rate      | 0.001    |
|    n_updates          | 14899    |
|    policy_loss        | -0.186   |
|    value_loss         | 1.44     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 137      |
|    ep_rew_mean        | -11.5    |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 15000    |
|    time_elapsed       | 1785     |
|    total_timesteps    | 75000    |
| train/                |          |
|    entropy_loss       | -0.474   |
|    explained_variance | 0.95     |
|    learning_rate      | 0.001    |
|    n_updates          | 14999    |
|    policy_loss        | 0.111    |
|    value_loss         | 0.29     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 136      |
|    ep_rew_mean        | -12.3    |
| time/                 |          |
|    fps                | 42       |
|    iterations         | 15100    |
|    time_elapsed       | 1796     |
|    total_timesteps    | 75500    |
| train/                |          |
|    entropy_loss       | -0.625   |
|    explained_variance | 0.87     |
|    learning_rate      | 0.001    |
|    n_updates          | 15099    |
|    policy_loss        | 0.00438  |
|    value_loss         | 1.04     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 133      |
|    ep_rew_mean        | -12.4    |
| time/                 |          |
|    fps                | 42       |
|    iterations         | 15200    |
|    time_elapsed       | 1806     |
|    total_timesteps    | 76000    |
| train/                |          |
|    entropy_loss       | -0.571   |
|    explained_variance | 0.678    |
|    learning_rate      | 0.001    |
|    n_updates          | 15199    |
|    policy_loss        | 1.44     |
|    value_loss         | 10.7     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 129      |
|    ep_rew_mean        | -16.5    |
| time/                 |          |
|    fps                | 42       |
|    iterations         | 15300    |
|    time_elapsed       | 1817     |
|    total_timesteps    | 76500    |
| train/                |          |
|    entropy_loss       | -0.746   |
|    explained_variance | 0.79     |
|    learning_rate      | 0.001    |
|    n_updates          | 15299    |
|    policy_loss        | -0.685   |
|    value_loss         | 2.85     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 127      |
|    ep_rew_mean        | -21.3    |
| time/                 |          |
|    fps                | 42       |
|    iterations         | 15400    |
|    time_elapsed       | 1827     |
|    total_timesteps    | 77000    |
| train/                |          |
|    entropy_loss       | -0.498   |
|    explained_variance | -3.6     |
|    learning_rate      | 0.001    |
|    n_updates          | 15399    |
|    policy_loss        | 1.41     |
|    value_loss         | 38.1     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 127      |
|    ep_rew_mean        | -18.5    |
| time/                 |          |
|    fps                | 42       |
|    iterations         | 15500    |
|    time_elapsed       | 1837     |
|    total_timesteps    | 77500    |
| train/                |          |
|    entropy_loss       | -0.0522  |
|    explained_variance | -29.3    |
|    learning_rate      | 0.001    |
|    n_updates          | 15499    |
|    policy_loss        | 0.00068  |
|    value_loss         | 49       |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 126      |
|    ep_rew_mean        | -18.8    |
| time/                 |          |
|    fps                | 42       |
|    iterations         | 15600    |
|    time_elapsed       | 1848     |
|    total_timesteps    | 78000    |
| train/                |          |
|    entropy_loss       | -0.529   |
|    explained_variance | 0.766    |
|    learning_rate      | 0.001    |
|    n_updates          | 15599    |
|    policy_loss        | 0.874    |
|    value_loss         | 2.85     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 126      |
|    ep_rew_mean        | -16      |
| time/                 |          |
|    fps                | 42       |
|    iterations         | 15700    |
|    time_elapsed       | 1858     |
|    total_timesteps    | 78500    |
| train/                |          |
|    entropy_loss       | -0.354   |
|    explained_variance | -1.82    |
|    learning_rate      | 0.001    |
|    n_updates          | 15699    |
|    policy_loss        | -0.514   |
|    value_loss         | 58.2     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 123      |
|    ep_rew_mean        | -20.1    |
| time/                 |          |
|    fps                | 42       |
|    iterations         | 15800    |
|    time_elapsed       | 1869     |
|    total_timesteps    | 79000    |
| train/                |          |
|    entropy_loss       | -0.283   |
|    explained_variance | -0.384   |
|    learning_rate      | 0.001    |
|    n_updates          | 15799    |
|    policy_loss        | 1.24     |
|    value_loss         | 3.98     |
------------------------------------


------------------------------------
| eval/                 |          |
|    mean_ep_length     | 137      |
|    mean_reward        | -0.0941  |
| time/                 |          |
|    total_timesteps    | 79397    |
| train/                |          |
|    entropy_loss       | -0.0215  |
|    explained_variance | 0.144    |
|    learning_rate      | 0.001    |
|    n_updates          | 15879    |
|    policy_loss        | 0.0194   |
|    value_loss         | 40.3     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 121      |
|    ep_rew_mean        | -21.3    |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 15900    |
|    time_elapsed       | 1893     |
|    total_timesteps    | 79500    |
| train/                |          |
|    entropy_loss       | -0.27    |
|    explained_variance | 0.0854   |
|    learning_rate      | 0.001    |
|    n_updates          | 15899    |
|    policy_loss        | -0.639   |
|    value_loss         | 88.1     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 118      |
|    ep_rew_mean        | -23.8    |
| time/                 |          |
|    fps                | 42       |
|    iterations         | 16000    |
|    time_elapsed       | 1904     |
|    total_timesteps    | 80000    |
| train/                |          |
|    entropy_loss       | -0.876   |
|    explained_variance | 0.995    |
|    learning_rate      | 0.001    |
|    n_updates          | 15999    |
|    policy_loss        | 0.251    |
|    value_loss         | 0.237    |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 118      |
|    ep_rew_mean        | -25.5    |
| time/                 |          |
|    fps                | 42       |
|    iterations         | 16100    |
|    time_elapsed       | 1914     |
|    total_timesteps    | 80500    |
| train/                |          |
|    entropy_loss       | -0.00324 |
|    explained_variance | -0.00362 |
|    learning_rate      | 0.001    |
|    n_updates          | 16099    |
|    policy_loss        | -0.00206 |
|    value_loss         | 45.9     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 116      |
|    ep_rew_mean        | -35      |
| time/                 |          |
|    fps                | 42       |
|    iterations         | 16200    |
|    time_elapsed       | 1925     |
|    total_timesteps    | 81000    |
| train/                |          |
|    entropy_loss       | -0.516   |
|    explained_variance | 0.816    |
|    learning_rate      | 0.001    |
|    n_updates          | 16199    |
|    policy_loss        | 0.822    |
|    value_loss         | 2.03     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 111      |
|    ep_rew_mean        | -40.3    |
| time/                 |          |
|    fps                | 42       |
|    iterations         | 16300    |
|    time_elapsed       | 1935     |
|    total_timesteps    | 81500    |
| train/                |          |
|    entropy_loss       | -0.0882  |
|    explained_variance | -21.6    |
|    learning_rate      | 0.001    |
|    n_updates          | 16299    |
|    policy_loss        | -1.53    |
|    value_loss         | 9.5e+03  |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 110      |
|    ep_rew_mean        | -40.1    |
| time/                 |          |
|    fps                | 42       |
|    iterations         | 16400    |
|    time_elapsed       | 1946     |
|    total_timesteps    | 82000    |
| train/                |          |
|    entropy_loss       | -0.345   |
|    explained_variance | -0.787   |
|    learning_rate      | 0.001    |
|    n_updates          | 16399    |
|    policy_loss        | 0.08     |
|    value_loss         | 0.175    |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 110      |
|    ep_rew_mean        | -39.9    |
| time/                 |          |
|    fps                | 42       |
|    iterations         | 16500    |
|    time_elapsed       | 1956     |
|    total_timesteps    | 82500    |
| train/                |          |
|    entropy_loss       | -0.87    |
|    explained_variance | 0.931    |
|    learning_rate      | 0.001    |
|    n_updates          | 16499    |
|    policy_loss        | 0.264    |
|    value_loss         | 0.857    |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 109      |
|    ep_rew_mean        | -38.6    |
| time/                 |          |
|    fps                | 42       |
|    iterations         | 16600    |
|    time_elapsed       | 1967     |
|    total_timesteps    | 83000    |
| train/                |          |
|    entropy_loss       | -0.806   |
|    explained_variance | -0.337   |
|    learning_rate      | 0.001    |
|    n_updates          | 16599    |
|    policy_loss        | -1.22    |
|    value_loss         | 3.61     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 109      |
|    ep_rew_mean        | -36.9    |
| time/                 |          |
|    fps                | 42       |
|    iterations         | 16700    |
|    time_elapsed       | 1977     |
|    total_timesteps    | 83500    |
| train/                |          |
|    entropy_loss       | -0.56    |
|    explained_variance | 0.548    |
|    learning_rate      | 0.001    |
|    n_updates          | 16699    |
|    policy_loss        | 5.71     |
|    value_loss         | 63.1     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 108      |
|    ep_rew_mean        | -39      |
| time/                 |          |
|    fps                | 42       |
|    iterations         | 16800    |
|    time_elapsed       | 1988     |
|    total_timesteps    | 84000    |
| train/                |          |
|    entropy_loss       | -0.506   |
|    explained_variance | 0.337    |
|    learning_rate      | 0.001    |
|    n_updates          | 16799    |
|    policy_loss        | -32.6    |
|    value_loss         | 2.42e+03 |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 109      |
|    ep_rew_mean        | -44.6    |
| time/                 |          |
|    fps                | 42       |
|    iterations         | 16900    |
|    time_elapsed       | 1998     |
|    total_timesteps    | 84500    |
| train/                |          |
|    entropy_loss       | -0.799   |
|    explained_variance | -2.34    |
|    learning_rate      | 0.001    |
|    n_updates          | 16899    |
|    policy_loss        | 0.541    |
|    value_loss         | 9.18     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 108      |
|    ep_rew_mean        | -42.8    |
| time/                 |          |
|    fps                | 42       |
|    iterations         | 17000    |
|    time_elapsed       | 2009     |
|    total_timesteps    | 85000    |
| train/                |          |
|    entropy_loss       | -0.411   |
|    explained_variance | 0.704    |
|    learning_rate      | 0.001    |
|    n_updates          | 16999    |
|    policy_loss        | -1.44    |
|    value_loss         | 11.4     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 106      |
|    ep_rew_mean        | -50.3    |
| time/                 |          |
|    fps                | 42       |
|    iterations         | 17100    |
|    time_elapsed       | 2019     |
|    total_timesteps    | 85500    |
| train/                |          |
|    entropy_loss       | -0.823   |
|    explained_variance | -0.373   |
|    learning_rate      | 0.001    |
|    n_updates          | 17099    |
|    policy_loss        | -43.1    |
|    value_loss         | 3.32e+03 |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 105      |
|    ep_rew_mean        | -54.8    |
| time/                 |          |
|    fps                | 42       |
|    iterations         | 17200    |
|    time_elapsed       | 2030     |
|    total_timesteps    | 86000    |
| train/                |          |
|    entropy_loss       | -0.622   |
|    explained_variance | 0.966    |
|    learning_rate      | 0.001    |
|    n_updates          | 17199    |
|    policy_loss        | -0.77    |
|    value_loss         | 3.14     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 103      |
|    ep_rew_mean        | -58.3    |
| time/                 |          |
|    fps                | 42       |
|    iterations         | 17300    |
|    time_elapsed       | 2040     |
|    total_timesteps    | 86500    |
| train/                |          |
|    entropy_loss       | -0.839   |
|    explained_variance | 0.239    |
|    learning_rate      | 0.001    |
|    n_updates          | 17299    |
|    policy_loss        | 0.529    |
|    value_loss         | 0.447    |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 102      |
|    ep_rew_mean        | -59.5    |
| time/                 |          |
|    fps                | 42       |
|    iterations         | 17400    |
|    time_elapsed       | 2051     |
|    total_timesteps    | 87000    |
| train/                |          |
|    entropy_loss       | -0.0101  |
|    explained_variance | 0.0267   |
|    learning_rate      | 0.001    |
|    n_updates          | 17399    |
|    policy_loss        | -0.00966 |
|    value_loss         | 166      |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 101      |
|    ep_rew_mean        | -61.4    |
| time/                 |          |
|    fps                | 42       |
|    iterations         | 17500    |
|    time_elapsed       | 2061     |
|    total_timesteps    | 87500    |
| train/                |          |
|    entropy_loss       | -0.839   |
|    explained_variance | 0.615    |
|    learning_rate      | 0.001    |
|    n_updates          | 17499    |
|    policy_loss        | 2.61     |
|    value_loss         | 33.5     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 99.7     |
|    ep_rew_mean        | -64.9    |
| time/                 |          |
|    fps                | 42       |
|    iterations         | 17600    |
|    time_elapsed       | 2072     |
|    total_timesteps    | 88000    |
| train/                |          |
|    entropy_loss       | -0.49    |
|    explained_variance | -3.98    |
|    learning_rate      | 0.001    |
|    n_updates          | 17599    |
|    policy_loss        | -5.87    |
|    value_loss         | 296      |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 100      |
|    ep_rew_mean        | -67.1    |
| time/                 |          |
|    fps                | 42       |
|    iterations         | 17700    |
|    time_elapsed       | 2082     |
|    total_timesteps    | 88500    |
| train/                |          |
|    entropy_loss       | -0.794   |
|    explained_variance | 0.28     |
|    learning_rate      | 0.001    |
|    n_updates          | 17699    |
|    policy_loss        | 2.94     |
|    value_loss         | 41.3     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 101      |
|    ep_rew_mean        | -69.9    |
| time/                 |          |
|    fps                | 42       |
|    iterations         | 17800    |
|    time_elapsed       | 2092     |
|    total_timesteps    | 89000    |
| train/                |          |
|    entropy_loss       | -0.587   |
|    explained_variance | 0.537    |
|    learning_rate      | 0.001    |
|    n_updates          | 17799    |
|    policy_loss        | -0.775   |
|    value_loss         | 14.1     |
------------------------------------


------------------------------------
| eval/                 |          |
|    mean_ep_length     | 149      |
|    mean_reward        | -74.8    |
| time/                 |          |
|    total_timesteps    | 89397    |
| train/                |          |
|    entropy_loss       | -0.899   |
|    explained_variance | -0.0967  |
|    learning_rate      | 0.001    |
|    n_updates          | 17879    |
|    policy_loss        | -1.07    |
|    value_loss         | 3.63     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 102      |
|    ep_rew_mean        | -68.4    |
| time/                 |          |
|    fps                | 42       |
|    iterations         | 17900    |
|    time_elapsed       | 2118     |
|    total_timesteps    | 89500    |
| train/                |          |
|    entropy_loss       | -0.856   |
|    explained_variance | 0.647    |
|    learning_rate      | 0.001    |
|    n_updates          | 17899    |
|    policy_loss        | -0.88    |
|    value_loss         | 3.9      |
------------------------------------


-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 104       |
|    ep_rew_mean        | -68.6     |
| time/                 |           |
|    fps                | 42        |
|    iterations         | 18000     |
|    time_elapsed       | 2129      |
|    total_timesteps    | 90000     |
| train/                |           |
|    entropy_loss       | -0.00132  |
|    explained_variance | -0.502    |
|    learning_rate      | 0.001     |
|    n_updates          | 17999     |
|    policy_loss        | -0.000524 |
|    value_loss         | 64.8      |
-------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 105      |
|    ep_rew_mean        | -69.8    |
| time/                 |          |
|    fps                | 42       |
|    iterations         | 18100    |
|    time_elapsed       | 2139     |
|    total_timesteps    | 90500    |
| train/                |          |
|    entropy_loss       | -0.533   |
|    explained_variance | 0.987    |
|    learning_rate      | 0.001    |
|    n_updates          | 18099    |
|    policy_loss        | -0.24    |
|    value_loss         | 0.284    |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 106      |
|    ep_rew_mean        | -71.4    |
| time/                 |          |
|    fps                | 42       |
|    iterations         | 18200    |
|    time_elapsed       | 2150     |
|    total_timesteps    | 91000    |
| train/                |          |
|    entropy_loss       | -0.817   |
|    explained_variance | 0.62     |
|    learning_rate      | 0.001    |
|    n_updates          | 18199    |
|    policy_loss        | 1.21     |
|    value_loss         | 9.32     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 108      |
|    ep_rew_mean        | -69.6    |
| time/                 |          |
|    fps                | 42       |
|    iterations         | 18300    |
|    time_elapsed       | 2160     |
|    total_timesteps    | 91500    |
| train/                |          |
|    entropy_loss       | -0.441   |
|    explained_variance | -1.68    |
|    learning_rate      | 0.001    |
|    n_updates          | 18299    |
|    policy_loss        | -7.87    |
|    value_loss         | 45.7     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 111      |
|    ep_rew_mean        | -65.8    |
| time/                 |          |
|    fps                | 42       |
|    iterations         | 18400    |
|    time_elapsed       | 2170     |
|    total_timesteps    | 92000    |
| train/                |          |
|    entropy_loss       | -0.798   |
|    explained_variance | 0.741    |
|    learning_rate      | 0.001    |
|    n_updates          | 18399    |
|    policy_loss        | -0.208   |
|    value_loss         | 0.149    |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 113      |
|    ep_rew_mean        | -61.2    |
| time/                 |          |
|    fps                | 42       |
|    iterations         | 18500    |
|    time_elapsed       | 2181     |
|    total_timesteps    | 92500    |
| train/                |          |
|    entropy_loss       | -0.851   |
|    explained_variance | -0.929   |
|    learning_rate      | 0.001    |
|    n_updates          | 18499    |
|    policy_loss        | 3.99     |
|    value_loss         | 15.5     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 117      |
|    ep_rew_mean        | -62.3    |
| time/                 |          |
|    fps                | 42       |
|    iterations         | 18600    |
|    time_elapsed       | 2191     |
|    total_timesteps    | 93000    |
| train/                |          |
|    entropy_loss       | -0.369   |
|    explained_variance | 0.924    |
|    learning_rate      | 0.001    |
|    n_updates          | 18599    |
|    policy_loss        | -0.00261 |
|    value_loss         | 0.478    |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 118      |
|    ep_rew_mean        | -62.7    |
| time/                 |          |
|    fps                | 42       |
|    iterations         | 18700    |
|    time_elapsed       | 2201     |
|    total_timesteps    | 93500    |
| train/                |          |
|    entropy_loss       | -0.565   |
|    explained_variance | 0.972    |
|    learning_rate      | 0.001    |
|    n_updates          | 18699    |
|    policy_loss        | 0.551    |
|    value_loss         | 0.57     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 122      |
|    ep_rew_mean        | -63.5    |
| time/                 |          |
|    fps                | 42       |
|    iterations         | 18800    |
|    time_elapsed       | 2212     |
|    total_timesteps    | 94000    |
| train/                |          |
|    entropy_loss       | -0.825   |
|    explained_variance | -0.896   |
|    learning_rate      | 0.001    |
|    n_updates          | 18799    |
|    policy_loss        | 0.502    |
|    value_loss         | 3.87     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 122      |
|    ep_rew_mean        | -63.5    |
| time/                 |          |
|    fps                | 42       |
|    iterations         | 18900    |
|    time_elapsed       | 2222     |
|    total_timesteps    | 94500    |
| train/                |          |
|    entropy_loss       | -1.08    |
|    explained_variance | 0.062    |
|    learning_rate      | 0.001    |
|    n_updates          | 18899    |
|    policy_loss        | 0.71     |
|    value_loss         | 2.16     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 131      |
|    ep_rew_mean        | -66.1    |
| time/                 |          |
|    fps                | 42       |
|    iterations         | 19000    |
|    time_elapsed       | 2232     |
|    total_timesteps    | 95000    |
| train/                |          |
|    entropy_loss       | -0.83    |
|    explained_variance | 0.992    |
|    learning_rate      | 0.001    |
|    n_updates          | 18999    |
|    policy_loss        | 0.0483   |
|    value_loss         | 0.02     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 136      |
|    ep_rew_mean        | -63.5    |
| time/                 |          |
|    fps                | 42       |
|    iterations         | 19100    |
|    time_elapsed       | 2243     |
|    total_timesteps    | 95500    |
| train/                |          |
|    entropy_loss       | -0.608   |
|    explained_variance | -3.22    |
|    learning_rate      | 0.001    |
|    n_updates          | 19099    |
|    policy_loss        | 3.5      |
|    value_loss         | 15.7     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 136      |
|    ep_rew_mean        | -63.5    |
| time/                 |          |
|    fps                | 42       |
|    iterations         | 19200    |
|    time_elapsed       | 2253     |
|    total_timesteps    | 96000    |
| train/                |          |
|    entropy_loss       | -0.524   |
|    explained_variance | -1.2     |
|    learning_rate      | 0.001    |
|    n_updates          | 19199    |
|    policy_loss        | 2.25     |
|    value_loss         | 13.3     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 145      |
|    ep_rew_mean        | -62.6    |
| time/                 |          |
|    fps                | 42       |
|    iterations         | 19300    |
|    time_elapsed       | 2263     |
|    total_timesteps    | 96500    |
| train/                |          |
|    entropy_loss       | -0.548   |
|    explained_variance | -0.0271  |
|    learning_rate      | 0.001    |
|    n_updates          | 19299    |
|    policy_loss        | -0.047   |
|    value_loss         | 2.29     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 148      |
|    ep_rew_mean        | -65.4    |
| time/                 |          |
|    fps                | 42       |
|    iterations         | 19400    |
|    time_elapsed       | 2273     |
|    total_timesteps    | 97000    |
| train/                |          |
|    entropy_loss       | -0.871   |
|    explained_variance | -1.75    |
|    learning_rate      | 0.001    |
|    n_updates          | 19399    |
|    policy_loss        | 0.0307   |
|    value_loss         | 7.74     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 151      |
|    ep_rew_mean        | -66.2    |
| time/                 |          |
|    fps                | 42       |
|    iterations         | 19500    |
|    time_elapsed       | 2284     |
|    total_timesteps    | 97500    |
| train/                |          |
|    entropy_loss       | -0.509   |
|    explained_variance | -1.51    |
|    learning_rate      | 0.001    |
|    n_updates          | 19499    |
|    policy_loss        | -1.51    |
|    value_loss         | 10.1     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 154      |
|    ep_rew_mean        | -67.9    |
| time/                 |          |
|    fps                | 42       |
|    iterations         | 19600    |
|    time_elapsed       | 2294     |
|    total_timesteps    | 98000    |
| train/                |          |
|    entropy_loss       | -0.773   |
|    explained_variance | 0.842    |
|    learning_rate      | 0.001    |
|    n_updates          | 19599    |
|    policy_loss        | 1.25     |
|    value_loss         | 4.65     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 160      |
|    ep_rew_mean        | -66      |
| time/                 |          |
|    fps                | 42       |
|    iterations         | 19700    |
|    time_elapsed       | 2304     |
|    total_timesteps    | 98500    |
| train/                |          |
|    entropy_loss       | -0.576   |
|    explained_variance | -3.12    |
|    learning_rate      | 0.001    |
|    n_updates          | 19699    |
|    policy_loss        | -1.94    |
|    value_loss         | 12.4     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 162      |
|    ep_rew_mean        | -67      |
| time/                 |          |
|    fps                | 42       |
|    iterations         | 19800    |
|    time_elapsed       | 2315     |
|    total_timesteps    | 99000    |
| train/                |          |
|    entropy_loss       | -0.543   |
|    explained_variance | 0.14     |
|    learning_rate      | 0.001    |
|    n_updates          | 19799    |
|    policy_loss        | 0.329    |
|    value_loss         | 5.96     |
------------------------------------


------------------------------------
| eval/                 |          |
|    mean_ep_length     | 840      |
|    mean_reward        | 71.8     |
| time/                 |          |
|    total_timesteps    | 99397    |
| train/                |          |
|    entropy_loss       | -0.634   |
|    explained_variance | 0.574    |
|    learning_rate      | 0.001    |
|    n_updates          | 19879    |
|    policy_loss        | 1.05     |
|    value_loss         | 15.9     |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 165      |
|    ep_rew_mean        | -67.5    |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 19900    |
|    time_elapsed       | 2411     |
|    total_timesteps    | 99500    |
| train/                |          |
|    entropy_loss       | -0.733   |
|    explained_variance | 0.804    |
|    learning_rate      | 0.001    |
|    n_updates          | 19899    |
|    policy_loss        | 0.249    |
|    value_loss         | 0.232    |
------------------------------------


------------------------------------
| rollout/              |          |
|    ep_len_mean        | 165      |
|    ep_rew_mean        | -67.5    |
| time/                 |          |
|    fps                | 41       |
|    iterations         | 20000    |
|    time_elapsed       | 2422     |
|    total_timesteps    | 100000   |
| train/                |          |
|    entropy_loss       | -0.596   |
|    explained_variance | -0.627   |
|    learning_rate      | 0.001    |
|    n_updates          | 19999    |
|    policy_loss        | -0.958   |
|    value_loss         | 1.64     |
------------------------------------


<stable_baselines3.a2c.a2c.A2C at 0x24014669250>

In [12]:
model.save("ActorCritic_lunar_lander")

In [7]:
# load the model and test the model on the environment. make sure to render the environment
# to see the agent in action.
model = A2C.load("logs/best_model.zip")
obs = vec_env.reset()
while True:
    action, _states = model.predict(obs, deterministic=True)
    obs, rewards, dones, info = vec_env.step(action)
    vec_env.render()
    if dones[0]:
        break

In [None]:
# now just load the environment with no model and see the environment in action
env = gym.make("LunarLander-v3", render_mode="human")
obs = env.reset()
while True:
    action = env.action_space.sample()
    obs, rewards, dones, info = env.step(action)
    env.render()
    if dones:
        break