In [1]:
def load_valid_words(file_path='wordle_words.txt'):
    """
    Load valid five-letter words from a specified text file.

    The file is read as UTF-8 text, one candidate word per line. Leading and
    trailing whitespace is stripped from every line, and only entries that are
    exactly five characters long after stripping are kept. File order is
    preserved and duplicates are not removed.

    Parameters:
    - file_path (str): The path to the text file containing valid words.

    Returns:
    - list[str]: A list of valid words loaded from the file.

    Raises:
    - OSError: If the file cannot be opened (for example, it does not exist).
    - UnicodeDecodeError: If the file content is not valid UTF-8.
    """
    # Explicit encoding: without it, open() falls back to the platform's
    # locale encoding and the same file can load differently across machines.
    with open(file_path, 'r', encoding='utf-8') as file:
        # Strip each line exactly once, then filter on the stripped value.
        stripped_lines = (line.strip() for line in file)
        valid_words = [word for word in stripped_lines if len(word) == 5]
    return valid_words

In [2]:
from stable_baselines3 import PPO  # Or any other suitable RL algorithm
from stable_baselines3.common.env_checker import check_env
from letter_guess import LetterGuessingEnv
from tqdm import tqdm

In [3]:
# Build the guessing environment from the loaded word list
# (make sure your valid words file is available).
env = LetterGuessingEnv(
    valid_words=load_valid_words(),
)

# Optional: verify the environment is compatible with SB3.
check_env(env)

In [4]:
import wandb
from wandb.integration.sb3 import WandbCallback

In [5]:
# Run settings: read back by the training cell and recorded on the W&B run.
config = dict(
    policy_type="MlpPolicy",
    total_timesteps=200_000,
)

# Start the W&B run for the "wordle" project, with TensorBoard syncing enabled.
run = wandb.init(project="wordle", config=config, sync_tensorboard=True)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


[34m[1mwandb[0m: Currently logged in as: [33mltcptgeneral[0m ([33mfulltime[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [6]:
# Train a PPO agent on `env` and make sure the W&B run is always closed,
# even if training crashes or the kernel is interrupted mid-run.
try:
    # TensorBoard event files go to a per-run directory keyed by the W&B run id.
    model = PPO(config["policy_type"], env=env, verbose=0, tensorboard_log=f"runs/{run.id}")

    # Train for the configured number of timesteps
    model.learn(
        total_timesteps=config["total_timesteps"],
        callback=WandbCallback(
            model_save_path=f"models/{run.id}",
            verbose=2,
        ),
        progress_bar=True,
    )

    # Save the model locally before closing the run
    model.save("wordle_ppo_model")
except BaseException:
    # BaseException also covers KeyboardInterrupt (notebook "interrupt kernel").
    # Close the run with a non-zero exit code so it is not left open,
    # then re-raise so the failure is still visible in the notebook.
    run.finish(exit_code=1)
    raise
else:
    run.finish()

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Logging to runs/ot2i0b8h/PPO_1


Output()

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 2.48     |
|    ep_rew_mean     | -3.7     |
| time/              |          |
|    fps             | 465      |
|    iterations      | 1        |
|    time_elapsed    | 4        |
|    total_timesteps | 2048     |
---------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 2.49       |
|    ep_rew_mean          | -3.65      |
| time/                   |            |
|    fps                  | 395        |
|    iterations           | 2          |
|    time_elapsed         | 10         |
|    total_timesteps      | 4096       |
| train/                  |            |
|    approx_kl            | 0.04501068 |
|    clip_fraction        | 0.427      |
|    clip_range           | 0.2        |
|    entropy_loss         | -3.23      |
|    explained_variance   | 0.189      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.205      |
|    n_updates            | 10         |
|    policy_gradient_loss | -0.0667    |
|    value_loss           | 0.997      |
----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 2.84       |
|    ep_rew_mean          | -3.4       |
| time/                   |            |
|    fps                  | 381        |
|    iterations           | 3          |
|    time_elapsed         | 16         |
|    total_timesteps      | 6144       |
| train/                  |            |
|    approx_kl            | 0.01765968 |
|    clip_fraction        | 0.319      |
|    clip_range           | 0.2        |
|    entropy_loss         | -3.17      |
|    explained_variance   | 0.481      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.123      |
|    n_updates            | 20         |
|    policy_gradient_loss | -0.0525    |
|    value_loss           | 0.383      |
----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.98        |
|    ep_rew_mean          | -3.28       |
| time/                   |             |
|    fps                  | 374         |
|    iterations           | 4           |
|    time_elapsed         | 21          |
|    total_timesteps      | 8192        |
| train/                  |             |
|    approx_kl            | 0.018652592 |
|    clip_fraction        | 0.368       |
|    clip_range           | 0.2         |
|    entropy_loss         | -3.11       |
|    explained_variance   | 0.428       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.181       |
|    n_updates            | 30          |
|    policy_gradient_loss | -0.0572     |
|    value_loss           | 0.51        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3.1         |
|    ep_rew_mean          | -3.24       |
| time/                   |             |
|    fps                  | 369         |
|    iterations           | 5           |
|    time_elapsed         | 27          |
|    total_timesteps      | 10240       |
| train/                  |             |
|    approx_kl            | 0.023806999 |
|    clip_fraction        | 0.365       |
|    clip_range           | 0.2         |
|    entropy_loss         | -3.04       |
|    explained_variance   | 0.46        |
|    learning_rate        | 0.0003      |
|    loss                 | 0.118       |
|    n_updates            | 40          |
|    policy_gradient_loss | -0.0609     |
|    value_loss           | 0.499       |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3.15        |
|    ep_rew_mean          | -3.09       |
| time/                   |             |
|    fps                  | 366         |
|    iterations           | 6           |
|    time_elapsed         | 33          |
|    total_timesteps      | 12288       |
| train/                  |             |
|    approx_kl            | 0.024716537 |
|    clip_fraction        | 0.372       |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.94       |
|    explained_variance   | 0.495       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.266       |
|    n_updates            | 50          |
|    policy_gradient_loss | -0.0578     |
|    value_loss           | 0.503       |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3.46        |
|    ep_rew_mean          | -2.8        |
| time/                   |             |
|    fps                  | 365         |
|    iterations           | 7           |
|    time_elapsed         | 39          |
|    total_timesteps      | 14336       |
| train/                  |             |
|    approx_kl            | 0.023435738 |
|    clip_fraction        | 0.357       |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.82       |
|    explained_variance   | 0.556       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.105       |
|    n_updates            | 60          |
|    policy_gradient_loss | -0.0537     |
|    value_loss           | 0.491       |
-----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 3.54       |
|    ep_rew_mean          | -2.74      |
| time/                   |            |
|    fps                  | 363        |
|    iterations           | 8          |
|    time_elapsed         | 45         |
|    total_timesteps      | 16384      |
| train/                  |            |
|    approx_kl            | 0.02574392 |
|    clip_fraction        | 0.29       |
|    clip_range           | 0.2        |
|    entropy_loss         | -2.71      |
|    explained_variance   | 0.608      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.125      |
|    n_updates            | 70         |
|    policy_gradient_loss | -0.0445    |
|    value_loss           | 0.464      |
----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3.71        |
|    ep_rew_mean          | -2.63       |
| time/                   |             |
|    fps                  | 362         |
|    iterations           | 9           |
|    time_elapsed         | 50          |
|    total_timesteps      | 18432       |
| train/                  |             |
|    approx_kl            | 0.021754535 |
|    clip_fraction        | 0.251       |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.56       |
|    explained_variance   | 0.673       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.152       |
|    n_updates            | 80          |
|    policy_gradient_loss | -0.0385     |
|    value_loss           | 0.4         |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3.8         |
|    ep_rew_mean          | -2.5        |
| time/                   |             |
|    fps                  | 362         |
|    iterations           | 10          |
|    time_elapsed         | 56          |
|    total_timesteps      | 20480       |
| train/                  |             |
|    approx_kl            | 0.018548178 |
|    clip_fraction        | 0.239       |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.46       |
|    explained_variance   | 0.702       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.218       |
|    n_updates            | 90          |
|    policy_gradient_loss | -0.0361     |
|    value_loss           | 0.396       |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3.8         |
|    ep_rew_mean          | -2.34       |
| time/                   |             |
|    fps                  | 362         |
|    iterations           | 11          |
|    time_elapsed         | 62          |
|    total_timesteps      | 22528       |
| train/                  |             |
|    approx_kl            | 0.016667131 |
|    clip_fraction        | 0.24        |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.36       |
|    explained_variance   | 0.698       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.226       |
|    n_updates            | 100         |
|    policy_gradient_loss | -0.037      |
|    value_loss           | 0.411       |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3.95        |
|    ep_rew_mean          | -2.31       |
| time/                   |             |
|    fps                  | 361         |
|    iterations           | 12          |
|    time_elapsed         | 67          |
|    total_timesteps      | 24576       |
| train/                  |             |
|    approx_kl            | 0.020023255 |
|    clip_fraction        | 0.257       |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.24       |
|    explained_variance   | 0.712       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0958      |
|    n_updates            | 110         |
|    policy_gradient_loss | -0.0381     |
|    value_loss           | 0.406       |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 4.19        |
|    ep_rew_mean          | -2.03       |
| time/                   |             |
|    fps                  | 360         |
|    iterations           | 13          |
|    time_elapsed         | 73          |
|    total_timesteps      | 26624       |
| train/                  |             |
|    approx_kl            | 0.019943349 |
|    clip_fraction        | 0.266       |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.13       |
|    explained_variance   | 0.712       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0974      |
|    n_updates            | 120         |
|    policy_gradient_loss | -0.0387     |
|    value_loss           | 0.444       |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 4.24        |
|    ep_rew_mean          | -1.96       |
| time/                   |             |
|    fps                  | 360         |
|    iterations           | 14          |
|    time_elapsed         | 79          |
|    total_timesteps      | 28672       |
| train/                  |             |
|    approx_kl            | 0.022638176 |
|    clip_fraction        | 0.298       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.98       |
|    explained_variance   | 0.697       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.145       |
|    n_updates            | 130         |
|    policy_gradient_loss | -0.0433     |
|    value_loss           | 0.486       |
-----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 4.45       |
|    ep_rew_mean          | -1.89      |
| time/                   |            |
|    fps                  | 361        |
|    iterations           | 15         |
|    time_elapsed         | 84         |
|    total_timesteps      | 30720      |
| train/                  |            |
|    approx_kl            | 0.02262218 |
|    clip_fraction        | 0.354      |
|    clip_range           | 0.2        |
|    entropy_loss         | -1.77      |
|    explained_variance   | 0.727      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.0807     |
|    n_updates            | 140        |
|    policy_gradient_loss | -0.0439    |
|    value_loss           | 0.454      |
----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 4.91        |
|    ep_rew_mean          | -1.65       |
| time/                   |             |
|    fps                  | 362         |
|    iterations           | 16          |
|    time_elapsed         | 90          |
|    total_timesteps      | 32768       |
| train/                  |             |
|    approx_kl            | 0.023807548 |
|    clip_fraction        | 0.251       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.62       |
|    explained_variance   | 0.654       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.171       |
|    n_updates            | 150         |
|    policy_gradient_loss | -0.0355     |
|    value_loss           | 0.686       |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 4.86        |
|    ep_rew_mean          | -1.42       |
| time/                   |             |
|    fps                  | 361         |
|    iterations           | 17          |
|    time_elapsed         | 96          |
|    total_timesteps      | 34816       |
| train/                  |             |
|    approx_kl            | 0.018085614 |
|    clip_fraction        | 0.198       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.49       |
|    explained_variance   | 0.699       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.339       |
|    n_updates            | 160         |
|    policy_gradient_loss | -0.0297     |
|    value_loss           | 0.674       |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5.4         |
|    ep_rew_mean          | -1.88       |
| time/                   |             |
|    fps                  | 361         |
|    iterations           | 18          |
|    time_elapsed         | 102         |
|    total_timesteps      | 36864       |
| train/                  |             |
|    approx_kl            | 0.015559142 |
|    clip_fraction        | 0.222       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.35       |
|    explained_variance   | 0.719       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.354       |
|    n_updates            | 170         |
|    policy_gradient_loss | -0.0349     |
|    value_loss           | 0.629       |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5.54        |
|    ep_rew_mean          | -1.22       |
| time/                   |             |
|    fps                  | 360         |
|    iterations           | 19          |
|    time_elapsed         | 108         |
|    total_timesteps      | 38912       |
| train/                  |             |
|    approx_kl            | 0.014995611 |
|    clip_fraction        | 0.17        |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.32       |
|    explained_variance   | 0.436       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.33        |
|    n_updates            | 180         |
|    policy_gradient_loss | -0.031      |
|    value_loss           | 2.79        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5.28        |
|    ep_rew_mean          | -1.2        |
| time/                   |             |
|    fps                  | 360         |
|    iterations           | 20          |
|    time_elapsed         | 113         |
|    total_timesteps      | 40960       |
| train/                  |             |
|    approx_kl            | 0.018023107 |
|    clip_fraction        | 0.169       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.23       |
|    explained_variance   | 0.559       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.743       |
|    n_updates            | 190         |
|    policy_gradient_loss | -0.0327     |
|    value_loss           | 1.24        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 6.22        |
|    ep_rew_mean          | -0.38       |
| time/                   |             |
|    fps                  | 361         |
|    iterations           | 21          |
|    time_elapsed         | 119         |
|    total_timesteps      | 43008       |
| train/                  |             |
|    approx_kl            | 0.023376558 |
|    clip_fraction        | 0.29        |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.07       |
|    explained_variance   | 0.628       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.502       |
|    n_updates            | 200         |
|    policy_gradient_loss | -0.0451     |
|    value_loss           | 1.11        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 7.06        |
|    ep_rew_mean          | 0.26        |
| time/                   |             |
|    fps                  | 362         |
|    iterations           | 22          |
|    time_elapsed         | 124         |
|    total_timesteps      | 45056       |
| train/                  |             |
|    approx_kl            | 0.023838695 |
|    clip_fraction        | 0.269       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.01       |
|    explained_variance   | 0.566       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.925       |
|    n_updates            | 210         |
|    policy_gradient_loss | -0.0463     |
|    value_loss           | 1.71        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 7.93        |
|    ep_rew_mean          | 1.19        |
| time/                   |             |
|    fps                  | 362         |
|    iterations           | 23          |
|    time_elapsed         | 129         |
|    total_timesteps      | 47104       |
| train/                  |             |
|    approx_kl            | 0.021363221 |
|    clip_fraction        | 0.229       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.906      |
|    explained_variance   | 0.594       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.819       |
|    n_updates            | 220         |
|    policy_gradient_loss | -0.0426     |
|    value_loss           | 1.94        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 8.52        |
|    ep_rew_mean          | 1.98        |
| time/                   |             |
|    fps                  | 363         |
|    iterations           | 24          |
|    time_elapsed         | 135         |
|    total_timesteps      | 49152       |
| train/                  |             |
|    approx_kl            | 0.022241611 |
|    clip_fraction        | 0.167       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.882      |
|    explained_variance   | 0.667       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.838       |
|    n_updates            | 230         |
|    policy_gradient_loss | -0.0343     |
|    value_loss           | 1.73        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 8.9         |
|    ep_rew_mean          | 1.8         |
| time/                   |             |
|    fps                  | 364         |
|    iterations           | 25          |
|    time_elapsed         | 140         |
|    total_timesteps      | 51200       |
| train/                  |             |
|    approx_kl            | 0.011297604 |
|    clip_fraction        | 0.111       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.859      |
|    explained_variance   | 0.763       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.5         |
|    n_updates            | 240         |
|    policy_gradient_loss | -0.024      |
|    value_loss           | 1.35        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 9.01        |
|    ep_rew_mean          | 2.23        |
| time/                   |             |
|    fps                  | 364         |
|    iterations           | 26          |
|    time_elapsed         | 145         |
|    total_timesteps      | 53248       |
| train/                  |             |
|    approx_kl            | 0.010706454 |
|    clip_fraction        | 0.0958      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.84       |
|    explained_variance   | 0.486       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.872       |
|    n_updates            | 250         |
|    policy_gradient_loss | -0.0237     |
|    value_loss           | 2.77        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 9.75        |
|    ep_rew_mean          | 1.81        |
| time/                   |             |
|    fps                  | 365         |
|    iterations           | 27          |
|    time_elapsed         | 151         |
|    total_timesteps      | 55296       |
| train/                  |             |
|    approx_kl            | 0.011905432 |
|    clip_fraction        | 0.12        |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.785      |
|    explained_variance   | 0.838       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.121       |
|    n_updates            | 260         |
|    policy_gradient_loss | -0.0206     |
|    value_loss           | 0.851       |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 9.07        |
|    ep_rew_mean          | 2.13        |
| time/                   |             |
|    fps                  | 365         |
|    iterations           | 28          |
|    time_elapsed         | 156         |
|    total_timesteps      | 57344       |
| train/                  |             |
|    approx_kl            | 0.009603689 |
|    clip_fraction        | 0.0931      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.808      |
|    explained_variance   | 0.684       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.391       |
|    n_updates            | 270         |
|    policy_gradient_loss | -0.0184     |
|    value_loss           | 1.63        |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 8.87         |
|    ep_rew_mean          | 2.51         |
| time/                   |              |
|    fps                  | 366          |
|    iterations           | 29           |
|    time_elapsed         | 162          |
|    total_timesteps      | 59392        |
| train/                  |              |
|    approx_kl            | 0.0071417904 |
|    clip_fraction        | 0.0738       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.736       |
|    explained_variance   | 0.826        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.83         |
|    n_updates            | 280          |
|    policy_gradient_loss | -0.0131      |
|    value_loss           | 1.72         |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 9.02        |
|    ep_rew_mean          | 2.74        |
| time/                   |             |
|    fps                  | 366         |
|    iterations           | 30          |
|    time_elapsed         | 167         |
|    total_timesteps      | 61440       |
| train/                  |             |
|    approx_kl            | 0.009269893 |
|    clip_fraction        | 0.0918      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.686      |
|    explained_variance   | 0.912       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.163       |
|    n_updates            | 290         |
|    policy_gradient_loss | -0.0166     |
|    value_loss           | 0.537       |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 9.51        |
|    ep_rew_mean          | 2.29        |
| time/                   |             |
|    fps                  | 367         |
|    iterations           | 31          |
|    time_elapsed         | 172         |
|    total_timesteps      | 63488       |
| train/                  |             |
|    approx_kl            | 0.010982089 |
|    clip_fraction        | 0.0863      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.657      |
|    explained_variance   | 0.946       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.339       |
|    n_updates            | 300         |
|    policy_gradient_loss | -0.0149     |
|    value_loss           | 0.341       |
-----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 8.94       |
|    ep_rew_mean          | 2.7        |
| time/                   |            |
|    fps                  | 368        |
|    iterations           | 32         |
|    time_elapsed         | 177        |
|    total_timesteps      | 65536      |
| train/                  |            |
|    approx_kl            | 0.01880536 |
|    clip_fraction        | 0.0622     |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.667     |
|    explained_variance   | 0.778      |
|    learning_rate        | 0.0003     |
|    loss                 | 1.13       |
|    n_updates            | 310        |
|    policy_gradient_loss | -0.0107    |
|    value_loss           | 2.3        |
----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 8.82        |
|    ep_rew_mean          | 2.6         |
| time/                   |             |
|    fps                  | 369         |
|    iterations           | 33          |
|    time_elapsed         | 182         |
|    total_timesteps      | 67584       |
| train/                  |             |
|    approx_kl            | 0.013803964 |
|    clip_fraction        | 0.1         |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.644      |
|    explained_variance   | 0.952       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0438      |
|    n_updates            | 320         |
|    policy_gradient_loss | -0.0186     |
|    value_loss           | 0.31        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 9           |
|    ep_rew_mean          | 2.9         |
| time/                   |             |
|    fps                  | 370         |
|    iterations           | 34          |
|    time_elapsed         | 188         |
|    total_timesteps      | 69632       |
| train/                  |             |
|    approx_kl            | 0.011061303 |
|    clip_fraction        | 0.0942      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.597      |
|    explained_variance   | 0.905       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.293       |
|    n_updates            | 330         |
|    policy_gradient_loss | -0.0158     |
|    value_loss           | 0.61        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 9.96        |
|    ep_rew_mean          | 2.6         |
| time/                   |             |
|    fps                  | 370         |
|    iterations           | 35          |
|    time_elapsed         | 193         |
|    total_timesteps      | 71680       |
| train/                  |             |
|    approx_kl            | 0.016911192 |
|    clip_fraction        | 0.165       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.551      |
|    explained_variance   | 0.926       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.229       |
|    n_updates            | 340         |
|    policy_gradient_loss | -0.0229     |
|    value_loss           | 0.519       |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 9.5         |
|    ep_rew_mean          | 3.32        |
| time/                   |             |
|    fps                  | 370         |
|    iterations           | 36          |
|    time_elapsed         | 198         |
|    total_timesteps      | 73728       |
| train/                  |             |
|    approx_kl            | 0.012311206 |
|    clip_fraction        | 0.119       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.595      |
|    explained_variance   | 0.709       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.68        |
|    n_updates            | 350         |
|    policy_gradient_loss | -0.0275     |
|    value_loss           | 1.95        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 9.6         |
|    ep_rew_mean          | 3.3         |
| time/                   |             |
|    fps                  | 370         |
|    iterations           | 37          |
|    time_elapsed         | 204         |
|    total_timesteps      | 75776       |
| train/                  |             |
|    approx_kl            | 0.059752032 |
|    clip_fraction        | 0.154       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.534      |
|    explained_variance   | 0.785       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.835       |
|    n_updates            | 360         |
|    policy_gradient_loss | -0.0234     |
|    value_loss           | 1.46        |
-----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 10.8       |
|    ep_rew_mean          | 2.7        |
| time/                   |            |
|    fps                  | 370        |
|    iterations           | 38         |
|    time_elapsed         | 209        |
|    total_timesteps      | 77824      |
| train/                  |            |
|    approx_kl            | 0.01475055 |
|    clip_fraction        | 0.0968     |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.455     |
|    explained_variance   | 0.917      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.153      |
|    n_updates            | 370        |
|    policy_gradient_loss | -0.019     |
|    value_loss           | 0.428      |
----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 10.2        |
|    ep_rew_mean          | 3.01        |
| time/                   |             |
|    fps                  | 370         |
|    iterations           | 39          |
|    time_elapsed         | 215         |
|    total_timesteps      | 79872       |
| train/                  |             |
|    approx_kl            | 0.012021113 |
|    clip_fraction        | 0.0851      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.558      |
|    explained_variance   | 0.702       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.8         |
|    n_updates            | 380         |
|    policy_gradient_loss | -0.0284     |
|    value_loss           | 4.45        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 9.83        |
|    ep_rew_mean          | 3.57        |
| time/                   |             |
|    fps                  | 370         |
|    iterations           | 40          |
|    time_elapsed         | 220         |
|    total_timesteps      | 81920       |
| train/                  |             |
|    approx_kl            | 0.010166377 |
|    clip_fraction        | 0.0623      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.454      |
|    explained_variance   | 0.728       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.43        |
|    n_updates            | 390         |
|    policy_gradient_loss | -0.0201     |
|    value_loss           | 2.21        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 10.3        |
|    ep_rew_mean          | 3.14        |
| time/                   |             |
|    fps                  | 371         |
|    iterations           | 41          |
|    time_elapsed         | 226         |
|    total_timesteps      | 83968       |
| train/                  |             |
|    approx_kl            | 0.017603599 |
|    clip_fraction        | 0.0748      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.384      |
|    explained_variance   | 0.957       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.152       |
|    n_updates            | 400         |
|    policy_gradient_loss | -0.0122     |
|    value_loss           | 0.286       |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 9.95        |
|    ep_rew_mean          | 3.81        |
| time/                   |             |
|    fps                  | 371         |
|    iterations           | 42          |
|    time_elapsed         | 231         |
|    total_timesteps      | 86016       |
| train/                  |             |
|    approx_kl            | 0.028185518 |
|    clip_fraction        | 0.0954      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.383      |
|    explained_variance   | 0.811       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.412       |
|    n_updates            | 410         |
|    policy_gradient_loss | -0.0224     |
|    value_loss           | 2.05        |
-----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 10.5       |
|    ep_rew_mean          | 3.57       |
| time/                   |            |
|    fps                  | 371        |
|    iterations           | 43         |
|    time_elapsed         | 236        |
|    total_timesteps      | 88064      |
| train/                  |            |
|    approx_kl            | 0.02107103 |
|    clip_fraction        | 0.0702     |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.375     |
|    explained_variance   | 0.732      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.156      |
|    n_updates            | 420        |
|    policy_gradient_loss | -0.0342    |
|    value_loss           | 1.54       |
----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 9.76        |
|    ep_rew_mean          | 3.7         |
| time/                   |             |
|    fps                  | 371         |
|    iterations           | 44          |
|    time_elapsed         | 242         |
|    total_timesteps      | 90112       |
| train/                  |             |
|    approx_kl            | 0.007913441 |
|    clip_fraction        | 0.0349      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.346      |
|    explained_variance   | 0.827       |
|    learning_rate        | 0.0003      |
|    loss                 | 2.92        |
|    n_updates            | 430         |
|    policy_gradient_loss | -0.00355    |
|    value_loss           | 2.2         |
-----------------------------------------


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 25.8      |
|    ep_rew_mean          | -12.4     |
| time/                   |           |
|    fps                  | 371       |
|    iterations           | 45        |
|    time_elapsed         | 247       |
|    total_timesteps      | 92160     |
| train/                  |           |
|    approx_kl            | 0.5432366 |
|    clip_fraction        | 0.109     |
|    clip_range           | 0.2       |
|    entropy_loss         | -0.316    |
|    explained_variance   | 0.958     |
|    learning_rate        | 0.0003    |
|    loss                 | 0.231     |
|    n_updates            | 440       |
|    policy_gradient_loss | -0.0068   |
|    value_loss           | 0.313     |
---------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 45.9        |
|    ep_rew_mean          | -32.4       |
| time/                   |             |
|    fps                  | 372         |
|    iterations           | 46          |
|    time_elapsed         | 252         |
|    total_timesteps      | 94208       |
| train/                  |             |
|    approx_kl            | 0.008981178 |
|    clip_fraction        | 0.0144      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.11       |
|    explained_variance   | -0.0603     |
|    learning_rate        | 0.0003      |
|    loss                 | 1.91        |
|    n_updates            | 450         |
|    policy_gradient_loss | -0.00156    |
|    value_loss           | 17.2        |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 62.7         |
|    ep_rew_mean          | -49.3        |
| time/                   |              |
|    fps                  | 373          |
|    iterations           | 47           |
|    time_elapsed         | 257          |
|    total_timesteps      | 96256        |
| train/                  |              |
|    approx_kl            | 0.0025778997 |
|    clip_fraction        | 0.0184       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.165       |
|    explained_variance   | 0.396        |
|    learning_rate        | 0.0003       |
|    loss                 | 19.1         |
|    n_updates            | 460          |
|    policy_gradient_loss | -0.00601     |
|    value_loss           | 45.6         |
------------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 49.3        |
|    ep_rew_mean          | -35.6       |
| time/                   |             |
|    fps                  | 373         |
|    iterations           | 48          |
|    time_elapsed         | 262         |
|    total_timesteps      | 98304       |
| train/                  |             |
|    approx_kl            | 0.014914533 |
|    clip_fraction        | 0.0816      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.272      |
|    explained_variance   | 0.394       |
|    learning_rate        | 0.0003      |
|    loss                 | 33.5        |
|    n_updates            | 470         |
|    policy_gradient_loss | -0.0164     |
|    value_loss           | 74.1        |
-----------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 39.9         |
|    ep_rew_mean          | -26.3        |
| time/                   |              |
|    fps                  | 374          |
|    iterations           | 49           |
|    time_elapsed         | 268          |
|    total_timesteps      | 100352       |
| train/                  |              |
|    approx_kl            | 0.0012997694 |
|    clip_fraction        | 0.026        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.24        |
|    explained_variance   | 0.465        |
|    learning_rate        | 0.0003       |
|    loss                 | 44.5         |
|    n_updates            | 480          |
|    policy_gradient_loss | -0.00881     |
|    value_loss           | 76.6         |
------------------------------------------


------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 27           |
|    ep_rew_mean          | -13.3        |
| time/                   |              |
|    fps                  | 374          |
|    iterations           | 50           |
|    time_elapsed         | 273          |
|    total_timesteps      | 102400       |
| train/                  |              |
|    approx_kl            | 0.0014951692 |
|    clip_fraction        | 0.037        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.25        |
|    explained_variance   | 0.594        |
|    learning_rate        | 0.0003       |
|    loss                 | 30.2         |
|    n_updates            | 490          |
|    policy_gradient_loss | -0.00958     |
|    value_loss           | 77.4         |
------------------------------------------


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 17.4      |
|    ep_rew_mean          | -3.57     |
| time/                   |           |
|    fps                  | 374       |
|    iterations           | 51        |
|    time_elapsed         | 279       |
|    total_timesteps      | 104448    |
| train/                  |           |
|    approx_kl            | 0.0443189 |
|    clip_fraction        | 0.176     |
|    clip_range           | 0.2       |
|    entropy_loss         | -0.457    |
|    explained_variance   | 0.338     |
|    learning_rate        | 0.0003    |
|    loss                 | 44.4      |
|    n_updates            | 500       |
|    policy_gradient_loss | -0.0247   |
|    value_loss           | 70.8      |
---------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 13.3        |
|    ep_rew_mean          | 0.45        |
| time/                   |             |
|    fps                  | 374         |
|    iterations           | 52          |
|    time_elapsed         | 284         |
|    total_timesteps      | 106496      |
| train/                  |             |
|    approx_kl            | 0.005663138 |
|    clip_fraction        | 0.069       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.463      |
|    explained_variance   | 0.558       |
|    learning_rate        | 0.0003      |
|    loss                 | 14.7        |
|    n_updates            | 510         |
|    policy_gradient_loss | -0.0117     |
|    value_loss           | 36.8        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 19.8        |
|    ep_rew_mean          | -5.92       |
| time/                   |             |
|    fps                  | 374         |
|    iterations           | 53          |
|    time_elapsed         | 289         |
|    total_timesteps      | 108544      |
| train/                  |             |
|    approx_kl            | 0.020181399 |
|    clip_fraction        | 0.176       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.499      |
|    explained_variance   | 0.182       |
|    learning_rate        | 0.0003      |
|    loss                 | 3.45        |
|    n_updates            | 520         |
|    policy_gradient_loss | -0.0254     |
|    value_loss           | 9.2         |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.4        |
|    ep_rew_mean          | 2.26        |
| time/                   |             |
|    fps                  | 374         |
|    iterations           | 54          |
|    time_elapsed         | 295         |
|    total_timesteps      | 110592      |
| train/                  |             |
|    approx_kl            | 0.009738399 |
|    clip_fraction        | 0.108       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.538      |
|    explained_variance   | 0.891       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.88        |
|    n_updates            | 530         |
|    policy_gradient_loss | -0.00573    |
|    value_loss           | 22          |
-----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 10.4       |
|    ep_rew_mean          | 3.06       |
| time/                   |            |
|    fps                  | 375        |
|    iterations           | 55         |
|    time_elapsed         | 300        |
|    total_timesteps      | 112640     |
| train/                  |            |
|    approx_kl            | 0.01536967 |
|    clip_fraction        | 0.191      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.49      |
|    explained_variance   | 0.315      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.971      |
|    n_updates            | 540        |
|    policy_gradient_loss | -0.0219    |
|    value_loss           | 6.46       |
----------------------------------------


---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 10.8      |
|    ep_rew_mean          | 1.63      |
| time/                   |           |
|    fps                  | 375       |
|    iterations           | 56        |
|    time_elapsed         | 305       |
|    total_timesteps      | 114688    |
| train/                  |           |
|    approx_kl            | 0.0688808 |
|    clip_fraction        | 0.264     |
|    clip_range           | 0.2       |
|    entropy_loss         | -0.496    |
|    explained_variance   | 0.838     |
|    learning_rate        | 0.0003    |
|    loss                 | 0.205     |
|    n_updates            | 550       |
|    policy_gradient_loss | -0.0406   |
|    value_loss           | 0.836     |
---------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 10.3       |
|    ep_rew_mean          | 2.98       |
| time/                   |            |
|    fps                  | 375        |
|    iterations           | 57         |
|    time_elapsed         | 311        |
|    total_timesteps      | 116736     |
| train/                  |            |
|    approx_kl            | 0.04210388 |
|    clip_fraction        | 0.326      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.534     |
|    explained_variance   | 0.734      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.907      |
|    n_updates            | 560        |
|    policy_gradient_loss | -0.0421    |
|    value_loss           | 1.91       |
----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 10.3        |
|    ep_rew_mean          | 3.28        |
| time/                   |             |
|    fps                  | 375         |
|    iterations           | 58          |
|    time_elapsed         | 316         |
|    total_timesteps      | 118784      |
| train/                  |             |
|    approx_kl            | 0.051502623 |
|    clip_fraction        | 0.224       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.434      |
|    explained_variance   | 0.875       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.404       |
|    n_updates            | 570         |
|    policy_gradient_loss | -0.0345     |
|    value_loss           | 0.827       |
-----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 10.2       |
|    ep_rew_mean          | 3.78       |
| time/                   |            |
|    fps                  | 375        |
|    iterations           | 59         |
|    time_elapsed         | 322        |
|    total_timesteps      | 120832     |
| train/                  |            |
|    approx_kl            | 0.07014565 |
|    clip_fraction        | 0.165      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.391     |
|    explained_variance   | 0.903      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.145      |
|    n_updates            | 580        |
|    policy_gradient_loss | -0.0254    |
|    value_loss           | 0.681      |
----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 16.4        |
|    ep_rew_mean          | -3.11       |
| time/                   |             |
|    fps                  | 375         |
|    iterations           | 60          |
|    time_elapsed         | 327         |
|    total_timesteps      | 122880      |
| train/                  |             |
|    approx_kl            | 0.118292876 |
|    clip_fraction        | 0.151       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.466      |
|    explained_variance   | 0.952       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0778      |
|    n_updates            | 590         |
|    policy_gradient_loss | -0.0214     |
|    value_loss           | 0.316       |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 12.5        |
|    ep_rew_mean          | 1.05        |
| time/                   |             |
|    fps                  | 375         |
|    iterations           | 61          |
|    time_elapsed         | 333         |
|    total_timesteps      | 124928      |
| train/                  |             |
|    approx_kl            | 0.061776154 |
|    clip_fraction        | 0.321       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.665      |
|    explained_variance   | 0.401       |
|    learning_rate        | 0.0003      |
|    loss                 | 9.22        |
|    n_updates            | 600         |
|    policy_gradient_loss | -0.0454     |
|    value_loss           | 17          |
-----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 10.8       |
|    ep_rew_mean          | 2.8        |
| time/                   |            |
|    fps                  | 374        |
|    iterations           | 62         |
|    time_elapsed         | 338        |
|    total_timesteps      | 126976     |
| train/                  |            |
|    approx_kl            | 0.01477613 |
|    clip_fraction        | 0.153      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.579     |
|    explained_variance   | 0.682      |
|    learning_rate        | 0.0003     |
|    loss                 | 6.12       |
|    n_updates            | 610        |
|    policy_gradient_loss | -0.027     |
|    value_loss           | 10.5       |
----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.4        |
|    ep_rew_mean          | 2.68        |
| time/                   |             |
|    fps                  | 374         |
|    iterations           | 63          |
|    time_elapsed         | 344         |
|    total_timesteps      | 129024      |
| train/                  |             |
|    approx_kl            | 0.015770137 |
|    clip_fraction        | 0.128       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.507      |
|    explained_variance   | 0.869       |
|    learning_rate        | 0.0003      |
|    loss                 | 2.13        |
|    n_updates            | 620         |
|    policy_gradient_loss | -0.0253     |
|    value_loss           | 5.99        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 10.2        |
|    ep_rew_mean          | 3.48        |
| time/                   |             |
|    fps                  | 375         |
|    iterations           | 64          |
|    time_elapsed         | 349         |
|    total_timesteps      | 131072      |
| train/                  |             |
|    approx_kl            | 0.007838536 |
|    clip_fraction        | 0.0755      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.446      |
|    explained_variance   | 0.749       |
|    learning_rate        | 0.0003      |
|    loss                 | 11.9        |
|    n_updates            | 630         |
|    policy_gradient_loss | -0.0106     |
|    value_loss           | 9.21        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 10          |
|    ep_rew_mean          | 3.8         |
| time/                   |             |
|    fps                  | 375         |
|    iterations           | 65          |
|    time_elapsed         | 354         |
|    total_timesteps      | 133120      |
| train/                  |             |
|    approx_kl            | 0.035350725 |
|    clip_fraction        | 0.101       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.368      |
|    explained_variance   | 0.867       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.388       |
|    n_updates            | 640         |
|    policy_gradient_loss | -0.0213     |
|    value_loss           | 1.11        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 14.6        |
|    ep_rew_mean          | -0.59       |
| time/                   |             |
|    fps                  | 375         |
|    iterations           | 66          |
|    time_elapsed         | 360         |
|    total_timesteps      | 135168      |
| train/                  |             |
|    approx_kl            | 0.042963736 |
|    clip_fraction        | 0.0817      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.321      |
|    explained_variance   | 0.941       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.00919     |
|    n_updates            | 650         |
|    policy_gradient_loss | -0.0127     |
|    value_loss           | 0.325       |
-----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 10.3       |
|    ep_rew_mean          | 3.64       |
| time/                   |            |
|    fps                  | 374        |
|    iterations           | 67         |
|    time_elapsed         | 366        |
|    total_timesteps      | 137216     |
| train/                  |            |
|    approx_kl            | 0.08396668 |
|    clip_fraction        | 0.184      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.409     |
|    explained_variance   | 0.803      |
|    learning_rate        | 0.0003     |
|    loss                 | 2.09       |
|    n_updates            | 660        |
|    policy_gradient_loss | -0.0203    |
|    value_loss           | 8.99       |
----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 19.8       |
|    ep_rew_mean          | -9.15      |
| time/                   |            |
|    fps                  | 374        |
|    iterations           | 68         |
|    time_elapsed         | 371        |
|    total_timesteps      | 139264     |
| train/                  |            |
|    approx_kl            | 0.15326424 |
|    clip_fraction        | 0.199      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.39      |
|    explained_variance   | 0.894      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.122      |
|    n_updates            | 670        |
|    policy_gradient_loss | -0.0201    |
|    value_loss           | 0.415      |
----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 9.38       |
|    ep_rew_mean          | 3.18       |
| time/                   |            |
|    fps                  | 374        |
|    iterations           | 69         |
|    time_elapsed         | 376        |
|    total_timesteps      | 141312     |
| train/                  |            |
|    approx_kl            | 0.21504487 |
|    clip_fraction        | 0.411      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.483     |
|    explained_variance   | 0.408      |
|    learning_rate        | 0.0003     |
|    loss                 | 6.24       |
|    n_updates            | 680        |
|    policy_gradient_loss | -0.0591    |
|    value_loss           | 18.4       |
----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 9.98        |
|    ep_rew_mean          | 3.8         |
| time/                   |             |
|    fps                  | 375         |
|    iterations           | 70          |
|    time_elapsed         | 382         |
|    total_timesteps      | 143360      |
| train/                  |             |
|    approx_kl            | 0.050312966 |
|    clip_fraction        | 0.0584      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.341      |
|    explained_variance   | 0.707       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0602      |
|    n_updates            | 690         |
|    policy_gradient_loss | -0.00661    |
|    value_loss           | 0.845       |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.7        |
|    ep_rew_mean          | 2.07        |
| time/                   |             |
|    fps                  | 375         |
|    iterations           | 71          |
|    time_elapsed         | 387         |
|    total_timesteps      | 145408      |
| train/                  |             |
|    approx_kl            | 0.018565401 |
|    clip_fraction        | 0.0643      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.322      |
|    explained_variance   | 0.977       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.348       |
|    n_updates            | 700         |
|    policy_gradient_loss | -0.0112     |
|    value_loss           | 0.175       |
-----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 10.2       |
|    ep_rew_mean          | 3.47       |
| time/                   |            |
|    fps                  | 375        |
|    iterations           | 72         |
|    time_elapsed         | 392        |
|    total_timesteps      | 147456     |
| train/                  |            |
|    approx_kl            | 0.10186449 |
|    clip_fraction        | 0.19       |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.395     |
|    explained_variance   | 0.47       |
|    learning_rate        | 0.0003     |
|    loss                 | 1.66       |
|    n_updates            | 710        |
|    policy_gradient_loss | -0.0419    |
|    value_loss           | 4.71       |
----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 10.2       |
|    ep_rew_mean          | 3.72       |
| time/                   |            |
|    fps                  | 375        |
|    iterations           | 73         |
|    time_elapsed         | 398        |
|    total_timesteps      | 149504     |
| train/                  |            |
|    approx_kl            | 0.01903234 |
|    clip_fraction        | 0.0737     |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.333     |
|    explained_variance   | 0.723      |
|    learning_rate        | 0.0003     |
|    loss                 | 2.71       |
|    n_updates            | 720        |
|    policy_gradient_loss | -0.022     |
|    value_loss           | 2.97       |
----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 9.94        |
|    ep_rew_mean          | 3.92        |
| time/                   |             |
|    fps                  | 375         |
|    iterations           | 74          |
|    time_elapsed         | 403         |
|    total_timesteps      | 151552      |
| train/                  |             |
|    approx_kl            | 0.026403807 |
|    clip_fraction        | 0.065       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.322      |
|    explained_variance   | 0.934       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.24        |
|    n_updates            | 730         |
|    policy_gradient_loss | -0.00481    |
|    value_loss           | 0.481       |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 10.2        |
|    ep_rew_mean          | 3.81        |
| time/                   |             |
|    fps                  | 376         |
|    iterations           | 75          |
|    time_elapsed         | 408         |
|    total_timesteps      | 153600      |
| train/                  |             |
|    approx_kl            | 0.011051587 |
|    clip_fraction        | 0.0593      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.324      |
|    explained_variance   | 0.984       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.00908     |
|    n_updates            | 740         |
|    policy_gradient_loss | -0.00591    |
|    value_loss           | 0.113       |
-----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 10.2       |
|    ep_rew_mean          | 3.63       |
| time/                   |            |
|    fps                  | 376        |
|    iterations           | 76         |
|    time_elapsed         | 413        |
|    total_timesteps      | 155648     |
| train/                  |            |
|    approx_kl            | 0.00972967 |
|    clip_fraction        | 0.0544     |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.339     |
|    explained_variance   | 0.922      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.0226     |
|    n_updates            | 750        |
|    policy_gradient_loss | -0.00438   |
|    value_loss           | 0.758      |
----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 9.92        |
|    ep_rew_mean          | 3.86        |
| time/                   |             |
|    fps                  | 376         |
|    iterations           | 77          |
|    time_elapsed         | 418         |
|    total_timesteps      | 157696      |
| train/                  |             |
|    approx_kl            | 0.013084366 |
|    clip_fraction        | 0.0481      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.348      |
|    explained_variance   | 0.812       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0736      |
|    n_updates            | 760         |
|    policy_gradient_loss | -0.0157     |
|    value_loss           | 1.47        |
-----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 5.05       |
|    ep_rew_mean          | -2.29      |
| time/                   |            |
|    fps                  | 376        |
|    iterations           | 78         |
|    time_elapsed         | 424        |
|    total_timesteps      | 159744     |
| train/                  |            |
|    approx_kl            | 0.42402273 |
|    clip_fraction        | 0.215      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.49      |
|    explained_variance   | 0.983      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.00926    |
|    n_updates            | 770        |
|    policy_gradient_loss | -0.0284    |
|    value_loss           | 0.109      |
----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 4.53       |
|    ep_rew_mean          | -2.17      |
| time/                   |            |
|    fps                  | 376        |
|    iterations           | 79         |
|    time_elapsed         | 429        |
|    total_timesteps      | 161792     |
| train/                  |            |
|    approx_kl            | 0.27915305 |
|    clip_fraction        | 0.138      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.376     |
|    explained_variance   | 0.503      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.373      |
|    n_updates            | 780        |
|    policy_gradient_loss | -0.00444   |
|    value_loss           | 5.03       |
----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 4.38        |
|    ep_rew_mean          | -1.74       |
| time/                   |             |
|    fps                  | 376         |
|    iterations           | 80          |
|    time_elapsed         | 435         |
|    total_timesteps      | 163840      |
| train/                  |             |
|    approx_kl            | 0.019562341 |
|    clip_fraction        | 0.0921      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.457      |
|    explained_variance   | 0.562       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.638       |
|    n_updates            | 790         |
|    policy_gradient_loss | -0.0128     |
|    value_loss           | 0.895       |
-----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 4.34       |
|    ep_rew_mean          | -1.7       |
| time/                   |            |
|    fps                  | 376        |
|    iterations           | 81         |
|    time_elapsed         | 440        |
|    total_timesteps      | 165888     |
| train/                  |            |
|    approx_kl            | 0.18844175 |
|    clip_fraction        | 0.147      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.279     |
|    explained_variance   | 0.102      |
|    learning_rate        | 0.0003     |
|    loss                 | 1.03       |
|    n_updates            | 800        |
|    policy_gradient_loss | 0.013      |
|    value_loss           | 11.4       |
----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 5.54       |
|    ep_rew_mean          | -0.66      |
| time/                   |            |
|    fps                  | 376        |
|    iterations           | 82         |
|    time_elapsed         | 445        |
|    total_timesteps      | 167936     |
| train/                  |            |
|    approx_kl            | 0.06374202 |
|    clip_fraction        | 0.21       |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.398     |
|    explained_variance   | 0.566      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.599      |
|    n_updates            | 810        |
|    policy_gradient_loss | -0.0207    |
|    value_loss           | 1.59       |
----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 7.65       |
|    ep_rew_mean          | 1.61       |
| time/                   |            |
|    fps                  | 376        |
|    iterations           | 83         |
|    time_elapsed         | 451        |
|    total_timesteps      | 169984     |
| train/                  |            |
|    approx_kl            | 0.06125373 |
|    clip_fraction        | 0.251      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.434     |
|    explained_variance   | 0.299      |
|    learning_rate        | 0.0003     |
|    loss                 | 1.27       |
|    n_updates            | 820        |
|    policy_gradient_loss | -0.0399    |
|    value_loss           | 3.7        |
----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 8.86        |
|    ep_rew_mean          | 2.46        |
| time/                   |             |
|    fps                  | 376         |
|    iterations           | 84          |
|    time_elapsed         | 457         |
|    total_timesteps      | 172032      |
| train/                  |             |
|    approx_kl            | 0.047695376 |
|    clip_fraction        | 0.171       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.387      |
|    explained_variance   | 0.336       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.72        |
|    n_updates            | 830         |
|    policy_gradient_loss | -0.0407     |
|    value_loss           | 3.91        |
-----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 9.84       |
|    ep_rew_mean          | 3.66       |
| time/                   |            |
|    fps                  | 376        |
|    iterations           | 85         |
|    time_elapsed         | 462        |
|    total_timesteps      | 174080     |
| train/                  |            |
|    approx_kl            | 0.13684572 |
|    clip_fraction        | 0.133      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.346     |
|    explained_variance   | 0.625      |
|    learning_rate        | 0.0003     |
|    loss                 | 1.13       |
|    n_updates            | 840        |
|    policy_gradient_loss | -0.0231    |
|    value_loss           | 2.36       |
----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 25.5       |
|    ep_rew_mean          | -12.2      |
| time/                   |            |
|    fps                  | 376        |
|    iterations           | 86         |
|    time_elapsed         | 467        |
|    total_timesteps      | 176128     |
| train/                  |            |
|    approx_kl            | 0.36170986 |
|    clip_fraction        | 0.332      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.473     |
|    explained_variance   | 0.919      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.364      |
|    n_updates            | 850        |
|    policy_gradient_loss | -0.0374    |
|    value_loss           | 0.278      |
----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 17.2        |
|    ep_rew_mean          | -4.45       |
| time/                   |             |
|    fps                  | 377         |
|    iterations           | 87          |
|    time_elapsed         | 472         |
|    total_timesteps      | 178176      |
| train/                  |             |
|    approx_kl            | 0.102079734 |
|    clip_fraction        | 0.34        |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.613      |
|    explained_variance   | 0.0593      |
|    learning_rate        | 0.0003      |
|    loss                 | 5.95        |
|    n_updates            | 860         |
|    policy_gradient_loss | -0.04       |
|    value_loss           | 20.6        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 14.5        |
|    ep_rew_mean          | -2.36       |
| time/                   |             |
|    fps                  | 378         |
|    iterations           | 88          |
|    time_elapsed         | 476         |
|    total_timesteps      | 180224      |
| train/                  |             |
|    approx_kl            | 0.042367905 |
|    clip_fraction        | 0.264       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.799      |
|    explained_variance   | 0.69        |
|    learning_rate        | 0.0003      |
|    loss                 | 4.22        |
|    n_updates            | 870         |
|    policy_gradient_loss | -0.0265     |
|    value_loss           | 16.7        |
-----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 12.3       |
|    ep_rew_mean          | 0.46       |
| time/                   |            |
|    fps                  | 379        |
|    iterations           | 89         |
|    time_elapsed         | 480        |
|    total_timesteps      | 182272     |
| train/                  |            |
|    approx_kl            | 0.02418825 |
|    clip_fraction        | 0.262      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.744     |
|    explained_variance   | 0.75       |
|    learning_rate        | 0.0003     |
|    loss                 | 3.08       |
|    n_updates            | 880        |
|    policy_gradient_loss | -0.026     |
|    value_loss           | 16.2       |
----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11          |
|    ep_rew_mean          | 2.19        |
| time/                   |             |
|    fps                  | 380         |
|    iterations           | 90          |
|    time_elapsed         | 484         |
|    total_timesteps      | 184320      |
| train/                  |             |
|    approx_kl            | 0.024347484 |
|    clip_fraction        | 0.271       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.669      |
|    explained_variance   | 0.675       |
|    learning_rate        | 0.0003      |
|    loss                 | 4.76        |
|    n_updates            | 890         |
|    policy_gradient_loss | -0.0243     |
|    value_loss           | 10.1        |
-----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 12.1       |
|    ep_rew_mean          | 1.35       |
| time/                   |            |
|    fps                  | 381        |
|    iterations           | 91         |
|    time_elapsed         | 488        |
|    total_timesteps      | 186368     |
| train/                  |            |
|    approx_kl            | 0.02668532 |
|    clip_fraction        | 0.256      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.576     |
|    explained_variance   | 0.706      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.797      |
|    n_updates            | 900        |
|    policy_gradient_loss | -0.0378    |
|    value_loss           | 1.6        |
----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11          |
|    ep_rew_mean          | 2.55        |
| time/                   |             |
|    fps                  | 382         |
|    iterations           | 92          |
|    time_elapsed         | 492         |
|    total_timesteps      | 188416      |
| train/                  |             |
|    approx_kl            | 0.017341316 |
|    clip_fraction        | 0.0919      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.681      |
|    explained_variance   | 0.865       |
|    learning_rate        | 0.0003      |
|    loss                 | 9.01        |
|    n_updates            | 910         |
|    policy_gradient_loss | -0.0076     |
|    value_loss           | 25.5        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 10.2        |
|    ep_rew_mean          | 2.27        |
| time/                   |             |
|    fps                  | 383         |
|    iterations           | 93          |
|    time_elapsed         | 496         |
|    total_timesteps      | 190464      |
| train/                  |             |
|    approx_kl            | 0.070169866 |
|    clip_fraction        | 0.224       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.476      |
|    explained_variance   | 0.791       |
|    learning_rate        | 0.0003      |
|    loss                 | 13          |
|    n_updates            | 920         |
|    policy_gradient_loss | -0.0203     |
|    value_loss           | 5.41        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 9.33        |
|    ep_rew_mean          | 3.17        |
| time/                   |             |
|    fps                  | 384         |
|    iterations           | 94          |
|    time_elapsed         | 500         |
|    total_timesteps      | 192512      |
| train/                  |             |
|    approx_kl            | 0.062242664 |
|    clip_fraction        | 0.184       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.504      |
|    explained_variance   | 0.426       |
|    learning_rate        | 0.0003      |
|    loss                 | 1.94        |
|    n_updates            | 930         |
|    policy_gradient_loss | -0.0392     |
|    value_loss           | 6.22        |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 9.85        |
|    ep_rew_mean          | 3.79        |
| time/                   |             |
|    fps                  | 385         |
|    iterations           | 95          |
|    time_elapsed         | 504         |
|    total_timesteps      | 194560      |
| train/                  |             |
|    approx_kl            | 0.042221397 |
|    clip_fraction        | 0.0712      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.328      |
|    explained_variance   | 0.893       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.236       |
|    n_updates            | 940         |
|    policy_gradient_loss | -0.0162     |
|    value_loss           | 0.692       |
-----------------------------------------


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 8.17        |
|    ep_rew_mean          | 2.09        |
| time/                   |             |
|    fps                  | 387         |
|    iterations           | 96          |
|    time_elapsed         | 507         |
|    total_timesteps      | 196608      |
| train/                  |             |
|    approx_kl            | 0.036962293 |
|    clip_fraction        | 0.154       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.374      |
|    explained_variance   | 0.981       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0447     |
|    n_updates            | 950         |
|    policy_gradient_loss | -0.0142     |
|    value_loss           | 0.151       |
-----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 9.38       |
|    ep_rew_mean          | 3.36       |
| time/                   |            |
|    fps                  | 387        |
|    iterations           | 97         |
|    time_elapsed         | 512        |
|    total_timesteps      | 198656     |
| train/                  |            |
|    approx_kl            | 0.06385146 |
|    clip_fraction        | 0.245      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.366     |
|    explained_variance   | 0.553      |
|    learning_rate        | 0.0003     |
|    loss                 | 1.4        |
|    n_updates            | 960        |
|    policy_gradient_loss | -0.0399    |
|    value_loss           | 2.79       |
----------------------------------------


----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 24.8       |
|    ep_rew_mean          | -11.4      |
| time/                   |            |
|    fps                  | 389        |
|    iterations           | 98         |
|    time_elapsed         | 515        |
|    total_timesteps      | 200704     |
| train/                  |            |
|    approx_kl            | 0.26313344 |
|    clip_fraction        | 0.178      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.363     |
|    explained_variance   | 0.898      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.247      |
|    n_updates            | 970        |
|    policy_gradient_loss | -0.0285    |
|    value_loss           | 0.521      |
----------------------------------------


VBox(children=(Label(value='0.438 MB of 0.438 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))



0,1
global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
rollout/ep_len_mean,▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▄█▅▄▂▂▃▂▂▄▂▂▂▁▁▂▂▃▂▂▂▄
rollout/ep_rew_mean,▇▇▇▇▇▇▇▇▇█████████▅▁▅▆██▇██▆███▇▇██▇███▅
time/fps,█▂▂▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃
train/approx_kl,▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▂▂▂▁▂▃▂▁▁█▁▂▃▂▁▂▂▅
train/clip_fraction,█▇▇▅▅▆▅▄▆▄▂▂▂▂▃▂▂▂▁▂▄▄▅▅▆▃▂▄▂▂▂▄▂▅▃▅▅▅▂▄
train/clip_range,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train/entropy_loss,▁▁▂▃▃▄▅▅▆▆▆▇▇▇▇▇▇▇██▇▇▇▇▇▇█▇▇▇█▇▇▇▇▆▇▇█▇
train/explained_variance,▃▄▅▆▆▆▆▄▆▆▅▇█▇▆██▆▁▄▄▃▇▇▄▇█▇▆▆██▅▃▆▆▆▇▇▇
train/learning_rate,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
global_step,200704.0
rollout/ep_len_mean,24.77
rollout/ep_rew_mean,-11.45
time/fps,389.0
train/approx_kl,0.26313
train/clip_fraction,0.17793
train/clip_range,0.2
train/entropy_loss,-0.36315
train/explained_variance,0.89819
train/learning_rate,0.0003


In [7]:
# Persist the trained PPO model under the name "wordle_ppo_model".
# NOTE(review): the training cell above already calls model.save() with the
# same path after run.finish(), so this cell repeats that save.
model.save("wordle_ppo_model")

In [8]:
# Reload the saved model from "wordle_ppo_model", rebinding `model` so the
# evaluation below runs against the persisted artifact rather than the
# in-memory training object. No env is passed to load(); the evaluation cell
# only calls model.predict().
model = PPO.load("wordle_ppo_model")

In [9]:
# Evaluate the reloaded policy: play N_EVAL_EPISODES episodes on `env` and
# print the mean total reward per episode.
N_EVAL_EPISODES = 1000  # used for both the loop bound and the mean's divisor

rewards = 0  # running sum of every step reward across all episodes
for _episode in tqdm(range(N_EVAL_EPISODES)):
    obs, _ = env.reset()
    done = False
    while not done:
        # NOTE(review): predict() is called without `deterministic=True`, so
        # actions are presumably sampled from the policy distribution and the
        # printed mean will vary between runs -- confirm this is intended.
        action, _ = model.predict(obs)
        obs, reward, terminated, truncated, info = env.step(action)
        rewards += reward
        # An episode ends when it terminates OR is truncated. The original
        # loop discarded `truncated`, so a truncated episode would keep being
        # stepped (potentially forever) instead of being reset.
        done = terminated or truncated
print(rewards / N_EVAL_EPISODES)

100%|██████████| 1000/1000 [00:20<00:00, 49.06it/s]

-6.703



