# Stable Baselines3 - Mountain Car

In [None]:
# System packages needed for rendering/visualisation (Colab-style environment)
!apt-get update -qq && apt-get install ffmpeg freeglut3-dev xvfb -qq  # For visualization
#!pip install box2d-py -q
# %pip (not !pip) installs into the environment of the running kernel
%pip install -q "stable-baselines3[extra]>=2.0.0a4"

## Importações

In [None]:
import pandas as pd
import stable_baselines3
import gymnasium as gym
import numpy as np

### Importando Algoritmo e Política

In [None]:
from stable_baselines3 import DQN
from stable_baselines3.ppo import MlpPolicy

### Função Auxiliar

In [None]:
def evaluate(model, num_episodes=10, deterministic=True, qi=0):
    """
    Evaluate an RL agent and record every episode return in ``df_treinos``.

    The agent is rolled out on the environment returned by ``model.get_env()``.
    The return of episode ``i`` (0-based) is written to the notebook-level
    DataFrame ``df_treinos`` at row ``qi``, column ``i + 1``; ``df_treinos``
    must therefore be defined before this function is called.

    :param model: the RL agent; must provide ``get_env()`` and ``predict()``
    :param num_episodes: (int) number of episodes to evaluate it
    :param deterministic: (bool) forwarded to ``model.predict``
    :param qi: row label of ``df_treinos`` that receives the episode returns
    :return: (float) mean reward over the ``num_episodes`` episodes
    """
    # This function will only work for a single Environment
    vec_env = model.get_env()
    all_episode_rewards = []
    for i in range(num_episodes):
        episode_rewards = []
        done = False
        obs = vec_env.reset()
        while not done:
            # _states are only useful when using LSTM policies
            action, _states = model.predict(obs, deterministic=deterministic)
            # The env is vectorized, so action, reward and done are arrays,
            # and step() returns a 4-tuple (obs, reward, done, info).
            obs, reward, done, info = vec_env.step(action)
            episode_rewards.append(reward)

        # Sum once and reuse; the result is a 1-element array (single env)
        episode_return = sum(episode_rewards)
        all_episode_rewards.append(episode_return)
        # Use one .loc[row, col] assignment: the chained form
        # df.loc[row][col] = ... may write to a temporary copy and be lost.
        df_treinos.loc[qi, i + 1] = episode_return[0]

    mean_episode_reward = np.mean(all_episode_rewards)
    print("Mean reward:", mean_episode_reward, "Num episodes:", num_episodes)

    return mean_episode_reward

### Método de avaliação do Stable Baselines

In [None]:
from stable_baselines3.common.evaluation import evaluate_policy

### Criando Ambiente

In [None]:
# Build the MountainCar-v0 environment that the models below are created on
env = gym.make(
    "MountainCar-v0",
    render_mode="rgb_array",
)

  and should_run_async(code)


In [None]:
# Observation space of the environment
env.observation_space

Box([-1.2  -0.07], [0.6  0.07], (2,), float32)

In [None]:
# Action space of the environment
env.action_space

Discrete(3)

### Avaliando agente não treinado

In [None]:
# Use a separate environment for evaluation
eval_env = gym.make("MountainCar-v0", render_mode="rgb_array")

# DQN agent with the same hyperparameters used for training later on
model = DQN(
    'MlpPolicy',
    env,
    verbose=1,
    train_freq=16,
    gradient_steps=8,
    gamma=0.99,
    exploration_fraction=0.2,
    exploration_final_eps=0.07,
    target_update_interval=100,
    learning_starts=1000,
    buffer_size=10000,
    batch_size=128,
    learning_rate=4e-3,
    policy_kwargs={"net_arch": [256, 256]},
)

# Baseline: evaluate the agent before any training has happened
mean_reward, std_reward = evaluate_policy(
    model,
    eval_env,
    n_eval_episodes=100,
)

print(f"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}")

# Discard the untrained model
del model

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
mean_reward:-200.00 +/- 0.00


## Treinando agente e avaliando

### Inicialização

In [None]:
# Training budgets to compare: one agent is trained per entry in this list
quantidades = [2e4, 5e4, 1e5, 2e5, 4e5]
# Number of evaluation episodes run for each trained agent
qtd_teste = 10

In [None]:
# Results table: one row per training budget, one column per evaluation episode (1..qtd_teste)
df_treinos = pd.DataFrame(index=quantidades, columns=np.arange(1, qtd_teste + 1))

### Execução

In [None]:
for qtd in quantidades:

    # `quantidades` may hold floats (e.g. 2e4); learn() is documented to take
    # an integer number of timesteps, so convert once and reuse.
    total_steps = int(qtd)

    # Algorithm + policy
    model = DQN('MlpPolicy',
                env,
                verbose=1,
                train_freq=16,
                gradient_steps=8,
                gamma=0.99,
                exploration_fraction=0.2,
                exploration_final_eps=0.07,
                target_update_interval=100,
                learning_starts=1000,
                buffer_size=10000,
                batch_size=128,
                learning_rate=4e-3,
                policy_kwargs=dict(net_arch=[256, 256]))

    # Train the agent
    model.learn(total_timesteps=total_steps)

    # Save the model; name it in whole thousands of steps, e.g.
    # "20k_mountain_car_DQN" instead of "20000.0k_mountain_car_DQN"
    model.save(f"./{total_steps // 1000}k_mountain_car_DQN")

    # Load the model
    #model = DQN.load(f"./{total_steps // 1000}k_mountain_car_DQN", env)

    # Evaluate the model; qi keeps the original `qtd` value so it matches
    # the row label used in df_treinos' index
    evaluate(model, num_episodes=qtd_teste, qi=qtd)


    # Delete the model
    del model

    # Reset the environment
    env.reset()

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 200      |
|    ep_rew_mean      | -200     |
|    exploration_rate | 0.814    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 9195     |
|    time_elapsed     | 0        |
|    total_timesteps  | 800      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 200      |
|    ep_rew_mean      | -200     |
|    exploration_rate | 0.628    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 1935     |
|    time_elapsed     | 0        |
|    total_timesteps  | 1600     |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 0.0027   |
|    n_updates        | 296      |
-------------------------------

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 196      |
|    ep_rew_mean      | -196     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 68       |
|    fps              | 793      |
|    time_elapsed     | 16       |
|    total_timesteps  | 13350    |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 0.00333  |
|    n_updates        | 6176     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 197      |
|    ep_rew_mean      | -197     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 72       |
|    fps              | 791      |
|    time_elapsed     | 17       |
|    total_timesteps  | 14150    |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 0.109    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 200      |
|    ep_rew_mean      | -200     |
|    exploration_rate | 0.405    |
| time/               |          |
|    episodes         | 32       |
|    fps              | 920      |
|    time_elapsed     | 6        |
|    total_timesteps  | 6400     |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 0.00174  |
|    n_updates        | 2696     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 200      |
|    ep_rew_mean      | -200     |
|    exploration_rate | 0.33     |
| time/               |          |
|    episodes         | 36       |
|    fps              | 895      |
|    time_elapsed     | 8        |
|    total_timesteps  | 7200     |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 0.000753 |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 191      |
|    ep_rew_mean      | -191     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 96       |
|    fps              | 774      |
|    time_elapsed     | 23       |
|    total_timesteps  | 18324    |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 0.626    |
|    n_updates        | 8664     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 191      |
|    ep_rew_mean      | -191     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 100      |
|    fps              | 774      |
|    time_elapsed     | 24       |
|    total_timesteps  | 19051    |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 0.559    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 166      |
|    ep_rew_mean      | -166     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 160      |
|    fps              | 770      |
|    time_elapsed     | 36       |
|    total_timesteps  | 28495    |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 14.2     |
|    n_updates        | 13744    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 165      |
|    ep_rew_mean      | -165     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 164      |
|    fps              | 770      |
|    time_elapsed     | 37       |
|    total_timesteps  | 29165    |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 21.6     |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 155      |
|    ep_rew_mean      | -155     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 224      |
|    fps              | 766      |
|    time_elapsed     | 50       |
|    total_timesteps  | 38597    |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 156      |
|    n_updates        | 18800    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 156      |
|    ep_rew_mean      | -156     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 228      |
|    fps              | 766      |
|    time_elapsed     | 51       |
|    total_timesteps  | 39272    |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 183      |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 159      |
|    ep_rew_mean      | -159     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 288      |
|    fps              | 764      |
|    time_elapsed     | 63       |
|    total_timesteps  | 48740    |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 429      |
|    n_updates        | 23872    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 157      |
|    ep_rew_mean      | -157     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 292      |
|    fps              | 764      |
|    time_elapsed     | 64       |
|    total_timesteps  | 49254    |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 496      |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 200      |
|    ep_rew_mean      | -200     |
|    exploration_rate | 0.479    |
| time/               |          |
|    episodes         | 56       |
|    fps              | 894      |
|    time_elapsed     | 12       |
|    total_timesteps  | 11200    |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 0.0234   |
|    n_updates        | 5096     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 200      |
|    ep_rew_mean      | -200     |
|    exploration_rate | 0.442    |
| time/               |          |
|    episodes         | 60       |
|    fps              | 885      |
|    time_elapsed     | 13       |
|    total_timesteps  | 12000    |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 0.000362 |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 197      |
|    ep_rew_mean      | -197     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 120      |
|    fps              | 799      |
|    time_elapsed     | 29       |
|    total_timesteps  | 23722    |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 0.199    |
|    n_updates        | 11360    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 196      |
|    ep_rew_mean      | -196     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 124      |
|    fps              | 798      |
|    time_elapsed     | 30       |
|    total_timesteps  | 24391    |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 0.0831   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 170      |
|    ep_rew_mean      | -170     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 184      |
|    fps              | 781      |
|    time_elapsed     | 43       |
|    total_timesteps  | 33808    |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 0.155    |
|    n_updates        | 16400    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 167      |
|    ep_rew_mean      | -167     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 188      |
|    fps              | 780      |
|    time_elapsed     | 43       |
|    total_timesteps  | 34292    |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 0.195    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 139      |
|    ep_rew_mean      | -139     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 248      |
|    fps              | 767      |
|    time_elapsed     | 55       |
|    total_timesteps  | 42388    |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 0.256    |
|    n_updates        | 20696    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 139      |
|    ep_rew_mean      | -139     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 252      |
|    fps              | 766      |
|    time_elapsed     | 55       |
|    total_timesteps  | 42898    |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 0.29     |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 126      |
|    ep_rew_mean      | -126     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 312      |
|    fps              | 758      |
|    time_elapsed     | 65       |
|    total_timesteps  | 50080    |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 7.29     |
|    n_updates        | 24536    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 125      |
|    ep_rew_mean      | -125     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 316      |
|    fps              | 758      |
|    time_elapsed     | 66       |
|    total_timesteps  | 50513    |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 10.2     |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 132      |
|    ep_rew_mean      | -132     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 376      |
|    fps              | 755      |
|    time_elapsed     | 78       |
|    total_timesteps  | 58965    |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 78       |
|    n_updates        | 28984    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 134      |
|    ep_rew_mean      | -134     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 380      |
|    fps              | 755      |
|    time_elapsed     | 78       |
|    total_timesteps  | 59571    |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 136      |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 157      |
|    ep_rew_mean      | -157     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 440      |
|    fps              | 748      |
|    time_elapsed     | 92       |
|    total_timesteps  | 69374    |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 912      |
|    n_updates        | 34184    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 158      |
|    ep_rew_mean      | -158     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 444      |
|    fps              | 748      |
|    time_elapsed     | 93       |
|    total_timesteps  | 70060    |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 795      |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 171      |
|    ep_rew_mean      | -171     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 504      |
|    fps              | 744      |
|    time_elapsed     | 108      |
|    total_timesteps  | 80570    |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 1.46e+03 |
|    n_updates        | 39784    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 173      |
|    ep_rew_mean      | -173     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 508      |
|    fps              | 744      |
|    time_elapsed     | 109      |
|    total_timesteps  | 81370    |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 1.58e+03 |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 191      |
|    ep_rew_mean      | -191     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 568      |
|    fps              | 743      |
|    time_elapsed     | 125      |
|    total_timesteps  | 93283    |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 1.08e+03 |
|    n_updates        | 46144    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 191      |
|    ep_rew_mean      | -191     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 572      |
|    fps              | 743      |
|    time_elapsed     | 126      |
|    total_timesteps  | 94083    |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 1.37e+03 |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 200      |
|    ep_rew_mean      | -200     |
|    exploration_rate | 0.851    |
| time/               |          |
|    episodes         | 32       |
|    fps              | 947      |
|    time_elapsed     | 6        |
|    total_timesteps  | 6400     |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 0.00104  |
|    n_updates        | 2696     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 200      |
|    ep_rew_mean      | -200     |
|    exploration_rate | 0.833    |
| time/               |          |
|    episodes         | 36       |
|    fps              | 930      |
|    time_elapsed     | 7        |
|    total_timesteps  | 7200     |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 0.00114  |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 200      |
|    ep_rew_mean      | -200     |
|    exploration_rate | 0.554    |
| time/               |          |
|    episodes         | 96       |
|    fps              | 834      |
|    time_elapsed     | 23       |
|    total_timesteps  | 19200    |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 0.00232  |
|    n_updates        | 9096     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 200      |
|    ep_rew_mean      | -200     |
|    exploration_rate | 0.535    |
| time/               |          |
|    episodes         | 100      |
|    fps              | 830      |
|    time_elapsed     | 24       |
|    total_timesteps  | 20000    |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 0.0079   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 200      |
|    ep_rew_mean      | -200     |
|    exploration_rate | 0.256    |
| time/               |          |
|    episodes         | 160      |
|    fps              | 774      |
|    time_elapsed     | 41       |
|    total_timesteps  | 32000    |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 0.00154  |
|    n_updates        | 15496    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 200      |
|    ep_rew_mean      | -200     |
|    exploration_rate | 0.237    |
| time/               |          |
|    episodes         | 164      |
|    fps              | 772      |
|    time_elapsed     | 42       |
|    total_timesteps  | 32800    |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 0.15     |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 200      |
|    ep_rew_mean      | -200     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 224      |
|    fps              | 754      |
|    time_elapsed     | 59       |
|    total_timesteps  | 44783    |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 0.196    |
|    n_updates        | 21888    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 200      |
|    ep_rew_mean      | -200     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 228      |
|    fps              | 753      |
|    time_elapsed     | 60       |
|    total_timesteps  | 45583    |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 0.00055  |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 200      |
|    ep_rew_mean      | -200     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 288      |
|    fps              | 745      |
|    time_elapsed     | 77       |
|    total_timesteps  | 57546    |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 0.0414   |
|    n_updates        | 28272    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 200      |
|    ep_rew_mean      | -200     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 292      |
|    fps              | 744      |
|    time_elapsed     | 78       |
|    total_timesteps  | 58346    |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 0.0963   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 200      |
|    ep_rew_mean      | -200     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 352      |
|    fps              | 734      |
|    time_elapsed     | 95       |
|    total_timesteps  | 70346    |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 0.00564  |
|    n_updates        | 34672    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 200      |
|    ep_rew_mean      | -200     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 356      |
|    fps              | 734      |
|    time_elapsed     | 96       |
|    total_timesteps  | 71146    |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 0.207    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 200      |
|    ep_rew_mean      | -200     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 416      |
|    fps              | 728      |
|    time_elapsed     | 114      |
|    total_timesteps  | 83146    |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 0.000684 |
|    n_updates        | 41072    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 200      |
|    ep_rew_mean      | -200     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 420      |
|    fps              | 727      |
|    time_elapsed     | 115      |
|    total_timesteps  | 83946    |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 0.00153  |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 200      |
|    ep_rew_mean      | -200     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 480      |
|    fps              | 725      |
|    time_elapsed     | 132      |
|    total_timesteps  | 95946    |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 0.0267   |
|    n_updates        | 47472    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 200      |
|    ep_rew_mean      | -200     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 484      |
|    fps              | 725      |
|    time_elapsed     | 133      |
|    total_timesteps  | 96746    |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 0.071    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 199      |
|    ep_rew_mean      | -199     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 544      |
|    fps              | 722      |
|    time_elapsed     | 150      |
|    total_timesteps  | 108668   |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 0.00328  |
|    n_updates        | 53832    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 199      |
|    ep_rew_mean      | -199     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 548      |
|    fps              | 722      |
|    time_elapsed     | 151      |
|    total_timesteps  | 109468   |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 0.00298  |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 190      |
|    ep_rew_mean      | -190     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 608      |
|    fps              | 720      |
|    time_elapsed     | 167      |
|    total_timesteps  | 120494   |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 0.337    |
|    n_updates        | 59744    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 190      |
|    ep_rew_mean      | -190     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 612      |
|    fps              | 719      |
|    time_elapsed     | 168      |
|    total_timesteps  | 121294   |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 0.353    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 170      |
|    ep_rew_mean      | -170     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 672      |
|    fps              | 706      |
|    time_elapsed     | 185      |
|    total_timesteps  | 131077   |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 0.145    |
|    n_updates        | 65040    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 170      |
|    ep_rew_mean      | -170     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 676      |
|    fps              | 706      |
|    time_elapsed     | 186      |
|    total_timesteps  | 131720   |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 0.312    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 153      |
|    ep_rew_mean      | -153     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 736      |
|    fps              | 696      |
|    time_elapsed     | 201      |
|    total_timesteps  | 140505   |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 0.351    |
|    n_updates        | 69752    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 152      |
|    ep_rew_mean      | -152     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 740      |
|    fps              | 695      |
|    time_elapsed     | 202      |
|    total_timesteps  | 141021   |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 0.249    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 131      |
|    ep_rew_mean      | -131     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 800      |
|    fps              | 686      |
|    time_elapsed     | 216      |
|    total_timesteps  | 148601   |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 0.235    |
|    n_updates        | 73800    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 131      |
|    ep_rew_mean      | -131     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 804      |
|    fps              | 685      |
|    time_elapsed     | 217      |
|    total_timesteps  | 149220   |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 0.142    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 120      |
|    ep_rew_mean      | -120     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 864      |
|    fps              | 679      |
|    time_elapsed     | 229      |
|    total_timesteps  | 156124   |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 0.138    |
|    n_updates        | 77560    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 121      |
|    ep_rew_mean      | -121     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 868      |
|    fps              | 678      |
|    time_elapsed     | 230      |
|    total_timesteps  | 156609   |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 0.192    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 124      |
|    ep_rew_mean      | -124     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 928      |
|    fps              | 671      |
|    time_elapsed     | 244      |
|    total_timesteps  | 164369   |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 0.15     |
|    n_updates        | 81688    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 124      |
|    ep_rew_mean      | -124     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 932      |
|    fps              | 671      |
|    time_elapsed     | 245      |
|    total_timesteps  | 164823   |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 0.309    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 127      |
|    ep_rew_mean      | -127     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 992      |
|    fps              | 665      |
|    time_elapsed     | 259      |
|    total_timesteps  | 172582   |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 0.157    |
|    n_updates        | 85792    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 126      |
|    ep_rew_mean      | -126     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 996      |
|    fps              | 665      |
|    time_elapsed     | 260      |
|    total_timesteps  | 173072   |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 0.171    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 130      |
|    ep_rew_mean      | -130     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 1056     |
|    fps              | 659      |
|    time_elapsed     | 274      |
|    total_timesteps  | 181083   |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 0.314    |
|    n_updates        | 90040    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 130      |
|    ep_rew_mean      | -130     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 1060     |
|    fps              | 659      |
|    time_elapsed     | 275      |
|    total_timesteps  | 181594   |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 0.27     |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 140      |
|    ep_rew_mean      | -140     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 1120     |
|    fps              | 650      |
|    time_elapsed     | 292      |
|    total_timesteps  | 190181   |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 0.337    |
|    n_updates        | 94592    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 141      |
|    ep_rew_mean      | -141     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 1124     |
|    fps              | 649      |
|    time_elapsed     | 293      |
|    total_timesteps  | 190867   |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 0.362    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 150      |
|    ep_rew_mean      | -150     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 1184     |
|    fps              | 640      |
|    time_elapsed     | 312      |
|    total_timesteps  | 199955   |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 0.213    |
|    n_updates        | 99480    |
----------------------------------
Mean reward: -140.4 Num episodes: 10
Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 200      |
|    ep_rew_mean      | -200     |
|    exploration_rate | 0.991    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 7650     |
|    time_elapsed     | 0        |
|    total_timesteps  | 800  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 200      |
|    ep_rew_mean      | -200     |
|    exploration_rate | 0.851    |
| time/               |          |
|    episodes         | 64       |
|    fps              | 743      |
|    time_elapsed     | 17       |
|    total_timesteps  | 12800    |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 0.0406   |
|    n_updates        | 5896     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 200      |
|    ep_rew_mean      | -200     |
|    exploration_rate | 0.842    |
| time/               |          |
|    episodes         | 68       |
|    fps              | 746      |
|    time_elapsed     | 18       |
|    total_timesteps  | 13600    |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 0.0411   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 200      |
|    ep_rew_mean      | -200     |
|    exploration_rate | 0.702    |
| time/               |          |
|    episodes         | 128      |
|    fps              | 759      |
|    time_elapsed     | 33       |
|    total_timesteps  | 25600    |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 0.000183 |
|    n_updates        | 12296    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 200      |
|    ep_rew_mean      | -200     |
|    exploration_rate | 0.693    |
| time/               |          |
|    episodes         | 132      |
|    fps              | 761      |
|    time_elapsed     | 34       |
|    total_timesteps  | 26400    |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 0.000326 |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 200      |
|    ep_rew_mean      | -200     |
|    exploration_rate | 0.554    |
| time/               |          |
|    episodes         | 192      |
|    fps              | 756      |
|    time_elapsed     | 50       |
|    total_timesteps  | 38400    |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 0.000305 |
|    n_updates        | 18696    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 200      |
|    ep_rew_mean      | -200     |
|    exploration_rate | 0.544    |
| time/               |          |
|    episodes         | 196      |
|    fps              | 755      |
|    time_elapsed     | 51       |
|    total_timesteps  | 39200    |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 0.000467 |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 200      |
|    ep_rew_mean      | -200     |
|    exploration_rate | 0.405    |
| time/               |          |
|    episodes         | 256      |
|    fps              | 753      |
|    time_elapsed     | 67       |
|    total_timesteps  | 51200    |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 0.000267 |
|    n_updates        | 25096    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 200      |
|    ep_rew_mean      | -200     |
|    exploration_rate | 0.396    |
| time/               |          |
|    episodes         | 260      |
|    fps              | 753      |
|    time_elapsed     | 69       |
|    total_timesteps  | 52000    |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 0.00142  |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 200      |
|    ep_rew_mean      | -200     |
|    exploration_rate | 0.256    |
| time/               |          |
|    episodes         | 320      |
|    fps              | 746      |
|    time_elapsed     | 85       |
|    total_timesteps  | 63964    |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 0.00704  |
|    n_updates        | 31480    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 200      |
|    ep_rew_mean      | -200     |
|    exploration_rate | 0.247    |
| time/               |          |
|    episodes         | 324      |
|    fps              | 746      |
|    time_elapsed     | 86       |
|    total_timesteps  | 64764    |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 0.0211   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 187      |
|    ep_rew_mean      | -187     |
|    exploration_rate | 0.123    |
| time/               |          |
|    episodes         | 384      |
|    fps              | 738      |
|    time_elapsed     | 102      |
|    total_timesteps  | 75474    |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 2.71     |
|    n_updates        | 37240    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 183      |
|    ep_rew_mean      | -183     |
|    exploration_rate | 0.118    |
| time/               |          |
|    episodes         | 388      |
|    fps              | 737      |
|    time_elapsed     | 102      |
|    total_timesteps  | 75900    |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 1.28     |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 142      |
|    ep_rew_mean      | -142     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 448      |
|    fps              | 719      |
|    time_elapsed     | 116      |
|    total_timesteps  | 83574    |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 54.8     |
|    n_updates        | 41288    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 140      |
|    ep_rew_mean      | -140     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 452      |
|    fps              | 718      |
|    time_elapsed     | 117      |
|    total_timesteps  | 84165    |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 103      |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 151      |
|    ep_rew_mean      | -151     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 512      |
|    fps              | 704      |
|    time_elapsed     | 133      |
|    total_timesteps  | 93959    |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 631      |
|    n_updates        | 46480    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 154      |
|    ep_rew_mean      | -154     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 516      |
|    fps              | 703      |
|    time_elapsed     | 134      |
|    total_timesteps  | 94704    |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 789      |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 184      |
|    ep_rew_mean      | -184     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 576      |
|    fps              | 695      |
|    time_elapsed     | 153      |
|    total_timesteps  | 106447   |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 1.29e+03 |
|    n_updates        | 52720    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 186      |
|    ep_rew_mean      | -186     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 580      |
|    fps              | 694      |
|    time_elapsed     | 154      |
|    total_timesteps  | 107247   |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 1.36e+03 |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 199      |
|    ep_rew_mean      | -199     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 640      |
|    fps              | 684      |
|    time_elapsed     | 174      |
|    total_timesteps  | 119201   |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 2.27e+03 |
|    n_updates        | 59104    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 199      |
|    ep_rew_mean      | -199     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 644      |
|    fps              | 684      |
|    time_elapsed     | 175      |
|    total_timesteps  | 120001   |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 2.33e+03 |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 200      |
|    ep_rew_mean      | -200     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 704      |
|    fps              | 683      |
|    time_elapsed     | 193      |
|    total_timesteps  | 132001   |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 1.38e+03 |
|    n_updates        | 65504    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 200      |
|    ep_rew_mean      | -200     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 708      |
|    fps              | 683      |
|    time_elapsed     | 194      |
|    total_timesteps  | 132801   |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 1.35e+03 |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 200      |
|    ep_rew_mean      | -200     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 768      |
|    fps              | 678      |
|    time_elapsed     | 213      |
|    total_timesteps  | 144801   |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 1.78e+03 |
|    n_updates        | 71904    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 200      |
|    ep_rew_mean      | -200     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 772      |
|    fps              | 678      |
|    time_elapsed     | 214      |
|    total_timesteps  | 145601   |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 1.48e+03 |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 200      |
|    ep_rew_mean      | -200     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 832      |
|    fps              | 665      |
|    time_elapsed     | 236      |
|    total_timesteps  | 157601   |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 1.04e+03 |
|    n_updates        | 78304    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 200      |
|    ep_rew_mean      | -200     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 836      |
|    fps              | 665      |
|    time_elapsed     | 238      |
|    total_timesteps  | 158401   |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 1.15e+03 |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 198      |
|    ep_rew_mean      | -198     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 896      |
|    fps              | 655      |
|    time_elapsed     | 259      |
|    total_timesteps  | 170191   |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 1.37e+03 |
|    n_updates        | 84592    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 197      |
|    ep_rew_mean      | -197     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 900      |
|    fps              | 655      |
|    time_elapsed     | 260      |
|    total_timesteps  | 170910   |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 1.83e+03 |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 177      |
|    ep_rew_mean      | -177     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 960      |
|    fps              | 649      |
|    time_elapsed     | 278      |
|    total_timesteps  | 180907   |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 2.72e+03 |
|    n_updates        | 89952    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 176      |
|    ep_rew_mean      | -176     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 964      |
|    fps              | 648      |
|    time_elapsed     | 279      |
|    total_timesteps  | 181559   |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 2.83e+03 |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 160      |
|    ep_rew_mean      | -160     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 1024     |
|    fps              | 644      |
|    time_elapsed     | 296      |
|    total_timesteps  | 191221   |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 6.68e+03 |
|    n_updates        | 95112    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 160      |
|    ep_rew_mean      | -160     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 1028     |
|    fps              | 643      |
|    time_elapsed     | 298      |
|    total_timesteps  | 191859   |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 5.64e+03 |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 161      |
|    ep_rew_mean      | -161     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 1088     |
|    fps              | 639      |
|    time_elapsed     | 315      |
|    total_timesteps  | 201589   |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 5.59e+03 |
|    n_updates        | 100296   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 162      |
|    ep_rew_mean      | -162     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 1092     |
|    fps              | 638      |
|    time_elapsed     | 316      |
|    total_timesteps  | 202336   |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 4.9e+03  |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 181      |
|    ep_rew_mean      | -181     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 1152     |
|    fps              | 633      |
|    time_elapsed     | 337      |
|    total_timesteps  | 213870   |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 6.3e+03  |
|    n_updates        | 106432   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 183      |
|    ep_rew_mean      | -183     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 1156     |
|    fps              | 633      |
|    time_elapsed     | 338      |
|    total_timesteps  | 214667   |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 4.27e+03 |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 200      |
|    ep_rew_mean      | -200     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 1216     |
|    fps              | 628      |
|    time_elapsed     | 360      |
|    total_timesteps  | 226666   |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 6.44e+03 |
|    n_updates        | 112832   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 200      |
|    ep_rew_mean      | -200     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 1220     |
|    fps              | 628      |
|    time_elapsed     | 361      |
|    total_timesteps  | 227466   |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 4.64e+03 |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 199      |
|    ep_rew_mean      | -199     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 1280     |
|    fps              | 624      |
|    time_elapsed     | 383      |
|    total_timesteps  | 239318   |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 7.2e+03  |
|    n_updates        | 119160   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 199      |
|    ep_rew_mean      | -199     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 1284     |
|    fps              | 624      |
|    time_elapsed     | 384      |
|    total_timesteps  | 240118   |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 1.09e+04 |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 199      |
|    ep_rew_mean      | -199     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 1344     |
|    fps              | 622      |
|    time_elapsed     | 405      |
|    total_timesteps  | 252118   |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 5.21e+03 |
|    n_updates        | 125560   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 199      |
|    ep_rew_mean      | -199     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 1348     |
|    fps              | 621      |
|    time_elapsed     | 406      |
|    total_timesteps  | 252918   |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 5.61e+03 |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 200      |
|    ep_rew_mean      | -200     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 1408     |
|    fps              | 620      |
|    time_elapsed     | 426      |
|    total_timesteps  | 264918   |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 5.32e+03 |
|    n_updates        | 131960   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 200      |
|    ep_rew_mean      | -200     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 1412     |
|    fps              | 620      |
|    time_elapsed     | 428      |
|    total_timesteps  | 265718   |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 5.18e+03 |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 200      |
|    ep_rew_mean      | -200     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 1472     |
|    fps              | 618      |
|    time_elapsed     | 448      |
|    total_timesteps  | 277718   |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 3.56e+03 |
|    n_updates        | 138360   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 200      |
|    ep_rew_mean      | -200     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 1476     |
|    fps              | 618      |
|    time_elapsed     | 450      |
|    total_timesteps  | 278518   |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 3.58e+03 |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 197      |
|    ep_rew_mean      | -197     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 1536     |
|    fps              | 617      |
|    time_elapsed     | 469      |
|    total_timesteps  | 290246   |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 2.24e+03 |
|    n_updates        | 144624   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 196      |
|    ep_rew_mean      | -196     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 1540     |
|    fps              | 617      |
|    time_elapsed     | 471      |
|    total_timesteps  | 290961   |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 2.53e+03 |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 179      |
|    ep_rew_mean      | -179     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 1600     |
|    fps              | 615      |
|    time_elapsed     | 489      |
|    total_timesteps  | 301102   |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 2.45e+03 |
|    n_updates        | 150048   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 176      |
|    ep_rew_mean      | -176     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 1604     |
|    fps              | 615      |
|    time_elapsed     | 490      |
|    total_timesteps  | 301663   |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 4.03e+03 |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 156      |
|    ep_rew_mean      | -156     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 1664     |
|    fps              | 613      |
|    time_elapsed     | 507      |
|    total_timesteps  | 311011   |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 8.71e+03 |
|    n_updates        | 155008   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 155      |
|    ep_rew_mean      | -155     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 1668     |
|    fps              | 612      |
|    time_elapsed     | 508      |
|    total_timesteps  | 311614   |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 8.81e+03 |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 165      |
|    ep_rew_mean      | -165     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 1728     |
|    fps              | 612      |
|    time_elapsed     | 525      |
|    total_timesteps  | 321815   |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 1.07e+04 |
|    n_updates        | 160408   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 165      |
|    ep_rew_mean      | -165     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 1732     |
|    fps              | 612      |
|    time_elapsed     | 526      |
|    total_timesteps  | 322418   |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 8.78e+03 |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 163      |
|    ep_rew_mean      | -163     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 1792     |
|    fps              | 613      |
|    time_elapsed     | 540      |
|    total_timesteps  | 331933   |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 5.71e+03 |
|    n_updates        | 165464   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 162      |
|    ep_rew_mean      | -162     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 1796     |
|    fps              | 613      |
|    time_elapsed     | 541      |
|    total_timesteps  | 332519   |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 6.71e+03 |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 151      |
|    ep_rew_mean      | -151     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 1856     |
|    fps              | 614      |
|    time_elapsed     | 555      |
|    total_timesteps  | 341445   |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 7.87e+03 |
|    n_updates        | 170224   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 151      |
|    ep_rew_mean      | -151     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 1860     |
|    fps              | 614      |
|    time_elapsed     | 556      |
|    total_timesteps  | 342021   |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 6.64e+03 |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 156      |
|    ep_rew_mean      | -156     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 1920     |
|    fps              | 616      |
|    time_elapsed     | 570      |
|    total_timesteps  | 351729   |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 2.28e+03 |
|    n_updates        | 175368   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 157      |
|    ep_rew_mean      | -157     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 1924     |
|    fps              | 616      |
|    time_elapsed     | 571      |
|    total_timesteps  | 352380   |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 3.72e+03 |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 177      |
|    ep_rew_mean      | -177     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 1984     |
|    fps              | 616      |
|    time_elapsed     | 588      |
|    total_timesteps  | 363227   |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 3.03e+03 |
|    n_updates        | 181112   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 176      |
|    ep_rew_mean      | -176     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 1988     |
|    fps              | 617      |
|    time_elapsed     | 589      |
|    total_timesteps  | 363880   |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 1.79e+03 |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 165      |
|    ep_rew_mean      | -165     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 2048     |
|    fps              | 617      |
|    time_elapsed     | 604      |
|    total_timesteps  | 373191   |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 4.41e+03 |
|    n_updates        | 186096   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 163      |
|    ep_rew_mean      | -163     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 2052     |
|    fps              | 617      |
|    time_elapsed     | 605      |
|    total_timesteps  | 373759   |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 6.02e+03 |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 151      |
|    ep_rew_mean      | -151     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 2112     |
|    fps              | 617      |
|    time_elapsed     | 619      |
|    total_timesteps  | 382959   |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 2.61e+04 |
|    n_updates        | 190976   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 152      |
|    ep_rew_mean      | -152     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 2116     |
|    fps              | 617      |
|    time_elapsed     | 620      |
|    total_timesteps  | 383673   |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 2.08e+04 |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 154      |
|    ep_rew_mean      | -154     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 2176     |
|    fps              | 618      |
|    time_elapsed     | 634      |
|    total_timesteps  | 392688   |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 3.3e+04  |
|    n_updates        | 195840   |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 155      |
|    ep_rew_mean      | -155     |
|    exploration_rate | 0.07     |
| time/               |          |
|    episodes         | 2180     |
|    fps              | 618      |
|    time_elapsed     | 635      |
|    total_timesteps  | 393315   |
| train/              |          |
|    learning_rate    | 0.004    |
|    loss             | 3.1e+04  |
|    n_updates      

## Avalia o agente treinado

In [None]:
# Display the rewards table filled in by evaluate(): one row per `qi` label,
# one column per evaluation episode (1..num_episodes).
df_treinos

Unnamed: 0,1,2,3,4,5,6,7,8,9,10
20000.0,-200.0,-200.0,-200.0,-200.0,-200.0,-200.0,-200.0,-200.0,-200.0,-200.0
50000.0,-184.0,-148.0,-154.0,-88.0,-148.0,-90.0,-165.0,-167.0,-164.0,-167.0
100000.0,-200.0,-200.0,-200.0,-200.0,-200.0,-200.0,-200.0,-200.0,-200.0,-200.0
200000.0,-115.0,-106.0,-182.0,-121.0,-114.0,-175.0,-200.0,-113.0,-106.0,-172.0
400000.0,-146.0,-180.0,-171.0,-156.0,-145.0,-149.0,-148.0,-147.0,-90.0,-145.0


In [None]:
# Transpose so each row label of df_treinos becomes a column, cast the values
# to float, and show summary statistics (count, mean, std, quartiles) per column.
df_treinos.T.astype(float).describe()

Unnamed: 0,20000.0,50000.0,100000.0,200000.0,400000.0
count,10.0,10.0,10.0,10.0,10.0
mean,-200.0,-147.5,-200.0,-140.4,-147.7
std,0.0,32.592603,0.0,36.990089,23.579888
min,-200.0,-184.0,-200.0,-200.0,-180.0
25%,-200.0,-166.5,-200.0,-174.25,-154.25
50%,-200.0,-159.0,-200.0,-118.0,-147.5
75%,-200.0,-148.0,-200.0,-113.25,-145.25
max,-200.0,-88.0,-200.0,-106.0,-90.0


### Prepare video recording

In [None]:
# Set up fake display; otherwise rendering will fail
import os
# Launch Xvfb as display :1 with a 1024x768x24 screen 0; the trailing "&"
# backgrounds it in the shell so this call returns immediately.
os.system("Xvfb :1 -screen 0 1024x768x24 &")
# Point this process (and anything it spawns) at that same display number.
os.environ['DISPLAY'] = ':1'

In [None]:
import base64
from pathlib import Path

from IPython import display as ipythondisplay


def show_videos(video_path="", prefix=""):
    """
    Embed every matching ``.mp4`` file inline in the notebook output.

    Taken from https://github.com/eleurent/highway-env

    Each file is read from disk, base64-encoded and placed in its own HTML
    ``<video>`` tag; the tags are joined with ``<br>`` and displayed once.
    Files are processed in sorted path order so the output is reproducible
    (``Path.glob`` does not guarantee any ordering).

    :param video_path: (str) Path to the folder containing videos
    :param prefix: (str) Show only the videos whose file name starts with this prefix
    """
    html = []
    # sorted() makes the display order deterministic across runs and platforms.
    for mp4 in sorted(Path(video_path).glob("{}*.mp4".format(prefix))):
        video_b64 = base64.b64encode(mp4.read_bytes())
        html.append(
            """<video alt="{}" autoplay
                    loop controls style="height: 400px;">
                    <source src="data:video/mp4;base64,{}" type="video/mp4" />
                </video>""".format(
                mp4, video_b64.decode("ascii")
            )
        )
    ipythondisplay.display(ipythondisplay.HTML(data="<br>".join(html)))

We will record a video using the [VecVideoRecorder](https://stable-baselines3.readthedocs.io/en/master/guide/vec_envs.html#vecvideorecorder) wrapper. You will learn about these wrappers in the next notebook.

In [None]:
from stable_baselines3.common.vec_env import VecVideoRecorder, DummyVecEnv


def record_video(env_id, model, video_length=500, prefix="", video_folder="videos/"):
    """
    Roll out ``model`` in MountainCar-v0 and record the rendered frames to a video.

    :param env_id: (str) Currently ignored: the environment is always created
        as "MountainCar-v0". The parameter is kept so existing callers keep working.
    :param model: (RL model) Agent whose ``predict`` method chooses the actions
    :param video_length: (int) Number of environment steps to run and record
    :param prefix: (str) Name prefix passed to the video recorder
    :param video_folder: (str) Folder passed to the video recorder for its output
    """
    eval_env = DummyVecEnv([lambda: gym.make("MountainCar-v0", render_mode="rgb_array")])
    # Start the video at step=0 and record `video_length` steps
    eval_env = VecVideoRecorder(
        eval_env,
        video_folder=video_folder,
        record_video_trigger=lambda step: step == 0,
        video_length=video_length,
        name_prefix=prefix,
    )

    try:
        obs = eval_env.reset()
        for _ in range(video_length):
            action, _ = model.predict(obs)
            obs, _, _, _ = eval_env.step(action)
    finally:
        # Close the video recorder even if the rollout raises, so the
        # environment and recorder are never left open.
        eval_env.close()

### Visualize trained agent



In [None]:
#record_video("MountainCar-v0_50k", model, video_length=500, prefix="ppo-cartpole")

Saving video to /content/videos/ppo-cartpole-step-0-to-step-500.mp4
Moviepy - Building video /content/videos/ppo-cartpole-step-0-to-step-500.mp4.
Moviepy - Writing video /content/videos/ppo-cartpole-step-0-to-step-500.mp4





Moviepy - Done !
Moviepy - video ready /content/videos/ppo-cartpole-step-0-to-step-500.mp4


### Inicializar modelos salvos e gravar vídeos

In [None]:
# For each entry in `quantidades`, reload the saved DQN checkpoint and record a rollout.
for qtd in quantidades:
    checkpoint_name = f"{qtd}k_mountain_car_DQN"
    model = DQN.load(checkpoint_name, env)
    # record_video's first argument is an environment id, so pass the Gymnasium
    # id rather than the checkpoint name; the checkpoint name only labels the file.
    # The "ppo-" prefix is kept because the display cell below filters on "ppo".
    record_video("MountainCar-v0", model, video_length=500, prefix=f"ppo-{checkpoint_name}")

In [None]:
# Embed every .mp4 in the "videos" folder whose file name starts with "ppo".
show_videos("videos", prefix="ppo")