In [1]:
import gym
import stable_baselines3.common.env_checker
import gym_jumping_task
import numpy as np
from stable_baselines3 import DQN, PPO, A2C, SAC, DDPG
from gym import spaces

pygame 2.1.2 (SDL 2.0.18, Python 3.9.7)
Hello from the pygame community. https://www.pygame.org/contribute.html


In [2]:
class CustomEnv(gym.Env):
    """Gym adapter that re-exposes a wrapped environment with channel-first uint8 frames.

    The wrapped environment (``self.actualEnv``) is created with ``gym.make``.
    Every observation it returns is cast to ``uint8`` and reshaped to
    ``OBS_SHAPE`` so that it matches ``self.observation_space``.

    Args:
        env_id: Id passed to ``gym.make``. Defaults to ``'jumping-task-v0'``
            (presumably registered by the ``gym_jumping_task`` import at the
            top of the notebook -- verify).
    """
    metadata = {"render.modes": ["human"]}

    # Single source of truth for the observation layout; used by the space
    # definition and by every observation returned from step()/reset().
    OBS_SHAPE = (1, 60, 60)
    # Size of the discrete action space.
    NUM_ACTIONS = 2

    def __init__(self, env_id='jumping-task-v0'):
        super().__init__()
        self.action_space = spaces.Discrete(self.NUM_ACTIONS)
        # Image-like input, channel-first (channel-last also works).
        self.observation_space = spaces.Box(low=0, high=1,
                                            shape=self.OBS_SHAPE, dtype=np.uint8)
        self.actualEnv = gym.make(env_id)

    def _to_observation(self, raw):
        """Cast a raw frame from the wrapped env to uint8 and reshape to OBS_SHAPE."""
        # NOTE(review): astype(uint8) truncates fractional values toward zero.
        # If the wrapped env emits intermediate grey levels (e.g. 0.5) they
        # become 0 here -- confirm the wrapped env's value range.
        return raw.astype(np.uint8).reshape(self.OBS_SHAPE)

    def step(self, action):
        """Advance the wrapped env by one action.

        Returns:
            A 4-tuple ``(observation, reward, done, info)`` where the
            observation is converted via ``_to_observation`` and the reward
            is coerced to a Python ``float``.
        """
        observation, r, done, info = self.actualEnv.step(action)
        return self._to_observation(observation), float(r), done, info

    def reset(self):
        """Reset the wrapped env and return only the converted first observation."""
        # Only the observation is returned; reward, done and info can't be included.
        return self._to_observation(self.actualEnv.reset())

    def render(self, mode="human"):
        """Intentionally a no-op: rendering is not forwarded to the wrapped env."""
        pass

    def close(self):
        """Release the wrapped environment's resources."""
        self.actualEnv.close()


### PPO

In [3]:
# Train a PPO agent with an MLP policy on a fresh wrapped environment;
# training metrics are also written to ./tensorboard/ for TensorBoard.
env = CustomEnv()
model = PPO(
    policy="MlpPolicy",
    env=env,
    verbose=1,
    tensorboard_log="./tensorboard/",
)
model.learn(total_timesteps=500_000)

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Logging to ./tensorboard/PPO_1
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 28.9     |
|    ep_rew_mean     | 26.9     |
| time/              |          |
|    fps             | 356      |
|    iterations      | 1        |
|    time_elapsed    | 5        |
|    total_timesteps | 2048     |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 29.4         |
|    ep_rew_mean          | 27.4         |
| time/                   |              |
|    fps                  | 325          |
|    iterations           | 2            |
|    time_elapsed         | 12           |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0076264464 |
|    clip_fraction        | 0.0457       |
|    clip_range   

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 30.4         |
|    ep_rew_mean          | 33.5         |
| time/                   |              |
|    fps                  | 301          |
|    iterations           | 11           |
|    time_elapsed         | 74           |
|    total_timesteps      | 22528        |
| train/                  |              |
|    approx_kl            | 0.0049303807 |
|    clip_fraction        | 0.0566       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.585       |
|    explained_variance   | 0.288        |
|    learning_rate        | 0.0003       |
|    loss                 | 37.4         |
|    n_updates            | 100          |
|    policy_gradient_loss | -0.00898     |
|    value_loss           | 72           |
------------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_m

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 34.2         |
|    ep_rew_mean          | 47.5         |
| time/                   |              |
|    fps                  | 299          |
|    iterations           | 21           |
|    time_elapsed         | 143          |
|    total_timesteps      | 43008        |
| train/                  |              |
|    approx_kl            | 0.0076810196 |
|    clip_fraction        | 0.0416       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.505       |
|    explained_variance   | 0.566        |
|    learning_rate        | 0.0003       |
|    loss                 | 80.5         |
|    n_updates            | 200          |
|    policy_gradient_loss | -0.00492     |
|    value_loss           | 137          |
------------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 36.5        |
|    ep_rew_mean          | 60          |
| time/                   |             |
|    fps                  | 299         |
|    iterations           | 31          |
|    time_elapsed         | 212         |
|    total_timesteps      | 63488       |
| train/                  |             |
|    approx_kl            | 0.007282842 |
|    clip_fraction        | 0.0522      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.371      |
|    explained_variance   | 0.496       |
|    learning_rate        | 0.0003      |
|    loss                 | 162         |
|    n_updates            | 300         |
|    policy_gradient_loss | -0.00468    |
|    value_loss           | 347         |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 37.8  

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 40.1        |
|    ep_rew_mean          | 71.7        |
| time/                   |             |
|    fps                  | 295         |
|    iterations           | 41          |
|    time_elapsed         | 283         |
|    total_timesteps      | 83968       |
| train/                  |             |
|    approx_kl            | 0.005183803 |
|    clip_fraction        | 0.0592      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.354      |
|    explained_variance   | 0.52        |
|    learning_rate        | 0.0003      |
|    loss                 | 229         |
|    n_updates            | 400         |
|    policy_gradient_loss | -0.00282    |
|    value_loss           | 478         |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 37.8  

<stable_baselines3.ppo.ppo.PPO at 0x1d7a4460b80>

### DQN

In [4]:
# Train a DQN agent with an MLP policy on a fresh wrapped environment;
# training metrics are also written to ./tensorboard/ for TensorBoard.
# NOTE(review): the recorded log shows train/ metrics only from ~50k
# timesteps onward, which looks like DQN's default warm-up before learning
# starts -- confirm against the installed Stable-Baselines3 version.
env = CustomEnv()
model = DQN(
    policy="MlpPolicy",
    env=env,
    verbose=1,
    tensorboard_log="./tensorboard/",
)
model.learn(total_timesteps=500_000)

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Logging to ./tensorboard/DQN_1
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 27.8     |
|    ep_rew_mean      | 25.8     |
|    exploration_rate | 0.989    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 4624     |
|    time_elapsed     | 0        |
|    total_timesteps  | 111      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 29.4     |
|    ep_rew_mean      | 27.4     |
|    exploration_rate | 0.978    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 3092     |
|    time_elapsed     | 0        |
|    total_timesteps  | 235      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.8     |

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 29.1     |
|    ep_rew_mean      | 27.1     |
|    exploration_rate | 0.757    |
| time/               |          |
|    episodes         | 88       |
|    fps              | 5017     |
|    time_elapsed     | 0        |
|    total_timesteps  | 2559     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 29.1     |
|    ep_rew_mean      | 27.1     |
|    exploration_rate | 0.745    |
| time/               |          |
|    episodes         | 92       |
|    fps              | 5037     |
|    time_elapsed     | 0        |
|    total_timesteps  | 2680     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 29.1     |
|    ep_rew_mean      | 27.1     |
|    exploration_rate | 0.734    |
| time/               |          |
|    episodes       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 29.2     |
|    ep_rew_mean      | 28.2     |
|    exploration_rate | 0.515    |
| time/               |          |
|    episodes         | 176      |
|    fps              | 5393     |
|    time_elapsed     | 0        |
|    total_timesteps  | 5102     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 29.1     |
|    ep_rew_mean      | 28.2     |
|    exploration_rate | 0.506    |
| time/               |          |
|    episodes         | 180      |
|    fps              | 5404     |
|    time_elapsed     | 0        |
|    total_timesteps  | 5205     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.8     |
|    ep_rew_mean      | 27.8     |
|    exploration_rate | 0.496    |
| time/               |          |
|    episodes       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.5     |
|    ep_rew_mean      | 27.5     |
|    exploration_rate | 0.28     |
| time/               |          |
|    episodes         | 264      |
|    fps              | 5583     |
|    time_elapsed     | 1        |
|    total_timesteps  | 7583     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.4     |
|    ep_rew_mean      | 27.4     |
|    exploration_rate | 0.269    |
| time/               |          |
|    episodes         | 268      |
|    fps              | 5573     |
|    time_elapsed     | 1        |
|    total_timesteps  | 7692     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.1     |
|    ep_rew_mean      | 27.2     |
|    exploration_rate | 0.259    |
| time/               |          |
|    episodes       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 29.1     |
|    ep_rew_mean      | 28.1     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 352      |
|    fps              | 5595     |
|    time_elapsed     | 1        |
|    total_timesteps  | 10128    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.9     |
|    ep_rew_mean      | 26.9     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 356      |
|    fps              | 5601     |
|    time_elapsed     | 1        |
|    total_timesteps  | 10262    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.9     |
|    ep_rew_mean      | 26.9     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.6     |
|    ep_rew_mean      | 26.6     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 440      |
|    fps              | 5346     |
|    time_elapsed     | 2        |
|    total_timesteps  | 12600    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.4     |
|    ep_rew_mean      | 26.4     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 444      |
|    fps              | 5340     |
|    time_elapsed     | 2        |
|    total_timesteps  | 12714    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.3     |
|    ep_rew_mean      | 26.3     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.8     |
|    ep_rew_mean      | 27.8     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 528      |
|    fps              | 5443     |
|    time_elapsed     | 2        |
|    total_timesteps  | 15153    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.7     |
|    ep_rew_mean      | 27.7     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 532      |
|    fps              | 5447     |
|    time_elapsed     | 2        |
|    total_timesteps  | 15262    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.8     |
|    ep_rew_mean      | 27.8     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.2     |
|    ep_rew_mean      | 26.2     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 616      |
|    fps              | 5553     |
|    time_elapsed     | 3        |
|    total_timesteps  | 17625    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.4     |
|    ep_rew_mean      | 27.4     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 620      |
|    fps              | 5559     |
|    time_elapsed     | 3        |
|    total_timesteps  | 17759    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.4     |
|    ep_rew_mean      | 27.4     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.7     |
|    ep_rew_mean      | 27.7     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 704      |
|    fps              | 5639     |
|    time_elapsed     | 3        |
|    total_timesteps  | 20198    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.7     |
|    ep_rew_mean      | 27.7     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 708      |
|    fps              | 5642     |
|    time_elapsed     | 3        |
|    total_timesteps  | 20299    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 29       |
|    ep_rew_mean      | 28       |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.1     |
|    ep_rew_mean      | 26.1     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 792      |
|    fps              | 5700     |
|    time_elapsed     | 3        |
|    total_timesteps  | 22664    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.1     |
|    ep_rew_mean      | 26.1     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 796      |
|    fps              | 5704     |
|    time_elapsed     | 3        |
|    total_timesteps  | 22787    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.1     |
|    ep_rew_mean      | 26.1     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 29       |
|    ep_rew_mean      | 27       |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 880      |
|    fps              | 5736     |
|    time_elapsed     | 4        |
|    total_timesteps  | 25194    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 29.1     |
|    ep_rew_mean      | 27.1     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 884      |
|    fps              | 5740     |
|    time_elapsed     | 4        |
|    total_timesteps  | 25325    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 29.1     |
|    ep_rew_mean      | 27.1     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.9     |
|    ep_rew_mean      | 26.9     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 968      |
|    fps              | 5775     |
|    time_elapsed     | 4        |
|    total_timesteps  | 27718    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.8     |
|    ep_rew_mean      | 26.8     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 972      |
|    fps              | 5776     |
|    time_elapsed     | 4        |
|    total_timesteps  | 27824    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.8     |
|    ep_rew_mean      | 26.8     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.8     |
|    ep_rew_mean      | 26.8     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1056     |
|    fps              | 5814     |
|    time_elapsed     | 5        |
|    total_timesteps  | 30269    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.9     |
|    ep_rew_mean      | 26.9     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1060     |
|    fps              | 5817     |
|    time_elapsed     | 5        |
|    total_timesteps  | 30380    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 29       |
|    ep_rew_mean      | 27       |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.6     |
|    ep_rew_mean      | 26.6     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1144     |
|    fps              | 5848     |
|    time_elapsed     | 5        |
|    total_timesteps  | 32803    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.6     |
|    ep_rew_mean      | 26.6     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1148     |
|    fps              | 5845     |
|    time_elapsed     | 5        |
|    total_timesteps  | 32909    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.8     |
|    ep_rew_mean      | 26.8     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.1     |
|    ep_rew_mean      | 26.1     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1232     |
|    fps              | 5806     |
|    time_elapsed     | 6        |
|    total_timesteps  | 35292    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 27.9     |
|    ep_rew_mean      | 25.9     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1236     |
|    fps              | 5806     |
|    time_elapsed     | 6        |
|    total_timesteps  | 35385    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 27.8     |
|    ep_rew_mean      | 25.8     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.2     |
|    ep_rew_mean      | 26.2     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1320     |
|    fps              | 5825     |
|    time_elapsed     | 6        |
|    total_timesteps  | 37736    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28       |
|    ep_rew_mean      | 26       |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1324     |
|    fps              | 5824     |
|    time_elapsed     | 6        |
|    total_timesteps  | 37860    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.1     |
|    ep_rew_mean      | 26.1     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.6     |
|    ep_rew_mean      | 26.6     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1408     |
|    fps              | 5845     |
|    time_elapsed     | 6        |
|    total_timesteps  | 40230    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.5     |
|    ep_rew_mean      | 26.5     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1412     |
|    fps              | 5847     |
|    time_elapsed     | 6        |
|    total_timesteps  | 40354    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.5     |
|    ep_rew_mean      | 26.5     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 27.8     |
|    ep_rew_mean      | 25.8     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1496     |
|    fps              | 5862     |
|    time_elapsed     | 7        |
|    total_timesteps  | 42679    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 27.9     |
|    ep_rew_mean      | 25.9     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1500     |
|    fps              | 5863     |
|    time_elapsed     | 7        |
|    total_timesteps  | 42784    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 27.9     |
|    ep_rew_mean      | 25.9     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.2     |
|    ep_rew_mean      | 27.2     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1584     |
|    fps              | 5885     |
|    time_elapsed     | 7        |
|    total_timesteps  | 45179    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.1     |
|    ep_rew_mean      | 27.1     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1588     |
|    fps              | 5886     |
|    time_elapsed     | 7        |
|    total_timesteps  | 45287    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.2     |
|    ep_rew_mean      | 27.2     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.8     |
|    ep_rew_mean      | 26.8     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1672     |
|    fps              | 5905     |
|    time_elapsed     | 8        |
|    total_timesteps  | 47717    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.9     |
|    ep_rew_mean      | 26.9     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1676     |
|    fps              | 5906     |
|    time_elapsed     | 8        |
|    total_timesteps  | 47829    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.8     |
|    ep_rew_mean      | 26.8     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.7     |
|    ep_rew_mean      | 26.7     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1756     |
|    fps              | 5680     |
|    time_elapsed     | 8        |
|    total_timesteps  | 50128    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.257    |
|    n_updates        | 31       |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.9     |
|    ep_rew_mean      | 27.9     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1760     |
|    fps              | 5495     |
|    time_elapsed     | 9        |
|    total_timesteps  | 50261    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.104    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 27.6     |
|    ep_rew_mean      | 27.7     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1820     |
|    fps              | 3945     |
|    time_elapsed     | 13       |
|    total_timesteps  | 51908    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0925   |
|    n_updates        | 476      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 27.9     |
|    ep_rew_mean      | 28       |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1824     |
|    fps              | 3864     |
|    time_elapsed     | 13       |
|    total_timesteps  | 52039    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0474   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 27.4     |
|    ep_rew_mean      | 26.4     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1884     |
|    fps              | 3093     |
|    time_elapsed     | 17       |
|    total_timesteps  | 53644    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.07     |
|    n_updates        | 910      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 27.6     |
|    ep_rew_mean      | 26.6     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1888     |
|    fps              | 3042     |
|    time_elapsed     | 17       |
|    total_timesteps  | 53793    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00425  |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 26.9     |
|    ep_rew_mean      | 24.9     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1948     |
|    fps              | 2605     |
|    time_elapsed     | 21       |
|    total_timesteps  | 55375    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0456   |
|    n_updates        | 1343     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 27.4     |
|    ep_rew_mean      | 26.5     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1952     |
|    fps              | 2575     |
|    time_elapsed     | 21       |
|    total_timesteps  | 55519    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00757  |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 27.3     |
|    ep_rew_mean      | 26.4     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2012     |
|    fps              | 2238     |
|    time_elapsed     | 25       |
|    total_timesteps  | 57161    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00207  |
|    n_updates        | 1790     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 27.6     |
|    ep_rew_mean      | 26.6     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2016     |
|    fps              | 2218     |
|    time_elapsed     | 25       |
|    total_timesteps  | 57277    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00526  |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.6     |
|    ep_rew_mean      | 26.6     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2076     |
|    fps              | 1969     |
|    time_elapsed     | 29       |
|    total_timesteps  | 59009    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0329   |
|    n_updates        | 2252     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.7     |
|    ep_rew_mean      | 26.7     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2080     |
|    fps              | 1956     |
|    time_elapsed     | 30       |
|    total_timesteps  | 59113    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0578   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.2     |
|    ep_rew_mean      | 27.2     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2140     |
|    fps              | 1766     |
|    time_elapsed     | 34       |
|    total_timesteps  | 60807    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0708   |
|    n_updates        | 2701     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 27.8     |
|    ep_rew_mean      | 26.8     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2144     |
|    fps              | 1757     |
|    time_elapsed     | 34       |
|    total_timesteps  | 60902    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0462   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 26.8     |
|    ep_rew_mean      | 24.8     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2204     |
|    fps              | 1625     |
|    time_elapsed     | 38       |
|    total_timesteps  | 62519    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0752   |
|    n_updates        | 3129     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 27.1     |
|    ep_rew_mean      | 25.1     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2208     |
|    fps              | 1615     |
|    time_elapsed     | 38       |
|    total_timesteps  | 62645    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0254   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.2     |
|    ep_rew_mean      | 28.3     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2268     |
|    fps              | 1504     |
|    time_elapsed     | 42       |
|    total_timesteps  | 64338    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0776   |
|    n_updates        | 3584     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.5     |
|    ep_rew_mean      | 28.5     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2272     |
|    fps              | 1497     |
|    time_elapsed     | 43       |
|    total_timesteps  | 64446    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.061    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.6     |
|    ep_rew_mean      | 27.7     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2332     |
|    fps              | 1401     |
|    time_elapsed     | 47       |
|    total_timesteps  | 66209    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00248  |
|    n_updates        | 4052     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.6     |
|    ep_rew_mean      | 27.6     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2336     |
|    fps              | 1397     |
|    time_elapsed     | 47       |
|    total_timesteps  | 66293    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0114   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 27.9     |
|    ep_rew_mean      | 25.9     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2396     |
|    fps              | 1316     |
|    time_elapsed     | 51       |
|    total_timesteps  | 67938    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0761   |
|    n_updates        | 4484     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.1     |
|    ep_rew_mean      | 26.1     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2400     |
|    fps              | 1311     |
|    time_elapsed     | 51       |
|    total_timesteps  | 68052    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.157    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.2     |
|    ep_rew_mean      | 27.2     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2460     |
|    fps              | 1244     |
|    time_elapsed     | 56       |
|    total_timesteps  | 69766    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0514   |
|    n_updates        | 4941     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.3     |
|    ep_rew_mean      | 27.3     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2464     |
|    fps              | 1241     |
|    time_elapsed     | 56       |
|    total_timesteps  | 69867    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.037    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 29       |
|    ep_rew_mean      | 27       |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2524     |
|    fps              | 1185     |
|    time_elapsed     | 60       |
|    total_timesteps  | 71649    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0376   |
|    n_updates        | 5412     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 29.1     |
|    ep_rew_mean      | 27.1     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2528     |
|    fps              | 1182     |
|    time_elapsed     | 60       |
|    total_timesteps  | 71773    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.152    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 30.5     |
|    ep_rew_mean      | 28.5     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2588     |
|    fps              | 1134     |
|    time_elapsed     | 64       |
|    total_timesteps  | 73603    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00519  |
|    n_updates        | 5900     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 30.7     |
|    ep_rew_mean      | 28.7     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2592     |
|    fps              | 1131     |
|    time_elapsed     | 65       |
|    total_timesteps  | 73745    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0924   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 30.1     |
|    ep_rew_mean      | 28.1     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2652     |
|    fps              | 1086     |
|    time_elapsed     | 69       |
|    total_timesteps  | 75483    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00101  |
|    n_updates        | 6370     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 29.8     |
|    ep_rew_mean      | 27.8     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2656     |
|    fps              | 1084     |
|    time_elapsed     | 69       |
|    total_timesteps  | 75587    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0371   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.1     |
|    ep_rew_mean      | 26.1     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2716     |
|    fps              | 1048     |
|    time_elapsed     | 73       |
|    total_timesteps  | 77257    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00951  |
|    n_updates        | 6814     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28       |
|    ep_rew_mean      | 26       |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2720     |
|    fps              | 1046     |
|    time_elapsed     | 73       |
|    total_timesteps  | 77365    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.119    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 27.8     |
|    ep_rew_mean      | 25.8     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2780     |
|    fps              | 1016     |
|    time_elapsed     | 77       |
|    total_timesteps  | 79035    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00262  |
|    n_updates        | 7258     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.2     |
|    ep_rew_mean      | 26.2     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2784     |
|    fps              | 1014     |
|    time_elapsed     | 78       |
|    total_timesteps  | 79180    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0962   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.5     |
|    ep_rew_mean      | 26.5     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2844     |
|    fps              | 985      |
|    time_elapsed     | 82       |
|    total_timesteps  | 80858    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000681 |
|    n_updates        | 7714     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.8     |
|    ep_rew_mean      | 26.8     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2848     |
|    fps              | 983      |
|    time_elapsed     | 82       |
|    total_timesteps  | 81002    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.118    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 29.1     |
|    ep_rew_mean      | 27.1     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2908     |
|    fps              | 955      |
|    time_elapsed     | 86       |
|    total_timesteps  | 82795    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0811   |
|    n_updates        | 8198     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 29.4     |
|    ep_rew_mean      | 27.4     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2912     |
|    fps              | 953      |
|    time_elapsed     | 86       |
|    total_timesteps  | 82929    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0369   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.8     |
|    ep_rew_mean      | 27.8     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2972     |
|    fps              | 929      |
|    time_elapsed     | 91       |
|    total_timesteps  | 84607    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00961  |
|    n_updates        | 8651     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.8     |
|    ep_rew_mean      | 27.8     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2976     |
|    fps              | 927      |
|    time_elapsed     | 91       |
|    total_timesteps  | 84708    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.14     |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 29.3     |
|    ep_rew_mean      | 29.3     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3036     |
|    fps              | 904      |
|    time_elapsed     | 95       |
|    total_timesteps  | 86525    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00389  |
|    n_updates        | 9131     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 29.6     |
|    ep_rew_mean      | 29.6     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3040     |
|    fps              | 903      |
|    time_elapsed     | 95       |
|    total_timesteps  | 86650    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.119    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 29.5     |
|    ep_rew_mean      | 29.6     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3100     |
|    fps              | 882      |
|    time_elapsed     | 100      |
|    total_timesteps  | 88405    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00487  |
|    n_updates        | 9601     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 29.1     |
|    ep_rew_mean      | 28.1     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3104     |
|    fps              | 881      |
|    time_elapsed     | 100      |
|    total_timesteps  | 88525    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0109   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.8     |
|    ep_rew_mean      | 27.8     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3164     |
|    fps              | 864      |
|    time_elapsed     | 104      |
|    total_timesteps  | 90204    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.229    |
|    n_updates        | 10050    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.7     |
|    ep_rew_mean      | 27.7     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3168     |
|    fps              | 863      |
|    time_elapsed     | 104      |
|    total_timesteps  | 90319    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.2      |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.6     |
|    ep_rew_mean      | 28.6     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3228     |
|    fps              | 846      |
|    time_elapsed     | 108      |
|    total_timesteps  | 92058    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00337  |
|    n_updates        | 10514    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.8     |
|    ep_rew_mean      | 28.9     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3232     |
|    fps              | 844      |
|    time_elapsed     | 109      |
|    total_timesteps  | 92190    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.288    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.2     |
|    ep_rew_mean      | 28.2     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3292     |
|    fps              | 830      |
|    time_elapsed     | 113      |
|    total_timesteps  | 93857    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.11     |
|    n_updates        | 10964    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.1     |
|    ep_rew_mean      | 27.1     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3296     |
|    fps              | 829      |
|    time_elapsed     | 113      |
|    total_timesteps  | 93988    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.254    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 29.2     |
|    ep_rew_mean      | 27.2     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3356     |
|    fps              | 814      |
|    time_elapsed     | 117      |
|    total_timesteps  | 95767    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.123    |
|    n_updates        | 11441    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 29.3     |
|    ep_rew_mean      | 27.3     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3360     |
|    fps              | 813      |
|    time_elapsed     | 117      |
|    total_timesteps  | 95898    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00273  |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 30.1     |
|    ep_rew_mean      | 28.1     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3420     |
|    fps              | 798      |
|    time_elapsed     | 122      |
|    total_timesteps  | 97715    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.138    |
|    n_updates        | 11928    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 30.1     |
|    ep_rew_mean      | 28.1     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3424     |
|    fps              | 797      |
|    time_elapsed     | 122      |
|    total_timesteps  | 97829    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0635   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.4     |
|    ep_rew_mean      | 26.4     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3484     |
|    fps              | 786      |
|    time_elapsed     | 126      |
|    total_timesteps  | 99481    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.14     |
|    n_updates        | 12370    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.4     |
|    ep_rew_mean      | 26.4     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3488     |
|    fps              | 785      |
|    time_elapsed     | 126      |
|    total_timesteps  | 99602    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.227    |
|    n_updates      

<stable_baselines3.dqn.dqn.DQN at 0x1d7a42795e0>

### A2C

In [5]:
# Train an A2C agent on the jumping task.
# A new CustomEnv instance is built here so this run starts from its own
# environment object rather than reusing one from an earlier cell.
env = CustomEnv()

# seed: fixes the RNG seed for this run so that a fresh "Restart & Run All"
# is repeatable. NOTE(review): runs on a CUDA device may still not be
# bit-for-bit identical; confirm if exact reproducibility is required.
model = A2C(
    "MlpPolicy",
    env,
    verbose=1,
    tensorboard_log="./tensorboard/",
    seed=0,
)

# Trailing semicolon: `learn` returns the model itself, and without it the
# notebook echoes the model's repr as the cell's output.
model.learn(total_timesteps=500000);


Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Logging to ./tensorboard/A2C_1
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 28.2     |
|    ep_rew_mean        | 26.2     |
| time/                 |          |
|    fps                | 260      |
|    iterations         | 100      |
|    time_elapsed       | 1        |
|    total_timesteps    | 500      |
| train/                |          |
|    entropy_loss       | -0.48    |
|    explained_variance | 8.34e-07 |
|    learning_rate      | 0.0007   |
|    n_updates          | 99       |
|    policy_loss        | 1.3      |
|    value_loss         | 8.1      |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 28.7      |
|    ep_rew_mean        | 26.7      |
| time/                 |           |
|    fps                | 252       |
|    iterations  

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 27.9      |
|    ep_rew_mean        | 25.9      |
| time/                 |           |
|    fps                | 244       |
|    iterations         | 1400      |
|    time_elapsed       | 28        |
|    total_timesteps    | 7000      |
| train/                |           |
|    entropy_loss       | -0.00109  |
|    explained_variance | -0.000427 |
|    learning_rate      | 0.0007    |
|    n_updates          | 1399      |
|    policy_loss        | 0.000154  |
|    value_loss         | 2.5       |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 27.9      |
|    ep_rew_mean        | 25.9      |
| time/                 |           |
|    fps                | 244       |
|    iterations         | 1500      |
|    time_elapsed       | 30        |
|    total_timesteps    | 7500      |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 28.2      |
|    ep_rew_mean        | 26.2      |
| time/                 |           |
|    fps                | 242       |
|    iterations         | 2700      |
|    time_elapsed       | 55        |
|    total_timesteps    | 13500     |
| train/                |           |
|    entropy_loss       | -0.000557 |
|    explained_variance | 0.711     |
|    learning_rate      | 0.0007    |
|    n_updates          | 2699      |
|    policy_loss        | -3.2e-05  |
|    value_loss         | 0.794     |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 28.2      |
|    ep_rew_mean        | 26.2      |
| time/                 |           |
|    fps                | 242       |
|    iterations         | 2800      |
|    time_elapsed       | 57        |
|    total_timesteps    | 14000     |
| train/    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 28.2     |
|    ep_rew_mean        | 26.2     |
| time/                 |          |
|    fps                | 241      |
|    iterations         | 4000     |
|    time_elapsed       | 82       |
|    total_timesteps    | 20000    |
| train/                |          |
|    entropy_loss       | -0.00245 |
|    explained_variance | 0.984    |
|    learning_rate      | 0.0007   |
|    n_updates          | 3999     |
|    policy_loss        | 0.000248 |
|    value_loss         | 2.38     |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 28.5      |
|    ep_rew_mean        | 26.5      |
| time/                 |           |
|    fps                | 241       |
|    iterations         | 4100      |
|    time_elapsed       | 85        |
|    total_timesteps    | 20500     |
| train/                |    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 28.7     |
|    ep_rew_mean        | 26.7     |
| time/                 |          |
|    fps                | 241      |
|    iterations         | 5300     |
|    time_elapsed       | 109      |
|    total_timesteps    | 26500    |
| train/                |          |
|    entropy_loss       | -0.00168 |
|    explained_variance | 0.901    |
|    learning_rate      | 0.0007   |
|    n_updates          | 5299     |
|    policy_loss        | 4.63e-05 |
|    value_loss         | 0.184    |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 29        |
|    ep_rew_mean        | 27        |
| time/                 |           |
|    fps                | 241       |
|    iterations         | 5400      |
|    time_elapsed       | 111       |
|    total_timesteps    | 27000     |
| train/                |    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 28        |
|    ep_rew_mean        | 26        |
| time/                 |           |
|    fps                | 239       |
|    iterations         | 6600      |
|    time_elapsed       | 137       |
|    total_timesteps    | 33000     |
| train/                |           |
|    entropy_loss       | -0.00163  |
|    explained_variance | 0.993     |
|    learning_rate      | 0.0007    |
|    n_updates          | 6599      |
|    policy_loss        | -5.28e-05 |
|    value_loss         | 0.108     |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 28.3      |
|    ep_rew_mean        | 26.3      |
| time/                 |           |
|    fps                | 240       |
|    iterations         | 6700      |
|    time_elapsed       | 139       |
|    total_timesteps    | 33500     |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 28.3      |
|    ep_rew_mean        | 26.3      |
| time/                 |           |
|    fps                | 243       |
|    iterations         | 7900      |
|    time_elapsed       | 162       |
|    total_timesteps    | 39500     |
| train/                |           |
|    entropy_loss       | -0.000713 |
|    explained_variance | 0.784     |
|    learning_rate      | 0.0007    |
|    n_updates          | 7899      |
|    policy_loss        | 6.6e-05   |
|    value_loss         | 1.28      |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 28        |
|    ep_rew_mean        | 26        |
| time/                 |           |
|    fps                | 243       |
|    iterations         | 8000      |
|    time_elapsed       | 164       |
|    total_timesteps    | 40000     |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 28.3      |
|    ep_rew_mean        | 26.3      |
| time/                 |           |
|    fps                | 246       |
|    iterations         | 9200      |
|    time_elapsed       | 186       |
|    total_timesteps    | 46000     |
| train/                |           |
|    entropy_loss       | -0.000559 |
|    explained_variance | 0.874     |
|    learning_rate      | 0.0007    |
|    n_updates          | 9199      |
|    policy_loss        | 3.16e-05  |
|    value_loss         | 0.609     |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 28.5      |
|    ep_rew_mean        | 26.5      |
| time/                 |           |
|    fps                | 246       |
|    iterations         | 9300      |
|    time_elapsed       | 188       |
|    total_timesteps    | 46500     |
| train/    

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 27.6     |
|    ep_rew_mean        | 25.6     |
| time/                 |          |
|    fps                | 248      |
|    iterations         | 10500    |
|    time_elapsed       | 211      |
|    total_timesteps    | 52500    |
| train/                |          |
|    entropy_loss       | -0.00188 |
|    explained_variance | 0.98     |
|    learning_rate      | 0.0007   |
|    n_updates          | 10499    |
|    policy_loss        | 5.99e-05 |
|    value_loss         | 0.119    |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 27.9      |
|    ep_rew_mean        | 25.9      |
| time/                 |           |
|    fps                | 248       |
|    iterations         | 10600     |
|    time_elapsed       | 213       |
|    total_timesteps    | 53000     |
| train/                |    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 28        |
|    ep_rew_mean        | 26        |
| time/                 |           |
|    fps                | 249       |
|    iterations         | 11800     |
|    time_elapsed       | 236       |
|    total_timesteps    | 59000     |
| train/                |           |
|    entropy_loss       | -0.000893 |
|    explained_variance | 0.942     |
|    learning_rate      | 0.0007    |
|    n_updates          | 11799     |
|    policy_loss        | 4.26e-05  |
|    value_loss         | 0.324     |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 27.6      |
|    ep_rew_mean        | 25.6      |
| time/                 |           |
|    fps                | 249       |
|    iterations         | 11900     |
|    time_elapsed       | 238       |
|    total_timesteps    | 59500     |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 28        |
|    ep_rew_mean        | 26        |
| time/                 |           |
|    fps                | 251       |
|    iterations         | 13100     |
|    time_elapsed       | 260       |
|    total_timesteps    | 65500     |
| train/                |           |
|    entropy_loss       | -0.000639 |
|    explained_variance | 0.677     |
|    learning_rate      | 0.0007    |
|    n_updates          | 13099     |
|    policy_loss        | -5.74e-05 |
|    value_loss         | 1.41      |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 27.8      |
|    ep_rew_mean        | 25.8      |
| time/                 |           |
|    fps                | 251       |
|    iterations         | 13200     |
|    time_elapsed       | 262       |
|    total_timesteps    | 66000     |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 28.8      |
|    ep_rew_mean        | 26.8      |
| time/                 |           |
|    fps                | 252       |
|    iterations         | 14400     |
|    time_elapsed       | 285       |
|    total_timesteps    | 72000     |
| train/                |           |
|    entropy_loss       | -0.000432 |
|    explained_variance | -2.64     |
|    learning_rate      | 0.0007    |
|    n_updates          | 14399     |
|    policy_loss        | 0.000203  |
|    value_loss         | 32.8      |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 29.3      |
|    ep_rew_mean        | 27.3      |
| time/                 |           |
|    fps                | 252       |
|    iterations         | 14500     |
|    time_elapsed       | 286       |
|    total_timesteps    | 72500     |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 28.5      |
|    ep_rew_mean        | 26.5      |
| time/                 |           |
|    fps                | 253       |
|    iterations         | 15700     |
|    time_elapsed       | 309       |
|    total_timesteps    | 78500     |
| train/                |           |
|    entropy_loss       | -0.000323 |
|    explained_variance | 0.873     |
|    learning_rate      | 0.0007    |
|    n_updates          | 15699     |
|    policy_loss        | 4.78e-06  |
|    value_loss         | 0.225     |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 28.5      |
|    ep_rew_mean        | 26.5      |
| time/                 |           |
|    fps                | 253       |
|    iterations         | 15800     |
|    time_elapsed       | 311       |
|    total_timesteps    | 79000     |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 27.9      |
|    ep_rew_mean        | 25.9      |
| time/                 |           |
|    fps                | 254       |
|    iterations         | 17000     |
|    time_elapsed       | 334       |
|    total_timesteps    | 85000     |
| train/                |           |
|    entropy_loss       | -0.000161 |
|    explained_variance | 0.989     |
|    learning_rate      | 0.0007    |
|    n_updates          | 16999     |
|    policy_loss        | -3.7e-07  |
|    value_loss         | 0.00539   |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 28.1      |
|    ep_rew_mean        | 26.1      |
| time/                 |           |
|    fps                | 254       |
|    iterations         | 17100     |
|    time_elapsed       | 336       |
|    total_timesteps    | 85500     |
| train/    

-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 28.6      |
|    ep_rew_mean        | 26.6      |
| time/                 |           |
|    fps                | 252       |
|    iterations         | 18300     |
|    time_elapsed       | 361       |
|    total_timesteps    | 91500     |
| train/                |           |
|    entropy_loss       | -5.83e-05 |
|    explained_variance | 0.984     |
|    learning_rate      | 0.0007    |
|    n_updates          | 18299     |
|    policy_loss        | -8.91e-08 |
|    value_loss         | 0.00127   |
-------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 28.4     |
|    ep_rew_mean        | 26.4     |
| time/                 |          |
|    fps                | 252      |
|    iterations         | 18400    |
|    time_elapsed       | 364      |
|    total_timesteps    | 92000    |
| train/             

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 28.8     |
|    ep_rew_mean        | 26.8     |
| time/                 |          |
|    fps                | 249      |
|    iterations         | 19600    |
|    time_elapsed       | 393      |
|    total_timesteps    | 98000    |
| train/                |          |
|    entropy_loss       | -4e-05   |
|    explained_variance | 0.00721  |
|    learning_rate      | 0.0007   |
|    n_updates          | 19599    |
|    policy_loss        | -0.00022 |
|    value_loss         | 7.37e+03 |
------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 28.5      |
|    ep_rew_mean        | 26.5      |
| time/                 |           |
|    fps                | 249       |
|    iterations         | 19700     |
|    time_elapsed       | 395       |
|    total_timesteps    | 98500     |
| train/                |    

<stable_baselines3.a2c.a2c.A2C at 0x1d798030880>

In [6]:
class CustomEnv(gym.Env):
    """Continuous-action wrapper around ``jumping-task-v0``.

    Exposes a single continuous action in ``[0, 1]`` that is rounded to the
    binary action forwarded to the wrapped environment, so that algorithms
    which only support ``Box`` action spaces can be trained on the task.

    NOTE(review): this class reuses the name ``CustomEnv`` of the
    discrete-action wrapper defined earlier in the notebook and therefore
    shadows it for every cell executed afterwards.
    """
    metadata = {"render.modes": ["human"]}

    def __init__(self):
        super().__init__()
        # float32 rather than uint8: with an integer dtype ``sample()`` can
        # only produce 0 or 1, and anything that stores actions using the
        # space's dtype risks a lossy cast. Bounds and shape are unchanged.
        self.action_space = spaces.Box(low=0.0, high=1.0, shape=(1,), dtype=np.float32)
        # Observations are exposed channel-first as a (1, 60, 60) uint8 array.
        self.observation_space = spaces.Box(low=0, high=1,
                                            shape=(1, 60, 60), dtype=np.uint8)
        self.actualEnv = gym.make('jumping-task-v0')

    @staticmethod
    def _to_discrete(action):
        """Map a scalar or array-like continuous action to the integer 0 or 1.

        The first element is rounded (half-to-even, so exactly 0.5 maps to 0)
        and then clamped to {0, 1} so an out-of-range value can never reach
        the wrapped environment.
        """
        value = float(np.asarray(action, dtype=np.float64).reshape(-1)[0])
        return int(min(1, max(0, round(value))))

    def step(self, action):
        """Advance the wrapped environment by one step.

        Args:
            action: scalar or array-like; only its first element is used.

        Returns:
            Tuple ``(observation, reward, done, info)`` where the observation
            is cast to uint8 and reshaped to (1, 60, 60) and the reward is a
            Python float.
        """
        observation, r, done, info = self.actualEnv.step(self._to_discrete(action))
        # The reshape requires the wrapped observation to hold exactly
        # 60 * 60 elements.
        return observation.astype('uint8').reshape(1, 60, 60), float(r), done, info

    def reset(self):
        """Reset the wrapped environment and return the first observation."""
        observation = self.actualEnv.reset()
        return observation.astype('uint8').reshape(1, 60, 60)  # reward, done, info can't be included

    def render(self, mode="human"):
        """Rendering is not implemented; this is a no-op."""
        pass

    def close(self):
        """Close the wrapped environment."""
        self.actualEnv.close()


### SAC

In [None]:
# NOTE(review): this cell is disabled, yet the saved output below logs to
# ./tensorboard/SAC_1, so it appears to have been run before being commented
# out -- confirm, then re-enable the lines below to reproduce that run.
# env = CustomEnv()

# model = SAC("MlpPolicy", env, verbose=1, tensorboard_log="./tensorboard/", buffer_size=100000)
# model.learn(total_timesteps=500000)


Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Logging to ./tensorboard/SAC_1
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 27.8     |
|    ep_rew_mean     | 25.8     |
| time/              |          |
|    episodes        | 4        |
|    fps             | 304      |
|    time_elapsed    | 0        |
|    total_timesteps | 111      |
| train/             |          |
|    actor_loss      | -2.22    |
|    critic_loss     | 0.396    |
|    ent_coef        | 0.997    |
|    ent_coef_loss   | -0.00449 |
|    learning_rate   | 0.0003   |
|    n_updates       | 10       |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 29.4     |
|    ep_rew_mean     | 27.4     |
| time/              |          |
|    episodes        | 8        |
|    fps             | 81       |
|    time_elapsed    | 2        |
|    total_timesteps | 2

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 28.2     |
|    ep_rew_mean     | 26.2     |
| time/              |          |
|    episodes        | 60       |
|    fps             | 47       |
|    time_elapsed    | 35       |
|    total_timesteps | 1694     |
| train/             |          |
|    actor_loss      | -10.3    |
|    critic_loss     | 1.1      |
|    ent_coef        | 0.62     |
|    ent_coef_loss   | -0.806   |
|    learning_rate   | 0.0003   |
|    n_updates       | 1593     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 28.4     |
|    ep_rew_mean     | 26.4     |
| time/              |          |
|    episodes        | 64       |
|    fps             | 47       |
|    time_elapsed    | 38       |
|    total_timesteps | 1816     |
| train/             |          |
|    actor_loss      | -10.5    |
|    critic_loss     | 0.814    |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 29.4     |
|    ep_rew_mean     | 28.5     |
| time/              |          |
|    episodes        | 120      |
|    fps             | 44       |
|    time_elapsed    | 78       |
|    total_timesteps | 3498     |
| train/             |          |
|    actor_loss      | -15.3    |
|    critic_loss     | 1.91     |
|    ent_coef        | 0.367    |
|    ent_coef_loss   | -1.48    |
|    learning_rate   | 0.0003   |
|    n_updates       | 3397     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 29.5     |
|    ep_rew_mean     | 28.5     |
| time/              |          |
|    episodes        | 124      |
|    fps             | 44       |
|    time_elapsed    | 81       |
|    total_timesteps | 3623     |
| train/             |          |
|    actor_loss      | -15.4    |
|    critic_loss     | 1.2      |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 29.3     |
|    ep_rew_mean     | 28.3     |
| time/              |          |
|    episodes        | 180      |
|    fps             | 44       |
|    time_elapsed    | 118      |
|    total_timesteps | 5217     |
| train/             |          |
|    actor_loss      | -16.7    |
|    critic_loss     | 1.29     |
|    ent_coef        | 0.227    |
|    ent_coef_loss   | -1.91    |
|    learning_rate   | 0.0003   |
|    n_updates       | 5116     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 29.3     |
|    ep_rew_mean     | 29.3     |
| time/              |          |
|    episodes        | 184      |
|    fps             | 44       |
|    time_elapsed    | 121      |
|    total_timesteps | 5351     |
| train/             |          |
|    actor_loss      | -16.6    |
|    critic_loss     | 2.15     |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 28.9     |
|    ep_rew_mean     | 27.9     |
| time/              |          |
|    episodes        | 240      |
|    fps             | 44       |
|    time_elapsed    | 157      |
|    total_timesteps | 6983     |
| train/             |          |
|    actor_loss      | -17      |
|    critic_loss     | 1.76     |
|    ent_coef        | 0.143    |
|    ent_coef_loss   | -1.55    |
|    learning_rate   | 0.0003   |
|    n_updates       | 6882     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 28.8     |
|    ep_rew_mean     | 27.8     |
| time/              |          |
|    episodes        | 244      |
|    fps             | 44       |
|    time_elapsed    | 159      |
|    total_timesteps | 7087     |
| train/             |          |
|    actor_loss      | -16.5    |
|    critic_loss     | 2.49     |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 28.6     |
|    ep_rew_mean     | 27.6     |
| time/              |          |
|    episodes        | 300      |
|    fps             | 44       |
|    time_elapsed    | 193      |
|    total_timesteps | 8678     |
| train/             |          |
|    actor_loss      | -16.8    |
|    critic_loss     | 3.91     |
|    ent_coef        | 0.0964   |
|    ent_coef_loss   | -1.44    |
|    learning_rate   | 0.0003   |
|    n_updates       | 8577     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 28.6     |
|    ep_rew_mean     | 27.6     |
| time/              |          |
|    episodes        | 304      |
|    fps             | 44       |
|    time_elapsed    | 196      |
|    total_timesteps | 8792     |
| train/             |          |
|    actor_loss      | -15.1    |
|    critic_loss     | 2.71     |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 28.8     |
|    ep_rew_mean     | 28.8     |
| time/              |          |
|    episodes        | 360      |
|    fps             | 44       |
|    time_elapsed    | 232      |
|    total_timesteps | 10445    |
| train/             |          |
|    actor_loss      | -16.6    |
|    critic_loss     | 7.04     |
|    ent_coef        | 0.072    |
|    ent_coef_loss   | -0.271   |
|    learning_rate   | 0.0003   |
|    n_updates       | 10344    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 28.8     |
|    ep_rew_mean     | 28.9     |
| time/              |          |
|    episodes        | 364      |
|    fps             | 44       |
|    time_elapsed    | 234      |
|    total_timesteps | 10548    |
| train/             |          |
|    actor_loss      | -18.1    |
|    critic_loss     | 6.76     |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 26.8     |
|    ep_rew_mean     | 25.8     |
| time/              |          |
|    episodes        | 420      |
|    fps             | 45       |
|    time_elapsed    | 264      |
|    total_timesteps | 11945    |
| train/             |          |
|    actor_loss      | -17.5    |
|    critic_loss     | 5.02     |
|    ent_coef        | 0.0647   |
|    ent_coef_loss   | 0.169    |
|    learning_rate   | 0.0003   |
|    n_updates       | 11844    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 26.6     |
|    ep_rew_mean     | 24.6     |
| time/              |          |
|    episodes        | 424      |
|    fps             | 45       |
|    time_elapsed    | 267      |
|    total_timesteps | 12069    |
| train/             |          |
|    actor_loss      | -16.3    |
|    critic_loss     | 5.12     |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 26.2     |
|    ep_rew_mean     | 25.3     |
| time/              |          |
|    episodes        | 480      |
|    fps             | 45       |
|    time_elapsed    | 299      |
|    total_timesteps | 13555    |
| train/             |          |
|    actor_loss      | -17.6    |
|    critic_loss     | 3.26     |
|    ent_coef        | 0.0569   |
|    ent_coef_loss   | -0.251   |
|    learning_rate   | 0.0003   |
|    n_updates       | 13454    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 26.4     |
|    ep_rew_mean     | 25.5     |
| time/              |          |
|    episodes        | 484      |
|    fps             | 45       |
|    time_elapsed    | 301      |
|    total_timesteps | 13659    |
| train/             |          |
|    actor_loss      | -17.4    |
|    critic_loss     | 4.03     |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 27.1     |
|    ep_rew_mean     | 27.1     |
| time/              |          |
|    episodes        | 540      |
|    fps             | 45       |
|    time_elapsed    | 334      |
|    total_timesteps | 15182    |
| train/             |          |
|    actor_loss      | -16.2    |
|    critic_loss     | 4.04     |
|    ent_coef        | 0.0517   |
|    ent_coef_loss   | -0.154   |
|    learning_rate   | 0.0003   |
|    n_updates       | 15081    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 27.6     |
|    ep_rew_mean     | 27.6     |
| time/              |          |
|    episodes        | 544      |
|    fps             | 45       |
|    time_elapsed    | 337      |
|    total_timesteps | 15324    |
| train/             |          |
|    actor_loss      | -16      |
|    critic_loss     | 10.1     |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 27.2     |
|    ep_rew_mean     | 26.3     |
| time/              |          |
|    episodes        | 600      |
|    fps             | 45       |
|    time_elapsed    | 370      |
|    total_timesteps | 16801    |
| train/             |          |
|    actor_loss      | -15.9    |
|    critic_loss     | 5.13     |
|    ent_coef        | 0.047    |
|    ent_coef_loss   | 0.144    |
|    learning_rate   | 0.0003   |
|    n_updates       | 16700    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 27.1     |
|    ep_rew_mean     | 26.1     |
| time/              |          |
|    episodes        | 604      |
|    fps             | 45       |
|    time_elapsed    | 372      |
|    total_timesteps | 16905    |
| train/             |          |
|    actor_loss      | -16.5    |
|    critic_loss     | 4.84     |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 26.3     |
|    ep_rew_mean     | 26.3     |
| time/              |          |
|    episodes        | 660      |
|    fps             | 45       |
|    time_elapsed    | 405      |
|    total_timesteps | 18343    |
| train/             |          |
|    actor_loss      | -16.8    |
|    critic_loss     | 2.67     |
|    ent_coef        | 0.0451   |
|    ent_coef_loss   | 0.144    |
|    learning_rate   | 0.0003   |
|    n_updates       | 18242    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 26       |
|    ep_rew_mean     | 26       |
| time/              |          |
|    episodes        | 664      |
|    fps             | 45       |
|    time_elapsed    | 407      |
|    total_timesteps | 18417    |
| train/             |          |
|    actor_loss      | -16.4    |
|    critic_loss     | 3.56     |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 28.7     |
|    ep_rew_mean     | 30.8     |
| time/              |          |
|    episodes        | 720      |
|    fps             | 45       |
|    time_elapsed    | 443      |
|    total_timesteps | 20118    |
| train/             |          |
|    actor_loss      | -17.8    |
|    critic_loss     | 18.2     |
|    ent_coef        | 0.0461   |
|    ent_coef_loss   | 0.21     |
|    learning_rate   | 0.0003   |
|    n_updates       | 20017    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 28.5     |
|    ep_rew_mean     | 30.6     |
| time/              |          |
|    episodes        | 724      |
|    fps             | 45       |
|    time_elapsed    | 445      |
|    total_timesteps | 20213    |
| train/             |          |
|    actor_loss      | -16.3    |
|    critic_loss     | 6.1      |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 27       |
|    ep_rew_mean     | 27       |
| time/              |          |
|    episodes        | 780      |
|    fps             | 45       |
|    time_elapsed    | 477      |
|    total_timesteps | 21616    |
| train/             |          |
|    actor_loss      | -16.7    |
|    critic_loss     | 4.78     |
|    ent_coef        | 0.0395   |
|    ent_coef_loss   | -0.25    |
|    learning_rate   | 0.0003   |
|    n_updates       | 21515    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 27       |
|    ep_rew_mean     | 27.1     |
| time/              |          |
|    episodes        | 784      |
|    fps             | 45       |
|    time_elapsed    | 479      |
|    total_timesteps | 21745    |
| train/             |          |
|    actor_loss      | -16.8    |
|    critic_loss     | 3.87     |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 27.3     |
|    ep_rew_mean     | 25.3     |
| time/              |          |
|    episodes        | 840      |
|    fps             | 45       |
|    time_elapsed    | 513      |
|    total_timesteps | 23332    |
| train/             |          |
|    actor_loss      | -16.1    |
|    critic_loss     | 6.41     |
|    ent_coef        | 0.0418   |
|    ent_coef_loss   | -0.353   |
|    learning_rate   | 0.0003   |
|    n_updates       | 23231    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 27.1     |
|    ep_rew_mean     | 25.1     |
| time/              |          |
|    episodes        | 844      |
|    fps             | 45       |
|    time_elapsed    | 515      |
|    total_timesteps | 23426    |
| train/             |          |
|    actor_loss      | -16.9    |
|    critic_loss     | 6.24     |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 27.7     |
|    ep_rew_mean     | 25.7     |
| time/              |          |
|    episodes        | 900      |
|    fps             | 45       |
|    time_elapsed    | 547      |
|    total_timesteps | 25008    |
| train/             |          |
|    actor_loss      | -16.4    |
|    critic_loss     | 2.72     |
|    ent_coef        | 0.0401   |
|    ent_coef_loss   | -0.312   |
|    learning_rate   | 0.0003   |
|    n_updates       | 24907    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 28.2     |
|    ep_rew_mean     | 26.2     |
| time/              |          |
|    episodes        | 904      |
|    fps             | 45       |
|    time_elapsed    | 550      |
|    total_timesteps | 25156    |
| train/             |          |
|    actor_loss      | -17      |
|    critic_loss     | 5.65     |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 28.4     |
|    ep_rew_mean     | 27.4     |
| time/              |          |
|    episodes        | 960      |
|    fps             | 45       |
|    time_elapsed    | 581      |
|    total_timesteps | 26686    |
| train/             |          |
|    actor_loss      | -18      |
|    critic_loss     | 4.13     |
|    ent_coef        | 0.035    |
|    ent_coef_loss   | -0.106   |
|    learning_rate   | 0.0003   |
|    n_updates       | 26585    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 28.1     |
|    ep_rew_mean     | 27.1     |
| time/              |          |
|    episodes        | 964      |
|    fps             | 45       |
|    time_elapsed    | 583      |
|    total_timesteps | 26790    |
| train/             |          |
|    actor_loss      | -17.7    |
|    critic_loss     | 6.76     |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 26.6     |
|    ep_rew_mean     | 24.6     |
| time/              |          |
|    episodes        | 1020     |
|    fps             | 46       |
|    time_elapsed    | 614      |
|    total_timesteps | 28322    |
| train/             |          |
|    actor_loss      | -16.7    |
|    critic_loss     | 6.46     |
|    ent_coef        | 0.0313   |
|    ent_coef_loss   | 0.104    |
|    learning_rate   | 0.0003   |
|    n_updates       | 28221    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 27       |
|    ep_rew_mean     | 25       |
| time/              |          |
|    episodes        | 1024     |
|    fps             | 46       |
|    time_elapsed    | 616      |
|    total_timesteps | 28466    |
| train/             |          |
|    actor_loss      | -17.2    |
|    critic_loss     | 3.35     |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 27.1     |
|    ep_rew_mean     | 25.1     |
| time/              |          |
|    episodes        | 1080     |
|    fps             | 46       |
|    time_elapsed    | 647      |
|    total_timesteps | 29972    |
| train/             |          |
|    actor_loss      | -15.2    |
|    critic_loss     | 16.6     |
|    ent_coef        | 0.0252   |
|    ent_coef_loss   | 0.133    |
|    learning_rate   | 0.0003   |
|    n_updates       | 29871    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 27       |
|    ep_rew_mean     | 25       |
| time/              |          |
|    episodes        | 1084     |
|    fps             | 46       |
|    time_elapsed    | 648      |
|    total_timesteps | 30063    |
| train/             |          |
|    actor_loss      | -16.4    |
|    critic_loss     | 5.84     |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 27.7     |
|    ep_rew_mean     | 25.7     |
| time/              |          |
|    episodes        | 1140     |
|    fps             | 46       |
|    time_elapsed    | 681      |
|    total_timesteps | 31676    |
| train/             |          |
|    actor_loss      | -16.7    |
|    critic_loss     | 3.47     |
|    ent_coef        | 0.023    |
|    ent_coef_loss   | 0.687    |
|    learning_rate   | 0.0003   |
|    n_updates       | 31575    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 27.4     |
|    ep_rew_mean     | 25.4     |
| time/              |          |
|    episodes        | 1144     |
|    fps             | 46       |
|    time_elapsed    | 683      |
|    total_timesteps | 31770    |
| train/             |          |
|    actor_loss      | -15.3    |
|    critic_loss     | 2.64     |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 27.1     |
|    ep_rew_mean     | 25.1     |
| time/              |          |
|    episodes        | 1200     |
|    fps             | 46       |
|    time_elapsed    | 712      |
|    total_timesteps | 33227    |
| train/             |          |
|    actor_loss      | -14.3    |
|    critic_loss     | 3.64     |
|    ent_coef        | 0.0159   |
|    ent_coef_loss   | -1.74    |
|    learning_rate   | 0.0003   |
|    n_updates       | 33126    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 27.3     |
|    ep_rew_mean     | 25.3     |
| time/              |          |
|    episodes        | 1204     |
|    fps             | 46       |
|    time_elapsed    | 715      |
|    total_timesteps | 33348    |
| train/             |          |
|    actor_loss      | -14.7    |
|    critic_loss     | 4.38     |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 26.1     |
|    ep_rew_mean     | 24.1     |
| time/              |          |
|    episodes        | 1260     |
|    fps             | 46       |
|    time_elapsed    | 744      |
|    total_timesteps | 34811    |
| train/             |          |
|    actor_loss      | -14.4    |
|    critic_loss     | 5.01     |
|    ent_coef        | 0.0177   |
|    ent_coef_loss   | 0.127    |
|    learning_rate   | 0.0003   |
|    n_updates       | 34710    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 26.1     |
|    ep_rew_mean     | 24.1     |
| time/              |          |
|    episodes        | 1264     |
|    fps             | 46       |
|    time_elapsed    | 746      |
|    total_timesteps | 34903    |
| train/             |          |
|    actor_loss      | -15.8    |
|    critic_loss     | 6.16     |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 26.9     |
|    ep_rew_mean     | 24.9     |
| time/              |          |
|    episodes        | 1320     |
|    fps             | 46       |
|    time_elapsed    | 777      |
|    total_timesteps | 36431    |
| train/             |          |
|    actor_loss      | -16.6    |
|    critic_loss     | 5.04     |
|    ent_coef        | 0.0166   |
|    ent_coef_loss   | 0.246    |
|    learning_rate   | 0.0003   |
|    n_updates       | 36330    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 26.7     |
|    ep_rew_mean     | 24.7     |
| time/              |          |
|    episodes        | 1324     |
|    fps             | 46       |
|    time_elapsed    | 779      |
|    total_timesteps | 36555    |
| train/             |          |
|    actor_loss      | -15.5    |
|    critic_loss     | 2.88     |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 27.5     |
|    ep_rew_mean     | 25.5     |
| time/              |          |
|    episodes        | 1380     |
|    fps             | 47       |
|    time_elapsed    | 810      |
|    total_timesteps | 38091    |
| train/             |          |
|    actor_loss      | -13.8    |
|    critic_loss     | 2.81     |
|    ent_coef        | 0.0136   |
|    ent_coef_loss   | -0.481   |
|    learning_rate   | 0.0003   |
|    n_updates       | 37990    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 27.7     |
|    ep_rew_mean     | 25.7     |
| time/              |          |
|    episodes        | 1384     |
|    fps             | 47       |
|    time_elapsed    | 812      |
|    total_timesteps | 38203    |
| train/             |          |
|    actor_loss      | -14.7    |
|    critic_loss     | 10.1     |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 26.5     |
|    ep_rew_mean     | 24.5     |
| time/              |          |
|    episodes        | 1440     |
|    fps             | 47       |
|    time_elapsed    | 841      |
|    total_timesteps | 39655    |
| train/             |          |
|    actor_loss      | -14      |
|    critic_loss     | 2.59     |
|    ent_coef        | 0.0136   |
|    ent_coef_loss   | -0.523   |
|    learning_rate   | 0.0003   |
|    n_updates       | 39554    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 26.5     |
|    ep_rew_mean     | 24.5     |
| time/              |          |
|    episodes        | 1444     |
|    fps             | 47       |
|    time_elapsed    | 843      |
|    total_timesteps | 39743    |
| train/             |          |
|    actor_loss      | -13.7    |
|    critic_loss     | 2.96     |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 25.8     |
|    ep_rew_mean     | 23.8     |
| time/              |          |
|    episodes        | 1500     |
|    fps             | 47       |
|    time_elapsed    | 871      |
|    total_timesteps | 41146    |
| train/             |          |
|    actor_loss      | -13.9    |
|    critic_loss     | 6.18     |
|    ent_coef        | 0.0125   |
|    ent_coef_loss   | -0.0805  |
|    learning_rate   | 0.0003   |
|    n_updates       | 41045    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 25.9     |
|    ep_rew_mean     | 23.9     |
| time/              |          |
|    episodes        | 1504     |
|    fps             | 47       |
|    time_elapsed    | 873      |
|    total_timesteps | 41270    |
| train/             |          |
|    actor_loss      | -13.5    |
|    critic_loss     | 2.7      |
|    ent_coef 

----------------------------------
| rollout/           |           |
|    ep_len_mean     | 25.1      |
|    ep_rew_mean     | 23.1      |
| time/              |           |
|    episodes        | 1560      |
|    fps             | 47        |
|    time_elapsed    | 902       |
|    total_timesteps | 42693     |
| train/             |           |
|    actor_loss      | -13.8     |
|    critic_loss     | 3.19      |
|    ent_coef        | 0.0138    |
|    ent_coef_loss   | -0.000207 |
|    learning_rate   | 0.0003    |
|    n_updates       | 42592     |
----------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 25.4     |
|    ep_rew_mean     | 23.4     |
| time/              |          |
|    episodes        | 1564     |
|    fps             | 47       |
|    time_elapsed    | 904      |
|    total_timesteps | 42817    |
| train/             |          |
|    actor_loss      | -14.6    |
|    critic_loss     | 2.83    

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 27.1     |
|    ep_rew_mean     | 25.1     |
| time/              |          |
|    episodes        | 1620     |
|    fps             | 47       |
|    time_elapsed    | 936      |
|    total_timesteps | 44397    |
| train/             |          |
|    actor_loss      | -14.4    |
|    critic_loss     | 3.23     |
|    ent_coef        | 0.0121   |
|    ent_coef_loss   | -0.761   |
|    learning_rate   | 0.0003   |
|    n_updates       | 44296    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 26.9     |
|    ep_rew_mean     | 24.9     |
| time/              |          |
|    episodes        | 1624     |
|    fps             | 47       |
|    time_elapsed    | 938      |
|    total_timesteps | 44482    |
| train/             |          |
|    actor_loss      | -13.4    |
|    critic_loss     | 3.74     |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 27.9     |
|    ep_rew_mean     | 25.9     |
| time/              |          |
|    episodes        | 1680     |
|    fps             | 47       |
|    time_elapsed    | 970      |
|    total_timesteps | 46067    |
| train/             |          |
|    actor_loss      | -14.6    |
|    critic_loss     | 2.05     |
|    ent_coef        | 0.0108   |
|    ent_coef_loss   | -1.35    |
|    learning_rate   | 0.0003   |
|    n_updates       | 45966    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 27.9     |
|    ep_rew_mean     | 25.9     |
| time/              |          |
|    episodes        | 1684     |
|    fps             | 47       |
|    time_elapsed    | 973      |
|    total_timesteps | 46171    |
| train/             |          |
|    actor_loss      | -14.6    |
|    critic_loss     | 29.7     |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 28.3     |
|    ep_rew_mean     | 26.3     |
| time/              |          |
|    episodes        | 1740     |
|    fps             | 47       |
|    time_elapsed    | 1007     |
|    total_timesteps | 47766    |
| train/             |          |
|    actor_loss      | -15.1    |
|    critic_loss     | 47.5     |
|    ent_coef        | 0.00967  |
|    ent_coef_loss   | 1.56     |
|    learning_rate   | 0.0003   |
|    n_updates       | 47665    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 28.3     |
|    ep_rew_mean     | 26.3     |
| time/              |          |
|    episodes        | 1744     |
|    fps             | 47       |
|    time_elapsed    | 1009     |
|    total_timesteps | 47870    |
| train/             |          |
|    actor_loss      | -14.2    |
|    critic_loss     | 44.8     |
|    ent_coef 

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 27.1     |
|    ep_rew_mean     | 25.1     |
| time/              |          |
|    episodes        | 1800     |
|    fps             | 47       |
|    time_elapsed    | 1041     |
|    total_timesteps | 49385    |
| train/             |          |
|    actor_loss      | -13.1    |
|    critic_loss     | 5.56     |
|    ent_coef        | 0.0106   |
|    ent_coef_loss   | -1.34    |
|    learning_rate   | 0.0003   |
|    n_updates       | 49284    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 27.2     |
|    ep_rew_mean     | 25.2     |
| time/              |          |
|    episodes        | 1804     |
|    fps             | 47       |
|    time_elapsed    | 1043     |
|    total_timesteps | 49479    |
| train/             |          |
|    actor_loss      | -13.2    |
|    critic_loss     | 3.2      |
|    ent_coef 