# Reinforcement Learning
# TP - DQN
# El Kettaneh Joseph

### High-level DQN Workflow
The DQN gets trained over multiple time steps over many episodes. It goes through a sequence of operations in each time step:
![Fig_05.png](attachment:Fig_05.png)

# DQN Implementation on ATARI Games Env

# Ping Pong
![Screen%20Shot%202023-01-03%20at%204.33.35%20PM.png](attachment:Screen%20Shot%202023-01-03%20at%204.33.35%20PM.png)

In [1]:
import gym
from stable_baselines3.common.env_util import make_atari_env
from stable_baselines3.common.vec_env import VecFrameStack
from stable_baselines3 import A2C, DQN

# that will make and wrap atari environments correctly.
env = make_atari_env("PongNoFrameskip-v4", n_envs=1)
# Frame-stacking with 4 frames
env = VecFrameStack(env, n_stack=1)

modelPong = DQN("MlpPolicy", env, verbose=1, device="mps")
modelPong.learn(total_timesteps=100000)
modelPong.save('PongNoFrameskip-v4-10.zip')

  from .autonotebook import tqdm as notebook_tqdm


Using mps device
Wrapping the env in a VecTransposeImage.




----------------------------------
| rollout/            |          |
|    ep_len_mean      | 4.02e+03 |
|    ep_rew_mean      | -19.8    |
|    exploration_rate | 0.621    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 1616     |
|    time_elapsed     | 2        |
|    total_timesteps  | 3991     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 3.86e+03 |
|    ep_rew_mean      | -19.9    |
|    exploration_rate | 0.272    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 1604     |
|    time_elapsed     | 4        |
|    total_timesteps  | 7662     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 3.79e+03 |
|    ep_rew_mean      | -19.8    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 3.63e+03 |
|    ep_rew_mean      | -20.4    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 84       |
|    fps              | 578      |
|    time_elapsed     | 130      |
|    total_timesteps  | 75709    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0305   |
|    n_updates        | 6427     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 3.62e+03 |
|    ep_rew_mean      | -20.4    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 88       |
|    fps              | 545      |
|    time_elapsed     | 145      |
|    total_timesteps  | 79117    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0202   |
|    n_updates      

In [None]:
modelPong.load('PongNoFrameskip-v4-10.zip', print_system_info=True)
vec_env = modelPong.get_env()
obs = vec_env.reset()
for i in range(1000):
    action, _states = modelPong.predict(obs, deterministic=True)
    obs, rewards, dones, info = vec_env.step(action)
    vec_env.render()

# Breakout
![Screen%20Shot%202023-01-03%20at%204.33.01%20PM.png](attachment:Screen%20Shot%202023-01-03%20at%204.33.01%20PM.png)

In [1]:
import gym
from stable_baselines3.common.env_util import make_atari_env
from stable_baselines3.common.vec_env import VecFrameStack
from stable_baselines3 import DQN

# that will make and wrap atari environments correctly.
env = make_atari_env("BreakoutNoFrameskip-v4", n_envs=1, seed=0)
# Frame-stacking
env = VecFrameStack(env, n_stack=1)

modelBreak = DQN("MlpPolicy", env, verbose=1, device="mps")
modelBreak.learn(total_timesteps=150000)
#modelBreak.load("Breakout-150000.zip")

vec_env = modelBreak.get_env()
obs = vec_env.reset()
for i in range(5000):
    action, _states = modelBreak.predict(obs)
    obs, rewards, dones, info = vec_env.step(action)
    vec_env.render()


  from .autonotebook import tqdm as notebook_tqdm


Using mps device
Wrapping the env in a VecTransposeImage.
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.993    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 1228     |
|    time_elapsed     | 0        |
|    total_timesteps  | 116      |
----------------------------------




----------------------------------
| rollout/            |          |
|    ep_len_mean      | 626      |
|    ep_rew_mean      | 1        |
|    exploration_rate | 0.985    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 1253     |
|    time_elapsed     | 0        |
|    total_timesteps  | 236      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 629      |
|    ep_rew_mean      | 1        |
|    exploration_rate | 0.979    |
| time/               |          |
|    episodes         | 12       |
|    fps              | 1246     |
|    time_elapsed     | 0        |
|    total_timesteps  | 326      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 735      |
|    ep_rew_mean      | 1.67     |
|    exploration_rate | 0.967    |
| time/               |          |
|    episodes       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 711      |
|    ep_rew_mean      | 1.11     |
|    exploration_rate | 0.807    |
| time/               |          |
|    episodes         | 96       |
|    fps              | 1301     |
|    time_elapsed     | 2        |
|    total_timesteps  | 3054     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 720      |
|    ep_rew_mean      | 1.15     |
|    exploration_rate | 0.795    |
| time/               |          |
|    episodes         | 100      |
|    fps              | 1305     |
|    time_elapsed     | 2        |
|    total_timesteps  | 3236     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 720      |
|    ep_rew_mean      | 1.15     |
|    exploration_rate | 0.769    |
| time/               |          |
|    episodes       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 755      |
|    ep_rew_mean      | 1.47     |
|    exploration_rate | 0.595    |
| time/               |          |
|    episodes         | 184      |
|    fps              | 1325     |
|    time_elapsed     | 4        |
|    total_timesteps  | 6387     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 775      |
|    ep_rew_mean      | 1.57     |
|    exploration_rate | 0.583    |
| time/               |          |
|    episodes         | 188      |
|    fps              | 1326     |
|    time_elapsed     | 4        |
|    total_timesteps  | 6577     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 768      |
|    ep_rew_mean      | 1.53     |
|    exploration_rate | 0.578    |
| time/               |          |
|    episodes       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 766      |
|    ep_rew_mean      | 1.5      |
|    exploration_rate | 0.402    |
| time/               |          |
|    episodes         | 272      |
|    fps              | 1326     |
|    time_elapsed     | 7        |
|    total_timesteps  | 9444     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 763      |
|    ep_rew_mean      | 1.49     |
|    exploration_rate | 0.396    |
| time/               |          |
|    episodes         | 276      |
|    fps              | 1325     |
|    time_elapsed     | 7        |
|    total_timesteps  | 9534     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 759      |
|    ep_rew_mean      | 1.46     |
|    exploration_rate | 0.391    |
| time/               |          |
|    episodes       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 747      |
|    ep_rew_mean      | 1.39     |
|    exploration_rate | 0.229    |
| time/               |          |
|    episodes         | 360      |
|    fps              | 1316     |
|    time_elapsed     | 9        |
|    total_timesteps  | 12175    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 747      |
|    ep_rew_mean      | 1.39     |
|    exploration_rate | 0.223    |
| time/               |          |
|    episodes         | 364      |
|    fps              | 1316     |
|    time_elapsed     | 9        |
|    total_timesteps  | 12267    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 746      |
|    ep_rew_mean      | 1.38     |
|    exploration_rate | 0.211    |
| time/               |          |
|    episodes       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 758      |
|    ep_rew_mean      | 1.46     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 448      |
|    fps              | 1320     |
|    time_elapsed     | 11       |
|    total_timesteps  | 15354    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 755      |
|    ep_rew_mean      | 1.44     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 452      |
|    fps              | 1319     |
|    time_elapsed     | 11       |
|    total_timesteps  | 15446    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 756      |
|    ep_rew_mean      | 1.45     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 762      |
|    ep_rew_mean      | 1.49     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 536      |
|    fps              | 1320     |
|    time_elapsed     | 14       |
|    total_timesteps  | 18492    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 762      |
|    ep_rew_mean      | 1.49     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 540      |
|    fps              | 1320     |
|    time_elapsed     | 14       |
|    total_timesteps  | 18582    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 762      |
|    ep_rew_mean      | 1.49     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 747      |
|    ep_rew_mean      | 1.4      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 624      |
|    fps              | 1320     |
|    time_elapsed     | 16       |
|    total_timesteps  | 21293    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 755      |
|    ep_rew_mean      | 1.44     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 628      |
|    fps              | 1320     |
|    time_elapsed     | 16       |
|    total_timesteps  | 21411    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 756      |
|    ep_rew_mean      | 1.45     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 752      |
|    ep_rew_mean      | 1.45     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 712      |
|    fps              | 1321     |
|    time_elapsed     | 18       |
|    total_timesteps  | 24429    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 751      |
|    ep_rew_mean      | 1.44     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 716      |
|    fps              | 1321     |
|    time_elapsed     | 18       |
|    total_timesteps  | 24565    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 751      |
|    ep_rew_mean      | 1.44     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 758      |
|    ep_rew_mean      | 1.47     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 800      |
|    fps              | 1322     |
|    time_elapsed     | 20       |
|    total_timesteps  | 27418    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 758      |
|    ep_rew_mean      | 1.47     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 804      |
|    fps              | 1323     |
|    time_elapsed     | 20       |
|    total_timesteps  | 27587    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 756      |
|    ep_rew_mean      | 1.46     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 759      |
|    ep_rew_mean      | 1.48     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 888      |
|    fps              | 1322     |
|    time_elapsed     | 22       |
|    total_timesteps  | 30351    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 760      |
|    ep_rew_mean      | 1.49     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 892      |
|    fps              | 1322     |
|    time_elapsed     | 23       |
|    total_timesteps  | 30441    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 760      |
|    ep_rew_mean      | 1.49     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 753      |
|    ep_rew_mean      | 1.44     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 976      |
|    fps              | 1319     |
|    time_elapsed     | 25       |
|    total_timesteps  | 33206    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 757      |
|    ep_rew_mean      | 1.46     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 980      |
|    fps              | 1320     |
|    time_elapsed     | 25       |
|    total_timesteps  | 33393    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 757      |
|    ep_rew_mean      | 1.46     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 746      |
|    ep_rew_mean      | 1.4      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1064     |
|    fps              | 1320     |
|    time_elapsed     | 27       |
|    total_timesteps  | 36243    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 748      |
|    ep_rew_mean      | 1.42     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1068     |
|    fps              | 1320     |
|    time_elapsed     | 27       |
|    total_timesteps  | 36381    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 750      |
|    ep_rew_mean      | 1.43     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 772      |
|    ep_rew_mean      | 1.59     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1152     |
|    fps              | 1324     |
|    time_elapsed     | 30       |
|    total_timesteps  | 39835    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 777      |
|    ep_rew_mean      | 1.62     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1156     |
|    fps              | 1324     |
|    time_elapsed     | 30       |
|    total_timesteps  | 40075    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 773      |
|    ep_rew_mean      | 1.6      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 763      |
|    ep_rew_mean      | 1.52     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1240     |
|    fps              | 1324     |
|    time_elapsed     | 32       |
|    total_timesteps  | 42600    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 763      |
|    ep_rew_mean      | 1.52     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1244     |
|    fps              | 1324     |
|    time_elapsed     | 32       |
|    total_timesteps  | 42787    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 769      |
|    ep_rew_mean      | 1.55     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 761      |
|    ep_rew_mean      | 1.51     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1328     |
|    fps              | 1325     |
|    time_elapsed     | 34       |
|    total_timesteps  | 45599    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 759      |
|    ep_rew_mean      | 1.5      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1332     |
|    fps              | 1325     |
|    time_elapsed     | 34       |
|    total_timesteps  | 45733    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 759      |
|    ep_rew_mean      | 1.5      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 754      |
|    ep_rew_mean      | 1.46     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1416     |
|    fps              | 1325     |
|    time_elapsed     | 36       |
|    total_timesteps  | 48383    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 752      |
|    ep_rew_mean      | 1.45     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1420     |
|    fps              | 1325     |
|    time_elapsed     | 36       |
|    total_timesteps  | 48475    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 752      |
|    ep_rew_mean      | 1.45     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 753      |
|    ep_rew_mean      | 1.46     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1492     |
|    fps              | 1246     |
|    time_elapsed     | 40       |
|    total_timesteps  | 50820    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000157 |
|    n_updates        | 204      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 751      |
|    ep_rew_mean      | 1.44     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1496     |
|    fps              | 1232     |
|    time_elapsed     | 41       |
|    total_timesteps  | 50985    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000164 |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 755      |
|    ep_rew_mean      | 1.46     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1556     |
|    fps              | 1056     |
|    time_elapsed     | 50       |
|    total_timesteps  | 53141    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000271 |
|    n_updates        | 785      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 757      |
|    ep_rew_mean      | 1.48     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1560     |
|    fps              | 1045     |
|    time_elapsed     | 50       |
|    total_timesteps  | 53302    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 4.96e-05 |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 725      |
|    ep_rew_mean      | 1.28     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1620     |
|    fps              | 953      |
|    time_elapsed     | 57       |
|    total_timesteps  | 55045    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000219 |
|    n_updates        | 1261     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 725      |
|    ep_rew_mean      | 1.28     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1624     |
|    fps              | 947      |
|    time_elapsed     | 58       |
|    total_timesteps  | 55183    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 8.7e-05  |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 720      |
|    ep_rew_mean      | 1.25     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1684     |
|    fps              | 860      |
|    time_elapsed     | 66       |
|    total_timesteps  | 57100    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 9.74e-05 |
|    n_updates        | 1774     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 720      |
|    ep_rew_mean      | 1.25     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1688     |
|    fps              | 857      |
|    time_elapsed     | 66       |
|    total_timesteps  | 57192    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000354 |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 706      |
|    ep_rew_mean      | 1.19     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1748     |
|    fps              | 805      |
|    time_elapsed     | 73       |
|    total_timesteps  | 58853    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 7.78e-05 |
|    n_updates        | 2213     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 712      |
|    ep_rew_mean      | 1.23     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1752     |
|    fps              | 796      |
|    time_elapsed     | 74       |
|    total_timesteps  | 59186    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000263 |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 706      |
|    ep_rew_mean      | 1.19     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1812     |
|    fps              | 753      |
|    time_elapsed     | 80       |
|    total_timesteps  | 60856    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 8.12e-05 |
|    n_updates        | 2713     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 702      |
|    ep_rew_mean      | 1.17     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1816     |
|    fps              | 749      |
|    time_elapsed     | 81       |
|    total_timesteps  | 61019    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 3.94e-05 |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 698      |
|    ep_rew_mean      | 1.17     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1876     |
|    fps              | 717      |
|    time_elapsed     | 87       |
|    total_timesteps  | 62712    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 4.41e-06 |
|    n_updates        | 3177     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 697      |
|    ep_rew_mean      | 1.16     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1880     |
|    fps              | 714      |
|    time_elapsed     | 87       |
|    total_timesteps  | 62845    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 7.4e-05  |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 690      |
|    ep_rew_mean      | 1.14     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1940     |
|    fps              | 682      |
|    time_elapsed     | 94       |
|    total_timesteps  | 64712    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000183 |
|    n_updates        | 3677     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 690      |
|    ep_rew_mean      | 1.14     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1944     |
|    fps              | 681      |
|    time_elapsed     | 95       |
|    total_timesteps  | 64804    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000236 |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 690      |
|    ep_rew_mean      | 1.19     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2004     |
|    fps              | 654      |
|    time_elapsed     | 101      |
|    total_timesteps  | 66690    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 8.42e-05 |
|    n_updates        | 4172     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 690      |
|    ep_rew_mean      | 1.19     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2008     |
|    fps              | 653      |
|    time_elapsed     | 102      |
|    total_timesteps  | 66782    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 1.65e-05 |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 667      |
|    ep_rew_mean      | 1.05     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2068     |
|    fps              | 627      |
|    time_elapsed     | 109      |
|    total_timesteps  | 68611    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0152   |
|    n_updates        | 4652     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 672      |
|    ep_rew_mean      | 1.08     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2072     |
|    fps              | 626      |
|    time_elapsed     | 109      |
|    total_timesteps  | 68731    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 3.22e-05 |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 677      |
|    ep_rew_mean      | 1.12     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2132     |
|    fps              | 604      |
|    time_elapsed     | 116      |
|    total_timesteps  | 70566    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 3.41e-05 |
|    n_updates        | 5141     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 671      |
|    ep_rew_mean      | 1.08     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2136     |
|    fps              | 603      |
|    time_elapsed     | 117      |
|    total_timesteps  | 70652    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000136 |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 661      |
|    ep_rew_mean      | 1.01     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2196     |
|    fps              | 587      |
|    time_elapsed     | 122      |
|    total_timesteps  | 72241    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0158   |
|    n_updates        | 5560     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 661      |
|    ep_rew_mean      | 1.01     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2200     |
|    fps              | 586      |
|    time_elapsed     | 123      |
|    total_timesteps  | 72361    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0157   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 660      |
|    ep_rew_mean      | 0.99     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2260     |
|    fps              | 570      |
|    time_elapsed     | 129      |
|    total_timesteps  | 74145    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 9.79e-05 |
|    n_updates        | 6036     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 660      |
|    ep_rew_mean      | 0.99     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2264     |
|    fps              | 568      |
|    time_elapsed     | 130      |
|    total_timesteps  | 74409    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000491 |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 670      |
|    ep_rew_mean      | 1.05     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2324     |
|    fps              | 551      |
|    time_elapsed     | 138      |
|    total_timesteps  | 76232    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0156   |
|    n_updates        | 6557     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 668      |
|    ep_rew_mean      | 1.04     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2328     |
|    fps              | 550      |
|    time_elapsed     | 138      |
|    total_timesteps  | 76397    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 2.17e-05 |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 685      |
|    ep_rew_mean      | 1.13     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2388     |
|    fps              | 534      |
|    time_elapsed     | 146      |
|    total_timesteps  | 78445    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000132 |
|    n_updates        | 7111     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 682      |
|    ep_rew_mean      | 1.11     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2392     |
|    fps              | 534      |
|    time_elapsed     | 147      |
|    total_timesteps  | 78535    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000126 |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 689      |
|    ep_rew_mean      | 1.17     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2452     |
|    fps              | 521      |
|    time_elapsed     | 154      |
|    total_timesteps  | 80485    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000321 |
|    n_updates        | 7621     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 689      |
|    ep_rew_mean      | 1.17     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2456     |
|    fps              | 520      |
|    time_elapsed     | 154      |
|    total_timesteps  | 80575    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000216 |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 691      |
|    ep_rew_mean      | 1.16     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2516     |
|    fps              | 509      |
|    time_elapsed     | 161      |
|    total_timesteps  | 82542    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000155 |
|    n_updates        | 8135     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 691      |
|    ep_rew_mean      | 1.16     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2520     |
|    fps              | 509      |
|    time_elapsed     | 162      |
|    total_timesteps  | 82634    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 3.08e-05 |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 673      |
|    ep_rew_mean      | 1.04     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2580     |
|    fps              | 501      |
|    time_elapsed     | 167      |
|    total_timesteps  | 84116    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 1.03e-05 |
|    n_updates        | 8528     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 673      |
|    ep_rew_mean      | 1.04     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2584     |
|    fps              | 501      |
|    time_elapsed     | 168      |
|    total_timesteps  | 84208    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000464 |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 672      |
|    ep_rew_mean      | 1.1      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2644     |
|    fps              | 492      |
|    time_elapsed     | 174      |
|    total_timesteps  | 85937    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 2.48e-05 |
|    n_updates        | 8984     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 672      |
|    ep_rew_mean      | 1.1      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2648     |
|    fps              | 492      |
|    time_elapsed     | 174      |
|    total_timesteps  | 86027    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000272 |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 697      |
|    ep_rew_mean      | 1.23     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2708     |
|    fps              | 482      |
|    time_elapsed     | 183      |
|    total_timesteps  | 88297    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 3.77e-05 |
|    n_updates        | 9574     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 697      |
|    ep_rew_mean      | 1.23     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2712     |
|    fps              | 481      |
|    time_elapsed     | 183      |
|    total_timesteps  | 88387    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 3.91e-05 |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 689      |
|    ep_rew_mean      | 1.19     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2772     |
|    fps              | 473      |
|    time_elapsed     | 190      |
|    total_timesteps  | 90352    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0308   |
|    n_updates        | 10087    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 693      |
|    ep_rew_mean      | 1.24     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2776     |
|    fps              | 472      |
|    time_elapsed     | 191      |
|    total_timesteps  | 90596    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000139 |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 695      |
|    ep_rew_mean      | 1.23     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2836     |
|    fps              | 465      |
|    time_elapsed     | 198      |
|    total_timesteps  | 92331    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000152 |
|    n_updates        | 10582    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 698      |
|    ep_rew_mean      | 1.25     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2840     |
|    fps              | 464      |
|    time_elapsed     | 199      |
|    total_timesteps  | 92595    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000244 |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 688      |
|    ep_rew_mean      | 1.17     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2900     |
|    fps              | 457      |
|    time_elapsed     | 206      |
|    total_timesteps  | 94304    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 3.48e-05 |
|    n_updates        | 11075    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 688      |
|    ep_rew_mean      | 1.17     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2904     |
|    fps              | 457      |
|    time_elapsed     | 206      |
|    total_timesteps  | 94396    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0159   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 690      |
|    ep_rew_mean      | 1.16     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2964     |
|    fps              | 449      |
|    time_elapsed     | 214      |
|    total_timesteps  | 96419    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000464 |
|    n_updates        | 11604    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 695      |
|    ep_rew_mean      | 1.19     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2968     |
|    fps              | 448      |
|    time_elapsed     | 215      |
|    total_timesteps  | 96600    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000291 |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 708      |
|    ep_rew_mean      | 1.44     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3028     |
|    fps              | 441      |
|    time_elapsed     | 223      |
|    total_timesteps  | 98792    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 3.21e-05 |
|    n_updates        | 12197    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 709      |
|    ep_rew_mean      | 1.44     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3032     |
|    fps              | 441      |
|    time_elapsed     | 223      |
|    total_timesteps  | 98884    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000349 |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 718      |
|    ep_rew_mean      | 1.49     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3092     |
|    fps              | 436      |
|    time_elapsed     | 230      |
|    total_timesteps  | 100591   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00035  |
|    n_updates        | 12647    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 712      |
|    ep_rew_mean      | 1.38     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3096     |
|    fps              | 436      |
|    time_elapsed     | 230      |
|    total_timesteps  | 100681   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000268 |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 724      |
|    ep_rew_mean      | 1.48     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3156     |
|    fps              | 431      |
|    time_elapsed     | 237      |
|    total_timesteps  | 102615   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 2.13e-05 |
|    n_updates        | 13153    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 719      |
|    ep_rew_mean      | 1.45     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3160     |
|    fps              | 431      |
|    time_elapsed     | 238      |
|    total_timesteps  | 102707   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000586 |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 711      |
|    ep_rew_mean      | 1.41     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3220     |
|    fps              | 425      |
|    time_elapsed     | 246      |
|    total_timesteps  | 104649   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0162   |
|    n_updates        | 13662    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 711      |
|    ep_rew_mean      | 1.41     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3224     |
|    fps              | 424      |
|    time_elapsed     | 246      |
|    total_timesteps  | 104741   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00041  |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 698      |
|    ep_rew_mean      | 1.31     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3284     |
|    fps              | 421      |
|    time_elapsed     | 252      |
|    total_timesteps  | 106522   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 2.76e-05 |
|    n_updates        | 14130    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 699      |
|    ep_rew_mean      | 1.32     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3288     |
|    fps              | 421      |
|    time_elapsed     | 253      |
|    total_timesteps  | 106610   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 5.47e-05 |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 697      |
|    ep_rew_mean      | 1.31     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3348     |
|    fps              | 417      |
|    time_elapsed     | 260      |
|    total_timesteps  | 108491   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 1.6e-05  |
|    n_updates        | 14622    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 690      |
|    ep_rew_mean      | 1.27     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3352     |
|    fps              | 417      |
|    time_elapsed     | 260      |
|    total_timesteps  | 108581   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 3.17e-05 |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 699      |
|    ep_rew_mean      | 1.33     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3412     |
|    fps              | 413      |
|    time_elapsed     | 267      |
|    total_timesteps  | 110451   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000741 |
|    n_updates        | 15112    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 699      |
|    ep_rew_mean      | 1.33     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3416     |
|    fps              | 413      |
|    time_elapsed     | 267      |
|    total_timesteps  | 110543   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 6.5e-05  |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 676      |
|    ep_rew_mean      | 1.11     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3476     |
|    fps              | 410      |
|    time_elapsed     | 273      |
|    total_timesteps  | 112078   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0301   |
|    n_updates        | 15519    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 667      |
|    ep_rew_mean      | 1.06     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3480     |
|    fps              | 409      |
|    time_elapsed     | 273      |
|    total_timesteps  | 112168   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000353 |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 661      |
|    ep_rew_mean      | 0.9      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3540     |
|    fps              | 406      |
|    time_elapsed     | 279      |
|    total_timesteps  | 113912   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000521 |
|    n_updates        | 15977    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 661      |
|    ep_rew_mean      | 0.9      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3544     |
|    fps              | 406      |
|    time_elapsed     | 280      |
|    total_timesteps  | 114004   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00072  |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 663      |
|    ep_rew_mean      | 0.96     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3604     |
|    fps              | 404      |
|    time_elapsed     | 286      |
|    total_timesteps  | 115694   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0319   |
|    n_updates        | 16423    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 656      |
|    ep_rew_mean      | 0.92     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3608     |
|    fps              | 404      |
|    time_elapsed     | 286      |
|    total_timesteps  | 115786   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0152   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 656      |
|    ep_rew_mean      | 0.88     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3668     |
|    fps              | 401      |
|    time_elapsed     | 292      |
|    total_timesteps  | 117545   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000505 |
|    n_updates        | 16886    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 656      |
|    ep_rew_mean      | 0.88     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3672     |
|    fps              | 401      |
|    time_elapsed     | 292      |
|    total_timesteps  | 117637   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 9.78e-05 |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 651      |
|    ep_rew_mean      | 0.93     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3732     |
|    fps              | 399      |
|    time_elapsed     | 299      |
|    total_timesteps  | 119484   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 4.5e-05  |
|    n_updates        | 17370    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 647      |
|    ep_rew_mean      | 0.9      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3736     |
|    fps              | 398      |
|    time_elapsed     | 299      |
|    total_timesteps  | 119651   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000495 |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 663      |
|    ep_rew_mean      | 1.05     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3796     |
|    fps              | 395      |
|    time_elapsed     | 307      |
|    total_timesteps  | 121812   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0155   |
|    n_updates        | 17952    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 658      |
|    ep_rew_mean      | 1.02     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3800     |
|    fps              | 395      |
|    time_elapsed     | 307      |
|    total_timesteps  | 121904   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0152   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 662      |
|    ep_rew_mean      | 1.04     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3860     |
|    fps              | 393      |
|    time_elapsed     | 314      |
|    total_timesteps  | 123595   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 4.06e-05 |
|    n_updates        | 18398    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 662      |
|    ep_rew_mean      | 1.04     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3864     |
|    fps              | 393      |
|    time_elapsed     | 314      |
|    total_timesteps  | 123687   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0164   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 652      |
|    ep_rew_mean      | 0.99     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3924     |
|    fps              | 390      |
|    time_elapsed     | 320      |
|    total_timesteps  | 125486   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 1.39e-05 |
|    n_updates        | 18871    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 657      |
|    ep_rew_mean      | 1.02     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3928     |
|    fps              | 390      |
|    time_elapsed     | 321      |
|    total_timesteps  | 125578   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00022  |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 667      |
|    ep_rew_mean      | 1.07     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3988     |
|    fps              | 388      |
|    time_elapsed     | 327      |
|    total_timesteps  | 127326   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000337 |
|    n_updates        | 19331    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 669      |
|    ep_rew_mean      | 1.09     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3992     |
|    fps              | 388      |
|    time_elapsed     | 328      |
|    total_timesteps  | 127614   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0156   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 680      |
|    ep_rew_mean      | 1.15     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4052     |
|    fps              | 385      |
|    time_elapsed     | 335      |
|    total_timesteps  | 129460   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0298   |
|    n_updates        | 19864    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 677      |
|    ep_rew_mean      | 1.13     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4056     |
|    fps              | 385      |
|    time_elapsed     | 336      |
|    total_timesteps  | 129594   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00031  |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 682      |
|    ep_rew_mean      | 1.19     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4116     |
|    fps              | 382      |
|    time_elapsed     | 343      |
|    total_timesteps  | 131422   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0271   |
|    n_updates        | 20355    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 683      |
|    ep_rew_mean      | 1.19     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4120     |
|    fps              | 382      |
|    time_elapsed     | 343      |
|    total_timesteps  | 131514   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000598 |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 695      |
|    ep_rew_mean      | 1.28     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4180     |
|    fps              | 380      |
|    time_elapsed     | 351      |
|    total_timesteps  | 133516   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000135 |
|    n_updates        | 20878    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 695      |
|    ep_rew_mean      | 1.28     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4184     |
|    fps              | 380      |
|    time_elapsed     | 351      |
|    total_timesteps  | 133608   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000336 |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 695      |
|    ep_rew_mean      | 1.33     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4244     |
|    fps              | 377      |
|    time_elapsed     | 358      |
|    total_timesteps  | 135582   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 9.31e-05 |
|    n_updates        | 21395    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 695      |
|    ep_rew_mean      | 1.33     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4248     |
|    fps              | 377      |
|    time_elapsed     | 359      |
|    total_timesteps  | 135674   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000586 |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 681      |
|    ep_rew_mean      | 1.19     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4308     |
|    fps              | 376      |
|    time_elapsed     | 365      |
|    total_timesteps  | 137388   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000272 |
|    n_updates        | 21846    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 681      |
|    ep_rew_mean      | 1.19     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4312     |
|    fps              | 376      |
|    time_elapsed     | 365      |
|    total_timesteps  | 137508   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000478 |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 692      |
|    ep_rew_mean      | 1.26     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4372     |
|    fps              | 373      |
|    time_elapsed     | 372      |
|    total_timesteps  | 139472   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.014    |
|    n_updates        | 22367    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 691      |
|    ep_rew_mean      | 1.25     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4376     |
|    fps              | 373      |
|    time_elapsed     | 373      |
|    total_timesteps  | 139639   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 4.78e-05 |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 704      |
|    ep_rew_mean      | 1.34     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4436     |
|    fps              | 371      |
|    time_elapsed     | 381      |
|    total_timesteps  | 141688   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000146 |
|    n_updates        | 22921    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 698      |
|    ep_rew_mean      | 1.31     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4440     |
|    fps              | 371      |
|    time_elapsed     | 382      |
|    total_timesteps  | 141780   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00073  |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 701      |
|    ep_rew_mean      | 1.36     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4500     |
|    fps              | 367      |
|    time_elapsed     | 391      |
|    total_timesteps  | 143913   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000663 |
|    n_updates        | 23478    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 701      |
|    ep_rew_mean      | 1.36     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4504     |
|    fps              | 367      |
|    time_elapsed     | 391      |
|    total_timesteps  | 144076   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000296 |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 702      |
|    ep_rew_mean      | 1.37     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4564     |
|    fps              | 365      |
|    time_elapsed     | 398      |
|    total_timesteps  | 145814   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 7.5e-05  |
|    n_updates        | 23953    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 703      |
|    ep_rew_mean      | 1.38     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4568     |
|    fps              | 365      |
|    time_elapsed     | 399      |
|    total_timesteps  | 145936   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000148 |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 720      |
|    ep_rew_mean      | 1.45     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4628     |
|    fps              | 362      |
|    time_elapsed     | 407      |
|    total_timesteps  | 147997   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000401 |
|    n_updates        | 24499    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 717      |
|    ep_rew_mean      | 1.43     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4632     |
|    fps              | 362      |
|    time_elapsed     | 408      |
|    total_timesteps  | 148089   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0137   |
|    n_updates      

# Seaquest
![Screen%20Shot%202023-01-03%20at%204.34.21%20PM.png](attachment:Screen%20Shot%202023-01-03%20at%204.34.21%20PM.png)

In [1]:
import gym
from stable_baselines3.common.env_util import make_atari_env
from stable_baselines3.common.vec_env import VecFrameStack
from stable_baselines3 import A2C, DQN

# There already exists an environment generator
# that will make and wrap atari environments correctly.
# Here we are also multi-worker training (n_envs=4 => 4 environments)
env = make_atari_env("SeaquestNoFrameskip-v4", n_envs=1, seed=0)

env = VecFrameStack(env, n_stack=1)

model = DQN("CnnPolicy", env, verbose=1, device="mps")
#model.learn(total_timesteps=1000000)
#model.save("SeaquestNoFrameskip-v4-106.zip")
model.load("SeaquestNoFrameskip-v4-10.zip")
#Test trained agent
vec_env = model.get_env()
obs = vec_env.reset()
for i in range(1000):
    action, _states = model.predict(obs, deterministic=True)
    obs, rewards, dones, info = vec_env.step(action)
    vec_env.render()
    if dones:
        obs = vec_env.reset()
        


  from .autonotebook import tqdm as notebook_tqdm


Using mps device
Wrapping the env in a VecTransposeImage.




In [8]:
model.save("SeaquestNoFrameskip-v4-10.zip")

# Space Invaders
![Screen%20Shot%202023-01-03%20at%204.34.01%20PM.png](attachment:Screen%20Shot%202023-01-03%20at%204.34.01%20PM.png)

In [None]:
import gym
from stable_baselines3.common.env_util import make_atari_env
from stable_baselines3.common.vec_env import VecFrameStack
from stable_baselines3 import A2C, DQN

# There already exists an environment generator
# that will make and wrap atari environments correctly.
# Here we are also multi-worker training (n_envs=4 => 4 environments)
env = make_atari_env("SpaceInvaders-v4",seed=0)#, n_envs=4, seed=0)
env = VecFrameStack(env, n_stack=1)
Alien-v5
modelSpace = DQN("CnnPolicy", env, verbose=1, device="mps")
modelSpace.learn(total_timesteps=1000000)

# Enjoy trained agent
vec_env = modelSpace.get_env()
obs = vec_env.reset()
for i in range(1000):
    action, _states = modelSpace.predict(obs, deterministic=True)
    obs, rewards, dones, info = vec_env.step(action)
    vec_env.render()


# Alien
![Screen%20Shot%202023-01-03%20at%204.34.52%20PM.png](attachment:Screen%20Shot%202023-01-03%20at%204.34.52%20PM.png)

In [None]:
import gym
from stable_baselines3.common.env_util import make_atari_env
from stable_baselines3.common.vec_env import VecFrameStack
from stable_baselines3 import A2C, DQN

# There already exists an environment generator
# that will make and wrap atari environments correctly.
env = make_atari_env("Alien-v4",seed=0)
env = VecFrameStack(env, n_stack=1)

model = DQN("CnnPolicy", env, verbose=1, device="mps")
model.learn(total_timesteps=1000000)

#test the trained agent
vec_env = model.get_env()
obs = vec_env.reset()
for i in range(1000):
    action, _states = model.predict(obs, deterministic=True)
    obs, rewards, dones, info = vec_env.step(action)
    vec_env.render()

# Bowling
![Screen%20Shot%202023-01-03%20at%204.32.53%20PM.png](attachment:Screen%20Shot%202023-01-03%20at%204.32.53%20PM.png)

In [4]:
import gym
from stable_baselines3.common.env_util import make_atari_env
from stable_baselines3.common.vec_env import VecFrameStack
from stable_baselines3 import A2C, DQN

# There already exists an environment generator
# that will make and wrap atari environments correctly.
env = make_atari_env("Bowling-v4",seed=0)
env = VecFrameStack(env, n_stack=1)

modelBowl = DQN("CnnPolicy", env, verbose=1, device="mps")
#model.learn(total_timesteps=10000)
modelBowl.load("Bowling.zip")

#test the trained agent
vec_env = modelBowl.get_env()
obs = vec_env.reset()
for i in range(1000):
    action, _states = modelBowl.predict(obs, deterministic=True)
    obs, rewards, dones, info = vec_env.step(action)
    vec_env.render()

Using mps device
Wrapping the env in a VecTransposeImage.


In [5]:
import numpy as np
import torch
import gym
import time

import gym
from stable_baselines3.common.env_util import make_atari_env
from stable_baselines3.common.vec_env import VecFrameStack
from stable_baselines3 import A2C, DQN


env = gym.make('BipedalWalker-v3')
modelBowl = A2C("CnnPolicy", env, verbose=1, device="mps")
#model.learn(total_timesteps=10000)
modelBowl.load("Bowling.zip")

#test the trained agent
vec_env = modelBowl.get_env()
obs = vec_env.reset()
for i in range(1000):
    action, _states = modelBowl.predict(obs, deterministic=True)
    obs, rewards, dones, info = vec_env.step(action)
    vec_env.render()

Using mps device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


AssertionError: You should use NatureCNN only with images not with Box([-inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf
 -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf], [inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf
 inf inf inf inf inf inf], (24,), float32)
(you are probably using `CnnPolicy` instead of `MlpPolicy` or `MultiInputPolicy`)
If you are using a custom environment,
please check it using our env checker:
https://stable-baselines3.readthedocs.io/en/master/common/env_checker.html