In [None]:
import gymnasium as gym
import torch
from stable_baselines3 import DQN
from stable_baselines3.common.monitor import Monitor
from gymnasium.wrappers import TimeLimit
from stable_baselines3.common.callbacks import BaseCallback, EvalCallback
from torch.utils.tensorboard import SummaryWriter
from stable_baselines3.common.callbacks import BaseCallback

class CustomTensorboardCallback(BaseCallback):
    """Write per-episode statistics to TensorBoard whenever an episode ends.

    Three scalars are written, indexed by ``self.num_timesteps``:

    * ``custom/episode_reward``   - total reward collected over the episode
    * ``custom/episode_length``   - number of steps in the episode
    * ``custom/distance_to_goal`` - Euclidean norm of ``info["distance_to_goal"]``

    Episode return and length are taken from the ``info["episode"]`` entry that
    Stable-Baselines3's ``Monitor`` wrapper adds on the final step of an episode
    (keys ``"r"`` and ``"l"``). When that entry is absent, the callback falls
    back to totals it accumulates itself from the per-step rewards.

    Args:
        log_dir: Directory handed to ``SummaryWriter``.
        verbose: Verbosity level forwarded to ``BaseCallback``.
    """

    def __init__(self, log_dir="./logs/walls_dqn/", verbose=1):
        super().__init__(verbose)
        self.writer = SummaryWriter(log_dir)
        # Running totals for the episode in progress, one slot per environment.
        # Sized lazily on the first step because the number of environments is
        # only known once training has started.
        self._running_returns = []
        self._running_lengths = []

    def _on_training_start(self) -> None:
        # learn() resets the environments by default, so totals left over from a
        # previous learn() call would not describe the new first episode.
        self._running_returns = []
        self._running_lengths = []

    def _on_step(self) -> bool:
        """Update running totals and log every environment whose episode ended.

        Returns:
            Always ``True`` so that training is never interrupted by this callback.
        """
        rewards = self.locals["rewards"]
        dones = self.locals["dones"]
        infos = self.locals["infos"]

        if len(self._running_returns) != len(dones):
            self._running_returns = [0.0] * len(dones)
            self._running_lengths = [0] * len(dones)

        for env_idx, done in enumerate(dones):
            self._running_returns[env_idx] += float(rewards[env_idx])
            self._running_lengths[env_idx] += 1
            if not done:
                continue

            info = infos[env_idx]
            monitor_stats = info.get("episode")
            if monitor_stats is not None:
                # Monitor's figures are preferred: they are computed inside the
                # wrapper from the rewards the environment actually returned.
                episode_return = monitor_stats["r"]
                episode_length = monitor_stats["l"]
            else:
                episode_return = self._running_returns[env_idx]
                episode_length = self._running_lengths[env_idx]

            # An explicit "episode_length" reported by the environment still
            # takes precedence, as it did before.
            length = info.get("episode_length", episode_length)

            # assumes distance_to_goal is a 2-component vector - TODO confirm
            goal_vec = info.get("distance_to_goal", [0, 0])
            goal_dist = (goal_vec[0]**2 + goal_vec[1]**2)**0.5

            self.writer.add_scalar("custom/episode_reward", episode_return, self.num_timesteps)
            self.writer.add_scalar("custom/episode_length", length, self.num_timesteps)
            self.writer.add_scalar("custom/distance_to_goal", goal_dist, self.num_timesteps)

            # Start counting afresh for the next episode of this environment.
            self._running_returns[env_idx] = 0.0
            self._running_lengths[env_idx] = 0
        return True

    def _on_training_end(self) -> None:
        """Close the TensorBoard writer once training has finished."""
        self.writer.close()



# NOTE(review): star import kept as-is; presumably importing this module is what
# registers "wallsEnv" with gymnasium - confirm, then replace with an explicit import.
from Environments.set_up_envs import *

# Training environment. Monitor writes episode statistics plus the two listed
# info keys to monitor.csv.
env = Monitor(
    gym.make("wallsEnv"),
    "monitor.csv",
    info_keywords=("distance_to_goal", "distance_to_nearest_obstacle"),
)  # TimeLimit(gym.make("wallsEnv"),20)

# NOTE(review): eval_callback is built but never passed to model.learn(), so no
# evaluation runs. It also wraps the *training* env; evaluating on it would reset
# that env in the middle of training episodes. Use a separate env instance before
# wiring it in via learn(callback=...).
eval_callback = EvalCallback(env, eval_freq=100, verbose=1)

INITIAL_LEARNING_RATE = 1e-3

policy_kwargs = dict(
    optimizer_class=torch.optim.Adam,
    optimizer_kwargs=dict(eps=1e-5),
)
# model = trained_model
model = DQN(
    "MlpPolicy",
    env,
    # Stable-Baselines3 calls the schedule with the *remaining* progress, which
    # goes from 1 (start of training) down to 0 (end). Multiplying by it gives a
    # linear decay from INITIAL_LEARNING_RATE to 0; the previous
    # `1e-3 * (1 - progress)` started at 0 and grew instead.
    learning_rate=lambda progress_remaining: INITIAL_LEARNING_RATE * progress_remaining,
    policy_kwargs=policy_kwargs,
    tensorboard_log="./logTensor/",
    verbose=1,
)

trained_model = model.learn(total_timesteps=10_000, log_interval=10, progress_bar=True)


Logging to ./logTensor/DQN_4


Output()

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 100      |
|    ep_rew_mean      | 2.83     |
|    exploration_rate | 0.905    |
| time/               |          |
|    episodes         | 1        |
|    fps              | 0        |
|    time_elapsed     | 168      |
|    total_timesteps  | 100      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 66       |
|    ep_rew_mean      | -3.06    |
|    exploration_rate | 0.875    |
| time/               |          |
|    episodes         | 2        |
|    fps              | 0        |
|    time_elapsed     | 221      |
|    total_timesteps  | 132      |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0244   |
|    n_updates        | 232      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 77.3     |
|    ep_rew_mean      | -1.38    |
|    exploration_rate | 0.78     |
| time/               |          |
|    episodes         | 3        |
|    fps              | 0        |
|    time_elapsed     | 383      |
|    total_timesteps  | 232      |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0262   |
|    n_updates        | 257      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 83       |
|    ep_rew_mean      | -0.604   |
|    exploration_rate | 0.685    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 0        |
|    time_elapsed     | 544      |
|    total_timesteps  | 332      |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 1.09     |
|    n_updates        | 282      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 86.4     |
|    ep_rew_mean      | 0.011    |
|    exploration_rate | 0.59     |
| time/               |          |
|    episodes         | 5        |
|    fps              | 0        |
|    time_elapsed     | 705      |
|    total_timesteps  | 432      |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0232   |
|    n_updates        | 307      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 76.8     |
|    ep_rew_mean      | -1.58    |
|    exploration_rate | 0.562    |
| time/               |          |
|    episodes         | 6        |
|    fps              | 0        |
|    time_elapsed     | 755      |
|    total_timesteps  | 461      |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.378    |
|    n_updates        | 315      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 72.1     |
|    ep_rew_mean      | -2.7     |
|    exploration_rate | 0.52     |
| time/               |          |
|    episodes         | 7        |
|    fps              | 0        |
|    time_elapsed     | 827      |
|    total_timesteps  | 505      |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 1.56     |
|    n_updates        | 326      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 64.2     |
|    ep_rew_mean      | -3.6     |
|    exploration_rate | 0.512    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 0        |
|    time_elapsed     | 845      |
|    total_timesteps  | 514      |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 1.28     |
|    n_updates        | 328      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 58.6     |
|    ep_rew_mean      | -4.27    |
|    exploration_rate | 0.499    |
| time/               |          |
|    episodes         | 9        |
|    fps              | 0        |
|    time_elapsed     | 868      |
|    total_timesteps  | 527      |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0172   |
|    n_updates        | 331      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 62.7     |
|    ep_rew_mean      | -3.65    |
|    exploration_rate | 0.404    |
| time/               |          |
|    episodes         | 10       |
|    fps              | 0        |
|    time_elapsed     | 1027     |
|    total_timesteps  | 627      |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0155   |
|    n_updates        | 356      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 66.1     |
|    ep_rew_mean      | -2.64    |
|    exploration_rate | 0.309    |
| time/               |          |
|    episodes         | 11       |
|    fps              | 0        |
|    time_elapsed     | 1186     |
|    total_timesteps  | 727      |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.429    |
|    n_updates        | 381      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 65.1     |
|    ep_rew_mean      | -3.18    |
|    exploration_rate | 0.258    |
| time/               |          |
|    episodes         | 12       |
|    fps              | 0        |
|    time_elapsed     | 1268     |
|    total_timesteps  | 781      |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00891  |
|    n_updates        | 395      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 62.9     |
|    ep_rew_mean      | -3.66    |
|    exploration_rate | 0.223    |
| time/               |          |
|    episodes         | 13       |
|    fps              | 0        |
|    time_elapsed     | 1329     |
|    total_timesteps  | 818      |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.482    |
|    n_updates        | 404      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 60.2     |
|    ep_rew_mean      | -4.08    |
|    exploration_rate | 0.199    |
| time/               |          |
|    episodes         | 14       |
|    fps              | 0        |
|    time_elapsed     | 1370     |
|    total_timesteps  | 843      |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00761  |
|    n_updates        | 410      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 59.5     |
|    ep_rew_mean      | -4.38    |
|    exploration_rate | 0.152    |
| time/               |          |
|    episodes         | 15       |
|    fps              | 0        |
|    time_elapsed     | 1453     |
|    total_timesteps  | 893      |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00625  |
|    n_updates        | 423      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 62.1     |
|    ep_rew_mean      | -3.95    |
|    exploration_rate | 0.0567   |
| time/               |          |
|    episodes         | 16       |
|    fps              | 0        |
|    time_elapsed     | 1609     |
|    total_timesteps  | 993      |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0103   |
|    n_updates        | 448      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 59.6     |
|    ep_rew_mean      | -4.3     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 17       |
|    fps              | 0        |
|    time_elapsed     | 1644     |
|    total_timesteps  | 1013     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0087   |
|    n_updates        | 453      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 57.1     |
|    ep_rew_mean      | -4.6     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 18       |
|    fps              | 0        |
|    time_elapsed     | 1670     |
|    total_timesteps  | 1027     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00786  |
|    n_updates        | 456      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 58.8     |
|    ep_rew_mean      | -4.8     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 19       |
|    fps              | 0        |
|    time_elapsed     | 1807     |
|    total_timesteps  | 1118     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00886  |
|    n_updates        | 479      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 56.9     |
|    ep_rew_mean      | -5.05    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 20       |
|    fps              | 0        |
|    time_elapsed     | 1841     |
|    total_timesteps  | 1137     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.376    |
|    n_updates        | 484      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 55.4     |
|    ep_rew_mean      | -5.25    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 21       |
|    fps              | 0        |
|    time_elapsed     | 1884     |
|    total_timesteps  | 1163     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.506    |
|    n_updates        | 490      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 53.2     |
|    ep_rew_mean      | -5.45    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 22       |
|    fps              | 0        |
|    time_elapsed     | 1903     |
|    total_timesteps  | 1170     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0075   |
|    n_updates        | 492      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 53.6     |
|    ep_rew_mean      | -5.6     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 23       |
|    fps              | 0        |
|    time_elapsed     | 1995     |
|    total_timesteps  | 1232     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00757  |
|    n_updates        | 507      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 51.9     |
|    ep_rew_mean      | -5.78    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 24       |
|    fps              | 0        |
|    time_elapsed     | 2021     |
|    total_timesteps  | 1246     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.534    |
|    n_updates        | 511      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 53.2     |
|    ep_rew_mean      | -5.74    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 25       |
|    fps              | 0        |
|    time_elapsed     | 2154     |
|    total_timesteps  | 1330     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0096   |
|    n_updates        | 532      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 51.7     |
|    ep_rew_mean      | -5.9     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 26       |
|    fps              | 0        |
|    time_elapsed     | 2176     |
|    total_timesteps  | 1344     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00711  |
|    n_updates        | 535      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 53.5     |
|    ep_rew_mean      | -5.35    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 27       |
|    fps              | 0        |
|    time_elapsed     | 2338     |
|    total_timesteps  | 1444     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0106   |
|    n_updates        | 560      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 53.7     |
|    ep_rew_mean      | -5.47    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 28       |
|    fps              | 0        |
|    time_elapsed     | 2430     |
|    total_timesteps  | 1503     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.903    |
|    n_updates        | 575      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 55.3     |
|    ep_rew_mean      | -5.04    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 29       |
|    fps              | 0        |
|    time_elapsed     | 2586     |
|    total_timesteps  | 1603     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.982    |
|    n_updates        | 600      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 54.2     |
|    ep_rew_mean      | -5.18    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 30       |
|    fps              | 0        |
|    time_elapsed     | 2625     |
|    total_timesteps  | 1626     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0101   |
|    n_updates        | 606      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 53.5     |
|    ep_rew_mean      | -5.3     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 31       |
|    fps              | 0        |
|    time_elapsed     | 2682     |
|    total_timesteps  | 1660     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00977  |
|    n_updates        | 614      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 52       |
|    ep_rew_mean      | -5.44    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 32       |
|    fps              | 0        |
|    time_elapsed     | 2693     |
|    total_timesteps  | 1665     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.984    |
|    n_updates        | 616      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 50.6     |
|    ep_rew_mean      | -5.58    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 33       |
|    fps              | 0        |
|    time_elapsed     | 2704     |
|    total_timesteps  | 1671     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.426    |
|    n_updates        | 617      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 49.4     |
|    ep_rew_mean      | -5.71    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 34       |
|    fps              | 0        |
|    time_elapsed     | 2723     |
|    total_timesteps  | 1679     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.509    |
|    n_updates        | 619      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 49.9     |
|    ep_rew_mean      | -5.79    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 35       |
|    fps              | 0        |
|    time_elapsed     | 2830     |
|    total_timesteps  | 1747     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.451    |
|    n_updates        | 636      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 49.1     |
|    ep_rew_mean      | -5.88    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 36       |
|    fps              | 0        |
|    time_elapsed     | 2864     |
|    total_timesteps  | 1766     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.523    |
|    n_updates        | 641      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 48.2     |
|    ep_rew_mean      | -5.98    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 37       |
|    fps              | 0        |
|    time_elapsed     | 2897     |
|    total_timesteps  | 1785     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00935  |
|    n_updates        | 646      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 47.2     |
|    ep_rew_mean      | -6.08    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 38       |
|    fps              | 0        |
|    time_elapsed     | 2912     |
|    total_timesteps  | 1793     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0105   |
|    n_updates        | 648      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 46.6     |
|    ep_rew_mean      | -6.17    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 39       |
|    fps              | 0        |
|    time_elapsed     | 2951     |
|    total_timesteps  | 1816     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.432    |
|    n_updates        | 653      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 47       |
|    ep_rew_mean      | -6.24    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 40       |
|    fps              | 0        |
|    time_elapsed     | 3051     |
|    total_timesteps  | 1879     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.955    |
|    n_updates        | 669      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 46.8     |
|    ep_rew_mean      | -6.28    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 41       |
|    fps              | 0        |
|    time_elapsed     | 3117     |
|    total_timesteps  | 1919     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.548    |
|    n_updates        | 679      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 46.7     |
|    ep_rew_mean      | -6.32    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 42       |
|    fps              | 0        |
|    time_elapsed     | 3183     |
|    total_timesteps  | 1961     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 1.06     |
|    n_updates        | 690      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 45.8     |
|    ep_rew_mean      | -5.89    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 43       |
|    fps              | 0        |
|    time_elapsed     | 3197     |
|    total_timesteps  | 1968     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.463    |
|    n_updates        | 691      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 45.3     |
|    ep_rew_mean      | -5.98    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 44       |
|    fps              | 0        |
|    time_elapsed     | 3240     |
|    total_timesteps  | 1995     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.806    |
|    n_updates        | 698      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 44.5     |
|    ep_rew_mean      | -6.06    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 45       |
|    fps              | 0        |
|    time_elapsed     | 3257     |
|    total_timesteps  | 2003     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00744  |
|    n_updates        | 700      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 44.3     |
|    ep_rew_mean      | -6.13    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 46       |
|    fps              | 0        |
|    time_elapsed     | 3314     |
|    total_timesteps  | 2039     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 1.02     |
|    n_updates        | 709      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 43.7     |
|    ep_rew_mean      | -6.21    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 47       |
|    fps              | 0        |
|    time_elapsed     | 3342     |
|    total_timesteps  | 2054     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0101   |
|    n_updates        | 713      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 43       |
|    ep_rew_mean      | -6.29    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 48       |
|    fps              | 0        |
|    time_elapsed     | 3361     |
|    total_timesteps  | 2063     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00943  |
|    n_updates        | 715      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 42.6     |
|    ep_rew_mean      | -6.35    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 49       |
|    fps              | 0        |
|    time_elapsed     | 3401     |
|    total_timesteps  | 2087     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.513    |
|    n_updates        | 721      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 42.7     |
|    ep_rew_mean      | -6.41    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 50       |
|    fps              | 0        |
|    time_elapsed     | 3479     |
|    total_timesteps  | 2135     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0117   |
|    n_updates        | 733      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 42.4     |
|    ep_rew_mean      | -6.47    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 51       |
|    fps              | 0        |
|    time_elapsed     | 3524     |
|    total_timesteps  | 2163     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00523  |
|    n_updates        | 740      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 42.3     |
|    ep_rew_mean      | -6.52    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 52       |
|    fps              | 0        |
|    time_elapsed     | 3589     |
|    total_timesteps  | 2202     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00981  |
|    n_updates        | 750      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 42.2     |
|    ep_rew_mean      | -6.58    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 53       |
|    fps              | 0        |
|    time_elapsed     | 3648     |
|    total_timesteps  | 2237     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.457    |
|    n_updates        | 759      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 41.5     |
|    ep_rew_mean      | -6.64    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 54       |
|    fps              | 0        |
|    time_elapsed     | 3660     |
|    total_timesteps  | 2243     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.985    |
|    n_updates        | 760      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 41.3     |
|    ep_rew_mean      | -6.69    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 55       |
|    fps              | 0        |
|    time_elapsed     | 3703     |
|    total_timesteps  | 2270     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.911    |
|    n_updates        | 767      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 41.1     |
|    ep_rew_mean      | -6.75    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 56       |
|    fps              | 0        |
|    time_elapsed     | 3756     |
|    total_timesteps  | 2302     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.507    |
|    n_updates        | 775      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 40.6     |
|    ep_rew_mean      | -6.8     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 57       |
|    fps              | 0        |
|    time_elapsed     | 3784     |
|    total_timesteps  | 2317     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.426    |
|    n_updates        | 779      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 40.1     |
|    ep_rew_mean      | -6.84    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 58       |
|    fps              | 0        |
|    time_elapsed     | 3802     |
|    total_timesteps  | 2327     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00819  |
|    n_updates        | 781      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 39.5     |
|    ep_rew_mean      | -6.89    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 59       |
|    fps              | 0        |
|    time_elapsed     | 3815     |
|    total_timesteps  | 2332     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00886  |
|    n_updates        | 782      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 39.2     |
|    ep_rew_mean      | -6.94    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 60       |
|    fps              | 0        |
|    time_elapsed     | 3853     |
|    total_timesteps  | 2354     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 1.06     |
|    n_updates        | 788      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 39.2     |
|    ep_rew_mean      | -6.98    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 61       |
|    fps              | 0        |
|    time_elapsed     | 3909     |
|    total_timesteps  | 2390     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0138   |
|    n_updates        | 797      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 38.8     |
|    ep_rew_mean      | -7.02    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 62       |
|    fps              | 0        |
|    time_elapsed     | 3941     |
|    total_timesteps  | 2408     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.999    |
|    n_updates        | 801      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 39.8     |
|    ep_rew_mean      | -6.89    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 63       |
|    fps              | 0        |
|    time_elapsed     | 4102     |
|    total_timesteps  | 2508     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.858    |
|    n_updates        | 826      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 39.2     |
|    ep_rew_mean      | -6.93    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 64       |
|    fps              | 0        |
|    time_elapsed     | 4111     |
|    total_timesteps  | 2510     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.407    |
|    n_updates        | 827      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 39.2     |
|    ep_rew_mean      | -6.96    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 65       |
|    fps              | 0        |
|    time_elapsed     | 4174     |
|    total_timesteps  | 2548     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0136   |
|    n_updates        | 836      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 39       |
|    ep_rew_mean      | -7       |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 66       |
|    fps              | 0        |
|    time_elapsed     | 4217     |
|    total_timesteps  | 2573     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00609  |
|    n_updates        | 843      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 38.5     |
|    ep_rew_mean      | -7.04    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 67       |
|    fps              | 0        |
|    time_elapsed     | 4231     |
|    total_timesteps  | 2580     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00667  |
|    n_updates        | 844      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 38.8     |
|    ep_rew_mean      | -7.06    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 68       |
|    fps              | 0        |
|    time_elapsed     | 4325     |
|    total_timesteps  | 2639     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00559  |
|    n_updates        | 859      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 38.7     |
|    ep_rew_mean      | -7.1     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 69       |
|    fps              | 0        |
|    time_elapsed     | 4383     |
|    total_timesteps  | 2671     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0143   |
|    n_updates        | 867      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 38.4     |
|    ep_rew_mean      | -7.13    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 70       |
|    fps              | 0        |
|    time_elapsed     | 4416     |
|    total_timesteps  | 2690     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0112   |
|    n_updates        | 872      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 38.5     |
|    ep_rew_mean      | -7.16    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 71       |
|    fps              | 0        |
|    time_elapsed     | 4487     |
|    total_timesteps  | 2733     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.434    |
|    n_updates        | 883      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 38.2     |
|    ep_rew_mean      | -7.2     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 72       |
|    fps              | 0        |
|    time_elapsed     | 4513     |
|    total_timesteps  | 2749     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00685  |
|    n_updates        | 887      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 38.2     |
|    ep_rew_mean      | -7.23    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 73       |
|    fps              | 0        |
|    time_elapsed     | 4581     |
|    total_timesteps  | 2791     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.378    |
|    n_updates        | 897      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 38.1     |
|    ep_rew_mean      | -7.25    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 74       |
|    fps              | 0        |
|    time_elapsed     | 4631     |
|    total_timesteps  | 2820     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.917    |
|    n_updates        | 904      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 38.6     |
|    ep_rew_mean      | -7.24    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 75       |
|    fps              | 0        |
|    time_elapsed     | 4742     |
|    total_timesteps  | 2892     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.928    |
|    n_updates        | 922      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 38.2     |
|    ep_rew_mean      | -7.27    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 76       |
|    fps              | 0        |
|    time_elapsed     | 4768     |
|    total_timesteps  | 2905     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00768  |
|    n_updates        | 926      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 38.1     |
|    ep_rew_mean      | -7.29    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 77       |
|    fps              | 0        |
|    time_elapsed     | 4818     |
|    total_timesteps  | 2936     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 1.08     |
|    n_updates        | 933      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 38.1     |
|    ep_rew_mean      | -7.32    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 78       |
|    fps              | 0        |
|    time_elapsed     | 4882     |
|    total_timesteps  | 2974     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00817  |
|    n_updates        | 943      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 38       |
|    ep_rew_mean      | -7.34    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 79       |
|    fps              | 0        |
|    time_elapsed     | 4925     |
|    total_timesteps  | 3002     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.445    |
|    n_updates        | 950      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 37.6     |
|    ep_rew_mean      | -7.38    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 80       |
|    fps              | 0        |
|    time_elapsed     | 4937     |
|    total_timesteps  | 3008     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.546    |
|    n_updates        | 951      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 37.6     |
|    ep_rew_mean      | -7.4     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 81       |
|    fps              | 0        |
|    time_elapsed     | 4997     |
|    total_timesteps  | 3044     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.954    |
|    n_updates        | 960      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 37.6     |
|    ep_rew_mean      | -7.42    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 82       |
|    fps              | 0        |
|    time_elapsed     | 5058     |
|    total_timesteps  | 3080     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0108   |
|    n_updates        | 969      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 37.7     |
|    ep_rew_mean      | -7.43    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 83       |
|    fps              | 0        |
|    time_elapsed     | 5138     |
|    total_timesteps  | 3131     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0108   |
|    n_updates        | 982      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 38.3     |
|    ep_rew_mean      | -7.45    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 84       |
|    fps              | 0        |
|    time_elapsed     | 5273     |
|    total_timesteps  | 3219     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0117   |
|    n_updates        | 1004     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 38.2     |
|    ep_rew_mean      | -7.47    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 85       |
|    fps              | 0        |
|    time_elapsed     | 5322     |
|    total_timesteps  | 3249     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0102   |
|    n_updates        | 1012     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 38.2     |
|    ep_rew_mean      | -7.49    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 86       |
|    fps              | 0        |
|    time_elapsed     | 5381     |
|    total_timesteps  | 3285     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.531    |
|    n_updates        | 1021     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 37.9     |
|    ep_rew_mean      | -7.52    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 87       |
|    fps              | 0        |
|    time_elapsed     | 5411     |
|    total_timesteps  | 3301     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.791    |
|    n_updates        | 1025     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 37.7     |
|    ep_rew_mean      | -7.54    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 88       |
|    fps              | 0        |
|    time_elapsed     | 5444     |
|    total_timesteps  | 3320     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00569  |
|    n_updates        | 1029     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 37.6     |
|    ep_rew_mean      | -7.56    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 89       |
|    fps              | 0        |
|    time_elapsed     | 5483     |
|    total_timesteps  | 3343     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 1.41     |
|    n_updates        | 1035     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 37.5     |
|    ep_rew_mean      | -7.58    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 90       |
|    fps              | 0        |
|    time_elapsed     | 5533     |
|    total_timesteps  | 3374     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.505    |
|    n_updates        | 1043     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 37.4     |
|    ep_rew_mean      | -7.6     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 91       |
|    fps              | 0        |
|    time_elapsed     | 5576     |
|    total_timesteps  | 3399     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.363    |
|    n_updates        | 1049     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 37.8     |
|    ep_rew_mean      | -7.58    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 92       |
|    fps              | 0        |
|    time_elapsed     | 5696     |
|    total_timesteps  | 3476     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.48     |
|    n_updates        | 1068     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 38.2     |
|    ep_rew_mean      | -7.59    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 93       |
|    fps              | 0        |
|    time_elapsed     | 5815     |
|    total_timesteps  | 3552     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0102   |
|    n_updates        | 1087     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 37.9     |
|    ep_rew_mean      | -7.62    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 94       |
|    fps              | 0        |
|    time_elapsed     | 5831     |
|    total_timesteps  | 3559     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00825  |
|    n_updates        | 1089     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 37.6     |
|    ep_rew_mean      | -7.64    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 95       |
|    fps              | 0        |
|    time_elapsed     | 5862     |
|    total_timesteps  | 3575     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00618  |
|    n_updates        | 1093     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 37.3     |
|    ep_rew_mean      | -7.66    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 96       |
|    fps              | 0        |
|    time_elapsed     | 5874     |
|    total_timesteps  | 3581     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00575  |
|    n_updates        | 1095     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 36.9     |
|    ep_rew_mean      | -7.69    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 97       |
|    fps              | 0        |
|    time_elapsed     | 5882     |
|    total_timesteps  | 3583     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 36.7     |
|    ep_rew_mean      | -7.71    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 98       |
|    fps              | 0        |
|    time_elapsed     | 5904     |
|    total_timesteps  | 3593     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0251   |
|    n_updates        | 1098     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 36.5     |
|    ep_rew_mean      | -7.73    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 99       |
|    fps              | 0        |
|    time_elapsed     | 5935     |
|    total_timesteps  | 3611     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0102   |
|    n_updates        | 1102     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 36.3     |
|    ep_rew_mean      | -7.74    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 100      |
|    fps              | 0        |
|    time_elapsed     | 5966     |
|    total_timesteps  | 3628     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.529    |
|    n_updates        | 1106     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 35.3     |
|    ep_rew_mean      | -7.87    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 101      |
|    fps              | 0        |
|    time_elapsed     | 5975     |
|    total_timesteps  | 3632     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.463    |
|    n_updates        | 1107     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 35.5     |
|    ep_rew_mean      | -7.87    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 102      |
|    fps              | 0        |
|    time_elapsed     | 6050     |
|    total_timesteps  | 3680     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.699    |
|    n_updates        | 1119     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 35.1     |
|    ep_rew_mean      | -7.98    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 103      |
|    fps              | 0        |
|    time_elapsed     | 6145     |
|    total_timesteps  | 3740     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0134   |
|    n_updates        | 1134     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 34.1     |
|    ep_rew_mean      | -8.1     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 104      |
|    fps              | 0        |
|    time_elapsed     | 6154     |
|    total_timesteps  | 3744     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0123   |
|    n_updates        | 1135     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 33.4     |
|    ep_rew_mean      | -8.22    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 105      |
|    fps              | 0        |
|    time_elapsed     | 6197     |
|    total_timesteps  | 3769     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 1.45     |
|    n_updates        | 1142     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 33.2     |
|    ep_rew_mean      | -8.22    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 106      |
|    fps              | 0        |
|    time_elapsed     | 6224     |
|    total_timesteps  | 3782     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.473    |
|    n_updates        | 1145     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 33.2     |
|    ep_rew_mean      | -8.21    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 107      |
|    fps              | 0        |
|    time_elapsed     | 6294     |
|    total_timesteps  | 3825     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.381    |
|    n_updates        | 1156     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 33.3     |
|    ep_rew_mean      | -8.21    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 108      |
|    fps              | 0        |
|    time_elapsed     | 6326     |
|    total_timesteps  | 3842     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00889  |
|    n_updates        | 1160     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 33.5     |
|    ep_rew_mean      | -8.2     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 109      |
|    fps              | 0        |
|    time_elapsed     | 6376     |
|    total_timesteps  | 3873     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00902  |
|    n_updates        | 1168     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 32.9     |
|    ep_rew_mean      | -8.31    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 110      |
|    fps              | 0        |
|    time_elapsed     | 6443     |
|    total_timesteps  | 3913     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00819  |
|    n_updates        | 1178     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 32.4     |
|    ep_rew_mean      | -8.47    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 111      |
|    fps              | 0        |
|    time_elapsed     | 6532     |
|    total_timesteps  | 3967     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00617  |
|    n_updates        | 1191     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 32       |
|    ep_rew_mean      | -8.47    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 112      |
|    fps              | 0        |
|    time_elapsed     | 6563     |
|    total_timesteps  | 3984     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0145   |
|    n_updates        | 1195     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 32.2     |
|    ep_rew_mean      | -8.47    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 113      |
|    fps              | 0        |
|    time_elapsed     | 6651     |
|    total_timesteps  | 4042     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00672  |
|    n_updates        | 1210     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 32.1     |
|    ep_rew_mean      | -8.47    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 114      |
|    fps              | 0        |
|    time_elapsed     | 6669     |
|    total_timesteps  | 4050     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00792  |
|    n_updates        | 1212     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 31.8     |
|    ep_rew_mean      | -8.48    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 115      |
|    fps              | 0        |
|    time_elapsed     | 6707     |
|    total_timesteps  | 4073     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.491    |
|    n_updates        | 1218     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 30.8     |
|    ep_rew_mean      | -8.61    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 116      |
|    fps              | 0        |
|    time_elapsed     | 6717     |
|    total_timesteps  | 4077     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00584  |
|    n_updates        | 1219     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 30.7     |
|    ep_rew_mean      | -8.61    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 117      |
|    fps              | 0        |
|    time_elapsed     | 6737     |
|    total_timesteps  | 4087     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.545    |
|    n_updates        | 1221     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 30.7     |
|    ep_rew_mean      | -8.61    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 118      |
|    fps              | 0        |
|    time_elapsed     | 6750     |
|    total_timesteps  | 4093     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 1.46     |
|    n_updates        | 1223     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 30.4     |
|    ep_rew_mean      | -8.61    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 119      |
|    fps              | 0        |
|    time_elapsed     | 6855     |
|    total_timesteps  | 4161     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.531    |
|    n_updates        | 1240     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 30.4     |
|    ep_rew_mean      | -8.61    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 120      |
|    fps              | 0        |
|    time_elapsed     | 6892     |
|    total_timesteps  | 4182     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.509    |
|    n_updates        | 1245     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 30.8     |
|    ep_rew_mean      | -8.6     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 121      |
|    fps              | 0        |
|    time_elapsed     | 6994     |
|    total_timesteps  | 4245     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.915    |
|    n_updates        | 1261     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 31.8     |
|    ep_rew_mean      | -8.48    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 122      |
|    fps              | 0        |
|    time_elapsed     | 7149     |
|    total_timesteps  | 4345     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.357    |
|    n_updates        | 1286     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 31.9     |
|    ep_rew_mean      | -8.47    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 123      |
|    fps              | 0        |
|    time_elapsed     | 7279     |
|    total_timesteps  | 4426     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.505    |
|    n_updates        | 1306     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 31.9     |
|    ep_rew_mean      | -8.48    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 124      |
|    fps              | 0        |
|    time_elapsed     | 7294     |
|    total_timesteps  | 4433     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0129   |
|    n_updates        | 1308     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 31.4     |
|    ep_rew_mean      | -8.5     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 125      |
|    fps              | 0        |
|    time_elapsed     | 7359     |
|    total_timesteps  | 4472     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.483    |
|    n_updates        | 1317     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 31.8     |
|    ep_rew_mean      | -8.49    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 126      |
|    fps              | 0        |
|    time_elapsed     | 7436     |
|    total_timesteps  | 4519     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.467    |
|    n_updates        | 1329     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 31.4     |
|    ep_rew_mean      | -8.67    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 127      |
|    fps              | 0        |
|    time_elapsed     | 7532     |
|    total_timesteps  | 4579     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 1.03     |
|    n_updates        | 1344     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 31.4     |
|    ep_rew_mean      | -8.67    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 128      |
|    fps              | 0        |
|    time_elapsed     | 7636     |
|    total_timesteps  | 4644     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0073   |
|    n_updates        | 1360     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 30.8     |
|    ep_rew_mean      | -8.83    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 129      |
|    fps              | 0        |
|    time_elapsed     | 7696     |
|    total_timesteps  | 4680     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 1.04     |
|    n_updates        | 1369     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 30.8     |
|    ep_rew_mean      | -8.84    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 130      |
|    fps              | 0        |
|    time_elapsed     | 7741     |
|    total_timesteps  | 4707     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.528    |
|    n_updates        | 1376     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 30.5     |
|    ep_rew_mean      | -8.85    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 131      |
|    fps              | 0        |
|    time_elapsed     | 7751     |
|    total_timesteps  | 4711     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 1.02     |
|    n_updates        | 1377     |
----------------------------------


In [None]:
# import gymnasium as gym
# import torch
# from stable_baselines3 import DQN
# from stable_baselines3.common.monitor import Monitor
# from stable_baselines3.common.callbacks import BaseCallback, EvalCallback
# import Environments.set_up_envs
# env = gym.make("wallsEnv")
# policy_kwargs = dict(optimizer_class=torch.optim.Adam)
# model = DQN(
#     "MlpPolicy",
#     env,
#     learning_rate=1e-3,  # fallback if optimizer_kwargs not used
#     policy_kwargs=policy_kwargs,
#     verbose=1
# )
# Roll out the trained policy for a short, bounded number of steps to inspect its
# behaviour. Relies on `env` and `model` created in the training cell above.
MAX_ROLLOUT_STEPS = 20  # upper bound on the number of environment steps to print

obs, _reset_info = env.reset()
for _step in range(MAX_ROLLOUT_STEPS):
    # deterministic=True -> act greedily w.r.t. the learned Q-values; the default
    # (False) keeps sampling exploratory actions, which hides what was learned.
    action, _state = model.predict(obs, deterministic=True)
    obs, reward, terminated, truncated, _step_info = env.step(int(action))
    done = terminated or truncated
    print(f"Pos: {obs}, Reward: {reward}")
    if done:
        # An episode may end by termination OR by truncation (e.g. a time limit),
        # so report which one occurred instead of always claiming success.
        print(f"Episode finished (terminated={terminated}, truncated={truncated})")
        break


Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Observation from reset(): [78.38492   -2.0972388 19.6        5.148721 ]
Observation from reset() shape: (4,)
Observation space low: [ 0.        -3.1415927 -1.         0.       ]
Observation space high: [141.42136     3.1415927 141.42136     6.2831855]
Observation space shape: (4,)
Is obs within space? True
Observation from reset(): [77.399315  -2.0748956 19.6        5.148721 ]
Observation from reset() shape: (4,)
Observation space low: [ 0.        -3.1415927 -1.         0.       ]
Observation space high: [141.42136     3.1415927 141.42136     6.2831855]
Observation space shape: (4,)
Is obs within space? True
Step 1: Action=1, Reward=-9.787096977233887, Obs=[77.399315  -2.0748956 19.6        5.148721 ]
Pos: [77.399315  -2.0748956 19.6        5.148721 ], Reward: -9.787096977233887


KeyboardInterrupt: 

In [None]:
# Minimal TorchRL DQN training run on the custom "wallsEnv" environment.
# Relies on `gym` and the "wallsEnv" registration from the first cell.
# NOTE(review): written against the TorchRL Trainer/collector API as documented;
# confirm names against the installed torchrl version.
from torchrl.envs import GymWrapper, TransformedEnv, Compose, DoubleToFloat
from torchrl.modules import MLP, QValueActor
from torchrl.objectives import DQNLoss, SoftUpdate
from torchrl.collectors import SyncDataCollector
from torchrl.trainers import Trainer
import torch

TOTAL_FRAMES = 5000      # total number of environment frames to collect
FRAMES_PER_BATCH = 100   # frames gathered per collector iteration

# GymWrapper wraps an already-constructed gym env (GymEnv expects an env *name*).
# DoubleToFloat casts any float64 entries to float32 so they match the network dtype.
env = TransformedEnv(GymWrapper(gym.make("wallsEnv")), Compose(DoubleToFloat()))

# Size the network from the env specs instead of hard-coding them: the notebook
# output shows a 4-dimensional observation, so in_features=2 could not work.
n_observations = env.observation_spec["observation"].shape[-1]
# assumes a discrete action space exposed as a one-hot spec (GymWrapper default) — TODO confirm
n_actions = env.action_spec.shape[-1]

q_network = MLP(in_features=n_observations, out_features=n_actions, depth=2, num_cells=64)
# QValueActor turns the Q-value outputs into a greedy action written to the tensordict.
model = QValueActor(q_network, in_keys=["observation"], spec=env.action_spec)

loss_fn = DQNLoss(model, action_space=env.action_spec, delay_value=True)
# delay_value=True creates a target network; it must be updated explicitly.
target_updater = SoftUpdate(loss_fn, eps=0.995)
optimizer = torch.optim.Adam(loss_fn.parameters(), lr=1e-3)

collector = SyncDataCollector(
    env,
    policy=model,
    frames_per_batch=FRAMES_PER_BATCH,
    total_frames=TOTAL_FRAMES,
)
trainer = Trainer(
    collector=collector,
    total_frames=TOTAL_FRAMES,
    frame_skip=1,
    optim_steps_per_batch=1,
    loss_module=loss_fn,
    optimizer=optimizer,
)
# Refresh the target network after every optimisation step.
trainer.register_op("post_optim", target_updater.step)

trainer.train()


KeyboardInterrupt: 