In [1]:
from stable_baselines3 import DQN, PPO
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
from stable_baselines3.common.env_checker import check_env
from env import DangerousDaveEnv
import time, os
from custom_cnn import policy_kwargs
import torch
from stable_baselines3.common.vec_env import SubprocVecEnv
import numpy as np

In [2]:
# Select the torch backend: Apple MPS first, then CUDA, otherwise CPU.
if torch.backends.mps.is_available():
    device = "mps"
elif torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device = torch.device(device)

# Run configuration (notebook stand-in for command-line flags).
train = True  # run training
evaluate = False  # run the evaluation episodes
model_name = "dqn_test_2"  # checkpoint base name; leave empty to auto-generate one
env_rep_type = 'text'  # 'text' or 'image'
model_type = 'DQN'  # 'DQN' or 'PPO' ('RND' is not handled by the training cell)
retrain = True  # resume from checkpoints/<model_name> instead of building a new model

checkpoint_timestamp = int(time.time())
if not model_name:
    # Bare name only: the training cell prepends "checkpoints/" itself and
    # tensorboard_log below is derived from it, so a directory prefix here
    # would yield "checkpoints/checkpoints/..." and a nested log directory.
    model_name = "dqn_ddave_{}".format(checkpoint_timestamp)

tensorboard_log = f"tensorboard_log/{model_name}"
tensorboard_log_run_name = '0'
print(model_name,tensorboard_log)

# Build the training environment and reset it once.
random_respawn = True
env = DangerousDaveEnv(render_mode="human", env_rep_type=env_rep_type, random_respawn=random_respawn)
obs, info = env.reset()

# Environment steps per learn() call.
total_timesteps = 60000

dqn_test_2 tensorboard_log/dqn_test_2
(1, 11, 19)
Box(0, 255, (1, 11, 19), uint8)




In [3]:
# Train and/or evaluate the agent selected by `model_type`, reading and writing
# the checkpoint at checkpoints/<model_name>.
algorithms = {'DQN': DQN, 'PPO': PPO}
if model_type not in algorithms:
    # Fail loudly: silently skipping an unknown type would leave `model`
    # undefined (or stale from an earlier run) when evaluation starts.
    raise ValueError(f"Unsupported model_type {model_type!r}; expected one of {sorted(algorithms)}")
algo_cls = algorithms[model_type]

checkpoint_path = "checkpoints/{}".format(model_name)
# NOTE: learn() keeps its default reset_num_timesteps=True, so the timestep
# counter (and DQN's exploration schedule) restarts on every call below.
learn_kwargs = dict(total_timesteps=total_timesteps, progress_bar=True,
                    tb_log_name=tensorboard_log_run_name, log_interval=1)

if train:
    if retrain:
        # load() defaults to device="auto", which does not select MPS, so the
        # configured device is passed explicitly on every reload.
        model = algo_cls.load(checkpoint_path, env=env, device=device, tensorboard_log=tensorboard_log)
    elif model_type == 'DQN':
        model = DQN("CnnPolicy", env, verbose=1, batch_size=256, policy_kwargs=policy_kwargs,
                    learning_starts=1000, exploration_fraction=0.5, exploration_final_eps=0.01, device=device,
                    target_update_interval=5000, buffer_size=100000, tensorboard_log=tensorboard_log)
    else:
        model = PPO("CnnPolicy", env, verbose=1, batch_size=256, policy_kwargs=policy_kwargs, device=device,
                    tensorboard_log=tensorboard_log, ent_coef=0.01, vf_coef=1)

    model.learn(**learn_kwargs)
    model.save(checkpoint_path)

    if model_type == 'DQN':
        # Second DQN phase: reload the checkpoint and keep training on an
        # environment created with random_respawn=False.
        env = DangerousDaveEnv(render_mode="human", env_rep_type=env_rep_type, random_respawn=False)
        model = DQN.load(checkpoint_path, env=env, device=device, tensorboard_log=tensorboard_log)
        obs, info = env.reset()
        model.learn(**learn_kwargs)
        model.save(checkpoint_path)

if evaluate:
    # Always evaluate the checkpoint on disk, not whatever is in memory.
    model = algo_cls.load(checkpoint_path, env=env, device=device, tensorboard_log=tensorboard_log)

    n_eval_episodes = 5
    eps_reward = []
    # One evaluation environment for all episodes; reset() starts each episode
    # and close() releases it even if an episode raises.
    eval_env = DangerousDaveEnv(render_mode="human", env_rep_type=env_rep_type, random_respawn=False)
    try:
        for _ in range(n_eval_episodes):
            obs, info = eval_env.reset()
            terminated = False
            truncated = False
            episode_return = 0
            while not (terminated or truncated):
                action, _state = model.predict(obs, deterministic=True)
                obs, step_reward, terminated, truncated, info = eval_env.step(action)
                episode_return += step_reward
            eps_reward.append(episode_return)
    finally:
        eval_env.close()
    print(f'{np.mean(eps_reward)} eval reward mean')

            

Using mps device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Logging to tensorboard_log/dqn_test_2/0_18


Output()

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.5e+03  |
|    ep_rew_mean      | -1.5e+03 |
|    exploration_rate | 0.95     |
| time/               |          |
|    episodes         | 1        |
|    fps              | 71       |
|    time_elapsed     | 20       |
|    total_timesteps  | 1500     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 4.01e-07 |
|    n_updates        | 124      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.5e+03  |
|    ep_rew_mean      | -1.5e+03 |
|    exploration_rate | 0.901    |
| time/               |          |
|    episodes         | 2        |
|    fps              | 67       |
|    time_elapsed     | 44       |
|    total_timesteps  | 3000     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 7.47e-11 |
|    n_updates        | 499      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.5e+03  |
|    ep_rew_mean      | -1.5e+03 |
|    exploration_rate | 0.852    |
| time/               |          |
|    episodes         | 3        |
|    fps              | 65       |
|    time_elapsed     | 69       |
|    total_timesteps  | 4500     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 6.89e-11 |
|    n_updates        | 874      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.5e+03  |
|    ep_rew_mean      | -1.5e+03 |
|    exploration_rate | 0.802    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 63       |
|    time_elapsed     | 93       |
|    total_timesteps  | 6000     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 4.36e-10 |
|    n_updates        | 1249     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.5e+03  |
|    ep_rew_mean      | -1.5e+03 |
|    exploration_rate | 0.752    |
| time/               |          |
|    episodes         | 5        |
|    fps              | 62       |
|    time_elapsed     | 119      |
|    total_timesteps  | 7500     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 3.45e-08 |
|    n_updates        | 1624     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.5e+03  |
|    ep_rew_mean      | -1.5e+03 |
|    exploration_rate | 0.703    |
| time/               |          |
|    episodes         | 6        |
|    fps              | 61       |
|    time_elapsed     | 145      |
|    total_timesteps  | 9000     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00199  |
|    n_updates        | 1999     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.5e+03  |
|    ep_rew_mean      | -1.5e+03 |
|    exploration_rate | 0.653    |
| time/               |          |
|    episodes         | 7        |
|    fps              | 61       |
|    time_elapsed     | 171      |
|    total_timesteps  | 10500    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00195  |
|    n_updates        | 2374     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.5e+03  |
|    ep_rew_mean      | -1.5e+03 |
|    exploration_rate | 0.604    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 60       |
|    time_elapsed     | 198      |
|    total_timesteps  | 12000    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 3.39e-05 |
|    n_updates        | 2749     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.5e+03  |
|    ep_rew_mean      | -1.5e+03 |
|    exploration_rate | 0.554    |
| time/               |          |
|    episodes         | 9        |
|    fps              | 59       |
|    time_elapsed     | 226      |
|    total_timesteps  | 13500    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00196  |
|    n_updates        | 3124     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.5e+03  |
|    ep_rew_mean      | -1.5e+03 |
|    exploration_rate | 0.505    |
| time/               |          |
|    episodes         | 10       |
|    fps              | 59       |
|    time_elapsed     | 254      |
|    total_timesteps  | 15000    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 8.01e-06 |
|    n_updates        | 3499     |
----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 1.5e+03   |
|    ep_rew_mean      | -1.41e+03 |
|    exploration_rate | 0.456     |
| time/               |           |
|    episodes         | 11        |
|    fps              | 58        |
|    time_elapsed     | 282       |
|    total_timesteps  | 16500     |
| train/              |           |
|    learning_rate    | 0.0001    |
|    loss             | 2.68e-05  |
|    n_updates        | 3874      |
-----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 1.5e+03   |
|    ep_rew_mean      | -1.42e+03 |
|    exploration_rate | 0.357     |
| time/               |           |
|    episodes         | 13        |
|    fps              | 57        |
|    time_elapsed     | 340       |
|    total_timesteps  | 19500     |
| train/              |           |
|    learning_rate    | 0.0001    |
|    loss             | 0.00197   |
|    n_updates        | 4624      |
-----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 1.5e+03   |
|    ep_rew_mean      | -1.43e+03 |
|    exploration_rate | 0.307     |
| time/               |           |
|    episodes         | 14        |
|    fps              | 56        |
|    time_elapsed     | 370       |
|    total_timesteps  | 21000     |
| train/              |           |
|    learning_rate    | 0.0001    |
|    loss             | 5.42e-06  |
|    n_updates        | 4999      |
-----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 1.5e+03   |
|    ep_rew_mean      | -1.43e+03 |
|    exploration_rate | 0.258     |
| time/               |           |
|    episodes         | 15        |
|    fps              | 56        |
|    time_elapsed     | 400       |
|    total_timesteps  | 22500     |
| train/              |           |
|    learning_rate    | 0.0001    |
|    loss             | 6.25e-06  |
|    n_updates        | 5374      |
-----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 1.5e+03   |
|    ep_rew_mean      | -1.44e+03 |
|    exploration_rate | 0.208     |
| time/               |           |
|    episodes         | 16        |
|    fps              | 55        |
|    time_elapsed     | 430       |
|    total_timesteps  | 24000     |
| train/              |           |
|    learning_rate    | 0.0001    |
|    loss             | 0.0021    |
|    n_updates        | 5749      |
-----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 1.5e+03   |
|    ep_rew_mean      | -1.39e+03 |
|    exploration_rate | 0.158     |
| time/               |           |
|    episodes         | 17        |
|    fps              | 55        |
|    time_elapsed     | 461       |
|    total_timesteps  | 25500     |
| train/              |           |
|    learning_rate    | 0.0001    |
|    loss             | 1.56e-05  |
|    n_updates        | 6124      |
-----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 1.5e+03   |
|    ep_rew_mean      | -1.39e+03 |
|    exploration_rate | 0.109     |
| time/               |           |
|    episodes         | 18        |
|    fps              | 54        |
|    time_elapsed     | 492       |
|    total_timesteps  | 27000     |
| train/              |           |
|    learning_rate    | 0.0001    |
|    loss             | 0.000269  |
|    n_updates        | 6499      |
-----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.5e+03  |
|    ep_rew_mean      | -1.4e+03 |
|    exploration_rate | 0.0595   |
| time/               |          |
|    episodes         | 19       |
|    fps              | 54       |
|    time_elapsed     | 524      |
|    total_timesteps  | 28500    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00197  |
|    n_updates        | 6874     |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.5e+03  |
|    ep_rew_mean      | -1.4e+03 |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 20       |
|    fps              | 53       |
|    time_elapsed     | 559      |
|    total_timesteps  | 30000    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 2.12e-05 |
|    n_updates        | 7249     |
----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 1.5e+03   |
|    ep_rew_mean      | -1.41e+03 |
|    exploration_rate | 0.01      |
| time/               |           |
|    episodes         | 21        |
|    fps              | 53        |
|    time_elapsed     | 592       |
|    total_timesteps  | 31500     |
| train/              |           |
|    learning_rate    | 0.0001    |
|    loss             | 7.06e-05  |
|    n_updates        | 7624      |
-----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 1.5e+03   |
|    ep_rew_mean      | -1.41e+03 |
|    exploration_rate | 0.01      |
| time/               |           |
|    episodes         | 22        |
|    fps              | 52        |
|    time_elapsed     | 626       |
|    total_timesteps  | 33000     |
| train/              |           |
|    learning_rate    | 0.0001    |
|    loss             | 2.13e-05  |
|    n_updates        | 7999      |
-----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 1.5e+03   |
|    ep_rew_mean      | -1.42e+03 |
|    exploration_rate | 0.01      |
| time/               |           |
|    episodes         | 23        |
|    fps              | 52        |
|    time_elapsed     | 660       |
|    total_timesteps  | 34500     |
| train/              |           |
|    learning_rate    | 0.0001    |
|    loss             | 0.00395   |
|    n_updates        | 8374      |
-----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 1.5e+03   |
|    ep_rew_mean      | -1.42e+03 |
|    exploration_rate | 0.01      |
| time/               |           |
|    episodes         | 24        |
|    fps              | 51        |
|    time_elapsed     | 693       |
|    total_timesteps  | 36000     |
| train/              |           |
|    learning_rate    | 0.0001    |
|    loss             | 0.0023    |
|    n_updates        | 8749      |
-----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 1.5e+03   |
|    ep_rew_mean      | -1.42e+03 |
|    exploration_rate | 0.01      |
| time/               |           |
|    episodes         | 25        |
|    fps              | 51        |
|    time_elapsed     | 727       |
|    total_timesteps  | 37500     |
| train/              |           |
|    learning_rate    | 0.0001    |
|    loss             | 0.000127  |
|    n_updates        | 9124      |
-----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 1.5e+03   |
|    ep_rew_mean      | -1.43e+03 |
|    exploration_rate | 0.01      |
| time/               |           |
|    episodes         | 26        |
|    fps              | 51        |
|    time_elapsed     | 761       |
|    total_timesteps  | 39000     |
| train/              |           |
|    learning_rate    | 0.0001    |
|    loss             | 3.91e-05  |
|    n_updates        | 9499      |
-----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 1.5e+03   |
|    ep_rew_mean      | -1.43e+03 |
|    exploration_rate | 0.01      |
| time/               |           |
|    episodes         | 27        |
|    fps              | 50        |
|    time_elapsed     | 797       |
|    total_timesteps  | 40500     |
| train/              |           |
|    learning_rate    | 0.0001    |
|    loss             | 3.05e-05  |
|    n_updates        | 9874      |
-----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 1.5e+03   |
|    ep_rew_mean      | -1.43e+03 |
|    exploration_rate | 0.01      |
| time/               |           |
|    episodes         | 28        |
|    fps              | 50        |
|    time_elapsed     | 831       |
|    total_timesteps  | 42000     |
| train/              |           |
|    learning_rate    | 0.0001    |
|    loss             | 0.0021    |
|    n_updates        | 10249     |
-----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 1.5e+03   |
|    ep_rew_mean      | -1.44e+03 |
|    exploration_rate | 0.01      |
| time/               |           |
|    episodes         | 29        |
|    fps              | 50        |
|    time_elapsed     | 866       |
|    total_timesteps  | 43500     |
| train/              |           |
|    learning_rate    | 0.0001    |
|    loss             | 0.00398   |
|    n_updates        | 10624     |
-----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 1.5e+03   |
|    ep_rew_mean      | -1.44e+03 |
|    exploration_rate | 0.01      |
| time/               |           |
|    episodes         | 30        |
|    fps              | 49        |
|    time_elapsed     | 900       |
|    total_timesteps  | 45000     |
| train/              |           |
|    learning_rate    | 0.0001    |
|    loss             | 6.24e-05  |
|    n_updates        | 10999     |
-----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 1.5e+03   |
|    ep_rew_mean      | -1.44e+03 |
|    exploration_rate | 0.01      |
| time/               |           |
|    episodes         | 31        |
|    fps              | 49        |
|    time_elapsed     | 935       |
|    total_timesteps  | 46500     |
| train/              |           |
|    learning_rate    | 0.0001    |
|    loss             | 0.000176  |
|    n_updates        | 11374     |
-----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 1.5e+03   |
|    ep_rew_mean      | -1.44e+03 |
|    exploration_rate | 0.01      |
| time/               |           |
|    episodes         | 32        |
|    fps              | 49        |
|    time_elapsed     | 970       |
|    total_timesteps  | 48000     |
| train/              |           |
|    learning_rate    | 0.0001    |
|    loss             | 0.000159  |
|    n_updates        | 11749     |
-----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 1.5e+03   |
|    ep_rew_mean      | -1.44e+03 |
|    exploration_rate | 0.01      |
| time/               |           |
|    episodes         | 33        |
|    fps              | 49        |
|    time_elapsed     | 1005      |
|    total_timesteps  | 49500     |
| train/              |           |
|    learning_rate    | 0.0001    |
|    loss             | 0.00411   |
|    n_updates        | 12124     |
-----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 1.5e+03   |
|    ep_rew_mean      | -1.45e+03 |
|    exploration_rate | 0.01      |
| time/               |           |
|    episodes         | 34        |
|    fps              | 49        |
|    time_elapsed     | 1040      |
|    total_timesteps  | 51000     |
| train/              |           |
|    learning_rate    | 0.0001    |
|    loss             | 8.25e-05  |
|    n_updates        | 12499     |
-----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 1.5e+03   |
|    ep_rew_mean      | -1.45e+03 |
|    exploration_rate | 0.01      |
| time/               |           |
|    episodes         | 35        |
|    fps              | 48        |
|    time_elapsed     | 1075      |
|    total_timesteps  | 52500     |
| train/              |           |
|    learning_rate    | 0.0001    |
|    loss             | 0.00206   |
|    n_updates        | 12874     |
-----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 1.5e+03   |
|    ep_rew_mean      | -1.45e+03 |
|    exploration_rate | 0.01      |
| time/               |           |
|    episodes         | 36        |
|    fps              | 48        |
|    time_elapsed     | 1111      |
|    total_timesteps  | 54000     |
| train/              |           |
|    learning_rate    | 0.0001    |
|    loss             | 0.00436   |
|    n_updates        | 13249     |
-----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 1.5e+03   |
|    ep_rew_mean      | -1.45e+03 |
|    exploration_rate | 0.01      |
| time/               |           |
|    episodes         | 37        |
|    fps              | 48        |
|    time_elapsed     | 1148      |
|    total_timesteps  | 55500     |
| train/              |           |
|    learning_rate    | 0.0001    |
|    loss             | 0.00239   |
|    n_updates        | 13624     |
-----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 1.5e+03   |
|    ep_rew_mean      | -1.45e+03 |
|    exploration_rate | 0.01      |
| time/               |           |
|    episodes         | 38        |
|    fps              | 48        |
|    time_elapsed     | 1185      |
|    total_timesteps  | 57000     |
| train/              |           |
|    learning_rate    | 0.0001    |
|    loss             | 0.0041    |
|    n_updates        | 13999     |
-----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 1.5e+03   |
|    ep_rew_mean      | -1.45e+03 |
|    exploration_rate | 0.01      |
| time/               |           |
|    episodes         | 39        |
|    fps              | 47        |
|    time_elapsed     | 1221      |
|    total_timesteps  | 58500     |
| train/              |           |
|    learning_rate    | 0.0001    |
|    loss             | 0.000778  |
|    n_updates        | 14374     |
-----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 1.5e+03   |
|    ep_rew_mean      | -1.45e+03 |
|    exploration_rate | 0.01      |
| time/               |           |
|    episodes         | 40        |
|    fps              | 47        |
|    time_elapsed     | 1258      |
|    total_timesteps  | 60000     |
| train/              |           |
|    learning_rate    | 0.0001    |
|    loss             | 0.00477   |
|    n_updates        | 14749     |
-----------------------------------




(1, 11, 19)
Box(0, 255, (1, 11, 19), uint8)
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Logging to tensorboard_log/dqn_test_2/0_19


Output()

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.5e+03  |
|    ep_rew_mean      | -1.5e+03 |
|    exploration_rate | 0.95     |
| time/               |          |
|    episodes         | 1        |
|    fps              | 52       |
|    time_elapsed     | 28       |
|    total_timesteps  | 1500     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00193  |
|    n_updates        | 14874    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.5e+03  |
|    ep_rew_mean      | -1.5e+03 |
|    exploration_rate | 0.901    |
| time/               |          |
|    episodes         | 2        |
|    fps              | 29       |
|    time_elapsed     | 102      |
|    total_timesteps  | 3000     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 4.88e-05 |
|    n_updates        | 15249    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.5e+03  |
|    ep_rew_mean      | -1.5e+03 |
|    exploration_rate | 0.852    |
| time/               |          |
|    episodes         | 3        |
|    fps              | 24       |
|    time_elapsed     | 184      |
|    total_timesteps  | 4500     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 3.11e-05 |
|    n_updates        | 15624    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.5e+03  |
|    ep_rew_mean      | -1.5e+03 |
|    exploration_rate | 0.802    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 22       |
|    time_elapsed     | 260      |
|    total_timesteps  | 6000     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 4.32e-05 |
|    n_updates        | 15999    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.5e+03  |
|    ep_rew_mean      | -1.5e+03 |
|    exploration_rate | 0.752    |
| time/               |          |
|    episodes         | 5        |
|    fps              | 22       |
|    time_elapsed     | 335      |
|    total_timesteps  | 7500     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 7.11e-05 |
|    n_updates        | 16374    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.5e+03  |
|    ep_rew_mean      | -1.5e+03 |
|    exploration_rate | 0.703    |
| time/               |          |
|    episodes         | 6        |
|    fps              | 21       |
|    time_elapsed     | 426      |
|    total_timesteps  | 9000     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 8.14e-05 |
|    n_updates        | 16749    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.5e+03  |
|    ep_rew_mean      | -1.5e+03 |
|    exploration_rate | 0.653    |
| time/               |          |
|    episodes         | 7        |
|    fps              | 18       |
|    time_elapsed     | 576      |
|    total_timesteps  | 10500    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000156 |
|    n_updates        | 17124    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.5e+03  |
|    ep_rew_mean      | -1.5e+03 |
|    exploration_rate | 0.604    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 16       |
|    time_elapsed     | 735      |
|    total_timesteps  | 12000    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 5.31e-06 |
|    n_updates        | 17499    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.5e+03  |
|    ep_rew_mean      | -1.5e+03 |
|    exploration_rate | 0.554    |
| time/               |          |
|    episodes         | 9        |
|    fps              | 15       |
|    time_elapsed     | 886      |
|    total_timesteps  | 13500    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00227  |
|    n_updates        | 17874    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.5e+03  |
|    ep_rew_mean      | -1.5e+03 |
|    exploration_rate | 0.505    |
| time/               |          |
|    episodes         | 10       |
|    fps              | 14       |
|    time_elapsed     | 1031     |
|    total_timesteps  | 15000    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000116 |
|    n_updates        | 18249    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.5e+03  |
|    ep_rew_mean      | -1.5e+03 |
|    exploration_rate | 0.456    |
| time/               |          |
|    episodes         | 11       |
|    fps              | 13       |
|    time_elapsed     | 1188     |
|    total_timesteps  | 16500    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 9.54e-06 |
|    n_updates        | 18624    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.5e+03  |
|    ep_rew_mean      | -1.5e+03 |
|    exploration_rate | 0.406    |
| time/               |          |
|    episodes         | 12       |
|    fps              | 12       |
|    time_elapsed     | 1401     |
|    total_timesteps  | 18000    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 8.81e-05 |
|    n_updates        | 18999    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.5e+03  |
|    ep_rew_mean      | -1.5e+03 |
|    exploration_rate | 0.357    |
| time/               |          |
|    episodes         | 13       |
|    fps              | 12       |
|    time_elapsed     | 1567     |
|    total_timesteps  | 19500    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 3.41e-05 |
|    n_updates        | 19374    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.5e+03  |
|    ep_rew_mean      | -1.5e+03 |
|    exploration_rate | 0.307    |
| time/               |          |
|    episodes         | 14       |
|    fps              | 12       |
|    time_elapsed     | 1733     |
|    total_timesteps  | 21000    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 4.89e-07 |
|    n_updates        | 19749    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.5e+03  |
|    ep_rew_mean      | -1.5e+03 |
|    exploration_rate | 0.258    |
| time/               |          |
|    episodes         | 15       |
|    fps              | 12       |
|    time_elapsed     | 1835     |
|    total_timesteps  | 22500    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 1.98e-05 |
|    n_updates        | 20124    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.5e+03  |
|    ep_rew_mean      | -1.5e+03 |
|    exploration_rate | 0.208    |
| time/               |          |
|    episodes         | 16       |
|    fps              | 12       |
|    time_elapsed     | 1900     |
|    total_timesteps  | 24000    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 4.15e-05 |
|    n_updates        | 20499    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.5e+03  |
|    ep_rew_mean      | -1.5e+03 |
|    exploration_rate | 0.158    |
| time/               |          |
|    episodes         | 17       |
|    fps              | 12       |
|    time_elapsed     | 1966     |
|    total_timesteps  | 25500    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0022   |
|    n_updates        | 20874    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.5e+03  |
|    ep_rew_mean      | -1.5e+03 |
|    exploration_rate | 0.109    |
| time/               |          |
|    episodes         | 18       |
|    fps              | 13       |
|    time_elapsed     | 2033     |
|    total_timesteps  | 27000    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00194  |
|    n_updates        | 21249    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.5e+03  |
|    ep_rew_mean      | -1.5e+03 |
|    exploration_rate | 0.0595   |
| time/               |          |
|    episodes         | 19       |
|    fps              | 13       |
|    time_elapsed     | 2102     |
|    total_timesteps  | 28500    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000476 |
|    n_updates        | 21624    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.5e+03  |
|    ep_rew_mean      | -1.5e+03 |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 20       |
|    fps              | 13       |
|    time_elapsed     | 2169     |
|    total_timesteps  | 30000    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 1.78e-05 |
|    n_updates        | 21999    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.5e+03  |
|    ep_rew_mean      | -1.5e+03 |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 21       |
|    fps              | 14       |
|    time_elapsed     | 2240     |
|    total_timesteps  | 31500    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000212 |
|    n_updates        | 22374    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.5e+03  |
|    ep_rew_mean      | -1.5e+03 |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 22       |
|    fps              | 14       |
|    time_elapsed     | 2313     |
|    total_timesteps  | 33000    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 7.74e-05 |
|    n_updates        | 22749    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.5e+03  |
|    ep_rew_mean      | -1.5e+03 |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 23       |
|    fps              | 14       |
|    time_elapsed     | 2386     |
|    total_timesteps  | 34500    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000119 |
|    n_updates        | 23124    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.5e+03  |
|    ep_rew_mean      | -1.5e+03 |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 24       |
|    fps              | 14       |
|    time_elapsed     | 2458     |
|    total_timesteps  | 36000    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 6.22e-05 |
|    n_updates        | 23499    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.5e+03  |
|    ep_rew_mean      | -1.5e+03 |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 25       |
|    fps              | 14       |
|    time_elapsed     | 2532     |
|    total_timesteps  | 37500    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 1.63e-05 |
|    n_updates        | 23874    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.5e+03  |
|    ep_rew_mean      | -1.5e+03 |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 26       |
|    fps              | 14       |
|    time_elapsed     | 2606     |
|    total_timesteps  | 39000    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00114  |
|    n_updates        | 24249    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.5e+03  |
|    ep_rew_mean      | -1.5e+03 |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 27       |
|    fps              | 15       |
|    time_elapsed     | 2681     |
|    total_timesteps  | 40500    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000135 |
|    n_updates        | 24624    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.5e+03  |
|    ep_rew_mean      | -1.5e+03 |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 28       |
|    fps              | 15       |
|    time_elapsed     | 2757     |
|    total_timesteps  | 42000    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 2.36e-05 |
|    n_updates        | 24999    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.5e+03  |
|    ep_rew_mean      | -1.5e+03 |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 29       |
|    fps              | 15       |
|    time_elapsed     | 2833     |
|    total_timesteps  | 43500    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 5.47e-05 |
|    n_updates        | 25374    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.5e+03  |
|    ep_rew_mean      | -1.5e+03 |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 30       |
|    fps              | 15       |
|    time_elapsed     | 2910     |
|    total_timesteps  | 45000    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000232 |
|    n_updates        | 25749    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.5e+03  |
|    ep_rew_mean      | -1.5e+03 |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 31       |
|    fps              | 15       |
|    time_elapsed     | 2987     |
|    total_timesteps  | 46500    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 2.29e-05 |
|    n_updates        | 26124    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.5e+03  |
|    ep_rew_mean      | -1.5e+03 |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 32       |
|    fps              | 15       |
|    time_elapsed     | 3063     |
|    total_timesteps  | 48000    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000152 |
|    n_updates        | 26499    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.5e+03  |
|    ep_rew_mean      | -1.5e+03 |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 33       |
|    fps              | 15       |
|    time_elapsed     | 3141     |
|    total_timesteps  | 49500    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00195  |
|    n_updates        | 26874    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.5e+03  |
|    ep_rew_mean      | -1.5e+03 |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 34       |
|    fps              | 15       |
|    time_elapsed     | 3218     |
|    total_timesteps  | 51000    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000485 |
|    n_updates        | 27249    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.5e+03  |
|    ep_rew_mean      | -1.5e+03 |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 35       |
|    fps              | 15       |
|    time_elapsed     | 3295     |
|    total_timesteps  | 52500    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 3.23e-05 |
|    n_updates        | 27624    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.5e+03  |
|    ep_rew_mean      | -1.5e+03 |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 36       |
|    fps              | 16       |
|    time_elapsed     | 3373     |
|    total_timesteps  | 54000    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 7.9e-05  |
|    n_updates        | 27999    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.5e+03  |
|    ep_rew_mean      | -1.5e+03 |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 37       |
|    fps              | 16       |
|    time_elapsed     | 3451     |
|    total_timesteps  | 55500    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 7.53e-05 |
|    n_updates        | 28374    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.5e+03  |
|    ep_rew_mean      | -1.5e+03 |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 38       |
|    fps              | 16       |
|    time_elapsed     | 3528     |
|    total_timesteps  | 57000    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000227 |
|    n_updates        | 28749    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.5e+03  |
|    ep_rew_mean      | -1.5e+03 |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 39       |
|    fps              | 16       |
|    time_elapsed     | 3605     |
|    total_timesteps  | 58500    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000365 |
|    n_updates        | 29124    |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.5e+03  |
|    ep_rew_mean      | -1.5e+03 |
|    exploration_rate | 0.01     |
| time/               |          |
|    episodes         | 40       |
|    fps              | 16       |
|    time_elapsed     | 3683     |
|    total_timesteps  | 60000    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.000124 |
|    n_updates        | 29499    |
----------------------------------


Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
(1, 11, 19)
Box(0, 255, (1, 11, 19), uint8)




(1, 11, 19)
Box(0, 255, (1, 11, 19), uint8)




(1, 11, 19)
Box(0, 255, (1, 11, 19), uint8)




(1, 11, 19)
Box(0, 255, (1, 11, 19), uint8)




(1, 11, 19)
Box(0, 255, (1, 11, 19), uint8)
-1500.0 eval reward mean


In [4]:
# Mean of the values stored in `eps_reward` (presumably the per-episode
# evaluation returns gathered by the training/evaluation cell — confirm there).
np.asarray(eps_reward).mean()

-1500.0

In [5]:
# Display the raw `eps_reward` list so the individual values behind the mean
# computed in the previous cell can be inspected.
eps_reward

[-1500, -1500, -1500, -1500, -1500]

In [None]:
# Path of the TensorBoard run directory to open:
#   <tensorboard_log>/<tensorboard_log_run_name>_5/
# NOTE(review): the `_5` suffix is hard-coded; presumably it selects one
# specific logged run under this name — confirm it matches the run you want
# to inspect before launching TensorBoard.
logfile = f"{tensorboard_log}/{tensorboard_log_run_name}_5/"
logfile

In [None]:
!tensorboard --logdir {logfile}