# Highway with SB3's DQN

##  Warming up
We start with a few useful installs and imports:

In [1]:
# Install environment and agent
#!pip install highway-env
#!pip install --user git+https://github.com/eleurent/highway-env
# TODO: we use the bleeding edge version because the current stable version does not support the latest gym>=0.21 versions. Revert back to stable at the next SB3 release.
#!pip install git+https://github.com/carlosluis/stable-baselines3@fix_tests
#!pip install tensorboard
# Environment
import gym
# NOTE(review): `highway_env` is never referenced by name in the visible cells;
# presumably it is imported for its side effect of registering the
# "highway-fast-v0" environment id used below — confirm.
import highway_env

# Agent
from stable_baselines3 import DQN

# Visualization utils
# Load the TensorBoard notebook extension so the `%tensorboard` magic used in
# the Training section is available.
%load_ext tensorboard
# `sys` is only used by the commented-out `sys.path.insert` lines below.
import sys
from tqdm.notebook import trange
#!pip install tensorboardx gym pyvirtualdisplay
#!apt-get install -y xvfb python-opengl ffmpeg
#!git clone https://github.com/eleurent/highway-env.git 2> /dev/null
#sys.path.insert(0, '../highway-env/scripts/')
#sys.path.insert(0, '/content/highway-env/scripts/')
# NOTE(review): `utils` presumably is the `scripts/utils.py` module of the
# highway-env repository cloned above. With both `sys.path.insert` lines
# commented out, this import only succeeds if that module is already
# importable (e.g. it sits next to the notebook) — confirm before a
# Restart & Run All.
from utils import record_videos, show_videos

In [2]:
#!pip install tqdm

In [3]:
#import gymnasium
#sys.modules["gym"] = gymnasium

## Training
Run tensorboard locally to visualize training.

In [4]:
# Open TensorBoard inline on the "highway_dqn" log directory, i.e. the path
# that the training cell passes to DQN as `tensorboard_log`.
%tensorboard --logdir "highway_dqn"

In [5]:
# Build a DQN agent with an MLP policy on the "highway-fast-v0" environment.
# The environment is passed by id, so SB3 creates and wraps it itself
# (see the "Creating environment from the given name" log line below).
model = DQN(
    "MlpPolicy",
    "highway-fast-v0",
    # Network architecture
    policy_kwargs={"net_arch": [256, 256]},
    # Optimisation
    learning_rate=5e-4,
    batch_size=32,
    gamma=0.8,
    # Replay buffer and update schedule
    buffer_size=15000,
    learning_starts=200,
    train_freq=1,
    gradient_steps=1,
    target_update_interval=50,
    # Exploration schedule
    exploration_fraction=0.7,
    # Logging: console output plus TensorBoard event files under highway_dqn/
    verbose=1,
    tensorboard_log="highway_dqn/",
)

# Train for 20 000 timesteps. `learn` is left as the last expression so the
# cell still displays the returned model object.
model.learn(20_000)


Using cuda device
Creating environment from the given name 'highway-fast-v0'
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Logging to highway_dqn/DQN_6
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 10.8     |
|    ep_rew_mean      | 7.95     |
|    exploration_rate | 0.997    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 26       |
|    time_elapsed     | 1        |
|    total_timesteps  | 43       |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 9.75     |
|    ep_rew_mean      | 7.32     |
|    exploration_rate | 0.995    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 26       |
|    time_elapsed     | 2        |
|    total_timesteps  | 78       |
----------------------------------
----------------------------------
| rollout/  

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 10.1     |
|    ep_rew_mean      | 7.58     |
|    exploration_rate | 0.951    |
| time/               |          |
|    episodes         | 72       |
|    fps              | 24       |
|    time_elapsed     | 30       |
|    total_timesteps  | 725      |
| train/              |          |
|    learning_rate    | 0.0005   |
|    loss             | 0.263    |
|    n_updates        | 524      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 10.2     |
|    ep_rew_mean      | 7.7      |
|    exploration_rate | 0.947    |
| time/               |          |
|    episodes         | 76       |
|    fps              | 24       |
|    time_elapsed     | 32       |
|    total_timesteps  | 774      |
| train/              |          |
|    learning_rate    | 0.0005   |
|    loss             | 0.367    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 11.1     |
|    ep_rew_mean      | 8.27     |
|    exploration_rate | 0.9      |
| time/               |          |
|    episodes         | 136      |
|    fps              | 24       |
|    time_elapsed     | 60       |
|    total_timesteps  | 1472     |
| train/              |          |
|    learning_rate    | 0.0005   |
|    loss             | 0.368    |
|    n_updates        | 1271     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 11.2     |
|    ep_rew_mean      | 8.41     |
|    exploration_rate | 0.897    |
| time/               |          |
|    episodes         | 140      |
|    fps              | 24       |
|    time_elapsed     | 62       |
|    total_timesteps  | 1518     |
| train/              |          |
|    learning_rate    | 0.0005   |
|    loss             | 0.291    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 12.1     |
|    ep_rew_mean      | 8.97     |
|    exploration_rate | 0.847    |
| time/               |          |
|    episodes         | 200      |
|    fps              | 24       |
|    time_elapsed     | 92       |
|    total_timesteps  | 2253     |
| train/              |          |
|    learning_rate    | 0.0005   |
|    loss             | 0.361    |
|    n_updates        | 2052     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 12       |
|    ep_rew_mean      | 8.9      |
|    exploration_rate | 0.845    |
| time/               |          |
|    episodes         | 204      |
|    fps              | 24       |
|    time_elapsed     | 93       |
|    total_timesteps  | 2283     |
| train/              |          |
|    learning_rate    | 0.0005   |
|    loss             | 0.525    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 12.7     |
|    ep_rew_mean      | 9.43     |
|    exploration_rate | 0.793    |
| time/               |          |
|    episodes         | 264      |
|    fps              | 24       |
|    time_elapsed     | 124      |
|    total_timesteps  | 3054     |
| train/              |          |
|    learning_rate    | 0.0005   |
|    loss             | 0.368    |
|    n_updates        | 2853     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 12.2     |
|    ep_rew_mean      | 9.02     |
|    exploration_rate | 0.791    |
| time/               |          |
|    episodes         | 268      |
|    fps              | 24       |
|    time_elapsed     | 125      |
|    total_timesteps  | 3084     |
| train/              |          |
|    learning_rate    | 0.0005   |
|    loss             | 0.118    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 13.7     |
|    ep_rew_mean      | 10.3     |
|    exploration_rate | 0.733    |
| time/               |          |
|    episodes         | 328      |
|    fps              | 24       |
|    time_elapsed     | 159      |
|    total_timesteps  | 3940     |
| train/              |          |
|    learning_rate    | 0.0005   |
|    loss             | 0.202    |
|    n_updates        | 3739     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 13.8     |
|    ep_rew_mean      | 10.4     |
|    exploration_rate | 0.728    |
| time/               |          |
|    episodes         | 332      |
|    fps              | 24       |
|    time_elapsed     | 162      |
|    total_timesteps  | 4003     |
| train/              |          |
|    learning_rate    | 0.0005   |
|    loss             | 0.341    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 14       |
|    ep_rew_mean      | 10.6     |
|    exploration_rate | 0.673    |
| time/               |          |
|    episodes         | 392      |
|    fps              | 24       |
|    time_elapsed     | 194      |
|    total_timesteps  | 4819     |
| train/              |          |
|    learning_rate    | 0.0005   |
|    loss             | 0.17     |
|    n_updates        | 4618     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 13.9     |
|    ep_rew_mean      | 10.5     |
|    exploration_rate | 0.669    |
| time/               |          |
|    episodes         | 396      |
|    fps              | 24       |
|    time_elapsed     | 197      |
|    total_timesteps  | 4880     |
| train/              |          |
|    learning_rate    | 0.0005   |
|    loss             | 0.175    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 13.8     |
|    ep_rew_mean      | 10.5     |
|    exploration_rate | 0.611    |
| time/               |          |
|    episodes         | 456      |
|    fps              | 24       |
|    time_elapsed     | 231      |
|    total_timesteps  | 5729     |
| train/              |          |
|    learning_rate    | 0.0005   |
|    loss             | 0.227    |
|    n_updates        | 5528     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 14.3     |
|    ep_rew_mean      | 10.8     |
|    exploration_rate | 0.606    |
| time/               |          |
|    episodes         | 460      |
|    fps              | 24       |
|    time_elapsed     | 234      |
|    total_timesteps  | 5808     |
| train/              |          |
|    learning_rate    | 0.0005   |
|    loss             | 0.11     |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 13.8     |
|    ep_rew_mean      | 10.6     |
|    exploration_rate | 0.55     |
| time/               |          |
|    episodes         | 520      |
|    fps              | 24       |
|    time_elapsed     | 267      |
|    total_timesteps  | 6635     |
| train/              |          |
|    learning_rate    | 0.0005   |
|    loss             | 0.231    |
|    n_updates        | 6434     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 13.4     |
|    ep_rew_mean      | 10.3     |
|    exploration_rate | 0.548    |
| time/               |          |
|    episodes         | 524      |
|    fps              | 24       |
|    time_elapsed     | 268      |
|    total_timesteps  | 6668     |
| train/              |          |
|    learning_rate    | 0.0005   |
|    loss             | 0.394    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 14.5     |
|    ep_rew_mean      | 11.2     |
|    exploration_rate | 0.482    |
| time/               |          |
|    episodes         | 584      |
|    fps              | 24       |
|    time_elapsed     | 307      |
|    total_timesteps  | 7631     |
| train/              |          |
|    learning_rate    | 0.0005   |
|    loss             | 0.178    |
|    n_updates        | 7430     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 14.7     |
|    ep_rew_mean      | 11.3     |
|    exploration_rate | 0.477    |
| time/               |          |
|    episodes         | 588      |
|    fps              | 24       |
|    time_elapsed     | 310      |
|    total_timesteps  | 7712     |
| train/              |          |
|    learning_rate    | 0.0005   |
|    loss             | 0.345    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.9     |
|    ep_rew_mean      | 12.5     |
|    exploration_rate | 0.411    |
| time/               |          |
|    episodes         | 648      |
|    fps              | 24       |
|    time_elapsed     | 349      |
|    total_timesteps  | 8674     |
| train/              |          |
|    learning_rate    | 0.0005   |
|    loss             | 0.159    |
|    n_updates        | 8473     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 16.4     |
|    ep_rew_mean      | 12.9     |
|    exploration_rate | 0.403    |
| time/               |          |
|    episodes         | 652      |
|    fps              | 24       |
|    time_elapsed     | 354      |
|    total_timesteps  | 8792     |
| train/              |          |
|    learning_rate    | 0.0005   |
|    loss             | 0.411    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 17.5     |
|    ep_rew_mean      | 13.9     |
|    exploration_rate | 0.334    |
| time/               |          |
|    episodes         | 712      |
|    fps              | 24       |
|    time_elapsed     | 395      |
|    total_timesteps  | 9822     |
| train/              |          |
|    learning_rate    | 0.0005   |
|    loss             | 0.301    |
|    n_updates        | 9621     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 17.2     |
|    ep_rew_mean      | 13.8     |
|    exploration_rate | 0.33     |
| time/               |          |
|    episodes         | 716      |
|    fps              | 24       |
|    time_elapsed     | 397      |
|    total_timesteps  | 9881     |
| train/              |          |
|    learning_rate    | 0.0005   |
|    loss             | 0.177    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 17       |
|    ep_rew_mean      | 13.7     |
|    exploration_rate | 0.26     |
| time/               |          |
|    episodes         | 776      |
|    fps              | 24       |
|    time_elapsed     | 439      |
|    total_timesteps  | 10908    |
| train/              |          |
|    learning_rate    | 0.0005   |
|    loss             | 0.092    |
|    n_updates        | 10707    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 17.4     |
|    ep_rew_mean      | 14       |
|    exploration_rate | 0.254    |
| time/               |          |
|    episodes         | 780      |
|    fps              | 24       |
|    time_elapsed     | 442      |
|    total_timesteps  | 10993    |
| train/              |          |
|    learning_rate    | 0.0005   |
|    loss             | 0.104    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 17.9     |
|    ep_rew_mean      | 14.6     |
|    exploration_rate | 0.178    |
| time/               |          |
|    episodes         | 840      |
|    fps              | 24       |
|    time_elapsed     | 487      |
|    total_timesteps  | 12107    |
| train/              |          |
|    learning_rate    | 0.0005   |
|    loss             | 0.0807   |
|    n_updates        | 11906    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 17.6     |
|    ep_rew_mean      | 14.3     |
|    exploration_rate | 0.175    |
| time/               |          |
|    episodes         | 844      |
|    fps              | 24       |
|    time_elapsed     | 489      |
|    total_timesteps  | 12159    |
| train/              |          |
|    learning_rate    | 0.0005   |
|    loss             | 0.0776   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 18.9     |
|    ep_rew_mean      | 15.5     |
|    exploration_rate | 0.0939   |
| time/               |          |
|    episodes         | 904      |
|    fps              | 24       |
|    time_elapsed     | 538      |
|    total_timesteps  | 13353    |
| train/              |          |
|    learning_rate    | 0.0005   |
|    loss             | 0.227    |
|    n_updates        | 13152    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 19.2     |
|    ep_rew_mean      | 15.7     |
|    exploration_rate | 0.0886   |
| time/               |          |
|    episodes         | 908      |
|    fps              | 24       |
|    time_elapsed     | 541      |
|    total_timesteps  | 13431    |
| train/              |          |
|    learning_rate    | 0.0005   |
|    loss             | 0.0723   |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 20.8     |
|    ep_rew_mean      | 17.1     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 968      |
|    fps              | 24       |
|    time_elapsed     | 593      |
|    total_timesteps  | 14713    |
| train/              |          |
|    learning_rate    | 0.0005   |
|    loss             | 0.0698   |
|    n_updates        | 14512    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 20.8     |
|    ep_rew_mean      | 17       |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 972      |
|    fps              | 24       |
|    time_elapsed     | 597      |
|    total_timesteps  | 14811    |
| train/              |          |
|    learning_rate    | 0.0005   |
|    loss             | 0.293    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 20       |
|    ep_rew_mean      | 16.6     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1032     |
|    fps              | 24       |
|    time_elapsed     | 647      |
|    total_timesteps  | 16026    |
| train/              |          |
|    learning_rate    | 0.0005   |
|    loss             | 0.208    |
|    n_updates        | 15825    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 20.2     |
|    ep_rew_mean      | 16.7     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1036     |
|    fps              | 24       |
|    time_elapsed     | 651      |
|    total_timesteps  | 16136    |
| train/              |          |
|    learning_rate    | 0.0005   |
|    loss             | 0.142    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 21.4     |
|    ep_rew_mean      | 17.7     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1096     |
|    fps              | 24       |
|    time_elapsed     | 706      |
|    total_timesteps  | 17478    |
| train/              |          |
|    learning_rate    | 0.0005   |
|    loss             | 0.176    |
|    n_updates        | 17277    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 21.1     |
|    ep_rew_mean      | 17.5     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1100     |
|    fps              | 24       |
|    time_elapsed     | 708      |
|    total_timesteps  | 17545    |
| train/              |          |
|    learning_rate    | 0.0005   |
|    loss             | 0.164    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 20.3     |
|    ep_rew_mean      | 16.9     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1160     |
|    fps              | 24       |
|    time_elapsed     | 754      |
|    total_timesteps  | 18686    |
| train/              |          |
|    learning_rate    | 0.0005   |
|    loss             | 0.109    |
|    n_updates        | 18485    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 20       |
|    ep_rew_mean      | 16.7     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1164     |
|    fps              | 24       |
|    time_elapsed     | 757      |
|    total_timesteps  | 18749    |
| train/              |          |
|    learning_rate    | 0.0005   |
|    loss             | 0.184    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 19.9     |
|    ep_rew_mean      | 17       |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1224     |
|    fps              | 24       |
|    time_elapsed     | 805      |
|    total_timesteps  | 19942    |
| train/              |          |
|    learning_rate    | 0.0005   |
|    loss             | 0.375    |
|    n_updates        | 19741    |
----------------------------------


<stable_baselines3.dqn.dqn.DQN at 0x7f5354f5d160>

## Testing

Roll out the trained agent for a few episodes, record them, and display the resulting videos.

In [12]:
# Evaluate the trained agent: run a few greedy episodes in a video-recording
# environment, then display the recorded videos in the notebook.
env = gym.make("highway-fast-v0")
env = record_videos(env)
try:
    for episode in trange(4, desc="Test episodes"):
        # gym>=0.26 / gymnasium return `(obs, info)` from reset(); older gym
        # returns the observation alone. Accept both so that `model.predict`
        # always receives a bare observation.
        reset_result = env.reset()
        if (
            isinstance(reset_result, tuple)
            and len(reset_result) == 2
            and isinstance(reset_result[1], dict)
        ):
            obs = reset_result[0]
        else:
            obs = reset_result

        done = False
        while not done:
            # deterministic=True: act greedily, without exploration noise.
            action, _ = model.predict(obs, deterministic=True)
            step_result = env.step(int(action))
            if len(step_result) == 5:
                # New API: separate `terminated` and `truncated` flags.
                obs, reward, terminated, truncated, info = step_result
                done = terminated or truncated
            else:
                # Old API: a single `done` flag.
                obs, reward, done, info = step_result
finally:
    # Always release the environment (and the video recorder wrapping it),
    # even if an episode raises part-way through.
    env.close()
show_videos()


  f"Overwriting existing videos at {self.video_folder} folder (try specifying a different `video_folder` for the `RecordVideo` wrapper if this is not desired)"


Test episodes:   0%|          | 0/4 [00:00<?, ?it/s]