# Highway with SB3's DQN

##  Warming up
We start with a few useful installs and imports:

In [2]:
# # Install environment and agent
# !pip install highway-env
# # TODO: we use the bleeding edge version because the current stable version does not support the latest gym>=0.21 versions. Revert back to stable at the next SB3 release.
# # NOTE(review): the imports below use gymnasium rather than gym, so the TODO above may be outdated -- confirm.
# !pip install git+https://github.com/DLR-RM/stable-baselines3

# Environment
import gymnasium as gym
import highway_env

# Pass the imported highway_env package to Gymnasium's register_envs(); the
# 'highway-fast-v0' id used in later cells is expected to come from this package.
gym.register_envs(highway_env)

# Agent
from stable_baselines3 import DQN


# Utilities: trange is the notebook progress bar used by the test-episode loop.
import sys
from tqdm.notebook import trange
# Optional extras (tensorboardx, pyvirtualdisplay, xvfb, ffmpeg), left disabled.
# !pip install tensorboardx gym pyvirtualdisplay
# !apt-get install -y xvfb ffmpeg

2024-09-27 11:13:41.358486: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-09-27 11:13:41.360415: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-09-27 11:13:41.399560: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


## Training
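Run TensorBoard locally (for example `tensorboard --logdir highway_dqn/`, the log directory used by the training cell below) to visualize training.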

In [3]:
import base64
from pathlib import Path

from gymnasium.wrappers import RecordVideo
from IPython import display as ipythondisplay
from pyvirtualdisplay import Display



def record_videos(env, video_folder="videos"):
    """Wrap ``env`` in a ``RecordVideo`` wrapper that records every episode.

    Args:
        env: Environment to record. Its unwrapped instance must provide
            ``set_record_video_wrapper``.
        video_folder: Directory passed to ``RecordVideo`` as ``video_folder``.

    Returns:
        The ``RecordVideo`` wrapper around ``env``.
    """

    def _record_every_episode(_episode_id):
        # No filtering: every episode triggers a recording.
        return True

    recorder = RecordVideo(
        env,
        video_folder=video_folder,
        episode_trigger=_record_every_episode,
    )
    # Hand the recorder to the base environment so that intermediate frames
    # are captured as well.
    env.unwrapped.set_record_video_wrapper(recorder)
    return recorder


def show_videos(path="videos"):
    """Display every ``*.mp4`` file located directly inside ``path`` inline.

    Each file is read, base64-encoded and embedded in a ``<video>`` tag; the
    tags are joined with ``<br>`` and rendered as a single HTML output.

    Args:
        path: Directory to search. The search is not recursive, so videos
            stored in a sub-folder are only found when that sub-folder itself
            is passed here.
    """
    html = []
    # glob() does not guarantee any ordering; sort the paths so the videos are
    # always displayed in the same order.
    for mp4 in sorted(Path(path).glob("*.mp4")):
        video_b64 = base64.b64encode(mp4.read_bytes())
        html.append(
            """<video alt="{}" autoplay
                      loop controls style="height: 400px;">
                      <source src="data:video/mp4;base64,{}" type="video/mp4" />
                 </video>""".format(
                mp4, video_b64.decode("ascii")
            )
        )
    ipythondisplay.display(ipythondisplay.HTML(data="<br>".join(html)))


In [15]:
# Fixed random seed so that repeated runs of this cell are comparable.
# NOTE(review): exact run-to-run determinism also depends on the environment
# and the compute backend -- confirm if bit-identical results are required.
SEED = 0

# DQN agent with an MLP policy, trained on the 'highway-fast-v0' environment.
# Training metrics are written to the 'highway_dqn/' TensorBoard log directory.
model = DQN('MlpPolicy', 'highway-fast-v0',
                policy_kwargs=dict(net_arch=[256, 256]),
                learning_rate=5e-4,
                buffer_size=15000,
                learning_starts=200,
                batch_size=32,
                gamma=0.8,
                train_freq=1,
                gradient_steps=1,
                target_update_interval=50,
                exploration_fraction=0.7,
                verbose=1,
                seed=SEED,
                tensorboard_log='highway_dqn/')
# Train for 20,000 timesteps, then store the checkpoint for the testing cell.
model.learn(int(2e4))
model.save('models/dqn_model_default_fast')

Using cpu device
Creating environment from the given name 'highway-fast-v0'
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Logging to highway_dqn/DQN_7
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 9.5      |
|    ep_rew_mean      | 6.89     |
|    exploration_rate | 0.997    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 12       |
|    time_elapsed     | 2        |
|    total_timesteps  | 38       |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 13.6     |
|    ep_rew_mean      | 9.74     |
|    exploration_rate | 0.993    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 13       |
|    time_elapsed     | 7        |
|    total_timesteps  | 109      |
----------------------------------
----------------------------------
| rollout/   

In [4]:
from stable_baselines3 import DQN
import pprint
from matplotlib import pyplot as plt
import numpy as np

## Testing

Visualize a few test episodes of the trained agent; each episode is recorded as a video under `videos/<model_name>/`.

In [11]:
video_folder = "videos"
model_name = "dqn_model_defaultf1"
# Load the checkpoint saved by the training cell ('models/dqn_model_default_fast')
# so that this cell works after a fresh Restart & Run All.
model_fast = DQN.load('models/dqn_model_default_fast')
env = gym.make('highway-fast-v0', render_mode='rgb_array')
# Use RecordVideo wrapper to record videos in the specified folder
video_path = f"{video_folder}/{model_name}"
env = RecordVideo(env, video_folder=video_path, episode_trigger=lambda ep: True)
try:
    for episode in trange(3, desc='Test episodes'):
        (obs, info), done, truncated = env.reset(), False, False
        # An episode is over when it is terminated (done) OR truncated;
        # checking only `done` would keep stepping a finished environment.
        while not (done or truncated):
            action, _ = model_fast.predict(obs, deterministic=True)
            obs, reward, done, truncated, info = env.step(int(action))
finally:
    # Always close the environment, even if an episode raises.
    env.close()
# The recordings are written to `video_path`, not to the default "videos"
# folder, and show_videos() does not search sub-folders.
show_videos(video_path)



Test episodes:   0%|          | 0/3 [00:00<?, ?it/s]

Moviepy - Building video /home/prachit/Desktop/Reward-shaping-with-LLMS/videos/dqn_model_defaultf1/rl-video-episode-0.mp4.
Moviepy - Writing video /home/prachit/Desktop/Reward-shaping-with-LLMS/videos/dqn_model_defaultf1/rl-video-episode-0.mp4





Moviepy - Done !
Moviepy - video ready /home/prachit/Desktop/Reward-shaping-with-LLMS/videos/dqn_model_defaultf1/rl-video-episode-0.mp4
Moviepy - Building video /home/prachit/Desktop/Reward-shaping-with-LLMS/videos/dqn_model_defaultf1/rl-video-episode-1.mp4.
Moviepy - Writing video /home/prachit/Desktop/Reward-shaping-with-LLMS/videos/dqn_model_defaultf1/rl-video-episode-1.mp4





Moviepy - Done !
Moviepy - video ready /home/prachit/Desktop/Reward-shaping-with-LLMS/videos/dqn_model_defaultf1/rl-video-episode-1.mp4
Moviepy - Building video /home/prachit/Desktop/Reward-shaping-with-LLMS/videos/dqn_model_defaultf1/rl-video-episode-2.mp4.
Moviepy - Writing video /home/prachit/Desktop/Reward-shaping-with-LLMS/videos/dqn_model_defaultf1/rl-video-episode-2.mp4





Moviepy - Done !
Moviepy - video ready /home/prachit/Desktop/Reward-shaping-with-LLMS/videos/dqn_model_defaultf1/rl-video-episode-2.mp4
