### **Implementation of DDPG (Deep Deterministic Policy Gradient) with HER (Hindsight Experience Replay)**

In [1]:
# To install the stable baseline
!pip install git+https://github.com/DLR-RM/stable-baselines3

Collecting git+https://github.com/DLR-RM/stable-baselines3
  Cloning https://github.com/DLR-RM/stable-baselines3 to /tmp/pip-req-build-gqhunyat
  Running command git clone --filter=blob:none --quiet https://github.com/DLR-RM/stable-baselines3 /tmp/pip-req-build-gqhunyat
  Resolved https://github.com/DLR-RM/stable-baselines3 to commit 5623d98f9d6bcfd2ab450e850c3f7b090aef5642
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting gymnasium<0.30,>=0.28.1 (from stable_baselines3==2.3.0)
  Downloading gymnasium-0.29.1-py3-none-any.whl (953 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m953.9/953.9 kB[0m [31m9.2 MB/s[0m eta [36m0:00:00[0m
Collecting farama-notifications>=0.0.1 (from gymnasium<0.30,>=0.28.1->stable_baselines3==2.3.0)
  Downloading Farama_Notifications-0.0.4-py3-none-any.whl (2.5 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105

In [2]:
# To install highway-env
!pip install git+https://github.com/eleurent/highway-env

Collecting git+https://github.com/eleurent/highway-env
  Cloning https://github.com/eleurent/highway-env to /tmp/pip-req-build-1dvupazu
  Running command git clone --filter=blob:none --quiet https://github.com/eleurent/highway-env /tmp/pip-req-build-1dvupazu
  Resolved https://github.com/eleurent/highway-env to commit c7ee099195262e6eedfb63d88bd48a367818d59e
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Installing backend dependencies ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: highway-env
  Building wheel for highway-env (pyproject.toml) ... [?25l[?25hdone
  Created wheel for highway-env: filename=highway_env-1.8.2-py3-none-any.whl size=107296 sha256=d017ee31a5ade6b65f62759bc120076287a26b85368abe4f24be30d6b50a3724
  Stored in directory: /tmp/pip-ephem-wheel-cache-mfsqwv0x/wheels/ef/2f/6d/3a938641a628175970386038dafd5a98053b743e9c0480c57d
Suc

## Import environment and stable baselines

In [3]:
import gymnasium as gym
import highway_env
import numpy as np

from stable_baselines3 import HerReplayBuffer, DDPG

In [4]:
env = gym.make("parking-v0")

  and should_run_async(code)


# **Training DDPG**

In [7]:
# Create the action noise object that will be used for exploration
her_kwargs = dict(n_sampled_goal=4, goal_selection_strategy='future')
model = DDPG('MultiInputPolicy',
             env,
             replay_buffer_class=HerReplayBuffer,
             replay_buffer_kwargs=her_kwargs,
             verbose=1,
             buffer_size=int(1e6),
             learning_rate=1e-3,
             gamma=0.95,
             batch_size=256,
             policy_kwargs=dict(net_arch=[256, 256, 256]))

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [11]:
import warnings
with warnings.catch_warnings():
  warnings.simplefilter("ignore")

  # Train for 2e5 steps
  model.learn(int(2e5))

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
|    time_elapsed    | 2039     |
|    total_timesteps | 43793    |
| train/             |          |
|    actor_loss      | 1.18     |
|    critic_loss     | 0.00487  |
|    learning_rate   | 0.001    |
|    n_updates       | 44957    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 32.2     |
|    ep_rew_mean     | -9.44    |
|    success_rate    | 0.87     |
| time/              |          |
|    episodes        | 556      |
|    fps             | 21       |
|    time_elapsed    | 2043     |
|    total_timesteps | 43881    |
| train/             |          |
|    actor_loss      | 1.07     |
|    critic_loss     | 0.00422  |
|    learning_rate   | 0.001    |
|    n_updates       | 45045    |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 31.7     |
|    ep_rew_mean 

KeyboardInterrupt: 

## Save the model


In [12]:
from google.colab import drive

# Mount Google Drive
drive.mount('/content/gdrive',force_remount=True)
path = r"/content/gdrive/MyDrive/Colab Notebooks/"

Mounted at /content/gdrive


In [13]:
# save the model
model.save(path + "ddpg_her_parking2e5")

# **Evaluate the agent**

In [15]:

# Visualization utils
import sys
from tqdm.notebook import trange
!pip install gym pyvirtualdisplay
!apt-get install -y xvfb ffmpeg
!git clone https://github.com/Farama-Foundation/HighwayEnv.git 2> /dev/null
sys.path.insert(0, '/content/HighwayEnv/scripts/')
from utils import record_videos, show_videos

Collecting pyvirtualdisplay
  Downloading PyVirtualDisplay-3.0-py3-none-any.whl (15 kB)
Installing collected packages: pyvirtualdisplay
Successfully installed pyvirtualdisplay-3.0
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
ffmpeg is already the newest version (7:4.4.2-0ubuntu0.22.04.1).
The following additional packages will be installed:
  libfontenc1 libxfont2 libxkbfile1 x11-xkb-utils xfonts-base xfonts-encodings xfonts-utils
  xserver-common
The following NEW packages will be installed:
  libfontenc1 libxfont2 libxkbfile1 x11-xkb-utils xfonts-base xfonts-encodings xfonts-utils
  xserver-common xvfb
0 upgraded, 9 newly installed, 0 to remove and 45 not upgraded.
Need to get 7,813 kB of archives.
After this operation, 11.9 MB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu jammy/main amd64 libfontenc1 amd64 1:1.1.4-1build3 [14.7 kB]
Get:2 http://archive.ubuntu.com/ubuntu jammy/main amd64 libxfont2 amd

In [17]:
env = gym.make('parking-v0', render_mode='rgb_array')
env = record_videos(env)
for episode in trange(3, desc='Test episodes'):
    (obs, info), done = env.reset(), False
    for i in range(300):
        action, _ = model.predict(obs, deterministic=True)
        obs, reward, done, truncated, info = env.step(action)
        if done:
          break
env.close()
show_videos()

  logger.warn(


Test episodes:   0%|          | 0/3 [00:00<?, ?it/s]

Moviepy - Building video /content/videos/rl-video-episode-0.mp4.
Moviepy - Writing video /content/videos/rl-video-episode-0.mp4




t:   0%|          | 0/62 [00:00<?, ?it/s, now=None][A
t:  26%|██▌       | 16/62 [00:00<00:00, 155.57it/s, now=None][A
t:  53%|█████▎    | 33/62 [00:00<00:00, 157.90it/s, now=None][A
t:  79%|███████▉  | 49/62 [00:00<00:00, 130.07it/s, now=None][A
                                                             [A

Moviepy - Done !
Moviepy - video ready /content/videos/rl-video-episode-0.mp4
Moviepy - Building video /content/videos/rl-video-episode-1.mp4.
Moviepy - Writing video /content/videos/rl-video-episode-1.mp4




t:   0%|          | 0/88 [00:00<?, ?it/s, now=None][A
t:   9%|▉         | 8/88 [00:00<00:01, 79.28it/s, now=None][A
t:  18%|█▊        | 16/88 [00:00<00:00, 72.61it/s, now=None][A
t:  28%|██▊       | 25/88 [00:00<00:00, 79.26it/s, now=None][A
t:  38%|███▊      | 33/88 [00:00<00:00, 77.72it/s, now=None][A
t:  48%|████▊     | 42/88 [00:00<00:00, 80.03it/s, now=None][A
t:  58%|█████▊    | 51/88 [00:00<00:00, 67.10it/s, now=None][A
t:  67%|██████▋   | 59/88 [00:00<00:00, 63.48it/s, now=None][A
t:  75%|███████▌  | 66/88 [00:00<00:00, 59.19it/s, now=None][A
t:  83%|████████▎ | 73/88 [00:01<00:00, 58.46it/s, now=None][A
t:  90%|████████▉ | 79/88 [00:01<00:00, 57.29it/s, now=None][A
t:  97%|█████████▋| 85/88 [00:01<00:00, 57.02it/s, now=None][A
                                                            [A

Moviepy - Done !
Moviepy - video ready /content/videos/rl-video-episode-1.mp4
Moviepy - Building video /content/videos/rl-video-episode-2.mp4.
Moviepy - Writing video /content/videos/rl-video-episode-2.mp4




t:   0%|          | 0/62 [00:00<?, ?it/s, now=None][A
t:  29%|██▉       | 18/62 [00:00<00:00, 176.36it/s, now=None][A
t:  58%|█████▊    | 36/62 [00:00<00:00, 172.34it/s, now=None][A
t:  87%|████████▋ | 54/62 [00:00<00:00, 129.73it/s, now=None][A
                                                             [A

Moviepy - Done !
Moviepy - video ready /content/videos/rl-video-episode-2.mp4
