# Racetrack with SB3's SAC

##  Warming up
We start with a few useful installs and imports:

In [1]:
# Installs

# Install environment and agent
!pip install highway-env
# TODO: we use the bleeding edge version because the current stable version does not support the latest gym>=0.21 versions. Revert back to stable at the next SB3 release.
!pip install git+https://github.com/DLR-RM/stable-baselines3

Collecting highway-env
  Downloading highway_env-1.8.2-py3-none-any.whl (104 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m104.0/104.0 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting gymnasium>=0.27 (from highway-env)
  Downloading gymnasium-0.29.1-py3-none-any.whl (953 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m953.9/953.9 kB[0m [31m25.9 MB/s[0m eta [36m0:00:00[0m
Collecting farama-notifications>=0.0.1 (from gymnasium>=0.27->highway-env)
  Downloading Farama_Notifications-0.0.4-py3-none-any.whl (2.5 kB)
Installing collected packages: farama-notifications, gymnasium, highway-env
Successfully installed farama-notifications-0.0.4 gymnasium-0.29.1 highway-env-1.8.2
Collecting git+https://github.com/DLR-RM/stable-baselines3
  Cloning https://github.com/DLR-RM/stable-baselines3 to /tmp/pip-req-build-4g2py_nx
  Running command git clone --filter=blob:none --quiet https://github.com/DLR-RM/stable-baselines3 /tmp/pip-req-build-

In [2]:
# Environment
import gymnasium as gym
import highway_env

# Agent
from stable_baselines3 import SAC

In [4]:

# Visualization utils
import sys
from tqdm.notebook import trange
!pip install gym pyvirtualdisplay
!apt-get install -y xvfb ffmpeg
!git clone https://github.com/Farama-Foundation/HighwayEnv.git 2> /dev/null
sys.path.insert(0, '/content/HighwayEnv/scripts/')
from utils import record_videos, show_videos

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
ffmpeg is already the newest version (7:4.4.2-0ubuntu0.22.04.1).
xvfb is already the newest version (2:21.1.4-2ubuntu1.7~22.04.10).
0 upgraded, 0 newly installed, 0 to remove and 45 not upgraded.


### Changing Rewards on environnement

For our experiment we are going to modify a little bit our reward function on the environnement to see if the agents improves

In [7]:
from highway_env.envs import RacetrackEnv
import numpy as np
from typing import Dict, Text


class RacetrackEnvModified(RacetrackEnv):

    def _rewards(self, action: np.ndarray) -> Dict[Text, float]:
        _, lateral = self.vehicle.lane.local_coordinates(self.vehicle.position)
        return {
            # "lane_centering_reward": 1/(1+self.config["lane_centering_cost"]*lateral**2),
            # "action_reward": np.linalg.norm(action),
            "collision_reward": self.vehicle.crashed,
            "on_road_reward": self.vehicle.on_road,
        }
# Configure racetrack environnment
configuration = {
    "observation": {
        "type": "OccupancyGrid",
        "features": ['presence', 'on_road'],
        "grid_size": [[-18, 18], [-18, 18]],
        "grid_step": [3, 3],
        "as_image": False,
        "align_to_vehicle_axes": True
    },
    "action": {
        "type": "ContinuousAction",
        "longitudinal": False,
        "lateral": True,
    },
    "simulation_frequency": 15,
    "policy_frequency": 5,
    "duration": 300,
    "collision_reward": -1,
    "lane_centering_cost": 4,
    "lane_centering_reward": 1,
    "action_reward": -0.3,
    "controlled_vehicles": 1,
    "other_vehicles": 1,
    "screen_width": 600,
    "screen_height": 600,
    "centering_position": [0.5, 0.5],
    "scaling": 7,
    "show_trajectories": False,
    "render_agent": True,
    "offscreen_rendering": False
}



env = RacetrackEnvModified(configuration)

### Environnement configuration

## Training
Run tensorboard locally to visualize training.

In [8]:
model = SAC('MlpPolicy', env,verbose=2)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [9]:
from google.colab import drive

# Mount Google Drive
drive.mount('/content/gdrive',force_remount=True)
path = r"/content/gdrive/MyDrive/Colab Notebooks/"

Mounted at /content/gdrive


In [10]:
# load the model
model = SAC.load(path + "sac_racetrack_modified_reward")

Exception: Can't get attribute '_function_setstate' on <module 'cloudpickle.cloudpickle' from '/usr/local/lib/python3.10/dist-packages/cloudpickle/cloudpickle.py'>


## Testing

Visualize a few episodes

In [12]:
env = env = RacetrackEnvModified(configuration, render_mode='rgb_array')
env = record_videos(env)
for episode in trange(3, desc='Test episodes'):
    (obs, info), done = env.reset(), False
    for i in range(100):
        action, _ = model.predict(obs, deterministic=True)
        obs, reward, done, truncated, info = env.step(action)
        if done:
          break
env.close()
show_videos()

Test episodes:   0%|          | 0/3 [00:00<?, ?it/s]

Moviepy - Building video /content/videos/rl-video-episode-0.mp4.
Moviepy - Writing video /content/videos/rl-video-episode-0.mp4




t:   0%|          | 0/184 [00:00<?, ?it/s, now=None][A
t:   1%|          | 2/184 [00:00<00:11, 15.87it/s, now=None][A
t:   2%|▏         | 4/184 [00:00<00:12, 14.14it/s, now=None][A
t:   5%|▍         | 9/184 [00:00<00:06, 26.09it/s, now=None][A
t:   8%|▊         | 15/184 [00:00<00:04, 34.90it/s, now=None][A
t:  10%|█         | 19/184 [00:00<00:04, 35.32it/s, now=None][A
t:  14%|█▍        | 26/184 [00:00<00:03, 44.61it/s, now=None][A
t:  18%|█▊        | 33/184 [00:00<00:02, 51.22it/s, now=None][A
t:  21%|██        | 39/184 [00:00<00:02, 53.39it/s, now=None][A
t:  24%|██▍       | 45/184 [00:01<00:02, 54.89it/s, now=None][A
t:  28%|██▊       | 51/184 [00:01<00:02, 45.70it/s, now=None][A
t:  30%|███       | 56/184 [00:01<00:02, 46.20it/s, now=None][A
t:  33%|███▎      | 61/184 [00:01<00:02, 45.61it/s, now=None][A
t:  36%|███▌      | 66/184 [00:01<00:02, 45.94it/s, now=None][A
t:  39%|███▊      | 71/184 [00:01<00:02, 42.17it/s, now=None][A
t:  41%|████▏     | 76/184 [00:01<00

Moviepy - Done !
Moviepy - video ready /content/videos/rl-video-episode-0.mp4
Moviepy - Building video /content/videos/rl-video-episode-1.mp4.
Moviepy - Writing video /content/videos/rl-video-episode-1.mp4





Moviepy - Done !
Moviepy - video ready /content/videos/rl-video-episode-1.mp4
