# Racetrack with SB3's SAC

##  Warming up
We start with a few useful installs and imports:

In [None]:
# Installs

# Install environment and agent
!pip install highway-env
# TODO: we use the bleeding edge version because the current stable version does not support the latest gym>=0.21 versions. Revert back to stable at the next SB3 release.
!pip install git+https://github.com/DLR-RM/stable-baselines3

In [4]:
# Environment
import gymnasium as gym
import highway_env

# Agent
from stable_baselines3 import SAC

### Changing Rewards on environnement

For our experiment we are going to modify a little bit our reward function on the environnement to see if the agents improves

In [5]:
from highway_env.envs import RacetrackEnv
import numpy as np
from typing import Dict, Text

    
class RacetrackEnvModified(RacetrackEnv):
    
    def _rewards(self, action: np.ndarray) -> Dict[Text, float]:
        _, lateral = self.vehicle.lane.local_coordinates(self.vehicle.position)
        return {
            # "lane_centering_reward": 1/(1+self.config["lane_centering_cost"]*lateral**2),
            # "action_reward": np.linalg.norm(action),
            "collision_reward": self.vehicle.crashed,
            "on_road_reward": self.vehicle.on_road,
        }



### Environnement configuration

In [7]:
# Configure racetrack environnment
configuration = {
    "observation": {
        "type": "OccupancyGrid",
        "features": ['presence', 'on_road'],
        "grid_size": [[-18, 18], [-18, 18]],
        "grid_step": [3, 3],
        "as_image": False,
        "align_to_vehicle_axes": True
    },
    "action": {
        "type": "ContinuousAction",
        "longitudinal": False,
        "lateral": True,
    },
    "simulation_frequency": 15,
    "policy_frequency": 5,
    "duration": 300,
    "collision_reward": -1,
    "lane_centering_cost": 4,
    "lane_centering_reward": 1,
    "action_reward": -0.3,
    "controlled_vehicles": 1,
    "other_vehicles": 1,
    "screen_width": 600,
    "screen_height": 600,
    "centering_position": [0.5, 0.5],
    "scaling": 7,
    "show_trajectories": False,
    "render_agent": True,
    "offscreen_rendering": False
}



env = RacetrackEnvModified(configuration)

### Training default env

In [8]:
model = SAC('MlpPolicy', env,verbose=2)
model.learn(int(2e5))

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1.35e+03 |
|    ep_rew_mean     | 61.5     |
| time/              |          |
|    episodes        | 4        |
|    fps             | 23       |
|    time_elapsed    | 233      |
|    total_timesteps | 5397     |
| train/             |          |
|    actor_loss      | -7.61    |
|    critic_loss     | 0.0187   |
|    ent_coef        | 0.205    |
|    ent_coef_loss   | -2.55    |
|    learning_rate   | 0.0003   |
|    n_updates       | 5296     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1.05e+03 |
|    ep_rew_mean     | 182      |
| time/              |          |
|    episodes        | 8        |
|    fps             | 22       |
|    time_elapsed    | 367      |
|    total_timesteps | 8433     |
| train/             |

In [9]:
# save the model
model.save("sac_racetrack")

In [4]:
# load the model
model = SAC.load("sac_racetrack")

## Testing

Visualize a few episodes

In [5]:
env = gym.make('racetrack-v0', render_mode='rgb_array')
env = record_videos(env)
for episode in trange(3, desc='Test episodes'):
    (obs, info), done = env.reset(), False
    for i in range(100):
        action, _ = model.predict(obs, deterministic=True)
        obs, reward, done, truncated, info = env.step(action)
        if done:
          break
env.close()
show_videos()

NameError: name 'record_videos' is not defined