# Behavioural Planning for Autonomous Highway Driving

We plan a trajectory using the _Optimistic Planning for Deterministic systems_ ([OPD](https://hal.inria.fr/hal-00830182)) algorithm.


In [1]:
import gymnasium as gym

# Roll out one episode of `highway-v0` with uniformly sampled actions,
# printing the observation and the info dict returned by every step.
env = gym.make('highway-v0', render_mode='rgb_array')
# Read the config from the unwrapped env: `gym.make` returns a wrapped env,
# and reaching env attributes through the wrappers is deprecated in Gymnasium.
print(env.unwrapped.config)
obs, info = env.reset()
done = truncated = False
# The episode is over as soon as EITHER flag is set; testing only `done`
# would keep stepping an environment that has already been truncated.
while not (done or truncated):
    action = env.action_space.sample()
    obs, reward, done, truncated, info = env.step(action)
    print(obs)
    print(info)
    # Render without rebinding `obs`, so `obs` keeps holding the observation.
    env.render()

env.close()

THIS IS THE RESET!
{'observation': {'type': 'Kinematics'}, 'action': {'type': 'DiscreteMetaAction'}, 'simulation_frequency': 15, 'policy_frequency': 1, 'other_vehicles_type': 'highway_env.vehicle.behavior.IDMVehicle', 'screen_width': 600, 'screen_height': 150, 'centering_position': [0.3, 0.5], 'scaling': 5.5, 'show_trajectories': False, 'render_agent': True, 'offscreen_rendering': False, 'manual_control': False, 'real_time_rendering': False, 'lanes_count': 6, 'vehicles_count': 1, 'controlled_vehicles': 1, 'initial_lane_id': None, 'duration': 60, 'ego_spacing': 2, 'vehicles_density': 1, 'collision_reward': -1, 'right_lane_reward': 0.1, 'high_speed_reward': 0.4, 'lane_change_reward': 0, 'reward_speed_range': [20, 30], 'normalize_reward': True, 'offroad_terminal': False}
THIS IS THE RESET!
[[ 1.          1.          0.3579927   0.31180274 -0.02086364]
 [ 1.          0.06647895 -0.02465937 -0.0443819   0.02086364]
 [ 0.          0.          0.          0.          0.        ]
 [ 0.        

  logger.warn(


[[ 1.          1.          0.33572203  0.26067272 -0.00201617]
 [ 1.          0.06105385 -0.00238869  0.00674814  0.00201617]
 [ 0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.        ]]
{'speed': 20.8544411731307, 'crashed': False, 'action': 4, 'rewards': {'collision_reward': 0.0, 'right_lane_reward': 0.4, 'high_speed_reward': 0.08538174186600571, 'on_road_reward': 1.0}}
[[ 1.0000000e+00  1.0000000e+00  3.3357471e-01  2.5182509e-01
  -2.0856228e-04]
 [ 1.0000000e+00  6.5897360e-02 -2.4139194e-04  1.5595766e-02
   2.0856228e-04]
 [ 0.0000000e+00  0.0000000e+00  0.0000000e+00  0.0000000e+00
   0.0000000e+00]
 [ 0.0000000e+00  0.0000000e+00  0.0000000e+00  0.0000000e+00
   0.0000000e+00]
 [ 0.0000000e+00  0.0000000e+00  0.0000000e+00  0.0000000e+00
   0.0000000e+00]]
{'speed': 20.146013943668194, 'crashed': False, 'action': 4, 'rewards': {'collision_reward': 0.

In [None]:
#@title Imports for env, agent, and visualisation.
# Installs the packages this notebook relies on and imports the names used by
# the "Run an episode" cell: gym, agent_factory, trange, record_videos and
# show_videos.
# Environment
!pip install highway-env
import gymnasium as gym
# `highway_env` is not referenced by name in this notebook; presumably it is
# imported for its side effect of registering the highway environments with
# gymnasium — confirm.
import highway_env

# Agent
# rl-agents is installed directly from its GitHub repository.
!pip install git+https://github.com/eleurent/rl-agents#egg=rl-agents
from rl_agents.agents.common.factory import agent_factory

# Visualisation
import sys
from tqdm.notebook import trange
!pip install moviepy -U
!pip install imageio_ffmpeg
!pip install pyvirtualdisplay
# NOTE(review): `python-opengl` may not exist on newer Ubuntu images, in which
# case this apt-get command may fail — confirm on the target runtime.
!apt-get install -y xvfb python-opengl ffmpeg
# The highway-env repository is cloned so that its scripts/ directory can be
# put at the front of sys.path; `utils` below is intended to resolve to the
# module in that directory.
!git clone https://github.com/eleurent/highway-env.git
sys.path.insert(0, './highway-env/scripts/')
from utils import record_videos, show_videos


In [None]:
#@title Run an episode

# Make environment: `highway-fast-v0`, wrapped by `record_videos` so that
# `show_videos()` at the end of the cell has something to display.
env = gym.make("highway-fast-v0", render_mode="rgb_array")
env = record_videos(env)
obs, info = env.reset()
done = truncated = False

# Make agent: a DeterministicPlannerAgent built through the rl-agents factory.
agent_config = {
    "__class__": "<class 'rl_agents.agents.tree_search.deterministic.DeterministicPlannerAgent'>",
    "env_preprocessors": [{"method":"simplify"}],
    "budget": 50,
    "gamma": 0.7,
}
agent = agent_factory(env, agent_config)

# Run episode: at most `duration` decisions, as read from the env config.
for _ in trange(env.unwrapped.config["duration"], desc="Running..."):
    action = agent.act(obs)
    obs, reward, done, truncated, info = env.step(action)
    # Stop as soon as the episode ends; stepping an environment after it has
    # reported termination or truncation (without a reset) is not supported
    # by the Gymnasium API.
    if done or truncated:
        break

env.close()
show_videos()