# Behavioural Planning for Autonomous Highway Driving

We plan a trajectory using the _Optimistic Planning for Deterministic systems_ ([OPD](https://hal.inria.fr/hal-00830182)) algorithm.


In [2]:
import gymnasium as gym
import numpy as np 
from datetime import datetime

# Timestamped output file. Use only filename-safe separators (no ':'), and
# keep the date and time parts visibly separated.
output = datetime.now().strftime("%Y_%m_%d_%H-%M-%S") + ".npy"

env = gym.make('highway-v0', render_mode='rgb_array')
# Configure the underlying highway-env instance directly rather than relying
# on attribute forwarding through the gymnasium wrappers.
env.unwrapped.configure({
    "manual_control": True,
    "observation": {
        "type": "Kinematics",
        "vehicles_count": 3,
        "features": ["is_emg", "presence", "x", "y", "vx", "vy"],
        "features_range": {
            "x": [-100, 100],
            "y": [-100, 100],
            "vx": [-20, 50],
            "vy": [-20, 50]
        },
        "absolute": False,
        "order": "sorted"
    }
})
print(env.unwrapped.config)

# One record per step: the observation together with its info dict.
data = []
try:
    obs, info = env.reset()
    done = truncated = False
    # Stop on either termination (e.g. crash) or truncation (time limit);
    # checking only `done` lets the loop run past the end of the episode.
    while not (done or truncated):
        action = env.action_space.sample()
        obs, reward, done, truncated, info = env.step(action)
        data.append({'obs': obs, 'info': info})
        print(obs)
        print(info)
        # Render for its side effect only; do not overwrite `obs` with the frame.
        env.render()
finally:
    # Release the environment even if the rollout is interrupted.
    env.close()

# The records are dicts, so store them as an object array (pickled by numpy).
np.save(output, np.array(data, dtype=object), allow_pickle=True)
# Object arrays can only be read back with allow_pickle=True. Only do this for
# files produced locally: unpickling untrusted files can execute arbitrary code.
val = np.load(output, allow_pickle=True)
print(val)

THIS IS THE RESET!
1
{'observation': {'type': 'Kinematics', 'vehicles_count': 3, 'features': ['is_emg', 'presence', 'x', 'y', 'vx', 'vy'], 'features_range': {'x': [-100, 100], 'y': [-100, 100], 'vx': [-20, 50], 'vy': [-20, 50]}, 'absolute': False, 'order': 'sorted'}, 'action': {'type': 'DiscreteMetaAction'}, 'simulation_frequency': 15, 'policy_frequency': 1, 'other_vehicles_type': 'highway_env.vehicle.behavior.IDMVehicle', 'screen_width': 600, 'screen_height': 150, 'centering_position': [0.3, 0.5], 'scaling': 5.5, 'show_trajectories': False, 'render_agent': True, 'offscreen_rendering': False, 'manual_control': True, 'real_time_rendering': False, 'lanes_count': 6, 'vehicles_count': 0, 'controlled_vehicles': 1, 'initial_lane_id': None, 'duration': 60, 'ego_spacing': 2, 'vehicles_density': 1, 'collision_reward': -1, 'right_lane_reward': 0.1, 'high_speed_reward': 0.4, 'lane_change_reward': 0, 'reward_speed_range': [20, 30], 'normalize_reward': True, 'offroad_terminal': False}
THIS IS THE R

  logger.warn(


[[ 0.          1.          1.          0.08        0.2857143  -0.42857143]
 [ 0.          0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.          0.        ]]
[[ 0.          1.          1.          0.08        0.2857143  -0.42857143]
 [ 0.          0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.          0.        ]]
[[ 0.          1.          1.          0.08        0.2857143  -0.42857143]
 [ 0.          0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.          0.        ]]
[[ 0.          1.          1.          0.08        0.2857143  -0.42857143]
 [ 1.          1.         -0.08230814  0.04       -0.2857226  -0.42857143]
 [ 0.          0.          0.          0.          0.          0.        ]]
[[ 0.          1.          1.          0.08        0.2857143  -0.42857143]
 [ 1.          1.    

KeyboardInterrupt: 

In [None]:
#@title Imports for env, agent, and visualisation.
# Environment
!pip install highway-env
import gymnasium as gym
import highway_env

# Agent
!pip install git+https://github.com/eleurent/rl-agents#egg=rl-agents
from rl_agents.agents.common.factory import agent_factory

# Visualisation
import sys
from tqdm.notebook import trange
!pip install moviepy -U
!pip install imageio_ffmpeg
!pip install pyvirtualdisplay
!apt-get install -y xvfb python-opengl ffmpeg
!git clone https://github.com/eleurent/highway-env.git
sys.path.insert(0, './highway-env/scripts/')
from utils import record_videos, show_videos



[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m23.1.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3.10 -m pip install --upgrade pip[0m
Collecting rl-agents
  Cloning https://github.com/eleurent/rl-agents to /private/var/folders/9y/v0kyt89958g_96yjb094fqrr0000gn/T/pip-install-k6fe4c3k/rl-agents_35d1f24bdf6b4889a7443fb7ad223624
  Running command git clone --filter=blob:none --quiet https://github.com/eleurent/rl-agents /private/var/folders/9y/v0kyt89958g_96yjb094fqrr0000gn/T/pip-install-k6fe4c3k/rl-agents_35d1f24bdf6b4889a7443fb7ad223624


  Resolved https://github.com/eleurent/rl-agents to commit f205ca5b99535aade0b7488aa003bf959839308c
  Preparing metadata (setup.py) ... [?25ldone


In [None]:
#@title Run an episode

# Make environment (wrapped so that the episode is recorded to video)
env = gym.make("highway-fast-v0", render_mode="rgb_array")
env = record_videos(env)
(obs, info), done = env.reset(), False

# Make agent: a deterministic tree-search planner with a small planning
# budget and discount factor, built from this config by agent_factory.
agent_config = {
    "__class__": "<class 'rl_agents.agents.tree_search.deterministic.DeterministicPlannerAgent'>",
    "env_preprocessors": [{"method":"simplify"}],
    "budget": 50,
    "gamma": 0.7,
}
agent = agent_factory(env, agent_config)

# Run episode: at most `duration` planning steps, stopping early once the
# environment reports the episode is over.
for step in trange(env.unwrapped.config["duration"], desc="Running..."):
    action = agent.act(obs)
    obs, reward, done, truncated, info = env.step(action)
    if done or truncated:
        # Do not keep stepping an environment whose episode has ended.
        break

env.close()
show_videos()