# IRL via Apprenticeship Learning

## Overview

- Environment: [HighwayEnv](https://github.com/Farama-Foundation/HighwayEnv) — this notebook uses its `highway-v0` environment through Gymnasium.

## Imports

In [32]:
import gymnasium as gym
import highway_env
from matplotlib import pyplot as plt
import pprint

## Environment

### Environment Creation

In [33]:
# Assemble the HighwayEnv overrides: observation and action types first,
# followed by the episode/traffic settings and the reward terms.
config = dict(
    observation=dict(type="TimeToCollision", horizon=10),
    action=dict(type="DiscreteMetaAction", longitudinal=True, lateral=True),
    # Episode and traffic layout
    duration=40,
    lanes_count=2,
    vehicles_density=1.0,
    # Reward terms
    collision_reward=-1,
    right_lane_reward=0.1,
    high_speed_reward=0.4,
    reward_speed_range=[20, 30],
    normalize_reward=True,
)

# Instantiate highway-v0 with the overrides above and render_mode set to "rgb_array".
env = gym.make("highway-v0", render_mode="rgb_array", config=config)


### Environment Config

In [25]:
# Pretty-print the configuration held by the unwrapped environment. The output
# also lists keys that were not set in `config` above.
# `pprint` is imported once in the Imports cell, so it is not re-imported here.
pprint.pprint(env.unwrapped.config)

{'action': {'lateral': True,
            'longitudinal': True,
            'type': 'DiscreteMetaAction'},
 'centering_position': [0.3, 0.5],
 'collision_reward': -1,
 'controlled_vehicles': 1,
 'duration': 40,
 'ego_spacing': 2,
 'high_speed_reward': 0.4,
 'initial_lane_id': None,
 'lane_change_reward': 0,
 'lanes_count': 2,
 'manual_control': False,
 'normalize_reward': True,
 'observation': {'horizon': 10, 'type': 'TimeToCollision'},
 'offroad_terminal': False,
 'offscreen_rendering': False,
 'other_vehicles_type': 'highway_env.vehicle.behavior.IDMVehicle',
 'policy_frequency': 1,
 'real_time_rendering': False,
 'render_agent': True,
 'reward_speed_range': [20, 30],
 'right_lane_reward': 0.1,
 'scaling': 5.5,
 'screen_height': 150,
 'screen_width': 600,
 'show_trajectories': False,
 'simulation_frequency': 15,
 'vehicles_count': 50,
 'vehicles_density': 1.0}


In [34]:
# Convert to finite MDP.
# Seed the reset so the initial scene is identical on every Restart & Run All;
# an unseeded reset draws a different scene each time.
# NOTE(review): to_finite_mdp() is assumed to be built from the current scene,
# which is why the reset happens first -- confirm against highway-env.
env.reset(seed=0)
base_env = env.unwrapped  # the environment object underneath any Gymnasium wrappers
mdp = base_env.to_finite_mdp()

### Observation Space

In [35]:
# Print the environment's observation space; the printed Box reports its
# bounds, shape and dtype.
print(env.observation_space)

Box(0.0, 1.0, (3, 3, 10), float32)


| Num | Observation Dimension | Meaning | Length | Min | Max |
|-----|------------------------|---------|---------|-----|-----|
| 0 | Ego-speed channels | Predictions at V discrete ego-speeds (e.g. low/med/high m/s) | 3 | 0.0 | 1.0 |
| 1 | Lane channels | L lanes around the ego-vehicle (left, current, right) | 3 | 0.0 | 1.0 |
| 2 | Time-to-collision bins (horizon) | H discretized future time steps for collision prediction | 10 | 0.0 | 1.0 |

### Action Space

In [36]:
# Print the environment's action space (a discrete set of action indices).
print(env.action_space)

Discrete(5)


Type: Discrete(5)

Num | Action
--- | ---
0 | Lane Left
1 | Idle
2 | Lane Right
3 | Faster
4 | Slower

In [42]:
# Get all possible actions: read the index -> label mapping from the
# environment's action type and print it.
ACTIONS_ALL = base_env.action_type.actions
print(ACTIONS_ALL)


{0: 'LANE_LEFT', 1: 'IDLE', 2: 'LANE_RIGHT', 3: 'FASTER', 4: 'SLOWER'}


### Reward