This is a standalone Python notebook in which the highway environment is discretized to a custom, coarse level of granularity, reducing simulation complexity so that MDP tables can be generated faster.

In [1]:
# Move up one directory level to import local instance of highway environment.
# NOTE: `%cd ..` is relative to the current working directory, so re-running this cell
# without restarting the kernel moves up one more level each time.
%cd ..
# %pwd

d:\WorkFiles\ProjectCode\ActiveProjects\HoLab\Mine\HighwayEnv-TRI


Import necessary modules

In [2]:
from highway_env.envs.MDPwrapper_highwayenv import HighwayGridworldMDP
from highway_env.envs.MDPwrapper_Gym import OptimalPolicy

import logging
logger = logging.getLogger()     # root logger (no name passed)
logger.setLevel(logging.INFO)    # Other options: DEBUG, WARNING, ERROR, CRITICAL

import numpy as np
import itertools as it

from copy import deepcopy

MDP Stuff

In [3]:
# Define Configuration
# Scenario sizes are named up front so the config dict below (and later cells) can derive from them.
num_of_vehicles = 0
num_of_ice = 5
num_of_obstacles = 10
env_length = 3000  # Max car speed (for the MDP vehicle) is 30 m/s; at constant max speed the run lasts 100 s of simulation time
lane_count = 3
config = {
        ## Parameters of interest ##
        "observation": {
            # For more details about observation parameters check out "highway_env\envs\common\observation.py"
            "type": "Kinematics",
            "vehicles_count": num_of_vehicles+num_of_ice+5,   # Number of vehicles (and objects) to show in the observation.
                                                                #   Keep this greater than the top-level "vehicles_count"
                                                                #   (outside this observation dictionary) to observe all
                                                                #   vehicles in the environment.
                                                                #   NOTE(review): num_of_obstacles is not part of this sum although
                                                                #   "include_obstacles" is True - confirm every object is meant to fit.
            "features": ["presence", "x", "y", "vx", "vy"],# "heading"],
            "normalize": False, # Do not normalize object coordinates
            "absolute": True,   # Provide absolute coordinates of vehicles
            "order": "sorted",
            "observe_intentions": False,
            "include_obstacles": True,
            "see_behind": True  # Report vehicles behind the ego vehicle
            },
        ## Parameters specialized for the icy highway environment ##
        "ice_count": num_of_ice,    # Number of ice sheets in the environment
        "obstacle_count": num_of_obstacles,    # Number of obstacles in the environment
        "env_len":  env_length,    # Length of the road
        ## Keep these to default, because the fast versions of the environments implement different values ##
        ## of these variables for faster execution ##
        "vehicles_count": num_of_vehicles,
        "lanes_count": lane_count,
        "simulation_frequency": 5,
        "duration": (env_length/20)+5,  # [in simulation seconds], minimum speed for MDP vehicle is 20m/s, with extra 5s
        "disable_collision_checks": True,    # NOTE(review): the key name says collision checks are *disabled* when True;
                                             #   the previous comment read "Check collisions for other vehicles" - confirm intent.
        "enable_lane_change": False,
        ## Other parameters already set by default configurations ##
        # "action": {
        #     "type": "DiscreteMetaAction",
        # },
        # "controlled_vehicles": 1,
        # "initial_lane_id": None,
        # "ego_spacing": 2,
        # "vehicles_density": 1,
        # "collision_reward": -1,  # The reward received when colliding with a vehicle.
        # "right_lane_reward": 0.1,  # The reward received when driving on the right-most lanes, linearly mapped to
        # # zero for other lanes.
        # "high_speed_reward": 0.4,  # The reward received when driving at full speed, linearly mapped to zero for
        # # lower speeds according to config["reward_speed_range"].
        # "lane_change_reward": 0,  # The reward received at each lane change action.
        # "reward_speed_range": [20, 30],
        # "normalize_reward": True,
        # "offroad_terminal": False
        }
# Alternative environment ids that were tried; only the last (uncommented) constructor is active.
# highway_mdp = HighwayGridworldMDP('highway-v0', config=config, render_mode='rgb_array')
# highway_mdp = HighwayGridworldMDP('highway-v0', config=config, render_mode=None)
# highway_mdp = HighwayGridworldMDP('highway-fast-v0', config=config, render_mode=None)
# highway_mdp = HighwayGridworldMDP('highway-icy-v0', config=config, render_mode=None)
# highway_mdp = HighwayGridworldMDP('highway-icy-fast-v0', config=config, render_mode=None)
highway_mdp = HighwayGridworldMDP('highway-icy-custom-v0', config=config, render_mode=None)

In [4]:
# Build the discrete world grid from the road geometry.
lanes = highway_mdp.env.unwrapped.road.network.lanes_list()
lane_ends = [lane.end for lane in lanes]
x_max = max(end[0] for end in lane_ends)
y_max = max(end[1] for end in lane_ends)
#2# |600 evenly spaced sample points along the x-axis (both end points included),
#2# |and lane_count sample points along the y-axis
x_grid = np.linspace(0, x_max, 600)
y_grid = np.linspace(0, y_max, lane_count)
# Every (x, y) combination, x-major, as a list of tuples
coord_list = [*it.product(x_grid, y_grid)]
[x_max, y_max]

[3000.0, 8.0]

In [5]:
# Populate the MDP table over the grid coordinates; the MDP vehicle speed can be 20/25/30.
# NOTE(review): this call passes `speed=` while the static-gridworld call later in this notebook
# passes `veh_speed=` - confirm both keyword names against the wrapper.
temp = highway_mdp.populate_MDPtable(coord_list, parallel_exec=False, speed = 25)   # notebook does not handle multiprocessing well
# temp

In [6]:
# Run value iteration on the populated MDP and display the per-state action values.
# NOTE(review): the saved output of this cell reports "Value iteration did not converge" with
# iterations=100 - confirm the values are usable or raise the iteration budget.
value_function = highway_mdp.value_iteration(iterations=100)
value_function.action_values

Value iteration did not converge


{(0.0, 0.0): {0: 0.1333333333333333,
  1: 0.1333333333333333,
  2: 21.064178873367815,
  3: 0.0,
  4: 0.6666666666666666},
 (0.0, 4.0): {0: 0.13271138500861746,
  1: 21.06480082169253,
  2: 21.098134155025864,
  3: 20.99983511524534,
  4: 21.06480082169253},
 (0.0, 8.0): {0: 21.064178873367815,
  1: 21.09875610335058,
  2: 21.09875610335058,
  3: 21.09875610335058,
  4: 21.198134155025866},
 (5.008347245409015, 0.0): {0: 0.0,
  1: 0.0,
  2: 20.06650178191201,
  3: 0.0,
  4: 0.0},
 (5.008347245409015, 4.0): {0: 20.231467488359197,
  1: 20.866501781912007,
  2: 20.96480082169253,
  3: 20.830845540034478,
  4: 20.931467488359196},
 (5.008347245409015, 8.0): {0: 20.865879833587293,
  1: 20.96542277001725,
  2: 20.96542277001725,
  3: 20.90045706357006,
  4: 20.96542277001725},
 (10.01669449081803, 0.0): {0: 0.0,
  1: 0.0,
  2: 19.897512206701148,
  3: 0.0,
  4: 0.0},
 (10.01669449081803, 4.0): {0: 20.033168448578675,
  1: 20.697512206701145,
  2: 20.76650178191201,
  3: 20.59875610335057,


In [7]:
# Build the policy from the solved MDP and roll it out starting from grid cell (0, 0).
# In the saved output each `agent_traj` entry is [state, action, next_state].
# `infor` is not read by any other cell in this notebook.
policy = OptimalPolicy(highway_mdp)
agent_traj, infor = highway_mdp.simulatePlan((0,0), policy, obs_config = highway_mdp.get_obsFeatures())
agent_traj

INFO:root:Maintaining current vehicle speed. For custom speed, provide 'speed' argument.


[[(0, 0), 2, (25.041736227045078, 4.0)],
 [(25.041736227045078, 4.0), 2, (50.083472454090156, 8.0)],
 [(50.083472454090156, 8.0), 4, (70.11686143572622, 8.0)],
 [(70.11686143572622, 8.0), 4, (90.15025041736227, 8.0)],
 [(90.15025041736227, 8.0), 4, (105.17529215358933, 8.0)],
 [(105.17529215358933, 8.0), 1, (125.20868113522539, 8.0)],
 [(125.20868113522539, 8.0), 1, (145.24207011686144, 8.0)],
 [(145.24207011686144, 8.0), 1, (160.2671118530885, 8.0)],
 [(160.2671118530885, 8.0), 4, (180.30050083472455, 8.0)],
 [(180.30050083472455, 8.0), 4, (195.3255425709516, 8.0)],
 [(195.3255425709516, 8.0), 4, (220.36727879799668, 8.0)],
 [(220.36727879799668, 8.0), 3, (250.41736227045078, 8.0)],
 [(250.41736227045078, 8.0), 4, (280.4674457429049, 8.0)],
 [(280.4674457429049, 8.0), 1, (305.50918196994996, 8.0)],
 [(305.50918196994996, 8.0), 1, (330.55091819699504, 8.0)],
 [(330.55091819699504, 8.0), 4, (350.5843071786311, 8.0)],
 [(350.5843071786311, 8.0), 4, (370.61769616026714, 8.0)],
 [(370.6176

In [8]:
# |Plot Simulation result
# Renders the road as a (lane_count x env_length) RGB image: one row per lane, one column per
# unit of x. Ice objects are white, all other road objects dark red, trajectory steps yellow and
# the last trajectory entry green. Every painted span is clipped to the image, so positions near
# x = 0 or x = env_length neither wrap around (negative indices) nor raise IndexError.
import imageio
import numpy as np

LANE_WIDTH = 4   # divisor mapping a y-coordinate to an image row (grid rows sit at y = 0, 4, 8)

X, Y = env_length, lane_count
obj_len = 15     # painted length of road objects and of the final-position marker
image = np.zeros((Y, X, 3), dtype=np.uint8)


def paint_span(x_start, x_stop, y_pos, color):
    """Paint columns [int(x_start), int(x_stop)) of the row for ``y_pos``, clipped to the image."""
    row = int(y_pos / LANE_WIDTH)
    if not 0 <= row < Y:
        return  # y lies outside the drawable rows
    col_lo = max(int(x_start), 0)
    col_hi = min(int(x_stop), X)
    if col_lo < col_hi:
        image[row, col_lo:col_hi, :] = color


# |Plot objects
for obj in highway_mdp.env.unwrapped.road.objects:
    is_ice = 'ice' in obj.__class__.__name__.lower()
    plot_color = (0xFF, 0xFF, 0xFF) if is_ice else (0xA8, 0x32, 0x3E)
    obj_pos = obj.position.astype(int)   # truncate to whole pixels before centring the span
    paint_span(obj_pos[0] - obj_len / 2, obj_pos[0] + obj_len / 2, obj_pos[1], plot_color)

# |Plot agent trajectory (start state of every step except the last)
plot_color = (0xED, 0xE8, 0x4A)
for traj_step in agent_traj[:-1]:
    traj_pos = traj_step[0]
    paint_span(traj_pos[0] - 2, traj_pos[0] + 2, traj_pos[1], plot_color)

# |Plot agent final position
# NOTE(review): this marks the *start* state of the last trajectory entry (index 0), not its
# next-state (index 2) - confirm which one is meant to be the "final position".
if agent_traj:
    plot_color = (0x6E, 0xE3, 0x5E)
    obj_pos = agent_traj[-1][0]
    paint_span(obj_pos[0] - obj_len / 2, obj_pos[0] + obj_len / 2, obj_pos[1], plot_color)

imageio.imwrite('output.png', image)

In [None]:

# Look up the policy's actions for the grid cell nearest to the ego vehicle's current position.
# (road.vehicles[0] is the MDPVehicle in the vehicle listing saved in this notebook.)
# The former `startCoord = (0,0)` assignment was overwritten immediately and has been dropped.
ego_position = highway_mdp.env.unwrapped.road.vehicles[0].position
startCoord = tuple(highway_mdp.get_nearestGridCoord(ego_position))
acts = policy(startCoord)
acts

In [9]:
highway_mdp.env.unwrapped.road.vehicles

[MDPVehicle #440: [150.36790864   8.        ],
 IDMVehicle #400: [173.77567741   4.        ],
 IDMVehicle #608: [195.86602574   8.        ]]

In [10]:
# Step a deep copy rather than `highway_mdp` itself.
# NOTE(review): other cells clone via `highway_mdp.get_copy()` - confirm whether deepcopy is equivalent.
highway_mdp_copy = deepcopy(highway_mdp)
tmp = highway_mdp_copy.step(acts[0])   # apply the first action returned by the policy
# tmp = highway_mdp_copy.step(0)

In [11]:
tmp

((frozendict.frozendict({'position': (176.31, 8.0), 'speed': (25.0, 0.0), 'heading': -1}),
  frozendict.frozendict({'position': (194.76, 5.99), 'speed': (15.19, -4.76), 'heading': -1}),
  frozendict.frozendict({'position': (218.85, 8.0), 'speed': (0.0, 0.0), 'heading': -1}),
  frozendict.frozendict({'position': (219.63, 4.0), 'speed': (20.4, 0.0), 'heading': -1}),
  frozendict.frozendict({'position': (268.53, 0.0), 'speed': (0.0, 0.0), 'heading': -1}),
  frozendict.frozendict({'position': (294.44, 0.0), 'speed': (0.0, 0.0), 'heading': -1}),
  frozendict.frozendict({'position': (343.98, 4.0), 'speed': (0.0, 0.0), 'heading': -1}),
  frozendict.frozendict({'position': (392.54, 8.0), 'speed': (0.0, 0.0), 'heading': -1}),
  frozendict.frozendict({'position': (460.6, 4.0), 'speed': (0.0, 0.0), 'heading': -1}),
  frozendict.frozendict({'position': (505.45, 0.0), 'speed': (0.0, 0.0), 'heading': -1}),
  frozendict.frozendict({'position': (551.08, 8.0), 'speed': (0.0, 0.0), 'heading': -1}),
  fr

Find optimal policy

In [None]:
# Simple MDP Code
# Work on a copy so that `highway_mdp` itself is not the object being populated and stepped.
highway_mdp_current = highway_mdp.get_copy()

#2# Set up the MDP
_ = highway_mdp_current.populate_MDPtable_StaticGridworld(coord_list, parallel_exec=False, veh_speed = 25)   # notebook does not handle multiprocessing well
# matMDP = highway_mdp_current.get_MDPmatrices()    # value iteration already calls this function
value_function = highway_mdp_current.value_iteration(iterations=1000)
policy = OptimalPolicy(highway_mdp_current)

#2# The operations below can be repeated up to max-depth iterations, to execute all actions calculated by the current policy
acts = policy(highway_mdp_current.current_state)
_ = highway_mdp_current.step(acts[0])   # execute only the first recommended action

#2# OR the optimal policy can be recalculated for each action


INFO:root:Current Depth: 3 | Frontier: 0 | Visited: 28 | Transitions:55
  logger.warn(f"{pre} is not within the observation space.")


Construal code

In [None]:
# Construal MDP Code
# Solve one MDP per single-object construal and keep the resulting policy, keyed by the construal.
# NOTE(review): `max_depth` is not assigned anywhere in this notebook; define it (e.g. in the
# configuration cell) before running this cell on a fresh kernel.
construals = highway_mdp.get_construals_singleobj()
# len(construals)

construal_policies = dict()
for construal_idx, constr in enumerate(construals, start=1):
    print("currently running construal ", construal_idx)
    # print(constr.env.unwrapped.road.vehicles)
    # print(constr.env.unwrapped.road.objects)
    # print("----------------")
    constr.populate_MDPtable(max_depth = max_depth)
    # Value iteration is run before building OptimalPolicy, as in the other cells of this notebook;
    # its return value is not needed here.
    constr.value_iteration()
    construal_policies[constr] = OptimalPolicy(constr)

In [7]:
# Score every construal: take the first action its policy recommends for the construal's current
# state, apply that action to a fresh copy of the full MDP, and record the one-step reward.
construal_rewards = dict()
for construal, construal_policy in construal_policies.items():
    recommended = construal_policy(construal.current_state)
    print(recommended)
    full_mdp = highway_mdp.get_copy()
    _obs, step_reward, _done, _truncated, _info = full_mdp.step(recommended[0])
    construal_rewards[construal] = step_reward

# TODO: take optimal actions based on each policy and calculate reward
# Question: The behavior of other vehicles will also change in these construals
#           (do we account for that by repeating the construal creation process at each timestep?)

# #2# The operations below can be repeated up to max-depth iterations, to execute all actions calculated by the current policy
# acts = policy(highway_mdp.current_state)
# _ = highway_mdp.step(acts[0])

[1, 2, 3, 4]
[1, 2, 3, 4]
[1, 2, 3, 4]


  logger.warn(f"{pre} is not within the observation space.")
  logger.warn(f"{pre} is not within the observation space.")
  logger.warn(f"{pre} is not within the observation space.")


[1, 2, 3, 4]
[1, 2, 3, 4]
[1, 2, 3, 4]


  logger.warn(f"{pre} is not within the observation space.")
  logger.warn(f"{pre} is not within the observation space.")
  logger.warn(f"{pre} is not within the observation space.")


[1, 2, 3, 4]
[1, 2, 3, 4]
[1, 2, 3, 4]


  logger.warn(f"{pre} is not within the observation space.")
  logger.warn(f"{pre} is not within the observation space.")
  logger.warn(f"{pre} is not within the observation space.")


[1, 2, 3, 4]
[1, 2, 3, 4]
[1, 2, 3, 4]


  logger.warn(f"{pre} is not within the observation space.")
  logger.warn(f"{pre} is not within the observation space.")
  logger.warn(f"{pre} is not within the observation space.")


[1, 2, 3, 4]
[1, 2, 3, 4]


  logger.warn(f"{pre} is not within the observation space.")
  logger.warn(f"{pre} is not within the observation space.")


[1, 2, 3, 4]
[1, 2, 3, 4]


  logger.warn(f"{pre} is not within the observation space.")
  logger.warn(f"{pre} is not within the observation space.")


[1, 2, 3, 4]
[1, 2, 3, 4]
[1, 2, 3, 4]


  logger.warn(f"{pre} is not within the observation space.")
  logger.warn(f"{pre} is not within the observation space.")
  logger.warn(f"{pre} is not within the observation space.")


[1, 2, 3, 4]
[1, 2, 3, 4]


  logger.warn(f"{pre} is not within the observation space.")
  logger.warn(f"{pre} is not within the observation space.")


[1, 2, 3, 4]
[1, 2, 3, 4]


  logger.warn(f"{pre} is not within the observation space.")
  logger.warn(f"{pre} is not within the observation space.")


[1, 2, 3, 4]
[1, 2, 3, 4]


  logger.warn(f"{pre} is not within the observation space.")
  logger.warn(f"{pre} is not within the observation space.")
  logger.warn(f"{pre} is not within the observation space.")


[1, 2, 3, 4]
[1, 2, 3, 4]
[1, 2, 3, 4]


  logger.warn(f"{pre} is not within the observation space.")
  logger.warn(f"{pre} is not within the observation space.")
  logger.warn(f"{pre} is not within the observation space.")


[1, 2, 3, 4]
[1, 2, 3, 4]
[1, 2, 3, 4]


  logger.warn(f"{pre} is not within the observation space.")
  logger.warn(f"{pre} is not within the observation space.")


In [6]:
construal_rewards
# len(highway_mdp.env.unwrapped.road.vehicles)
# highway_mdp.env.unwrapped.road.vehicles
# len(highway_mdp.env.unwrapped.road.objects)
# highway_mdp.env.unwrapped.road.objects
# construals[3].env.unwrapped.road.vehicles
# construals[1].env.unwrapped.road.objects

{<highway_env.envs.discreteMDP_wrapper.HighwayDiscreteMDP at 0x22279d3ffe0>: 0.8666666666666667,
 <highway_env.envs.discreteMDP_wrapper.HighwayDiscreteMDP at 0x2227cd14590>: 0.8666666666666667,
 <highway_env.envs.discreteMDP_wrapper.HighwayDiscreteMDP at 0x2227d6378c0>: 0.8666666666666667,
 <highway_env.envs.discreteMDP_wrapper.HighwayDiscreteMDP at 0x2227ccb4f20>: 0.8666666666666667,
 <highway_env.envs.discreteMDP_wrapper.HighwayDiscreteMDP at 0x2227d8589e0>: 0.8666666666666667,
 <highway_env.envs.discreteMDP_wrapper.HighwayDiscreteMDP at 0x2227d991ac0>: 0.8666666666666667,
 <highway_env.envs.discreteMDP_wrapper.HighwayDiscreteMDP at 0x2227d8a6e40>: 0.8666666666666667,
 <highway_env.envs.discreteMDP_wrapper.HighwayDiscreteMDP at 0x2227d858800>: 0.8666666666666667,
 <highway_env.envs.discreteMDP_wrapper.HighwayDiscreteMDP at 0x2227d7c7e30>: 0.8666666666666667,
 <highway_env.envs.discreteMDP_wrapper.HighwayDiscreteMDP at 0x2227d9ce0c0>: 0.8666666666666667,
 <highway_env.envs.discreteMDP

In [15]:
# construal_policies[0][0].first_state
# construal_policies[0][0].current_state
# policy(construal_policies[0][0].current_state)

(frozendict.frozendict({'position': (154.56, 4.0), 'speed': (25.0, 0.0), 'heading': 0.0}),
 frozendict.frozendict({'position': (151.09, 4.0), 'speed': (0.0, 0.0), 'heading': -1}),
 frozendict.frozendict({'position': (176.5, 0.0), 'speed': (23.3, 0.0), 'heading': 0.0}),
 frozendict.frozendict({'position': (197.53, 0.0), 'speed': (21.31, 0.0), 'heading': 0.0}),
 frozendict.frozendict({'position': (218.85, 8.0), 'speed': (0.0, 0.0), 'heading': -1}),
 frozendict.frozendict({'position': (221.24, 8.0), 'speed': (23.13, 0.0), 'heading': 0.0}),
 frozendict.frozendict({'position': (242.76, 0.0), 'speed': (22.4, 0.0), 'heading': 0.0}),
 frozendict.frozendict({'position': (265.23, 4.0), 'speed': (22.53, 0.0), 'heading': 0.0}),
 frozendict.frozendict({'position': (268.53, 0.0), 'speed': (0.0, 0.0), 'heading': -1}),
 frozendict.frozendict({'position': (288.73, 8.0), 'speed': (21.33, 0.0), 'heading': 0.0}),
 frozendict.frozendict({'position': (294.44, 0.0), 'speed': (0.0, 0.0), 'heading': -1}),
 fro

TODO:
1. Create an m-vehicle, n-ice-sheet environment.
2. Split it into x = m+n+1 separate environments, each containing a single ice sheet, a single vehicle, or nothing at all.
3. Calculate the optimal policy for each of the x environments.
4. Run each of those policies in the original m-vehicle, n-ice-sheet environment.

Print MDP Tables

In [5]:
# Print the action-name -> action-index mapping, then display the MDP transition table.
# In the saved output the table is keyed by (state, action) and maps to {next_state: value}
# (presumably the transition probability - confirm against the wrapper).
print(highway_mdp.env.unwrapped.action_type.actions_indexes)

highway_mdp.currMDP.transition

{'LANE_LEFT': 0, 'IDLE': 1, 'LANE_RIGHT': 2, 'FASTER': 3, 'SLOWER': 4}


{(frozendict.frozendict({'position': (153.62, 0.0), 'speed': (25.0, 0.0), 'heading': 0.0}),
  0): {frozendict.frozendict({'position': (178.62, 0.0), 'speed': (25.0, 0.0), 'heading': 0.0}): 1},
 (frozendict.frozendict({'position': (153.62, 0.0), 'speed': (25.0, 0.0), 'heading': 0.0}),
  1): {frozendict.frozendict({'position': (178.62, 0.0), 'speed': (25.0, 0.0), 'heading': 0.0}): 1},
 (frozendict.frozendict({'position': (153.62, 0.0), 'speed': (25.0, 0.0), 'heading': 0.0}),
  2): {frozendict.frozendict({'position': (178.3, 3.66), 'speed': (24.98, 1.08), 'heading': 0.043}): 1},
 (frozendict.frozendict({'position': (153.62, 0.0), 'speed': (25.0, 0.0), 'heading': 0.0}),
  3): {frozendict.frozendict({'position': (181.02, 0.0), 'speed': (29.34, 0.0), 'heading': 0.0}): 1},
 (frozendict.frozendict({'position': (153.62, 0.0), 'speed': (25.0, 0.0), 'heading': 0.0}),
  4): {frozendict.frozendict({'position': (176.23, 0.0), 'speed': (20.66, 0.0), 'heading': 0.0}): 1},
 (frozendict.frozendict({'pos

In [6]:
highway_mdp.currMDP.reward

{(frozendict.frozendict({'position': (153.62, 0.0), 'speed': (25.0, 0.0), 'heading': 0.0}),
  0,
  frozendict.frozendict({'position': (178.62, 0.0), 'speed': (25.0, 0.0), 'heading': 0.0})): 0.7999999999999999,
 (frozendict.frozendict({'position': (153.62, 0.0), 'speed': (25.0, 0.0), 'heading': 0.0}),
  1,
  frozendict.frozendict({'position': (178.62, 0.0), 'speed': (25.0, 0.0), 'heading': 0.0})): 0.7999999999999999,
 (frozendict.frozendict({'position': (153.62, 0.0), 'speed': (25.0, 0.0), 'heading': 0.0}),
  2,
  frozendict.frozendict({'position': (178.3, 3.66), 'speed': (24.98, 1.08), 'heading': 0.043})): 0.8327113850086175,
 (frozendict.frozendict({'position': (153.62, 0.0), 'speed': (25.0, 0.0), 'heading': 0.0}),
  3,
  frozendict.frozendict({'position': (181.02, 0.0), 'speed': (29.34, 0.0), 'heading': 0.0})): 0.9157750342935529,
 (frozendict.frozendict({'position': (153.62, 0.0), 'speed': (25.0, 0.0), 'heading': 0.0}),
  4,
  frozendict.frozendict({'position': (176.23, 0.0), 'speed

In [7]:
highway_mdp.currMDP.absorption

{frozendict.frozendict({'position': (178.62, 0.0), 'speed': (25.0, 0.0), 'heading': 0.0}): False,
 frozendict.frozendict({'position': (178.3, 3.66), 'speed': (24.98, 1.08), 'heading': 0.043}): False,
 frozendict.frozendict({'position': (181.02, 0.0), 'speed': (29.34, 0.0), 'heading': 0.0}): False,
 frozendict.frozendict({'position': (176.23, 0.0), 'speed': (20.66, 0.0), 'heading': 0.0}): False,
 frozendict.frozendict({'position': (196.57, 0.0), 'speed': (20.09, 0.0), 'heading': 0.0}): False,
 frozendict.frozendict({'position': (196.19, 3.6), 'speed': (20.05, 1.16), 'heading': 0.058}): False,
 frozendict.frozendict({'position': (198.96, 0.0), 'speed': (24.43, 0.0), 'heading': 0.0}): False,
 frozendict.frozendict({'position': (203.62, 0.0), 'speed': (25.0, 0.0), 'heading': 0.0}): False,
 frozendict.frozendict({'position': (203.3, 3.66), 'speed': (24.98, 1.08), 'heading': 0.043}): False,
 frozendict.frozendict({'position': (206.02, 0.0), 'speed': (29.34, 0.0), 'heading': 0.0}): False,
 fr

Profiling HighwayEnv

In [1]:
# |Move up one directory level to import local instance of highway environment.
# |NOTE: `%cd ..` is relative to the current working directory, so re-running this cell
# |without restarting the kernel moves up one more level each time.
%cd ..
# %pwd

import gymnasium
import highway_env
# from matplotlib import pyplot as plt
# %matplotlib inline

import numpy as np

d:\WorkFiles\ProjectCode\ActiveProjects\HoLab\Mine\HighwayEnv-TRI


In [2]:
# Define Configuration
# Scenario sizes for the profiling run; the config dict below derives its counts from them.
num_of_vehicles = 2
num_of_ice = 5
env_length = 3000
lane_count = 3
config = {
        ## Parameters of interest ##
        "observation": {
            # For more details about observation parameters check out "highway_env\envs\common\observation.py"
            "type": "Kinematics",
            "vehicles_count": num_of_vehicles+num_of_ice+5,   # Number of vehicles (and objects) to show in the observation.
                                                                #   Keep this greater than the top-level "vehicles_count"
                                                                #   (outside this observation dictionary) to observe all
                                                                #   vehicles in the environment.
            "features": ["presence", "x", "y", "vx", "vy"],# "heading"],
            "normalize": False, # Do not normalize object coordinates
            "absolute": True,   # Provide absolute coordinates of vehicles
            "order": "sorted",
            "observe_intentions": False,
            "include_obstacles": True,
            "see_behind": True  # Report vehicles behind the ego vehicle
            },
        ## Parameters specialized for the icy highway environment ##
        "ice_count": num_of_ice,    # Number of ice sheets in the environment
        "env_len":  env_length,    # Length of the road
        ## Keep these to default, because the fast versions of the environments implement different values ##
        ## of these variables for faster execution ##
        "vehicles_count": num_of_vehicles,
        "lanes_count": lane_count,
        "simulation_frequency": 5,
        "duration": 120,  # [s]
        "disable_collision_checks": True,    # NOTE(review): the key name says collision checks are *disabled* when True;
                                             #   the previous comment read "Check collisions for other vehicles" - confirm intent.
        "enable_lane_change": False,
        ## Other parameters already set by default configurations ##
        # "action": {
        #     "type": "DiscreteMetaAction",
        # },
        # "controlled_vehicles": 1,
        # "initial_lane_id": None,
        # "ego_spacing": 2,
        # "vehicles_density": 1,
        # "collision_reward": -1,  # The reward received when colliding with a vehicle.
        # "right_lane_reward": 0.1,  # The reward received when driving on the right-most lanes, linearly mapped to
        # # zero for other lanes.
        # "high_speed_reward": 0.4,  # The reward received when driving at full speed, linearly mapped to zero for
        # # lower speeds according to config["reward_speed_range"].
        # "lane_change_reward": 0,  # The reward received at each lane change action.
        # "reward_speed_range": [20, 30],
        # "normalize_reward": True,
        # "offroad_terminal": False
        }

# Alternative environment ids that were tried; only the last (uncommented) call is active.
# env = gymnasium.make('highway-v0', config=config, render_mode='rgb_array')
# env = gymnasium.make('highway-fast-v0', config=config, render_mode='rgb_array')
env = gymnasium.make('highway-icy-custom-v0', config=config, render_mode='rgb_array')

# Reset once so the road, vehicles and objects exist; the return value is discarded.
# NOTE(review): no seed is passed to reset(), so positions may differ between runs - confirm.
_ = env.reset()
# action = env.unwrapped.action_type.actions_indexes["IDLE"]

In [3]:
import copy

In [5]:
env.unwrapped.road.vehicles[0].position

array([154.01276665,   8.        ])

In [33]:
# Deep-copy the environment, then overwrite the position of vehicles[0] (the MDPVehicle in the
# saved output) on the copy to check that a vehicle can be placed at an arbitrary coordinate.
env_copy = copy.deepcopy(env)
env_copy.unwrapped.road.vehicles[0].position = np.array([50.0, 4.0])
env_copy.unwrapped.road.vehicles

[MDPVehicle #200: [50.  4.],
 IDMVehicle #904: [179.50159323   0.        ],
 IDMVehicle #952: [204.93595799   4.        ]]

In [26]:
# Advance the copied environment by one step and list the vehicles afterwards.
# NOTE(review): 0 is the raw action index; the action map printed elsewhere in this notebook
# lists 0 as 'LANE_LEFT' - confirm that is the intended action here.
env_copy.step(0)
env_copy.unwrapped.road.vehicles

[MDPVehicle #344: [75.  4.],
 IDMVehicle #912: [200.95864985   0.        ],
 IDMVehicle #784: [226.00336446   4.82469402]]

In [26]:
# Build the discrete world grid from the road geometry.
lanes = env.unwrapped.road.network.lanes_list()
lane_ends = [lane.end for lane in lanes]
x_max = max(end[0] for end in lane_ends)
y_max = max(end[1] for end in lane_ends)
# 500 evenly spaced sample points along x, lane_count sample points along y
x_grid = np.linspace(0, x_max, 500)
y_grid = np.linspace(0, y_max, lane_count)
[x_max, y_max]

[3000.0, 8.0]

In [3]:
%prun obs, reward, done, truncated, info = env.step(action)

 

         11827 function calls (11691 primitive calls) in 0.011 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
      296    0.001    0.000    0.001    0.000 lane.py:209(local_coordinates)
      443    0.000    0.000    0.000    0.000 {method 'dot' of 'numpy.ndarray' objects}
      195    0.000    0.000    0.001    0.000 linalg.py:2383(norm)
      160    0.000    0.000    0.003    0.000 objects.py:133(_is_colliding)
       12    0.000    0.000    0.001    0.000 objects.py:180(polygon)
1173/1167    0.000    0.000    0.000    0.000 {built-in method builtins.isinstance}
       16    0.000    0.000    0.001    0.000 controller.py:145(steering_control)
      146    0.000    0.000    0.000    0.000 lane.py:80(on_lane)
      106    0.000    0.000    0.000    0.000 _methods.py:90(_clip)
        6    0.000    0.000    0.001    0.000 utils.py:194(are_polygons_intersecting)
       12    0.000    0.000    0.001    0.000 road.py:480(neig

In [4]:
env.unwrapped.road.vehicles

[MDPVehicle #824: [183.87107986   4.        ],
 IDMVehicle #216: [197.79668924   8.        ],
 IDMVehicle #904: [219.72090957   0.        ]]

In [5]:
env.unwrapped.road.objects

[Ice1 #760: at [151.090512   4.      ],
 Ice1 #432: at [218.85401083   8.        ],
 Ice1 #608: at [268.52539651   0.        ],
 Ice1 #528: at [294.43759937   0.        ],
 Ice1 #536: at [343.98100414   4.        ],
 Ice1 #712: at [392.53761495   8.        ],
 Ice1 #656: at [460.59978171   4.        ],
 Ice1 #528: at [505.44612845   0.        ],
 Ice1 #72: at [551.07811977   8.        ],
 Ice1 #64: at [633.43185766   0.        ]]

In [6]:
obs

array([[  1.      , 183.87108 ,   4.      ,  29.444445,   0.      ],
       [  1.      , 197.79669 ,   8.      ,  17.942114,   0.      ],
       [  1.      , 218.854   ,   8.      ,   0.      ,   0.      ],
       [  1.      , 219.72092 ,   0.      ,  16.230988,   0.      ],
       [  1.      , 268.5254  ,   0.      ,   0.      ,   0.      ],
       [  1.      , 294.4376  ,   0.      ,   0.      ,   0.      ],
       [  1.      , 343.98102 ,   4.      ,   0.      ,   0.      ],
       [  0.      ,   0.      ,   0.      ,   0.      ,   0.      ],
       [  0.      ,   0.      ,   0.      ,   0.      ,   0.      ],
       [  0.      ,   0.      ,   0.      ,   0.      ,   0.      ],
       [  0.      ,   0.      ,   0.      ,   0.      ,   0.      ],
       [  0.      ,   0.      ,   0.      ,   0.      ,   0.      ]],
      dtype=float32)