This is stage 2 for developing a discrete MDP wrapper for the highway environment based on discretizedMDP.ipynb.

The purpose of this code is to implement multiprocessing and set up a separate discretized mdp python script in the codebase.

In [1]:
# Move up one directory level to import local instance of highway environment
%cd ..
%pwd

d:\Project Files\HoLab\Codes\Mine\HighwayEnv-TRI


'd:\\Project Files\\HoLab\\Codes\\Mine\\HighwayEnv-TRI'

Import necessary modules

In [None]:
import gymnasium as gym
import highway_env

from frozendict import frozendict
import numpy as np
import copy

from matplotlib import pyplot as plt
%matplotlib inline




Set up MDP

In [2]:
from highway_env.envs.discreteMDP_wrapper import HighwayDiscreteMDP

import cProfile
import logging
logger = logging.getLogger()
logger.setLevel(logging.INFO)    # Other options: INFO, WARNING, ERROR, CRITICAL

import functools
import multiprocessing as mp
# pool = mp.Pool((mp.cpu_count()-1)) #This uses all processors instead of manually putting the number



In [3]:
config = {
        "observation": {
            "type": "Kinematics",
            "vehicles_count": 50,
            "features": ["presence", "x", "y", "vx", "vy", "heading"],
            "normalize": False,
            "absolute": True,
            "order": "sorted",
            "observe_intentions": False,
            "include_obstacles": True
            }
        }

highway_mdp = HighwayDiscreteMDP('highway-v0', config=config, render_mode='human')

initial_state, actions = highway_mdp.get_env_properties()

In [None]:
max_depth = 2   # The number of steps to plan ahead

def exeuteAction(action, envMDP: HighwayDiscreteMDP):
    """
    Given an instance of a gym environment and an action the environment will execute 
    the action and return the next state and a copy of the updated environment instance.
    """
    print("In function")
    envMDP_copy = copy.deepcopy(envMDP)
    # logging.debug(envMDP_copy.env.unwrapped.road.vehicles[0])
    next_state, reward, done, truncated, info = envMDP_copy.step(action)
    logging.debug(' | '.join((state[0], action, next_state[0])))
    return((action, next_state, reward, done, truncated, info, envMDP_copy.copy_env()))

visited = set()
transitions = {}
frontier = {(initial_state, 0, highway_mdp.copy_env())}
while frontier:
    state, depth, curr_env = frontier.pop()
    visited.add(state)
    if depth < max_depth:
        highway_mdp.set_env(curr_env)
        print("calling function")
        process_count = (mp.cpu_count()-2)
        with mp.Pool(process_count) as pool:
            return_vals = [pool.apply_async(functools.partial(exeuteAction, envMDP=highway_mdp), a) for a in actions]
            # return_vals = pool.map(functools.partial(exeuteAction, envMDP=highway_mdp), actions)
        # for action, next_state, reward, done, truncated, info, updated_env in return_vals:
        for val in return_vals:
            action, next_state, reward, done, truncated, info, updated_env = val.get()
            if (state[0], action) not in transitions:
                transitions[(state[0], action)] = {}            
            if next_state[0] not in transitions[(state[0], action)]:
                transitions[(state[0], action)][next_state[0]] = 0
            transitions[(state[0], action)][next_state[0]] += 1
            if next_state not in visited:
                frontier.add((next_state, depth + 1, updated_env))
    MDPstatus = "Current Depth: " + str(depth) + " | Frontier: " + str(len(frontier)) +\
                " | Visited: " + str(len(visited)) + " | Transitions:" + str(len(transitions))
    logging.info(MDPstatus)



calling function


In [None]:
return_vals

[<multiprocessing.pool.ApplyResult at 0x1fd81b2a8a0>,
 <multiprocessing.pool.ApplyResult at 0x1fd81396240>,
 <multiprocessing.pool.ApplyResult at 0x1fd81bab590>,
 <multiprocessing.pool.ApplyResult at 0x1fd81af6ea0>,
 <multiprocessing.pool.ApplyResult at 0x1fdfe10fc50>]

In [6]:
max_depth = 2   # The number of steps to plan ahead

def run_code():
    visited = set()
    transitions = {}
    frontier = {(initial_state, 0, highway_mdp.copy_env())}
    loop_counter = 0
    while frontier:
        state, depth, curr_env = frontier.pop()
        visited.add(state)
        if depth < max_depth:
            for action in actions:
                highway_mdp.set_env(curr_env)
                # logging.debug(highway_mdp.env.unwrapped.road.vehicles[0])
                if (state[0], action) not in transitions:
                    transitions[(state[0], action)] = {}
                next_state, reward, done, truncated, info = highway_mdp.step(action)
                logging.debug(str(state[0]) + ' | ' + str(action) + ' | ' + str(next_state[0]))
                if next_state[0] not in transitions[(state[0], action)]:
                    transitions[(state[0], action)][next_state[0]] = 0
                transitions[(state[0], action)][next_state[0]] += 1
                if next_state not in visited:
                    frontier.add((next_state, depth + 1, highway_mdp.copy_env()))
                loop_counter += 1
        MDPstatus = "Current Depth: " + str(depth) + " | Frontier: " + str(len(frontier)) +\
                    " | Visited: " + str(len(visited)) + " | Transitions:" + str(len(transitions))
        logging.info(MDPstatus)
    print("Number of execution calls: ", loop_counter)

run_code()

# cProfile.run('run_code()')


INFO:root:Current Depth: 0 | Frontier: 5 | Visited: 1 | Transitions:5
INFO:root:Current Depth: 1 | Frontier: 9 | Visited: 2 | Transitions:10
INFO:root:Current Depth: 2 | Frontier: 8 | Visited: 3 | Transitions:10
INFO:root:Current Depth: 2 | Frontier: 7 | Visited: 4 | Transitions:10
INFO:root:Current Depth: 1 | Frontier: 11 | Visited: 5 | Transitions:15
INFO:root:Current Depth: 2 | Frontier: 10 | Visited: 6 | Transitions:15
INFO:root:Current Depth: 2 | Frontier: 9 | Visited: 7 | Transitions:15
INFO:root:Current Depth: 1 | Frontier: 13 | Visited: 8 | Transitions:20
INFO:root:Current Depth: 2 | Frontier: 12 | Visited: 9 | Transitions:20
INFO:root:Current Depth: 2 | Frontier: 11 | Visited: 10 | Transitions:20
INFO:root:Current Depth: 2 | Frontier: 10 | Visited: 11 | Transitions:20
INFO:root:Current Depth: 1 | Frontier: 14 | Visited: 12 | Transitions:25
INFO:root:Current Depth: 1 | Frontier: 18 | Visited: 13 | Transitions:30
INFO:root:Current Depth: 2 | Frontier: 17 | Visited: 14 | Transiti

Number of execution calls:  30


In [109]:
print(highway_mdp.env.unwrapped.action_type.actions_indexes)

transitions

{'LANE_LEFT': 0, 'IDLE': 1, 'LANE_RIGHT': 2, 'FASTER': 3, 'SLOWER': 4}


{(frozendict.frozendict({'position': (178.19, 0.0), 'speed': (25.0, 0.0), 'heading': 0.0}),
  0): {frozendict.frozendict({'position': (296.98, 0.2), 'speed': (0.04, -0.01), 'heading': -0.173}): 1},
 (frozendict.frozendict({'position': (178.19, 0.0), 'speed': (25.0, 0.0), 'heading': 0.0}),
  1): {frozendict.frozendict({'position': (296.98, 0.2), 'speed': (0.04, -0.01), 'heading': -0.173}): 1},
 (frozendict.frozendict({'position': (178.19, 0.0), 'speed': (25.0, 0.0), 'heading': 0.0}),
  2): {frozendict.frozendict({'position': (296.98, 0.2), 'speed': (0.04, -0.01), 'heading': -0.173}): 1},
 (frozendict.frozendict({'position': (178.19, 0.0), 'speed': (25.0, 0.0), 'heading': 0.0}),
  3): {frozendict.frozendict({'position': (296.98, 0.2), 'speed': (0.04, -0.01), 'heading': -0.173}): 1},
 (frozendict.frozendict({'position': (178.19, 0.0), 'speed': (25.0, 0.0), 'heading': 0.0}),
  4): {frozendict.frozendict({'position': (296.98, 0.2), 'speed': (0.04, -0.01), 'heading': -0.173}): 1},
 (frozendi

# TEST CODE

In [127]:
# |DEBUGGING CODE

print(highway_mdp.env.unwrapped.road.vehicles[0])
tmp = highway_mdp.copy_env()
print(tmp.unwrapped.road.vehicles[0])
highway_mdp.step(1)
print(highway_mdp.env.unwrapped.road.vehicles[0])
print(tmp.unwrapped.road.vehicles[0])
highway_mdp.set_env(tmp)
print(highway_mdp.env.unwrapped.road.vehicles[0])


# dir(highway_mdp.env.unwrapped)

next_state, reward, done, truncated, info = highway_mdp.step(1)
# next_state


MDPVehicle #544: [207.38302935   8.        ]
MDPVehicle #248: [207.38302935   8.        ]
1 [1.   1.   0.08 1.   0.   0.  ]
MDPVehicle #544: [232.38302935   8.        ]
MDPVehicle #248: [207.38302935   8.        ]
MDPVehicle #248: [207.38302935   8.        ]


In [None]:
config = {
        "observation": {
            "type": "Kinematics",
            "vehicles_count": 50,
            "features": ["presence", "x", "y", "vx", "vy", "heading"],
            "normalize": False,
            "absolute": True,
            "order": "sorted",
            "observe_intentions": False,
            "include_obstacles": True
            }
        }

env = gym.make('highway-v0', config=config, render_mode='human')

In [75]:
vehicles = []
for veh_state in obs:
    feature_vals = {k: v for k,v in zip(config["observation"]["features"], veh_state)}
    veh = {}
    veh["position"] = tuple(np.round((feature_vals["x"],feature_vals["y"]), 2))
    veh["speed"] = tuple(np.round((feature_vals["vx"],feature_vals["vy"]), 2))
    veh["heading"] = np.round(feature_vals["heading"], 3)
    vehicles.append(frozendict(veh))
vehicles = tuple(vehicles)
vehicles

(frozendict.frozendict({'position': (1.0, 0.08), 'speed': (1.0, 0.0), 'heading': 0.0}),
 frozendict.frozendict({'position': (0.08, 0.0), 'speed': (-0.24, 0.0), 'heading': 0.0}),
 frozendict.frozendict({'position': (0.33, -0.04), 'speed': (-0.18, 0.0), 'heading': 0.003}),
 frozendict.frozendict({'position': (0.45, 0.04), 'speed': (-0.31, 0.0), 'heading': 0.005}),
 frozendict.frozendict({'position': (0.72, 0.0), 'speed': (-0.19, 0.0), 'heading': 0.0}),
 frozendict.frozendict({'position': (0.93, 0.04), 'speed': (-0.19, 0.0), 'heading': 0.0}),
 frozendict.frozendict({'position': (1.0, -0.08), 'speed': (-0.27, 0.0), 'heading': 0.0}),
 frozendict.frozendict({'position': (1.0, -0.04), 'speed': (-0.24, -0.0), 'heading': -0.0}),
 frozendict.frozendict({'position': (1.0, -0.0), 'speed': (-0.16, 0.0), 'heading': 0.001}),
 frozendict.frozendict({'position': (1.0, -0.08), 'speed': (-0.28, 0.0), 'heading': 0.0}),
 frozendict.frozendict({'position': (0.0, 0.0), 'speed': (0.0, 0.0), 'heading': 0.0}),


In [77]:
import sys

# sys.getsizeof(transitions)
sys.getsizeof(highway_mdp.copy_env())
# sys.getsizeof(int(10.0))


48