In [7]:
import gymnasium as gym
import numpy as np
import time
import tqdm

# FrozenLake-v1 environment

4x4 or 8x8 map

Environment action space:  
`env.action_space: Discrete(4)`
0 = Left  
1 = Down  
2 = Right  
3 = Up  

Observation space:  
`env.observation_space: Discrete(nrows * ncols)`
index = row * ncols + col  

Transition
`env.unwrapped.P: Dict[int: Dict[int: List[Tuple[float, int, float, bool]]]]`  
`state` (int: 0 - nrows * ncols)  
&emsp;--> `action` (int: 0-3)  
&emsp;&emsp;--> [  
&emsp;&emsp;(  
&emsp;&emsp;&emsp;`p` (float: 0-1),  
&emsp;&emsp;&emsp;`next_state` (int: 0 - nrows * ncols),  
&emsp;&emsp;&emsp;`reward` (float),  
&emsp;&emsp;&emsp;`terminate` (bool)),  
&emsp;&emsp;...]  

Map:
`env.unwrapped.desc: np.ndarray((nrows, ncols))`
'S' = Starting position, default is (0, 0)
'G' = Goal, default is (nrows-1, ncols-1)
'F' = Floor
'H' = Hole

In [9]:
env_kwargs = {"desc": None, "map_name": "4x4", "is_slippery": False}
env = gym.make('FrozenLake-v1', **env_kwargs)
env_render = gym.make('FrozenLake-v1', **env_kwargs, render_mode="human")
print(env.action_space)
print(env.observation_space)

Discrete(4)
Discrete(16)


In [10]:
env.unwrapped.P[4]

{0: [(1.0, 4, 0.0, False)],
 1: [(1.0, 8, 0.0, False)],
 2: [(1.0, 5, 0.0, True)],
 3: [(1.0, 0, 0.0, False)]}

In [14]:
env.unwrapped.desc

array([[b'S', b'F', b'F', b'F'],
       [b'F', b'H', b'F', b'H'],
       [b'F', b'F', b'F', b'H'],
       [b'H', b'F', b'F', b'G']], dtype='|S1')

In [31]:
map = env.unwrapped.desc.astype(str)
nrows, ncols = map.shape
tile_locations = {}
for tp in np.unique(map):
    tile_locations[tp] = list(zip(*[arr.tolist() for arr in np.where(map == tp)]))
tile_locations


{'F': [(0, 1),
  (0, 2),
  (0, 3),
  (1, 0),
  (1, 2),
  (2, 0),
  (2, 1),
  (2, 2),
  (3, 1),
  (3, 2)],
 'G': [(3, 3)],
 'H': [(1, 1), (1, 3), (2, 3), (3, 0)],
 'S': [(0, 0)]}

In [32]:
assert(len(tile_locations["S"]) == 1)
assert(len(tile_locations["G"]) == 1)

In [118]:
def get_env_data(env, pad_tile="B"):
    map = env.unwrapped.desc.astype(str)
    nrows, ncols = map.shape
    
    map["S"] = "F" # Remove start tile
    tile_types = sorted(np.unique(map).tolist())
    if pad_tile not in tile_types:
        tile_types.append(pad_tile)
    tile_type_ids = {tp: i for i, tp in enumerate(tile_types)}
    assert len(tile_types) == len(np.unique(tile_types)), "Tile types should be unique."

    map_to_idmap = np.vectorize(lambda x: tile_type_ids[x])
    map = map_to_idmap(map).astype(int)

    tile_locations = {}
    for id in tile_type_ids.values():
        tile_locations[id] = list(zip(*[arr.tolist() for arr in np.where(map == id)]))
    assert len(tile_locations[tile_type_ids["G"]]) == 1, "There should be exactly one goal tile."

    pad_tile_id = tile_type_ids[pad_tile]
    padded_map = np.pad(map, 1, mode="constant", constant_values=pad_tile_id)

    return {
        "nrows": nrows,
        "ncols": ncols,
        "map": map,
        "padded_map": padded_map,
        "pad_tile": pad_tile,
        "pad_tile_id": pad_tile_id,
        "nholes": len(tile_locations[tile_type_ids["H"]]),
        "tile_locations": tile_locations,
        "tile_types": tile_types,
        "tile_type_ids": tile_type_ids,
    }
env_data = get_env_data(env)

IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices

In [119]:
env_data

{'nrows': 4,
 'ncols': 4,
 'map': array([[3, 0, 0, 0],
        [0, 2, 0, 2],
        [0, 0, 0, 2],
        [2, 0, 0, 1]]),
 'padded_map': array([[4, 4, 4, 4, 4, 4],
        [4, 3, 0, 0, 0, 4],
        [4, 0, 2, 0, 2, 4],
        [4, 0, 0, 0, 2, 4],
        [4, 2, 0, 0, 1, 4],
        [4, 4, 4, 4, 4, 4]]),
 'pad_tile': 'B',
 'pad_tile_id': 4,
 'nholes': 4,
 'tile_locations': {0: [(0, 1),
   (0, 2),
   (0, 3),
   (1, 0),
   (1, 2),
   (2, 0),
   (2, 1),
   (2, 2),
   (3, 1),
   (3, 2)],
  1: [(3, 3)],
  2: [(1, 1), (1, 3), (2, 3), (3, 0)],
  3: [(0, 0)],
  4: []},
 'tile_types': ['F', 'G', 'H', 'S', 'B'],
 'tile_type_ids': {'F': 0, 'G': 1, 'H': 2, 'S': 3, 'B': 4}}

In [120]:
def pos2coord(pos, env_data):
    """Convert position to coordinates."""
    return np.unravel_index(pos, (env_data["nrows"], env_data["ncols"]))

def coord2pos(coord, env_data):
    """Convert coordinates to position."""
    return np.ravel_multi_index(coord, (env_data["nrows"], env_data["ncols"]))
print(pos2coord(5, env_data))
print(coord2pos((3, 0), env_data))

(1, 1)
12


In [121]:
def get_tile_type(pos, env_data):
    """Get tile type at position."""
    return env_data["map"][pos2coord(pos, env_data)]

def get_tile_neighbors(pos, env_data, radius=1):
    """Get neighbors of tile at position."""
    coord = np.array(pos2coord(pos, env_data)) + 1
    dcoord = np.arange(-radius, radius + 1).astype(int)
    ix = np.ix_(*[coord[i] + dcoord for i in range(len(coord))])
    return env_data["padded_map"][ix]

get_tile_neighbors(0, env_data, radius=1)

array([[4, 4, 4],
       [4, 3, 0],
       [4, 0, 2]])

In [124]:
def pos_to_onehot(pos, env_data):
    """Convert position to one-hot vector."""
    return np.eye(np.size(env_data["map"]))[pos]

def tile_id_to_onehot(tile_id, env_data):
    """Convert tile id to one-hot vector."""
    return np.eye(len(env_data["tile_types"]))[tile_id]

def state_to_observation_human(state, env_data):
    """Convert state (position index) to observation for human agent."""
    position = pos_to_onehot(state, env_data)
    neighbors = get_tile_neighbors(state, env_data, radius=1)
    neighbors = tile_id_to_onehot(neighbors.flatten(), env_data)
    observation = np.concatenate([position, neighbors.flatten()])
    return observation

def state_to_observation_ai(state, env_data):
    """Convert state (position index) to observation for AI agent."""
    position = pos_to_onehot(state, env_data)
    neighbors = get_tile_neighbors(state, env_data, radius=1, invisible_tiles=invisible_tiles)
    neighbors = tile_id_to_onehot(neighbors.flatten(), env_data)
    observation = np.concatenate([position, neighbors.flatten()])
    return observation

state_to_observation_human(0, env_data)

array([1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0.,
       0., 1., 0., 0., 0., 1., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 1.,
       1., 0., 0., 0., 0., 0., 0., 1., 0., 0.])

In [20]:
env = gym.make('FrozenLake-v1', desc=None, map_name="4x4", is_slippery=False,
               render_mode="human")
env_data = get_env_data(env)
observation, info = env.reset(seed=42)
for _ in tqdm.tqdm(range(100)):
    env.render()
    time.sleep(0.01)
    action = env.action_space.sample()
    observation, reward, terminated, truncated, info = env.step(action)

    if terminated or truncated:
        time.sleep(0.2)
        observation, info = env.reset()
env.render()
env.close()

 38%|███▊      | 38/100 [00:20<00:33,  1.85it/s]


KeyboardInterrupt: 