In [45]:
from time import sleep
from IPython.display import clear_output

import gym

## Explore The Environment

In [9]:
# Create the Taxi-v3 environment and keep the object stored in its `.env` attribute.
# NOTE(review): presumably `.env` is the raw environment underneath gym's default
# wrapper (e.g. the episode step limit) — confirm against the installed gym version.
env = gym.make("Taxi-v3").env

In [28]:
print('Environment Display:')
env.reset()  # put the environment into a new, random state
env.render()

# f-strings keep the formatting consistent with the rest of the notebook;
# the printed text is the same as with str.format().
print(f"State Space {env.observation_space}")
print(f"Action Space {env.action_space}")

Environment Display:
+---------+
|[35mR[0m: |[43m [0m: :G|
| : | : : |
| : : : : |
| | : | : |
|[34;1mY[0m| : |B: |
+---------+

State Space Discrete(500)
Action Space Discrete(6)


## Explore the State Encoding Process

In [40]:
# Turn a hand-picked configuration into a single integer state id.
state = env.encode(3, 1, 2, 0) # (taxi row, taxi column, passenger index, destination index)
print("State:", state)

# Overwrite the environment's current state attribute with that id, then draw
# the grid so the encoded configuration can be checked visually.
env.s = state
env.render()

State: 328
+---------+
|[35mR[0m: | : :G|
| : | : : |
| : : : : |
| |[43m [0m: | : |
|[34;1mY[0m| : |B: |
+---------+



## Apply a Random Approach

In [58]:
# Start the episode from a known state, derived rather than hard-coded:
# (taxi row 3, taxi column 1, passenger index 2, destination index 0) encodes to 328.
env.s = env.encode(3, 1, 2, 0)

PENALTY_REWARD = -10  # a step that returns this reward is counted as a penalty

epochs = 0  # number of steps taken so far
penalties, reward = 0, 0

frames = []  # one record per step, replayed later as an animation

done = False

# Keep taking randomly sampled actions until the environment reports `done`.
# No seeding is visible in this cell, so the step and penalty counts are
# expected to differ from run to run.
while not done:
    action = env.action_space.sample()
    next_state, reward, done, info = env.step(action)

    if reward == PENALTY_REWARD:
        penalties += 1

    # Store the text rendering together with this step's state, action and reward.
    frames.append({
        'frame': env.render(mode='ansi'),
        'state': next_state,
        'action': action,
        'reward': reward
    })

    epochs += 1

print(f'Timesteps taken: {epochs}')
print(f'Penalties incurred: {penalties}')

Timesteps taken: 524
Penalties incurred: 181


In [59]:
def print_frames(frames, delay=.1):
    """Replay recorded frames one after another in the cell's output area.

    Args:
        frames: iterable of dicts, each providing the keys 'frame', 'state',
            'action' and 'reward'.
        delay: seconds to pause after printing each frame. Defaults to 0.1,
            the value that used to be hard-coded; pass 0 to skip the pause.

    Returns:
        None. The frames are printed as a side effect.
    """
    for i, frame in enumerate(frames):
        # Clear what was shown before so each frame replaces the previous one.
        clear_output(wait=True)
        print(frame['frame'])
        print(f"Timestep: {i+1}")  # enumerate is 0-based; show 1-based step numbers
        print(f"State: {frame['state']}")
        print(f"Action: {frame['action']}")
        print(f"Reward: {frame['reward']}")
        sleep(delay)
              
# Replay the step records collected in `frames` by the random-action episode.
print_frames(frames)

+---------+
|[35m[34;1m[43mR[0m[0m[0m: | : :G|
| : | : : |
| : : : : |
| | : | : |
|Y| : |B: |
+---------+
  (Dropoff)

Timestep: 524
State: 0
Action: 5
Reward: 20


---