# Taxi-v2

Import the required dependencies.

In [1]:
import os

import gym
import torch

import training
import benchmark

In [2]:
# Gym environment id passed to `gym.make` throughout this notebook.
ENV: str = 'Taxi-v2'

## Interaction with the environment
Explore the environment to understand its dynamics, observations and actions.

In [3]:
# Throwaway environment used only for manual exploration in this section.
test_env = gym.make(ENV)
state = test_env.reset()

# Describe the observation space and draw one sample from it.
obs_space = test_env.observation_space
print(f'Observation space: {obs_space}')
print(f'Observation space sample: {obs_space.sample()}')

# Same for the action space.
act_space = test_env.action_space
print(f'Action space: {act_space}')
print(f'Action space sample: {act_space.sample()}')

# Show the state returned by reset() next to the rendered board.
print(f'State: {state}')
test_env.render()

Observation space: Discrete(500)
Observation space sample: 445
Action space: Discrete(6)
Action space sample: 3
State: 213
+---------+
|R: | : :[35mG[0m|
| : : : : |
|[43m [0m: : : : |
| | : | : |
|Y| : |[34;1mB[0m: |
+---------+



Take a random action and see how it affects the environment.

In [4]:
# Sample one random action and apply it to the exploration environment.
random_action = test_env.action_space.sample()

# The fourth value returned by step() is not used here. Bind it to a real name
# rather than `_`, which IPython also uses for the most recent cell output.
new_state, reward, done, step_info = test_env.step(random_action)

print(f'Action: {random_action}\tNew state: {new_state}\tReward: {reward}\tDone:{done}')
test_env.render()

Action: 3	New state: 213	Reward: -1	Done:False
+---------+
|R: | : :[35mG[0m|
| : : : : |
|[43m [0m: : : : |
| | : | : |
|Y| : |[34;1mB[0m: |
+---------+
  (West)


## Solving the environment using Q-Learning

In [None]:
# Episode budgets for training and for the follow-up evaluation run.
TRAIN_EPISODES = 10_000
EVAL_EPISODES = 100

# Fit on a fresh environment; `fit` hands back the best score and the Q-table.
env = gym.make(ENV)
best_score, q_table = training.fit(env, episodes=TRAIN_EPISODES)

# Evaluate the learned table and report the average of the collected rewards.
rewards = benchmark.play_episodes(env, q_table, episodes=EVAL_EPISODES)
mean_reward = rewards.mean().item()
print(f'Mean reward: {mean_reward}')

Episode 2000: New best score! 8.550000190734863
Episode 4000: New best score! 8.649999618530273
Episode 5000: New best score! 8.739999771118164


In [None]:
# Replay the learned Q-table with rendering switched on. The episode count is
# left at play_episodes' default, which is not visible from this notebook.
benchmark.play_episodes(env, q_table, render=True)

Save the Q-table to a file.

In [None]:
# Persist the learned Q-table. torch.save does not create missing parent
# directories, so make sure the target folder exists before writing.
q_table_path = 'Taxi-v2_saved/Taxi-v2-qtable.pt'
os.makedirs(os.path.dirname(q_table_path), exist_ok=True)
torch.save(q_table, q_table_path)