In [None]:
#adapted from https://jovian.ai/americanachen/qlearning-med/v/1?utm_source=embed#C8

# Note: Gymnasium (and OpenAI Gym >= 0.26) returns 2 values from env.reset() -- (observation, info) --
# and 5 values from env.step() -- (observation, reward, terminated, truncated, info).

import gymnasium as gym
import random

SEED = 1234
random.seed(SEED)  # seeds Python's `random` module only

# New versions keep getting released; if -v3 doesn't work, try -v2 or -v4
streets = gym.make("Taxi-v3", render_mode="ansi")

# The environment and its action space keep their own random generators, which
# `random.seed` does not touch; seed them explicitly so a fresh
# "Restart & Run All" reproduces the same episodes.
streets.action_space.seed(SEED)
streets.reset(seed=SEED)

# `s` (the encoded state) is an attribute of the underlying Taxi environment.
# gym.make() returns it wrapped, and recent Gymnasium releases no longer forward
# attribute access through wrappers, so read it via `unwrapped`.
print(streets.unwrapped.s)
print(streets.render())

In [None]:
# Force the environment into a hand-picked state and display it.
# encode() arguments: taxi_row, taxi_col, passenger_position, passenger_target
initial_state = streets.unwrapped.encode(0, 0, 0, 1)
streets.unwrapped.s = initial_state

# Read the state back from the same object it was written to: `streets` is a
# wrapper stack, and recent Gymnasium releases do not forward `streets.s` to
# the wrapped environment.
print(streets.unwrapped.s)
print(streets.render())

In [None]:
# Baseline: drive the taxi with uniformly random actions and record how many
# steps each trip lasts (every trip is cut off after 100 steps).

epochs = 1000
lengths = []
for taxi_run in range(epochs):
    state, info = streets.reset()
    done = False
    trip_length = 0
    # Count steps 1..100; leave early as soon as the episode reports it is over.
    for trip_length in range(1, 101):
        action = streets.action_space.sample()  # explore a random action
        next_state, reward, terminated, truncated, info = streets.step(action)
        done = terminated or truncated
        if done:
            break
    lengths.append(trip_length)

# Mean trip length over all random-policy runs.
avg_len = sum(lengths) / epochs
print(avg_len)

# Size of the problem: number of discrete states and actions.
print('states', streets.observation_space.n)
print('actions', streets.action_space.n)
        

In [None]:
# learn using q-learning algorithm

import numpy as np

# One row per state, one column per action; every estimate starts at 0.
q_table = np.zeros([streets.observation_space.n, streets.action_space.n])

learning_rate = 0.1     # step size of each Q-value update
discount_factor = 0.6   # weight given to the estimated value of the next state
exploration = 0.01      # probability of taking a random action (epsilon-greedy)
epochs = 100000         # number of training episodes

for taxi_run in range(epochs):
    state, info = streets.reset()
    done = False

    while not done:
        # Epsilon-greedy: mostly exploit the current estimates, occasionally explore.
        if random.uniform(0, 1) < exploration:
            action = streets.action_space.sample()
        else:
            action = int(np.argmax(q_table[state]))

        next_state, reward, terminated, truncated, info = streets.step(action)

        # A terminated episode has no future, so nothing is bootstrapped from it.
        future_value = 0.0 if terminated else np.max(q_table[next_state])

        # Q-learning update:
        #   Q(s, a) <- (1 - lr) * Q(s, a) + lr * (reward + discount * max_a' Q(s', a'))
        q_table[state, action] = (
            (1 - learning_rate) * q_table[state, action]
            + learning_rate * (reward + discount_factor * future_value)
        )

        state = next_state
        # `truncated` ends the episode as well, which keeps this loop finite
        # even when the goal is not reached.
        done = terminated or truncated

In [None]:
# Spot-check the learned table: look up the greedy action for one hand-picked
# state and draw the environment in that state.

taxi = streets.unwrapped
state = taxi.encode(1, 1, 1, 0)
action = np.argmax(q_table[state])  # best-scoring action for this state

# Show the encoded state, its row of Q-values, and the chosen action.
for item in (state, q_table[state], action):
    print(item)

# Put the environment into that state (and record the action) so that the
# rendered frame reflects what was just looked up.
taxi.s = state
taxi.lastaction = action
print(streets.render())

In [None]:
# check performance after learning

from IPython.display import clear_output
from time import sleep
num_trips = 10    # how many evaluation trips to animate
max_steps = 100   # give up on a trip after this many steps

lengths = []      # steps taken in each trip
returns = []      # sum of rewards collected in each trip
for tripnum in range(1, num_trips + 1):
    state, info = streets.reset()

    done = False
    trip_length = 0
    returnL = 0
    while not done and trip_length < max_steps:
        # Always follow the greedy action from the Q-table (no exploration here).
        action = np.argmax(q_table[state])
        next_state, reward, terminated, truncated, info = streets.step(action)
        done = terminated or truncated
        returnL += reward

        # Redraw the frame in place so the trip plays as an animation.
        clear_output(wait=True)
        print("Trip number " + str(tripnum) + " Step " + str(trip_length))
        print(streets.render())
        sleep(.2)

        state = next_state
        trip_length += 1
    lengths.append(trip_length)
    returns.append(returnL)
    sleep(.2)

# Average over the trips actually run rather than a hard-coded count.
avg_len = sum(lengths) / len(lengths)
print(avg_len)
print("average return", sum(returns) / len(returns))