In [None]:
# In the Name of Allah
# This program solves the Mountain Car problem using the Gymnasium
# environment and tile coding.
#
# Programmer    : Maziar Palhang
# First Edit on : 1398/10/9 with openai gym
# Revised on    : 1403/2/24 revised to work with gymnasium

import gymnasium as gym
import numpy as np
import random
from math import *
from mpl_toolkits import mplot3d
import matplotlib.pyplot as plt

# Select the interactive (ipympl) matplotlib backend for the figures drawn
# at the end of this cell.
%matplotlib widget
# render_mode='human' requests on-screen rendering of the environment.
env = gym.make('MountainCar-v0',render_mode='human')
print(env.action_space)
# Action indices: 0 accelerate left, 1 don't move, 2 accelerate right
print(env.observation_space)
print(env.observation_space.high)
print(env.observation_space.low)

# Signed action values; tileCoding/tileCode take the action in this form
# (the environment's action index minus 1). Not referenced again in this cell.
actions = np.array([-1,0,1])
# Exploration probability. Starts at 1.0 (every action is random) and is
# multiplied by 0.995 after each non-final step of the training loop.
epsilon = 1.0
# Number of overlapping tilings and number of bins per state dimension.
noOfTiling = 8
numBins = 8
# Weight vector: one weight per (tiling, action, velocity bin, position bin).
w = np.zeros(noOfTiling*numBins*numBins*3)
# Feature vector placeholder; reassigned on every step of the training loop.
x = np.zeros(noOfTiling*numBins*numBins*3)
# Step size, divided by the number of tilings.
alpha = 0.5/noOfTiling
# Discount factor (1 = undiscounted).
gamma = 1


def tileCode(p,v,a, offset1,offset2):
    """Return the index of the active tile inside one tiling.

    The position range [-1.2, 0.6] and the velocity range [-0.07, 0.07] are
    each cut into ``numBins`` bins; the tiling is displaced by ``offset1``
    (position) and ``offset2`` (velocity) before binning.

    Parameters
    ----------
    p, v : float
        Position and velocity of the car.
    a : int
        Signed action in {-1, 0, 1}; ``a + 1`` selects the action slab.
    offset1, offset2 : float
        Displacement of this tiling along position and velocity.

    Returns
    -------
    int
        ``(a+1)*numBins**2 + velocityBin*numBins + positionBin``, or -1 when
        a bin cannot be computed (NaN input).

    Notes
    -----
    Bin indices are clamped to ``[0, numBins-1]``. Without the clamp a
    displaced tiling leaves the strip ``[low, low + offset)`` uncovered, the
    exact upper bound lands in bin ``numBins``, and an observation that
    rounds slightly below the nominal minimum (e.g. float32 rounding at the
    left wall) falls into bin -1. In all those cases the tiling contributed
    no feature, so boundary states were represented by fewer (possibly zero)
    active tiles than interior states.
    """
    d1range = (0.6 + 1.2)/numBins
    d2range = (0.07+0.07)/numBins

    d1bin = (p+1.2-offset1)//d1range         # floor division
    d2bin = (v+0.07-offset2)//d2range

    # NaN never compares equal to itself; keep the "no tile" sentinel for it.
    if d1bin != d1bin or d2bin != d2bin:
        return -1

    # Extend the edge tiles so every state activates one tile per tiling.
    d1bin = min(max(d1bin, 0), numBins - 1)
    d2bin = min(max(d2bin, 0), numBins - 1)

    return int(((a+1)*numBins*numBins)+(d2bin * numBins )+ d1bin)

#input is position, velocity, and action
def tileCoding(p,v,a):
    """Build the binary feature vector for a (position, velocity, action) triple.

    Each of the ``noOfTiling`` tilings owns a contiguous slice of
    ``numBins*numBins*3`` entries. Tiling ``k`` is displaced by ``k`` times
    one ``noOfTiling``-th of a bin width in each dimension, and the entry
    that ``tileCode`` reports for it is set to 1. A tiling for which
    ``tileCode`` returns a negative code contributes nothing.

    Parameters
    ----------
    p, v : float
        Position and velocity.
    a : int
        Signed action in {-1, 0, 1}.

    Returns
    -------
    numpy.ndarray
        Vector of length ``noOfTiling*numBins*numBins*3`` holding 0/1 values.
    """
    tilingSize = numBins*numBins*3
    posWidth = (0.6 + 1.2)/numBins
    velWidth = (0.07+0.07)/numBins
    posShift = posWidth/noOfTiling
    velShift = velWidth/noOfTiling

    features = np.zeros(noOfTiling*tilingSize)
    for tiling in range(noOfTiling):
        tile = tileCode(p, v, a, tiling*posShift, tiling*velShift)
        if tile < 0:
            # No tile reported for this tiling.
            continue
        features[tiling*tilingSize + tile] = 1
    return features

# Train the linear action-value function with on-policy TD updates
# (the bootstrap target uses the action actually chosen for the next state).
noOfEpisodes = 300
stepsInEpisodes = np.zeros(noOfEpisodes)


def _epsilonGreedyAction(p, v):
    """Pick an action index (0, 1 or 2) for state (p, v).

    With probability ``epsilon`` a random action is sampled from the
    environment; otherwise the action with the largest estimated value
    ``w . tileCoding(p, v, action-1)`` is returned. ``epsilon`` and ``w`` are
    read from module scope at call time, so the latest values are used.
    """
    if random.random() < epsilon:
        return env.action_space.sample()
    return np.argmax([w.dot(tileCoding(p, v, act)) for act in (-1, 0, 1)])


try:
    for itr in range(noOfEpisodes):
        observation, info = env.reset()
        p = observation[0]
        v = observation[1]
        a = _epsilonGreedyAction(p, v)

        # Upper bound on steps per episode; the recorded output shows episodes
        # ending at 200 steps, so this cap is presumably never the limiting one.
        for t in range(300):
            # No explicit env.render(): with render_mode='human' the
            # environment already draws a frame on every step().
            observation, reward, terminated, truncated, info = env.step(a)
            pp = observation[0]
            vp = observation[1]
            x = tileCoding(p,v,a-1)

            if terminated:
                # True terminal state: nothing to bootstrap from.
                target = reward
            else:
                # Also taken on truncation: a time-limit cut-off is not a
                # terminal state, so the value of the next state still counts.
                ap = _epsilonGreedyAction(pp, vp)
                xp = tileCoding(pp, vp, ap-1)
                target = reward + gamma * w.dot(xp)

            w = w + alpha * (target - w.dot(x)) * x

            if terminated or truncated:
                print('Episode {}'.format(itr),end=' ')
                print("finished after {} steps".format(t + 1))
                break

            #alpha = alpha*0.9995
            epsilon *= 0.995
            p = pp
            v = vp
            a = ap

        # Store the same step count that is printed (t is zero-based).
        stepsInEpisodes[itr] = t + 1
finally:
    # Release the render window even if training is interrupted or fails.
    env.close()

# Evaluate the learned cost-to-go, -max_a q(s, a), on a position x velocity
# grid and plot it, followed by the per-episode step counts.
x1 = np.arange(-1.2,0.6,0.05)        # positions
y1 = np.arange(-0.07,0.07,0.005)     # velocities
z = np.zeros((len(x1),len(y1)))      # z[i, j] belongs to (x1[i], y1[j])

X, Y= np.meshgrid(x1, y1)            # both have shape (len(y1), len(x1))

for i in range(len(x1)):
    for j in range(len(y1)):
        # Value of the greedy (best) action, negated. The previous code took
        # max(-q) with a floor of 0, i.e. the cost of the worst action.
        qValues = [w.dot(tileCoding(x1[i], y1[j], act)) for act in (-1, 0, 1)]
        z[i,j] = -max(qValues)

fig = plt.figure()
ax = plt.axes(projection='3d')

# z is transposed to match the meshgrid layout; velocity goes on the first
# horizontal axis and position on the second.
ax.plot_surface(Y, X, z.T, cmap='winter')
ax.set_title('Mountain Car')
ax.set_xlabel('Velocity')
ax.set_ylabel('Position')
ax.set_zlabel('Cost-to-go')

plt.figure()
plt.plot(stepsInEpisodes)
plt.xlabel('Episode')
plt.ylabel('Steps in episode')
plt.show()


Discrete(3)
Box([-1.2  -0.07], [0.6  0.07], (2,), float32)
[0.6  0.07]
[-1.2  -0.07]
Episode 0 finished after 200 timesteps
Episode 1 finished after 200 timesteps
Episode 2 finished after 200 timesteps
Episode 3 finished after 200 timesteps
Episode 4 finished after 200 timesteps
Episode 5 finished after 200 timesteps
Episode 6 finished after 200 timesteps
Episode 7 finished after 200 timesteps
Episode 8 finished after 200 timesteps
Episode 9 finished after 200 timesteps
Episode 10 finished after 200 timesteps
Episode 11 finished after 200 timesteps
Episode 12 finished after 200 timesteps
Episode 13 finished after 200 timesteps
Episode 14 finished after 200 timesteps
Episode 15 finished after 200 timesteps
Episode 16 finished after 200 timesteps
Episode 17 finished after 200 timesteps
Episode 18 finished after 200 timesteps
Episode 19 finished after 200 timesteps
Episode 20 finished after 200 timesteps
Episode 21 finished after 200 timesteps
Episode 22 finished after 200 timesteps
Episo

NameError: name 'epsilon' is not defined