In [None]:
import numpy
import matplotlib.pyplot as plt

def make_maze(size):
    """Generate a random maze on a ``size`` x ``size`` grid of cells.

    Passages are carved by a randomized depth-first search that starts from a
    randomly chosen cell. The search keeps its own stack instead of recursing,
    so large mazes do not run into Python's recursion limit.

    Args:
        size: Number of cells along each side of the maze (at least 1).

    Returns:
        A list of ``2 * size + 1`` rows, each a list of ``2 * size + 1`` ints,
        where 1 marks a wall and 0 marks open space. Cell ``(x, y)`` is stored
        at ``[2 * y + 1][2 * x + 1]``; the outer border is always wall.

    Raises:
        ValueError: If ``size`` is smaller than 1.
    """
    if size < 1:
        raise ValueError("size must be at least 1")

    w = size
    h = size
    # Visited flags with one extra column and one extra row of 1s. Negative
    # indices wrap around onto this padding, so stepping off any edge of the
    # grid looks like stepping onto an already visited cell.
    vis = [[0] * w + [1] for _ in range(h)] + [[1] * (w + 1)]
    # Per cell: "10" = wall on the left + open cell, "00" once the wall is carved.
    ver = [["10"] * w + ['1'] for _ in range(h)] + [[]]
    # Per cell: "11" = corner + wall above, "10" once the wall above is carved.
    hor = [["11"] * w + ['1'] for _ in range(h + 1)]

    def shuffled_neighbours(x, y):
        """Return an iterator over the four neighbours of (x, y) in random order."""
        d = [(x - 1, y), (x, y + 1), (x + 1, y), (x, y - 1)]
        numpy.random.shuffle(d)
        return iter(d)

    start_x = numpy.random.randint(w)
    start_y = numpy.random.randint(h)
    vis[start_y][start_x] = 1
    # Each stack entry remembers a cell and the neighbours it has not tried yet.
    stack = [(start_x, start_y, shuffled_neighbours(start_x, start_y))]
    while stack:
        x, y, pending = stack[-1]
        for (xx, yy) in pending:
            if vis[yy][xx]:
                continue
            if xx == x:
                hor[max(y, yy)][x] = "10"
            if yy == y:
                ver[y][max(x, xx)] = "00"
            vis[yy][xx] = 1
            stack.append((xx, yy, shuffled_neighbours(xx, yy)))
            break
        else:
            # Every neighbour has been tried: backtrack to the previous cell.
            stack.pop()

    # Interleave wall rows and cell rows and turn each character into an int.
    # The trailing placeholder row in ``ver`` is empty and is skipped.
    M = []
    for (a, b) in zip(hor, ver):
        for pieces in (a, b):
            line = ''.join(pieces)
            if line:
                M.append([int(e) for e in line])
    return M

In [None]:
# Maze settings: number of cells per side, and the start/target squares given
# as (row, column) indices into the maze array
maze_size = 12
startLocation = (1, 1)
targetLocation = (2*maze_size - 1, 2*maze_size - 1)
maze = numpy.array(make_maze(maze_size))

# Draw the maze on a single set of axes
fig, ax = plt.subplots(figsize=(maze_size/2.5, maze_size/2.5))
ax.imshow(maze)

# One tick per grid square, with small labels
tick_positions = range(0, 2*maze_size + 1)
ax.set_xticks(tick_positions)
ax.set_yticks(tick_positions)
ax.tick_params(axis='both', labelsize=8)

# Mark the start (green square) and the target (red square); the column index
# is the x coordinate and the row index is the y coordinate
start_row, start_col = startLocation
target_row, target_col = targetLocation
ax.plot(start_col, start_row, 'gs')
ax.plot(target_col, target_row, 'rs')

# Render the figure
plt.show()

In [None]:
# Start from a uniform policy: in every grid square each of the four moves is
# equally likely. The last axis indexes the move direction and sums to 1.
policy = numpy.ones((maze_size*2+1, maze_size*2+1, 4)) / 4.0
policy /= policy.sum(axis=2, keepdims=True)

# Amount added to the policy entry of every move taken during an episode
learning_rate = 0.01

# Offsets applied to x (first maze index) and y (second maze index) for each
# of the four move directions
xm = [0, 1, 0, -1]
ym = [1, 0, -1, 0]

# Number of successful moves needed to reach the target, one entry per episode
L = []
for episode in range(200):
    # Visit counts for this episode only; after the loop this holds the counts
    # of the last episode.
    visited = numpy.zeros((maze_size*2+1, maze_size*2+1))
    x = startLocation[0]
    y = startLocation[1]
    trackX = [x]
    trackY = [y]
    actions = []
    visited[x][y] = 1

    # Random walk under the current policy until the target square is reached.
    while x != targetLocation[0] or y != targetLocation[1]:
        direction = numpy.random.choice(4, p=policy[x, y])
        tx = x + xm[direction]
        ty = y + ym[direction]
        # Moves into a wall are discarded: the position does not change and
        # the attempt is not recorded.
        if maze[tx][ty] == 0:
            x = tx
            y = ty
            visited[x][y] += 1
            trackX.append(x)
            trackY.append(y)
            actions.append(direction)

    # Reinforce every move that was taken, at the square it was taken from.
    # trackX/trackY have one more entry than actions (the final position), and
    # zip stops at the shortest sequence.
    for from_x, from_y, taken in zip(trackX, trackY, actions):
        policy[from_x, from_y, taken] += learning_rate
    # Renormalise so each square's four probabilities sum to 1 again.
    policy /= policy.sum(axis=2, keepdims=True)
    L.append(len(actions))

In [None]:
# Plot the number of steps for each run
plt.plot(L)

# Create a figure with 4 subplots
plt.figure(figsize=[maze_size,maze_size])

# One entry per subplot, in drawing order:
# (image data, keyword arguments for imshow, whether to add a colorbar)
panels = [
    # The maze itself
    (maze, {'cmap': 'binary'}, False),
    # The visited squares in black and white (counts are clipped at 1)
    (visited, {'cmap': 'gray', 'vmax': 1}, False),
    # The visited squares by number of visits
    (visited, {}, True),
    # The movement policy; it has four values per square, which imshow
    # interprets as an RGBA image
    (policy, {}, True),
]

tick_positions = range(0, maze_size*2+1)
for position, (image, imshow_kwargs, with_colorbar) in enumerate(panels, start=1):
    plt.subplot(2, 2, position)
    plt.imshow(image, **imshow_kwargs)
    plt.xticks(tick_positions)
    plt.yticks(tick_positions)
    plt.tick_params(axis='both', labelsize=8)
    # Start in green, target in red; the column index is the x coordinate
    plt.plot(startLocation[1], startLocation[0], 'gs')
    plt.plot(targetLocation[1], targetLocation[0], 'rs')
    if with_colorbar:
        plt.colorbar()

# Show the figure
plt.show()