# Reinforcement Learning Maze Solver

In [1]:
import logging
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt
import numpy as np

import models
from environment.maze import Maze, Render

In [2]:
# Setup Logging
logging.basicConfig(format="%(levelname)-8s: %(asctime)s: %(message)s",
                    datefmt="%Y-%m-%d %H:%M:%S",
                    level=logging.INFO)

In [3]:
# Define the Maze
maze = np.array([
    [0, 1, 0, 0, 0, 0, 0, 0],
    [0, 1, 0, 1, 0, 1, 0, 0],
    [0, 0, 0, 1, 1, 0, 1, 0],
    [0, 1, 0, 1, 0, 0, 0, 0],
    [1, 0, 0, 1, 0, 1, 0, 0],
    [0, 0, 0, 1, 0, 1, 1, 1],
    [0, 1, 1, 0, 0, 0, 0, 0],
    [0, 0, 0, 0, 0, 1, 0, 0]
])

In [4]:
# Initialize the Game Environment
game = Maze(maze)

# Training Parameters
runs = 10
epi = list()
nme = list()
sec = list()

In [5]:
# Model Training
models_to_run = [0, 1, 2, 3]

for model_id in models_to_run:
    episodes = list()
    seconds = list()

    logging.disable(logging.WARNING)
    for r in range(runs):
        if model_id == 0:
            model = models.QTableModel(game)
        elif model_id == 1:
            model = models.SarsaTableModel(game)
        elif model_id == 2:
            model = models.QTableTraceModel(game)
        elif model_id == 3:
            model = models.SarsaTableTraceModel(game)

        _, _, e, s = model.train(stop_at_convergence=True, discount=0.90, exploration_rate=0.10,
                                     exploration_decay=0.999, learning_rate=0.10, episodes=1000)

        print(e, s)

        episodes.append(e)
        seconds.append(s.seconds)

    logging.disable(logging.NOTSET)
    logging.info("model: {} | trained {} times | average no of episodes: {}| average training time {}"
                     .format(model.name, runs, np.average(episodes), np.sum(seconds) / len(seconds)))

    epi.append(episodes)
    sec.append(seconds)
    nme.append(model.name)

f, (epi_ax, sec_ax) = plt.subplots(2, len(models_to_run), sharex="row", sharey="row", tight_layout=True)

# Plot Training Results
for i in range(len(epi)):
    epi_ax[i].set_title(nme[i])
    epi_ax[i].set_xlabel("training episodes")
    epi_ax[i].hist(epi[i], edgecolor="black")

for i in range(len(sec)):
    sec_ax[i].set_xlabel("seconds per episode")
    sec_ax[i].hist(sec[i], edgecolor="black")

plt.show()

155 0:00:05.197483
125 0:00:04.561865
190 0:00:04.767803
125 0:00:04.462442
165 0:00:05.716477
115 0:00:05.059876
165 0:00:05.148594
240 0:00:06.582573
195 0:00:06.126508


INFO    : 2024-04-21 16:15:18: model: QTableModel | trained 10 times | average no of episodes: 160.0| average training time 4.9


125 0:00:05.205127
130 0:00:04.839174
195 0:00:05.418118
165 0:00:05.133030
115 0:00:04.807327
160 0:00:05.125974
130 0:00:04.853828
130 0:00:04.486917
90 0:00:03.823501
125 0:00:04.769630


INFO    : 2024-04-21 16:16:05: model: SarsaTableModel | trained 10 times | average no of episodes: 132.5| average training time 4.1


85 0:00:03.783983
85 0:00:01.390429
75 0:00:01.284767
70 0:00:01.499733
100 0:00:01.694912
75 0:00:01.455278
60 0:00:01.143481
80 0:00:01.308329
60 0:00:01.230542
70 0:00:01.297821


INFO    : 2024-04-21 16:16:19: model: QTableTraceModel | trained 10 times | average no of episodes: 80.5| average training time 1.1


130 0:00:02.012236
80 0:00:01.538565
60 0:00:01.319755
85 0:00:01.984447
115 0:00:02.201757
40 0:00:00.997599
125 0:00:02.048698
70 0:00:01.657940
100 0:00:02.235114
85 0:00:02.257535


INFO    : 2024-04-21 16:16:38: model: SarsaTableTraceModel | trained 10 times | average no of episodes: 89.0| average training time 1.4


130 0:00:02.551762


In [6]:
# Render the Maze and Play the Game
game.render(Render.TRAINING)
game.render(Render.MOVES)
game.play(model, start_cell=(4, 1))

plt.show()