In [15]:
import numpy as np
from enum import Enum
import random
import pygame
import sys

In [16]:
maze = np.array([
    ['G', 'G', 'G', 'G', 'G', 'G', 'G', 'G', 'G'],
    ['G', 'G', 'G', 'G', 'G', 'G', 'G', 'G', 'G'],
    ['H', 'H', 'H', 'H', 'H', 'H', 'H', 'G', 'G'],
    ['H', 'H', 'H', 'H', 'H', 'H', 'H', 'G', 'G'],
    ['G', 'G', 'G', 'G', 'G', 'G', 'G', 'G', 'G'],
    ['G', 'G', 'G', 'G', 'G', 'G', 'G', 'G', 'G'],
    ['G', 'G', 'H', 'H', 'H', 'H', 'H', 'H', 'H'],
    ['G', 'G', 'H', 'H', 'H', 'H', 'H', 'H', 'H'],
    ['G', 'G', 'G', 'G', 'G', 'G', 'G', 'G', 'G'],
    ['G', 'G', 'G', 'G', 'G', 'G', 'G', 'G', 'G'],
    ['G', 'G', 'G', 'G', 'F', 'G', 'G', 'G', 'G'],
    ['G', 'G', 'G', 'G', 'G', 'G', 'G', 'G', 'G'],
    ['G', 'G', 'G', 'G', 'G', 'G', 'G', 'G', 'G'],
    ['G', 'G', 'G', 'G', 'G', 'G', 'G', 'G', 'G'],
    ['G', 'G', 'G', 'G', 'G', 'G', 'G', 'G', 'G'],
    ['G', 'G', 'G', 'G', 'G', 'G', 'G', 'G', 'G'],
])

In [17]:
# action space
class Action(Enum):
    UP = 0
    RIGHT = 1
    DOWN = 2
    LEFT = 3

In [18]:
class Colour(Enum):
    BLACK = (0, 0, 0)
    WHITE = (255, 255, 255)
    GREEN = (0, 255, 0)
    AZURE_BLUE = (0, 128, 255)
    RED = (255, 0, 0)

In [19]:
class MazeEnv:
    def __init__(self, maze):
        self.maze = maze
        self.start_pos = (0, 0)
        self.current_pos = self.start_pos
        self.cell_size = 50
        self.screen_width = self.cell_size * maze.shape[1]
        self.screen_height = self.cell_size * maze.shape[0]
        self.player_colour = Colour.AZURE_BLUE.value

        # pygame
        pygame.init()
        self.screen = pygame.display.set_mode((self.screen_width, self.screen_height))
        pygame.display.set_caption("Maze Navigation Game")
        self.clock = pygame.time.Clock()
        
    def step(self, action):
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                sys.exit()

        self.clock.tick(60)

        row, col = self.current_pos
    
        action = Action(action)
        if action == Action.UP:
            new_pos = (max(row - 1, 0), col)
        elif action == Action.RIGHT:
            new_pos = (row, min(col + 1, self.maze.shape[1] - 1))
        elif action == Action.DOWN:
            new_pos = (min(row + 1, self.maze.shape[0] - 1), col)
        elif action == Action.LEFT:
            new_pos = (row, max(col - 1, 0))
        else:
            print("Invalid action: ", action)
            raise ValueError("Invalid action.")

        new_state_value = self.maze[new_pos[0], new_pos[1]]
        reward = 0
        done = False

        if new_state_value == 'G':
            reward = -1
        elif new_state_value == 'H':
            self.player_colour = Colour.RED.value
            reward = -50
            done = True
        elif new_state_value == 'F':
            reward = 10
            done = True

        self.current_pos = new_pos

        info = {"new_position": new_pos, "state_value": new_state_value}

        self.render()
        self.clock.tick(60)
        # pygame.time.delay(50)

        return new_pos, reward, done, info

    def reset(self):
        self.player_colour = Colour.AZURE_BLUE.value  # Reset colour to original after fall
        self.current_pos = self.start_pos  # Reset position to (0, 0)
        print(f"Player reset to {self.current_pos}")
        # pygame.time.delay(500)  # Delay to show the player reset
        self.render()  # Force screen to update with reset position
        return self.current_pos


    def render(self):
        self.screen.fill(Colour.WHITE.value)  # Fill the screen with white
        
        # Draw the maze
        for row in range(len(self.maze)):
            for col in range(len(self.maze[0])):
                if self.maze[row][col] == 'H':
                    pygame.draw.rect(self.screen, Colour.BLACK.value, (col * self.cell_size, row * self.cell_size, self.cell_size, self.cell_size))
                elif self.maze[row][col] == 'F':
                    pygame.draw.rect(self.screen, Colour.GREEN.value, (col * self.cell_size, row * self.cell_size, self.cell_size, self.cell_size))

        # Draw the player
        pygame.draw.rect(self.screen, self.player_colour, (self.current_pos[1] * self.cell_size, self.current_pos[0] * self.cell_size, self.cell_size, self.cell_size))

        font = pygame.font.SysFont(None, 24)
        episode_text = font.render(f'Episode: {episode}', True, Colour.BLACK.value)
        wins_text = font.render(f'Wins: {wins}', True, Colour.BLACK.value)
        # losses_text = font.render(f'Losses: {losses}', True, Colour.BLACK.value)
        # wins_and_losses_text = font.render(f"Wins/losses: {wins}/{losses}", True, Colour.BLACK.value)

        self.screen.blit(episode_text, (20, 20))
        self.screen.blit(wins_text, (200, 20))

        pygame.display.flip()



    def update(self, position):
        self.current_pos = position
        

In [20]:
class Agent:
    def __init__(self, num_states, num_actions, learning_rate=0.8, gamma=0.95, epsilon=1.0, max_epsilon=1.0, min_epsilon=0.01):
        self.q_table = np.zeros((num_states, num_actions))
        self.learning_rate = learning_rate
        self.gamma = gamma
        self.epsilon = epsilon
        self.max_epsilon = max_epsilon
        self.min_epsilon = min_epsilon

    def choose_action(self, state):
        exp_exp_tradeoff = random.uniform(0, 1)

        if exp_exp_tradeoff > self.epsilon:
            # Exploitation
            action = np.argmax(self.q_table[state, :])
        else:
            # Exploration
            action = random.choice(list(Action)).value

        return action

    def update_q_table(self, state, action, reward, new_state):
        self.q_table[state, action] = self.q_table[state, action] + self.learning_rate * (reward + self.gamma * np.max(self.q_table[new_state, :]) - self.q_table[state, action])


    def decay_epsilon(self):
        # Decay the epsilon after each episode
        self.epsilon = max(self.min_epsilon, self.epsilon * 0.995)

In [21]:
def position_to_state(pos, maze_shape):
    return pos[0] * maze_shape[1] + pos[1]

def state_to_position(state, maze_shape):
    return (state // maze_shape[1], state % maze_shape[1])

env = MazeEnv(maze)

start_pos = position_to_state(env.start_pos, maze.shape)

wins = 0
# losses = 0
agent = Agent(len(np.ravel(maze)), len(Action))

total_episodes = 1000
max_steps = 99

for episode in range(total_episodes):
    print(episode)
    state = start_pos
    env.reset()  # reset the environment at the start of each episode
    
    for step in range(max_steps):
        # choose action
        action = agent.choose_action(state)
        
        # Take the action and observe the new state, reward, and whether the game is done
        new_position, reward, done, info = env.step(action)
        new_state = position_to_state(new_position, maze.shape)
        
        # update q table
        agent.update_q_table(state, action, reward, new_state)
        state = new_state

        # update
        env.update(new_position)
        env.render()
        
        if done:
            print(maze[env.current_pos])
            print(env.current_pos)
            env.reset()
            if reward == 10:  # Assuming reward 10 means the player reached the finish
                wins += 1
            # elif reward == -10:  # Assuming reward -10 means the player fell into a hole
            #     losses += 1
            # if wins >= 10:
            #     np.save('q_table.npy', q_table)
            break

        agent.decay_epsilon()

0
Player reset to (0, 0)
H
(2, 4)
Player reset to (0, 0)
1
Player reset to (0, 0)
H
(2, 0)
Player reset to (0, 0)
2
Player reset to (0, 0)
H
(2, 1)
Player reset to (0, 0)
3
Player reset to (0, 0)
H
(2, 6)
Player reset to (0, 0)
4
Player reset to (0, 0)
H
(2, 2)
Player reset to (0, 0)
5
Player reset to (0, 0)
H
(2, 0)
Player reset to (0, 0)
6
Player reset to (0, 0)
H
(2, 0)
Player reset to (0, 0)
7
Player reset to (0, 0)
H
(2, 4)
Player reset to (0, 0)
8
Player reset to (0, 0)
H
(2, 3)
Player reset to (0, 0)
9
Player reset to (0, 0)
H
(2, 4)
Player reset to (0, 0)
10
Player reset to (0, 0)
H
(2, 1)
Player reset to (0, 0)
11
Player reset to (0, 0)
H
(2, 5)
Player reset to (0, 0)
12
Player reset to (0, 0)
H
(2, 6)
Player reset to (0, 0)
13
Player reset to (0, 0)
H
(2, 3)
Player reset to (0, 0)
14
Player reset to (0, 0)
H
(2, 6)
Player reset to (0, 0)
15
Player reset to (0, 0)
H
(2, 3)
Player reset to (0, 0)
16
Player reset to (0, 0)
H
(2, 0)
Player reset to (0, 0)
17
Player reset to (0, 0

SystemExit: 

In [None]:
# loaded_q_table = np.load('q_table.npy')
# print(loaded_q_table)