In [7]:
import numpy as np
import random

In [8]:
# Grid legend: 0 = open space, -1 = wall, 1 = goal.
# Indexed as maze[row, col], with row 0 at the top.
maze = np.array(
    [
        [0, -1, 0,  0, 1],
        [0, -1, 0, -1, 0],
        [0,  0, 0, -1, 0],
        [0, -1, 0,  0, 0],
        [0,  0, 0, -1, 0],
    ]
)


In [9]:



# --- Environment ---------------------------------------------------------
n_rows, n_cols = maze.shape
n_actions = 4
# Action id -> (row delta, col delta): 0 = up, 1 = down, 2 = left, 3 = right.
actions = {0: (-1, 0), 1: (1, 0), 2: (0, -1), 3: (0, 1)}
start_position = (0, 0)
goal_position = (0, 4)

# Fail fast if the hard-coded settings drift out of sync with the grid.
assert n_actions == len(actions), "n_actions must match the number of entries in actions"
assert maze[start_position] != -1, "start_position is inside a wall"
assert maze[goal_position] == 1, "goal_position is not the cell marked 1 in maze"

# --- Reproducibility -----------------------------------------------------
# Exploration draws from the `random` module; seeding it makes a fresh
# run of this cell produce the same training trajectory every time.
SEED = 42
random.seed(SEED)

# --- Q-learning hyperparameters ------------------------------------------
# One Q-value per (row, col, action), all starting at zero.
q_table = np.zeros((n_rows, n_cols, n_actions))
learning_rate = 0.1          # weight of the new estimate in each update
discount_factor = 0.9        # weight of future value in the update target
exploration_rate = 1.0       # initial probability of a random action
exploration_decay = 0.995    # multiplicative decay applied after each episode
min_exploration_rate = 0.01  # floor for the exploration probability
episodes = 1000


def is_within_bounds(position):
    """Return whether ``position`` (row, col) lies inside the maze grid.

    The grid extent is read from the module-level ``n_rows`` and ``n_cols``.
    """
    if not (0 <= position[0] < n_rows):
        return False
    return 0 <= position[1] < n_cols

def choose_action(state):
    """Pick an action id for ``state`` with an epsilon-greedy rule.

    With probability ``exploration_rate`` a random key of ``actions`` is
    returned; otherwise the index of the largest Q-value stored for the
    (row, col) cell in ``q_table`` is returned.
    """
    explore = random.uniform(0, 1) < exploration_rate
    if explore:
        return random.choice(list(actions.keys()))
    row, col = state[0], state[1]
    return np.argmax(q_table[row, col])


def step(state, action):
    """Apply ``action`` in ``state`` and return ``(next_state, reward)``.

    Moving off the grid or into a wall cell (-1) leaves the agent where it
    was and yields -1. Arriving at ``goal_position`` yields 10. Any other
    move yields -0.1.
    """
    d_row, d_col = actions[action]
    candidate = (state[0] + d_row, state[1] + d_col)

    # Bounds are checked first so the maze is never indexed off-grid.
    blocked = (not is_within_bounds(candidate)) or maze[candidate] == -1
    if blocked:
        return state, -1

    reward = 10 if candidate == goal_position else -0.1
    return candidate, reward


# Tabular Q-learning: every episode walks from the start cell to the goal,
# updating one Q-value per move, then decays the exploration rate.
for episode in range(episodes):
    state, total_reward = start_position, 0
    while state != goal_position:
        action = choose_action(state)
        new_state, reward = step(state, action)

        # Update target: immediate reward plus the discounted best Q-value
        # available from the cell the agent ended up in.
        cell = (state[0], state[1], action)
        td_target = reward + discount_factor * q_table[new_state[0], new_state[1]].max()
        q_table[cell] = (1 - learning_rate) * q_table[cell] + learning_rate * td_target

        total_reward += reward
        state = new_state

    # Explore less as training progresses, but never below the floor.
    exploration_rate = max(min_exploration_rate, exploration_rate * exploration_decay)

    if episode % 100 == 0:
        print(f"Episode {episode}, Total Reward: {total_reward}")

print("Training completed!")





Episode 0, Total Reward: -74.1
Episode 100, Total Reward: -4.899999999999993
Episode 200, Total Reward: 7.699999999999999
Episode 300, Total Reward: 9.3
Episode 400, Total Reward: 9.3
Episode 500, Total Reward: 9.3
Episode 600, Total Reward: 9.3
Episode 700, Total Reward: 9.3
Episode 800, Total Reward: 9.3
Episode 900, Total Reward: 9.3
Training completed!


In [10]:
# Roll out the greedy (argmax-Q) policy from the start cell to the goal.
state = start_position
path = [state]
visited = {state}
while state != goal_position:
    action = np.argmax(q_table[state[0], state[1]])
    new_state, _ = step(state, action)
    # The greedy policy is deterministic and the Q-table is not updated here,
    # so landing on an already-visited cell (including bouncing off a wall and
    # staying put) means the walk would repeat forever. Fail loudly instead of
    # hanging the kernel.
    if new_state in visited:
        raise RuntimeError(
            f"Greedy policy revisits {new_state} after {len(path) - 1} moves; "
            "the Q-table does not yet encode a path to the goal."
        )
    visited.add(new_state)
    path.append(new_state)
    state = new_state

print("Optimal Path:", path)

Optimal Path: [(0, 0), (1, 0), (2, 0), (2, 1), (2, 2), (1, 2), (0, 2), (0, 3), (0, 4)]


In [11]:
import time

def display_maze(maze, path, delay=0.5):
    """Animate an agent walking ``path`` through ``maze`` as text frames.

    Parameters
    ----------
    maze : numpy.ndarray
        2-D grid of cell codes (0 = open, -1 = wall, 1 = goal). It is copied
        for every frame and never modified.
    path : iterable of (row, col)
        Positions to draw the agent at, one frame per position.
    delay : float, optional
        Seconds to pause after printing each frame. Defaults to 0.5.

    Each frame is printed to stdout. Between frames the ANSI sequence
    ESC[H ESC[J (cursor home, erase to end of screen) is emitted so a terminal
    redraws in place. Front ends that do not interpret ANSI codes will show
    the sequence literally. The last frame is left on screen.
    """
    # Code 2 is used only in the per-frame copy to mark the agent.
    symbols = {0: " . ", -1: " | ", 1: " G ", 2: " A "}

    for frame_index, position in enumerate(path):
        if frame_index:
            # Wipe the previous frame before drawing the next one.
            print("\033[H\033[J", end="")

        frame = maze.copy()
        frame[tuple(position)] = 2

        # tolist() yields plain Python scalars, so the dict lookup is exact.
        # Unknown codes render as " ? " to keep the columns aligned.
        for row in frame.tolist():
            print("".join(symbols.get(cell, " ? ") for cell in row))
        print("\n")
        time.sleep(delay)


# Recompute the greedy (argmax-Q) path from the start cell to the goal.
state = start_position
path = [state]
visited = {state}
while state != goal_position:
    action = np.argmax(q_table[state[0], state[1]])
    new_state, _ = step(state, action)
    # The greedy policy is deterministic and the Q-table is not updated here,
    # so landing on an already-visited cell (including bouncing off a wall and
    # staying put) means the walk would repeat forever. Fail loudly instead of
    # hanging the kernel.
    if new_state in visited:
        raise RuntimeError(
            f"Greedy policy revisits {new_state} after {len(path) - 1} moves; "
            "the Q-table does not yet encode a path to the goal."
        )
    visited.add(new_state)
    path.append(new_state)
    state = new_state


print("Optimal Path:")
display_maze(maze, path)


Optimal Path:
 A  |  .  .  G 
 .  |  .  |  . 
 .  .  .  |  . 
 .  |  .  .  . 
 .  .  .  |  . 


[H[J .  |  .  .  G 
 A  |  .  |  . 
 .  .  .  |  . 
 .  |  .  .  . 
 .  .  .  |  . 


[H[J .  |  .  .  G 
 .  |  .  |  . 
 A  .  .  |  . 
 .  |  .  .  . 
 .  .  .  |  . 


[H[J .  |  .  .  G 
 .  |  .  |  . 
 .  A  .  |  . 
 .  |  .  .  . 
 .  .  .  |  . 


[H[J .  |  .  .  G 
 .  |  .  |  . 
 .  .  A  |  . 
 .  |  .  .  . 
 .  .  .  |  . 


[H[J .  |  .  .  G 
 .  |  A  |  . 
 .  .  .  |  . 
 .  |  .  .  . 
 .  .  .  |  . 


[H[J .  |  A  .  G 
 .  |  .  |  . 
 .  .  .  |  . 
 .  |  .  .  . 
 .  .  .  |  . 


[H[J .  |  .  A  G 
 .  |  .  |  . 
 .  .  .  |  . 
 .  |  .  .  . 
 .  .  .  |  . 


[H[J .  |  .  .  A 
 .  |  .  |  . 
 .  .  .  |  . 
 .  |  .  .  . 
 .  .  .  |  . 


[H[J