In [3]:
import numpy as np
import random


In [4]:
# Reproducibility: the training loop draws its exploration decisions from the
# stdlib `random` module, so seed it before any training happens.
RANDOM_SEED = 42
random.seed(RANDOM_SEED)

# Environment: a square grid whose single goal cell is marked with 1.
maze_size = 5
maze = np.zeros((maze_size, maze_size))
# Derive the goal from maze_size so it stays in the bottom-right corner
# (and inside the grid) when the grid size is changed.
goal_position = (maze_size - 1, maze_size - 1)
maze[goal_position] = 1

# Agent: one Q-value per (row, col, action).
actions = ['up', 'down', 'left', 'right']
q_table = np.zeros((maze_size, maze_size, len(actions)))

# Q-learning hyperparameters.
learning_rate = 0.1
discount_factor = 0.9

# Epsilon-greedy exploration schedule.
exploration_rate = 1.0
max_exploration_rate = 1.0
min_exploration_rate = 0.01
# NOTE(review): with the exponential schedule used by the training loop
# (min + (max - min) * exp(-decay * episode)) and 500 episodes, epsilon only
# falls to roughly 0.61 by the last episode -- confirm this is intended.
exploration_decay_rate = 0.001
episodes = 500

In [5]:
# Grid transition function
def get_new_position(position, action):
    """Return the cell reached by taking ``action`` from ``position``.

    Args:
        position: ``(row, col)`` pair; row 0 is the top edge and col 0 the
            left edge of the grid.
        action: one of ``'up'``, ``'down'``, ``'left'``, ``'right'``.

    Returns:
        The new ``(row, col)`` tuple. A move that would leave the
        ``maze_size`` x ``maze_size`` grid, or an unrecognised action,
        leaves the position unchanged.
    """
    row, col = position

    if action == 'up':
        row = row - 1 if row > 0 else row
    elif action == 'down':
        row = row + 1 if row < maze_size - 1 else row
    elif action == 'left':
        col = col - 1 if col > 0 else col
    elif action == 'right':
        col = col + 1 if col < maze_size - 1 else col

    return (row, col)

In [6]:
# Training: tabular Q-learning driven by an epsilon-greedy behaviour policy
for episode in range(episodes):
    state = (0, 0)  # every episode starts in the top-left cell
    done = False

    while not done:
        row, col = state

        # Epsilon-greedy selection: explore with probability exploration_rate,
        # otherwise exploit the best-known action for the current cell.
        explore = random.uniform(0, 1) < exploration_rate
        if explore:
            action_index = random.choice(range(len(actions)))
        else:
            action_index = np.argmax(q_table[row, col, :])

        action = actions[action_index]
        new_state = get_new_position(state, action)
        done = new_state == goal_position

        # +1 for stepping onto the goal, a small penalty for every other move
        reward = 1 if done else -0.1

        # Temporal-difference update of Q(state, action)
        old_value = q_table[row, col, action_index]
        next_max = np.max(q_table[new_state[0], new_state[1], :])
        td_target = reward + discount_factor * next_max
        q_table[row, col, action_index] = old_value + learning_rate * (td_target - old_value)

        state = new_state

    # Exponentially decay the exploration rate towards its minimum
    decay = np.exp(-exploration_decay_rate * episode)
    exploration_rate = min_exploration_rate + (max_exploration_rate - min_exploration_rate) * decay

In [7]:
# Roll out the learned greedy policy from the start cell and record the path.
state = (0, 0)
path = [state]
visited = {state}
while state != goal_position:
    # np.argmax returns the first maximal index, so the policy is deterministic.
    action_index = np.argmax(q_table[state[0], state[1], :])
    action = actions[action_index]
    state = get_new_position(state, action)
    path.append(state)
    if state in visited:
        # A deterministic policy that returns to a cell it has already been in
        # will repeat the same moves forever; stop instead of hanging the kernel.
        print("Warning: greedy policy revisited", state,
              "before reaching the goal; the Q-table has not converged.")
        break
    visited.add(state)

print("Path taken by the agent:", path)

Path taken by the agent: [(0, 0), (0, 1), (1, 1), (1, 2), (1, 3), (2, 3), (3, 3), (4, 3), (4, 4)]
