In [1]:
import numpy as np

def create_maze():
    """Build the fixed 5x5 maze grid used by this notebook.

    Returns:
        numpy.ndarray: integer array of shape (5, 5) in which 0 marks an
        open path and 1 marks a wall.
    """
    # Rows are listed top to bottom; 0 = open path, 1 = wall.
    maze_layout = [
        [0, 1, 0, 0, 0],
        [0, 1, 0, 1, 0],
        [0, 0, 0, 1, 0],
        [0, 1, 1, 1, 0],
        [0, 0, 0, 0, 0],
    ]
    return np.array(maze_layout, dtype=int)

# Build the module-level maze grid that the later cells read (0 = open path, 1 = wall).
maze = create_maze()

In [2]:
class QLearningAgent:
    """Tabular Q-learning agent with an epsilon-greedy action policy.

    The agent keeps a ``(num_states, num_actions)`` table of action values,
    initialised to zero, and updates it one transition at a time.
    """

    def __init__(self, num_states, num_actions, learning_rate=0.1, discount_factor=0.9, exploration_prob=0.2):
        """Store the hyper-parameters and allocate a zero-filled Q-table.

        Args:
            num_states: number of rows in the Q-table.
            num_actions: number of columns in the Q-table.
            learning_rate: step size applied to each update.
            discount_factor: weight given to the best next-state value.
            exploration_prob: probability of picking a random action.
        """
        self.num_states, self.num_actions = num_states, num_actions
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.exploration_prob = exploration_prob
        self.q_table = np.zeros(shape=(num_states, num_actions))

    def choose_action(self, state):
        """Return an action index for ``state`` using epsilon-greedy selection."""
        # One uniform draw decides between exploring and exploiting.
        explore = np.random.rand() < self.exploration_prob
        if not explore:
            # Exploit: highest-valued action (the first index wins ties).
            return np.argmax(self.q_table[state])
        # Explore: uniformly random action index.
        return np.random.choice(self.num_actions)

    def learn(self, state, action, reward, next_state):
        """Apply one Q-learning update for the observed transition."""
        best_next_value = np.max(self.q_table[next_state])
        td_target = reward + self.discount_factor * best_next_value
        td_error = td_target - self.q_table[state, action]
        self.q_table[state, action] += self.learning_rate * td_error


In [3]:
# One state per maze cell, indexed as a row-major flat index into `maze`.
num_states = maze.size
num_actions = 4  # Four possible actions: Up, Down, Left, Right
initial_state = 0  # flat index 0, i.e. maze[0, 0]
goal_state = num_states - 1  # last flat index, i.e. the bottom-right cell

# Training cannot succeed if the start or the goal sits inside a wall.
assert maze.flat[initial_state] == 0, "initial_state must be an open cell"
assert maze.flat[goal_state] == 0, "goal_state must be an open cell"

# Seed NumPy's global RNG so the agent's random exploration (and therefore
# the learned Q-table) is reproducible after "Restart Kernel -> Run All".
np.random.seed(42)

# Instantiate the agent
agent = QLearningAgent(num_states, num_actions)


In [4]:
def train_agent(agent, num_episodes=1000, max_steps_per_episode=10_000):
    """Train ``agent`` with Q-learning on the module-level ``maze``.

    Every episode starts at ``initial_state`` and ends when ``goal_state`` is
    entered or the step budget is exhausted. States are row-major flat
    indices into ``maze``; actions are 0 = Up, 1 = Down, 2 = Left, 3 = Right.
    Entering the goal yields reward 1, any other successful move reward 0.
    Moves that would leave the grid or enter a wall are ignored: the agent
    stays where it is and no update is made.

    Args:
        agent: object exposing ``choose_action(state)`` and
            ``learn(state, action, reward, next_state)``.
        num_episodes: number of episodes to run.
        max_steps_per_episode: maximum number of action attempts per episode,
            so an unreachable goal cannot hang the loop.
    """
    n_rows, n_cols = maze.shape
    # (row delta, column delta) per action id; an unknown id means "stay put".
    moves = {0: (-1, 0), 1: (1, 0), 2: (0, -1), 3: (0, 1)}

    for _episode in range(num_episodes):
        state = initial_state
        for _step in range(max_steps_per_episode):
            action = agent.choose_action(state)

            # Work in (row, col) space: adding +/-1 to the flat index would
            # let Left/Right wrap from one row into the neighbouring row.
            row, col = divmod(state, n_cols)
            d_row, d_col = moves.get(action, (0, 0))
            new_row, new_col = row + d_row, col + d_col

            inside_grid = 0 <= new_row < n_rows and 0 <= new_col < n_cols
            if not inside_grid or maze[new_row, new_col] != 0:
                # Blocked move: stay in place without learning.
                continue

            next_state = new_row * n_cols + new_col
            reached_goal = next_state == goal_state
            reward = 1 if reached_goal else 0
            agent.learn(state, action, reward, next_state)
            state = next_state
            if reached_goal:
                break

# Train the agent for 1000 episodes; this updates agent.q_table in place.
train_agent(agent, num_episodes=1000)


In [5]:
def test_agent(agent):
    state = initial_state
    while state != goal_state:
        action = agent.choose_action(state)
        print(f"Current State: {state}, Chosen Action: {action}")
        
        # Calculate the next state based on the chosen action
        if action == 0:  # Move Up
            state = state - maze.shape[1]
        elif action == 1:  # Move Down
            state = state + maze.shape[1]
        elif action == 2:  # Move Left
            state = state - 1
        elif action == 3:  # Move Right
            state = state + 1
        
        print(f"New State: {state}")
    print("Agent reached the goal!")

# Roll out the trained agent from initial_state and print every step it takes.
test_agent(agent)


Current State: 0, Chosen Action: 1
New State: 5
Current State: 5, Chosen Action: 3
New State: 6
Current State: 6, Chosen Action: 0
New State: 1
Current State: 1, Chosen Action: 0
New State: -4
Current State: -4, Chosen Action: 2
New State: -5
Current State: -5, Chosen Action: 0
New State: -10
Current State: -10, Chosen Action: 0
New State: -15
Current State: -15, Chosen Action: 2
New State: -16
Current State: -16, Chosen Action: 3
New State: -15
Current State: -15, Chosen Action: 2
New State: -16
Current State: -16, Chosen Action: 1
New State: -11
Current State: -11, Chosen Action: 1
New State: -6
Current State: -6, Chosen Action: 0
New State: -11
Current State: -11, Chosen Action: 1
New State: -6
Current State: -6, Chosen Action: 1
New State: -1
Current State: -1, Chosen Action: 0
New State: -6
Current State: -6, Chosen Action: 1
New State: -1
Current State: -1, Chosen Action: 0
New State: -6
Current State: -6, Chosen Action: 1
New State: -1
Current State: -1, Chosen Action: 0
New Sta