In [18]:
import numpy as np
import random

# Define the Q-learning agent
class QLearningAgent:
    def __init__(self, actions, alpha=0.1, gamma=0.9, epsilon=0.1):
        self.alpha = alpha
        self.gamma = gamma
        self.epsilon = epsilon
        self.actions = actions
        self.q_table = {}
        
    def get_q_value(self, state, action):
        return self.q_table.get((state, action), 0.0)
    
    def choose_action(self, state):
        if np.random.uniform(0, 1) < self.epsilon:
            return random.choice(self.actions)

        q_values = [self.get_q_value(state, action) for action in self.actions]
        return self.actions[np.argmax(q_values)]
    
    def learn(self, state, action, reward, next_state):
        predict = self.get_q_value(state, action)
        target = reward + self.gamma * max([self.get_q_value(next_state, a) for a in self.actions])
        self.q_table[(state, action)] = predict + self.alpha * (target - predict)

# Initialize Q-learning agent
actions = [(0, -1), (0, 1), (-1, 0), (1, 0)]  # up, down, left, right
agent = QLearningAgent(actions)


In [22]:
import pygame

# Initialize pygame
pygame.init()

# Constants
WIDTH, HEIGHT = 500, 500
CELL_SIZE = 50
GRID_SIZE = WIDTH // CELL_SIZE

# Colors
WHITE = (255, 255, 255)
RED = (255, 0, 0)
GREEN = (0, 255, 0)
BLUE = (0, 0, 255)
CYAN = (0, 255, 255)
BLACK = (0, 0, 0)


# Load images (assuming you have a car.png and flag.png in the current directory)
car_img = pygame.transform.scale(pygame.image.load('car.jpg'), (CELL_SIZE, CELL_SIZE))
flag_img = pygame.transform.scale(pygame.image.load('flag.jpg'), (CELL_SIZE, CELL_SIZE))

screen = pygame.display.set_mode((WIDTH, HEIGHT))

def draw_grid():
    for x in range(0, WIDTH, CELL_SIZE):
        pygame.draw.line(screen, WHITE, (x, 0), (x, HEIGHT))
    for y in range(0, HEIGHT, CELL_SIZE):
        pygame.draw.line(screen, WHITE, (0, y), (WIDTH, y))

def game(num_episodes=100):
    clock = pygame.time.Clock()

    for episode in range(num_episodes):
        car_pos = [0, 0]  # Reset starting position at the beginning of each episode
        steps = 0
        total_reward = 0

        running = True
        while running:
            screen.fill(BLACK)
            draw_grid()

            # Get agent's current state
            current_state = tuple(car_pos)
            
            # Get agent's action
            action = agent.choose_action(current_state)
            new_pos = [car_pos[0] + action[0], car_pos[1] + action[1]]

            # Check boundaries and update position if it's a valid move
            if 0 <= new_pos[0] < GRID_SIZE and 0 <= new_pos[1] < GRID_SIZE:
                car_pos = new_pos

            # Check for reward
            reward = -1  # Default reward for each step
            if tuple(car_pos) == (GRID_SIZE - 1, GRID_SIZE - 1):  # If agent reaches the flag
                reward = 10
                running = False  # End the episode when the agent reaches the flag

            total_reward += reward
            steps += 1

            # Update Q-values
            agent.learn(current_state, action, reward, tuple(car_pos))

            # Drawing the car and flag
            screen.blit(car_img, (car_pos[0] * CELL_SIZE, car_pos[1] * CELL_SIZE))
            screen.blit(flag_img, ((GRID_SIZE - 1) * CELL_SIZE, (GRID_SIZE - 1) * CELL_SIZE))

            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    running = False

            # Refresh the display
            pygame.display.flip()

            # Cap the frame rate
            clock.tick(60)

        # Optionally decay epsilon after each episode
        agent.epsilon *= 0.9

        print(f"Episode {episode+1}/{num_episodes} - Steps: {steps}, Total Reward: {total_reward}")

game()
pygame.quit()


Episode 1/100 - Steps: 29, Total Reward: -18
Episode 2/100 - Steps: 29, Total Reward: -18
Episode 3/100 - Steps: 29, Total Reward: -18
Episode 4/100 - Steps: 50, Total Reward: -39
Episode 5/100 - Steps: 40, Total Reward: -29
Episode 6/100 - Steps: 23, Total Reward: -12
Episode 7/100 - Steps: 21, Total Reward: -10
Episode 8/100 - Steps: 34, Total Reward: -23
Episode 9/100 - Steps: 39, Total Reward: -28
Episode 10/100 - Steps: 28, Total Reward: -17
Episode 11/100 - Steps: 35, Total Reward: -24
Episode 12/100 - Steps: 38, Total Reward: -27
Episode 13/100 - Steps: 33, Total Reward: -22
Episode 14/100 - Steps: 22, Total Reward: -11
Episode 15/100 - Steps: 33, Total Reward: -22
Episode 16/100 - Steps: 56, Total Reward: -45
Episode 17/100 - Steps: 32, Total Reward: -21
Episode 18/100 - Steps: 32, Total Reward: -21
Episode 19/100 - Steps: 37, Total Reward: -26
Episode 20/100 - Steps: 20, Total Reward: -9
Episode 21/100 - Steps: 24, Total Reward: -13
Episode 22/100 - Steps: 22, Total Reward: -1

In [15]:
pygame.quit()


In [23]:
for y in range(GRID_SIZE):
        for x in range(GRID_SIZE):
            state = (x, y)
            q_values = [agent.get_q_value(state, action) for action in agent.actions]
            print(f"State: {state}, Q-values: {q_values}")

State: (0, 0), Q-values: [-5.863546321425413, -5.872396348222972, -5.8705032886611175, -5.864845869830371]
State: (1, 0), Q-values: [-5.514880286138465, -5.505565218792635, -5.524011492979922, -5.511677865238788]
State: (2, 0), Q-values: [-5.150086297258368, -5.151757277428459, -5.215959798398722, -5.1603560185015755]
State: (3, 0), Q-values: [-4.848628825761963, -4.804689768295516, -4.841406702500324, -4.820850386465432]
State: (4, 0), Q-values: [-4.473165228376147, -4.450273037523816, -4.523677047577596, -4.462178056292265]
State: (5, 0), Q-values: [-4.129632180625154, -4.074695320162007, -4.162175081407496, -4.103099798010827]
State: (6, 0), Q-values: [-3.7647460510879984, -3.7993684584388214, -3.76453905643564, -3.7638253101182104]
State: (7, 0), Q-values: [-3.4434077942585617, -3.4417705218438046, -3.4744596725576007, -3.4319298434817855]
State: (8, 0), Q-values: [-3.0991781120542234, -3.128845930587743, -3.155187317874747, -3.1439474310320987]
State: (9, 0), Q-values: [-2.9595973