In [2]:
import pygame

import random
from collections import deque
import numpy as np
from helper import *

pygame 2.6.1 (SDL 2.28.4, Python 3.11.11)
Hello from the pygame community. https://www.pygame.org/contribute.html


In [3]:
# Parameters
grid_size = 5  # Grid size for player and enemies (positions range from 0 to 4)
num_enemies = 4  # Number of enemies
visited_states = 2  # Visited status can be 0 or 1
wall_states = 2  # Wall status can be 0 (no wall) or 1 (wall)

movements = np.array([
    [0, 1],   # Move up
    [1, 0],   # Move right
    [-1, 0],  # Move left
    [0, -1]   # Move down
])

# Generate walls: 0 for no wall, 1 for wall

state_action_log = []


In [4]:
base_shape = [ visited_states, wall_states]
enemy_shape = [grid_size, grid_size] * num_enemies  # Each enemy has x and y positions
full_shape = base_shape + enemy_shape

# # Generate estimated_value_grid filled with zeros
estimated_value_grid = np.zeros(full_shape, dtype=float)

# # Generate random indices into the movements array
policy_indices = np.random.randint(0, 4, size=full_shape)

# # Create the policy array by indexing into movements
policy = movements[policy_indices]

# # Now, policy has shape full_shape + (2,), where the last dimension stores (dx, dy)
print("Policy Shape:", policy.shape)  # For verification

# # Generate reward_states: 40x40 grid of random 1s and 2s
reward_states = np.random.choice([1, 2], size=(grid_size, grid_size))

# # Output shapes for verification
print("Estimated Value Grid Shape:", estimated_value_grid.shape)
print("Reward States Shape:", reward_states.shape)

Policy Shape: (2, 2, 5, 5, 5, 5, 5, 5, 5, 5, 2)
Estimated Value Grid Shape: (2, 2, 5, 5, 5, 5, 5, 5, 5, 5)
Reward States Shape: (5, 5)


In [5]:
policy = {}
estimated_value_grid={}

In [6]:
def partition_maze_optimized(maze, s):
    x, y = s
    rows, cols = len(maze), len(maze[0])
    half_grid = 5 // 2

    partition = [
        [
            maze[(x + dx) % rows][(y + dy) % cols]
            for dy in range(-half_grid, half_grid + 1)
        ]
        for dx in range(-half_grid, half_grid + 1)
    ]
    return np.array(partition)

In [7]:
def optimized_relative_positions(maze_size, player_pos, enemy_positions, grid_size=5):
    x, y = player_pos
    rows, cols = maze_size  # Maze dimensions
    half_grid = grid_size // 2

    relative_positions = []

    for ex, ey in enemy_positions:
        # Compute differences considering wrap-around
        dx = (ex - x + cols) % cols
        if dx > cols // 2:
            dx -= cols

        dy = (ey - y + rows) % rows
        if dy > rows // 2:
            dy -= rows

        # Check if enemy is within the local grid
        if -half_grid <= dx <= half_grid and -half_grid <= dy <= half_grid:
            # Map to local grid coordinates (0 to 4)
            local_x = int(dx + half_grid)
            local_y = int(dy + half_grid)
            relative_positions.append((local_x, local_y))
    return relative_positions

In [8]:
def check_collision(current_state, enemies):
    x, y = current_state
    for enemy in enemies:
        ex, ey = enemy["pos"]
        if (x, y) == (ex, ey):
            return True
    return False


In [9]:
def is_wall_ahead(current_state, action, reward_maze):
    x, y = current_state
    dx, dy = action
    maze_rows, maze_cols = len(reward_maze),len(reward_maze[0])
    nextx, nexty = (x + dx) % maze_rows, (y + dy) % maze_cols
    return reward_maze[nextx][nexty] == 1  # Returns True if the next cell is a wall


In [10]:
def get_valid_actions(current_state, reward_maze):
    x, y = current_state
    maze_rows, maze_cols = len(reward_maze),len(reward_maze[0])
    valid_actions = []
    for dx, dy in movements:
        nextx, nexty = (x + dx) % maze_rows, (y + dy) % maze_cols
        if reward_maze[nextx][nexty] != 1:  # Not a wall
            valid_actions.append((dx, dy))
    return valid_actions


In [11]:
def Next_action(current_state, policy, enemies, reward_maze, grid_size=5):
    maze_rows, maze_cols = len(reward_maze),len(reward_maze[0])
    x, y = current_state

    # Retrieve enemy positions and compute relative positions
    og_enemy_positions = [enemy["pos"] for enemy in enemies]
    enemy_positions = optimized_relative_positions(
        (maze_rows, maze_cols), current_state, og_enemy_positions, grid_size=grid_size
    )

    # Get food and wall status at the current position
    food = int(reward_maze[x][y] == 2)
    wall = int(reward_maze[x][y] == 1)

    # Prepare enemy positions for the state index
    max_enemies_in_local_grid = len(enemy_positions)
    enemy_positions.sort()
    padded_enemy_positions = enemy_positions[:max_enemies_in_local_grid]
    num_missing = max_enemies_in_local_grid - len(padded_enemy_positions)
    padded_enemy_positions.extend([(-1, -1)] * num_missing)  # Use placeholder (-1, -1)

    # Flatten the enemy positions list
    enemy_positions_flat = [coord for pos in padded_enemy_positions for coord in pos]

    # Build the state index tuple
    indices_s = (food, wall) + tuple(enemy_positions_flat)

    # Retrieve the action from the policy dictionary
    action = policy.get(indices_s)

    # If action is None or leads into a wall, select a valid action
    if action is None or is_wall_ahead(current_state, action, reward_maze):
        valid_actions = get_valid_actions(current_state, reward_maze)
        if valid_actions:
            action = random.choice(valid_actions)
        else:
            action = (0, 0)  # No valid moves, stay in place

    # Append the state index, action, reward value, and current state to the log
    state_action_log.append((indices_s, action, reward_maze[x][y], current_state))

    return action


In [12]:
def update_policy_estimated_vals(epsilon,old_indices_s,policy, prev_player_pos,estimated_value_grid, og_enemy_positions, reward_maze,pack):
    move_values=[(1,0),(-1,0),(0,-1),(0,1)]
    maze_rows, maze_cols = len(reward_maze),len(reward_maze[0])
    x,y=prev_player_pos
    e1=optimized_relative_positions(
        (maze_rows, maze_cols), ((x+1)%maze_rows,y), og_enemy_positions, grid_size=grid_size
    )
    f1 = int(reward_maze[(x+1)%maze_rows][y] == 2)
    w1 = int(reward_maze[(x+1)%maze_rows][y] == 1)
    max_enemies_in_local_grid = len(e1)
    e1.sort()
    padded_enemy_positions = e1[:max_enemies_in_local_grid]
    num_missing = max_enemies_in_local_grid - len(padded_enemy_positions)
    padded_enemy_positions.extend([(-1, -1)] * num_missing)
    enemy_positions_flat = [coord for pos in padded_enemy_positions for coord in pos]
    indices_1 = (f1, w1) + tuple(enemy_positions_flat)
    e2=optimized_relative_positions(
        (maze_rows, maze_cols), ((x-1)%maze_rows,y), og_enemy_positions, grid_size=grid_size
    )
    f2 = int(reward_maze[(x-1)%maze_rows][y] == 2)
    w2 = int(reward_maze[(x-1)%maze_rows][y] == 1)
    max_enemies_in_local_grid = len(e2)
    e2.sort()
    padded_enemy_positions = e2[:max_enemies_in_local_grid]
    num_missing = max_enemies_in_local_grid - len(padded_enemy_positions)
    padded_enemy_positions.extend([(-1, -1)] * num_missing)
    enemy_positions_flat = [coord for pos in padded_enemy_positions for coord in pos]
    indices_2 = (f2, w2) + tuple(enemy_positions_flat)
    e3=optimized_relative_positions(
        (maze_rows, maze_cols), (x,(y+1)%maze_rows), og_enemy_positions, grid_size=grid_size
    )
    f3 = int(reward_maze[x][(y+1)%maze_rows] == 2)
    w3 = int(reward_maze[x][(y+1)%maze_rows] == 1)
    max_enemies_in_local_grid = len(e3)
    e3.sort()
    padded_enemy_positions = e3[:max_enemies_in_local_grid]
    num_missing = max_enemies_in_local_grid - len(padded_enemy_positions)
    padded_enemy_positions.extend([(-1, -1)] * num_missing)
    enemy_positions_flat = [coord for pos in padded_enemy_positions for coord in pos]
    indices_3 = (f3, w3) + tuple(enemy_positions_flat)
    e4=optimized_relative_positions(
        (maze_rows, maze_cols), (x,(y-1)%maze_rows), og_enemy_positions, grid_size=grid_size
    )
    f4 = int(reward_maze[x][(y-1)%maze_rows] == 2)
    w4 = int(reward_maze[x][(y-1)%maze_rows] == 1)
    max_enemies_in_local_grid = len(e4)
    e4.sort()
    padded_enemy_positions = e4[:max_enemies_in_local_grid]
    num_missing = max_enemies_in_local_grid - len(padded_enemy_positions)
    padded_enemy_positions.extend([(-1, -1)] * num_missing)
    enemy_positions_flat = [coord for pos in padded_enemy_positions for coord in pos]
    indices_4 = (f4, w4) + tuple(enemy_positions_flat)
    val_1 = estimated_value_grid.get(indices_1, 0)
    val_2 = estimated_value_grid.get(indices_2, 0)
    val_3 = estimated_value_grid.get(indices_3, 0)
    val_4 = estimated_value_grid.get(indices_4, 0)

    moves = [
        (val_1, (1, 0)),   
        (val_2, (-1, 0)),  
        (val_3, (0, 1)),   
        (val_4, (0, -1)),  
    ]
    QSA,learning_rate,discount,R1 =pack
    max_future_Q = max(
            val_1,val_2,val_3,val_4
    )
    estimated_value_grid[old_indices_s] = QSA + learning_rate * (R1 + discount * max_future_Q - QSA)
    # Pick the mov
    # e with the greatest value
    best_val, best_move = max(moves, key=lambda x: x[0])
    if random.random() < epsilon:
        # Exploration: Choose a random move
        best_move = random.choice(move_values)
    else:
        best_val, best_move = max(moves, key=lambda x: x[0])

    policy[old_indices_s]=best_move

    return policy,estimated_value_grid


In [13]:
def Q_Policy(policy, prev_player_pos,new_player_pos,estimated_value_grid, enemies, reward_maze, visited_empty_spaces,grid_size=5,learning_rate=0.1, discount=0.9,epsilon=0.1):
    maze_rows, maze_cols = len(reward_maze),len(reward_maze[0])
    og_enemy_positions = [enemy["pos"] for enemy in enemies]
    new_elative_enemy_positions = optimized_relative_positions(
        (maze_rows, maze_cols), new_player_pos, og_enemy_positions, grid_size=grid_size
    )
    old_relative_enemy_positions = optimized_relative_positions(
        (maze_rows, maze_cols), prev_player_pos, og_enemy_positions, grid_size=grid_size
    )
    old_food = int(reward_maze[prev_player_pos[0]][prev_player_pos[1]] == 2)
    old_wall = int(reward_maze[prev_player_pos[0]][prev_player_pos[1]] == 1)

    new_food = int(reward_maze[new_player_pos[0]][new_player_pos[1]] == 2)
    new_wall = int(reward_maze[new_player_pos[0]][new_player_pos[1]] == 1)

    max_enemies_in_local_grid = len(old_relative_enemy_positions)
    old_relative_enemy_positions.sort()
    old_padded_enemy_positions = old_relative_enemy_positions[:max_enemies_in_local_grid]
    num_missing = max_enemies_in_local_grid - len(old_padded_enemy_positions)
    old_padded_enemy_positions.extend([(-1, -1)] * num_missing) 
    old_enemy_positions_flat = [coord for pos in old_padded_enemy_positions for coord in pos]


    max_enemies_in_local_grid = len(new_elative_enemy_positions)
    new_elative_enemy_positions.sort()
    new_padded_enemy_positions = new_elative_enemy_positions[:max_enemies_in_local_grid]
    num_missing = max_enemies_in_local_grid - len(new_padded_enemy_positions)
    new_padded_enemy_positions.extend([(-1, -1)] * num_missing)  
    new_enemy_positions_flat = [coord for pos in new_padded_enemy_positions for coord in pos]


    old_indices_s = (old_food, old_wall) + tuple(old_enemy_positions_flat)
    new_indices_s = (new_food, new_wall) + tuple(new_enemy_positions_flat)
    
    QS1A1=estimated_value_grid.get(new_indices_s)
    if check_collision(new_player_pos, enemies):
        R1 = -99999 # Collision with enemy
        print("Collision Detected")
        visited_empty_spaces=0
    elif new_wall:
        visited_empty_spaces=0
        R1 = -10 # Attempted to move into a wall
    elif new_food:  # Food
        visited_empty_spaces=0
        R1 = 20
    else:
        R1 = -10 # Default penalty for empty space
        visited_empty_spaces+=1
    QSA=estimated_value_grid.get(old_indices_s)
    if QSA is None:
        QSA=0
    if QS1A1 is None:
        QS1A1=0
    print(QSA,learning_rate,R1,discount,QS1A1)
    policy,estimated_value_grid=update_policy_estimated_vals(epsilon,old_indices_s,policy, prev_player_pos,estimated_value_grid, og_enemy_positions, reward_maze,(QSA,learning_rate,discount,R1))
    return estimated_value_grid,policy,visited_empty_spaces
    

In [14]:
def initialize_positions(maze, num_enemies):
    maze_rows, maze_cols = len(maze), len(maze[0])

    # Find all possible positions (excluding walls)
    possible_positions = [(x, y) for x in range(maze_rows) for y in range(maze_cols) if maze[x][y] != 1]

    # Randomly select a position for the player
    player_pos = random.choice(possible_positions)

    # Remove player's position from possible positions
    possible_positions.remove(player_pos)

    enemies = []
    for _ in range(num_enemies):
        if not possible_positions:
            break  # No more positions available
        enemy_pos = random.choice(possible_positions)
        enemies.append({"pos": enemy_pos, "target": None})
        possible_positions.remove(enemy_pos)

    return player_pos, enemies


In [15]:
maxt=50

In [16]:
class Game:
    def __init__(self, maze, player_pos, enemies, score=0, timeout=maxt):
        self.maze = maze
        self.player_pos = player_pos
        self.enemies = enemies
        self.score = score
        self.timeout = timeout
        self.state_action_log = []
        self.running = True  # Indicates if the game is still running
        self.visited_empty_spaces = 0

    def restart_game(self):
        self.maze = create_maze(ROWS, COLS)
        self.player_pos, self.enemies = initialize_positions(self.maze, num_enemies)
        self.score = 0
        self.timeout = maxt
        self.state_action_log = []
        self.running = True
        self.visited_empty_spaces=0


In [17]:
#neeed to handle when food is taken it automaticly changes it state

In [18]:
pygame.init()
print("RRUN")
global policy, maxt,estimated_value_grid
maxt = 50
num_games = 1  # Number of games to run
games = []
for _ in range(num_games):
    maze = create_maze(ROWS, COLS)
    player_pos, enemies = initialize_positions(maze, num_enemies)
    game = Game(maze, player_pos, enemies, score=0, timeout=maxt)
    games.append(game)

clock = pygame.time.Clock()
running = True
while running:
    # Handle events (e.g., quitting the game)
    for event in pygame.event.get():
        if event.type == pygame.QUIT:
            running = False

    # For each game instance, update its state
    all_games_ready_for_update = True
    for game in games:
        if not game.running:
            continue  # Skip if game is not running
        
        if game.timeout == 0:
            # Check if all games have reached the timeout
            pass  # We'll handle synchronization after this loop
        else:
            all_games_ready_for_update = False
            current_state = game.player_pos
            action = Next_action(current_state, policy, game.enemies, game.maze, grid_size=5)
            game.timeout -= 1
            new_player_pos = move_entity(game.player_pos, action)
            move_enemies(game.enemies, game.maze)
            estimated_value_grid,policy,game.visited_empty_spaces=Q_Policy(policy, current_state,new_player_pos,estimated_value_grid, game.enemies, game.maze,game.visited_empty_spaces, grid_size=5,learning_rate=0.1, discount=0.9)
            if is_valid_move(new_player_pos, game.maze):
                if game.maze[new_player_pos[0]][new_player_pos[1]] == 2:  # Collect food
                    game.score += 1
                    game.maze[new_player_pos[0]][new_player_pos[1]] = 0
                game.player_pos = new_player_pos
            print(game.visited_empty_spaces)
            
            # Check for collisions
            for enemy in game.enemies or game.visited_empty_spaces>=5:
                if enemy["pos"] == game.player_pos or check_collision(game.player_pos, game.enemies):
                    print("Game Over! Restarting...")
                    game.running = False  # Mark the game for update

            # Check if all food is collected
            if all(game.maze[row][col] != 2 for row in range(ROWS) for col in range(COLS)):
                print("You Win! Restarting...")
                game.running = False  # Mark the game for update

            

    # Synchronize updates
    if all_games_ready_for_update or all(not game.running for game in games):
        # Run Next_Cycle for each game
        for game in games:
            game.restart_game()
            game.running = True  # Reset the running flag
        if maxt < 10000:
            maxt += 10

    # Draw all games
    draw_all_games(games)

    pygame.display.flip()
    clock.tick(FPS)
pygame.quit()


RRUN
0 0.1 20 0.9 0
0
0 0.1 20 0.9 2.0
0
0 0.1 20 0.9 0
0
0 0.1 20 0.9 0
0
Collision Detected
2.0 0.1 -99999 0.9 0
0
Game Over! Restarting...
Game Over! Restarting...
Game Over! Restarting...
Game Over! Restarting...
2.0 0.1 20 0.9 2.0
0
2.18 0.1 20 0.9 3.9800000000000004
0


4.3202 0.1 -10 0.9 4.3202
1
3.276998 0.1 20 0.9 3.9800000000000004
0
5.3074981999999995 0.1 20 0.9 3.9800000000000004
0
7.2544232179999995 0.1 20 0.9 3.9800000000000004
0
9.18187898582 0.1 -10 0.9 9.18187898582
1
8.090060195961799 0.1 -10 0.9 8.090060195961799
2
7.009159594002181 0.1 -10 0.9 7.009159594002181
3
5.939067998062159 0.1 -10 0.9 5.939067998062159
4
4.879677318081537 0.1 20 0.9 3.9800000000000004
0
6.8308805449007215 0.1 -10 0.9 6.8308805449007215
1
5.762571739451714 0.1 20 0.9 3.9800000000000004
0
7.7049460220571975 0.1 20 0.9 3.9800000000000004
0
9.627896561836625 0.1 -10 0.9 9.627896561836625
1
8.53161759621826 0.1 -10 0.9 8.53161759621826
2
7.446301420256077 0.1 -10 0.9 0
3
0 0.1 20 0.9 0
0
0 0.1 20 0.9 0
0
0 0.1 20 0.9 0
0
0 0.1 -10 0.9 0
1
0 0.1 20 0.9 0
0
0 0.1 20 0.9 0
0
0 0.1 20 0.9 0
0
0 0.1 -10 0.9 0
1
0 0.1 -10 0.9 0
2
0 0.1 20 0.9 0
0
0 0.1 -10 0.9 0
1
0 0.1 -10 0.9 0
2
0 0.1 -10 0.9 0
3
0 0.1 20 0.9 0
0
0 0.1 -10 0.9 0
1
0 0.1 -10 0.9 0
2
0 0.1 -10 0.9 0
3
0 0.