In [None]:
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import clear_output
import random

# Define the environment
class PacManEnv:
    """Minimal grid-world Pac-Man environment.

    The board is a square ``grid_size`` x ``grid_size`` integer array.
    Pac-Man starts in the bottom-left corner, every other cell starts with a
    dot, and ``num_ghosts`` stationary ghosts are dropped on random dot cells
    (replacing the dot). An episode ends when Pac-Man steps onto a ghost or
    when no dots remain on the board.

    Attributes are plain Python/NumPy objects:
      * ``grid``       -- the board array holding the cell codes below.
      * ``pacman_pos`` -- ``[row, col]`` list with Pac-Man's position.
      * ``ghosts``     -- list of ``[row, col]`` lists, one per ghost.
    """

    # Cell codes stored in ``self.grid``.
    EMPTY = 0
    PACMAN = 1
    DOT = 2
    GHOST = 3

    # Action id -> (row delta, column delta): 0=up, 1=down, 2=left, 3=right.
    _MOVES = {0: (-1, 0), 1: (1, 0), 2: (0, -1), 3: (0, 1)}

    def __init__(self, grid_size=5, num_ghosts=1):
        """Store the configuration and build the initial board.

        Args:
            grid_size: Side length of the square board (at least 1).
            num_ghosts: Number of ghosts to place; at most
                ``grid_size ** 2 - 1`` (every cell except Pac-Man's).

        Raises:
            ValueError: If the configuration cannot be laid out (see
                :meth:`reset`).
        """
        self.grid_size = grid_size
        self.num_ghosts = num_ghosts
        self.reset()

    def reset(self):
        """Rebuild the board for a new episode and return it.

        Returns:
            The freshly populated ``self.grid`` array (the live array, not a
            copy).

        Raises:
            ValueError: If ``grid_size`` is smaller than 1, or if there are
                more ghosts than free cells to hold them.
        """
        if self.grid_size < 1:
            raise ValueError(
                f"grid_size must be at least 1, got {self.grid_size}"
            )
        free_cells = self.grid_size * self.grid_size - 1
        if self.num_ghosts > free_cells:
            # Without this check the rejection loop below would never find a
            # free dot cell and would spin forever.
            raise ValueError(
                f"cannot place {self.num_ghosts} ghosts on a "
                f"{self.grid_size}x{self.grid_size} grid with only "
                f"{free_cells} free cells"
            )

        # Start with dots everywhere, then overwrite Pac-Man's start cell.
        self.grid = np.full(
            (self.grid_size, self.grid_size), self.DOT, dtype=int
        )
        self.pacman_pos = [self.grid_size - 1, 0]
        self.grid[self.pacman_pos[0], self.pacman_pos[1]] = self.PACMAN

        # Place ghosts by rejection sampling: keep drawing cells until one
        # that still holds a dot comes up. The draw order is kept as-is so
        # seeded runs place ghosts exactly as before.
        last = self.grid_size - 1
        self.ghosts = []
        for _ in range(self.num_ghosts):
            while True:
                pos = [random.randint(0, last), random.randint(0, last)]
                if self.grid[pos[0], pos[1]] == self.DOT:
                    self.grid[pos[0], pos[1]] = self.GHOST
                    self.ghosts.append(pos)
                    break
        return self.grid

    def render(self):
        """Print the board as text, replacing the previous cell output.

        ``P`` is Pac-Man, ``G`` a ghost, ``.`` a dot; empty cells are blank.
        Pac-Man is drawn on top of a ghost when both share a cell.
        """
        clear_output(wait=True)
        for i in range(self.grid_size):
            cells = []
            for j in range(self.grid_size):
                if [i, j] == self.pacman_pos:
                    cells.append("P ")
                elif [i, j] in self.ghosts:
                    cells.append("G ")
                elif self.grid[i, j] == self.DOT:
                    cells.append(". ")
                else:
                    cells.append("  ")
            print("".join(cells))
        print("\n")

    def step(self, action):
        """Move Pac-Man one cell and score the move.

        Moves that would leave the board are clamped, so Pac-Man stays put
        when walking into a wall.

        Args:
            action: 0=up, 1=down, 2=left, 3=right.

        Returns:
            A ``(grid, reward, done)`` tuple: the live board array, ``+1``
            for eating a dot / ``-10`` for hitting a ghost / ``0`` otherwise,
            and whether the episode has ended.

        Raises:
            KeyError: If ``action`` is not one of the four action ids. The
                environment is left unchanged in that case.
        """
        d_row, d_col = self._MOVES[action]
        last = self.grid_size - 1
        old_row, old_col = self.pacman_pos
        # Keep within bounds.
        new_pos = [
            max(0, min(last, old_row + d_row)),
            max(0, min(last, old_col + d_col)),
        ]
        row, col = new_pos

        # Read the destination before the Pac-Man marker is moved, so a dot
        # there is still visible.
        target = self.grid[row, col]

        # Vacate the old cell so the grid never shows a stale Pac-Man. The
        # guard avoids wiping a ghost marker left in place by a collision.
        if self.grid[old_row, old_col] == self.PACMAN:
            self.grid[old_row, old_col] = self.EMPTY
        self.pacman_pos = new_pos

        reward = 0
        done = False

        # Dot collection.
        if target == self.DOT:
            reward += 1

        if new_pos in self.ghosts:
            # Ghost collision: the cell keeps its ghost marker so the grid
            # stays consistent with ``self.ghosts``.
            reward -= 10
            done = True
        else:
            # Overwrites the eaten dot (if any) with Pac-Man's marker.
            self.grid[row, col] = self.PACMAN

        # Episode also ends once every dot has been collected.
        if not np.any(self.grid == self.DOT):
            done = True

        return self.grid, reward, done

# Example usage: play one episode interactively from the keyboard.
env = PacManEnv(grid_size=5, num_ghosts=1)
state = env.reset()
done = False

while not done:
    env.render()

    # Keep asking until a valid action id is entered. A typo (non-numeric
    # text or a number outside 0-3) must not abort the episode with a
    # ValueError/KeyError traceback. The board is not re-rendered between
    # attempts so the hint below stays visible.
    action = None
    while action is None:
        raw = input("Move (0=up, 1=down, 2=left, 3=right): ")
        try:
            candidate = int(raw)
        except ValueError:
            candidate = None
        if candidate in (0, 1, 2, 3):
            action = candidate
        else:
            print("Invalid move, please enter 0, 1, 2 or 3.")

    state, reward, done = env.step(action)
    print(f"Reward: {reward}")
