# In [6]:
class PacManRL:
    """Grid-world environment and tabular Q-learning agent for a zombie Pac-Man game.

    The board is a 15 x 10 ``int8`` array whose cells hold one of the cell
    codes ``EMPTY`` .. ``VACCINE`` defined below.

    NOTE(review): the methods do not yet agree on what a "state" is.
    ``get_initial_state`` returns the board as a tuple of row tuples,
    ``get_next_state_reward`` takes and returns a 6-tuple of positions, and
    ``is_terminal_state`` / ``train`` / ``choose_action`` /
    ``get_best_action`` index a mapping with ``'player'``, ``'zombies'``,
    ``'exit'`` and ``'pit'`` keys.  ``train`` cannot run end to end until one
    representation is chosen; that choice depends on game rules that are not
    visible in this class, so it is flagged here rather than guessed at.
    """

    BOARD_SHAPE = (15, 10)
    NUM_ZOMBIES = 4
    NUM_OBSTACLES = 10
    NUM_ACTIONS = 4

    # Board cell codes.
    EMPTY, OBSTACLE, PIT, PLAYER, ZOMBIE, VACCINE = range(6)

    def __init__(self, num_bullets=0):
        """
        Create an empty board and an untrained agent.

        Args:
            num_bullets (int): Bullets available to the player.  Defaults to
                0 (the player cannot shoot) so that ``get_next_state_reward``
                no longer fails on a missing attribute.
        """
        # 0: empty, 1: obstacle, 2: pit, 3: player, 4: zombie, 5: vaccine
        self.board = np.zeros(self.BOARD_SHAPE, dtype=np.int8)
        self.player_pos = None
        self.zombie_pos = None
        self.vaccine_pos = None
        self.obstacle_pos = None
        self.pit_pos = None
        self.vaccine_count = 4
        self.num_bullets = num_bullets
        self.w = np.zeros((5,), dtype=np.float32)
        # Created here as well as in train() so that choose_action() and
        # get_best_action() are usable on an untrained agent.
        self.Q = self._new_q_table()

    def _new_q_table(self):
        """Return an empty Q table mapping a key to one value per action."""
        return defaultdict(lambda: [0] * self.NUM_ACTIONS)

    def place_randomly(self):
        """
        Pick a uniformly random empty cell of the current board.

        Returns:
            tuple: ``(row, col)`` of a cell whose code is ``EMPTY``.

        Raises:
            ValueError: If the board has no empty cell left.
        """
        empty_cells = np.argwhere(self.board == self.EMPTY)
        if len(empty_cells) == 0:
            raise ValueError("no empty cell left on the board")
        row, col = empty_cells[random.randrange(len(empty_cells))]
        return int(row), int(col)

    def _place_and_mark(self, code):
        """Place one object at random and write its cell code to the board."""
        pos = self.place_randomly()
        # Mark immediately so the next placement sees this cell as occupied.
        self.board[pos[0], pos[1]] = code
        return pos

    def get_initial_state(self):
        """
        Reset the board and randomly place every object on it.

        The player, zombies, vaccine, obstacles and pit are placed in that
        order, each on its own cell, and their positions are stored on the
        instance.

        Returns:
            tuple: The board as a tuple of row tuples of cell codes.
        """
        self.board = np.zeros(self.BOARD_SHAPE, dtype=np.int8)
        self.player_pos = self._place_and_mark(self.PLAYER)
        self.zombie_pos = [
            self._place_and_mark(self.ZOMBIE) for _ in range(self.NUM_ZOMBIES)
        ]
        self.vaccine_pos = self._place_and_mark(self.VACCINE)
        self.obstacle_pos = [
            self._place_and_mark(self.OBSTACLE) for _ in range(self.NUM_OBSTACLES)
        ]
        self.pit_pos = self._place_and_mark(self.PIT)
        return tuple(map(tuple, self.board))

    def get_next_state_reward(self, state, action):
        """
        Returns the next state and reward for a given state and action.

        Relies on the module-level helpers ``get_new_position``,
        ``is_valid_position``, ``generate_vaccine`` and ``move_zombie``.

        Args:
            state (tuple): 6-tuple ``(player_pos, zombies_pos, vaccine_pos,
                obstacles_pos, exit_pos, pit_pos)``.
            action (int): Action to be taken from the current state.

        Returns:
            tuple: ``(next_state, reward)``.  An invalid move returns the
            unchanged state with reward -10.  If the player dies the state
            is ``None`` and the reward is -100.
        """
        player_pos, zombies_pos, vaccine_pos, obstacles_pos, exit_pos, pit_pos = state

        new_player_pos = get_new_position(player_pos, action)

        # An invalid move leaves the game untouched and is penalised.
        if not is_valid_position(new_player_pos, obstacles_pos, exit_pos, pit_pos):
            return state, -10

        # Picking up the vaccine respawns it somewhere else.
        if new_player_pos == vaccine_pos:
            vaccine_pos = generate_vaccine(obstacles_pos, exit_pos, pit_pos)

        # Walking onto a zombie kills the player.
        if new_player_pos in zombies_pos:
            return None, -100

        # Zombies move relative to where the player was before this step.
        new_zombies_pos = []
        for zombie_pos in zombies_pos:
            new_zombie_pos = move_zombie(zombie_pos, player_pos, obstacles_pos, exit_pos, pit_pos)
            # A zombie ending next to the player kills the player.
            if new_zombie_pos in new_player_pos.get_adjacent_positions():
                return None, -100
            new_zombies_pos.append(new_zombie_pos)

        reward = 0
        if self.num_bullets != 0:
            # NOTE(review): a bullet is consumed on every step, even when no
            # zombie is shot -- confirm this is the intended rule.
            self.num_bullets -= 1
            if new_player_pos in new_zombies_pos:
                # Shoot one zombie standing on the player's cell.
                new_zombies_pos.remove(new_player_pos)
                reward = 20

        new_state = (new_player_pos, tuple(new_zombies_pos), vaccine_pos, obstacles_pos, exit_pos, pit_pos)
        return new_state, reward

    def is_terminal_state(self, state):
        """
        Check if the current state is a terminal state.

        Args:
            state: Mapping with ``'player'``, ``'zombies'``, ``'exit'`` and
                ``'pit'`` keys, or ``None`` (the state returned by
                ``get_next_state_reward`` when the player dies).

        Returns:
            bool: True if the player is dead, adjacent to a zombie, on the
            exit or in the pit.
        """
        # A None state means the player died on the previous step.
        if state is None:
            return True

        player_pos = state['player']

        # Captured: a zombie is adjacent (uses module-level is_adjacent).
        if any(is_adjacent(player_pos, zombie_pos) for zombie_pos in state['zombies']):
            return True

        # Reached the exit or fell into the pit.
        return player_pos in (state['exit'], state['pit'])

    def train(self, num_episodes=1000, alpha=0.1, gamma=0.9, epsilon=0.1):
        """
        Train the agent to learn how to play the game using Q-learning.

        The Q table is keyed by the player's position only.

        Args:
            num_episodes (int): Number of episodes to play.
            alpha (float): Learning rate.
            gamma (float): Discount factor.
            epsilon (float): Probability of taking a random action.
        """
        self.Q = self._new_q_table()

        for _ in range(num_episodes):
            state = self.get_initial_state()

            while not self.is_terminal_state(state):
                action = self.choose_action(state, epsilon)
                next_state, reward = self.get_next_state_reward(state, action)

                q_row = self.Q[state['player']]
                # A dead player (None state) has no future value to bootstrap from.
                if next_state is None:
                    next_q = 0
                else:
                    next_q = max(self.Q[next_state['player']])
                q_row[action] += alpha * (reward + gamma * next_q - q_row[action])

                state = next_state

    def choose_action(self, state, epsilon):
        """
        Choose the next action based on epsilon-greedy policy.

        Args:
            state: Mapping with a ``'player'`` key.
            epsilon (float): Probability of exploring with a random action.

        Returns:
            int: Index of the chosen action.
        """
        if random.random() < epsilon:
            return random.randrange(self.NUM_ACTIONS)
        return self.get_best_action(state)

    def get_best_action(self, state):
        """
        Get the best action for the current state.

        Args:
            state: Mapping with a ``'player'`` key.

        Returns:
            int: Index of the action with the highest Q value (first one on ties).
        """
        # int() so callers get a plain Python integer, not a NumPy scalar.
        return int(np.argmax(self.Q[state['player']]))

# In [7]:
class PacmanZ(Pacman):
    """Pac-Man player that loses a life on zombie contact and scores per move.

    The ``Pacman`` base class must already be defined or imported when this
    class statement runs.  This class presumably relies on it to provide
    ``x``, ``y``, ``lives``, ``spawn_point`` and ``move(dx, dy)`` -- verify
    against the base class.
    """

    def __init__(self, x, y, symbol, lives=3):
        """Forward all arguments to ``Pacman`` and start the score at 0."""
        super().__init__(x, y, symbol, lives)
        self.score = 0

    def move(self, dx, dy, zombies):
        """
        Try to move the player by ``(dx, dy)``.

        Args:
            dx: Offset added to ``self.x``.
            dy: Offset added to ``self.y``.
            zombies: Iterable of objects with ``x`` and ``y`` attributes.

        Returns:
            bool: False if the target cell held a zombie (a life is lost and
            the player is sent back to ``self.spawn_point``); True if the
            move was delegated to the base class and the score increased.

        Raises:
            SystemExit: When the last life is lost.
        """
        target = (self.x + dx, self.y + dy)

        if any(target == (zombie.x, zombie.y) for zombie in zombies):
            self.lives -= 1
            # <= 0 so the game also ends if lives started at or below zero.
            if self.lives <= 0:
                print("Game Over!")
                # exit() is an interactive-shell helper; raise SystemExit directly.
                raise SystemExit
            print("You were killed by a zombie! You have", self.lives, "lives left.")
            self.x, self.y = self.spawn_point
            return False

        # No zombie in the way: perform the move and reward it.
        super().move(dx, dy)
        self.score += 1
        print("Your current score is:", self.score)
        return True


# Output: NameError: name 'Pacman' is not defined (the Pacman base class must be defined before this cell runs)