In [1]:
import gymnasium as gym
import numpy as np

### action space
| Value | Meaning | Value | Meaning | Value | Meaning |
| ----- | ------- | ----- | ------- | ----- | ------- |
| 0 | NO OP | 1 | UP | 2 | RIGHT |
| 3 | LEFT | 4 | DOWN | 5 | UP RIGHT |
| 6 | UP LEFT | 7 | DOWN RIGHT | 8 | DOWN LEFT |

### rgb colors
| Entity | RGB values | Entity | RGB values |
| ------ | ---------- | ------ | ---------- |
| MsPacman | 210,164,74 | Pink Ghost | 198,89,179 |
| Red Ghost | 200,72,72 | Cyan Ghost | 84,184,153 |
| Orange Ghost | 180,122,48 | | |


In [2]:
# env = gym.make("MsPacman-v4", render_mode="human")

# num_episodes = 100;
# observation, info = env.reset()
# rewards = []

# for episode in range(1):
#     done = False;
#     reward_sum = 0;
#     lives = 3;
#     while not done:
#         action = env.action_space.sample()
#         observation, reward, terminated, truncated, info = env.step(action)
#         reward_sum += reward;
#         lives = info['lives']
#         if terminated or truncated or lives == 0:
#             done = True;
#             rewards.append(reward_sum)
#             print(f"episode: {episode} reward = {reward_sum}")
#             observation, info = env.reset()
# env.close()

In [3]:
class Pacman:
    """Random-action MsPacman player that reports the nearest ghost each step.

    Sprites are located purely by colour: every screen pixel whose RGB value
    equals a known sprite colour is treated as belonging to that sprite.
    All coordinates handled by this class are ``(row, col)`` pairs, i.e.
    ``(y, x)`` indices into the observation array.
    """

    # RGB value used to recognise Ms. Pac-Man's pixels.
    MSPACMAN_COLOR = (210, 164, 74)

    # RGB value used to recognise each ghost. Dictionary order matters: when
    # two ghosts are equally close, the one listed first is reported.
    GHOST_COLORS = {
        "red": (200, 72, 72),
        "orange": (180, 122, 48),
        "pink": (198, 89, 179),
        "cyan": (84, 184, 153),
    }

    # Distance returned by closest_ghost() when no ghost/Ms. Pac-Man pair exists.
    NO_GHOST_DISTANCE = 1000000

    def __init__(self, env_id="MsPacman-v4", render_mode="human"):
        """Create the environment and reset it once.

        Args:
            env_id: Gymnasium environment id passed to ``gym.make``.
            render_mode: Render mode passed to ``gym.make``; use ``None`` to
                run without opening a window.
        """
        self.env = gym.make(env_id, render_mode=render_mode)
        self.env.reset()

    @staticmethod
    def _pixels_matching(frame, rgb):
        """Return an (N, 2) integer array of (row, col) positions equal to ``rgb``."""
        mask = np.all(np.asarray(frame) == np.asarray(rgb), axis=-1)
        return np.argwhere(mask)

    def find_ghosts(self, obs_space):
        """Locate every ghost-coloured pixel in a frame.

        Args:
            obs_space: Frame as an array-like whose last axis holds RGB values.

        Returns:
            dict mapping each ghost colour name to a list of ``(row, col)``
            tuples (empty list when that colour is not on screen).
        """
        return {
            color_name: list(map(tuple, self._pixels_matching(obs_space, rgb)))
            for color_name, rgb in self.GHOST_COLORS.items()
        }

    def find_mspacman(self, obs_space):
        """Locate every Ms. Pac-Man-coloured pixel in a frame.

        Args:
            obs_space: Frame as an array-like whose last axis holds RGB values.

        Returns:
            list of ``(row, col)`` tuples, empty when no pixel matches.
        """
        return list(map(tuple, self._pixels_matching(obs_space, self.MSPACMAN_COLOR)))

    def closest_ghost(self, obs_space):
        """Find the ghost with the smallest pixel-to-pixel distance to Ms. Pac-Man.

        The distance between two sprites is the minimum Euclidean distance
        over all pairs of their matching pixels.

        Args:
            obs_space: Frame as an array-like whose last axis holds RGB values.

        Returns:
            ``(colour_name, distance)``. When no Ms. Pac-Man pixel or no ghost
            pixel is found, returns ``(None, NO_GHOST_DISTANCE)``.
        """
        closest_ghost = None
        closest_distance = self.NO_GHOST_DISTANCE

        pacman_px = self._pixels_matching(obs_space, self.MSPACMAN_COLOR)
        if len(pacman_px) == 0:
            return closest_ghost, closest_distance

        for ghost_color, rgb in self.GHOST_COLORS.items():
            ghost_px = self._pixels_matching(obs_space, rgb)
            if len(ghost_px) == 0:
                continue
            # Broadcast to a (pacman pixels, ghost pixels, 2) grid of offsets so
            # all pairwise distances come from a single NumPy call instead of a
            # Python loop over every pixel pair.
            offsets = pacman_px[:, None, :] - ghost_px[None, :, :]
            distance = float(np.linalg.norm(offsets, axis=-1).min())
            # Strict comparison keeps the first-listed ghost on ties.
            if distance < closest_distance:
                closest_distance = distance
                closest_ghost = ghost_color

        return closest_ghost, closest_distance

    def run(self, num_episodes, verbose=True):
        """Play episodes with uniformly sampled actions, then close the environment.

        The environment is closed when this method exits (also on error or
        interrupt), so build a new ``Pacman`` before calling ``run`` again.

        Args:
            num_episodes: Number of episodes to play.
            verbose: When true, print one log line per environment step.

        Returns:
            list with the summed reward of each completed episode.
        """
        rewards = []
        try:
            for episode in range(num_episodes):
                # Gymnasium's reset() returns an (observation, info) pair.
                observation, info = self.env.reset()
                reward_sum = 0
                done = False
                while not done:
                    action = self.env.action_space.sample()
                    observation, reward, terminated, truncated, info = self.env.step(action)
                    closest_ghost, closest_distance = self.closest_ghost(observation)
                    reward_sum += reward
                    # Update before logging so the printed flag matches this step.
                    done = terminated or truncated or info['lives'] == 0
                    if verbose:
                        print(f'episode: {episode} action: {action} reward: {reward} done: {done} info: {info} closest_ghost: {closest_ghost} closest_distance: {closest_distance}')
                rewards.append(reward_sum)
                print(f"episode: {episode} reward = {reward_sum}")
        finally:
            # Always release the emulator/render window.
            self.env.close()
        return rewards



        



In [4]:
# Build the agent (this creates and resets the MsPacman environment) and
# play a single episode using randomly sampled actions.
game = Pacman()
game.run(1)

A.L.E: Arcade Learning Environment (version 0.8.1+53f58b7)
[Powered by Stella]


episode: 0 action: 2 reward: 0.0 done: False info: {'lives': 3, 'episode_frame_number': 4, 'frame_number': 4} closest_ghost: cyan closest_distance: 9.0
episode: 0 action: 7 reward: 0.0 done: False info: {'lives': 3, 'episode_frame_number': 6, 'frame_number': 6} closest_ghost: pink closest_distance: 9.0
episode: 0 action: 0 reward: 0.0 done: False info: {'lives': 3, 'episode_frame_number': 10, 'frame_number': 10} closest_ghost: cyan closest_distance: 9.0
episode: 0 action: 7 reward: 0.0 done: False info: {'lives': 3, 'episode_frame_number': 12, 'frame_number': 12} closest_ghost: pink closest_distance: 9.0
episode: 0 action: 7 reward: 0.0 done: False info: {'lives': 3, 'episode_frame_number': 16, 'frame_number': 16} closest_ghost: cyan closest_distance: 9.0
episode: 0 action: 4 reward: 0.0 done: False info: {'lives': 3, 'episode_frame_number': 20, 'frame_number': 20} closest_ghost: orange closest_distance: 9.0
episode: 0 action: 7 reward: 0.0 done: False info: {'lives': 3, 'episode_frame

NameError: name 'reward_sum' is not defined

In [None]:
# NOTE(review): `distance` is not assigned at top level by any cell shown in
# this notebook, so this cell presumably depends on a variable left in the
# kernel by an earlier, since-removed cell — confirm and define it explicitly.
# The context manager lifts NumPy's summarisation threshold so that an array
# would be printed in full instead of being abbreviated.
with np.printoptions(threshold=np.inf):
    print(distance)

('pink', 9.0)
