In [61]:
import numpy as np
import pygame
import time

In [103]:
class PacManZ:
    def __init__(self, board_size=(10, 10), zombies_num=3, pits_num=3, obstacles_num=5, ep_num=10, max_shots=3):
        self.board_size = board_size
        self.zombies_num = zombies_num
        self.pits_num = pits_num
        self.obstacles_num = obstacles_num
        self.episodes_num = ep_num
        self.max_shots = max_shots
        self.is_bug = False

    def reset(self):
        rand_pos_num = self.zombies_num + self.pits_num + self.obstacles_num + 3
        rand_positions = [[np.random.randint(self.board_size[0]), np.random.randint(self.board_size[1])] for _\
                            in range(rand_pos_num)]
        
        # while(len(rand_positions) < rand_pos_num):
        #     np.append(rand_positions, [np.random.randint(self.board_size[0]), np.random.randint(self.board_size[1])])
        #     rand_positions = np.unique(rand_positions)
        # rand_positions = list(rand_positions)

        self.player_pos = rand_positions[0]
        self.vaccine_pos = rand_positions[1]
        self.end_point = rand_positions[2]
        s = 3
        self.zombie_pos = rand_positions[s:(self.zombies_num+s)]
        s = (self.zombies_num+s)
        self.pit_pos = rand_positions[s:(self.pits_num+s)]
        s = (s+self.pits_num)
        self.obstacle_pos = rand_positions[s:(self.obstacles_num+s)]

        self.player_has_vaccine = False
        self.remaining_shots = self.max_shots
        
        self.w_p = np.zeros(3 + self.zombies_num)
        self.w_z = [np.zeros(3) for _ in range(self.zombies_num)]
        
        self.cured_zombies = []
        self.shot_zombies = []
        self.zombies_archive = [[1, z] for z in self.zombie_pos]

    def approximator_func(self, x, w):
        return np.dot(x, w)
    
    def update_weights(self, x, w):
        V_hat = self.approximator_func(x, w)
        weight = w + self.alpha * (self.V_train - V_hat) * x
        return weight

    def get_player_features(self, pos):
        z_coefficient = -1 if self.player_has_vaccine else 1

        if(len(self.zombie_pos) > 0):
            dist_to_exit = -np.sum(abs(np.array(pos) - np.array(self.end_point)))
        else:
            dist_to_exit = 0
        dist_to_vaccine = np.sum(abs(np.array(pos) - np.array(self.vaccine_pos))) * z_coefficient
        dist_to_pit = np.sum(abs(np.array(pos) - np.array(self.pit_pos[0])))

        zombie_dists = []
        for i, z in enumerate(self.zombies_archive):
            z_dist = np.sum(abs(np.array(pos) - np.array(z[1])))
            zombie_dists.append(z_dist * z_coefficient)
        
        features = [dist_to_exit, dist_to_pit, dist_to_vaccine] + zombie_dists
        return np.array(features)

    def get_valid_moves(self, pos):
        valid_moves = []
        if pos[0] > 0 and [pos[0]-1, pos[1]] not in self.obstacle_pos:
            valid_moves.append([pos[0]-1, pos[1]])
        if pos[0] < self.board_size[0]-1 and [pos[0]+1, pos[1]] not in self.obstacle_pos:
            valid_moves.append([pos[0]+1, pos[1]])
        if pos[1] > 0 and [pos[0], pos[1]-1] not in self.obstacle_pos:
            valid_moves.append([pos[0], pos[1]-1])
        if pos[1] < self.board_size[1]-1 and [pos[0], pos[1]+1] not in self.obstacle_pos:
            valid_moves.append([pos[0], pos[1]+1])
        return valid_moves

    def get_zombie_features(self, pos):
        dist_to_vaccine = np.sum(abs(np.array(pos) - np.array(self.vaccine_pos)))

        z_coefficient = 1 if self.player_has_vaccine else -1
        dist_to_player = np.sum(abs(np.array(pos) - np.array(self.player_pos))) * z_coefficient

        in_range = -1 if (pos[0] == self.player_pos[0] or pos[1] == self.player_pos[1]) else 1
        
        features = [dist_to_player, dist_to_vaccine, in_range]
        return np.array(features)
    
    def get_successor_move(self, pos, agent="player", idx=0):
        valid_moves = self.get_valid_moves(pos)
        if(agent == "player"):
            get_features = self.get_player_features
            weights = self.w_p
        else:
            get_features = self.get_zombie_features
            weights = self.w_z[idx]

        v_s = []
        for move in valid_moves:
            x = get_features(move)
            v = self.approximator_func(x, weights)
            v_s.append(v)
        
        best = np.random.choice(np.flatnonzero(v_s == np.max(v_s)))
        return valid_moves[best], v_s[best]
    
    def move_player(self):
        if(len(self.zombie_pos) > 0 and self.remaining_shots > 0 or self.player_has_vaccine):
            for i, zombie in enumerate(self.zombie_pos):
                hamming_dist_old = np.sum(abs(np.array(zombie) - np.array(self.player_pos)))
                if(self.player_has_vaccine and (hamming_dist_old == 1)):
                    self.cured_zombies.append(zombie)
                    self.zombies_archive[i] = [0, zombie]
                    self.zombie_pos.remove(zombie)
                    self.player_has_vaccine = False
                    continue
                
                if(self.remaining_shots > 0 and ((zombie[1] == self.player_pos[1] and abs(self.player_pos[0] - zombie[0]) <= 2) or \
                   (zombie[0] == self.player_pos[0] and abs(self.player_pos[1] - zombie[1]) <= 2))):
                    self.shot_zombies.append(zombie)
                    self.zombies_archive[i] = [0, zombie]
                    self.zombie_pos.remove(zombie)
                    self.remaining_shots -= 1
                    continue

        new_pos, v = self.get_successor_move(self.player_pos, agent="player")

        if(len(self.zombie_pos) > 0 and not self.player_has_vaccine and new_pos == self.vaccine_pos):
            self.player_has_vaccine = True
            new_vaccine_pos = [np.random.randint(self.board_size[0]), np.random.randint(self.board_size[1])]
            while(new_vaccine_pos == self.player_pos or new_vaccine_pos == new_pos or\
                  new_vaccine_pos in self.obstacle_pos or new_vaccine_pos in self.pit_pos):
                new_vaccine_pos = [np.random.randint(self.board_size[0]), np.random.randint(self.board_size[1])]
            self.vaccine_pos = new_vaccine_pos

        if(len(self.zombie_pos) > 0 and self.remaining_shots > 0 or self.player_has_vaccine):
            for zombie in self.zombie_pos:
                hamming_dist_new = np.sum(abs(np.array(zombie) - np.array(new_pos)))
                if(self.player_has_vaccine and (hamming_dist_new == 1)):
                    self.cured_zombies.append(zombie)
                    self.zombie_pos.remove(zombie)
                    self.player_has_vaccine = False
                    continue
                
                if(self.remaining_shots > 0 and ((zombie[1] == new_pos[1] and abs(new_pos[0] - zombie[0]) <= 2) or \
                   (zombie[0] == new_pos[0] and abs(new_pos[1] - zombie[1]) <= 2))):
                    self.shot_zombies.append(zombie)
                    self.zombie_pos.remove(zombie)
                    self.remaining_shots -= 1
                    continue
        self.player_pos = new_pos

    def move_zombies(self):
        for i, zombie in enumerate(self.zombie_pos):
            new_pos, v = self.get_successor_move(zombie, agent="zombie", idx=i)

            if(new_pos in self.pit_pos):
                new_pos = [np.random.randint(self.board_size[0]), np.random.randint(self.board_size[1])]
                while(new_pos == self.player_pos or new_pos in self.obstacle_pos or\
                       new_pos in self.pit_pos):
                    new_pos = [np.random.randint(self.board_size[0]), np.random.randint(self.board_size[1])]

            self.zombie_pos[i] = new_pos
            
    def is_over(self):
        if(self.player_pos in self.zombie_pos): 
            self.v_train_p = -1000
            self.v_train_z = []
            for z in self.zombies_archive:
                if(z[1] == self.player_pos):
                    self.v_train_z.append(1000)
                else:
                    self.v_train_z.append(-500)
            return True
        
        if(self.player_pos in self.pit_pos):
            print("oftad to pit kos khol :D")
            self.v_train_p = -1000
            self.v_train_z = [500 for _ in range(len(self.zombies_archive))]
            return True
        
        if(self.player_pos == self.end_point and len(self.zombie_pos) == 0):
            print("raft to end point bord")
            self.v_train_p = len(self.cured_zombies) * 250 + len(self.shot_zombies) * 200
            self.v_train_z = [-1000 for _ in range(len(self.zombies_archive))]
            return True
        
        if(len(self.get_valid_moves(self.player_pos)) == 0):
            self.is_bug = True
            return True

        for z in self.zombie_pos:
            if(len(self.get_valid_moves(z)) == 0):
                self.is_bug = True
                return True

        return False
    
    def train(self, alpha):
        self._alpha = alpha
        for i in range(self.episodes_num):
            print(f"Episode {i+1}:")
            self.reset()
            while(not self.is_over()):
                self.move_player()
                if(self.is_over()): break
                if(len(self.zombie_pos) > 0):
                    self.move_zombies()

            if(not self.is_bug):
                # player weights
                x_p = self.get_player_features(self.player_pos)
                v_hat_p = self.approximator_func(x_p, self.w_p)
                self.w_p = (self.w_p + alpha * (self.v_train_p - v_hat_p) * x_p)/10000

                # zombies weights
                for j, z in enumerate(self.zombies_archive):
                    x_z = self.get_zombie_features(z[1])
                    v_hat_z = self.approximator_func(x_z, self.w_p[j])
                    self.w_z[j] = (self.w_z[j] + alpha * (self.v_train_z[j] - v_hat_z) * x_z)/10000

                print(f"new weights for player: {self.w_p}")
                print(f"new weights for each zombie player: ")
                print(*self.w_z, sep="\n")
            else:
                print("There was a bug in this game, weights upading skipped.")
            print()

    def play(self):
        # Define some colors
        BLACK = (0, 0, 0)
        GRAY = (60, 60, 60)
        RED = (255, 0, 0)

        # Set the dimensions of the board
        BOARD_WIDTH = 600
        BOARD_HEIGHT = 400

        # Set the dimensions of each cell
        CELL_WIDTH = BOARD_WIDTH // 15
        CELL_HEIGHT = BOARD_HEIGHT // 10

        # Load the images
        def load_img(path, padding=10):        
            img = pygame.image.load(path)
            # Scale the image to fit inside a cell
            scaled_image_width = CELL_WIDTH - padding # subtract some padding
            scaled_image_height = CELL_HEIGHT - padding # subtract some padding
            img = pygame.transform.scale(img, (scaled_image_width, scaled_image_height))
            return img

        zombie = load_img("img/zombie.png")
        player = load_img("img/player.png")
        pit = load_img("img/pit2.png")
        endpoint = load_img("img/endpoint.png")
        vaccine = load_img("img/vaccine.png", padding=15)
        obstacle = load_img("img/obstacle3.png", padding=0)

        # Initialize Pygame
        pygame.init()

        # Set up the display
        screen = pygame.display.set_mode((BOARD_WIDTH, BOARD_HEIGHT))
        pygame.display.set_caption("Pac-man Z edition")

        # Create the board
        board = []
        for row in range(10):
            board.append([])
            for column in range(15):
                board[row].append(0)

        # Set up the font for the numbers on the board
        font = pygame.font.SysFont('Calibri', 25, True, False)

        def place_sth(pos, img):
            center_x = pos[0] * CELL_WIDTH + CELL_WIDTH // 2
            center_y = pos[1] * CELL_HEIGHT + CELL_HEIGHT // 2
            image_rect = img.get_rect()
            image_rect.center = (center_x, center_y)
            screen.blit(img, image_rect)

        # Draw the board
        done = False
        self.reset()
        while(not self.is_over()):
            screen.fill(0)
            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    done = True

            # Draw the cells
            for row in range(10):
                for column in range(15):
                    color = GRAY
                    pygame.draw.rect(screen, color, [column * CELL_WIDTH, row * CELL_HEIGHT, CELL_WIDTH, CELL_HEIGHT], 1) # border of 1px

            # Draw the lines between cells
            for x in range(0, BOARD_WIDTH, CELL_WIDTH):
                pygame.draw.line(screen, BLACK, (x, 0), (x, BOARD_HEIGHT), 1) # vertical lines
            for y in range(0, BOARD_HEIGHT, CELL_HEIGHT):
                pygame.draw.line(screen, BLACK, (0, y), (BOARD_WIDTH, y), 1) # horizontal lines

            place_sth(self.pit_pos[0], pit)
            place_sth(self.end_point, endpoint)
            for o in self.obstacle_pos:
                place_sth(o, obstacle)
            place_sth(self.vaccine_pos, vaccine)
            
            self.move_player()
            place_sth(self.player_pos, player)
            if(self.is_over()): break
            if(len(self.zombie_pos) > 0):
                self.move_zombies()
            for z in self.zombie_pos:
                place_sth(z, zombie)


            # Draw the numbers on the board
            for row in range(10):
                for column in range(15):
                    if board[row][column] != 0:
                        text = font.render(str(board[row][column]), True, BLACK)
                        text_rect = text.get_rect()
                        text_rect.center = (column * CELL_WIDTH + CELL_WIDTH // 2, row * CELL_HEIGHT + CELL_HEIGHT // 2)
                        screen.blit(text, text_rect)

            # Update the screen
            time.sleep(0.5)
            pygame.display.flip()

        # Quit Pygame
        pygame.quit()

In [105]:
game = PacManZ(board_size=(15, 10), zombies_num=4, pits_num=1, obstacles_num=10, max_shots=3, ep_num=50)
game.train(alpha=0.4)

Episode 1:
new weights for player: [ 0.4  -0.24 -0.32 -0.28 -0.32 -0.36 -0.48]
new weights for each zombie player: 
[ 0.139216 -0.140784 -0.020016]
[ 0.1606144 -0.0399616 -0.0199904]
[ 0.1810368 -0.1789632 -0.0199872]
[ 0.2416128 -0.1195968 -0.0199888]

Episode 2:
oftad to pit kos khol :D
new weights for player: [ 0.6   0.   -0.44 -0.24 -0.48 -0.52 -0.32]
new weights for each zombie player: 
[-0.120864  0.2946    0.019976]
[-0.24  0.06 -0.02]
[-0.2570256  0.0802816  0.0200176]
[-0.1593856  0.3427744  0.0200096]

Episode 3:
new weights for player: [ 0.2  -0.36 -0.08 -0.36 -0.12 -0.36 -0.16]
new weights for each zombie player: 
[ 0.179352 -0.180648 -0.020008]
[ 0.0601296 -0.0598704 -0.0199856]
[ 0.1802592 -0.2196128 -0.0199968]
[ 0.0802304 -0.1194816  0.0200144]

Episode 4:
new weights for player: [ 0.16 -0.36 -0.44 -0.04 -0.12 -0.4  -0.08]
new weights for each zombie player: 
[ 0.0199936 -0.20064    0.0199936]
[ 0.0601296 -0.1590784  0.0200144]
[ 0.20176   -0.1391376 -0.0199824]
[ 0.040

In [106]:
game.play()

oftad to pit kos khol :D
