In [9]:
# Cell 1: Imports
import numpy as np
from Montecarlotree import MCTS, Node
from typing import List, Tuple
import random
import time
import gymnasium as gym
import ale_py

In [96]:
# Cell 2: PacmanNode class
class PacmanNode(Node):
    """Search-tree node that stores one Pacman environment state.

    Attributes are plain bookkeeping fields: the wrapped ``state``, a link to
    the ``parent`` node, the list of ``children``, the ``visits`` counter, the
    accumulated ``value`` and the actions that have not been expanded yet.
    """

    def __init__(self, state, parent=None):
        """Create a node with zeroed statistics and no known actions.

        Args:
            state: The environment state represented by this node.
            parent: The node this one was expanded from, or ``None`` for a root.
        """
        # NOTE(review): Node.__init__ is not called here; confirm the base
        # class needs no initialisation of its own.
        self.state = state
        self.parent = parent
        self.children = []
        self.visits = 0
        self.value = 0.0
        self.action_space = None
        self.untried_actions = []  # Stays empty until set_possible_actions() runs

    def set_possible_actions(self, action_space) -> None:
        """Attach an action space and reset the untried-action list.

        Args:
            action_space: Object exposing an integer ``n`` attribute giving the
                number of discrete actions; actions are the ints ``0..n-1``.
        """
        self.action_space = action_space
        self.untried_actions = list(range(self.action_space.n))

    def get_possible_actions(self) -> List:
        """Return every action index of the attached action space.

        Returns:
            ``[0, 1, ..., n-1]`` as a fresh list, or an empty list when no
            action space has been attached yet.
        """
        if self.action_space is None:
            return []  # Actions not configured yet
        return list(range(self.action_space.n))

In [100]:
# Test PacmanNode: build a root node from a freshly reset environment and
# print its fields. The environment is closed in `finally` so it is released
# even when one of the checks below raises.
test_env = gym.make(
    "ALE/Pacman-v5",
    render_mode="rgb_array",
    difficulty=0,
    mode=0,
    repeat_action_probability=0.0,
    frameskip=4,
)
try:
    initial_state, info = test_env.reset()
    test_node = PacmanNode(initial_state)
    test_node.set_possible_actions(test_env.action_space)
    print("Possible actions:", test_node.get_possible_actions())
    print("Untried actions:", test_node.untried_actions)
    print("Action space:", test_node.action_space)
    print("State:", test_node.state.shape)
    print("Parent:", test_node.parent)
    print("Children:", test_node.children)
    print("Visits:", test_node.visits)
    print("Value:", test_node.value)
finally:
    test_env.close()




Possible actions: [0, 1, 2, 3, 4]
Untried actions: [0, 1, 2, 3, 4]
Action space: Discrete(5)
State: (250, 160, 3)
Parent: None
Children: []
Visits: 0
Value: 0.0


In [75]:
# Cell 3: PacmanMCTS class
class PacmanMCTS(MCTS):
    """MCTS variant that owns an ``ALE/Pacman-v5`` gymnasium environment."""

    def __init__(self, exploration_weight=1.0):
        """Initialise the base search and create the Pacman environment.

        Args:
            exploration_weight: Forwarded unchanged to ``MCTS.__init__``.

        The environment's action and observation spaces are stored on the
        instance and printed once each.
        """
        super().__init__(exploration_weight)

        env_options = {
            "render_mode": "rgb_array",
            "difficulty": 0,                   # Easiest difficulty
            "mode": 0,                         # Default mode
            "repeat_action_probability": 0.0,  # No sticky actions
            "frameskip": 4,                    # Fixed frameskip
        }
        self.env = gym.make("ALE/Pacman-v5", **env_options)

        self.action_space = self.env.action_space
        print(self.action_space)
        self.observation_space = self.env.observation_space
        print(self.observation_space)

    def _get_current_state(self, state):
        """Return ``state`` unchanged (identity pass-through)."""
        return state

    def _get_new_position(self, pos: Tuple[int, int], action: str) -> Tuple[int, int]:
        """Shift an ``(x, y)`` position one unit in the named direction.

        ``'UP'`` adds one to ``y``, ``'DOWN'`` subtracts one from ``y``,
        ``'LEFT'`` subtracts one from ``x`` and ``'RIGHT'`` adds one to ``x``.
        Any other ``action`` yields ``pos`` itself.
        """
        x_coord, y_coord = pos
        if action == 'UP':
            moved = (x_coord, y_coord + 1)
        elif action == 'DOWN':
            moved = (x_coord, y_coord - 1)
        elif action == 'LEFT':
            moved = (x_coord - 1, y_coord)
        elif action == 'RIGHT':
            moved = (x_coord + 1, y_coord)
        else:
            moved = pos
        return moved



In [76]:
# Cell 4: Test the implementation
import matplotlib.pyplot as plt
import time

def create_simple_game_state(mcts):
    """Reset the environment held by ``mcts`` and draw its first frame.

    Args:
        mcts: Object whose ``env`` attribute provides ``reset()`` returning an
            ``(observation, info)`` pair.

    Returns:
        The ``(observation, info)`` pair produced by the reset.
    """
    first_frame, reset_info = mcts.env.reset()

    # Draw the frame on a new figure; the figure is intentionally left open.
    plt.figure(figsize=(8, 6))
    plt.imshow(first_frame)
    plt.axis('off')

    time.sleep(0.1)  # Short pause so the frame can be seen
    return first_frame, reset_info




In [77]:
# Cell 5: Run simulation
from IPython import display
def run_simulation(num_episodes=5, max_steps=100, render=True):
    """Play Pacman with uniformly sampled actions and collect episode returns.

    Args:
        num_episodes: Number of episodes to play.
        max_steps: Upper bound on environment steps per episode.
        render: When true, show every frame inline, replacing the previous
            cell output, with a 0.1 s pause between frames.

    Returns:
        A list with the summed reward of each episode, in play order.
    """
    mcts = PacmanMCTS(exploration_weight=1.4)
    all_rewards = []

    try:
        for episode in range(num_episodes):
            observation, info = create_simple_game_state(mcts)
            total_reward = 0

            print(f"\nEpisode {episode + 1}")
            print(f"Initial Info: {info}")

            for _ in range(max_steps):
                # Random action
                action = mcts.action_space.sample()

                observation, reward, terminated, truncated, info = mcts.env.step(action)
                total_reward += reward

                # Render if requested
                if render:
                    fig = plt.figure(figsize=(8, 6))
                    plt.imshow(observation)
                    plt.axis('off')

                    display.clear_output(wait=True)
                    display.display(fig)
                    # Close this exact figure so frames do not pile up in memory.
                    plt.close(fig)
                    time.sleep(0.1)  # Add delay to make it viewable

                if terminated or truncated:
                    break

            print(f"Episode {episode + 1} finished with total reward: {total_reward}")
            all_rewards.append(total_reward)
    finally:
        # The environment is created inside this function and never handed
        # out, so release it here even if an episode raises.
        mcts.env.close()

    return all_rewards
