In [7]:
import numpy as np
from mcts import mcts

from minigrid.core.grid import Grid
from minigrid.core.mission import MissionSpace
from minigrid.core.world_object import Goal
from minigrid.manual_control import ManualControl
from minigrid.minigrid_env import MiniGridEnv
from gymnasium import spaces

### Define the environment

The GridWorld environment is built on the MiniGrid package (a collection of Gymnasium environments), which provides a graphical interface.

**Parameters**

- size (int): size of the grid

- start_pos (int, int): starting position of the agent; if omitted (None), the agent is placed at a random position

- start_dir (int): starting direction of the agent

- goal_pos (int, int): position of the goal; if omitted (None), the goal is placed at a random position

- agent_view_size (int): side length, in cells, of the agent's square field of view (MiniGrid requires an odd value of at least 3)

In [8]:
#Environment class
class GridWorld(MiniGridEnv):
    """Empty, wall-enclosed grid containing one agent and one goal.

    Parameters
    ----------
    size : int
        Forwarded to ``MiniGridEnv`` as ``grid_size``.
    start_pos : (int, int) or None
        Fixed start cell of the agent. ``None`` places the agent on a random cell.
    start_dir : int
        Start direction of the agent; only applied when ``start_pos`` is given.
    goal_pos : (int, int) or None
        Fixed cell of the goal. ``None`` places the goal on a random cell.
    **kwargs
        Forwarded unchanged to ``MiniGridEnv.__init__``
        (e.g. ``render_mode``, ``agent_view_size``).
    """

    def __init__(self, size=18, start_pos = None, start_dir = 0, goal_pos = None, **kwargs,):
        # These must be set before super().__init__ so they exist whenever
        # _gen_grid() is invoked by the base class.
        self.agent_start_pos = start_pos
        self.agent_start_dir = start_dir
        self.goal_pos = goal_pos

        mission_space = MissionSpace(mission_func=self._gen_mission)

        super().__init__(
            mission_space=mission_space,
            grid_size=size,
            **kwargs,
        )

    @staticmethod
    def _gen_mission():
        """Return the fixed mission string used by the mission space."""
        return 'Reach the goal'

    def _gen_grid(self, width, height):
        """Build the grid: outer walls, the agent and the goal.

        A fixed goal is put on the grid *before* the agent is placed. The
        original order (agent first) allowed a randomly placed agent to land
        on the cell that the fixed goal was written to afterwards, because
        that cell was still empty at placement time. A random goal is still
        placed after the agent, so that the goal placement can avoid the
        agent's cell.
        """
        #Create an empty grid
        self.grid = Grid(width, height)

        #Surround the grid with walls
        self.grid.wall_rect(0, 0, width, height)

        has_fixed_goal = self.goal_pos is not None

        #Place a fixed goal first so that a random agent start avoids its cell
        if has_fixed_goal:
            self.put_obj(Goal(), *self.goal_pos)

        #Place the agent
        if self.agent_start_pos is not None:
            self.agent_pos = self.agent_start_pos
            self.agent_dir = self.agent_start_dir
        else:
            self.place_agent()

        #Place a random goal last so that it avoids the agent's cell
        if not has_fixed_goal:
            self.place_obj(Goal())

        self.mission = "Reach the goal"


### Create the environment and get a test run

In [9]:
# Build the environment with grid size 18 and agent_view_size 5, asking for the "human" render mode.
env = GridWorld(render_mode = "human", size = 18, agent_view_size = 5)

# The whole return value of reset() is kept in `obs`; the observation is read from index 0 below.
obs = env.reset()

#Image returns the partial observation of the agent
# NOTE(review): this expression is not the last line of the cell, so the notebook does not display it.
obs[0]['image']

#Show the grid
env.render()

### MCTS-Agent

In [10]:
from copy import deepcopy

#State class that represents the state of the agent
class State:
    """Planner state: an environment snapshot, the agent pose and a belief over the goal cell.

    The belief is a ``(env.width, env.height)`` array indexed by world
    coordinates ``[x, y]``; it is stored in ``self.belief`` and is also
    reachable as ``self.goal_belief``.

    Parameters
    ----------
    env
        Environment the state is based on. It must provide ``width``,
        ``height``, ``grid.get(x, y)``, ``action_space``, ``step(action)`` and
        ``relative_coords(x, y)``.
    agent_pos : (int, int)
        Agent position in world coordinates.
    agent_dir : int
        Agent direction.
    goal_belief : numpy.ndarray or None
        Existing belief to use (it is updated in place). ``None`` starts from
        a uniform belief over the cells inside the outer wall.
    copy_env : bool
        When True (default) the state works on a deep copy of ``env``. Pass
        False only when ``env`` is already a private copy.

    NOTE(review): ``takeAction`` returns ``(state, reward, done)``. The ``mcts``
    package imported by this notebook presumably expects ``takeAction`` to
    return only the successor state - confirm before handing a State to it.
    """

    # Object-type codes stored in channel 0 of a MiniGrid image observation
    # (values of minigrid.core.constants.OBJECT_TO_IDX for "unseen" and "goal").
    UNSEEN_TYPE_IDX = 0
    GOAL_TYPE_IDX = 8

    def __init__(self, env, agent_pos, agent_dir, goal_belief=None, copy_env=True):
        self.env = deepcopy(env) if copy_env else env
        self.agent_pos = agent_pos
        self.agent_dir = agent_dir
        # Every method reads ``self.belief``; the original stored the array only
        # as ``self.goal_belief``, so the first belief access raised AttributeError.
        self.belief = goal_belief if goal_belief is not None else self.initialize_goal_belief()

    @property
    def goal_belief(self):
        """Alias of ``self.belief`` kept for code that used the old attribute name."""
        return self.belief

    @goal_belief.setter
    def goal_belief(self, value):
        self.belief = value

    def initialize_goal_belief(self):
        """Return a uniform belief over the cells inside the outer wall.

        The array covers the whole grid so it can be indexed with world
        coordinates; the one-cell border keeps probability 0.
        """
        belief = np.zeros((self.env.width, self.env.height))
        interior = belief[1:-1, 1:-1]  # view into ``belief``
        if interior.size:
            interior[...] = 1.0 / interior.size
        return belief

    def update_belief(self, observation):
        """Condition the belief on what the agent currently sees.

        ``observation`` is either the observation dict returned by
        ``env.step`` or the ``(observation, info)`` tuple returned by
        ``env.reset``. It is assumed to have been produced by ``self.env`` at
        its current agent pose, because ``self.env.relative_coords`` is used
        to map world cells to view cells.

        Visible cells that do not contain the goal get probability 0; if the
        goal is visible, all probability mass moves to its cell. Cells encoded
        as "unseen" carry no information and keep their mass.
        """
        if isinstance(observation, (tuple, list)):
            observation = observation[0]
        image = observation['image']
        view_w, view_h = image.shape[0], image.shape[1]

        for x in range(self.belief.shape[0]):
            for y in range(self.belief.shape[1]):
                view_xy = self.env.relative_coords(x, y)
                if view_xy is None:
                    continue  # world cell lies outside the view window
                vx, vy = view_xy
                if not (0 <= vx < view_w and 0 <= vy < view_h):
                    continue
                cell_type = int(image[vx, vy][0])
                if cell_type == self.GOAL_TYPE_IDX:
                    # The goal has been observed: the belief becomes certain.
                    self.belief[...] = 0.0
                    self.belief[x, y] = 1.0
                    return
                if cell_type != self.UNSEEN_TYPE_IDX:
                    self.belief[x, y] = 0.0

        total = self.belief.sum()
        if total > 0:  # avoid turning the belief into NaNs when no mass is left
            self.belief /= total

    def getPossibleActions(self):
        """Return all action indices of the environment's discrete action space."""
        space = self.env.action_space
        # A Discrete space is not iterable, so enumerate its indices explicitly.
        first = int(getattr(space, 'start', 0))
        return list(range(first, first + int(space.n)))

    def takeAction(self, action):
        """Apply ``action`` to a copy of the environment.

        Returns
        -------
        (State, reward, done)
            The successor state (with its belief updated from the new
            observation), the step reward and the environment's
            ``terminated`` flag. This state is left unchanged.
        """
        new_env = deepcopy(self.env)
        new_env.agent_pos = self.agent_pos
        new_env.agent_dir = self.agent_dir
        obs, reward, done, _, _ = new_env.step(action)
        # new_env is already a private copy, so the successor adopts it directly.
        new_state = State(new_env, new_env.agent_pos, new_env.agent_dir,
                          self.belief.copy(), copy_env=False)
        new_state.update_belief(obs)
        return new_state, reward, done

    def isTerminal(self):
        """Return True when the agent stands on a goal cell."""
        # grid.get() returns None for an empty cell, so guard before reading .type.
        cell = self.env.grid.get(*self.agent_pos)
        return cell is not None and cell.type == 'goal'

    def getReward(self):
        """Return 0 on the goal cell and -1 otherwise (a per-step cost)."""
        if self.isTerminal():
            return 0  # No steps remaining if goal is reached
        return -1  # Negative reward for each step to encourage reaching the goal quickly

    def __hash__(self):
        # tuple() makes list/array positions hashable.
        return hash((tuple(self.agent_pos), self.agent_dir, self.belief.tobytes()))

    def __eq__(self, other):
        if not isinstance(other, State):
            return NotImplemented
        return (
            tuple(self.agent_pos) == tuple(other.agent_pos)
            and self.agent_dir == other.agent_dir
            and np.array_equal(self.belief, other.belief)
        )

#MCTS Agent that makes use of the mcts library
class POMDPMCTSAgent():
    """Placeholder for the MCTS-based agent; action selection is not implemented yet."""

    def __init__(self):
        """Create the agent. There is currently nothing to configure."""
        # The original method had no body, which made the whole cell a syntax error.
        pass

    def getAction(self, state):
        """Choose an action for ``state``.

        TODO: not implemented yet - currently returns None for every state.
        """
        pass
    
    

In [11]:
# NOTE(review): `Agent` is not defined in any cell visible in this notebook. The agent class defined
# earlier is POMDPMCTSAgent, whose constructor takes no arguments and which has no run() method.
# Confirm which class this cell is meant to use before relying on it.
env = GridWorld(size=10, agent_view_size=3)
agent = Agent(env, visibility_range=3)
agent.run(episodes=1)

AttributeError: 'NoneType' object has no attribute 'type'