In [None]:
from typing import List, Tuple

import numpy as np
np.random.seed(1)

In [2]:
# Environment Definition (Already Implemented)

# Maps each action name to the (dx, dy) offset that is added to the agent's
# (x, y) position; "up" increases y and "right" increases x.
actions = dict(
    up=(0, 1),
    left=(-1, 0),
    right=(1, 0),
    down=(0, -1),
)

class DungeonEnvironment:
    """
    Grid-world dungeon with coordinates 0..4 on both axes.

    The layout (start, goal, holes, walls) is fixed in the constructor. The agent
    never sees the layout directly: it only receives the bump / echo / light
    percepts returned by ``step``.
    """

    def __init__(self):
        self.start_pos = (0, 0)
        self.goal_pos = (3, 4)
        self.holes = [(0, 4), (3, 2)]
        self.walls = [(0, 2), (2, 0), (2, 2), (2, 3)]
        # The agent begins on the start tile.
        self.current_position = self.start_pos

    def get_light(self):
        """
        :returns: True when the agent shares a column (same x) or a row (same y) with the goal.
        """
        (x, y), (goal_x, goal_y) = self.current_position, self.goal_pos
        return x == goal_x or y == goal_y

    def get_echo(self):
        """
        :returns: True when at least one of the four orthogonal neighbours is a hole.
        """
        x, y = self.current_position
        neighbours = ((x, y + 1), (x, y - 1), (x + 1, y), (x - 1, y))
        return any(tile in self.holes for tile in neighbours)

    def reset(self):
        """
        Puts the agent back on the start tile; called at the start of the game.
        """
        self.current_position = self.start_pos

    def step(self, action: str):
        """
        Applies one action of the agent to the environment.

        :param action: one of the keys of ``actions``.
        :returns: (observation, outcome, terminated) where observation is a dict with the
            boolean keys "bump", "echo" and "light", outcome is "Escaped",
            "Fell into hole" or None, and terminated tells whether the game ended.
        """
        dx, dy = actions[action]
        x, y = self.current_position
        target = (x + dx, y + dy)

        # Walking into a wall or off the grid is a bump: the agent stays where it is.
        inside_grid = 0 <= target[0] <= 4 and 0 <= target[1] <= 4
        bump = target in self.walls or not inside_grid
        if not bump:
            self.current_position = target

        # Echo and light are sensed from the tile the agent ends up on.
        observation = {
            "bump": bump,
            "echo": self.get_echo(),
            "light": self.get_light(),
        }

        if self.current_position == self.goal_pos:
            return observation, "Escaped", True
        if self.current_position in self.holes:
            return observation, "Fell into hole", True
        return observation, None, False

In [None]:
# Belief State (To be completed)


class BeliefState:
    """
    Maintains what we believe, using the knowledge base.

    The belief is built only from the agent's own history: the actions it took and the
    bump / echo / light percepts it received. Negative percepts (no light, no echo) are
    kept as well, because they are what allows candidates to be ruled out.
    """
    def __init__(self, grid_size: int = 5):
        """
        :param grid_size: side length of the square grid; valid coordinates are
            0..grid_size-1. The default of 5 matches the 0..4 bounds checked by
            DungeonEnvironment.step.
        """
        self.grid_size = grid_size
        self.sensed_walls = []  # list of in-grid positions we bumped into
        self.safe_tiles = [(0, 0)]  # list of tiles we have stood on (the start tile included)
        self.sensed_light = []  # list of positions where we sensed light
        self.sensed_echoes = []  # list of positions where we sensed echoes
        self.sensed_tiles = []  # list of positions where we received at least one observation
        self.current_position = (0, 0)

    def _in_grid(self, position: Tuple[int, int]) -> bool:
        """Tells whether a position lies inside the grid."""
        x, y = position
        return 0 <= x < self.grid_size and 0 <= y < self.grid_size

    def _grid_cells(self) -> List[Tuple[int, int]]:
        """Lists every grid position, ordered by x then y."""
        return [(x, y) for x in range(self.grid_size) for y in range(self.grid_size)]

    @staticmethod
    def _neighbours(position: Tuple[int, int]) -> List[Tuple[int, int]]:
        """Lists the four orthogonal neighbours of a position (possibly outside the grid)."""
        x, y = position
        return [(x, y + 1), (x, y - 1), (x + 1, y), (x - 1, y)]

    def update(self, taken_action: str, echo: bool, bump: bool, light: bool):
        """
        :param taken_action: action taken by the agent before sensing (up, left, right or down).
        :param echo: did we hear an echo ?
        :param bump: did we bump into a wall ?
        :param light: did we see light ?
        """
        dx, dy = actions[taken_action]
        x, y = self.current_position
        target = (x + dx, y + dy)

        if bump:
            # We did not move. A bump against the outer boundary teaches nothing new,
            # so only in-grid targets are remembered as walls.
            if self._in_grid(target) and target not in self.sensed_walls:
                self.sensed_walls.append(target)
        else:
            self.current_position = target
            # NOTE: this method is not told whether the episode ended, so a tile that
            # terminated the game (goal or hole) is recorded here as well; the belief
            # is only meaningful while the game is still running.
            if target not in self.safe_tiles:
                self.safe_tiles.append(target)

        # Echo and light are sensed from the tile we are standing on after the action.
        here = self.current_position
        if here not in self.sensed_tiles:
            self.sensed_tiles.append(here)
        if light and here not in self.sensed_light:
            self.sensed_light.append(here)
        if echo and here not in self.sensed_echoes:
            self.sensed_echoes.append(here)

    def infer_goal_position(self) -> List[Tuple[int, int]]:
        """
        A tile stays a goal candidate when it is consistent with every light percept:
        it shares a row or column with each tile where light was seen, and shares
        neither with any tile where no light was seen. Tiles we bumped into or already
        stood on are excluded (standing on the goal would have ended the game).

        :return: The list of possible positions for the goal
        """
        dark_tiles = [tile for tile in self.sensed_tiles if tile not in self.sensed_light]
        candidates = []
        for cell in self._grid_cells():
            if cell in self.safe_tiles or cell in self.sensed_walls:
                continue
            cx, cy = cell
            explains_light = all(cx == lx or cy == ly for lx, ly in self.sensed_light)
            explains_dark = all(cx != px and cy != py for px, py in dark_tiles)
            if explains_light and explains_dark:
                candidates.append(cell)
        return candidates

    def infer_hole_positions(self) -> List[Tuple[int, int]]:
        """
        A tile may hide a hole unless we stood on it, bumped into it, or it is adjacent
        to a tile where we sensed and heard no echo.

        :return: The list of positions where we could have holes
        """
        quiet_tiles = [tile for tile in self.sensed_tiles if tile not in self.sensed_echoes]
        cleared = {near for tile in quiet_tiles for near in self._neighbours(tile)}
        return [
            cell
            for cell in self._grid_cells()
            if cell not in self.safe_tiles
            and cell not in self.sensed_walls
            and cell not in cleared
        ]


In [None]:
# function for running any policy on the dungeon environment (Already implemented)
def play_game(policy, max_steps: int = 200):
    """
    Plays one episode of the dungeon with the given policy and prints the result.

    :param policy: callable receiving the current BeliefState and returning an action name.
    :param max_steps: step budget; the episode is aborted with outcome "Timeout" once the
        number of steps exceeds it without the environment having terminated.
    """
    belief_state = BeliefState()
    terminated = False
    environment = DungeonEnvironment()
    environment.reset()
    num_steps = 0
    trajectory = [environment.current_position]
    while not terminated:
        action = policy(belief_state)
        observation, outcome, terminated = environment.step(action)
        belief_state.update(action, **observation)
        trajectory.append(environment.current_position)
        num_steps += 1

        # Only time out when the environment has not just ended the episode itself;
        # otherwise a genuine outcome reached on the last allowed step would be
        # overwritten by "Timeout".
        if not terminated and num_steps > max_steps:
            print("Check your code, probably your agent is stuck somewhere")
            outcome = "Timeout"
            terminated = True

    print(f"The game ended in {num_steps} with the following outcome: {outcome}.")
    print(f"The path taken was: {trajectory}")

In [3]:
# Agent definitions

# Example
def random_agent(belief_state) -> str:
    """
    Baseline policy: ignores the belief state and draws one of the action names
    uniformly at random.
    """
    return np.random.choice(list(actions))

def your_agent(belief_state) -> str:
    """
    Policy to be written: picks the next action from what the belief state knows.

    For now this is a placeholder that answers "up" whatever the belief state contains.

    :param belief_state: Current belief state.
    :return: action picked by the policy:
    """
    # TODO: try implementing a smarter agent
    # This should at least exploit inferred goal and hole positions.
    # (Does not have to be optimal, but must always find the exit and never fall into holes).

    chosen_action = "up"
    return chosen_action

In [None]:
# Play one episode with the random baseline, then one with the hand-written policy;
# each call prints the episode outcome and the path that was taken.
play_game(random_agent)
play_game(your_agent)