In [4]:
import numpy as np
import random

# AIMA Code

In [5]:
class Thing(object):
    """Base class for every physical object that can live in an Environment.

    Subclass it to model concrete objects.  An instance may carry an optional
    ``__name__`` attribute, which is used only when the object is printed.
    """

    def __repr__(self):
        # Prefer the per-instance name; fall back to the class name.
        label = getattr(self, '__name__', self.__class__.__name__)
        return '<{}>'.format(label)

    def is_alive(self):
        """Return the ``alive`` attribute, or False when the thing has none."""
        return getattr(self, 'alive', False)

    def show_state(self):
        """Print the internal state; subclasses are expected to override."""
        print("I don't know how to show_state.")

    def display(self, canvas, x, y, width, height):
        """Draw this Thing on the canvas (no-op placeholder)."""
        # Do we need this?
        return None

class Agent(Thing):
    """A Thing that acts: it owns a ``program`` mapping a percept to an action.

    ``program`` is stored as an attribute (a plain callable taking one
    argument, the percept, and returning an action) rather than defined as a
    method, so the program cannot inspect the agent itself; it only ever sees
    percepts.  A program that needs a model of the world must build and
    maintain it on its own.  What counts as a percept or an action depends on
    the environment the agent lives in.

    Attributes set by the constructor:
        alive:        True while the agent is active.
        bump:         initialised to False.
        holding:      initialised to an empty list.
        performance:  numeric performance measure, starting at 0.
        program:      the callable described above.
    """

    def __init__(self, program=None):
        """Create the agent.

        Args:
            program: callable ``percept -> action``.  When omitted, an
                interactive program is used that asks for each action on the
                console.

        Raises:
            AssertionError: if ``program`` is given but is not callable.
        """
        self.alive = True
        self.bump = False
        self.holding = []
        self.performance = 0
        if program is None:
            def program(percept):
                # NOTE(review): eval() executes whatever is typed at the
                # prompt; only acceptable for trusted, interactive use.
                return eval(input('Percept={}; action? ' .format(percept)))
        # The builtin callable() replaces collections.Callable: the module was
        # never imported here and that alias no longer exists in Python 3.10+.
        assert callable(program), "program must be callable"
        self.program = program

    def can_grab(self, thing):
        """Return True if this agent can grab ``thing``.

        The base implementation always answers False; override it in the
        appropriate Agent subclasses.
        """
        return False


class Environment(object):
    """Abstract base for environments; concrete environments inherit from it.

    A subclass normally has to provide:
        percept:         what an agent perceives at this moment.
        execute_action:  how an action changes the world (and the agent's
                         ``performance``).
    The environment tracks ``things`` and ``agents`` (a subset of the
    things).  Every thing receives a ``location`` when it is added, and every
    agent has its ``performance`` set to 100 at that moment.
    """

    def __init__(self):
        self.things = []
        self.agents = []

    def thing_classes(self):
        """Return the classes allowed in this environment (none by default)."""
        return []

    def percept(self, agent):
        """Return what ``agent`` perceives right now.  Must be implemented."""
        raise NotImplementedError

    def execute_action(self, agent, action):
        """Apply ``action`` to the world.  Must be implemented."""
        raise NotImplementedError

    def default_location(self, thing):
        """Location used for a new thing when none was specified."""
        return None

    def exogenous_change(self):
        """Hook for spontaneous changes in the world; no-op by default."""
        pass

    def is_done(self):
        """Return True when no agent is alive any more."""
        for agent in self.agents:
            if agent.is_alive():
                return False
        return True

    def step(self):
        """Advance the environment by one time step.

        All live agents choose their action first, then the actions are
        executed, then the exogenous change happens.  Override this when
        actions and exogenous changes interact.
        """
        if self.is_done():
            return
        # Dead agents contribute an empty action so the lists stay aligned.
        chosen = [agent.program(self.percept(agent)) if agent.alive else ""
                  for agent in self.agents]
        for agent, action in zip(self.agents, chosen):
            self.execute_action(agent, action)
        self.exogenous_change()

    def run(self, steps=1000):
        """Step the environment up to ``steps`` times, stopping once done."""
        for _ in range(steps):
            if self.is_done():
                break
            self.step()

    def list_things_at(self, location, tclass=Thing):
        """Return the things located exactly at ``location`` that are
        instances of ``tclass``."""
        found = []
        for thing in self.things:
            if thing.location == location and isinstance(thing, tclass):
                found.append(thing)
        return found

    def some_things_at(self, location, tclass=Thing):
        """Return True if at least one instance of ``tclass`` (or of a
        subclass) is at ``location``."""
        matches = self.list_things_at(location, tclass)
        return matches != []

    def add_thing(self, thing, location=None):
        """Add ``thing`` to the environment and set its location.

        For convenience, anything that is not a Thing is treated as an agent
        program and wrapped in a new Agent.  Agents are also registered in
        ``agents`` and their performance is set to 100.  (Shouldn't need to
        be overridden.)
        """
        if not isinstance(thing, Thing):
            thing = Agent(thing)
        assert thing not in self.things, "Don't add the same thing twice"
        if location is None:
            location = self.default_location(thing)
        thing.location = location
        self.things.append(thing)
        if isinstance(thing, Agent):
            thing.performance = 100
            self.agents.append(thing)

    def delete_thing(self, thing):
        """Remove ``thing`` from the environment.

        If the thing is not present, a diagnostic is printed instead of
        raising.  The thing is dropped from ``agents`` as well when listed.
        """
        try:
            self.things.remove(thing)
        except ValueError as e:
            remaining = [(other, other.location) for other in self.things]
            print(e)
            print("  in Environment delete_thing")
            print("  Thing to be removed: {} at {}" .format(thing, thing.location))
            print("  from list: {}" .format(remaining))
        if thing in self.agents:
            self.agents.remove(thing)

def distance_m(a, b):
    """Return the Manhattan (city-block) distance between two (x, y) points."""
    dx = abs(a[0] - b[0])
    dy = abs(a[1] - b[1])
    return dx + dy


class Trap(Thing):
    '''A trap: a Thing that can be placed in a cell of the environment.'''

class Gold(Thing):
    '''A piece of gold: a Thing that can be placed in a cell of the environment.'''



# Reflex Agent

In [6]:
class ReflexAgent(Agent):
    '''
    Simple reflex agent for the 5x5 gold-and-traps grid.

    The agent starts alive, facing right, with a performance of 100. Its only
    memory is a mask of visited cells, used solely to penalise moves that end
    on a cell it has already been in. Actions are chosen from the current
    percepts alone.
    '''

    def __init__(self):
        # Agent.__init__ is not called here: it would assign self.program and
        # shadow the program() method defined below.
        self.location = (0, 0)
        self.direction = 'R'
        self.performance = 100
        self.alive = True

        # Visited-cell mask. It starts empty rather than pre-marking (0, 0):
        # Environment.add_thing() overwrites self.location after construction,
        # so the real starting cell is only known once the agent moves.
        # move_forward() marks the cell being left, which covers it.
        self.visited_cells = np.full((5, 5), False)

    def __str__(self):
        '''Return "(row, column, FACING)" describing the agent's pose.'''
        facing = {'U': 'UP', 'R': 'RIGHT', 'D': 'DOWN', 'L': 'LEFT'}
        return "(%s, %s, %s)" % (self.location[0], self.location[1], facing[self.direction])

    def print_percepts(self, percepts, radius):
        '''
        Print the percepts around the agent as a small grid.

        percepts: iterable of (thing, distance, (row, column)) tuples.
        radius:   how many cells around the agent are shown in each direction;
                  cells outside the 5x5 grid are skipped.
        Each cell is printed as "(agent gold traps)".
        '''
        print("Percept")

        x, y = self.location

        # Count the perceived gold and traps per cell.
        gold_grid = [[0 for _ in range(5)] for _ in range(5)]
        traps_grid = [[0 for _ in range(5)] for _ in range(5)]
        for percept in percepts:
            p_row, p_col = percept[2]
            if isinstance(percept[0], Trap):
                traps_grid[p_row][p_col] += 1
            if isinstance(percept[0], Gold):
                gold_grid[p_row][p_col] += 1

        # Rows and columns of the window that fall inside the grid.
        offsets = range(-radius, radius + 1)
        rows = [x + d for d in offsets if self.is_inbounds((x + d, 0))]
        cols = [y + d for d in offsets if self.is_inbounds((0, y + d))]

        print('  ' + ''.join('   %s    ' % col for col in cols))

        for row in rows:
            cells = []
            for col in cols:
                marker = self.direction if (row, col) == (x, y) else '-'
                gold = '-' if gold_grid[row][col] < 1 else gold_grid[row][col]
                traps = '-' if traps_grid[row][col] < 1 else traps_grid[row][col]
                cells.append('(%s %s %s) ' % (marker, gold, traps))
            print('%s ' % row + ''.join(cells))
        print('')

    def is_inbounds(self, location):
        '''Return True when location lies inside the 5x5 grid.'''
        x, y = location
        return 0 <= x < 5 and 0 <= y < 5

    def get_gold(self):
        '''Reward the agent with 10 points for a collected piece of gold.'''
        self.performance += 10

    def fall_in_trap(self):
        '''Take 5 points from the agent for a trap in its cell.'''
        self.performance -= 5

    def turn_clockwise(self):
        '''Rotate the agent 90 degrees clockwise; costs 1 point.'''
        self.performance -= 1
        turns = {'U': 'R', 'R': 'D', 'D': 'L', 'L': 'U'}
        self.direction = turns[self.direction]

    def move_forward(self):
        '''
        Move one cell in the facing direction; costs 1 point.

        The agent never leaves the grid: a move into a wall leaves it where it
        is. Ending on an already visited cell costs 2 extra points.
        '''
        self.performance -= 1

        r, c = self.location

        # The cell being left counts as visited. This also registers the
        # starting cell, wherever the environment placed the agent.
        self.visited_cells[r][c] = True

        if self.direction == "U" and r != 0:
            r = r - 1
        elif self.direction == "R" and c != 4:
            c = c + 1
        elif self.direction == "D" and r != 4:
            r = r + 1
        elif self.direction == "L" and c != 0:
            c = c - 1
        self.location = (r, c)

        # Penalise re-entering a visited cell; otherwise remember the new one.
        if self.visited_cells[r][c]:
            self.performance -= 2
        else:
            self.visited_cells[r][c] = True

    def program(self, percepts):
        '''
        Choose the next action from the current percepts.

        percepts: list of (thing, distance, (row, column)) tuples.
        Returns 'STAY', 'ADVANCE' or 'TURN'.
        '''
        golds = [p for p in percepts if isinstance(p[0], Gold)]

        if len(golds) < 1:
            # No gold perceived: keep advancing while inside rows/columns
            # 1..3 of the grid, and turn otherwise.
            r = self.location[0]
            c = self.location[1]

            if self.direction == 'U' and r > 1:
                return 'ADVANCE'
            elif self.direction == 'R' and c < 3:
                return 'ADVANCE'
            elif self.direction == 'D' and r < 3:
                return 'ADVANCE'
            elif self.direction == 'L' and c > 1:
                return 'ADVANCE'

            return 'TURN'

        # Head for the first perceived gold.
        target = golds[0]
        rGold = target[2][0]
        cGold = target[2][1]
        rAgent = self.location[0]
        cAgent = self.location[1]

        # If there is gold in the current location, stay
        if cGold == cAgent and rGold == rAgent:
            return 'STAY'

        # TODO: also advance when the gold is in line with the current direction

        # Check if the perceived gold is in front of the agent
        if self.direction == 'U' and rGold < rAgent and cGold >= cAgent:
            return 'ADVANCE'
        elif self.direction == 'R' and rGold >= rAgent and cGold > cAgent:
            return 'ADVANCE'
        elif self.direction == 'D' and rGold > rAgent and cGold <= cAgent:
            return 'ADVANCE'
        elif self.direction == 'L' and rGold <= rAgent and cGold < cAgent:
            return 'ADVANCE'

        return 'TURN'

# Model Based Agent

In [7]:
class ModelBasedAgent(Agent):
    '''
    Model-based agent for the 5x5 gold-and-traps grid.

    The agent starts alive, facing right, with a performance of 100. It keeps
    an internal model of every cell (visited flag, gold count, trap count),
    which lets it head for gold it has seen earlier even when that gold is no
    longer part of the current percepts.
    '''

    def __init__(self):
        # Agent.__init__ is not called here: it would assign self.program and
        # shadow the program() method defined below.
        self.location = (0, 0)
        self.direction = 'R'
        self.performance = 100
        self.alive = True

        # Per-cell model. -1 means "unknown"; 'Visited' is 0/1 once known and
        # 'Gold'/'Traps' hold the counts last perceived for the cell.
        self.internal_state = [[{'Visited': -1, 'Gold': -1, 'Traps': -1} for _ in range(5)] for _ in range(5)]

    def __str__(self):
        '''Return "(row, column, FACING)" describing the agent's pose.'''
        facing = {'U': 'UP', 'R': 'RIGHT', 'D': 'DOWN', 'L': 'LEFT'}
        return "(%s, %s, %s)" % (self.location[0], self.location[1], facing[self.direction])

    def print_internal_state(self):
        '''
        Print the agent's internal model as a grid.

        Every cell is shown as "(visited gold traps)": '?' for unknown, '-'
        for zero/not visited, 'V' for visited, and the count otherwise.
        '''
        print('Agent internal state:')
        print('     0       1       2       3       4')
        print('  (A G T) (A G T) (A G T) (A G T) (A G T)')
        for r, cells in enumerate(self.internal_state):
            row = '%s ' % r
            for cell in cells:
                visited = '?' if cell['Visited'] < 0 else '-' if cell['Visited'] == 0 else 'V'
                golds = '?' if cell['Gold'] < 0 else '-' if cell['Gold'] == 0 else cell['Gold']
                traps = '?' if cell['Traps'] < 0 else '-' if cell['Traps'] == 0 else cell['Traps']
                row += '(%s %s %s) ' % (visited, golds, traps)
            print(row)

    def print_percepts(self, percept, radius):
        '''
        Print the cells around the agent as a small grid.

        The gold and trap counts are read from the internal state, not from
        the percept argument. Cells outside the 5x5 grid are skipped.
        '''
        print("Percept:")

        x, y = self.location

        # Rows and columns of the window that fall inside the grid.
        offsets = range(-radius, radius + 1)
        rows = [x + d for d in offsets if self.is_inbounds((x + d, 0))]
        cols = [y + d for d in offsets if self.is_inbounds((0, y + d))]

        print('  ' + ''.join('   %s    ' % col for col in cols))

        for row in rows:
            cells = []
            for col in cols:
                marker = self.direction if (row, col) == (x, y) else '-'

                cellGold = self.internal_state[row][col]['Gold']
                gold = '-' if cellGold < 1 else cellGold

                cellTraps = self.internal_state[row][col]['Traps']
                traps = '-' if cellTraps < 1 else cellTraps

                cells.append('(%s %s %s) ' % (marker, gold, traps))
            print('%s ' % row + ''.join(cells))
        print('')

    def get_gold(self):
        '''Reward the agent with 10 points for a collected piece of gold.'''
        self.performance += 10

    def fall_in_trap(self):
        '''Take 5 points from the agent for a trap in its cell.'''
        self.performance -= 5

    def turn_clockwise(self):
        '''Rotate the agent 90 degrees clockwise; costs 1 point.'''
        self.performance -= 1
        turns = {'U': 'R', 'R': 'D', 'D': 'L', 'L': 'U'}
        self.direction = turns[self.direction]

    def move_forward(self):
        '''
        Move one cell in the facing direction; costs 1 point.

        The agent never leaves the grid: a move into a wall leaves it where it
        is. Ending on a cell the model marks as visited costs 2 extra points.
        '''
        self.performance -= 1

        r, c = self.location
        if self.direction == "U" and r != 0:
            r = r - 1
        elif self.direction == "R" and c != 4:
            c = c + 1
        elif self.direction == "D" and r != 4:
            r = r + 1
        elif self.direction == "L" and c != 0:
            c = c - 1
        self.location = (r, c)

        # Penalise re-entering a cell the model already marks as visited.
        if self.internal_state[r][c]['Visited'] == 1:
            self.performance -= 2

    def is_inbounds(self, location):
        '''Return True when location lies inside the 5x5 grid.'''
        x, y = location
        return 0 <= x < 5 and 0 <= y < 5

    def update_internal_state(self, percepts, radius=4):
        '''
        Refresh the internal model with the latest percepts.

        percepts: iterable of (thing, distance, (row, column)) tuples.
        radius:   size of the perceived window around the agent; every
                  in-bounds cell inside it is reset before the percepts are
                  counted again.
        '''
        x, y = self.location

        # Reset the knowledge about every cell inside the percept radius.
        for dr in range(-radius, radius + 1):
            for dc in range(-radius, radius + 1):
                loc = (x + dr, y + dc)
                if not self.is_inbounds(loc):
                    continue
                cell = self.internal_state[loc[0]][loc[1]]
                cell['Gold'] = 0
                cell['Traps'] = 0
                # A cell seen for the first time becomes "known, not visited".
                if cell['Visited'] == -1:
                    cell['Visited'] = 0

        # Count what is currently perceived.
        for percept in percepts:
            p_row, p_col = percept[2]
            if isinstance(percept[0], Trap):
                self.internal_state[p_row][p_col]['Traps'] += 1
            if isinstance(percept[0], Gold):
                self.internal_state[p_row][p_col]['Gold'] += 1

        # Mark the current agent location as visited
        self.internal_state[x][y]['Visited'] = 1

    def program(self, percepts):
        '''
        Choose the next action from the percepts and the internal model.

        percepts: list of (thing, distance, (row, column)) tuples.
        Returns 'STAY', 'ADVANCE' or 'TURN'.
        '''
        golds = [p for p in percepts if isinstance(p[0], Gold)]

        # If there is no gold, look in the internal state for gold
        if len(golds) < 1:
            golds = []
            for row, cells in enumerate(self.internal_state):
                for column, cell in enumerate(cells):
                    if cell['Gold'] > 0:
                        distance = distance_m((row, column), self.location)
                        # Same (thing, distance, location) layout as a percept.
                        golds.append(((row, column), distance, (row, column)))

            # Sort gold by distance
            golds = sorted(golds, key=lambda tup: tup[1])

        # If there is still no gold found, take default behaviour
        if len(golds) < 1:
            r = self.location[0]
            c = self.location[1]

            if self.direction == 'U' and r > 1:
                return 'ADVANCE'
            elif self.direction == 'R' and c < 3:
                return 'ADVANCE'
            elif self.direction == 'D' and r < 3:
                return 'ADVANCE'
            elif self.direction == 'L' and c > 1:
                return 'ADVANCE'

            return 'TURN'

        # Head for the first gold in the list.
        target = golds[0]
        rGold = target[2][0]
        cGold = target[2][1]
        rAgent = self.location[0]
        cAgent = self.location[1]

        # If there is gold in the current location, stay
        if cGold == cAgent and rGold == rAgent:
            return 'STAY'

        # TODO: also advance when the gold is in line with the current direction

        # Check if the perceived gold is in front of the agent
        if self.direction == 'U' and rGold < rAgent and cGold >= cAgent:
            return 'ADVANCE'
        elif self.direction == 'R' and rGold >= rAgent and cGold > cAgent:
            return 'ADVANCE'
        elif self.direction == 'D' and rGold > rAgent and cGold <= cAgent:
            return 'ADVANCE'
        elif self.direction == 'L' and rGold <= rAgent and cGold < cAgent:
            return 'ADVANCE'

        return 'TURN'

# Fully Observable Environment

In [8]:
class FullyObservableEnvironment(Environment):
    '''
    Fully observable grid environment.

    The agents' percepts give complete access to the state of the task
    environment: every gold and trap on the grid is perceived, sorted by its
    distance to the agent.
    '''

    def __init__(self, width=5, height=5, radius=4):
        '''
        width:  number of values of the first location coordinate (grid rows).
        height: number of values of the second coordinate (grid columns).
        radius: radius of vision handed to the agents for their bookkeeping.
        '''
        super(FullyObservableEnvironment, self).__init__()
        self.width = width
        self.height = height
        self.radius_of_vision = radius

    def __str__(self):
        '''Render the grid; each cell is shown as "(agent gold traps)".'''
        grid = self.get_grid()
        # Header lines sized to the number of columns.
        lines = [
            ('  ' + ''.join('   %s    ' % c for c in range(self.height))).rstrip(),
            ('  ' + '(A G T) ' * self.height).rstrip(),
        ]
        for r in range(self.width):
            row = str(r) + ' '
            for c in range(self.height):
                gold = 0
                traps = 0
                agent = None
                for obj in grid[r][c]:
                    if isinstance(obj, Agent):
                        agent = obj
                    elif isinstance(obj, Gold):
                        gold += 1
                    elif isinstance(obj, Trap):
                        traps += 1
                row += '(%s %s %s) ' % (
                    '-' if agent is None else agent.direction,
                    '-' if gold < 1 else gold,
                    '-' if traps < 1 else traps,
                )
            # The trailing newline yields a blank line between grid rows.
            row += '\n'
            lines.append(row)
        return '\n'.join(lines)

    def run(self, steps=1000):
        "Run the Environment for given number of time steps, printing a trace."
        print('---------------------------')
        print('Initial State')
        print('---------------------------')

        print(self)

        for agent in self.agents:
            self._report_percepts(agent)

            print("Agent state: %s" % agent)
            print("Agent performance: %s" % agent.performance)
            print('')

        print('---------------------------')
        print('Run details')
        print('---------------------------')
        for step_number in range(1, steps + 1):
            if self.is_done():
                return
            print('<STEP %s>' % step_number)
            self.step()

    def _report_percepts(self, agent):
        '''Compute the agent's percepts, let a model-based agent update its
        internal state, and print the percepts (and that state).'''
        percepts = self.percept(agent)

        if isinstance(agent, ModelBasedAgent):
            agent.update_internal_state(percepts, self.radius_of_vision)

        self.print_agent_percept(agent, percepts)

        if isinstance(agent, ModelBasedAgent):
            agent.print_internal_state()
            print('')

    def get_grid(self):
        '''Return a width x height nested list with the things in each cell.'''
        return [[self.list_things_at((x, y)) for y in range(self.height)]
                for x in range(self.width)]

    def default_location(self, thing):
        '''Return a random cell of the grid.'''
        # The second coordinate is bounded by height, matching get_grid().
        return (random.randint(0, self.width - 1), random.randint(0, self.height - 1))

    def is_done(self):
        '''The run is over once no gold is left in the environment.'''
        return not any(isinstance(thing, Gold) for thing in self.things)

    def percept(self, agent):
        '''Return (thing, distance, location) for every non-agent thing,
        sorted by Manhattan distance to the agent.'''
        things_list = [(thing, distance_m(agent.location, thing.location), thing.location)
                       for thing in self.things if not isinstance(thing, Agent)]
        return sorted(things_list, key=lambda tup: tup[1])

    def execute_action(self, agent, action):
        '''Apply the agent's action, resolve gold/traps in its cell and print
        the resulting state.'''

        if action == "TURN":
            agent.turn_clockwise()
        elif action == "ADVANCE":
            agent.move_forward()
        # "STAY" (or any other action) leaves the agent where it is.

        things = self.list_things_at(agent.location)
        golds = [g for g in things if isinstance(g, Gold)]
        traps = [t for t in things if isinstance(t, Trap)]

        # At most one gold and one trap are consumed per action.
        if golds:
            agent.get_gold()
            self.delete_thing(golds[0])

        if traps:
            agent.fall_in_trap()
            self.delete_thing(traps[0])

        print("SELECTED ACTION: ", action)
        print("Agent state: ", agent)
        print("Agent performance: %s" % agent.performance)

        print('\nEnvironment: ')
        print(self)

        self._report_percepts(agent)

    def print_agent_percept(self, agent, percepts):
        '''Hook for printing percepts; this environment prints nothing.'''
        pass

# Partially Observable Environment

In [9]:
class PartiallyObservableEnvironment(FullyObservableEnvironment):
    '''
    Partially observable grid environment.

    It works like FullyObservableEnvironment, but an agent only perceives the
    things within radius_of_vision cells of its own location; the rest of the
    grid stays hidden until the agent moves close enough.
    '''

    def __init__(self, width=5, height=5, radius=1):
        '''
        width, height: grid size, as in FullyObservableEnvironment.
        radius:        radius of vision; the default of 1 gives the 3x3
                       neighbourhood around the agent.
        '''
        FullyObservableEnvironment.__init__(self, width, height, radius)

    def percept(self, agent):
        '''Only perceive near things.

        Returns (thing, distance, location) tuples for the non-agent things
        inside the square of radius_of_vision around the agent, scanned row by
        row and column by column.
        '''
        x, y = agent.location
        reach = range(-self.radius_of_vision, self.radius_of_vision + 1)
        things_near = []

        for dx in reach:
            for dy in reach:
                things_near += self.percepts_from(agent, (x + dx, y + dy))

        return things_near

    def percepts_from(self, agent, location, tclass=Thing):
        '''Return the percept tuples for the non-agent things of class tclass
        at the given location.'''
        things = self.list_things_at((location[0], location[1]), tclass)
        return [(p, distance_m(agent.location, p.location), p.location)
                for p in things if not isinstance(p, Agent)]

    def print_agent_percept(self, agent, percepts):
        '''Let the agent print its percepts within the radius of vision.'''
        agent.print_percepts(percepts, self.radius_of_vision)

First, we import the required libraries and define the classes used in the rest of the notebook.

The main classes defined above are the following:
<ul>
    <li><b>FullyObservableEnvironment:</b></li>
        &emsp; This class contains the type of environment where you can perceive all places where there are Golds and Traps. All other relevant portions of the environment are also visible.
    <li><b>PartiallyObservableEnvironment:</b></li>
        &emsp; In this type of environment some states are hidden, that means, the agent(s) can never see the entire state of the environment. This kind of environment needs agents with memory to be solved.
    <li><b>ReflexAgent:</b></li>
        &emsp; This class implements the Simple Reflex Agent, which acts only on the basis of the percepts that it receives from the environment. Its actions are based on condition-action rules.
    <li><b>ModelBasedAgent:</b></li>
        &emsp; This is the kind of agent that maintains an internal structure describing the part of the world it cannot currently see. This knowledge is what is called a model of the world.
</ul>

<h1><b>Partially Observable Environment Tests</b></h1>

The first Agent to be tested is the Reflex Agent in a Partially Observable Environment.
In addition to the agent, we also add 5 pieces of gold and 6 traps in specified positions.

In [10]:
# Reflex agent in the partially observable environment; the agent has no
# explicit location, so the environment picks its starting cell.
environment = PartiallyObservableEnvironment()

reflex_agent = ReflexAgent()
environment.add_thing(reflex_agent)

# Five pieces of gold (two of them share cell (1, 4)).
for position in [(4, 0), (0, 1), (2, 3), (1, 4), (1, 4)]:
    gold = Gold()
    environment.add_thing(gold, position)

# Six traps (three of them share cell (3, 1)).
for position in [(1, 0), (3, 1), (3, 1), (3, 1), (2, 3), (4, 4)]:
    trap = Trap()
    environment.add_thing(trap, position)

environment.run()

---------------------------
Initial State
---------------------------
     0       1       2       3       4
  (A G T) (A G T) (A G T) (A G T) (A G T)
0 (- - -) (- 1 -) (- - -) (- - -) (- - -) 

1 (- - 1) (- - -) (- - -) (- - -) (- 2 -) 

2 (R - -) (- - -) (- - -) (- 1 1) (- - -) 

3 (- - -) (- - 3) (- - -) (- - -) (- - -) 

4 (- 1 -) (- - -) (- - -) (- - -) (- - 1) 

Percept
     0       1    
1 (- - 1) (- - -) 
2 (R - -) (- - -) 
3 (- - -) (- - 3) 

Agent state: (2, 0, RIGHT)
Agent performance: 100

---------------------------
Run details
---------------------------
<STEP 1>
SELECTED ACTION:  ADVANCE
Agent state:  (2, 1, RIGHT)
Agent performance: 99

Environment: 
     0       1       2       3       4
  (A G T) (A G T) (A G T) (A G T) (A G T)
0 (- - -) (- 1 -) (- - -) (- - -) (- - -) 

1 (- - 1) (- - -) (- - -) (- - -) (- 2 -) 

2 (- - -) (R - -) (- - -) (- 1 1) (- - -) 

3 (- - -) (- - 3) (- - -) (- - -) (- - -) 

4 (- 1 -) (- - -) (- - -) (- - -) (- - 1) 

Percept
     0       1  

The second agent in the Partially Observable Environment is the Model Based Agent which will be tested with gold and traps at the same positions as the previous example.

In [11]:
# Model-based agent in the partially observable environment; the agent has no
# explicit location, so the environment picks its starting cell.
environment = PartiallyObservableEnvironment()

model_agent = ModelBasedAgent()
environment.add_thing(model_agent)

# Five pieces of gold (two of them share cell (1, 4)).
for position in [(4, 0), (0, 1), (2, 3), (1, 4), (1, 4)]:
    gold = Gold()
    environment.add_thing(gold, position)

# Six traps (three of them share cell (3, 1)).
for position in [(1, 0), (3, 1), (3, 1), (3, 1), (2, 3), (4, 4)]:
    trap = Trap()
    environment.add_thing(trap, position)

environment.run()

-) (- - -) 

4 (- 1 -) (- - -) (- - -) (- - -) (- - 1) 

Percept:
     2       3       4    
1 (- - -) (- - -) (- 2 -) 
2 (- - -) (U - -) (- - -) 
3 (- - -) (- - -) (- - -) 

Agent internal state:
     0       1       2       3       4
  (A G T) (A G T) (A G T) (A G T) (A G T)
0 (? ? ?) (? ? ?) (? ? ?) (? ? ?) (? ? ?) 
1 (- - 1) (- - -) (- - -) (- - -) (- 2 -) 
2 (- - -) (V - -) (V - -) (V - -) (- - -) 
3 (- - -) (- - 3) (- - -) (- - -) (- - -) 
4 (? ? ?) (? ? ?) (? ? ?) (? ? ?) (? ? ?) 

<STEP 6>
SELECTED ACTION:  ADVANCE
Agent state:  (1, 3, UP)
Agent performance: 99

Environment: 
     0       1       2       3       4
  (A G T) (A G T) (A G T) (A G T) (A G T)
0 (- - -) (- 1 -) (- - -) (- - -) (- - -) 

1 (- - 1) (- - -) (- - -) (U - -) (- 2 -) 

2 (- - -) (- - -) (- - -) (- - -) (- - -) 

3 (- - -) (- - 3) (- - -) (- - -) (- - -) 

4 (- 1 -) (- - -) (- - -) (- - -) (- - 1) 

Percept:
     2       3       4    
0 (- - -) (- - -) (- - -) 
1 (- - -) (U - -) (- 2 -) 
2 (- - -) (- - -) 

After running both agents in the Partially Observable Environment, we can compare their results. First, the <u>Reflex Agent's</u> performance:

In [12]:
# Performance of the reflex agent after its run in the partially observable environment.
reflex_agent.performance

94

... and the <u>Model-Based Agent</u>:

In [13]:
# Performance of the model-based agent after its run in the partially observable environment.
model_agent.performance

97

<h1><b>Fully Observable Environment Tests</b></h1>

In this second part of the homework we use the Fully Observable Environment, first with the Reflex Agent inside it. As in the previous exercise, the gold and the traps are placed at explicit positions.

In [14]:
# Reflex agent in the fully observable environment; the agent has no explicit
# location, so the environment picks its starting cell.
environment = FullyObservableEnvironment()

reflex_agent = ReflexAgent()
environment.add_thing(reflex_agent)

# Five pieces of gold (two of them share cell (1, 4)).
for position in [(4, 0), (0, 1), (2, 3), (1, 4), (1, 4)]:
    gold = Gold()
    environment.add_thing(gold, position)

# Six traps (three of them share cell (3, 1)).
for position in [(1, 0), (3, 1), (3, 1), (3, 1), (2, 3), (4, 4)]:
    trap = Trap()
    environment.add_thing(trap, position)

environment.run()

---------------------------
Initial State
---------------------------
     0       1       2       3       4
  (A G T) (A G T) (A G T) (A G T) (A G T)
0 (- - -) (- 1 -) (- - -) (- - -) (- - -) 

1 (- - 1) (- - -) (- - -) (- - -) (- 2 -) 

2 (- - -) (- - -) (- - -) (R 1 1) (- - -) 

3 (- - -) (- - 3) (- - -) (- - -) (- - -) 

4 (- 1 -) (- - -) (- - -) (- - -) (- - 1) 

Agent state: (2, 3, RIGHT)
Agent performance: 100

---------------------------
Run details
---------------------------
<STEP 1>
SELECTED ACTION:  STAY
Agent state:  (2, 3, RIGHT)
Agent performance: 105

Environment: 
     0       1       2       3       4
  (A G T) (A G T) (A G T) (A G T) (A G T)
0 (- - -) (- 1 -) (- - -) (- - -) (- - -) 

1 (- - 1) (- - -) (- - -) (- - -) (- 2 -) 

2 (- - -) (- - -) (- - -) (R - -) (- - -) 

3 (- - -) (- - 3) (- - -) (- - -) (- - -) 

4 (- 1 -) (- - -) (- - -) (- - -) (- - 1) 

<STEP 2>
SELECTED ACTION:  TURN
Agent state:  (2, 3, DOWN)
Agent performance: 104

Environment: 
     0       1

And the Model Based Agent in the Fully Observable Environment.

In [15]:
# Fully observable world with one model-based agent and a fixed gold/trap layout.
environment = FullyObservableEnvironment()

# The agent is added first and without an explicit location.
model_agent = ModelBasedAgent()
environment.add_thing(model_agent)

# A position that appears several times receives one item per occurrence.
gold_positions = [(4, 0), (0, 1), (2, 3), (1, 4), (1, 4)]
trap_positions = [(1, 0), (3, 1), (3, 1), (3, 1), (2, 3), (4, 4)]

for position in gold_positions:
    gold = Gold()
    environment.add_thing(gold, position)

for position in trap_positions:
    trap = Trap()
    environment.add_thing(trap, position)

environment.run()

---------------------------
Initial State
---------------------------
     0       1       2       3       4
  (A G T) (A G T) (A G T) (A G T) (A G T)
0 (- - -) (- 1 -) (- - -) (- - -) (- - -) 

1 (- - 1) (- - -) (- - -) (- - -) (- 2 -) 

2 (- - -) (- - -) (- - -) (- 1 1) (- - -) 

3 (- - -) (R - 3) (- - -) (- - -) (- - -) 

4 (- 1 -) (- - -) (- - -) (- - -) (- - 1) 

Agent internal state:
     0       1       2       3       4
  (A G T) (A G T) (A G T) (A G T) (A G T)
0 (- - -) (- 1 -) (- - -) (- - -) (- - -) 
1 (- - 1) (- - -) (- - -) (- - -) (- 2 -) 
2 (- - -) (- - -) (- - -) (- 1 1) (- - -) 
3 (- - -) (V - 3) (- - -) (- - -) (- - -) 
4 (- 1 -) (- - -) (- - -) (- - -) (- - 1) 

Agent state: (3, 1, RIGHT)
Agent performance: 100

---------------------------
Run details
---------------------------
<STEP 1>
SELECTED ACTION:  TURN
Agent state:  (3, 1, DOWN)
Agent performance: 94

Environment: 
     0       1       2       3       4
  (A G T) (A G T) (A G T) (A G T) (A G T)
0 (- - -) (- 1

We can also see the performance of the <u>Reflex Agent</u>:

In [16]:
# Display the reflex agent's performance attribute as it stands after its run.
reflex_agent.performance

115

... and the <u>Model Based Agent</u> in the Fully Observable Environment:

In [17]:
# Display the model-based agent's performance attribute as it stands after its run.
model_agent.performance

112

<h1>Additional tests:</h1>
In addition to the tests performed previously, we can run the <u>Reflex Agent</u> several times in the <u>Partially Observable Environment with gold and traps placed at random positions...</u>

In [18]:
# Repeated trials: reflex agent in a partially observable world.
# Each trial builds a fresh environment, adds 5 gold pieces and 6 traps
# without explicit locations, runs it, and records the agent's final score.
numberOfTests = 5
totalFitness_PartiallyObservableReflex = 0
fitness_PartiallyObservableReflex = []
for _ in range(numberOfTests):
    environment = PartiallyObservableEnvironment()

    reflex_agent = ReflexAgent()
    environment.add_thing(reflex_agent)

    # No location is passed; placement is left to the environment
    # (presumably random, as the notebook text states).
    for _gold_index in range(5):
        gold = Gold()
        environment.add_thing(gold)

    for _trap_index in range(6):
        trap = Trap()
        environment.add_thing(trap)

    environment.run()

    # Keep both the per-run scores and their running total.
    final_score = reflex_agent.performance
    totalFitness_PartiallyObservableReflex += final_score
    fitness_PartiallyObservableReflex.append(final_score)

      2       3       4
  (A G T) (A G T) (A G T) (A G T) (A G T)
0 (- - -) (- - -) (- - -) (- - -) (- - -) 

1 (- - -) (- - -) (R - -) (- - -) (- - -) 

2 (- - -) (- - -) (- - -) (- - -) (- 1 1) 

3 (- 1 -) (- - -) (- 1 2) (- 1 -) (- - -) 

4 (- - -) (- - -) (- - -) (- - -) (- - 1) 

Percept
     1       2       3    
0 (- - -) (- - -) (- - -) 
1 (- - -) (R - -) (- - -) 
2 (- - -) (- - -) (- - -) 

<STEP 8>
SELECTED ACTION:  ADVANCE
Agent state:  (1, 3, RIGHT)
Agent performance: 90

Environment: 
     0       1       2       3       4
  (A G T) (A G T) (A G T) (A G T) (A G T)
0 (- - -) (- - -) (- - -) (- - -) (- - -) 

1 (- - -) (- - -) (- - -) (R - -) (- - -) 

2 (- - -) (- - -) (- - -) (- - -) (- 1 1) 

3 (- 1 -) (- - -) (- 1 2) (- 1 -) (- - -) 

4 (- - -) (- - -) (- - -) (- - -) (- - 1) 

Percept
     2       3       4    
0 (- - -) (- - -) (- - -) 
1 (- - -) (R - -) (- - -) 
2 (- - -) (- - -) (- 1 1) 

<STEP 9>
SELECTED ACTION:  ADVANCE
Agent state:  (1, 4, RIGHT)
Agent performanc

... as well as the <u>Model-Based Agent</u> in the same kind of environment

In [19]:
# Repeated trials: model-based agent in a partially observable world.
# Each trial builds a fresh environment, adds 5 gold pieces and 6 traps
# without explicit locations, runs it, and records the agent's final score.
numberOfTests = 5
totalFitness_PartiallyObservableModel = 0
fitness_PartiallyObservableModel = []
for _ in range(numberOfTests):
    environment = PartiallyObservableEnvironment()

    model_agent = ModelBasedAgent()
    environment.add_thing(model_agent)

    # No location is passed; placement is left to the environment
    # (presumably random, as the notebook text states).
    for _gold_index in range(5):
        gold = Gold()
        environment.add_thing(gold)

    for _trap_index in range(6):
        trap = Trap()
        environment.add_thing(trap)

    environment.run()

    # Keep both the per-run scores and their running total.
    final_score = model_agent.performance
    totalFitness_PartiallyObservableModel += final_score
    fitness_PartiallyObservableModel.append(final_score)

 - -) (- 1 2) 

Percept:
     3       4    
0 (- - -) (- - -) 
1 (- - -) (R - -) 
2 (- - -) (- 1 -) 

Agent internal state:
     0       1       2       3       4
  (A G T) (A G T) (A G T) (A G T) (A G T)
0 (- - -) (- - -) (- - 1) (- - -) (- - -) 
1 (- - -) (V - -) (V - -) (V - -) (V - -) 
2 (V - -) (V - -) (- - 1) (- - -) (- 1 -) 
3 (V - -) (V - -) (- - -) (? ? ?) (? ? ?) 
4 (- - -) (- - -) (- - -) (? ? ?) (? ? ?) 

<STEP 17>
SELECTED ACTION:  TURN
Agent state:  (1, 4, DOWN)
Agent performance: 106

Environment: 
     0       1       2       3       4
  (A G T) (A G T) (A G T) (A G T) (A G T)
0 (- - -) (- - -) (- - 1) (- - -) (- - -) 

1 (- - -) (- - -) (- - -) (- - -) (D - -) 

2 (- - -) (- - -) (- - 1) (- - -) (- 1 -) 

3 (- - -) (- - -) (- - -) (- - 1) (- - -) 

4 (- - -) (- - -) (- - -) (- - -) (- 1 2) 

Percept:
     3       4    
0 (- - -) (- - -) 
1 (- - -) (D - -) 
2 (- - -) (- 1 -) 

Agent internal state:
     0       1       2       3       4
  (A G T) (A G T) (A G T) (A G T)

... to finally see their average performance. The maximum performance obtained by the <u>Reflex Agent</u> in the <u>Partially Observable Environment</u> was:

In [20]:
# Highest score among the recorded reflex-agent runs (partially observable).
np.max(fitness_PartiallyObservableReflex)

128

... and for the <u>Model Based Agent</u>, its maximum performance was:

In [21]:
# Highest score among the recorded model-based-agent runs (partially observable).
np.max(fitness_PartiallyObservableModel)

130

On average, the <u>Reflex Agent</u> had a performance of:

In [22]:
# Mean score: accumulated reflex-agent total divided by the number of runs.
totalFitness_PartiallyObservableReflex/numberOfTests

111.0

... and the <u>Model-Based Agent</u> had a performance of:

In [23]:
# Mean score: accumulated model-based-agent total divided by the number of runs.
totalFitness_PartiallyObservableModel/numberOfTests

114.6

In [24]:
# Repeated trials: reflex agent in a fully observable world.
# Each trial builds a fresh environment, adds 5 gold pieces and 6 traps
# without explicit locations, runs it, and records the agent's final score.
numberOfTests = 5
totalFitness_FullyObservableReflex = 0
fitness_FullyObservableReflex = []
for _ in range(numberOfTests):
    environment = FullyObservableEnvironment()

    reflex_agent = ReflexAgent()
    environment.add_thing(reflex_agent)

    # No location is passed; placement is left to the environment.
    for _gold_index in range(5):
        gold = Gold()
        environment.add_thing(gold)

    for _trap_index in range(6):
        trap = Trap()
        environment.add_thing(trap)

    environment.run()

    # Keep both the per-run scores and their running total.
    final_score = reflex_agent.performance
    totalFitness_FullyObservableReflex += final_score
    fitness_FullyObservableReflex.append(final_score)

) (A G T) (A G T) (A G T) (A G T)
0 (- 1 -) (- - -) (- - -) (- - -) (- - -) 

1 (U - -) (- - -) (- - -) (- - -) (- - -) 

2 (- - -) (- - -) (- - -) (- - -) (- - -) 

3 (- - -) (- - 1) (- - -) (- - 1) (- - -) 

4 (- - 1) (- - -) (- - -) (- - -) (- - 2) 

<STEP 15>
SELECTED ACTION:  ADVANCE
Agent state:  (0, 0, UP)
Agent performance: 130

Environment: 
     0       1       2       3       4
  (A G T) (A G T) (A G T) (A G T) (A G T)
0 (U - -) (- - -) (- - -) (- - -) (- - -) 

1 (- - -) (- - -) (- - -) (- - -) (- - -) 

2 (- - -) (- - -) (- - -) (- - -) (- - -) 

3 (- - -) (- - 1) (- - -) (- - 1) (- - -) 

4 (- - 1) (- - -) (- - -) (- - -) (- - 2) 

---------------------------
Initial State
---------------------------
     0       1       2       3       4
  (A G T) (A G T) (A G T) (A G T) (A G T)
0 (- 1 -) (- - -) (- - -) (- - -) (- 1 -) 

1 (- - 1) (- - -) (- - -) (- - -) (- - -) 

2 (- 1 -) (- 1 -) (- - 1) (- - -) (- - -) 

3 (- 1 2) (- - -) (R - -) (- - -) (- - 1) 

4 (- - -) (- - -) (

In [25]:
# Repeated trials: model-based agent in a fully observable world.
# Each trial builds a fresh environment, adds 5 gold pieces and 6 traps
# without explicit locations, runs it, and records the agent's final score.
numberOfTests = 5
totalFitness_FullyObservableModel = 0
fitness_FullyObservableModel = []
for _ in range(numberOfTests):
    environment = FullyObservableEnvironment()

    model_agent = ModelBasedAgent()
    environment.add_thing(model_agent)

    # No location is passed; placement is left to the environment.
    for _gold_index in range(5):
        gold = Gold()
        environment.add_thing(gold)

    for _trap_index in range(6):
        trap = Trap()
        environment.add_thing(trap)

    environment.run()

    # Keep both the per-run scores and their running total.
    final_score = model_agent.performance
    totalFitness_FullyObservableModel += final_score
    fitness_FullyObservableModel.append(final_score)

 -) 

1 (- - -) (- - -) (- - -) (- - -) (- - -) 

2 (- - -) (- - -) (- - 1) (- - -) (D - -) 

3 (- - -) (- - -) (- - -) (- - -) (- - -) 

4 (- - -) (- - -) (- - 1) (- 1 -) (- - 1) 

Agent internal state:
     0       1       2       3       4
  (A G T) (A G T) (A G T) (A G T) (A G T)
0 (V - -) (- - -) (- - -) (- - -) (- - -) 
1 (V - -) (V - -) (V - -) (V - -) (V - -) 
2 (- - -) (- - -) (- - 1) (- - -) (V - -) 
3 (- - -) (- - -) (- - -) (- - -) (- - -) 
4 (- - -) (- - -) (- - 1) (- 1 -) (- - 1) 

<STEP 14>
SELECTED ACTION:  ADVANCE
Agent state:  (3, 4, DOWN)
Agent performance: 113

Environment: 
     0       1       2       3       4
  (A G T) (A G T) (A G T) (A G T) (A G T)
0 (- - -) (- - -) (- - -) (- - -) (- - -) 

1 (- - -) (- - -) (- - -) (- - -) (- - -) 

2 (- - -) (- - -) (- - 1) (- - -) (- - -) 

3 (- - -) (- - -) (- - -) (- - -) (D - -) 

4 (- - -) (- - -) (- - 1) (- 1 -) (- - 1) 

Agent internal state:
     0       1       2       3       4
  (A G T) (A G T) (A G T) (A G T) (A

The maximum performance obtained by the <u>Reflex Agent</u> in the <u>Fully Observable Environment</u> was:

In [26]:
# Highest score among the recorded reflex-agent runs (fully observable).
np.max(fitness_FullyObservableReflex)

130

While the maximum performance over all the runs for the <u>Model-Based Agent</u> in the same Environment was:

In [27]:
# Highest score among the recorded model-based-agent runs (fully observable).
np.max(fitness_FullyObservableModel)

127

On average, the performance of the <u>Reflex Agent</u> in the <u>Fully Observable Environment</u> was:

In [28]:
# Mean score: accumulated reflex-agent total divided by the number of runs.
totalFitness_FullyObservableReflex/numberOfTests

119.6

and the average for the <u>Model-Based Agent</u>:

In [29]:
# Mean score: accumulated model-based-agent total divided by the number of runs.
totalFitness_FullyObservableModel/numberOfTests

117.4

<h1>Conclusions</h1>

<ul>
    <li><b>Which agent behaves better in the Partially Observable Environment?:</b></li>
        &emsp; During the tests that were carried out, the Model-Based Agent obtained better results in the vast majority of runs. This makes sense: in this environment an agent does not receive the full state of the world, so the gold pieces it is looking for may be out of sight, but the Model-Based Agent keeps in its model the gold pieces it has already seen through its percepts. The Reflex Agent, in contrast, must explore the world whenever it does not perceive any gold, which may lead it to fall into traps.
    <li><b>Which agent behaves better in the Fully Observable Environment?:</b></li>
        &emsp; In this kind of environment, both Agents had similar results because they did not have to look for the gold pieces: their percepts always contained the exact position of every piece of gold in the Environment.
    <li><b>Are the Agents behaving rationally?:</b></li>
        &emsp; Yes, to some extent. Whenever they are in the same column or row as a piece of gold, they try to go for it; otherwise they try to explore. However, they do not always try to avoid falling into traps.
    <li><b>What is better to pick all the gold in the environment? Less or more steps?:</b></li>
        &emsp; It depends on the number of gold pieces in the environment. If there are only a few, it is better to set a small number of steps for the agent to perform, because if all the gold has been taken and the agent does not stop, it will continue to lose performance. Fortunately, the agents in this exercise do stop when there is no gold left.
    <li><b>Was it fair to test with gold pieces and traps in fixed positions? Why not in random positions?:</b></li>
        &emsp; Yes. Using random positions would not have been fair, because one agent might have had a more difficult layout than the other.
</ul>

