## Environment

#### Disclaimer:
This implementation borrows a small amount of environment-setup code from another source (the reference is missing here); otherwise, the main classes and loop are original.

My implementation of the naive agent is currently very rough due to recent commitments that have come up, but the environment is implemented according to the instructions. The next version of the wumpus agent will have classes that represent the environment, knowledge, percepts, and actuators in an organized fashion.

In [73]:
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import networkx as nx
from collections import namedtuple
from IPython.display import display
from enum import Enum

# Integer codes for the things a room can hold (Agent, Pit, Wumpus, Gold) and
# for the percepts they cause in neighbouring rooms (Breeze, Stench).
# 0 is used by Environment for an empty room.
Agent, Pit, Wumpus, Gold, Breeze, Stench = 1, 2, 3, 4, 5, 6

In [74]:
class Environment:
    """Randomly generated square Wumpus World grid.

    The finished grid is stored as ``self.environment``, a NumPy array of the
    integer item codes ``Agent``, ``Pit``, ``Wumpus`` and ``Gold`` (0 = empty
    room). Grid index (3, 0) is the agent's start square; ``printEnv`` renders
    it in the bottom-left corner with 1-based row/column labels.

    NOTE: while a world is being built, the working grid and the list of
    still-unused coordinates live in the module-level globals ``env`` and
    ``Indexes`` (see ``refreshGlobals``). Creating another instance overwrites
    those globals.
    """

    def __init__(self, pitprob, addwumpus=True):
        """Build a new world.

        pitprob   -- probability that any still-free room becomes a pit.
        addwumpus -- when falsy, no Wumpus is placed.
        """
        self.pit_prob = pitprob
        self.allowclimbwithoutgold = True
        self.addwumpus = addwumpus
        self.EnvSize = 4
        self.getEnv()
        self.startloc = self.setAgent(getloc=True)
        self.pits = []
        self.get_pit_loc()

    def getMatrix(self, item):
        """ Returns an EnvSize x EnvSize list of lists filled with 'item'.
            If 'item' has a copy() method (e.g. a set), every cell receives
            its own copy so the cells do not share one mutable object. """
        make_copy = getattr(item, 'copy', None)
        return [
            [make_copy() if make_copy else item for _ in range(self.EnvSize)]
            for _ in range(self.EnvSize)
        ]

    def getIndexes(self):
        """ Returns a list with all coordinate locations in the environment,
            row by row: [(0,0), (0,1), ..., (3,2), (3,3)] for EnvSize 4. """
        return [(x, y)
                for x in range(self.EnvSize)
                for y in range(self.EnvSize)]

    def setElement(self, index, value):
        """ Sets env[index] to value and then removes this index from the
            global index set, so no other item can be placed there. """
        x, y = index
        env[x][y] = value
        # Removes location from Indexes after it is used.
        Indexes.remove(index)

    def randomize_pits(self):
        """ Turns every still-free room into a pit with probability
            pit_prob (one independent Bernoulli draw per room). """
        # Iterate over a snapshot: setElement() removes entries from Indexes.
        for index in set(Indexes):
            setPit = np.random.binomial(1, self.pit_prob, 1)
            if setPit[0] == 1:
                self.setElement(index, Pit)

    def get_pit_loc(self):
        """ Appends the (row, column) of every pit in the finished grid to
            self.pits. """
        rows, cols = np.where(self.environment == Pit)
        self.pits.extend((int(r), int(c)) for r, c in zip(rows, cols))

    def setWumpus(self):
        """ Places a Wumpus at a single randomly selected free square """
        index = random.choice(Indexes)
        self.setElement(index, Wumpus)
        self.wumpusloc = index

    def setGold(self):
        """ Places a block of gold at a randomly selected free square """
        index = random.choice(Indexes)
        self.setElement(index, Gold)
        self.goldloc = index

    def setAgent(self, getloc=False):
        """ Places the agent at grid index (3, 0), which printEnv labels as
            square (1,1). With getloc=True nothing is placed and the start
            index is returned instead. """
        index = (3, 0)
        if getloc:
            return index
        self.setElement(index, Agent)
        self.startloc = index

    def refreshGlobals(self):
        """ Resets the global index set and the global working grid, and
            returns both. """
        global Indexes
        global env
        Indexes = self.getIndexes()
        env = self.getMatrix(0)
        return Indexes, env

    def getEnv(self):
        """ Builds a new Wumpus World and stores it in self.environment.
            Placement order matters: agent, gold and Wumpus claim their
            squares first, pits are drawn only on what is left. """
        self.refreshGlobals()
        self.setAgent()
        self.setGold()
        if self.addwumpus:
            self.setWumpus()
        else:
            self.wumpusloc = None
            print("👾 Wumpus not spawned 👾")
        self.randomize_pits()
        self.environment = np.array(env)

    def printEnv(self, agentsteps, spath, final_state=False, returnpath=False):
        """ Returns a DataFrame picturing the world.

            agentsteps  -- list of (row, column) squares the agent visited.
            spath       -- squares of the way back; only drawn when
                           returnpath is True. Callers pass 0 for "no path".
            final_state -- draw every visited square instead of only the
                           latest one.
            returnpath  -- draw spath on top of everything else.
        """
        row_labels = [str(r) for r in range(self.EnvSize, 0, -1)]
        col_labels = [str(c) for c in range(1, self.EnvSize + 1)]
        state = pd.DataFrame(index=row_labels, columns=col_labels)
        if final_state:
            for step in agentsteps:
                state.iloc[step] = "👣"
        else:
            state.iloc[agentsteps[-1]] = "👣"
        # Hazards and gold are drawn after the footprints so they stay visible.
        for pit in self.pits:
            state.iloc[pit] = "🕳️"
        if self.goldloc is not None:
            state.iloc[self.goldloc] = "💰"
        if self.wumpusloc is not None:
            state.iloc[self.wumpusloc] = "👾"
        if returnpath:
            try:
                for step in spath:
                    state.iloc[step] = "🦶"
            except (TypeError, IndexError):
                # Best effort: spath may be the placeholder 0 (not iterable)
                # or contain a square outside the grid.
                pass
        return state.fillna(".")

In [75]:
# # Test if environment is working
# envir = Environment(pitprob = 0.2, addwumpus=False)
# print("\nAgent start: 👣 | Gold: 💰 | Wumpus: 👾 | Pits: 🕳️ | Unattended: .\n")
# display(envir.printEnv(agentsteps=[(3,0),(3,1)], final_state=False, returnpath=True, spath=0))

## Percepts

The naive agent has no use for these percepts, except when it stands on the gold, the wumpus, a pit, or the exit; those cases are handled in the main loop.

In [76]:
# Hazard in a room -> percept it produces in the rooms next to it.
percept_mapping = {Pit: Breeze, Wumpus: Stench}
# Human-readable names for the integer item/percept codes.
items= {1: "Agent", 2: "Pit", 3: "Wumpus", 4: "Gold", 5:"Breeze", 6:"Stench"}

class Percepts(Environment):
    """Environment plus a per-room table of everything perceivable there.

    ``self.percepts[x][y]`` is a set of integer codes: the item in the room
    itself (except the agent) and the Breeze/Stench caused by a Pit/Wumpus in
    a directly adjacent room.
    """

    def __init__(self, pitprob, addwumpus):
        super().__init__(pitprob, addwumpus)
        self.wumpuscry = False

        # One independent empty set per room.
        self.percepts = self.getMatrix(set())

        rooms = self.getIndexes()
        # Same neighbour order as always: below, above, right, left.
        neighbour_offsets = ((1, 0), (-1, 0), (0, 1), (0, -1))
        for x, y in rooms:
            occupant = self.environment[x, y]
            if occupant and occupant != 1:
                self.addPercept((x, y), occupant)

            for dx, dy in neighbour_offsets:
                neighbour = (x + dx, y + dy)
                if neighbour not in rooms:
                    continue  # off the grid
                num = percept_mapping.get(self.environment[neighbour], None)
                if num:
                    self.addPercept((x, y), num)

        # The percept history starts with what is perceivable at the start square.
        start_row, start_col = self.startloc[0], self.startloc[1]
        self.percept_hist = [list(self.getPercept(start_row, start_col)),]

    def addPercept(self, index, num):
        """Add code ``num`` to the percept set of room ``index``."""
        row, col = index
        self.percepts[row][col].add(num)

    def getPercepts(self):
        """Return the whole percept table (list of lists of sets)."""
        return self.percepts

    def getPercept(self, x, y):
        """Return the percept set of room (x, y)."""
        return self.percepts[x][y]

In [77]:
# # test if percepts are working properly
# perc = Percepts(pitprob=0.2, addwumpus=True)
# display(np.array(perc.getPercepts()))
# display(perc.printEnv(agentsteps=(perc.startloc)))
# perc.getEnv()
# perc.percept_hist[-1]

## Knowledge base

This knowledge base currently contains information about the environment that the agent doesn't have; however, the agent only uses this information when it is perceived. A future version will put this information in its proper place.

In [78]:
class KB(Percepts):
    """Agent state on top of the percept table: path, heading, inventory, score."""

    def __init__(self, pitprob, addwumpus):
        super().__init__(pitprob, addwumpus)

        self.loc_path = [(self.startloc),]  # every square visited, latest last
        self.curr_dir = [1,] # 0, 1, 2, 3 - up, right, down, left
        self.action = ["Forward","Tright","Tleft","Shoot", "Grab", "Climb"]
        self.haveGold = False
        self.dead = False
        self.haveArrow = True
        self.arrow_path = []
        self.score = 0
        self.moves = 0

    def get_arrow_path(self):
        """Fill ``self.arrow_path`` with the squares an arrow would cross.

        The squares run from the one next to the agent to the wall, along the
        current heading. Rows grow downwards, so "up" means decreasing row
        index. Does nothing when the arrow has already been used.
        """
        if not self.haveArrow:
            return

        row, col = self.loc_path[-1]
        facing = self.curr_dir[-1]
        size = self.EnvSize

        if facing == 0:  # Up
            path = [(r, col) for r in range(row - 1, -1, -1)]
        elif facing == 1:  # Right
            path = [(row, c) for c in range(col + 1, size)]
        elif facing == 2:  # Down: the rows *below* the agent
            path = [(r, col) for r in range(row + 1, size)]
        elif facing == 3:  # Left
            path = [(row, c) for c in range(col - 1, -1, -1)]
        else:
            path = []

        # Replace rather than append so an earlier call cannot leave stale
        # squares behind.
        self.arrow_path[:] = path


## Actuators

In [79]:
class Actuators(KB):
    """The agent's actions; each one updates path, heading, inventory and score."""

    # (row, column) change for one step per heading: up, right, down, left.
    _STEP = {0: (-1, 0), 1: (0, 1), 2: (1, 0), 3: (0, -1)}

    def __init__(self, pitprob, addwumpus, verbose):
        super().__init__(pitprob, addwumpus)
        self.verbose = verbose

    def forward(self, direction):
        """Step one square in ``direction`` (0 up, 1 right, 2 down, else left).

        The new square is appended to ``loc_path`` even when it lies outside
        the grid; the caller is expected to detect that and call ``bump``.
        Costs 1 point. A percept is recorded only for squares inside the grid.
        """
        row, col = self.loc_path[-1]
        d_row, d_col = self._STEP.get(direction, (0, -1))
        new_row, new_col = row + d_row, col + d_col
        self.loc_path.append((new_row, new_col))
        self.score -= 1
        if self.verbose:
            print("Current score: {}".format(self.score))
        # Explicit bounds check: a negative index would silently wrap around
        # and record the percept of a square on the opposite wall.
        if 0 <= new_row < self.EnvSize and 0 <= new_col < self.EnvSize:
            self.percept_hist.append(list(self.getPercept(new_row, new_col)))

    def bump(self):
        """Undo the last step after walking into a wall (the point stays spent)."""
        if len(self.loc_path) > 1:
            self.loc_path.pop(-1)
        if self.verbose:
            print("Agent attempts to move, bupms into a wall.")
            print("Current score: {}".format(self.score))

    def turnright(self, direction):
        """Turn clockwise from heading ``direction``; costs 1 point."""
        self.curr_dir.append((direction + 1) % 4)
        self.score -= 1
        if self.verbose:
            print("Agent turns right.")
            print("Current score: {}".format(self.score))

    def turnleft(self, direction):
        """Turn counter-clockwise from heading ``direction``; costs 1 point."""
        self.curr_dir.append((direction - 1) % 4)
        self.score -= 1
        if self.verbose:
            print("Agent turns left.")
            print("Current score: {}".format(self.score))

    def shoot(self):
        """Fire the single arrow along ``self.arrow_path``; costs 10 points.

        ``get_arrow_path`` must have been called first. A Wumpus standing on
        the path is killed.
        """
        print("🏹🏹 Agent fires arrow 🏹🏹")
        if self.wumpusloc and (self.wumpusloc in self.arrow_path):
            self.wumpuscry = True
            self.wumpusloc = None
            print("Wumpus killed!")
        else:
            print("Arrow misses its mark")
        self.haveArrow = False
        self.score -= 10

    def grab(self):
        """Pick up the gold if it is on the current square.

        A failed grab costs 1 point; a successful one is not charged.
        """
        if self.verbose:
            print("Agent attempts to grab something.")
        if (self.goldloc == self.loc_path[-1]) and not self.haveGold:
            self.haveGold = True
            print("💰💰Agent picks up the gold💰💰")
            self.goldloc = None
        else:
            self.score -= 1
            if self.verbose:
                print("Agent grabs nothing.")
                print("Current score: {}".format(self.score))

    def climb(self, allowclimbwithoutgold=True):
        """Climb out of the cave; costs 1 point, +1000 when carrying the gold.

        With ``allowclimbwithoutgold=False`` the climb is refused only when the
        agent does NOT hold the gold.
        """
        if not self.haveGold and not allowclimbwithoutgold:
            if self.verbose:
                print("Agent attempts to climb out but does not have the gold.")
            return

        if self.haveGold:
            self.score += 1000
            print("🎉🎉Agent climbs out of the cave with the gold.🎉🎉")
        else:
            print("Agent climbs out without the gold.")
        self.score -= 1
            
       
        
        

### Shortest route back along the beeline algorithm

In [80]:
# class Beeline(Actuators):
#     self.__init__(self):
#     self.nodes = self.getNodes(savenodes=True)
#     self.edges = self.geteEdges(saveedges=True)
#     self.spath = self.getShortestPath()

#     def getNodes(self, savenodes=False):
#         G = nx.Graph()
#         nodes = self.loc_path
#         for a, (i, j) in enumerate(nodes):
#             G.add_node(a, coordinate=(i,j))
#         if savenodes: return G.nodes
    
#     def getEdges(self, saveedges=False):
#         edges = []
#         for i in range(len(self.EnvSize**4)):
#             a = random.choice(self.loc_path)
#             b = random.choice(self.loc_path)
#             if a[0] == b[0] and (np.absolute(a[1] - b[1]) == 1):        
#                 edges.append((a, b))
#             elif a[1] == b[1] and (np.absolute(a[0] - b[0])== 1):
#                 edges.append((a, b))
#         edges = set(edges)
        
#         dictionary = {}
#         for e in range(len(G.nodes)):
#             x = G.nodes[e]['coordinate']
#             dictionary[x] = e
        
#         edge_list = []
#         for i, j in edges:
#             x, y = dictionary.get(i), dictionary.get(j)
#             edge_list.append((x,y))
        
#         for i in edge_list:
#             G.add_edge(*i)
#         if saveedges: return edges
    
#     def getShortestPath(self):
#         spath = nx.shortest_path(G, source=(dictionary.get(self.loc_path[-1])), target=(dictionary.get(self.loc_path[-1])))
#         return spath
    

In [109]:
# class Beeline(Actuators):
#     self.__init__(self):
#     self.nodes = self.getNodes(savenodes=True)
#     self.edges = self.geteEdges(saveedges=True)
#     self.spath = self.getShortestPath()

def beeline(agentpath, source, target):
    """Shortest way from ``source`` to ``target`` using only visited squares.

    agentpath -- list of (row, column) tuples the agent has stood on.
    source, target -- squares that occur in ``agentpath``.

    Returns ``(spath, path_back)``: ``spath`` is the path as node ids (the
    index of each square's last occurrence in ``agentpath``) and ``path_back``
    is the same path as (row, column) tuples. Both include source and target.
    Errors raised by ``nx.shortest_path`` (unknown square, no path) propagate.
    """
    G = nx.Graph()

    # One node per path entry; a revisited square keeps its latest node id.
    node_of = {}
    for node_id, (row, col) in enumerate(agentpath):
        G.add_node(node_id, coordinate=(row, col))
        node_of[(row, col)] = node_id

    # Connect every pair of visited squares that share a wall. Looking only
    # down and right covers each pair exactly once, and doing it exhaustively
    # (rather than by random sampling) guarantees no edge is missed.
    for (row, col), node_id in node_of.items():
        for neighbour in ((row + 1, col), (row, col + 1)):
            other = node_of.get(neighbour)
            if other is not None:
                G.add_edge(node_id, other)

    # shortest path
    spath = nx.shortest_path(G, source=node_of.get(source), target=node_of.get(target))

    # remapping nodes to coordinates
    path_back = [G.nodes[node_id]['coordinate'] for node_id in spath]

    return spath, path_back

## Main loop

In [129]:
def simulate(verbose, addwumpus=True, pitprob=0.2):
    """
    Runs one episode of the naive (random) agent in a freshly generated world.

    The agent picks random actions (never a random climb) until it dies in a
    pit, is eaten by the wumpus, or steps on the gold. On the gold it grabs
    it, walks the shortest way back over already visited squares and climbs
    out.

    verbose   -- print intermediate actions and scores.
    addwumpus -- place a wumpus in the world.
    pitprob   -- probability that a free room is a pit.

    Displays the final state of the environment and returns the result of
    that display() call.
    """
    Act = Actuators(verbose = verbose, addwumpus=addwumpus, pitprob=pitprob)
    print("\nAgent start: 👣 | Gold: 💰 | Wumpus: 👾 | Pits: 🕳️ | Unattended: .\n")
    display("Starting state:", Act.printEnv(agentsteps=Act.loc_path, spath=0))
    print("Agent perceives: ", [items.get(i) for i in (Act.percept_hist[-1])])

    way_back = None  # set only when the agent finds the gold

    while Act.score < 1000 and not Act.dead:

        if not Act.haveGold and Act.goldloc and (Act.goldloc == Act.loc_path[-1]):
            Act.haveGold = True
            Act.goldloc = None
            print("💰💰Agent picks up the gold💰💰")

            way_back = beeline(agentpath=Act.loc_path, source=Act.loc_path[-1], target=Act.loc_path[0])
            Act.loc_path.extend(way_back[1])

            # A path of k squares takes k - 1 moves; the climb itself is
            # charged inside climb().
            Act.score -= len(way_back[0]) - 1
            if Act.loc_path[-1] == Act.loc_path[0]:  # back on the start square
                Act.climb()

            break

        if any(loc in Act.pits for loc in Act.loc_path): # agent falls in pit percept
            Act.dead = True
            Act.score -= 1000
            print("Agent fell into a pit")
            break

        elif (Act.wumpusloc in Act.loc_path) and not Act.wumpuscry: # agent eaten by wumpus percepts
            Act.dead = True
            Act.score -= 1000
            print("Agent has fallen victim to the wumpus")
            break

        else:
            action = random.choice(Act.action[:-1]) # excluding randomly attempting to climb
            if action == "Forward":
                Act.forward(direction=Act.curr_dir[-1])

                if Act.loc_path[-1] not in Act.getIndexes(): # Bump
                    Act.bump()
                else:
                    display("Agent moves to a new room.", Act.printEnv(agentsteps=Act.loc_path, spath=0))
                    print("Agent perceives: ", [items.get(i) for i in (Act.percept_hist[-1])])

            elif action == "Tright": # turn right
                Act.turnright(direction=Act.curr_dir[-1])

            elif action == "Tleft": # turn left
                Act.turnleft(direction=Act.curr_dir[-1])

            elif action == "Shoot": # Shoots arrow
                if Act.haveArrow:
                    Act.get_arrow_path()
                    Act.shoot()
            else:
                Act.grab()     # randomly grabs (the gold itself is picked up
                               # deliberately at the top of the loop)

    print("\n\n--Final score: {}".format(Act.score), "\n--Final state and agent path:")

    print("\nAgent bee line: 👣 |  Shortest rout back: 🦶\nGold: 💰 | Wumpus: 👾 | Pits: 🕳️ | Unattended: .\n")

    if way_back is not None:
        final_view = Act.printEnv(agentsteps=Act.loc_path, final_state=True, returnpath=True, spath=way_back[1])
    else:
        final_view = Act.printEnv(agentsteps=Act.loc_path, final_state=True, returnpath=False, spath=0)
    return display(final_view)

# Simulation

Initialize/refresh environment:

**1. Pitprob** - set the pit probability between 0 (no pits) and 1 (all other rooms have pits) for testing functionality

**2. Addwumpus** - add wumpus (True/False) for testing functionality

**3. Verbose** - print intermediate steps (turn, direction, grab, move forward and new state after moving forward, current score) 

In [107]:
# # rerun to referesh environment
# Env = Environment(verbose=True, pitprob= 0.2, addwumpus=1) # instantiates object for creating and printing (don't change object name)
# env = Env.getEnv() # creates environment

Run simulation given current environment:

In [145]:
# Run one episode with intermediate output, a 20% pit probability and a wumpus
# (addwumpus=1 is simply a truthy value).
simulate(verbose=True, pitprob= 0.2, addwumpus=1)


Agent start: 👣 | Gold: 💰 | Wumpus: 👾 | Pits: 🕳️ | Unattended: .



'Starting state:'

Unnamed: 0,1,2,3,4
4,💰,🕳️,.,🕳️
3,.,🕳️,.,👾
2,.,.,.,.
1,👣,🕳️,.,.


Agent perceives:  ['Breeze']
Agent turns left.
Current score: -1
Current score: -2


'Agent moves to a new room.'

Unnamed: 0,1,2,3,4
4,💰,🕳️,.,🕳️
3,.,🕳️,.,👾
2,👣,.,.,.
1,.,🕳️,.,.


Agent perceives:  []
Current score: -3


'Agent moves to a new room.'

Unnamed: 0,1,2,3,4
4,💰,🕳️,.,🕳️
3,👣,🕳️,.,👾
2,.,.,.,.
1,.,🕳️,.,.


Agent perceives:  ['Breeze']
🏹🏹 Agent fires arrow 🏹🏹
Arrow misses its mark
Agent attempts to grab something.
Agent grabs nothing.
Current score: -14
Agent turns right.
Current score: -15
Agent attempts to grab something.
Agent grabs nothing.
Current score: -16
Current score: -17


'Agent moves to a new room.'

Unnamed: 0,1,2,3,4
4,💰,🕳️,.,🕳️
3,.,🕳️,.,👾
2,.,.,.,.
1,.,🕳️,.,.


Agent perceives:  ['Pit', 'Breeze']
Agent fell into a pit


--Final score: -1017 
--Final state and agent path:

Agent bee line: 👣 |  Shortest rout back: 🦶
Gold: 💰 | Wumpus: 👾 | Pits: 🕳️ | Unattended: .



Unnamed: 0,1,2,3,4
4,💰,🕳️,.,🕳️
3,👣,🕳️,.,👾
2,👣,.,.,.
1,👣,🕳️,.,.
