In [None]:
!pip install agentpy

Collecting agentpy
  Downloading agentpy-0.1.5-py3-none-any.whl.metadata (3.3 kB)
Collecting SALib>=1.3.7 (from agentpy)
  Downloading salib-1.5.1-py3-none-any.whl.metadata (11 kB)
Collecting multiprocess (from SALib>=1.3.7->agentpy)
  Downloading multiprocess-0.70.17-py311-none-any.whl.metadata (7.2 kB)
Collecting dill>=0.3.9 (from multiprocess->SALib>=1.3.7->agentpy)
  Downloading dill-0.3.9-py3-none-any.whl.metadata (10 kB)
Downloading agentpy-0.1.5-py3-none-any.whl (53 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m53.9/53.9 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading salib-1.5.1-py3-none-any.whl (778 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m778.9/778.9 kB[0m [31m8.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading multiprocess-0.70.17-py311-none-any.whl (144 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m144.3/144.3 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.9-py3-none

In [21]:
import agentpy as ap
import numpy as np
import random, json
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import seaborn as sns, IPython
from matplotlib import pyplot as plt, cm

class MazeAgent(ap.Agent):
    '''
    Q-learning agent that walks the model's grid environment.

    Agent elements, initialized in ``setup``:
    - 4 possible actions, each mapped to a (row, column) offset
    - Q values as a nested dict ``Q[position][action]``: the ``Q`` parameter
      when one is provided, otherwise an all-zero table covering every cell
      of the environment
    - Policy values read from the parameters: epsilon (probability of a
      random action), alpha (learning rate) and gamma (discount factor)
    '''
    def setup(self):
        '''
        Initialize actions, accumulated reward, Q-table and learning parameters.
        '''
        # Actions are linked to a movement in the grid.
        self.actions = {'up': (-1,0), 'down': (1, 0), 'left': (0, -1), 'right': (0, 1)}
        self.env = self.model.env
        self.reward = 0

        # Use the provided Q-table as-is (it is updated in place while
        # training); fall back to a zero table sized from the environment.
        q_table = getattr(self.p, 'Q', None)
        if q_table is None:
            rows, cols = self.env.shape
            q_table = {
                (row, col): {action: 0 for action in self.actions}
                for row in range(rows)
                for col in range(cols)
            }
        self.Q = q_table

        # Learning policies
        self.epsilon = self.p.epsilon
        self.alpha = self.p.alpha
        self.gamma = self.p.gamma

    def execute(self):
        '''
        Choose an action for the current position, move accordingly and add
        the reward of the cell reached to ``self.reward``.

        Used both while training and when replaying the learned policy.
        Returns the name of the action taken.
        '''
        action = self.choose_action(self.get_position())
        self.env.move_by(self, self.actions[action])
        self.reward += self.env.get_reward(self.get_position())
        return action

    def get_position(self):
        '''
        Get position of agent in environment
        '''
        return self.env.positions[self]

    def train(self, episodes = 0):
        '''
        Training. The agent performs ``episodes`` episodes.
        An episode is a complete cycle that runs until the agent reaches the
        goal; there is no cap on the number of moves within an episode.
        After each episode the agent is moved back to the start position and
        the environment is set up again.
        '''
        for _ in range(episodes):
            state = self.p.start                                 # Initial position
            while state != self.p.goal:                          # Iterate until agent reaches the goal
                action = self.execute()                          # Choose & execute action
                new_state = self.get_position()
                reward = self.env.get_reward(new_state)          # Get action reward
                self.update_Q(state, action, reward, new_state)  # Update Q-values
                state = new_state
            self.env.move_to(self, self.p.start)
            self.env.setup()                                     # Re-initialize environment

    def choose_action(self, state):
        '''
        Apply the epsilon-greedy policy: with probability epsilon return a
        random action, otherwise the action with the highest Q-value for
        ``state`` (ties go to the first such action in the table's order).
        '''
        if random.uniform(0, 1) < self.epsilon:
            return random.choice(list(self.actions.keys()))
        else:
            return max(self.Q[state], key=self.Q[state].get)

    def update_Q(self, state, action, reward, new_state):
        '''
        Update Q[state][action] according to the Q-learning definition:
        Q <- Q + alpha * (reward + gamma * max_a Q[new_state][a] - Q)
        '''
        max_Q_new_state = max(self.Q[new_state].values())
        self.Q[state][action] = self.Q[state][action] + self.alpha * (
            reward + self.gamma * max_Q_new_state - self.Q[state][action])


class Maze(ap.Grid):
    '''
    Maze environment: a grid that keeps its own copy of the ``maze``
    parameter and turns cell values into rewards.
    '''
    def setup(self):
        '''
        Store a fresh copy of the ``maze`` parameter as ``self.environment``.
        '''
        maze_values = self.p.maze
        self.environment = np.array(maze_values, copy=True)

    def get_reward(self, state):
        '''
        Return the reward for ``state``: the value stored at that cell, negated.
        '''
        cell_value = self.environment[state]
        return -cell_value
class MazeModel(ap.Model):
    '''
    Model holding one Maze environment and one MazeAgent.
    The agent is trained during setup; the simulation steps then replay the
    learned policy without exploration until the goal is reached.
    '''
    def setup(self):
        '''
        Build the environment, place the agent at the start and train it.
        '''
        # Size the grid from the maze given in the parameters, not from a
        # notebook-level global, so the model works with any maze passed in.
        self.env = Maze(self, shape=self.p.maze.shape)
        self.agent = MazeAgent(self)
        self.env.add_agents([self.agent], positions=[self.p.start])
        self.agent.train(self.p.episodes)
        self.agent.epsilon = 0              # Agent uses only information after training
        self.agent.reward = 0               # Discard the reward accumulated while training

    def step(self):
        '''
        Let the trained agent perform one move.
        '''
        self.agent.execute()

    def update(self):
        '''
        Stop the simulation once the agent stands on the goal position.
        '''
        if self.agent.get_position() == self.p.goal:
            print('ending')
            self.stop()

    def end(self):
        '''
        Report the agent's Q-values under the key 'Q-Table'.
        '''
        self.report('Q-Table', self.agent.Q)






def animation_plot(model, ax):
    '''
    Draw the current state of ``model`` on ``ax``.

    The maze from the model parameters is copied, the goal cell and the
    agent's current cell are overwritten with their own plotting codes, and
    the result is rendered with ``ap.gridplot``. The title shows the agent's
    accumulated reward.

    Relies on the module-level cell codes e, m, w, g, r, explorer and goal.
    No local variable may reuse one of those names, otherwise the colour map
    below would be keyed by the wrong value.
    '''
    # Work on a copy so painting the goal and the agent never alters the maze.
    grid = np.copy(model.p.maze)
    grid[model.p.goal] = goal
    agent = list(model.env.agents)[0]
    state = model.env.positions[agent]
    grid[state] = explorer

    # Colors per cell code: e = black, m = brown, w = blue, g = dark green,
    # r = light grey, explorer (agent) = red, goal = gold
    color_dict = {e:'#000000', m: '#7c4700', w: '#2a9dfb', g: '#006400', r:'#d3d3d3', explorer:'#ff0000', goal:'#d4af37'}

    ap.gridplot(grid, ax=ax, color_dict=color_dict, convert=True)
    ax.set_title("Agent Q-Learning\nTravel cost: {}\n".format(agent.reward))

# Value codification for plotting. The same values fill the maze below, and
# Maze.get_reward returns them negated, so they also act as per-cell costs.
# Alternative codification for a pedestrian:
# e, r, g, w, m, explorer, goal = 100, 21, 3, 2, 10, -1, -2
e, r, g, w, m, explorer, goal = 100, 2, 3, 21, 10, -1, -2



# Reading information from files
# f = open('params_5_5__1.json')
# values = json.load(f)
# grid = values['state']
actions = ['up', 'down', 'left', 'right']

# Environment representation with a grid
maze =  np.array([
                 [e, e, e, e, e, e, e, e, e, e, e, e, e, e, e, e, e, e, e, e],
                 [e, e, e, e, e, e, w, w, r, r, r, r, w, w, e, e, e, e, e, e],
                 [e, e, e, e, e, e, w, w, r, r, r, r, w, w, e, e, e, e, e, e],
                 [e, e, e, e, e, e, w, w, r, r, r, r, w, w, e, e, e, e, e, e],
                 [e, e, e, e, e, e, w, w, r, r, r, r, w, w, e, e, e, e, e, e],
                 [e, e, e, e, e, e, w, w, r, r, r, r, w, w, e, e, e, e, e, e],
                 [e, e, e, e, e, e, w, w, r, r, r, r, w, w, e, e, e, e, e, e],
                 [e, w, w, w, w, w, w, w, g, g, g, g, w, w, w, w, w, w, w, e],
                 [e, w, w, w, w, w, w, w, g, g, g, g, w, w, w, w, w, w, w, e],
                 [e, r, r, r, r, r, g, g, r, r, r, r, g, g, r, r, r, r, r, e],
                 [e, r, r, r, r, r, g, g, r, r, r, r, g, g, r, r, r, r, r, e],
                 [e, r, r, r, r, r, g, g, r, r, r, r, g, g, r, r, r, r, r, e],
                 [e, r, r, r, r, r, g, g, r, r, r, r, g, g, r, r, r, r, r, e],
                 [e, w, w, w, w, w, w, w, g, g, g, g, w, w, w, w, w, w, w, e],
                 [e, w, w, w, w, w, w, w, g, g, g, g, w, w, w, w, w, w, w, e],
                 [e, e, e, e, e, e, w, w, r, r, r, r, w, w, e, e, e, e, e, e],
                 [e, e, e, e, e, e, w, w, r, r, r, r, w, w, e, e, e, e, e, e],
                 [e, e, e, e, e, e, w, w, r, r, r, r, w, w, e, e, e, e, e, e],
                 [e, e, e, e, e, e, w, w, r, r, r, r, w, w, e, e, e, e, e, e],
                 [e, e, e, e, e, e, e, e, e, e, e, e, e, e, e, e, e, e, e, e]])

#maze = np.zeros((5, 5))
n = len(maze)                  # number of rows (name kept for later cells)
n_rows, n_cols = maze.shape

# Initialize Q-values with 0 for every cell of the maze. Rows and columns are
# taken separately from maze.shape so a non-square maze is covered completely.
Q = {
    (row, col): {action: 0 for action in actions}
    for row in range(n_rows)
    for col in range(n_cols)
}

parameters = {
    'maze': maze,
    'start': (18, 13),
    'goal': (4, 6),
    'goal_value': 50,
    'epsilon': 0.1,
    'alpha': 0.99,
    'gamma': 0.99,
    'episodes': 300,
    'steps': 500,
    'Q': Q
}


fig = plt.figure(figsize=(7,7))
ax = fig.add_subplot(111)
mazeModel = MazeModel(parameters)
# NOTE: this rebinds `animation`, which the import cell uses as the alias for
# matplotlib.animation; from here on the name refers to the animation object.
animation = ap.animate(mazeModel, fig, ax, animation_plot)
IPython.display.HTML(animation.to_jshtml())


ending
