Luis Adrián Amado Álvarez A01571393

Código del examen integrador



In [49]:
!pip install agentpy



In [50]:
import agentpy as ap
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns, IPython
import random

In [51]:
class StreetAgent(ap.Agent):
  """Q-learning agent that moves through the street grid towards a goal cell.

  The agent's state is its position on the grid and its actions are the four
  single-cell moves listed in ``self.actions``. It keeps its own Q table,
  which is filled in by ``train`` and then used by ``execute``.
  """

  def setup(self):
    """Read the learning parameters and build a zero-initialised Q table."""
    # Possible actions, expressed as (row offset, column offset)
    self.actions = {"up": (-1, 0), "down": (1, 0), "right": (0, 1), "left": (0, -1)}

    # Learning parameters
    self.epsilon = self.p.epsilon  # probability of taking a random action
    self.train_episodes = self.p.train_episodes
    self.alpha = self.p.alpha  # weight of each Q-table correction
    self.gamma = self.p.gamma  # weight of the best value of the next state

    self.goal = self.p.goal
    self.grid = self.model.grid

    # Running sum of the rewards collected by execute()
    self.reward = 0

    # The agent keeps track of its own Q table while training and executing:
    # one {action: value} dictionary per grid cell, all starting at zero
    rows, cols = self.grid.shape[0], self.grid.shape[1]
    self.Q = {
        (i, j): {action: 0 for action in self.actions}
        for i in range(rows)
        for j in range(cols)
    }

  def get_position(self):
    """Return the current position (the state) of the agent on the grid."""
    return self.grid.positions[self]

  def execute(self):
    """Choose the next action, perform it and return the action name.

    The reward of the cell the agent ends up on is added to ``self.reward``.
    Runs at each step of both training and simulation.
    """
    action = self.choose_action(self.get_position())
    self.grid.move_by(self, self.actions[action])
    self.reward += self.grid.get_reward(self.get_position())
    return action

  def train(self, episodes):
    """Run ``episodes`` training episodes, updating the Q table after every move.

    Each episode lasts until the agent reaches the goal. Afterwards the agent
    is moved back to the start position and the grid is set up again.
    """
    print("Training...")
    for _ in range(episodes):
      # Read the real position instead of assuming the start cell, so every
      # Q update is credited to the cell the move was actually made from
      state = self.get_position()

      # Keep going until the goal is found
      while state != self.goal:
        action = self.execute()
        new_state = self.get_position()
        reward = self.grid.get_reward(new_state)
        self.update_Q(state, action, reward, new_state)
        state = new_state

      # Reset grid state
      self.grid.move_to(self, self.p.start)
      self.grid.setup()

    print("Finished training")

  def choose_action(self, state):
    """Pick an action name for ``state`` with an epsilon-greedy rule.

    With probability ``self.epsilon`` a random action is returned. Otherwise
    the action with the highest Q value for ``state`` is returned; on a tie
    the action listed first in ``self.actions`` wins.
    """
    if random.random() < self.epsilon:
      return random.choice(list(self.actions))

    q_values = self.Q[state]
    return max(q_values, key=q_values.get)

  def update_Q(self, state, action, reward, new_state):
    """Apply the Q-learning formula for one observed transition.

    Q(s, a) <- Q(s, a) + alpha * (reward + gamma * max Q(s', .) - Q(s, a))
    """
    best_next = max(self.Q[new_state].values())
    current = self.Q[state][action]
    self.Q[state][action] = current + self.alpha * (reward + self.gamma * best_next - current)

In [52]:
class StreetGrid(ap.Grid):
  """Grid environment that holds the reward values of the streets."""

  def setup(self):
    """Keep a copy of the street matrix taken from the model parameters."""
    self.streets = np.copy(self.p.streets)

  def get_reward(self, pos):
    """Return the reward (or value) for standing on position ``pos``.

    The goal cell yields ``goal_value``. Cells with a negative tile value
    yield a flat -1000. Every other cell yields its tile value negated,
    because the tile value represents a travel cost.
    """
    tile = self.p.streets[pos]

    # The goal has its own reward, whatever tile lies underneath it
    if pos == self.p.goal:
      return self.p.goal_value

    # Closed roads (-1) and buildings (-10) are not blocked here; they only
    # carry a penalty large enough to steer the agent away from them
    if tile < 0:
      return -1000

    return -tile

In [53]:
class StreetModel(ap.Model):
  """Simulation model that holds the street grid and the single learning agent."""

  def setup(self):
    """Create the grid and the agent, then train the agent before the run."""
    params = self.p

    # The grid is created first because the agent reads it from the model
    # inside its own setup
    self.grid = StreetGrid(self, shape=params.streets.shape)
    self.agent = StreetAgent(self)
    self.grid.add_agents([self.agent], positions=[params.start])

    # Training accumulates reward on the agent, so clear it for the real run
    self.agent.train(params.train_episodes)
    self.agent.reward = 0

    # With epsilon at zero the trained agent never takes a random action
    self.agent.epsilon = 0

  def step(self):
    """Execute the agent's action for this step and record its new position."""
    self.agent.execute()

    # The recorded positions are streamed to Unity later on
    current = self.agent.get_position()
    self.record('position', current)

  def update(self):
    """Stop the simulation once the agent has made it to the goal."""
    # NOTE(review): the agentpy docs describe `update` as running after
    # `setup` and after every `step` - confirm against the installed version
    if self.agent.get_position() != self.p.goal:
      return

    print("Ending...")
    self.stop()

  def end(self):
    """Report the final Q table when the simulation ends, for debugging."""
    self.report('Q-Table', self.agent.Q)

In [54]:
# Tile values: each one stands for a road condition and gets its own colour
# in the animation.

# Drivable surfaces; StreetGrid.get_reward negates the value into a cost
street_tile = 1
dirt_road_tile = 2
cracked_street_tile = 4
pothole_tile = 5

# Negative map values, which StreetGrid.get_reward turns into a -1000 penalty
closed_road_tile = -1
building_tile = -10

# Markers that animation_plot paints over a copy of the map
goal_tile = -2
agent_tile = -3

def animation_plot(model, ax):
  """Draw one frame of the simulation: the street map, the goal and the agent.

  Args:
    model: The model being animated; its parameters, grid, agent and step
      counter ``t`` are read.
    ax: Matplotlib axes on which the frame is drawn.
  """
  # Paint the markers on a copy so the street matrix in the parameters stays intact
  frame = np.copy(model.p.streets)
  frame[model.p.goal] = goal_tile

  # The agent is painted after the goal, so it wins when both share a cell
  agent = list(model.grid.agents)[0]
  state = model.grid.positions[agent]
  frame[state] = agent_tile

  # Each tile type gets a different color
  color_dict = {
      street_tile: "#5e5e5e",
      dirt_road_tile: "#a38f64",
      cracked_street_tile: "#8aa1e6",
      pothole_tile: "#292929",
      closed_road_tile: "#992222",
      building_tile: "#c2c2c2",
      goal_tile: "#f5ee20",
      agent_tile: "#00ff00",
  }

  ap.gridplot(frame, ax=ax, color_dict=color_dict, convert=True)

  # The title shows the running cost, the step count and the Q values of the current cell
  ax.set_title(
      f"Agent Q-Learning\nTravel cost: {agent.reward}  Steps: {model.t}\n{state}: {agent.Q[state]}"
  )

In [55]:
# Street map: the matrix of tile values, read from a file next to the notebook
streets = np.load("streets-2.npy")

parameters = {
    "streets": streets,
    "train_episodes": 1000,  # episodes the agent trains for during model setup
    "alpha": 1,              # weight of each Q-table correction
    "epsilon": 0.4,          # probability of a random action while training
    "gamma": 1,              # weight of the best value of the next state
    "start": (0, 6),         # cell where the agent is placed
    "goal": (21, 18),        # cell the agent has to reach
    "goal_value": 1000,      # reward for standing on the goal
    "steps": 100,            # presumably agentpy's step limit for a run - confirm
}

# Build the model, then animate it on a single 7x7 inch axes
model = StreetModel(parameters)
fig, ax = plt.subplots(figsize=(7, 7))
animation = ap.animate(model, fig, ax, animation_plot)
IPython.display.HTML(animation.to_jshtml())

Training...
Finished training
Ending...


In [58]:
# Run a fresh model without the animation and pull the recorded 'position'
# variable out of the results to get the full list of the agent's movements

model = StreetModel(parameters)
results = model.run(display=False)
recorded = results.arrange_variables()
position_list = list(recorded['position'])

Training...
Finished training
Ending...


## Preguntas

* Propose appropriate tools and techniques to solve the problem. Justify your selection.

Para resolver este problema, utilicé la librería para simulaciones de agentes conocida como AgentPy. Esta me ayudó a modelar el ambiente y el agente, de modo que solo me tenía que preocupar por la lógica. Para encontrar el mejor camino, usé el algoritmo de Q-learning, ya que permite generar un agente que aprende a través de un entrenamiento y que al final sabe cómo llegar a su objetivo.

* Clearly describe the agent and its environment, defining all necessary components (states, actions, goal, utility).

El agente aprende y recuerda todos los movimientos que ha intentado para encontrar una mejor solución en el futuro. Su único estado es la posición en la que actualmente se encuentra dentro de la cuadrícula. Sus acciones son moverse una celda hacia arriba, abajo, izquierda o derecha. Su meta es llegar a una celda específica de la cuadrícula y su utilidad es el costo en el que tiene que incurrir para llegar al destino.

El ambiente es una cuadrícula (discreta) que cuenta con un valor en cada celda. Cada valor representa un tipo de terreno distinto, con un costo diferente para el agente.

* Provide an implementation of the agent and an appropriate visualization of the environment.

Generé una visualización de la simulación donde cada tipo de celda tiene un color diferente y se puede consultar información importante, como la recompensa y los valores de la tabla Q, en cualquier momento de la simulación.

* Provide the best route found with your implementation.

La mejor ruta que encontré con mi solución le toma al agente un total de 57 pasos y además termina con una recompensa de 923.

## Transmisión a Unity

Para la transmisión de los datos a Unity pasé todo este código a un archivo local en mi computadora, desde donde pude generar un socket al que se conecta Unity y por el que se le va pasando la información paso por paso.