In [3]:
#!pip install --upgrade mesa
#!pip install seaborn

# Definiendo clase obstáculo y grid (escenarios)

In [14]:
import json
import random
import math
from mesa import Agent, Model
from mesa.space import MultiGrid
from queue import PriorityQueue

# -------------------------
# Helper: Comprobar si una celda está libre de obstáculos
# -------------------------
def cell_is_free(model, pos):
    cell_agents = model.grid.get_cell_list_contents(pos)
    for agent in cell_agents:
        if isinstance(agent, Obstacle):
            return False
    return True

# -------------------------
# Clase Obstacle
# -------------------------
class Obstacle(Agent):
    """Representa un obstáculo estático en el grid."""
    def __init__(self, model, x, y):
        super().__init__(model)
        self.x = x
        self.y = y
    def step(self):
        pass  # No se mueve

# -------------------------
# Modelo de Grid
# -------------------------
class GridModel(Model):
    """Modelo que carga un grid de dimensiones dadas, coloca obstáculos desde datos JSON y crea un agente según el tipo."""
    def __init__(self, width, height, goalx, goaly, obstacles_data, agent_type):
        super().__init__()
        self.grid = MultiGrid(width, height, torus=False)
        self.obstacles = []

        for obs in obstacles_data:
            obstacle = Obstacle(self, obs["x"], obs["y"])
            self.grid.place_agent(obstacle, (obs["x"], obs["y"]))
            self.obstacles.append(obstacle)

        if agent_type == "reactivo":
            self.agent = Walle_reactivo(self, 0, 0, goalx, goaly)
        elif agent_type == "A*":
            self.agent = Walle_AStar(self, 0, 0, goalx, goaly)
        elif agent_type == "Q-Learning":
            self.agent = Walle_QLearning(self, 0, 0, goalx, goaly)
        elif agent_type == "Bayesiano":
            self.agent = Walle_Bayesiano(self, 0, 0, goalx, goaly)
        else:
            raise ValueError("Tipo de agente desconocido")
        self.grid.place_agent(self.agent, (0, 0))

    def step(self):
        self.agent.step()

    def save_log(self, filename="simulation_log.json"):
        log_data = {
            "grid_size": {"width": self.grid.width, "height": self.grid.height},
            "obstacles": [{"x": obs.x, "y": obs.y} for obs in self.obstacles],
            "agent": {
                "type": type(self.agent).__name__,
                "spawnPosition": {"x": self.agent.spawn_x, "y": self.agent.spawn_y},
                "goal": {"x": self.agent.goalx, "y": self.agent.goaly},
                "path": self.agent.path
            }
        }
        with open(filename, "w") as file:
            json.dump(log_data, file, indent=4)
        print(f"Simulation log saved to {filename}")


# Definición de Agentes (Reactivo, A*, Q-Learning, Bayesiano)

In [15]:

# -------------------------
# Agente Reactivo
# -------------------------
class Walle_reactivo(Agent):
    """Se mueve aleatoriamente hasta alcanzar la meta o exceder el número máximo de pasos."""
    def __init__(self, model, x, y, goalx, goaly):
        super().__init__(model)
        self.x = x
        self.y = y
        self.spawn_x = x
        self.spawn_y = y
        self.goalx = goalx
        self.goaly = goaly
        self.steps_taken = 0
        self.path = [{"x": x, "y": y}]

    def step(self):
        if (self.x, self.y) == (self.goalx, self.goaly):
            print(f"Agente reactivo llegó a la meta ({self.x}, {self.y}) en {self.steps_taken} pasos!")
            return


        moves = []
        grid_width, grid_height = self.model.grid.width, self.model.grid.height

        # Solo se agregan movimientos a celdas que estén dentro del grid y libres de obstáculos.
        if self.x < grid_width - 1 and cell_is_free(self.model, (self.x+1, self.y)):
            moves.append((self.x+1, self.y))
        if self.x > 0 and cell_is_free(self.model, (self.x-1, self.y)):
            moves.append((self.x-1, self.y))
        if self.y < grid_height - 1 and cell_is_free(self.model, (self.x, self.y+1)):
            moves.append((self.x, self.y+1))
        if self.y > 0 and cell_is_free(self.model, (self.x, self.y-1)):
            moves.append((self.x, self.y-1))

        if moves:
            new_x, new_y = random.choice(moves)
            self.model.grid.move_agent(self, (new_x, new_y))
            self.x, self.y = new_x, new_y
            self.steps_taken += 1
            self.path.append({"x": self.x, "y": self.y})
            print(f"Reactivo se movió a ({self.x}, {self.y})")
        else:
            print("Agente reactivo ya no tiene movimientos!")


El algoritmo A* usa la ecuación matemática para calcular la función de costo estimado:

$$
f(n) = g(n) + h(n)
$$

Donde:

- $f(n)$ es el costo total estimado de llegar al objetivo desde el nodo $n$.
- $g(n)$ es el costo real acumulado desde el nodo inicial hasta el nodo $n$.
- $h(n)$ es la heurística, que estima el costo desde $n$ hasta el objetivo.

La heurística utilizada para este agente es la distancia Manhattan:

$$
h(n) = |x_n - x_{goal}| + |y_n - y_{goal}|
$$


In [16]:
# -------------------------
# Agente A* (Basado en metas)
# -------------------------
class Walle_AStar(Agent):
    """Utiliza A* para calcular y seguir la ruta óptima hacia la meta."""
    def __init__(self, model, x, y, goalx, goaly):
        super().__init__(model)
        self.x = x
        self.y = y
        self.spawn_x = x
        self.spawn_y = y
        self.goalx = goalx
        self.goaly = goaly
        self.path = [{"x": x, "y": y}]
        self.a_star_path = self.a_star_search((self.x, self.y), (self.goalx, self.goaly))
        self.path_index = 0
        self.reached_goal = False
        self.steps = 0 

    def a_star_search(self, start, goal):
        def heuristic(a, b):
            return abs(a[0]-b[0]) + abs(a[1]-b[1])

        open_set = PriorityQueue()
        open_set.put((0, start))
        came_from = {}
        g_score = {start: 0}
        f_score = {start: heuristic(start, goal)}
        grid_width, grid_height = self.model.grid.width, self.model.grid.height

        while not open_set.empty():
            _, current = open_set.get()
            if current == goal:
                path = []
                while current in came_from:
                    path.append(current)
                    current = came_from[current]
                path.reverse()
                return path

            x, y = current
            neighbors = [(x+1,y), (x-1,y), (x,y+1), (x,y-1)]
            valid_neighbors = []
            for n in neighbors:
                if 0 <= n[0] < grid_width and 0 <= n[1] < grid_height and cell_is_free(self.model, n):
                    valid_neighbors.append(n)
            for neighbor in valid_neighbors:
                tentative_g = g_score[current] + 1
                if neighbor not in g_score or tentative_g < g_score[neighbor]:
                    came_from[neighbor] = current
                    g_score[neighbor] = tentative_g
                    f_score[neighbor] = tentative_g + heuristic(neighbor, goal)
                    open_set.put((f_score[neighbor], neighbor))
        return []

    def step(self):
        if self.reached_goal:
            return  

        if self.path_index < len(self.a_star_path):
            next_move = self.a_star_path[self.path_index]
            self.model.grid.move_agent(self, next_move)
            self.x, self.y = next_move
            self.path.append({"x": self.x, "y": self.y})
            self.path_index += 1
            self.steps += 1 
            print(f"Agente A* se movió a ({self.x}, {self.y})")

            
            if (self.x, self.y) == (self.goalx, self.goaly):
                print(f"Agente AStar llegó a la meta ({self.x}, {self.y})! 🎯")
                print(f"Total de pasos: {self.steps} 🚶‍♂️")
                self.reached_goal = True  # Stop moving
        else:
            if not self.reached_goal:
                print(f"Agente AStar no encontró un camino ({self.goalx}, {self.goaly}). ❌")
                self.reached_goal = True  # Stop repeating messages


Agente de aprendizaje por refuerzo Q-Learning

$$
Q(s, a) \leftarrow Q(s, a) + \alpha \left[ r + \gamma max_{a'} Q(s', a') - Q(s, a) \right]
$$

Donde:

- $Q(s, a)$ es el valor Q actual para el estado $s$ y la acción $a$.
- $\alpha$ es la tasa de aprendizaje (learning rate).
- $r$ es la recompensa obtenida tras tomar la acción $a$ en el estado $s$.
- $\gamma$ es el factor de descuento (discount factor).
- $\max_{a'} Q(s', a')$ es el valor Q máximo para el siguiente estado $s'$.
- $Q(s, a)$ es el valor actual antes de la actualización.


In [17]:
# -------------------------
# Agente Q-Learning
# -------------------------
import random

class Walle_QLearning(Agent):
    """Agente Q-Learning con exploración adaptativa y memoria para evitar bucles."""

    def __init__(self, model, x, y, goalx, goaly, alpha=0.5, gamma=0.9, epsilon=0.2, epsilon_decay=0.99):
        super().__init__(model)
        self.x = x
        self.y = y
        self.spawn_x = x
        self.spawn_y = y
        self.goalx = goalx
        self.goaly = goaly
        self.alpha = alpha  # learning rate
        self.gamma = gamma  # descontador
        self.epsilon = epsilon  # exploration rate
        self.epsilon_decay = epsilon_decay  
        self.q_table = {}  # State -> {Action: Q-value}
        self.path = [{"x": x, "y": y}]
        self.visited = {}  
        self.steps = 0
        self.reached_goal = False 

    def get_state(self):
        return (self.x, self.y)

    def possible_actions(self):
        return self.possible_actions_at((self.x, self.y))

    def possible_actions_at(self, state):
        x, y = state
        actions = []
        grid_width, grid_height = self.model.grid.width, self.model.grid.height
        if x < grid_width - 1 and cell_is_free(self.model, (x + 1, y)):
            actions.append("DER")
        if x > 0 and cell_is_free(self.model, (x - 1, y)):
            actions.append("IZQ")
        if y < grid_height - 1 and cell_is_free(self.model, (x, y + 1)):
            actions.append("ARRIBA")
        if y > 0 and cell_is_free(self.model, (x, y - 1)):
            actions.append("ABAJO")
        return actions

    def choose_action(self):
        state = self.get_state()
        if state not in self.q_table:
            self.q_table[state] = {a: 0 for a in self.possible_actions()}

        if random.random() < self.epsilon:
            return random.choice(self.possible_actions())
        return max(self.q_table[state], key=self.q_table[state].get)

    def take_action(self, action):
        moves = {
            "DER": (self.x + 1, self.y),
            "IZQ": (self.x - 1, self.y),
            "ARRIBA": (self.x, self.y + 1),
            "ABAJO": (self.x, self.y - 1)
        }
        return moves[action]

    def update_q(self, state, action, reward, next_state):
        if next_state not in self.q_table:
            self.q_table[next_state] = {a: 0 for a in self.possible_actions_at(next_state)}

        best_next_q = max(self.q_table[next_state].values()) if self.q_table[next_state] else 0
        current_q = self.q_table[state][action]
        self.q_table[state][action] += self.alpha * (reward + self.gamma * best_next_q - current_q)

    def step(self):
        """Ejecuta acción y actualiza Q-table."""
        if self.reached_goal:
            return 
        state = self.get_state()
        action = self.choose_action()
        new_x, new_y = self.take_action(action)
        next_state = (new_x, new_y)
        # Stop movement if goal reached
        if (self.x, self.y) == (self.goalx, self.goaly):
            if not hasattr(self, "reached_goal") or not self.reached_goal:
                print(f"Agente Q-Learning llegó a la meta ({self.x}, {self.y})! 🎯")
                print(f"Total de pasos {self.steps} 🚶‍♂️")
                self.reached_goal = True
            return

        # Track visits for penalty
        self.visited[state] = self.visited.get(state, 0) + 1
        visit_penalty = -0.5 * self.visited[state]  # Stronger penalty

        if (0 <= new_x < self.model.grid.width and 0 <= new_y < self.model.grid.height and
            cell_is_free(self.model, (new_x, new_y))):

            self.model.grid.move_agent(self, (new_x, new_y))
            self.x, self.y = new_x, new_y
            self.path.append({"x": self.x, "y": self.y})
            
            self.steps += 1
            
            reward = 10 if (new_x, new_y) == (self.goalx, self.goaly) else -0.1 + visit_penalty
            self.update_q(state, action, reward, next_state)

            print(f"Agente Q-learning se movió a la {action} a ({self.x}, {self.y})")

        else:
            self.update_q(state, action, -2, state)
            print("Agente QLearning hizo un movimiento inválido!")

        self.epsilon = max(0.01, self.epsilon * self.epsilon_decay)


Agente Bayesiano

El agente selecciona una acción 𝑎 con una probabilidad proporcional a su peso actual 𝑃(𝑎), normalizando la suma de probabilidades:

$$
P(a) = \frac{w(a)}{\sum_{b \in A} w(b)}
$$

Después de ejecutar una acción, el agente ajusta la probabilidad 𝑤(𝑎) según el éxito o fracaso:

$$
w(a) \leftarrow w(a) \times f
$$

donde $f$ depende del resultado:

- $f = 1.10$ si la acción llevó al objetivo $\text{goal_reached} = \text{True}$,
- $f = 1.05$ si la acción tuvo éxito,
- $f = 0.95$ si la acción falló.

In [18]:
# -------------------------
# Agente Bayesiano
# -------------------------
import random

class Walle_Bayesiano(Agent):
    """Agente bayesiano que toma decisiones basadas en un modelo probabilístico simple,
    con memoria de visitas y condición de meta."""
    def __init__(self, model, x, y, goalx, goaly):
        super().__init__(model)
        self.x = x
        self.y = y
        self.spawn_x = x
        self.spawn_y = y
        self.goalx = goalx
        self.goaly = goaly
        self.path = [{"x": x, "y": y}]
        self.action_probs = {"RIGHT": 0.25, "LEFT": 0.25, "UP": 0.25, "DOWN": 0.25}
        self.visited = {(x, y): 1}
        self.visited_threshold = 3
        self.reached_goal = False
        self.steps = 0

    def take_action(self, action):
        """Devuelve las coordenadas destino según la acción elegida."""
        moves = {
            "RIGHT": (self.x + 1, self.y),
            "LEFT": (self.x - 1, self.y),
            "UP": (self.x, self.y + 1),
            "DOWN": (self.x, self.y - 1)
        }
        return moves.get(action, (self.x, self.y))

    def possible_actions(self):
        """Devuelve una lista de acciones posibles según el entorno y la memoria de visitas."""
        actions = []
        grid_width, grid_height = self.model.grid.width, self.model.grid.height
        possible = []
        if self.x < grid_width - 1 and cell_is_free(self.model, (self.x + 1, self.y)):
            possible.append("RIGHT")
        if self.x > 0 and cell_is_free(self.model, (self.x - 1, self.y)):
            possible.append("LEFT")
        if self.y < grid_height - 1 and cell_is_free(self.model, (self.x, self.y + 1)):
            possible.append("UP")
        if self.y > 0 and cell_is_free(self.model, (self.x, self.y - 1)):
            possible.append("DOWN")

        # filtrar acciones que llevan a celdas ya visitadas en exceso
        filtered_actions = []
        for action in possible:
            new_pos = self.take_action(action)
            count = self.visited.get(new_pos, 0)
            if count < self.visited_threshold:
                filtered_actions.append(action)
        return filtered_actions if filtered_actions else possible

    def choose_action(self):
        """Elige una acción de entre las posibles usando las probabilidades asociadas."""
        actions = self.possible_actions()
        total_prob = sum(self.action_probs[a] for a in actions)
        r = random.random() * total_prob
        cumulative = 0
        for a in actions:
            cumulative += self.action_probs[a]
            if r <= cumulative:
                return a
        return random.choice(actions)

    def update_beliefs(self, action, success, goal_reached=False):
        """
        Actualiza las probabilidades de acción mediante una regla multiplicativa.
        Se usa un factor mayor si se alcanza la meta.
        """
        if success:
            factor = 1.10 if goal_reached else 1.05
        else:
            factor = 0.95
        self.action_probs[action] *= factor

        total = sum(self.action_probs.values())
        if total > 0:
            for a in self.action_probs:
                self.action_probs[a] /= total

    def step(self):
        """Ejecuta un paso de acción y actualiza las creencias según el resultado."""
        if self.x == self.goalx and self.y == self.goaly:
            if not self.reached_goal:
                print("¡Meta alcanzada!")
                print(f"Número total de pasos: {self.steps} 🚶‍♂️")
                self.reached_goal = True
            return

        action = self.choose_action()
        new_x, new_y = self.take_action(action)

        if (0 <= new_x < self.model.grid.width and 0 <= new_y < self.model.grid.height and
            cell_is_free(self.model, (new_x, new_y))):
            self.model.grid.move_agent(self, (new_x, new_y))
            success = True
            self.x, self.y = new_x, new_y
            self.path.append({"x": self.x, "y": self.y})
            self.steps += 1 
            pos = (self.x, self.y)
            self.visited[pos] = self.visited.get(pos, 0) + 1
            print(f"Walle_Bayesiano moved {action} to ({self.x}, {self.y})")
            
        else:
            success = False
            print(f"Walle_Bayesiano attempted invalid move {action}!")

        if self.x == self.goalx and self.y == self.goaly:
            self.update_beliefs(action, success=True, goal_reached=True)
        else:
            self.update_beliefs(action, success)


# Escenarios
- Grid 1: Obstáculos simples en 5x5
<img src = 'esc1.jpeg' width = 250>
```
obstacles_data = [
    {"x": 2, "y": 2},
    {"x": 1, "y": 3}
]

```
- Grid 2: Obstáculos en 10x10
```
obstacles_data = [
    {"x": 0, "y": 1}, {"x": 0, "y": 2}, {"x": 0, "y": 3}, {"x": 0, "y": 4},
    {"x": 0, "y": 5}, {"x": 0, "y": 6}, {"x": 0, "y": 8}, {"x": 0, "y": 9},
    {"x": 1, "y": 6}, {"x": 1, "y": 9},
    {"x": 2, "y": 0}, {"x": 2, "y": 2}, {"x": 2, "y": 3}, {"x": 2, "y": 4}, {"x": 2, "y": 9},
    {"x": 3, "y": 0}, {"x": 3, "y": 2}, {"x": 3, "y": 6}, {"x": 3, "y": 8}, {"x": 3, "y": 9},
    {"x": 4, "y": 0}, {"x": 4, "y": 4},  {"x": 4, "y": 9},
    {"x": 5, "y": 0}, {"x": 5, "y": 2}, {"x": 5, "y": 9},
    {"x": 6, "y": 0}, {"x": 6, "y": 2}, {"x": 6, "y": 3}, {"x": 6, "y": 5}, {"x": 6, "y": 6},
    {"x": 7, "y": 0}, {"x": 7, "y": 3}, {"x": 7, "y": 5}, {"x": 7, "y": 9},
    {"x": 8, "y": 0}, {"x": 8, "y": 7}, {"x": 8, "y": 9},
    {"x": 9, "y": 0}, {"x": 9, "y": 1}, {"x": 9, "y": 2}, {"x": 9, "y": 3}, {"x": 9, "y": 4},
    {"x": 9, "y": 5}, {"x": 9, "y": 6}, {"x": 9, "y": 7}, {"x": 9, "y": 8}, {"x": 9, "y": 9},
]

```
<img src = 'esc2.png' width = 250>

In [21]:
# -------------------------
# Ejecución de la simulación
# -------------------------

grid_width = 10
grid_height = 10
goal_x = 6
goal_y = 9
obstacles_data = [
    {"x": 0, "y": 1}, {"x": 0, "y": 2}, {"x": 0, "y": 3}, {"x": 0, "y": 4},
    {"x": 0, "y": 5}, {"x": 0, "y": 6}, {"x": 0, "y": 8}, {"x": 0, "y": 9},
    {"x": 1, "y": 6}, {"x": 1, "y": 9},
    {"x": 2, "y": 0}, {"x": 2, "y": 2}, {"x": 2, "y": 3}, {"x": 2, "y": 4}, {"x": 2, "y": 9},
    {"x": 3, "y": 0}, {"x": 3, "y": 2}, {"x": 3, "y": 6}, {"x": 3, "y": 8}, {"x": 3, "y": 9},
    {"x": 4, "y": 0}, {"x": 4, "y": 4},  {"x": 4, "y": 9},
    {"x": 5, "y": 0}, {"x": 5, "y": 2}, {"x": 5, "y": 9},
    {"x": 6, "y": 0}, {"x": 6, "y": 2}, {"x": 6, "y": 3}, {"x": 6, "y": 5}, {"x": 6, "y": 6},
    {"x": 7, "y": 0}, {"x": 7, "y": 3}, {"x": 7, "y": 5}, {"x": 7, "y": 9},
    {"x": 8, "y": 0}, {"x": 8, "y": 7}, {"x": 8, "y": 9},
    {"x": 9, "y": 0}, {"x": 9, "y": 1}, {"x": 9, "y": 2}, {"x": 9, "y": 3}, {"x": 9, "y": 4},
    {"x": 9, "y": 5}, {"x": 9, "y": 6}, {"x": 9, "y": 7}, {"x": 9, "y": 8}, {"x": 9, "y": 9},
]

# tipo agente: "reactivo", "A*", "Q-Learning" o "Bayesiano"
agent_type = "Bayesiano"

model = GridModel(grid_width, grid_height, goal_x, goal_y, obstacles_data, agent_type=agent_type)

for _ in range(1000):
    model.step()

model.save_log("scenario_log.json")

Walle_Bayesiano moved RIGHT to (1, 0)
Walle_Bayesiano moved UP to (1, 1)
Walle_Bayesiano moved RIGHT to (2, 1)
Walle_Bayesiano moved RIGHT to (3, 1)
Walle_Bayesiano moved LEFT to (2, 1)
Walle_Bayesiano moved RIGHT to (3, 1)
Walle_Bayesiano moved LEFT to (2, 1)
Walle_Bayesiano moved RIGHT to (3, 1)
Walle_Bayesiano moved RIGHT to (4, 1)
Walle_Bayesiano moved RIGHT to (5, 1)
Walle_Bayesiano moved LEFT to (4, 1)
Walle_Bayesiano moved RIGHT to (5, 1)
Walle_Bayesiano moved RIGHT to (6, 1)
Walle_Bayesiano moved LEFT to (5, 1)
Walle_Bayesiano moved LEFT to (4, 1)
Walle_Bayesiano moved UP to (4, 2)
Walle_Bayesiano moved UP to (4, 3)
Walle_Bayesiano moved RIGHT to (5, 3)
Walle_Bayesiano moved UP to (5, 4)
Walle_Bayesiano moved UP to (5, 5)
Walle_Bayesiano moved LEFT to (4, 5)
Walle_Bayesiano moved RIGHT to (5, 5)
Walle_Bayesiano moved DOWN to (5, 4)
Walle_Bayesiano moved UP to (5, 5)
Walle_Bayesiano moved DOWN to (5, 4)
Walle_Bayesiano moved DOWN to (5, 3)
Walle_Bayesiano moved LEFT to (4, 3)
Wa

In [None]:
from google.colab import files
files.download("scenario_log.json")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# Ejemplo Unity

In [None]:
import json
import random
from mesa import Agent, Model
from mesa.space import MultiGrid #Implementamos los agentes

class Walle_reactivo(Agent):
    """Agent that moves randomly until it reaches the goal or fails after max steps."""

    def __init__(self, model, x, y, goalx, goaly, max_steps=30):
        super().__init__(model)
        self.x = x
        self.y = y
        self.spawn_x = x
        self.spawn_y = y
        self.goalx = goalx
        self.goaly = goaly
        self.steps_taken = 0
        self.max_steps = max_steps
        self.path = []
    def step(self):
        """Move randomly in x or y direction until reaching goal or exceeding max steps."""
        #Aqui vamos a poner las cosas que haremos cada paso, como imprimir el camino y revisar si ya nos
        #Pasamos de pasos para detenernos

        if (self.x, self.y) == (self.goalx, self.goaly):
            print(f"Walle_reactivo reached the goal at ({self.x}, {self.y}) in {self.steps_taken} steps!")
            return

        #Justo aqui revisamos si ya nos pasamos de pasos
        if self.steps_taken >= self.max_steps:
            print(f"Walle_reactivo FAILED to reach the goal within {self.max_steps} steps!")
            return

        moves = []
        #Percibimos el tamaño del grid
        grid_width, grid_height = self.model.grid.width, self.model.grid.height

        # Revisamos si me puedo mover para esa direccion
        if self.x < grid_width - 1:
            moves.append((self.x + 1, self.y))
        if self.x > 0:
            moves.append((self.x - 1, self.y))
        if self.y < grid_height - 1:
            moves.append((self.x, self.y + 1))
        if self.y > 0:
            moves.append((self.x, self.y - 1))


        if moves:
            #Me muevo en una direccion random
            new_x, new_y = random.choice(moves)
            #Movemos el agente en el grid
            self.model.grid.move_agent(self, (new_x, new_y))
            #Actualizamos la posición del agente
            self.x, self.y = new_x, new_y
            #Sumamos un paso
            self.steps_taken += 1
            #Guardo el paso que di
            self.path.append({"x": self.x, "y": self.y})  # Save position in history
            #Imprimo a donde me movi
            print(f"Walle_reactivo moved to ({self.x}, {self.y})")

class GridModel(Model):
    """A grid model containing reactive agents."""

    def __init__(self, width, height, goalx, goaly):
        super().__init__()
        self.grid = MultiGrid(width, height, torus=False)

        # Creamos el agente reactivo y le digo que va a empezar en 0,0
        #Con Máximo 25 pasos para llegar a la meta
        self.agent1 = Walle_reactivo(self, 0, 0, goalx, goaly)
        #Lo pongo en el grid en el inicio
        self.grid.place_agent(self.agent1, (0, 0))

    def step(self):
        """Advance the model by one step."""
        #Caminamos un paso del agente, si tuviera mas agentes, aqui camino todos
        #esos pasos

        self.agent1.step()

    def save_log(self, filename="walle_log.json"):
        """Save movement history to a JSON file in the required format."""
        #Genero el JSON para UNITY en el formato que ya estamos usando en
        #El test bed

        log_data = {
            "robots": [
                {
                    "spawnPosition": {
                        "x": self.agent1.spawn_x,
                        "y": self.agent1.spawn_y
                    },
                    "path": self.agent1.path
                }
            ]
        }
        with open(filename, "w") as file:
            json.dump(log_data, file, indent=4)
        print(f"Simulation log saved to {filename}") #Corremos la solucion
# -------------------------
# RUNNING THE SIMULATION
# -------------------------

# Definimos la maya y la meta del agente
GRID_WIDTH = 10
GRID_HEIGHT = 10
GOAL_X = 7
GOAL_Y = 7

# Creamos una instancia del modelo para poder ejecutarlo
model = GridModel(GRID_WIDTH, GRID_HEIGHT, GOAL_X, GOAL_Y)

# Corremos el modelo
for _ in range(30):
    #Aquí andamos tomando los pasos, le pusimos 30 para apreciar que no termina
    #En 25 pasos
    model.step()

# Guardamos los resultados en el log, y el log lo vemos
# <- aca podras encontrar el archivo: walle_log.json
model.save_log()

Walle_reactivo moved to (0, 1)
Walle_reactivo moved to (0, 0)
Walle_reactivo moved to (1, 0)
Walle_reactivo moved to (2, 0)
Walle_reactivo moved to (1, 0)
Walle_reactivo moved to (1, 1)
Walle_reactivo moved to (1, 0)
Walle_reactivo moved to (1, 1)
Walle_reactivo moved to (0, 1)
Walle_reactivo moved to (0, 2)
Walle_reactivo moved to (0, 3)
Walle_reactivo moved to (0, 2)
Walle_reactivo moved to (0, 3)
Walle_reactivo moved to (0, 2)
Walle_reactivo moved to (0, 1)
Walle_reactivo moved to (1, 1)
Walle_reactivo moved to (2, 1)
Walle_reactivo moved to (2, 0)
Walle_reactivo moved to (1, 0)
Walle_reactivo moved to (0, 0)
Walle_reactivo moved to (0, 1)
Walle_reactivo moved to (0, 2)
Walle_reactivo moved to (0, 3)
Walle_reactivo moved to (1, 3)
Walle_reactivo moved to (2, 3)
Walle_reactivo moved to (2, 2)
Walle_reactivo moved to (1, 2)
Walle_reactivo moved to (1, 3)
Walle_reactivo moved to (0, 3)
Walle_reactivo moved to (0, 2)
Simulation log saved to walle_log.json
