In [2]:
"""
Multi-Agent Dynamic Grid World Environment
Created by: Ardianto Wibowo
"""

import numpy as np
import sys
import random

# Add the path to the 'env' folder to sys.path
sys.path.append('env')

from ma_gridworld import Env

def add_noise(data, agent, noise_type="color", env=None, is_noise_enabled=True):
    """
    تضيف نويز للبيانات بناءً على النوع ونسبة النويز.
    """
    noise_level = agent.noise_level  # مستوى نويز خاص بكل وكيل

    # تحقق إذا كانت البيانات None
    if data is None:
        return data

    # نويز الموقع
    if noise_type == "location" and is_noise_enabled and random.random() < noise_level:
        # تحقق إذا كانت البيانات قائمة تحتوي على إحداثيات
        if isinstance(data, list) and len(data) == 2 and all(isinstance(x, int) for x in data):
            grid_width = getattr(env, 'gwidth', 15)  # استخدام قيمة افتراضية إذا لم تكن الخاصية موجودة
            grid_height = getattr(env, 'gheight', 15)
            noise_x = random.choice([1, -1])
            noise_y = random.choice([1, -1])
            new_x = max(0, min(data[0] + noise_x, grid_width - 1))
            new_y = max(0, min(data[1] + noise_y, grid_height - 1))
            return [new_x, new_y]
        else:
            # إذا لم تكن البيانات صالحة لنويز الموقع، قم بإعادتها كما هي
            return data

    # إذا لم يتم تطبيق نويز، قم بإعادة البيانات كما هي
    return data


class SearchAgent:
    def __init__(self, num_actions, agent_id, num_agents):
        self.num_actions = num_actions
        self.agent_id = agent_id  # تعيين agent_id
        self.memory = []  # خاصية لتخزين البيانات التي يتم التحقق منها
        self.targets_seen = []  # قائمة فارغة لتخزين الأهداف كقواميس
        self.noise_level = round(random.uniform(0, 1), 2)  # مستوى نويز خاص بكل وكيل
        print(f"Agent {self.agent_id}: Initialized with noise level {self.noise_level:.2f}")
        # مستوى الثقة الابتدائي
        self.trust_level = round(random.uniform(0.3, 1.0), 2)

        # تعيين نفس قيمة الثقة الابتدائية لجميع الوكلاء الآخرين
        self.trust_values = {other_id: self.trust_level for other_id in range(num_agents) if other_id != self.agent_id}

        print(f"Agent {self.agent_id}: Initialized with trust level {self.trust_level:.2f}")


    def update_trust(self, other_agent_id, interaction_success, use_indirect=False):
        """
        تحديث الثقة باستخدام نموذج يشمل الثقة المباشرة وغير المباشرة.

        Args:
            other_agent_id (int): معرف الوكيل الآخر الذي يتم تحديث الثقة تجاهه.
            interaction_success (bool): نتيجة التفاعل الأخير (True للنجاح، False للفشل).
            use_indirect (bool): إذا كانت الثقة غير المباشرة مفعلة.
        """
        if other_agent_id in self.trust_values:
            # الثقة السابقة
            previous_trust = self.trust_values[other_agent_id]

            # تحويل نتيجة التفاعل إلى قيمة (1 للنجاح، 0 للفشل)
            interaction_result = 1.0 if interaction_success else 0.0

            # عامل النسيان
            alpha = 0.3  # يمكن تعديل هذه القيمة حسب الحاجة

            if not use_indirect:
                # تطبيق معادلة الثقة المباشرة فقط
                updated_trust = (1 - alpha) * previous_trust + alpha * interaction_result
            else:
                # حساب الثقة غير المباشرة
                indirect_trust = 0.0
                total_weight = 0.0

                for intermediary_agent, intermediary_trust in self.trust_values.items():
                    if intermediary_agent != other_agent_id and intermediary_agent in agents[other_agent_id].trust_values:
                        indirect_trust += (
                            intermediary_trust * agents[intermediary_agent].trust_values[other_agent_id]
                        )
                        total_weight += 1.0

                # إذا لم يكن هناك وسطاء، يتم تجاهل الثقة غير المباشرة
                if total_weight > 0:
                    indirect_trust /= total_weight

                # دمج الثقة المباشرة وغير المباشرة
                updated_trust = (alpha * ((1 - alpha) * previous_trust + alpha * interaction_result) +
                                 (1 - alpha) * indirect_trust)

            # تحديث قيمة الثقة
            self.trust_values[other_agent_id] = max(0.0, min(1.0, updated_trust))  # التأكد من بقاء القيم بين 0 و 1

            # طباعة تحديث الثقة
            trust_type = "Direct + Indirect" if use_indirect else "Direct"
            print(f"Agent {self.agent_id}: Updated trust towards Agent {other_agent_id} ({trust_type}) to {self.trust_values[other_agent_id]:.2f}")

        
    def normalize_targets(self):
        """
        توحيد شكل البيانات داخل قائمة targets_seen.
        """
        self.targets_seen = [
            {"location": target, "verified": True, "source": "self"} if isinstance(target, list) else target
            for target in self.targets_seen
        ]     
        


    def analyse_sensor_data(self, agent_id, coordinate_observation, sensor_data_observation):
        for i in range(len(sensor_data_observation)):
            for j in range(len(sensor_data_observation[i])):
                data = sensor_data_observation[i][j]
                location = [
                    coordinate_observation[0] + j - len(sensor_data_observation[i]) // 2,
                    coordinate_observation[1] + i - len(sensor_data_observation) // 2
                ]
                if data != None and 'target_' + str(agent_id) in data:
                    if location not in [target["location"] for target in self.targets_seen]:
                        self.targets_seen.append({
                            "location": location,
                            "verified": False,
                            "source": "self"  # أو مصدر آخر حسب الحاجة
                        })
                        print(f"Agent {agent_id}: Added target at {location} to targets_seen")

                # تحديث الذاكرة مع تمرير agent_id
                if data is not None and 'target_' in data:  # التحقق من وجود هدف
                    self.update_memory(location, data, agent_id)  # استدعاء update_memory

                # إزالة الأهداف التي تم جمعها
                if location[0] == coordinate_observation[0] and location[1] == coordinate_observation[1]:
                    if location in self.targets_seen:
                        self.targets_seen.remove(location)






    def analyse_communication(self, comm_observation, agent_id):
        for comm in comm_observation:
            origin_location = comm[0]
            sensor_data_observation = comm[1]
            reported_by = comm[2]  # الوكيل الذي أرسل الرسالة

            print(f"Agent {agent_id}: Received communication from Agent {reported_by} at {origin_location}")

            for i in range(len(sensor_data_observation)):
                for j in range(len(sensor_data_observation[i])):
                    data = sensor_data_observation[i][j]
                    location = [
                        origin_location[0] + j - len(sensor_data_observation[i]) // 2,
                        origin_location[1] + i - len(sensor_data_observation) // 2
                    ]

                    if data is not None and 'target_' in data:
                        if location not in [target["location"] for target in self.targets_seen]:
                            self.targets_seen.append({
                                "location": location,
                                "verified": False,
                                "source": f"agent_{reported_by}"
                            })
                            print(f"Agent {agent_id}: Added target at {location} from Agent {reported_by}")


                            
    def update_memory(self, location, data, agent_id):

        print(f"Agent {agent_id} is updating memory for location {location} with data {data}")
        if location not in [record["location"] for record in self.memory]:
            # add
            self.memory.append({"location": location, "data": data})
            print(f"Agent {agent_id} Updated Memory: {location} -> {data}")

    def select_action(self, coordinate_observation, agent_id):
        print(f"Agent {agent_id}: Current targets seen: {self.targets_seen}")

        # إزالة الأهداف المكررة مع تحويل الموقع إلى tuple
        self.targets_seen = list({tuple(target["location"]): target for target in self.targets_seen}.values())

        if len(self.targets_seen) > 0:
            closest_target = None
            closest_target_distance = float('inf')  # مسافة كبيرة مبدئيًا
            for target in self.targets_seen:
                target_coordinate = target["location"]
                horizontal_distance = target_coordinate[0] - coordinate_observation[0]
                vertical_distance = target_coordinate[1] - coordinate_observation[1]
                distance = abs(horizontal_distance) + abs(vertical_distance)
                print(f"Agent {agent_id}: Target {target_coordinate} at distance {distance}")

                if distance < closest_target_distance:
                    closest_target_distance = distance
                    closest_target = target_coordinate

            if closest_target:
                # التحقق من صلاحية الموقع المستهدف
                grid_width = 15  # عرض الشبكة
                grid_height = 15  # ارتفاع الشبكة
                if not (0 <= closest_target[0] < grid_width and 0 <= closest_target[1] < grid_height):
                    print(f"Invalid target location {closest_target}, skipping target.")
                    closest_target = None

                if closest_target and closest_target_distance == 0:
                    print(f"Agent {agent_id}: Reached target at {closest_target}, removing from targets_seen.")
                    self.targets_seen = [t for t in self.targets_seen if tuple(t["location"]) != tuple(closest_target)]
                    return 0  # Stay

                if closest_target:
                    print(f"Agent {agent_id}: Moving towards target at {closest_target}")
                    horizontal_distance = closest_target[0] - coordinate_observation[0]
                    vertical_distance = closest_target[1] - coordinate_observation[1]
                    if abs(horizontal_distance) >= abs(vertical_distance):
                        return 3 if horizontal_distance < 0 else 4
                    else:
                        return 1 if vertical_distance < 0 else 2
        else:
            print(f"Agent {agent_id}: No valid targets, attempting random action.")
            possible_actions = [1, 2, 3, 4]  # كل الاتجاهات الممكنة
            return np.random.choice(possible_actions)
def get_action(agent_id, observation, num_actions, agents, env):
    """
    تنفيذ خطوة واحدة لكل وكيل، مع تحليل البيانات وتحديث الثقة.
    """
    coordinate_observation = tuple(observation[0])  # Keep observation as (x, y) tuple
    sensor_data_observation = observation[2]
    comm_observation = observation[3]

    print(f"Observation for Agent {agent_id}: {observation}")

    # تحقق من وجود بيانات الاتصال
    if comm_observation:
        for comm in comm_observation:
            origin_location = comm[0]
            data = comm[1]
            reported_by = comm[2]  # الوكيل الذي أرسل البيانات

            # تحليل البيانات المستلمة من وكيل معين
            agents[agent_id].analyse_communication([comm], agent_id)

            # تحديث الثقة بناءً على الوكيل المصدر للاتصال
            interaction_success = False  # افتراض التفاعل فشل
            if data and all(isinstance(row, list) for row in data):  # تحقق من أن البيانات ليست None وأنها قائمة من القوائم
                if any("target_" in cell for row in data for cell in row if cell is not None):
                    interaction_success = True

            # تحديث الثقة تجاه الوكيل المرسل
            agents[agent_id].update_trust(reported_by, interaction_success=interaction_success)
    else:
        print(f"Agent {agent_id}: No communication data available this step.")


    # تحليل بيانات المستشعر
    agents[agent_id].analyse_sensor_data(agent_id, coordinate_observation, sensor_data_observation)

    # اختيار الإجراء الفيزيائي
    physical_action = agents[agent_id].select_action(coordinate_observation, agent_id)

    if env.is_agent_silent:
        comm_action = []  # Communication action is set to empty if agents are silent
    else:
        # Add noise to communication
        noisy_location = add_noise(coordinate_observation, agents[agent_id], "location", env)
        noisy_data = [
            [add_noise(cell, agents[agent_id], "color", env) for cell in row] 
            for row in sensor_data_observation
        ]
        comm_action = [noisy_location, noisy_data, agent_id]
        print(f"Agent {agent_id}: Sending noisy communication: Location={noisy_location}, Data={noisy_data}")
    
    return (physical_action, comm_action)


def run(num_episodes, max_steps_per_episode, agents, num_actions, env):
    for episode in range(num_episodes):
        print(f"Starting episode {episode + 1}")
        observations = env.reset()  # Reset the environment at the start of each episode

        
        done = [False] * env.num_agents  # Initialize 'done' as a list for each agent
        step_count = 0

        while not all(done) and step_count < max_steps_per_episode:  # Stop if all agents are done or max steps reached
            actions = []
            next_observations = []
            
            for agent_id in range(env.num_agents):
                observation = observations[agent_id]
                action = get_action(agent_id, observation, num_actions, agents, env)
                
                actions.append(action)
                next_observations.append(observation)

            next_observations, rewards, done = env.step(actions)  # Step in the environment

            observations = next_observations
            step_count += 1

            # Render the environment
            env.render()

            # طباعة النتائج على الشاشة
            print(f"Step {step_count}:")
            for agent_id in range(env.num_agents):
                print(
                    f"  Agent {agent_id}: Observation: {observations[agent_id]}, "
                    f"Action: {actions[agent_id]}, Reward: {rewards[agent_id]}, Done: {done[agent_id]}"
                )

        # طباعة عدد الخطوات عند نهاية الحلقة
        print(f"Episode {episode + 1} finished after {step_count} steps.\n")

    # طباعة الثقة النهائية للوكلاء بعد انتهاء جميع الحلقات
    print("\nFinal Trust Levels:")
    for agent_id, agent in enumerate(agents):
        print(f"Agent {agent_id} final trust levels: {agent.trust_values}")



if __name__ == "__main__":

    gsize=15 #grid size (square)
    gpixels=30 #grid cell size in pixels

    is_sensor_active = True #True:  Activate the sensory observation data
    sensory_size = 3 #'is_sensor_active' must be True. The value must be odd, if event will be converted to one level odd number above
    
    num_agents = 5 #the number of agents will be run in paralel
    num_obstacles = 0 #the number of obstacles
    is_single_target = False #True: all agents have a single target, False: each agent has their own target
    num_targets_per_agent = 2 #'is_single_target' must be true to have an effect
    
    is_agent_silent = False #True: communication among agents is allowed
    
      
    noise_levels = noise_levels = {agent_id: round(random.uniform(0, 1), 2) for agent_id in range(num_agents)}

    
    enable_location_noise = True  # تفعيل نويز الموقع
    enable_color_noise = False  # تفعيل نويز اللون
     # نسبة نويز اللون


    num_episodes=1 #the number of episode will be run
    max_steps_per_episode=1000 #each episode will be stopped when max_step is reached

    eps_moving_targets = 10 #set this value greater than 'num_episodes' to keep the targets in a stationary position
    eps_moving_obstacles = 10 #set this value greater than 'num_episodes' to keep the obstacles in a stationary position

    render = True #True: render the animation into the screen (so far, it is still can not be deactivated)

    min_obstacle_distance_from_target = 1 #min grid distance of each obstacles relative to targets
    max_obstacle_distance_from_target = 5 #max grid distance of each obstacles relative to targets
    min_obstacle_distance_from_agents = 1 #min grid distance of each obstacles relative to agents

    reward_normal = -1 #reward value of normal steps
    reward_obstacle = -5 #reward value when hit an obstacle
    reward_target = 50 #reward value when reach the target

    is_totally_random = True #True: target and obstacles initial as well as movement position is always random on each call, False: only random at the beginning. 
    animation_speed = 0.1 #smaller is faster 
    is_destroy_environment = True #True: automatically close the animation after all episodes end.  

    # Initialize environment
    env = Env(
        num_agents=num_agents, num_targets_per_agent=num_targets_per_agent, num_obstacles=num_obstacles,
        eps_moving_obstacles=eps_moving_obstacles, eps_moving_targets=eps_moving_targets,
        is_agent_silent=is_agent_silent, is_single_target=is_single_target, sensory_size=sensory_size,
        gpixels=gpixels, gheight=gsize, gwidth=gsize, is_sensor_active=is_sensor_active,
        min_obstacle_distance_from_target=min_obstacle_distance_from_target,
        max_obstacle_distance_from_target=max_obstacle_distance_from_target,
        min_obstacle_distance_from_agents=min_obstacle_distance_from_agents,
        is_totally_random=is_totally_random, animation_speed=animation_speed,
        reward_normal=reward_normal, reward_obstacle=reward_obstacle, reward_target=reward_target
    )
    
    num_actions = len(env.action_space)
    

    # Initialize Q-learning agents
    agents = [SearchAgent(num_actions, agent_id, num_agents) for agent_id in range(num_agents)]



    # Run episodes
    run(num_episodes, max_steps_per_episode, agents, num_actions, env)

    if is_destroy_environment:
        env.destroy_environment() 


Agent 0: Initialized with noise level 0.48
Agent 0: Initialized with trust level 0.72
Agent 1: Initialized with noise level 0.69
Agent 1: Initialized with trust level 0.74
Agent 2: Initialized with noise level 0.18
Agent 2: Initialized with trust level 0.42
Agent 3: Initialized with noise level 0.14
Agent 3: Initialized with trust level 0.83
Agent 4: Initialized with noise level 0.74
Agent 4: Initialized with trust level 0.76
Starting episode 1
Observation for Agent 0: [[0, 0], False, [[None, None, None], [None, 'agent', 'empty'], [None, 'empty', 'empty']], []]
Agent 0: No communication data available this step.
Agent 0: Current targets seen: []
Agent 0: No valid targets, attempting random action.
Agent 0: Sending noisy communication: Location=(0, 0), Data=[[None, None, None], [None, 'agent', 'empty'], [None, 'empty', 'empty']]
Observation for Agent 1: [[14, 0], False, [[None, None, None], ['empty', 'agent', None], ['empty', 'empty', None]], []]
Agent 1: No communication data available

Step 2:
  Agent 0: Observation: [[1, 1], False, [['empty', 'agent', 'empty'], ['empty', 'empty', 'empty'], ['empty', 'empty', 'empty']], [[(14, 1), [['empty', 'agent', None], ['empty', 'empty', None], ['empty', 'empty', None]], 1], [(14, 14), [['empty', 'empty', None], ['empty', 'agent', None], [None, None, None]], 2], [(0, 13), [[None, 'empty', 'empty'], [None, 'empty', 'empty'], [None, 'agent', 'empty']], 3], [(8, 0), [[None, None, None], ['agent', 'empty', 'empty'], ['empty', 'empty', 'empty']], 4]]], Action: (2, [(1, 0), [[None, None, None], ['agent', 'empty', 'empty'], ['empty', 'empty', 'empty']], 0]), Reward: -1, Done: False
  Agent 1: Observation: [[14, 1], False, [['empty', 'empty', None], ['empty', 'agent', None], ['empty', 'empty', None]], [[(1, 0), [[None, None, None], ['agent', 'empty', 'empty'], ['empty', 'empty', 'empty']], 0], [(14, 14), [['empty', 'empty', None], ['empty', 'agent', None], [None, None, None]], 2], [(0, 13), [[None, 'empty', 'empty'], [None, 'empty', 'em

Step 4:
  Agent 0: Observation: [[1, 1], False, [['empty', 'agent', 'empty'], ['empty', 'empty', 'empty'], ['empty', 'empty', 'empty']], [[(14, 2), [['empty', 'agent', None], ['empty', 'empty', None], ['empty', 'empty', None]], 1], [(14, 13), [['target_1_0', 'empty', None], ['empty', 'empty', None], ['empty', 'agent', None]], 2], [(1, 13), [['empty', 'empty', 'empty'], ['agent', 'empty', 'empty'], ['empty', 'empty', 'empty']], 3], [(10, 0), [[None, None, None], ['agent', 'empty', 'empty'], ['empty', 'empty', 'empty']], 4]]], Action: (2, [(1, 0), [[None, None, None], ['empty', 'empty', 'empty'], ['empty', 'agent', 'empty']], 0]), Reward: -1, Done: False
  Agent 1: Observation: [[14, 2], False, [['empty', 'empty', None], ['empty', 'agent', None], ['empty', 'empty', None]], [[(1, 0), [[None, None, None], ['empty', 'empty', 'empty'], ['empty', 'agent', 'empty']], 0], [(14, 13), [['target_1_0', 'empty', None], ['empty', 'empty', None], ['empty', 'agent', None]], 2], [(1, 13), [['empty', 'em

Step 6:
  Agent 0: Observation: [[3, 1], False, [['empty', 'empty', 'empty'], ['agent', 'empty', 'empty'], ['empty', 'empty', 'empty']], [[(14, 3), [['empty', 'agent', None], ['empty', 'empty', None], ['empty', 'empty', None]], 1], [(13, 12), [['empty', 'empty', 'empty'], ['empty', 'target_1_0', 'empty'], ['target_0_0', 'agent', 'empty']], 2], [(3, 13), [['empty', 'empty', 'empty'], ['agent', 'empty', 'empty'], ['empty', 'empty', 'empty']], 3], [(10, 1), [['empty', 'agent', 'empty'], ['empty', 'empty', 'empty'], ['empty', 'empty', 'target_4_1']], 4]]], Action: (4, [(2, 1), [['empty', 'empty', 'empty'], ['agent', 'empty', 'empty'], ['empty', 'empty', 'empty']], 0]), Reward: -1, Done: False
  Agent 1: Observation: [[14, 4], False, [['empty', 'agent', None], ['empty', 'empty', None], ['empty', 'empty', None]], [[(2, 1), [['empty', 'empty', 'empty'], ['agent', 'empty', 'empty'], ['empty', 'empty', 'empty']], 0], [(13, 12), [['empty', 'empty', 'empty'], ['empty', 'target_1_0', 'empty'], ['t

Step 8:
  Agent 0: Observation: [[5, 1], False, [['empty', 'empty', 'empty'], ['agent', 'empty', 'empty'], ['empty', 'empty', 'empty']], [[(13, 4), [['empty', 'empty', 'empty'], ['empty', 'empty', 'agent'], ['empty', 'empty', 'empty']], 1], [(13, 10), [['empty', 'empty', 'empty'], ['target_4_0', 'empty', 'empty'], ['empty', 'agent', 'empty']], 2], [(5, 13), [['empty', 'empty', 'empty'], ['agent', 'empty', 'empty'], ['empty', 'empty', 'empty']], 3], [(11, 2), [['empty', 'agent', 'empty'], ['empty', 'empty', 'empty'], ['empty', 'empty', 'empty']], 4]]], Action: (4, [(4, 1), [['empty', 'empty', 'empty'], ['agent', 'empty', 'empty'], ['empty', 'empty', 'empty']], 0]), Reward: -1, Done: False
  Agent 1: Observation: [[12, 4], False, [['empty', 'empty', 'empty'], ['empty', 'empty', 'agent'], ['empty', 'empty', 'empty']], [[(4, 1), [['empty', 'empty', 'empty'], ['agent', 'empty', 'empty'], ['empty', 'empty', 'empty']], 0], [(13, 10), [['empty', 'empty', 'empty'], ['target_4_0', 'empty', 'empt

Step 10:
  Agent 0: Observation: [[7, 1], False, [['empty', 'empty', 'empty'], ['agent', 'empty', 'empty'], ['empty', 'empty', 'empty']], [[(12, 3), [['agent', 'empty', 'empty'], ['empty', 'empty', 'empty'], ['empty', 'agent', 'empty']], 1], [(13, 8), [['empty', 'empty', 'empty'], ['empty', 'empty', 'empty'], ['empty', 'agent', 'empty']], 2], [(7, 13), [['empty', 'empty', 'empty'], ['agent', 'empty', 'empty'], ['empty', 'empty', 'empty']], 3], [(11, 3), [['empty', 'agent', 'empty'], ['empty', 'empty', 'agent'], ['empty', 'empty', 'empty']], 4]]], Action: (4, [(6, 1), [['empty', 'empty', 'empty'], ['agent', 'empty', 'empty'], ['empty', 'empty', 'empty']], 0]), Reward: -1, Done: False
  Agent 1: Observation: [[11, 3], False, [['empty', 'empty', 'empty'], ['empty', 'agent', 'agent'], ['empty', 'empty', 'empty']], [[(6, 1), [['empty', 'empty', 'empty'], ['agent', 'empty', 'empty'], ['empty', 'empty', 'empty']], 0], [(13, 8), [['empty', 'empty', 'empty'], ['empty', 'empty', 'empty'], ['empt

Step 12:
  Agent 0: Observation: [[9, 1], False, [['empty', 'empty', 'empty'], ['agent', 'empty', 'empty'], ['empty', 'empty', 'empty']], [[(11, 2), [['empty', 'empty', 'empty'], ['empty', 'empty', 'empty'], ['empty', 'agent', 'empty']], 1], [(13, 6), [['empty', 'empty', 'empty'], ['empty', 'empty', 'empty'], ['empty', 'agent', 'empty']], 2], [(9, 13), [['empty', 'empty', 'empty'], ['agent', 'empty', 'empty'], ['empty', 'empty', 'empty']], 3], [(11, 5), [['empty', 'agent', 'empty'], ['empty', 'empty', 'empty'], ['empty', 'empty', 'empty']], 4]]], Action: (4, [(8, 1), [['empty', 'empty', 'empty'], ['agent', 'empty', 'empty'], ['empty', 'empty', 'empty']], 0]), Reward: -1, Done: False
  Agent 1: Observation: [[11, 2], False, [['empty', 'empty', 'empty'], ['empty', 'agent', 'empty'], ['empty', 'empty', 'empty']], [[(8, 1), [['empty', 'empty', 'empty'], ['agent', 'empty', 'empty'], ['empty', 'empty', 'empty']], 0], [(13, 6), [['empty', 'empty', 'empty'], ['empty', 'empty', 'empty'], ['empt

Step 14:
  Agent 0: Observation: [[11, 1], False, [['empty', 'empty', 'empty'], ['agent', 'empty', 'empty'], ['empty', 'empty', 'empty']], [[(11, 3), [['empty', 'agent', 'empty'], ['empty', 'empty', 'empty'], ['empty', 'empty', 'empty']], 1], [(13, 4), [['empty', 'empty', 'empty'], ['empty', 'empty', 'empty'], ['empty', 'agent', 'empty']], 2], [(11, 13), [['empty', 'target_3_1', 'empty'], ['agent', 'empty', 'target_0_0'], ['empty', 'empty', 'empty']], 3], [(11, 7), [['empty', 'agent', 'empty'], ['empty', 'empty', 'empty'], ['target_1_1', 'empty', 'empty']], 4]]], Action: (4, [(10, 1), [['empty', 'empty', 'empty'], ['agent', 'empty', 'empty'], ['empty', 'empty', 'agent']], 0]), Reward: -1, Done: False
  Agent 1: Observation: [[11, 4], False, [['empty', 'agent', 'empty'], ['empty', 'empty', 'empty'], ['empty', 'empty', 'empty']], [[(10, 1), [['empty', 'empty', 'empty'], ['agent', 'empty', 'empty'], ['empty', 'empty', 'agent']], 0], [(13, 4), [['empty', 'empty', 'empty'], ['empty', 'empty

Step 16:
  Agent 0: Observation: [[11, 2], False, [['empty', 'empty', 'empty'], ['empty', 'agent', 'empty'], ['empty', 'empty', 'agent']], [[(11, 5), [['empty', 'agent', 'agent'], ['empty', 'empty', 'empty'], ['empty', 'empty', 'empty']], 1], [(12, 3), [['agent', 'empty', 'empty'], ['empty', 'empty', 'empty'], ['empty', 'agent', 'empty']], 2], [(12, 13), [['target_3_1', 'empty', 'target_1_0'], ['empty', 'agent', 'empty'], ['empty', 'empty', 'empty']], 3], [(11, 9), [['target_1_1', 'agent', 'empty'], ['empty', 'empty', 'empty'], ['empty', 'empty', 'target_4_0']], 4]]], Action: (0, [(11, 2), [['empty', 'agent', 'empty'], ['empty', 'empty', 'empty'], ['empty', 'empty', 'empty']], 0]), Reward: -1, Done: False
  Agent 1: Observation: [[11, 6], False, [['empty', 'agent', 'empty'], ['empty', 'empty', 'empty'], ['empty', 'empty', 'empty']], [[(11, 2), [['empty', 'agent', 'empty'], ['empty', 'empty', 'empty'], ['empty', 'empty', 'empty']], 0], [(12, 3), [['agent', 'empty', 'empty'], ['empty', '

Step 18:
  Agent 0: Observation: [[11, 4], False, [['empty', 'agent', 'empty'], ['empty', 'empty', 'empty'], ['empty', 'empty', 'empty']], [[(11, 7), [['empty', 'agent', 'empty'], ['empty', 'empty', 'empty'], ['target_1_1', 'empty', 'empty']], 1], [(11, 2), [['empty', 'empty', 'empty'], ['empty', 'empty', 'empty'], ['empty', 'agent', 'empty']], 2], [(13, 12), [['empty', 'empty', 'empty'], ['empty', 'target_1_0', 'empty'], ['target_0_0', 'agent', 'empty']], 3], [(12, 10), [['empty', 'agent', 'empty'], ['empty', 'empty', 'empty'], ['empty', 'empty', 'empty']], 4]]], Action: (2, [(11, 3), [['empty', 'agent', 'empty'], ['empty', 'agent', 'empty'], ['empty', 'empty', 'empty']], 0]), Reward: -1, Done: False
  Agent 1: Observation: [[10, 7], False, [['empty', 'empty', 'empty'], ['empty', 'empty', 'agent'], ['empty', 'target_1_1', 'empty']], [[(11, 3), [['empty', 'agent', 'empty'], ['empty', 'agent', 'empty'], ['empty', 'empty', 'empty']], 0], [(11, 2), [['empty', 'empty', 'empty'], ['empty', 

Step 20:
  Agent 0: Observation: [[11, 6], False, [['empty', 'agent', 'empty'], ['empty', 'empty', 'empty'], ['empty', 'empty', 'empty']], [[(10, 8), [['empty', 'agent', 'empty'], ['empty', 'empty', 'empty'], ['empty', 'empty', 'empty']], 1], [(11, 3), [['empty', 'agent', 'empty'], ['empty', 'empty', 'empty'], ['empty', 'empty', 'empty']], 2], [(12, 12), [['empty', 'empty', 'empty'], ['target_3_1', 'empty', 'agent'], ['empty', 'target_0_0', 'empty']], 3], [(12, 11), [['empty', 'agent', 'empty'], ['empty', 'empty', 'empty'], ['target_3_1', 'agent', 'target_1_0']], 4]]], Action: (2, [(11, 5), [['empty', 'agent', 'empty'], ['empty', 'empty', 'empty'], ['empty', 'empty', 'empty']], 0]), Reward: -1, Done: False
  Agent 1: Observation: [[10, 8], False, [['empty', 'empty', 'empty'], ['empty', 'agent', 'empty'], ['empty', 'empty', 'empty']], [[(11, 5), [['empty', 'agent', 'empty'], ['empty', 'empty', 'empty'], ['empty', 'empty', 'empty']], 0], [(11, 3), [['empty', 'agent', 'empty'], ['empty', 

Step 22:
  Agent 0: Observation: [[10, 7], False, [['empty', 'empty', 'empty'], ['empty', 'empty', 'agent'], ['empty', 'empty', 'agent']], [[(11, 8), [['empty', 'agent', 'empty'], ['agent', 'empty', 'empty'], ['empty', 'empty', 'empty']], 1], [(11, 5), [['empty', 'agent', 'empty'], ['empty', 'empty', 'empty'], ['empty', 'empty', 'empty']], 2], [(11, 12), [['empty', 'empty', 'empty'], ['empty', 'agent', 'empty'], ['empty', 'empty', 'target_0_0']], 3], [(13, 12), [['empty', 'agent', 'empty'], ['empty', 'target_1_0', 'empty'], ['target_0_0', 'empty', 'empty']], 4]]], Action: (3, [(11, 7), [['empty', 'agent', 'empty'], ['empty', 'empty', 'empty'], ['agent', 'empty', 'empty']], 0]), Reward: -1, Done: False
  Agent 1: Observation: [[11, 9], False, [['empty', 'agent', 'empty'], ['empty', 'empty', 'empty'], ['empty', 'empty', 'empty']], [[(11, 7), [['empty', 'agent', 'empty'], ['empty', 'empty', 'empty'], ['agent', 'empty', 'empty']], 0], [(11, 5), [['empty', 'agent', 'empty'], ['empty', 'empt

Step 24:
  Agent 0: Observation: [[10, 8], False, [['empty', 'empty', 'agent'], ['empty', 'agent', 'empty'], ['empty', 'empty', 'empty']], [[(12, 9), [['empty', 'empty', 'empty'], ['agent', 'empty', 'empty'], ['empty', 'empty', 'empty']], 1], [(11, 7), [['empty', 'agent', 'empty'], ['empty', 'empty', 'empty'], ['agent', 'empty', 'empty']], 2], [(13, 12), [['empty', 'empty', 'empty'], ['agent', 'agent', 'empty'], ['target_0_0', 'empty', 'empty']], 3], [(12, 12), [['empty', 'empty', 'empty'], ['empty', 'empty', 'agent'], ['empty', 'target_0_0', 'empty']], 4]]], Action: (0, [(10, 8), [['empty', 'agent', 'empty'], ['empty', 'empty', 'empty'], ['empty', 'empty', 'agent']], 0]), Reward: -1, Done: False
  Agent 1: Observation: [[12, 10], False, [['empty', 'agent', 'empty'], ['empty', 'empty', 'empty'], ['empty', 'empty', 'empty']], [[(10, 8), [['empty', 'agent', 'empty'], ['empty', 'empty', 'empty'], ['empty', 'empty', 'agent']], 0], [(11, 7), [['empty', 'agent', 'empty'], ['empty', 'empty', 

Step 26:
  Agent 0: Observation: [[11, 9], False, [['agent', 'agent', 'empty'], ['empty', 'empty', 'empty'], ['empty', 'empty', 'agent']], [[(12, 10), [['empty', 'empty', 'empty'], ['empty', 'agent', 'empty'], ['empty', 'empty', 'empty']], 1], [(10, 8), [['empty', 'agent', 'empty'], ['empty', 'empty', 'agent'], ['empty', 'empty', 'empty']], 2], [(12, 12), [['empty', 'empty', 'empty'], ['empty', 'empty', 'agent'], ['empty', 'agent', 'empty']], 3], [(12, 13), [['empty', 'agent', 'target_1_0'], ['empty', 'agent', 'empty'], ['empty', 'empty', 'empty']], 4]]], Action: (2, [(11, 8), [['agent', 'empty', 'empty'], ['agent', 'empty', 'empty'], ['empty', 'empty', 'empty']], 0]), Reward: -1, Done: False
  Agent 1: Observation: [[12, 11], False, [['empty', 'agent', 'empty'], ['empty', 'empty', 'empty'], ['empty', 'agent', 'target_1_0']], [[(11, 8), [['agent', 'empty', 'empty'], ['agent', 'empty', 'empty'], ['empty', 'empty', 'empty']], 0], [(10, 8), [['empty', 'agent', 'empty'], ['empty', 'empty',

Step 28:
  Agent 0: Observation: [[12, 10], False, [['empty', 'agent', 'empty'], ['empty', 'empty', 'empty'], ['empty', 'empty', 'agent']], [[(13, 11), [['empty', 'empty', 'empty'], ['agent', 'empty', 'empty'], ['empty', 'target_1_0', 'empty']], 1], [(11, 8), [['empty', 'empty', 'empty'], ['agent', 'empty', 'empty'], ['empty', 'empty', 'agent']], 2], [(12, 13), [['empty', 'empty', 'target_1_0'], ['empty', 'agent', 'empty'], ['empty', 'empty', 'empty']], 3], [(11, 13), [['empty', 'empty', 'empty'], ['empty', 'empty', 'agent'], ['empty', 'empty', 'empty']], 4]]], Action: (2, [(12, 9), [['empty', 'empty', 'empty'], ['agent', 'empty', 'empty'], ['empty', 'empty', 'empty']], 0]), Reward: -1, Done: False
  Agent 1: Observation: [[13, 12], False, [['empty', 'agent', 'empty'], ['empty', 'empty', 'empty'], ['agent', 'empty', 'empty']], [[(12, 9), [['empty', 'empty', 'empty'], ['agent', 'empty', 'empty'], ['empty', 'empty', 'empty']], 0], [(11, 8), [['empty', 'empty', 'empty'], ['agent', 'empty'

Step 30:
  Agent 0: Observation: [[12, 11], False, [['empty', 'agent', 'empty'], ['empty', 'empty', 'empty'], ['agent', 'empty', 'agent']], [[(13, 12), [['empty', 'empty', 'empty'], ['empty', 'agent', 'empty'], ['target_0_0', 'agent', 'empty']], 1], [(12, 9), [['empty', 'empty', 'empty'], ['agent', 'empty', 'empty'], ['empty', 'agent', 'empty']], 2], [(13, 12), [['empty', 'empty', 'empty'], ['empty', 'agent', 'empty'], ['target_0_0', 'agent', 'empty']], 3], [(11, 12), [['empty', 'empty', 'empty'], ['empty', 'agent', 'empty'], ['empty', 'empty', 'target_0_0']], 4]]], Action: (2, [(12, 10), [['agent', 'empty', 'empty'], ['empty', 'agent', 'empty'], ['empty', 'empty', 'empty']], 0]), Reward: -1, Done: False
  Agent 1: Observation: [[12, 12], False, [['empty', 'agent', 'empty'], ['agent', 'empty', 'agent'], ['empty', 'target_0_0', 'empty']], [[(12, 10), [['agent', 'empty', 'empty'], ['empty', 'agent', 'empty'], ['empty', 'empty', 'empty']], 0], [(12, 9), [['empty', 'empty', 'empty'], ['age

Step 32:
  Agent 0: Observation: [[13, 12], False, [['empty', 'agent', 'empty'], ['agent', 'agent', 'empty'], ['agent', 'empty', 'empty']], [[(12, 13), [['empty', 'agent', 'agent'], ['empty', 'target_0_0', 'empty'], ['empty', 'empty', 'empty']], 1], [(12, 10), [['empty', 'empty', 'empty'], ['empty', 'agent', 'empty'], ['empty', 'empty', 'agent']], 2], [(12, 12), [['empty', 'empty', 'agent'], ['empty', 'agent', 'agent'], ['empty', 'agent', 'empty']], 3], [(13, 12), [['empty', 'agent', 'empty'], ['agent', 'empty', 'empty'], ['agent', 'empty', 'empty']], 4]]], Action: (2, [(13, 11), [['agent', 'empty', 'empty'], ['agent', 'empty', 'empty'], ['agent', 'agent', 'empty']], 0]), Reward: -1, Done: False
  Agent 1: Observation: [[12, 13], False, [['empty', 'agent', 'agent'], ['empty', 'agent', 'empty'], ['empty', 'empty', 'empty']], [[(13, 11), [['agent', 'empty', 'empty'], ['agent', 'empty', 'empty'], ['agent', 'agent', 'empty']], 0], [(12, 10), [['empty', 'empty', 'empty'], ['empty', 'agent',

Step 34:
  Agent 0: Observation: [[12, 12], False, [['agent', 'empty', 'empty'], ['empty', 'agent', 'agent'], ['agent', 'agent', 'empty']], [[(11, 13), [['empty', 'empty', 'empty'], ['empty', 'empty', 'agent'], ['empty', 'empty', 'empty']], 1], [(11, 11), [['empty', 'empty', 'empty'], ['empty', 'empty', 'agent'], ['empty', 'empty', 'empty']], 2], [(12, 13), [['empty', 'empty', 'agent'], ['agent', 'agent', 'empty'], ['empty', 'empty', 'empty']], 3], [(12, 12), [['agent', 'empty', 'empty'], ['empty', 'empty', 'agent'], ['agent', 'agent', 'empty']], 4]]], Action: (3, [(13, 12), [['agent', 'empty', 'empty'], ['empty', 'agent', 'empty'], ['agent', 'empty', 'empty']], 0]), Reward: -1, Done: False
  Agent 1: Observation: [[11, 12], False, [['empty', 'agent', 'empty'], ['empty', 'empty', 'agent'], ['empty', 'agent', 'agent']], [[(13, 12), [['agent', 'empty', 'empty'], ['empty', 'agent', 'empty'], ['agent', 'empty', 'empty']], 0], [(11, 11), [['empty', 'empty', 'empty'], ['empty', 'empty', 'age

Step 36:
  Agent 0: Observation: [[12, 13], False, [['agent', 'empty', 'empty'], ['empty', 'agent', 'empty'], ['empty', 'empty', 'empty']], [[(11, 12), [['empty', 'empty', 'empty'], ['empty', 'agent', 'agent'], ['empty', 'empty', 'agent']], 1], [(11, 12), [['empty', 'empty', 'empty'], ['empty', 'agent', 'agent'], ['empty', 'empty', 'agent']], 2], [(12, 11), [['empty', 'empty', 'empty'], ['empty', 'empty', 'empty'], ['agent', 'agent', 'empty']], 3], [(12, 13), [['agent', 'empty', 'empty'], ['empty', 'agent', 'empty'], ['empty', 'empty', 'empty']], 4]]], Action: (0, [(12, 13), [['agent', 'agent', 'empty'], ['empty', 'agent', 'empty'], ['empty', 'empty', 'empty']], 0]), Reward: -1, Done: False
  Agent 1: Observation: [[12, 12], False, [['empty', 'agent', 'empty'], ['agent', 'empty', 'empty'], ['empty', 'agent', 'empty']], [[(12, 13), [['agent', 'agent', 'empty'], ['empty', 'agent', 'empty'], ['empty', 'empty', 'empty']], 0], [(11, 12), [['empty', 'empty', 'empty'], ['empty', 'agent', 'age

Step 38:
  Agent 0: Observation: [[11, 12], False, [['empty', 'empty', 'agent'], ['empty', 'empty', 'empty'], ['empty', 'agent', 'agent']], [[(12, 13), [['empty', 'agent', 'empty'], ['agent', 'target_2_0', 'empty'], ['empty', 'empty', 'empty']], 1], [(12, 13), [['empty', 'agent', 'empty'], ['agent', 'agent', 'empty'], ['empty', 'empty', 'empty']], 2], [(12, 10), [['empty', 'empty', 'empty'], ['empty', 'agent', 'empty'], ['empty', 'empty', 'empty']], 3], [(12, 11), [['empty', 'agent', 'empty'], ['empty', 'empty', 'empty'], ['empty', 'agent', 'empty']], 4]]], Action: (1, [(11, 13), [['empty', 'empty', 'agent'], ['empty', 'empty', 'agent'], ['empty', 'empty', 'empty']], 0]), Reward: -1, Done: False
  Agent 1: Observation: [[12, 13], False, [['agent', 'empty', 'empty'], ['empty', 'agent', 'empty'], ['empty', 'empty', 'empty']], [[(11, 13), [['empty', 'empty', 'agent'], ['empty', 'empty', 'agent'], ['empty', 'empty', 'empty']], 0], [(12, 13), [['empty', 'agent', 'empty'], ['agent', 'agent',

Step 40:
  Agent 0: Observation: [[12, 12], False, [['empty', 'agent', 'empty'], ['agent', 'agent', 'empty'], ['empty', 'agent', 'agent']], [[(13, 13), [['empty', 'empty', 'empty'], ['agent', 'empty', 'empty'], ['empty', 'empty', 'empty']], 1], [(12, 13), [['agent', 'empty', 'empty'], ['empty', 'agent', 'agent'], ['empty', 'empty', 'empty']], 2], [(12, 12), [['empty', 'agent', 'empty'], ['agent', 'empty', 'empty'], ['empty', 'agent', 'agent']], 3], [(12, 11), [['empty', 'agent', 'empty'], ['empty', 'empty', 'empty'], ['agent', 'agent', 'empty']], 4]]], Action: (4, [(11, 12), [['empty', 'empty', 'agent'], ['empty', 'agent', 'empty'], ['empty', 'empty', 'agent']], 0]), Reward: -1, Done: False
  Agent 1: Observation: [[12, 13], False, [['empty', 'agent', 'empty'], ['empty', 'agent', 'agent'], ['empty', 'empty', 'empty']], [[(11, 12), [['empty', 'empty', 'agent'], ['empty', 'agent', 'empty'], ['empty', 'empty', 'agent']], 0], [(12, 13), [['agent', 'empty', 'empty'], ['empty', 'agent', 'age

Step 42:
  Agent 0: Observation: [[12, 13], False, [['empty', 'empty', 'agent'], ['agent', 'agent', 'empty'], ['empty', 'empty', 'empty']], [[(11, 13), [['empty', 'empty', 'agent'], ['empty', 'empty', 'agent'], ['empty', 'empty', 'empty']], 1], [(13, 12), [['empty', 'empty', 'empty'], ['agent', 'empty', 'empty'], ['agent', 'agent', 'empty']], 2], [(12, 13), [['empty', 'agent', 'agent'], ['agent', 'agent', 'empty'], ['empty', 'empty', 'empty']], 3], [(12, 13), [['empty', 'agent', 'agent'], ['agent', 'agent', 'empty'], ['empty', 'empty', 'empty']], 4]]], Action: (0, [(12, 13), [['empty', 'agent', 'empty'], ['empty', 'agent', 'agent'], ['empty', 'empty', 'empty']], 0]), Reward: -1, Done: False
  Agent 1: Observation: [[10, 13], False, [['empty', 'empty', 'empty'], ['empty', 'empty', 'agent'], ['empty', 'empty', 'empty']], [[(12, 13), [['empty', 'agent', 'empty'], ['empty', 'agent', 'agent'], ['empty', 'empty', 'empty']], 0], [(13, 12), [['empty', 'empty', 'empty'], ['agent', 'empty', 'emp

Step 44:
  Agent 0: Observation: [[12, 13], False, [['empty', 'agent', 'empty'], ['empty', 'empty', 'empty'], ['empty', 'empty', 'empty']], [[(10, 12), [['empty', 'empty', 'empty'], ['empty', 'empty', 'empty'], ['empty', 'agent', 'empty']], 1], [(13, 11), [['empty', 'empty', 'empty'], ['empty', 'empty', 'empty'], ['agent', 'agent', 'empty']], 2], [(12, 11), [['empty', 'empty', 'empty'], ['empty', 'empty', 'agent'], ['empty', 'agent', 'empty']], 3], [(12, 12), [['empty', 'agent', 'agent'], ['empty', 'agent', 'empty'], ['empty', 'agent', 'empty']], 4]]], Action: (2, [(12, 12), [['empty', 'empty', 'empty'], ['empty', 'agent', 'agent'], ['empty', 'agent', 'empty']], 0]), Reward: -1, Done: False
  Agent 1: Observation: [[9, 12], False, [['empty', 'empty', 'empty'], ['empty', 'empty', 'agent'], ['empty', 'empty', 'empty']], [[(12, 12), [['empty', 'empty', 'empty'], ['empty', 'agent', 'agent'], ['empty', 'agent', 'empty']], 0], [(13, 11), [['empty', 'empty', 'empty'], ['empty', 'empty', 'empt

Step 46:
  Agent 0: Observation: [[11, 14], False, [['empty', 'empty', 'empty'], ['empty', 'empty', 'agent'], [None, None, None]], [[(8, 12), [['empty', 'empty', 'empty'], ['empty', 'empty', 'agent'], ['empty', 'empty', 'empty']], 1], [(12, 12), [['empty', 'agent', 'empty'], ['empty', 'empty', 'empty'], ['empty', 'empty', 'empty']], 2], [(11, 10), [['empty', 'empty', 'empty'], ['empty', 'empty', 'agent'], ['empty', 'empty', 'agent']], 3], [(12, 10), [['empty', 'empty', 'empty'], ['agent', 'empty', 'empty'], ['empty', 'agent', 'empty']], 4]]], Action: (3, [(12, 14), [['empty', 'agent', 'empty'], ['empty', 'empty', 'empty'], [None, None, None]], 0]), Reward: -1, Done: False
  Agent 1: Observation: [[7, 12], False, [['empty', 'empty', 'empty'], ['empty', 'empty', 'agent'], ['empty', 'empty', 'empty']], [[(12, 14), [['empty', 'agent', 'empty'], ['empty', 'empty', 'empty'], [None, None, None]], 0], [(12, 12), [['empty', 'agent', 'empty'], ['empty', 'empty', 'empty'], ['empty', 'empty', 'emp

Step 48:
  Agent 0: Observation: [[10, 13], False, [['empty', 'empty', 'empty'], ['empty', 'empty', 'agent'], ['empty', 'empty', 'empty']], [[(7, 13), [['empty', 'agent', 'empty'], ['empty', 'empty', 'empty'], ['empty', 'empty', 'empty']], 1], [(11, 13), [['empty', 'agent', 'empty'], ['empty', 'agent', 'empty'], ['empty', 'empty', 'empty']], 2], [(10, 9), [['empty', 'empty', 'empty'], ['empty', 'empty', 'agent'], ['empty', 'empty', 'agent']], 3], [(11, 9), [['empty', 'empty', 'empty'], ['agent', 'empty', 'empty'], ['empty', 'agent', 'empty']], 4]]], Action: (3, [(11, 13), [['empty', 'agent', 'empty'], ['empty', 'empty', 'empty'], ['empty', 'agent', 'empty']], 0]), Reward: -1, Done: False
  Agent 1: Observation: [[6, 13], False, [['empty', 'empty', 'empty'], ['empty', 'empty', 'agent'], ['empty', 'empty', 'empty']], [[(11, 13), [['empty', 'agent', 'empty'], ['empty', 'empty', 'empty'], ['empty', 'agent', 'empty']], 0], [(11, 13), [['empty', 'agent', 'empty'], ['empty', 'agent', 'empty']

Step 50:
  Agent 0: Observation: [[9, 14], False, [['empty', 'agent', 'empty'], ['empty', 'empty', 'empty'], [None, None, None]], [[(5, 13), [['empty', 'empty', 'empty'], ['empty', 'empty', 'agent'], ['empty', 'empty', 'empty']], 1], [(11, 13), [['empty', 'empty', 'empty'], ['agent', 'empty', 'empty'], ['empty', 'empty', 'empty']], 2], [(10, 8), [['empty', 'empty', 'empty'], ['empty', 'agent', 'empty'], ['empty', 'agent', 'empty']], 3], [(10, 8), [['empty', 'empty', 'empty'], ['empty', 'agent', 'empty'], ['empty', 'agent', 'empty']], 4]]], Action: (2, [(9, 13), [['empty', 'empty', 'empty'], ['empty', 'empty', 'agent'], ['empty', 'empty', 'empty']], 0]), Reward: -1, Done: False
  Agent 1: Observation: [[4, 13], False, [['empty', 'empty', 'empty'], ['empty', 'empty', 'agent'], ['empty', 'empty', 'empty']], [[(9, 13), [['empty', 'empty', 'empty'], ['empty', 'empty', 'agent'], ['empty', 'empty', 'empty']], 0], [(11, 13), [['empty', 'empty', 'empty'], ['agent', 'empty', 'empty'], ['empty', 

Step 52:
  Agent 0: Observation: [[10, 14], False, [['empty', 'empty', 'empty'], ['empty', 'agent', 'empty'], [None, None, None]], [[(3, 13), [['empty', 'empty', 'empty'], ['empty', 'empty', 'agent'], ['empty', 'empty', 'empty']], 1], [(12, 14), [['empty', 'agent', 'empty'], ['empty', 'empty', 'empty'], [None, None, None]], 2], [(10, 6), [['empty', 'empty', 'empty'], ['empty', 'empty', 'empty'], ['empty', 'agent', 'empty']], 3], [(10, 9), [['empty', 'agent', 'empty'], ['empty', 'empty', 'empty'], ['empty', 'empty', 'empty']], 4]]], Action: (2, [(10, 14), [['empty', 'empty', 'empty'], ['agent', 'empty', 'empty'], [None, None, None]], 0]), Reward: -1, Done: False
  Agent 1: Observation: [[3, 14], False, [['empty', 'agent', 'empty'], ['empty', 'empty', 'empty'], [None, None, None]], [[(10, 14), [['empty', 'empty', 'empty'], ['agent', 'empty', 'empty'], [None, None, None]], 0], [(12, 14), [['empty', 'agent', 'empty'], ['empty', 'empty', 'empty'], [None, None, None]], 2], [(10, 6), [['empty

Step 54:
  Agent 0: Observation: [[11, 13], False, [['empty', 'empty', 'empty'], ['empty', 'empty', 'empty'], ['empty', 'agent', 'empty']], [[(3, 13), [['empty', 'empty', 'empty'], ['empty', 'empty', 'empty'], ['empty', 'agent', 'empty']], 1], [(13, 14), [['empty', 'empty', 'empty'], ['agent', 'empty', 'empty'], [None, None, None]], 2], [(10, 4), [['empty', 'empty', 'empty'], ['empty', 'empty', 'empty'], ['empty', 'agent', 'empty']], 3], [(11, 10), [['empty', 'agent', 'empty'], ['empty', 'empty', 'empty'], ['empty', 'empty', 'empty']], 4]]], Action: (1, [(11, 14), [['empty', 'empty', 'empty'], ['agent', 'empty', 'agent'], [None, None, None]], 0]), Reward: -1, Done: False
  Agent 1: Observation: [[3, 12], False, [['target_0_0', 'target_3_0', 'empty'], ['empty', 'empty', 'empty'], ['empty', 'agent', 'empty']], [[(11, 14), [['empty', 'empty', 'empty'], ['agent', 'empty', 'agent'], [None, None, None]], 0], [(13, 14), [['empty', 'empty', 'empty'], ['agent', 'empty', 'empty'], [None, None, N

Step 56:
  Agent 0: Observation: [[9, 13], False, [['empty', 'empty', 'empty'], ['empty', 'empty', 'agent'], ['empty', 'empty', 'empty']], [[(4, 12), [['target_3_0', 'empty', 'target_2_0'], ['agent', 'empty', 'empty'], ['empty', 'empty', 'empty']], 1], [(13, 13), [['empty', 'empty', 'empty'], ['empty', 'empty', 'empty'], ['empty', 'agent', 'empty']], 2], [(11, 3), [['empty', 'empty', 'empty'], ['agent', 'empty', 'empty'], ['empty', 'empty', 'empty']], 3], [(9, 10), [['empty', 'empty', 'empty'], ['empty', 'empty', 'agent'], ['empty', 'empty', 'empty']], 4]]], Action: (3, [(10, 13), [['empty', 'empty', 'empty'], ['empty', 'empty', 'agent'], ['empty', 'empty', 'empty']], 0]), Reward: -1, Done: False
  Agent 1: Observation: [[5, 12], False, [['empty', 'target_2_0', 'empty'], ['agent', 'empty', 'empty'], ['empty', 'empty', 'empty']], [[(10, 13), [['empty', 'empty', 'empty'], ['empty', 'empty', 'agent'], ['empty', 'empty', 'empty']], 0], [(13, 13), [['empty', 'empty', 'empty'], ['empty', 'em

Step 58:
  Agent 0: Observation: [[7, 13], False, [['empty', 'empty', 'empty'], ['empty', 'empty', 'agent'], ['empty', 'empty', 'empty']], [[(5, 11), [['empty', 'empty', 'empty'], ['empty', 'target_2_0', 'empty'], ['empty', 'agent', 'empty']], 1], [(11, 13), [['empty', 'empty', 'empty'], ['empty', 'empty', 'agent'], ['empty', 'empty', 'empty']], 2], [(11, 2), [['empty', 'empty', 'empty'], ['empty', 'agent', 'empty'], ['empty', 'empty', 'empty']], 3], [(7, 10), [['empty', 'empty', 'empty'], ['empty', 'empty', 'agent'], ['empty', 'empty', 'empty']], 4]]], Action: (3, [(8, 13), [['empty', 'empty', 'empty'], ['empty', 'empty', 'agent'], ['empty', 'empty', 'empty']], 0]), Reward: -1, Done: False
  Agent 1: Observation: [[5, 10], False, [['empty', 'empty', 'empty'], ['empty', 'empty', 'empty'], ['empty', 'agent', 'empty']], [[(8, 13), [['empty', 'empty', 'empty'], ['empty', 'empty', 'agent'], ['empty', 'empty', 'empty']], 0], [(11, 13), [['empty', 'empty', 'empty'], ['empty', 'empty', 'agent

Step 60:
  Agent 0: Observation: [[6, 12], False, [['agent', 'empty', 'empty'], ['empty', 'empty', 'empty'], ['empty', 'agent', 'empty']], [[(5, 11), [['empty', 'agent', 'agent'], ['empty', 'target_2_0', 'empty'], ['empty', 'empty', 'empty']], 1], [(9, 13), [['empty', 'empty', 'empty'], ['empty', 'empty', 'agent'], ['empty', 'empty', 'empty']], 2], [(11, 4), [['empty', 'agent', 'empty'], ['empty', 'empty', 'empty'], ['empty', 'empty', 'empty']], 3], [(5, 10), [['empty', 'empty', 'empty'], ['empty', 'empty', 'agent'], ['empty', 'agent', 'empty']], 4]]], Action: (1, [(6, 13), [['empty', 'empty', 'empty'], ['empty', 'empty', 'agent'], ['empty', 'empty', 'empty']], 0]), Reward: -1, Done: False
  Agent 1: Observation: [[5, 11], False, [['empty', 'agent', 'empty'], ['empty', 'agent', 'empty'], ['empty', 'empty', 'agent']], [[(6, 13), [['empty', 'empty', 'empty'], ['empty', 'empty', 'agent'], ['empty', 'empty', 'empty']], 0], [(9, 13), [['empty', 'empty', 'empty'], ['empty', 'empty', 'agent']

Step 62:
  Agent 0: Observation: [[5, 11], False, [['empty', 'empty', 'empty'], ['empty', 'agent', 'agent'], ['empty', 'agent', 'empty']], [[(6, 11), [['empty', 'empty', 'empty'], ['agent', 'empty', 'empty'], ['agent', 'empty', 'empty']], 1], [(7, 13), [['empty', 'empty', 'empty'], ['empty', 'empty', 'agent'], ['empty', 'empty', 'empty']], 2], [(10, 5), [['empty', 'empty', 'empty'], ['empty', 'empty', 'agent'], ['empty', 'empty', 'empty']], 3], [(5, 11), [['empty', 'empty', 'empty'], ['empty', 'agent', 'agent'], ['empty', 'agent', 'empty']], 4]]], Action: (1, [(5, 12), [['empty', 'agent', 'empty'], ['empty', 'empty', 'agent'], ['empty', 'empty', 'empty']], 0]), Reward: -1, Done: False
  Agent 1: Observation: [[6, 12], False, [['agent', 'agent', 'empty'], ['empty', 'empty', 'empty'], ['empty', 'empty', 'agent']], [[(5, 12), [['empty', 'agent', 'empty'], ['empty', 'empty', 'agent'], ['empty', 'empty', 'empty']], 0], [(7, 13), [['empty', 'empty', 'empty'], ['empty', 'empty', 'agent'], ['e

Step 64:
  Agent 0: Observation: [[4, 11], False, [['empty', 'empty', 'empty'], ['agent', 'empty', 'agent'], ['empty', 'empty', 'agent']], [[(5, 12), [['agent', 'agent', 'empty'], ['empty', 'empty', 'agent'], ['empty', 'empty', 'agent']], 1], [(6, 12), [['agent', 'empty', 'empty'], ['agent', 'empty', 'empty'], ['empty', 'agent', 'empty']], 2], [(9, 6), [['empty', 'empty', 'empty'], ['empty', 'empty', 'agent'], ['empty', 'empty', 'empty']], 3], [(3, 11), [['empty', 'empty', 'empty'], ['target_0_0', 'target_3_0', 'agent'], ['empty', 'empty', 'empty']], 4]]], Action: (3, [(5, 11), [['empty', 'empty', 'empty'], ['agent', 'agent', 'empty'], ['empty', 'empty', 'agent']], 0]), Reward: -1, Done: False
  Agent 1: Observation: [[4, 12], False, [['agent', 'agent', 'target_2_0'], ['empty', 'empty', 'agent'], ['empty', 'empty', 'empty']], [[(5, 11), [['empty', 'empty', 'empty'], ['agent', 'agent', 'empty'], ['empty', 'empty', 'agent']], 0], [(6, 12), [['agent', 'empty', 'empty'], ['agent', 'empty',

Step 66:
  Agent 0: Observation: [[3, 11], False, [['empty', 'empty', 'empty'], ['agent', 'agent', 'empty'], ['empty', 'agent', 'empty']], [[(3, 12), [['target_0_0', 'agent', 'empty'], ['empty', 'empty', 'agent'], ['empty', 'empty', 'empty']], 1], [(5, 11), [['empty', 'empty', 'empty'], ['empty', 'empty', 'empty'], ['empty', 'agent', 'empty']], 2], [(8, 7), [['empty', 'empty', 'empty'], ['empty', 'empty', 'agent'], ['empty', 'empty', 'empty']], 3], [(2, 11), [['empty', 'empty', 'empty'], ['empty', 'target_0_0', 'agent'], ['empty', 'empty', 'agent']], 4]]], Action: (0, [(3, 11), [['empty', 'empty', 'empty'], ['target_0_0', 'agent', 'agent'], ['empty', 'empty', 'agent']], 0]), Reward: -1, Done: False
  Agent 1: Observation: [[3, 11], False, [['empty', 'empty', 'empty'], ['agent', 'agent', 'empty'], ['empty', 'agent', 'empty']], [[(3, 11), [['empty', 'empty', 'empty'], ['target_0_0', 'agent', 'agent'], ['empty', 'empty', 'agent']], 0], [(5, 11), [['empty', 'empty', 'empty'], ['empty', 'em

Step 68:
  Agent 0: Observation: [[2, 11], False, [['empty', 'empty', 'empty'], ['empty', 'agent', 'agent'], ['empty', 'empty', 'empty']], [[(3, 11), [['empty', 'empty', 'empty'], ['agent', 'agent', 'empty'], ['empty', 'empty', 'empty']], 1], [(4, 11), [['empty', 'empty', 'empty'], ['agent', 'empty', 'agent'], ['empty', 'empty', 'empty']], 2], [(7, 8), [['empty', 'empty', 'empty'], ['empty', 'empty', 'agent'], ['empty', 'empty', 'empty']], 3], [(2, 11), [['empty', 'empty', 'empty'], ['empty', 'agent', 'agent'], ['empty', 'empty', 'empty']], 4]]], Action: (0, [(2, 11), [['empty', 'empty', 'empty'], ['empty', 'agent', 'agent'], ['empty', 'empty', 'empty']], 0]), Reward: -1, Done: False
  Agent 1: Observation: [[2, 11], False, [['empty', 'empty', 'empty'], ['empty', 'agent', 'agent'], ['empty', 'empty', 'empty']], [[(2, 11), [['empty', 'empty', 'empty'], ['empty', 'agent', 'agent'], ['empty', 'empty', 'empty']], 0], [(4, 11), [['empty', 'empty', 'empty'], ['agent', 'empty', 'agent'], ['em

Step 70:
  Agent 0: Observation: [[3, 11], False, [['empty', 'empty', 'empty'], ['agent', 'agent', 'agent'], ['empty', 'empty', 'empty']], [[(2, 11), [['empty', 'empty', 'empty'], ['empty', 'agent', 'agent'], ['empty', 'empty', 'empty']], 1], [(3, 11), [['empty', 'empty', 'empty'], ['agent', 'agent', 'empty'], ['empty', 'empty', 'empty']], 2], [(6, 9), [['empty', 'empty', 'empty'], ['empty', 'empty', 'agent'], ['empty', 'empty', 'empty']], 3], [(4, 11), [['empty', 'empty', 'empty'], ['agent', 'empty', 'empty'], ['empty', 'empty', 'empty']], 4]]], Action: (0, [(3, 11), [['empty', 'empty', 'empty'], ['agent', 'agent', 'empty'], ['empty', 'empty', 'empty']], 0]), Reward: -1, Done: False
  Agent 1: Observation: [[3, 11], False, [['empty', 'empty', 'empty'], ['agent', 'agent', 'agent'], ['empty', 'empty', 'empty']], [[(3, 11), [['empty', 'empty', 'empty'], ['agent', 'agent', 'empty'], ['empty', 'empty', 'empty']], 0], [(3, 11), [['empty', 'empty', 'empty'], ['agent', 'agent', 'empty'], ['em

Step 72:
  Agent 0: Observation: [[5, 11], False, [['empty', 'agent', 'empty'], ['agent', 'agent', 'empty'], ['empty', 'empty', 'empty']], [[(3, 11), [['empty', 'empty', 'empty'], ['agent', 'agent', 'agent'], ['empty', 'empty', 'empty']], 1], [(2, 11), [['empty', 'empty', 'empty'], ['empty', 'agent', 'agent'], ['empty', 'empty', 'empty']], 2], [(5, 10), [['empty', 'empty', 'empty'], ['empty', 'empty', 'agent'], ['agent', 'agent', 'empty']], 3], [(5, 11), [['empty', 'agent', 'empty'], ['agent', 'agent', 'empty'], ['empty', 'empty', 'empty']], 4]]], Action: (4, [(4, 11), [['empty', 'empty', 'empty'], ['agent', 'empty', 'agent'], ['empty', 'empty', 'empty']], 0]), Reward: -1, Done: False
  Agent 1: Observation: [[4, 11], False, [['empty', 'empty', 'agent'], ['agent', 'empty', 'agent'], ['empty', 'empty', 'empty']], [[(4, 11), [['empty', 'empty', 'empty'], ['agent', 'empty', 'agent'], ['empty', 'empty', 'empty']], 0], [(2, 11), [['empty', 'empty', 'empty'], ['empty', 'agent', 'agent'], ['e

Step 74:
  Agent 0: Observation: [[4, 11], False, [['empty', 'empty', 'empty'], ['agent', 'empty', 'agent'], ['empty', 'empty', 'empty']], [[(5, 11), [['empty', 'empty', 'empty'], ['agent', 'agent', 'empty'], ['empty', 'empty', 'empty']], 1], [(2, 11), [['empty', 'empty', 'empty'], ['agent', 'empty', 'target_3_0'], ['empty', 'empty', 'empty']], 2], [(5, 11), [['empty', 'empty', 'empty'], ['agent', 'agent', 'empty'], ['empty', 'empty', 'empty']], 3], [(3, 11), [['empty', 'empty', 'empty'], ['agent', 'target_3_0', 'agent'], ['empty', 'empty', 'empty']], 4]]], Action: (3, [(5, 11), [['empty', 'empty', 'empty'], ['agent', 'agent', 'empty'], ['empty', 'empty', 'empty']], 0]), Reward: -1, Done: False
  Agent 1: Observation: [[5, 11], False, [['empty', 'empty', 'empty'], ['agent', 'agent', 'empty'], ['empty', 'empty', 'empty']], [[(5, 11), [['empty', 'empty', 'empty'], ['agent', 'agent', 'empty'], ['empty', 'empty', 'empty']], 0], [(2, 11), [['empty', 'empty', 'empty'], ['agent', 'empty', 'ta

Step 76:
  Agent 0: Observation: [[3, 11], True, [['empty', 'empty', 'empty'], ['empty', 'agent', 'agent'], ['empty', 'empty', 'empty']], [[(4, 11), [['empty', 'empty', 'empty'], ['agent', 'agent', 'agent'], ['empty', 'empty', 'empty']], 1], [(3, 11), [['empty', 'empty', 'empty'], ['empty', 'agent', 'agent'], ['empty', 'empty', 'empty']], 2], [(3, 11), [['empty', 'empty', 'empty'], ['empty', 'agent', 'agent'], ['empty', 'empty', 'empty']], 3], [(3, 11), [['empty', 'empty', 'empty'], ['empty', 'agent', 'agent'], ['empty', 'empty', 'empty']], 4]]], Action: (0, [(3, 11), [['empty', 'empty', 'empty'], ['empty', 'agent', 'agent'], ['empty', 'empty', 'empty']], 0]), Reward: -1, Done: False
  Agent 1: Observation: [[3, 11], True, [['empty', 'empty', 'empty'], ['empty', 'agent', 'agent'], ['empty', 'empty', 'empty']], [[(3, 11), [['empty', 'empty', 'empty'], ['empty', 'agent', 'agent'], ['empty', 'empty', 'empty']], 0], [(3, 11), [['empty', 'empty', 'empty'], ['empty', 'agent', 'agent'], ['emp