In [8]:
"""
Multi-Agent Dynamic Grid World Environment
Created by: Ardianto Wibowo
"""

import numpy as np
import sys
import random
import sys

# فتح ملف نصي لكتابة النتائج
log_file = open("output_results.txt", "w")

# إعادة توجيه المخرجات المطبوعة إلى الملف النصي
sys.stdout = log_file
# Add the path to the 'env' folder to sys.path
sys.path.append('env')

from ma_gridworld import Env

# Define color dictionary to map targets to colors
color_dict = {
    1: "blue",
    2: "green",
    3: "orange",
    4: "purple",
    5: "cyan",
    6: "magenta"
}

class SearchAgent:
    def __init__(self, num_actions, target_color, noise_probability):
        self.num_actions = num_actions
        self.targets_seen = []  # List of seen targets
        self.memory = []  # Memory to track visited targets
        self.target_color = target_color  # The color this agent is looking for
        self.noise_probability = noise_probability  # Probability of noise in data
    
    # الدالة الجديدة
    def get_target_color(self, target_coordinate):
        """
        Get the color of the target based on its data in memory or predefined rules.
        """
        for record in self.memory:
            if record["location"] == target_coordinate:
                target_data = record["data"]
                target_id = int(target_data.split('_')[1])  # Extract the target ID
                return color_dict.get(target_id, None)  # Return the corresponding color
        return None  # Return None if the target is not in memory
    
    # باقي الدوال كما هي


  
    def analyse_sensor_data(self, agent_id, coordinate_observation, sensor_data_observation):
        for i in range(len(sensor_data_observation)):
            for j in range(len(sensor_data_observation[i])):
                data = sensor_data_observation[i][j]
                location = [
                    coordinate_observation[0] + j - len(sensor_data_observation[i]) // 2,
                    coordinate_observation[1] + i - len(sensor_data_observation) // 2
                ]
                if data is not None and 'target_' in data:  # التحقق من وجود هدف
                    self.update_memory(location, data, agent_id)  # استدعاء update_memory

                # تطبيق الضوضاء إذا كانت مفعلة
                if is_noise_enabled and random.random() < self.noise_probability:
                    noisy_location = [
                        location[0] + np.random.choice([-1, 0, 1]),
                        location[1] + np.random.choice([-1, 0, 1])
                    ]
                    print(f"Agent {agent_id}: Noise applied. Original: {location}, Noisy: {noisy_location}")
                    location = noisy_location

                # إزالة الأهداف التي تم جمعها
                if location[0] == coordinate_observation[0] and location[1] == coordinate_observation[1]:
                    if location in self.targets_seen:
                        self.targets_seen.remove(location)



        
    def analyse_communication(self, comm_observation, agent_id):
        """
        Analyze communication data and apply noise if enabled.
        """
        for comm in comm_observation:
            origin_location = comm[0]
            sensor_data_observation = comm[1]
            reported_by = comm[2]

            print(f"Agent {agent_id}: Received communication from Agent {reported_by} at {origin_location}")

            for i in range(len(sensor_data_observation)):
                for j in range(len(sensor_data_observation[i])):
                    data = sensor_data_observation[i][j]
                    location = [
                        origin_location[0] + j - len(sensor_data_observation[i]) // 2,
                        origin_location[1] + i - len(sensor_data_observation) // 2
                    ]

                    # Apply noise based on probability
                    if is_noise_enabled and random.random() < self.noise_probability:
                        noisy_location = [
                            location[0] + random.choice([-1, 0, 1]),
                            location[1] + random.choice([-1, 0, 1])
                        ]
                        print(f"Agent {agent_id}: Noise applied. Original: {location}, Noisy: {noisy_location}")
                        location = noisy_location

                    # Extract color and check if it matches the target
                    target_color = color_dict.get(int(data.split('_')[1]), None) if data and '_' in data else None
                    if target_color == self.target_color:
                        if location not in self.targets_seen:
                            self.targets_seen.append(location)
                            print(f"Agent {agent_id}: Added target at {location} with color {target_color} from Agent {reported_by}")
                  

                           
                            
    def update_memory(self, location, data, agent_id):

        print(f"Agent {agent_id} is updating memory for location {location} with data {data}")
        if location not in [record["location"] for record in self.memory]:
            # add
            self.memory.append({"location": location, "data": data})
            print(f"Agent {agent_id} Updated Memory: {location} -> {data}")





    def select_action(self, coordinate_observation, agent_id):
        print(f"Agent {agent_id}: Current targets seen: {self.targets_seen}")
        if len(self.targets_seen) > 0:
            closest_target = None
            closest_target_distance = 999999
            for target_coordinate in self.targets_seen:
                # حساب المسافة المانهاتنية
                horizontal_distance = target_coordinate[0] - coordinate_observation[0]
                vertical_distance = target_coordinate[1] - coordinate_observation[1]
                distance = abs(horizontal_distance) + abs(vertical_distance)
    
                # إضافة شروط إضافية هنا
                target_color = self.get_target_color(target_coordinate)  # احصل على لون الهدف
                if target_color != self.target_color:
                    print(f"Agent {agent_id}: Skipping target at {target_coordinate} (Color mismatch)")
                    continue  # تخطي الهدف إذا كان اللون لا يطابق
                
                # إضافة عقوبات/أوزان
                if target_coordinate[0] < 2 or target_coordinate[1] < 2:  # إذا كان قريبًا من الحدود
                    distance += 5  # إضافة عقوبة
    
                # التحقق من أقرب هدف
                if distance < closest_target_distance:
                    closest_target_distance = distance
                    closest_target = target_coordinate
    
            if closest_target:
                print(f"Agent {agent_id}: Moving towards target at {closest_target}")
                horizontal_distance = closest_target[0] - coordinate_observation[0]
                vertical_distance = closest_target[1] - coordinate_observation[1]
                if abs(horizontal_distance) >= abs(vertical_distance):
                    return 3 if horizontal_distance < 0 else 4
                else:
                    return 1 if vertical_distance < 0 else 2
        else:
            print(f"Agent {agent_id}: No targets seen, taking random action.")
            return np.random.choice(self.num_actions - 1) + 1






def get_action(agent_id, observation, num_actions, agents, env):
    """
    This method provides a random action chosen recognized by the ma-gridworld environment:
    1: up, 2: down, 3: left, 4: right, 0: stay
    """
    
    coordinate_observation = tuple(observation[0])  # Keep observation as (x, y) tuple

    # Optional observation data may be used, depend on the agent needs.
    win_state_observation = observation[1]
    sensor_data_observation = observation[2]
    comm_observation = observation[3]
    

    print(f"Observation for Agent {agent_id}: {observation}")

    # Check if communication data is available
    if comm_observation:
        agents[agent_id].analyse_communication(comm_observation, agent_id)
    else:
        print(f"Agent {agent_id}: No communication data available this step.")

    # Analyze sensor data
    agents[agent_id].analyse_sensor_data(agent_id, coordinate_observation, sensor_data_observation)
    
    # Select physical action
    physical_action = agents[agent_id].select_action(coordinate_observation, agent_id)

    # Define communication action
    if env.is_agent_silent:
        comm_action = []  # Communication action is set to be zero if agent silent
    else:
        comm_action = [coordinate_observation, sensor_data_observation, agent_id] 
    
    return (physical_action, comm_action)


def run(num_episodes, max_steps_per_episode, agents, num_actions, env):
    for episode in range(num_episodes):
        print(f"Starting episode {episode + 1}")
        observations = env.reset()  # Reset the environment at the start of each episode
        
        # Reset targets_seen and memory for each agent
        for agent in agents:
            agent.targets_seen = []  # Reset the seen targets
            agent.memory = []  # Reset the memory
        
        done = [False] * env.num_agents  # Initialize 'done' as a list for each agent
        step_count = 0

        while not all(done) and step_count < max_steps_per_episode:  # Stop if all agents are done or max steps reached
            actions = []
            next_observations = []
            
            for agent_id in range(env.num_agents):
                observation = observations[agent_id]
                action = get_action(agent_id, observation, num_actions, agents, env)
                
                actions.append(action)
                next_observations.append(observation)

            next_observations, rewards, done = env.step(actions)  # Step in the environment

            observations = next_observations
            step_count += 1

            # Render the environment
            env.render()

            # طباعة النتائج على الشاشة
            print(f"Step {step_count}:")
            for agent_id in range(env.num_agents):
                print(
                    f"  Agent {agent_id}: Observation: {observations[agent_id]}, "
                    f"Action: {actions[agent_id]}, Reward: {rewards[agent_id]}, Done: {done[agent_id]}"
                )

        # طباعة عدد الخطوات عند نهاية الحلقة
        print(f"Episode {episode + 1} finished after {step_count} steps.\n")
        
        sys.stdout = sys.__stdout__
        log_file.close()

        print("تم تخزين جميع النتائج في ملف output_results.txt.")




if __name__ == "__main__":

    gsize=15 #grid size (square)
    gpixels=30 #grid cell size in pixels

    is_sensor_active = True #True:  Activate the sensory observation data
    sensory_size = 3 #'is_sensor_active' must be True. The value must be odd, if event will be converted to one level odd number above
    
    num_agents = 10 #the number of agents will be run in paralel
    num_obstacles = 0 #the number of obstacles
    is_single_target = False #True: all agents have a single target, False: each agent has their own target
    num_targets_per_agent = 5 #'is_single_target' must be true to have an effect
    
    is_agent_silent = False #True: communication among agents is allowed
    
    
    is_noise_enabled = True  # تفعيل الضوضاء
    
   

    num_episodes=1 #the number of episode will be run
    max_steps_per_episode=1000 #each episode will be stopped when max_step is reached

    eps_moving_targets = 10 #set this value greater than 'num_episodes' to keep the targets in a stationary position
    eps_moving_obstacles = 10 #set this value greater than 'num_episodes' to keep the obstacles in a stationary position

    render = True #True: render the animation into the screen (so far, it is still can not be deactivated)

    min_obstacle_distance_from_target = 1 #min grid distance of each obstacles relative to targets
    max_obstacle_distance_from_target = 5 #max grid distance of each obstacles relative to targets
    min_obstacle_distance_from_agents = 1 #min grid distance of each obstacles relative to agents

    reward_normal = -1 #reward value of normal steps
    reward_obstacle = -5 #reward value when hit an obstacle
    reward_target = 50 #reward value when reach the target

    is_totally_random = True #True: target and obstacles initial as well as movement position is always random on each call, False: only random at the beginning. 
    animation_speed = 0.1 #smaller is faster 
    is_destroy_environment = True #True: automatically close the animation after all episodes end.  

    # Initialize environment
    env = Env(
        num_agents=num_agents, num_targets_per_agent=num_targets_per_agent, num_obstacles=num_obstacles,
        eps_moving_obstacles=eps_moving_obstacles, eps_moving_targets=eps_moving_targets,
        is_agent_silent=is_agent_silent, is_single_target=is_single_target, sensory_size=sensory_size,
        gpixels=gpixels, gheight=gsize, gwidth=gsize, is_sensor_active=is_sensor_active,
        min_obstacle_distance_from_target=min_obstacle_distance_from_target,
        max_obstacle_distance_from_target=max_obstacle_distance_from_target,
        min_obstacle_distance_from_agents=min_obstacle_distance_from_agents,
        is_totally_random=is_totally_random, animation_speed=animation_speed,
        reward_normal=reward_normal, reward_obstacle=reward_obstacle, reward_target=reward_target
    )
    
    num_actions = len(env.action_space)
    agents = [
        SearchAgent(num_actions, target_color=color_dict[(i % len(color_dict)) + 1], noise_probability=0.2)
        for i in range(num_agents)
    ]
    run(num_episodes, max_steps_per_episode, agents, num_actions, env)

    if env.is_destroy_environment:
        env.destroy_environment()


AttributeError: '_tkinter.tkapp' object has no attribute 'is_destroy_environment'