In [None]:
"""
Multi-Agent Dynamic Grid World Environment
Created by: Ardianto Wibowo
"""

import numpy as np
import sys

# Add the path to the 'env' folder to sys.path
sys.path.append('env')

from ma_gridworld import Env


def get_action(num_actions):
    """
    Sample a random (physical, communication) action pair for one agent.

    The physical action is drawn uniformly from ``range(num_actions)``.
    According to the original author's note, the ma-gridworld environment
    interprets it as 0: stay, 1: up, 2: down, 3: left, 4: right.

    The communication action is an empty list when the global ``env`` reports
    ``is_agent_silent``; otherwise it is a second uniform draw from
    ``range(num_actions)``.

    Returns:
        tuple: ``(physical_action, comm_action)``.
    """
    # Placeholder policy: a uniformly random move.
    move = np.random.choice(num_actions)

    # Silent agents send nothing; talking agents send a random placeholder message.
    message = [] if env.is_agent_silent else np.random.choice(num_actions)

    return move, message


def run(num_episodes, max_steps_per_episode):
    """
    Run random-policy episodes on the global ``env`` and log every step.

    Each episode resets the environment, then repeatedly samples one action
    per agent with ``get_action``, steps the environment, renders it and
    prints a per-agent status line. An episode ends when every agent is done
    or when ``max_steps_per_episode`` steps have been taken.

    Relies on the module-level names ``env`` and ``num_actions``.

    Args:
        num_episodes: Number of episodes to run.
        max_steps_per_episode: Upper bound on the steps taken in one episode.

    Returns:
        None.
    """
    for episode in range(num_episodes):
        print(f"Starting episode {episode + 1}")
        observations = env.reset()  # Reset the environment at the start of each episode

        done = [False] * env.num_agents  # One 'done' flag per agent
        step_count = 0

        # Stop when all agents are done or the step budget is exhausted.
        while not all(done) and step_count < max_steps_per_episode:
            # One independently sampled action per agent.
            actions = [get_action(num_actions) for _ in range(env.num_agents)]

            observations, rewards, done = env.step(actions)  # Step in the environment
            step_count += 1

            # Render the environment
            env.render()

            print(f"Step {step_count}:")
            for agent_id in range(env.num_agents):
                # Show only this agent's own observation; env.step() is assumed
                # to return one entry per agent, like rewards and done.
                print(f"  Agent {agent_id}: Observation: {observations[agent_id]}, Action: {actions[agent_id]}, Reward: {rewards[agent_id]}, Done: {done[agent_id]}")

        print(f"Episode {episode + 1} finished after {step_count} steps.\n")


if __name__ == "__main__":
    # Script entry point: configure the grid world, build the environment,
    # run random-policy episodes and optionally close the animation window.

    gsize = 15  # grid size (square)
    gpixels = 30  # grid cell size in pixels

    is_sensor_active = True  # True: activate the sensory observation data
    # Only used when 'is_sensor_active' is True. The value must be odd; an even
    # value will be converted to the next odd number above it.
    sensory_size = 3

    num_agents = 3  # number of agents that run in parallel
    num_obstacles = 10  # number of obstacles
    is_single_target = True  # True: all agents share a single target, False: each agent has its own target
    num_targets_per_agent = 2  # 'is_single_target' must be true to have an effect

    # True: agents are silent (get_action sends an empty communication action);
    # False: communication among agents is allowed.
    is_agent_silent = True

    num_episodes = 150  # number of episodes to run
    max_steps_per_episode = 400  # each episode is stopped when this step count is reached

    eps_moving_targets = 151  # set greater than 'num_episodes' to keep the targets stationary
    eps_moving_obstacles = 151  # set greater than 'num_episodes' to keep the obstacles stationary

    # True: render the animation on screen. Rendering cannot be deactivated yet:
    # this flag is not passed to Env and run() always calls env.render().
    render = True

    min_obstacle_distance_from_target = 1  # min grid distance of each obstacle relative to targets
    max_obstacle_distance_from_target = 5  # max grid distance of each obstacle relative to targets
    min_obstacle_distance_from_agents = 1  # min grid distance of each obstacle relative to agents

    reward_normal = -1  # reward for a normal step
    reward_obstacle = -5  # reward when hitting an obstacle
    reward_target = 50  # reward when reaching the target

    # True: initial positions and movements of targets and obstacles are random
    # on every call; False: they are random only at the beginning.
    is_totally_random = False
    animation_speed = 0.0000001  # smaller is faster
    is_destroy_environment = True  # True: automatically close the animation after all episodes end

    # Initialize environment
    env = Env(
        num_agents=num_agents, num_targets_per_agent=num_targets_per_agent, num_obstacles=num_obstacles,
        eps_moving_obstacles=eps_moving_obstacles, eps_moving_targets=eps_moving_targets,
        is_agent_silent=is_agent_silent, is_single_target=is_single_target, sensory_size=sensory_size,
        gpixels=gpixels, gheight=gsize, gwidth=gsize, is_sensor_active=is_sensor_active,
        min_obstacle_distance_from_target=min_obstacle_distance_from_target,
        max_obstacle_distance_from_target=max_obstacle_distance_from_target,
        min_obstacle_distance_from_agents=min_obstacle_distance_from_agents,
        is_totally_random=is_totally_random, animation_speed=animation_speed,
        reward_normal=reward_normal, reward_obstacle=reward_obstacle, reward_target=reward_target
    )

    # get_action() and run() read 'env' and 'num_actions' as module-level globals.
    num_actions = len(env.action_space)

    # run() only logs to stdout and returns None, so its result is not kept.
    run(num_episodes, max_steps_per_episode)

    if is_destroy_environment:
        env.destroy_environment()


Starting episode 1
Step 1:
  Agent 0: Observation: [[[0, 0], False, [[None, None, None], [None, 'agent', 'empty'], [None, 'empty', 'empty']], []], [[14, 0], False, [[None, None, None], ['empty', 'agent', None], ['empty', 'empty', None]], []], [[14, 13], False, [['empty', 'empty', None], ['empty', 'empty', None], ['empty', 'agent', None]], []]], Action: (0, []), Reward: -1, Done: False
  Agent 1: Observation: [[[0, 0], False, [[None, None, None], [None, 'agent', 'empty'], [None, 'empty', 'empty']], []], [[14, 0], False, [[None, None, None], ['empty', 'agent', None], ['empty', 'empty', None]], []], [[14, 13], False, [['empty', 'empty', None], ['empty', 'empty', None], ['empty', 'agent', None]], []]], Action: (1, []), Reward: -1, Done: False
  Agent 2: Observation: [[[0, 0], False, [[None, None, None], [None, 'agent', 'empty'], [None, 'empty', 'empty']], []], [[14, 0], False, [[None, None, None], ['empty', 'agent', None], ['empty', 'empty', None]], []], [[14, 13], False, [['empty', 'empty

KeyboardInterrupt: 