In [2]:
import numpy as np

In [3]:
class Agent:
    """A single prey or predator that moves on a 2-D plane.

    Attributes set by the constructor:
        index: Identifier of the agent within its population.
        agent: Population name passed by the caller (e.g. 'prey').
        movement_speed: Distance covered by one move.
        previous_position: Position recorded by the last ``agent_update``.
        current_position: float32 ``[x, y]`` array; ``None`` until
            ``agent_reset`` has been called.
        same_position: Flag maintained by ``agent_update``.
    The remaining attributes (``health``, ``isHit``, ``move``,
    ``current_step``, ``action``) are initialised here but not used by any
    method of this class.
    """

    def __init__(self, agent_name, agent_index):
        self.index = agent_index
        self.agent = agent_name
        self.health = None
        self.isHit = False
        self.move = True
        self.movement_speed = 1.00
        self.previous_position = np.array([0, 0], dtype=np.float32)
        self.current_position = None
        self.same_position = False
        self.current_step = 0
        self.action = None

    def agent_action(self, action):
        """Placeholder hook; currently does nothing."""
        pass

    def agent_update(self, step, action, width, height):
        """Track whether the agent moved and, if it did, apply ``action``.

        Nothing happens for ``step <= 0``. Otherwise the current position is
        compared with the previously recorded one: if they differ, the
        current position is recorded, ``same_position`` is cleared and the
        action (when given) is applied; if they are equal, ``same_position``
        is set and the agent is not moved.

        Args:
            step: Step counter of the caller.
            action: Move code understood by ``step_update``, or ``None`` to
                skip the move.
            width: Upper bound for the x coordinate after the move.
            height: Upper bound for the y coordinate after the move.
        """
        if step <= 0:
            return

        # A move along a single axis changes only one coordinate, so any
        # differing coordinate counts as movement.
        if np.array_equal(self.previous_position, self.current_position):
            self.same_position = True
            return

        # Copy: step_update mutates current_position in place, and a shared
        # reference would make both positions permanently equal.
        self.previous_position = self.current_position.copy()
        self.same_position = False

        # ``0`` is a valid action, so test against None rather than truthiness.
        if action is not None:
            self.step_update(action, width, height)

    def agent_reset(self, width, height):
        """Place the agent at a uniformly random position inside the area.

        Args:
            width: Horizontal extent; x is drawn from
                ``[padding, width - padding)``.
            height: Vertical extent; y is drawn from
                ``[padding, height - padding)``.
        """
        padding = 30
        self.current_position = np.array(
            [np.random.uniform(padding, width - padding),
             np.random.uniform(padding, height - padding)],
            dtype=np.float32)

    def step_update(self, action, range_x, range_y):
        """Move one ``movement_speed`` step and clamp to the allowed area.

        Args:
            action: 0 decreases x, 1 increases x, 2 decreases y,
                3 increases y; any other value leaves the position
                unchanged apart from clamping.
            range_x: Largest allowed x coordinate (smallest is 0).
            range_y: Largest allowed y coordinate (smallest is 0).
        """
        if action == 0:
            self.current_position[0] -= self.movement_speed
        elif action == 1:
            self.current_position[0] += self.movement_speed
        elif action == 2:
            self.current_position[1] -= self.movement_speed
        elif action == 3:
            self.current_position[1] += self.movement_speed

        self.current_position[0] = np.clip(self.current_position[0], 0, range_x)
        self.current_position[1] = np.clip(self.current_position[1], 0, range_y)

In [4]:

from gymnasium.spaces import Discrete, Box, MultiDiscrete
from gymnasium import Env
import numpy as np
import pygame

In [11]:
class GameEnv(Env):
    """Pygame-rendered prey/predator environment.

    The environment owns two agent populations (prey and predator). Agents
    are not created by the constructor: call ``set_agent_number`` (optional)
    and then ``agent_init`` before ``reset``/``step``.

    NOTE(review): ``observation_space`` is a 4-element Box and
    ``action_space`` a single ``Discrete(4)``, but ``reset`` returns a list
    of ``[index, name, position]`` entries, ``step`` returns a list of
    per-agent dicts, and ``step`` expects a ``(prey_actions,
    predator_actions)`` pair. The declared spaces therefore do not describe
    what the methods exchange; confirm the intended format before using this
    class with a single-agent RL library.
    """

    def __init__(self, screen_width=400, screen_height=400, render_mode='human'):
        """Create the window and the bookkeeping state.

        Args:
            screen_width: Window width in pixels; also the x bound of the
                observation space.
            screen_height: Window height in pixels; also the y bound of the
                observation space.
            render_mode: Drawing happens only when this equals ``'human'``.
        """
        super().__init__()

        # Screen dimensions, used for rendering and for the space bounds.
        self.screen_width = screen_width
        self.screen_height = screen_height
        self.render_mode = render_mode

        # Per-agent action set and the declared observation bounds.
        self.action_space = Discrete(4)
        upper_bounds = np.array(
            [self.screen_width, self.screen_height, self.screen_width, self.screen_height],
            dtype=np.float32)
        self.observation_space = Box(low=np.array([0, 0, 0, 0], dtype=np.float32),
                                     high=upper_bounds,
                                     dtype=np.float32)

        # Population sizes; the agent lists stay empty until agent_init().
        self.number_of_prey = 1
        self.number_of_predator = 1
        self.prey_agents = []
        self.predator_agents = []
        # Total is prey plus predators (previously counted prey twice).
        self.number_of_agents = self.number_of_prey + self.number_of_predator

        # Obstacles are not implemented yet.
        self.total_obstacles = None

        # Number of step() calls since the last reset().
        self.total_steps = 0

        # The window is opened here regardless of render_mode.
        pygame.init()
        self.screen = pygame.display.set_mode((self.screen_width, self.screen_height))
        pygame.display.set_caption('Multi Agent Environment(simple)')
        self.clock = pygame.time.Clock()

        pygame.font.init()
        self.font = pygame.font.Font(None, 36)

    def agent_init(self):
        """(Re)create both populations according to the configured sizes."""
        self.prey_agents = [Agent('prey', i) for i in range(self.number_of_prey)]
        self.predator_agents = [Agent('predator', i) for i in range(self.number_of_predator)]

    def set_agent_number(self, prey_number, predator_number):
        """Set the population sizes used by the next ``agent_init`` call.

        Existing agent lists are left untouched until ``agent_init`` runs.
        """
        self.number_of_predator = predator_number
        self.number_of_prey = prey_number
        # Keep the derived total consistent with the new sizes.
        self.number_of_agents = prey_number + predator_number

    def reset(self, seed=0, options=None):
        """Randomly reposition every agent and restart the step counter.

        Args:
            seed: Returned unchanged as the second tuple element; it is not
                used to seed any random generator here.
            options: Accepted for compatibility with the Gymnasium ``reset``
                signature; ignored.

        Returns:
            ``(observation, seed)`` where ``observation`` is a list of
            ``[index, name, position]`` entries, prey first.
            NOTE(review): Gymnasium documents the second element as an info
            dict; the seed is kept here so existing callers are unaffected.
        """
        self.total_steps = 0
        observation = []

        for agent in self.prey_agents + self.predator_agents:
            agent.agent_reset(width=self.screen_width, height=self.screen_height)
            observation.append([agent.index, agent.agent, agent.current_position])

        return observation, seed

    def _move_agents(self, agents, actions):
        """Apply one action per agent and return their observation dicts."""
        # Agents are drawn with radius 10, hence the 10-pixel margin.
        max_x = self.screen_width - 10
        max_y = self.screen_height - 10

        entries = []
        # zip stops at the shorter sequence, so surplus agents or actions
        # are silently skipped.
        for agent, agent_action in zip(agents, actions):
            agent.step_update(action=agent_action, range_x=max_x, range_y=max_y)
            entries.append({'index': agent.index, 'name': agent.agent, 'position': agent.current_position})
        return entries

    def step(self, action):
        """Advance every agent by one move and redraw the window.

        Args:
            action: A pair ``(prey_actions, predator_actions)``; each item
                is a sequence with one move code per agent.

        Returns:
            ``(observation, reward, done, truncated, info)`` where
            ``observation`` is a list of per-agent dicts (prey first) and
            the remaining values are the constants ``0.0``, ``False``,
            ``False`` and ``{}``.
        """
        # Closing the window shuts pygame down and exits the interpreter.
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                quit()

        prey_actions, predator_actions = action

        observation = self._move_agents(self.prey_agents, prey_actions)
        observation += self._move_agents(self.predator_agents, predator_actions)

        self.total_steps += 1

        done = False
        reward = 0.00
        truncated = False
        info = {}

        self.render()

        return observation, reward, done, truncated, info

    def render(self):
        """Draw all agents when ``render_mode`` is ``'human'``.

        Prey are blue circles and predators red circles, both of radius 10,
        on a white background.
        """
        if self.render_mode != 'human':
            return

        screen = self.screen
        radius = 10

        # Clear the previous frame.
        screen.fill((255, 255, 255))

        populations = (
            (self.prey_agents, (0, 0, 255)),
            (self.predator_agents, (255, 0, 0)),
        )
        for agents, colour in populations:
            for agent in agents:
                pos_x, pos_y = agent.current_position
                pygame.draw.circle(screen, colour, (int(pos_x), int(pos_y)), radius)

        pygame.display.update()

    def close(self):
        """Shut pygame down."""
        pygame.quit()


In [12]:
# Create the environment with the default 400x400 window and 'human' rendering.
env = GameEnv()

In [10]:
# Shut pygame down via GameEnv.close().
# NOTE(review): this cell sits before the rollout cell and carries an earlier
# execution count (In [10]); running the notebook top to bottom would close
# pygame before env.step() polls pygame events - presumably it belongs after
# the rollout. Confirm and move it.
env.close()

In [None]:

done = False
number_of_prey = 2
number_of_predator = 3

# Resize the populations, create the agents, then place them at random.
env.set_agent_number(prey_number=number_of_prey, predator_number=number_of_predator)
env.agent_init()
env.reset()

# Random-policy rollout: one sampled action per agent on every step.
# step() already redraws the window, so no separate render call is needed.
while not done:
    prey_action = [env.action_space.sample() for _ in range(number_of_prey)]
    predator_action = [env.action_space.sample() for _ in range(number_of_predator)]

    action = [prey_action, predator_action]

    obs, reward, terminated, truncated, _ = env.step(action)
    # An episode is over when it either terminates or is truncated.
    done = terminated or truncated
    print(obs)


In [22]:
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.evaluation import evaluate_policy
import os

In [23]:
# Fresh environment for the training experiment.
env = GameEnv()
number_of_prey = 2
number_of_predator = 3

# Configure the population sizes, create the agents and place them at random.
env.set_agent_number(prey_number=number_of_prey, predator_number=number_of_predator)
env.agent_init()
# Last expression of the cell: the (observation, seed) tuple is displayed.
env.reset()

([[0, 'prey', array([249.11028, 316.7625 ], dtype=float32)],
  [1, 'prey', array([286.55774,  70.27204], dtype=float32)],
  [0, 'predator', array([152.40836 , 122.442825], dtype=float32)],
  [1, 'predator', array([184.71822, 320.53717], dtype=float32)],
  [2, 'predator', array([185.87051, 259.77145], dtype=float32)]],
 0)

In [24]:
# Wrap the environment in a single-slot vectorised env.
# The cell rebinds `env`, so guard against wrapping an already wrapped env
# when the cell is executed more than once.
if not isinstance(env, DummyVecEnv):
    env = DummyVecEnv([lambda: env])

In [25]:
# Relative directory handed to PPO as its TensorBoard log location.
log_path = os.path.join('Training', 'Logs')

In [26]:
# Build a PPO learner with an MLP policy on the wrapped environment;
# verbose=1 enables progress output and tensorboard_log points at log_path.
model = PPO('MlpPolicy', env, verbose=1, tensorboard_log=log_path)

Using cuda device


In [27]:
# Relative path for a baseline model.
# NOTE(review): not referenced by any visible cell - presumably intended for
# a later model.save(baseline_path); confirm.
baseline_path = os.path.join('Training', 'Models', 'test_baseline')

In [28]:
# Train for 10,000 environment steps.
# NOTE(review): the recorded run of this cell ended with
# "ValueError: setting an array element with a sequence". Likely cause:
# GameEnv.reset()/step() return per-agent lists/dicts and step() expects a
# (prey_actions, predator_actions) pair, while PPO works with the declared
# 4-element Box observation and a single Discrete(4) action - confirm and
# adapt the environment before training.
model.learn(total_timesteps=10000)

ValueError: setting an array element with a sequence. The requested array would exceed the maximum number of dimension of 1.