In [5]:
import numpy as np

In [6]:
class Agent:
    def __init__(self, agent_name, agent_index):
        self.index = agent_index
        self.agent = agent_name
        self.health = None
        self.isHit = False
        self.move = True
        self.movement_speed = 1.00
        self.previous_position = np.array([0, 0], dtype=np.float32)
        self.current_position = None
        self.same_position = False
        self.current_step = 0
        self.action = None
        pass

    def agent_action(self, action):

        pass

    def agent_update(self, step, action, width, height):
        if step > 0:
            if (self.previous_position != self.current_position).all():
                self.previous_position = self.current_position
                self.same_position = False

                if action:
                    self.step_update(action)
                else:
                    pass
            else:
                self.same_position = True

    def agent_reset(self, width, height):
        padding = 30
        self.current_position = np.array(
            [np.random.uniform(30, width - padding), np.random.uniform(30, width - padding)], dtype=np.float32)

    def step_update(self, action, range_x, range_y):

        if action == 0:
            self.current_position[0] -= self.movement_speed
        elif action == 1:
            self.current_position[0] += self.movement_speed
        elif action == 2:
            self.current_position[1] -= self.movement_speed
        elif action == 3:
            self.current_position[1] += self.movement_speed
        
        self.current_position[0] = np.clip(self.current_position[0], 0, range_x)
        self.current_position[1] = np.clip(self.current_position[1], 0, range_y)

In [7]:

from gymnasium.spaces import Discrete, Box, MultiDiscrete
from gymnasium import Env
import numpy as np
import pygame

In [58]:
class GameEnv(Env):
    def __init__(self, screen_width=400, screen_height=400, render_mode='human'):
        super(GameEnv, self).__init__()

        # defining the screen dimension for render purpose
        self.screen_width = screen_width
        self.screen_height = screen_height
        self.render_mode = render_mode

        # defining the observation and action spaces for all the agents
        
        self.observation_space = Box(low=np.array([0, 0, 0, 0], dtype=np.float32),
                                    high=np.array([self.screen_width, self.screen_height, self.screen_width, self.screen_height], dtype=np.float32),
                                    dtype=np.float32)

        # the pygame window should be initialized in the render function

        # setting the total number of agent
        
        self.number_of_prey = 1
        self.number_of_predator = 1
        self.prey_agent = None
        self.predator_agent = None
        self.number_of_agents = self.number_of_prey + self.number_of_prey

        # defining the action space based on total number of predator and prey
        self.action_space = Discrete(4)

        # if self.number_of_prey > 0 and self.number_of_predator > 0:
        #     self.agent_init()
        # else:
        #     self.prey_agents.append(Agent('prey', 0))
        #     self.predator_agents.append(Agent('predator', 0))

        # setting the total number of obstacles
        self.total_obstacles = None

        # keeping a counter to save the total steps
        self.total_steps = 0

        # initializing the pygame
        pygame.init()

        # setting the screen size
        self.screen = pygame.display.set_mode((self.screen_width, self.screen_height))
        pygame.display.set_caption('Multi Agent Environment(simple)')
        
        # keep the track of time of the rendering
        self.clock = pygame.time.Clock()

        # *it is set in milisec format (time is sec is time/1000)
        self.total_running_time = 10000

        # start the tick timer
        self.start_time = None

        # initializing the font
        pygame.font.init()
        self.font = pygame.font.Font(None, 36)

    # this function rerturns the value of the action into 2 digits 
    # if the action_space.sample() gives 1 digit number
    # * if  the number is 3 it will return 03 
    # * if  the number is 14 then it will return 14
    def expand_action_digit(self, action):

        # this basically checks the number if it has 1 then fills the rest with 0
        # if the number is 2 digits then it stays the same
        action = str(action).zfill(2)
        prey_action = int(action[0]) % 4
        predator_action = int(action[1]) % 4
        return prey_action, predator_action
        

    # this method will initialize the number of agents
    # ! this must be called from outside
    def agent_init(self):

        prey_agents = Agent('prey', 0)

        predator_agents = Agent('predator', 0)

        self.prey_agent = prey_agents
        self.predator_agent = predator_agents

    # this function is used to explicitly set the number of agents
    # ! this needs to be called from outside
    def set_agent_number(self, prey_number, predator_number):
        self.number_of_predator = predator_number
        self.number_of_prey = prey_number

    # the usual reset function
    def reset(self, seed=0):
        self.total_steps = 0
        prey = self.prey_agent
        predator = self.predator_agent
        # for prey in self.prey_agents:
        prey.agent_reset(width=self.screen_width, height=self.screen_height)
        # observation.append([prey.index, prey.agent, prey.current_position])

        # for predator in self.predator_agents:
        predator.agent_reset(width=self.screen_width, height=self.screen_height)
        # observation.append([predator.index, predator.agent, predator.current_position])
        
        self.prey_agent = prey
        self.predator_agent = predator
        observation = np.concatenate([self.prey_agent.current_position, self.predator_agent.current_position])
        return observation, seed

    # the step function
    # this function is called for every timesteps
    # this function updates the actions or states of agents in the env
    # this function is called default by the algorithms of all sorts
    # * it returns observation, reward, done, truncated, info
    # * any game policy change can be done here
    # * reward must be set here
    def step(self, action):
        # initializing the return variables
        done = False
        reward = 0.00
        truncated = False
        info = {}
        current_time = 0
        # when ever the step is starting set the start time
        if self.total_steps == 0:
            self.start_time = pygame.time.get_ticks()
        else:
            current_time = pygame.time.get_ticks()

        elapsed_time = current_time + self.start_time
        # handles the pygame window event when closing
        # !if the window still crashes pygame.event needs to be managed properly
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                done = True
                pygame.quit()
        
        # print(f'action: {action}')
        # *the actions are split as required
        # prey_action, predator_action = self.expand_action_digit(action)
        predator_action = action
        predator = self.predator_agent
        # print(f'prey: {prey_action}, predator: {predator_action}')

        # for prey in self.prey_agents:

        #     # print(f'prey_{prey.index} = action:{action} current_position: {prey.current_position}')
        #     prey.step_update(action=prey_action, range_x=self.screen_width - 10, range_y=self.screen_height - 10)
        #     # print(f'prey_{prey.index}: new_position: {prey.current_position}')

        #     observation.append({'index': prey.index, 'name': prey.agent, 'position': prey.current_position})
                


        # print(f'predator_{predator.index} = action:{action} current_position: {predator.current_position}')
        predator.step_update(action=predator_action, range_x=self.screen_width - 10, range_y=self.screen_height - 10)
        # print(f'predator_{predator.index}: new_position: {predator.current_position}')
            
        # !observation.append({'index': predator.index, 'name': predator.agent, 'position': predator.current_position})
        # observation = self.predator_agent.current_position
        observation = np.concatenate([self.prey_agent.current_position, self.predator_agent.current_position])

        # print(f'observation: {self.predator_agent.current_position}')
        self.total_steps += 1

        direction = self.predator_agent.current_position - self.prey_agent.current_position

        # Calculate the distance between the centers of the two dots
        distance_between_centers = np.linalg.norm(direction)

        # Check if there is a collision (distance <= sum of radii)
        
        
        if elapsed_time < self.total_running_time:
            if distance_between_centers <= 20:
                reward += 20
                done = True
                # pygame.quit()
                # self.close()

        else:
            done = True
            reward -= 30
            # pygame.quit()
            # self.close()

        # print(self.total_steps)
        self.render()

        return observation, reward, done, truncated, info
        

    def render(self):
        if self.render_mode == 'human':
            screen = self.screen

            # clear screen
            screen.fill((255, 255, 255))
            prey = self.prey_agent
            pos_x, pos_y = prey.current_position
            prey_radius = 10
            pygame.draw.circle(screen, (0, 0, 255), (int(pos_x), int(pos_y)), prey_radius)

            predator = self.predator_agent
            pos_x, pos_y = predator.current_position
            predator_radius = 10

            pygame.draw.circle(screen, (255, 0, 0), (int(pos_x), int(pos_y)), predator_radius)

            pygame.display.update()

    def close(self):
        pygame.quit()


In [72]:
env = GameEnv()

In [29]:
action = str(30).zfill(2)
digit1 = int(action[0]) % 4
digit2 = int(action[1]) % 4
action = [digit1, digit2]
print(action)

[3, 0]


In [12]:
env.action_space.sample()

19

In [73]:
env.close()

Testing the environment

In [None]:

done = False
number_of_prey = 2
number_of_predator = 3

env.set_agent_number(prey_number=number_of_prey, predator_number=number_of_predator)
env.agent_init()
env.reset()

while not done:
    prey_action = []
    predator_action = []
    for i in range(0, number_of_prey):
        prey_action.append(env.action_space.sample())
    
    for i in range(0, number_of_predator):
        predator_action.append(env.action_space.sample())

    action = [prey_action, predator_action]

    obs, reward, done, _, _ = env.step(action)
    print(obs)
    # env.render()


In [74]:
env = GameEnv()
done = False

env.agent_init()
env.reset()
total_reward = 0
while not done:
    action = env.action_space.sample()
    obs, reward, done, _, _ = env.step(action)
    total_reward += reward
print(f'total reward: {total_reward}')
print(f'Number of steps: {env.total_steps}')
    # env.render()

total reward: -30.0
Number of steps: 30203


In [32]:
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.evaluation import evaluate_policy
import os

In [69]:
env.close()

In [60]:
env = GameEnv()
# number_of_prey = 2
# number_of_predator = 3

# env.set_agent_number(prey_number=number_of_prey, predator_number=number_of_predator)
env.agent_init()
env.reset()

(array([286.70355, 177.60149, 230.14369, 314.30106], dtype=float32), 0)

In [56]:
# env = DummyVecEnv([lambda: env])

In [54]:
log_path = os.path.join('Training', 'Logs')

In [61]:
model = PPO('MlpPolicy', env, verbose=1, tensorboard_log=log_path)

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [56]:
baseline_path = os.path.join('Training', 'Models', 'test_baseline')

In [63]:
model.learn(total_timesteps=100000)

Logging to Training\Logs\PPO_4


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | -30      |
| time/              |          |
|    fps             | 368      |
|    iterations      | 1        |
|    time_elapsed    | 5        |
|    total_timesteps | 2048     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1           |
|    ep_rew_mean          | -30         |
| time/                   |             |
|    fps                  | 296         |
|    iterations           | 2           |
|    time_elapsed         | 13          |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.007848448 |
|    clip_fraction        | 0.0683      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.32       |
|    explained_variance   | nan         |
|    learning_rate        | 0.

<stable_baselines3.ppo.ppo.PPO at 0x129ee2c8690>

In [None]:
%tensorboard