In [2]:
import gymnasium as gym
import numpy as np
from gymnasium import spaces
import pygame
import random
import pygame
import pygame_sdl2
pygame_sdl2.init()

ModuleNotFoundError: No module named 'pygame_sdl2'

In [30]:
class PredatorPreyENV(gym.Env):
    # The primary purpose of PredatorPreyENV is to store configuration settings and sensitive information
    #  Python class named PredatorPreyENV that inherits from the custom gym.Env
    def __init__(self, screen_width=800, screen_height=600, circle_radius=150):
        super(PredatorPreyENV, self).__init__()

        # Define the screen dimensions
        self.screen_width = screen_width
        self.screen_height = screen_height
        self.circle_radius = circle_radius


        # defining the agent policies
        self.blue_dot_radius = 40
        self.direction_line_length = 40
        self.blue_dot_health = 50
        self.red_dot_health = 50
        
        # Define the attack damage of the red dot
        self.red_dot_attack_dmg = 10  # Adjust this value as needed
        
        # Define movement speeds for blue and red dots
        self.blue_dot_move_speed = 0.8
        self.red_dot_move_speed = 0.1

        # Define 4 discreet action space (left, right, up, down)
        self.action_space = spaces.Discrete(4)

        # Define observation space (positions of blue dot and red dot)
        self.observation_space = spaces.Box(low=np.array([0, 0, 0, 0], dtype=np.float32), high=np.array(
            [self.screen_width / 2, self.screen_height, self.screen_width, self.screen_height], dtype=np.float32),
                                            dtype=np.float32)

        # Initialize the pygame window
        pygame.init()
        self.screen = pygame.display.set_mode((self.screen_width, self.screen_height))
        pygame.display.set_caption('Dots Moving Environment')

        # Initialize the positions of the blue and red dots
        # self.blue_dot_pos = np.array([self.screen_width / 4, self.screen_height / 2], dtype=np.float32)
        self.blue_dot_pos = np.array([self.screen_width / 4, self.screen_height / 2], dtype=np.float32)

        # self.red_dot_pos = np.array([3 * self.screen_width / 4, self.screen_height / 2], dtype=np.float32)
        self.red_dot_pos = np.array([3 * self.screen_width / 4, self.screen_height / 2], dtype=np.float32)

        # Define grid line properties
        self.grid_color = (210, 210, 210)
        self.grid_spacing = 40  # Adjust this value to change the grid spacing

        pygame.font.init()
        self.font = pygame.font.Font(None, 36)

        self.total_reward = 0
    #     keep track of the cumulative reward earned by the agent as it interacts with the environment

    def reset(self, seed=0):
        super().reset(seed=seed)
        # Reset the positions of the blue and red dots at start of each episodes
        # self.blue_dot_pos = np.array([0, 0], dtype=np.float32)
        self.blue_dot_pos = np.array([self.screen_width / 2, self.screen_height / 2], dtype=np.float32)

        # self.red_dot_pos = np.array([3 * self.screen_width / 4, self.screen_height / 2], dtype=np.float32)
        # Randomly set the initial position of the red dot along the circumference of the circle
        angle = np.random.uniform(0, 2 * np.pi)
        x = self.blue_dot_pos[0] + self.circle_radius * np.cos(angle)
        y = self.blue_dot_pos[1] + self.circle_radius * np.sin(angle)
        self.red_dot_pos = np.array([x, y], dtype=np.float32)

        # Reset the position of the red dot to the middle of the screen
        # self.red_dot_pos = np.array([self.screen_width / 2, self.screen_height / 2], dtype=np.float32)
        self.red_dot_pos = np.array([3 * self.screen_width / 4, self.screen_height / 2], dtype=np.float32)

        self.blue_dot_health = 50
        self.total_reward = 0
        return np.concatenate([self.blue_dot_pos, self.red_dot_pos]), seed 

    # This function essentially describes how the blue and red dots interact
    # based on the selected actions, handle collisions, and update their positions and rewards within the environment.
    def step(self, action_blue_dot):  #per second 1 frame pass what happens determines step function
        # truncated == false
        # Define the movement speed
        # self used to access variables
        move_speed_blue = 0.8   #blue
        move_speed_red = 0.1   # red

        action_red_dot = 0

        # Separate the action for blue and red dots
        # action_blue_dot, action_red_dot = action
        # POLICIES
        if action_blue_dot == 0:  # Move blue dot left
            self.blue_dot_pos[0] -= move_speed_blue
        elif action_blue_dot == 1:  # Move blue dot right
            self.blue_dot_pos[0] += move_speed_blue
        elif action_blue_dot == 2:  # Move blue dot up
            self.blue_dot_pos[1] -= move_speed_blue
        elif action_blue_dot == 3:  # Move blue dot down
            self.blue_dot_pos[1] += move_speed_blue


        # Calculate the direction vector from the red dot to the blue dot by subtracting
        direction = self.blue_dot_pos - self.red_dot_pos

        # Normalize the direction vector by dividing the vector by its magnitude (length) to turn it into a unit vector
        direction /= np.linalg.norm(direction)
        # normalized vector (direction) indicates the direction from the red dot to the blue dot.
        #print("Direction: ", direction)
        distance_between_centers = np.linalg.norm(self.blue_dot_pos - self.red_dot_pos)
        # calculates the Euclidean distance between the centers of the blue and red dots,
        # measures how far apart the two dots are in terms of pixel distance.

        # Calculate the distance between the centers of the blue and red dots
        distance_between_centers = np.linalg.norm(self.blue_dot_pos - self.red_dot_pos)

        # Check if the red dot crosses the circle boundary
        if distance_between_centers > self.circle_radius:
            # Penalize the agent and set the 'done' flag if the red dot crosses the circle
            done = True
            reward -= 10

        # radii for collision detection. The red_dot_radius is set to be 15 pixels smaller than the blue_dot_radius.
        blue_dot_radius = self.blue_dot_radius
        red_dot_radius = blue_dot_radius - 15
        reward = 0
        done = False

        # Check if the dots collide with each other
        # checking if the distance between the centers of the blue and red dots (distance_between_centers)
        # is less than the sum of their radii (blue_dot_radius + red_dot_radius)
        if distance_between_centers < blue_dot_radius + red_dot_radius:
            # Separate the dots by moving the red dot away from the blue dot
            self.red_dot_pos -= -50.0 + move_speed_red * direction
            self.blue_dot_health -= self.red_dot_attack_dmg
            if(self.blue_dot_health <= 0):
                done = True
            reward -= 5
            #print("collision")

        else:
            # Move the red dot towards the blue dot with a fixed speed
            self.red_dot_pos += move_speed_red * direction

        # Clip blue dot position to stay within the first half of the screen
        self.blue_dot_pos[0] = np.clip(self.blue_dot_pos[0], 0, self.screen_width)
        self.blue_dot_pos[1] = np.clip(self.blue_dot_pos[1], 0, self.screen_height)

        # Clip red dot position to stay within the entire screen
        self.red_dot_pos = np.clip(self.red_dot_pos, [0, 0], [self.screen_width, self.screen_height])

        # Define a simple reward function (e.g., distance between the two dots)
        # reward = -np.linalg.norm(self.blue_dot_pos - self.red_dot_pos)
        self.total_reward += reward

        # Check if the dots are close to each other (you can adjust the distance threshold as needed)
        # done = np.linalg.norm(self.blue_dot_pos - self.red_dot_pos) < 10
        return np.concatenate([self.blue_dot_pos, self.red_dot_pos]), reward, done, False, {}
    # done flag indicating the end of the episode, and an empty dictionary ({}) for additional information

    def display_total_reward(self):
        text_surface = self.font.render(f"Reward: {self.total_reward: .2f} Blue Health: {self.blue_dot_health}", True, (0, 0, 0))
        # the reward and blue dot health values text
        text_rect = text_surface.get_rect()
        # position the text on the pygame window.
        text_rect.center = (self.screen_width - 200, 10)
        self.screen.blit(text_surface, text_rect)
    # The blit method is used to draw the text surface (text_surface) onto the pygame window (self.screen)

    # render function is responsible for creating a visual representation of the environment
    def render(self, action_blue, action_red):
        # Clear the screen
        self.screen.fill((229, 222, 248))

        # Draw grid lines
        for x in range(0, self.screen_width, self.grid_spacing):
            pygame.draw.line(self.screen, self.grid_color, (x, 0), (x, self.screen_height), 1)
        for y in range(0, self.screen_height, self.grid_spacing):
            pygame.draw.line(self.screen, self.grid_color, (0, y), (self.screen_width, y), 1)

        # Draw blue dot
        pygame.draw.circle(self.screen, (141,144,226), (int(self.blue_dot_pos[0]), int(self.blue_dot_pos[1])), self.blue_dot_radius)

        # Draw red dot
        pygame.draw.circle(self.screen, (158, 50, 90), (int(self.red_dot_pos[0]), int(self.red_dot_pos[1])), self.blue_dot_radius - 10)

        # calculating the position of facing direction lines
        blue_dot_direction_end = tuple(map(int, self.blue_dot_pos + self.direction_line_length * action_blue))
        red_dot_direction_end = tuple(map(int, self.red_dot_pos + self.direction_line_length * action_red))

        # direction line draw
        pygame.draw.line(self.screen, (0, 0, 255), tuple(map(int, self.blue_dot_pos)), blue_dot_direction_end, 2)
        pygame.draw.line(self.screen, (255, 0, 0), tuple(map(int, self.red_dot_pos)), red_dot_direction_end, 2)

        self.display_total_reward()

        # Update the display
        pygame.display.update()

    # def close(self):
    #     pygame.quit()

In [31]:
env = PredatorPreyENV()

In [32]:
env.action_space.sample()

1

In [33]:
env.close()

In [34]:
from stable_baselines3 import DQN
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.evaluation import evaluate_policy
import os

In [35]:
model_path = os.path.join('Training', 'Models', 'DQN_Model')
log_path = os.path.join('Training', 'DQN_Logs')

In [36]:
model = DQN('MlpPolicy', env=env, verbose=1, tensorboard_log=log_path)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [39]:
model.learn(total_timesteps=500000)

Logging to Training\DQN_Logs\DQN_10
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 5.82e+03 |
|    ep_rew_mean      | -25      |
|    exploration_rate | 0.558    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 4143     |
|    time_elapsed     | 5        |
|    total_timesteps  | 23270    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 6.05e+03 |
|    ep_rew_mean      | -25      |
|    exploration_rate | 0.0799   |
| time/               |          |
|    episodes         | 8        |
|    fps              | 4655     |
|    time_elapsed     | 10       |
|    total_timesteps  | 48425    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 6.09e+03 |
|    ep_rew_mean      | -25      |
|    exploration_rate | 0.05     |
| time/            

<stable_baselines3.dqn.dqn.DQN at 0x1ec13cb0110>

: 

In [None]:
env.close()

In [None]:
model.save(model_path)

In [None]:
mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=10)
print(f"Mean reward: {mean_reward} Std reward: {std_reward}" )