In [None]:
import os
import sys
import time
import math
import numpy as np
import pygame
from gymnasium import Env

from gymnasium.spaces import Discrete, Dict, Box

# from Agents.agent import Agent
from Constants.constants import WHITE, RED, BLUE, SCREEN_WIDTH, SCREEN_HEIGHT

from Walls.collision_detection import detect_collision
from Walls.wall_class import Walls
from Walls.Point_Ray import is_ray_blocked

from Entities.turret import Turret


sys.path.insert(1, os.path.join(sys.path[0], '..'))

In [None]:

# Wall layout for level 5, keyed by wall name. Every entry describes one
# rectangle by its top-left corner ('x', 'y') and its 'width' and 'height'.
# GameEnv.reset() hands this mapping to Walls.make_wall(), and
# GameEnv._render_frame() draws each entry as a filled rectangle.
LEVEL_5_WALLS = {
    # Numbered entries: four 25x100 vertical bars and four 100x25 horizontal bars.
    '1': {'x': 200, 'y': 100, 'width': 25, 'height': 100},
    '2': {'x': 200, 'y': 400, 'width': 25, 'height': 100},
    '3': {'x': 600, 'y': 100, 'width': 25, 'height': 100},
    '4': {'x': 600, 'y': 400, 'width': 25, 'height': 100},
    '5': {'x': 200, 'y': 100, 'width': 100, 'height': 25},
    '6': {'x': 525, 'y': 100, 'width': 100, 'height': 25},
    '7': {'x': 200, 'y': 475, 'width': 100, 'height': 25},
    '8': {'x': 525, 'y': 475, 'width': 100, 'height': 25},
    # Named entries: 10-pixel-thick strips placed around the screen edges.
    # NOTE(review): "finish_bottom" starts at y = SCREEN_HEIGHT + 10 and is
    # SCREEN_WIDTH wide, while "start_top" is SCREEN_WIDTH + 10 wide and ends
    # exactly at y = 0 -- confirm whether the extra 10-pixel offset is intended.
    "far-left": {"x": -10, "y": -10, "width": 10, "height": SCREEN_HEIGHT + 10},
    "far-right": {"x": SCREEN_WIDTH, "y": -10, "width": 10, "height": SCREEN_HEIGHT + 10},
    "start_top": {"x": -10, "y": -10, "width": SCREEN_WIDTH + 10, "height": 10},
    "finish_bottom": {"x": -10, "y": SCREEN_HEIGHT + 10, "width": SCREEN_WIDTH, "height": 10},
}



In [None]:
class CastRay:
    """Casts a fan of rays from an agent and measures the distance to the nearest wall.

    With the default arguments the fan covers the full circle: one ray every
    10 degrees from 0 to 350 degrees (36 rays), each at most 1000 units long.

    Args:
        max_ray_length: Length of every ray; also the value reported for a ray
            that hits no wall.
        start_angle: Angle of the first ray, in degrees.
        end_angle: Angle of the last ray, in degrees (inclusive).
        angle_step: Angular spacing between consecutive rays, in degrees.
    """

    def __init__(self, max_ray_length=1000, start_angle=0, end_angle=350, angle_step=10):
        self.max_ray_length = max_ray_length
        self.start_angle = start_angle
        self.end_angle = end_angle
        self.angle_step = angle_step

    def get_cast_ray_angles(self):
        """Return the list of ray angles in degrees, each reduced modulo 360."""
        ray_angles = np.arange(self.start_angle, self.end_angle + self.angle_step, self.angle_step).tolist()
        return [angle % 360 for angle in ray_angles]

    @staticmethod
    def _wall_sides(wall):
        """Return the four edges of ``wall`` as (x_start, y_start, x_end, y_end) tuples.

        ``wall`` must expose ``x``, ``y``, ``topright`` and ``bottomright``.
        """
        left, top = wall.x, wall.y
        right, bottom = wall.topright[0], wall.bottomright[1]
        return (
            (left, top, right, top),
            (right, top, right, bottom),
            (right, bottom, left, bottom),
            (left, bottom, left, top),
        )

    def cast_rays(self, agent, wall_list):
        """Cast every ray from ``agent.center`` and measure the closest wall hit.

        Args:
            agent: Object with a ``center`` attribute holding an (x, y) pair.
            wall_list: Iterable of wall rectangles (see ``_wall_sides``).

        Returns:
            A ``(ray_lengths, ray_angles)`` tuple of equally long lists. Each
            length is the distance from the agent centre to the nearest
            intersection with any wall edge, or ``max_ray_length`` when the ray
            hits nothing.
        """
        ray_angles = self.get_cast_ray_angles()
        ray_lengths = []

        # The ray origin is the same for every ray, so read it once.
        x1, y1 = agent.center
        max_length = self.max_ray_length

        for ray_angle in ray_angles:
            theta = math.radians(ray_angle)
            x2 = x1 + max_length * math.cos(theta)
            y2 = y1 + max_length * math.sin(theta)
            nearest = None

            for wall in wall_list:
                for x5, y5, x6, y6 in self._wall_sides(wall):
                    # Standard segment/segment intersection: t runs along the
                    # ray, u along the wall edge.
                    denominator = (x1 - x2) * (y5 - y6) - (y1 - y2) * (x5 - x6)

                    # Parallel (or degenerate) segments never produce a hit.
                    if denominator == 0:
                        continue

                    t = ((x1 - x5) * (y5 - y6) - (y1 - y5) * (x5 - x6)) / denominator
                    u = -((x1 - x2) * (y1 - y5) - (y1 - y2) * (x1 - x5)) / denominator

                    if 0 <= t <= 1 and 0 <= u <= 1:
                        intersection_x = x1 + t * (x2 - x1)
                        intersection_y = y1 + t * (y2 - y1)

                        # Distance from the ray start to the intersection point.
                        distance = math.sqrt((intersection_x - x1) ** 2 + (intersection_y - y1) ** 2)

                        if nearest is None or distance < nearest:
                            nearest = distance

            ray_lengths.append(max_length if nearest is None else nearest)

        return ray_lengths, ray_angles

In [None]:
import math

import numpy as np
import os
import sys

sys.path.insert(1, os.path.join(sys.path[0], '..'))


class Agent:
    """A circular agent that moves along the screen axes.

    The agent keeps its position as a float32 array (``current_position``) and
    derives integer drawing coordinates (``center``) plus a short heading line
    (``direction_end`` / ``draw_direction_end``) from it.

    Args:
        agent_name: Label stored in ``self.agent``.
        agent_index: Numeric identifier stored in ``self.index``.
    """

    def __init__(self, agent_name, agent_index):
        # identity
        self.index = agent_index
        self.agent = agent_name

        # additional attributes
        self.health = None
        self.isHit = False
        self.move = True
        # scaled by ``speed_factor`` in step_update to get the per-step displacement
        self.movement_speed = 300

        # positional attributes
        self.previous_position = np.array([0, 0], dtype=np.float32)
        self.current_position = None
        self.same_position = False

        self.current_step = 0
        self.action = None
        self.has_grabbed = False

        # these are for the angular motion of the agent
        self.angle = 0
        self.center = 0
        self.direction = 0
        self.direction_end = 0
        self.radius = 15

        # this is custom only for the render function
        self.draw_direction_end = 0

    # for handling what the action does
    def agent_action(self, action):
        """Placeholder hook; currently does nothing."""
        pass

    def _get_min_left(self, walls):
        """Return the smallest ``left`` among ``walls``, capped at 1000."""
        # 1000 is listed first so a tie keeps the cap, exactly like a strict "<" scan.
        return min([1000, *(wall.left for wall in walls)])

    def _heading_offset(self, magnitude):
        """Return the (dx, dy) of a vector of length ``magnitude`` along ``self.angle`` (degrees)."""
        theta = math.radians(self.angle)
        return magnitude * math.cos(theta), magnitude * math.sin(theta)

    # for handling all the initial states
    def agent_reset(self, width, height):
        """Place the agent at its fixed start position and rebuild the heading line.

        Args:
            width: Screen width. Currently unused; kept for interface compatibility.
            height: Screen height. Currently unused; kept for interface compatibility.
        """
        padding = 30

        # fixed spawn point
        self.current_position = np.array([80, 550], dtype=np.float32)

        # end point of the heading line, ``padding`` pixels away along self.angle
        dir_vec_x, dir_vec_y = self._heading_offset(padding)
        self.direction_end = np.array([self.current_position[0] + dir_vec_x, self.current_position[1] + dir_vec_y],
                                      dtype=np.float32)

        # this part is only for the render function
        self.draw_direction_end = (self.current_position[0] + dir_vec_x, self.current_position[1] + dir_vec_y)
        self.center = (int(self.current_position[0]), int(self.current_position[1]))

    # updating the direction, line-end according to given angle when called
    def get_direction(self):
        """Refresh ``center``, ``direction``, ``direction_end`` and ``draw_direction_end``.

        All four are derived from the current position and ``self.angle``.
        """
        # as render function demands an int value
        center = (int(self.current_position[0]), int(self.current_position[1]))
        self.center = center

        directional_vector_x, directional_vector_y = self._heading_offset(30)

        directional_line_end = np.array([center[0] + directional_vector_x, center[1] + directional_vector_y],
                                        dtype=np.float32)
        self.direction_end = directional_line_end

        # unit vector pointing from the centre to the end of the heading line
        direction = directional_line_end - center
        direction /= np.linalg.norm(direction)
        self.direction = direction
        self.draw_direction_end = (center[0] + directional_vector_x, center[1] + directional_vector_y)

    # for updating the states of the agent when called
    def step_update(self, action, speed_factor, range_x, range_y):
        """Move the agent one step and refresh its derived geometry.

        Args:
            action: 0 = move -x, 1 = move +x, 2 = move -y, 3 = move +y.
                Any other value leaves the position unchanged.
            speed_factor: Multiplier applied to ``movement_speed`` for this step.
            range_x: Horizontal extent; x is clamped to [10, range_x - 10].
            range_y: Vertical extent; y is clamped to [10, range_y - 10].
        """
        displacement = self.movement_speed * speed_factor

        if action == 0:
            self.current_position[0] = self.current_position[0] - displacement
        elif action == 1:
            self.current_position[0] = self.current_position[0] + displacement
        elif action == 2:
            self.current_position[1] = self.current_position[1] - displacement
        elif action == 3:
            self.current_position[1] = self.current_position[1] + displacement

        # Clamp BEFORE refreshing the derived geometry so that ``center`` and the
        # heading line always describe the clamped position, never an
        # out-of-bounds one.
        self.current_position[0] = np.clip(self.current_position[0], 10, range_x - 10)
        self.current_position[1] = np.clip(self.current_position[1], 10, range_y - 10)
        self.get_direction()

    # this function returns all the state needed for the observations
    # ! can be changed with need for the algorithm
    def get_agent_state(self):
        """Return a dict snapshot of the agent's identity, speed, position and angle.

        ``agent_current_position`` is the live position array, not a copy.
        """
        agent_state = {
            'agent_id': self.index,
            'agent_name': self.agent,
            'agent_move_speed': self.movement_speed,
            'agent_current_position': self.current_position,
            'agent_angle': self.angle
        }

        return agent_state


In [None]:
class GameEnv(Env):
    """Gymnasium environment: a 'predator' agent must reach a turret while avoiding its bullet.

    Observation (42 float32 values, in this order):
        * predator position (x, y)
        * bullet position (x, y), or (0, 0) unless exactly one bullet exists
        * turret position (x, y)
        * 36 ray lengths from ``CastRay.cast_rays``

    Actions (``Discrete(4)``): forwarded unchanged to ``Agent.step_update``
    (0 = -x, 1 = +x, 2 = -y, 3 = +y).

    An episode ends when the bullet touches the predator (-50), when the
    predator gets close enough to the turret (+200), or when
    ``total_running_time`` seconds of wall-clock time have passed since
    ``reset`` (-20).

    Movement is scaled by the wall-clock time between consecutive ``step``
    calls, so the simulation speed depends on how fast the loop runs.

    Args:
        render_mode: ``None``, ``"human"`` or ``"rgb_array"``.
    """

    metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 300}

    def __init__(self, render_mode=None):
        super().__init__()

        # defining the screen dimension for render purpose
        self.screen_width = SCREEN_WIDTH
        self.screen_height = SCREEN_HEIGHT

        assert render_mode is None or render_mode in self.metadata["render_modes"]
        self.render_mode = render_mode

        # 2 (predator) + 2 (bullet) + 2 (turret) + 36 (rays) = 42 values
        self.observation_space = Box(low=np.zeros(42, np.float32),
                                     high=np.full(42, 1000, dtype=np.float32),
                                     dtype=np.float32)

        self.action_space = Discrete(4)

        self.total_steps = 0
        self.predator_agent = Agent('predator', 0)
        # NOTE: despite the name, step() stores the most recent step reward here.
        self.predator_total_reward = 0
        self.cast_ray = CastRay()

        self.obs = None

        self.start_time = 0
        self.animation_time = None
        # episode time limit, in seconds of wall-clock time
        self.total_running_time = 10

        self.window = None
        self.clock = None

        # for the wall initializations
        self.wall = Walls(pygame)
        self.walls = None

        self.turret = Turret(SCREEN_WIDTH, SCREEN_HEIGHT)
        self.bullet = self.turret.get_bullets()

    def _get_obs(self):
        """Build the 42-element float32 observation vector (layout in the class docstring)."""
        if len(self.bullet) == 1:
            bullet_pos = self.bullet[0].pos
        else:
            bullet_pos = [0, 0]

        lengths, _ = self.cast_ray.cast_rays(self.predator_agent, self.walls)

        observation = []
        observation.extend(self.predator_agent.current_position.tolist())
        observation.extend(bullet_pos)
        observation.extend(self.turret.position.tolist())
        observation.extend(lengths)

        # Return an array whose dtype matches observation_space instead of a plain list.
        return np.array(observation, dtype=np.float32)

    def get_reward(self, reward, done):
        """Apply this step's reward terms.

        Args:
            reward: Reward accumulated so far in this step.
            done: Termination flag accumulated so far in this step.

        Returns:
            The updated ``(reward, done)`` pair.
        """
        predator = self.predator_agent

        # Bullet hit: only meaningful (and only safe to index) when a bullet exists.
        if len(self.bullet) != 0:
            bullet = self.bullet[0]
            if np.linalg.norm(predator.current_position - bullet.pos) < predator.radius + bullet.radius:
                self.turret.destroy_bullet(bullet)
                reward -= 50
                done = True

        distance_to_turret = np.linalg.norm(predator.current_position - self.turret.position)

        # Reaching the turret (with a 20-unit margin) ends the episode with a bonus.
        if distance_to_turret < predator.radius + self.turret.radius + 20:
            reward += 200
            done = True

        # small per-step bonus
        reward += 0.01

        # Shaping term that grows as the predator approaches the turret; the
        # floor avoids a division by zero when both positions coincide.
        reward += 10 / max(distance_to_turret, 1e-6)

        return reward, done

    def reset(self, seed=None, option=None, options=None):
        """Start a new episode.

        Args:
            seed: Forwarded to ``Env.reset`` to seed the environment RNG.
            option: Legacy spelling of ``options``; accepted for backward
                compatibility and ignored.
            options: Gymnasium-style reset options; currently ignored.

        Returns:
            ``(observation, info)`` where ``info`` is an empty dict.
        """
        super().reset(seed=seed)
        self.start_time = time.time()

        self.wall.clear_walls()
        self.walls = self.wall.make_wall(LEVEL_5_WALLS)

        self.total_steps = 0
        self.predator_total_reward = 0
        self.animation_time = time.time()

        self.predator_agent.agent_reset(width=self.screen_width, height=self.screen_height)
        self.turret.rotate_turret(self.predator_agent.center)

        observation = self._get_obs()

        return observation, {}

    def step(self, action):
        """Advance the simulation by one step.

        Args:
            action: Element of ``action_space``.

        Returns:
            ``(observation, reward, done, truncated, info)``. ``truncated`` is
            always ``False``: the time limit is reported through ``done``.
        """
        # initializing the return variables
        done = False
        reward = 0
        truncated = False
        info = {}

        # Wall-clock time since the previous step scales all movement.
        now = time.time()
        frame_time = now - self.animation_time
        self.animation_time = now
        elapsed_time = now - self.start_time

        self.predator_agent.step_update(action, speed_factor=frame_time,
                                        range_x=self.screen_width, range_y=self.screen_height)
        self.predator_agent = detect_collision(self.predator_agent, self.walls)

        # Keep a bullet in flight at all times.
        if len(self.turret.get_bullets()) == 0:
            self.turret.shoot()

        self.bullet[0].move(frame_time)

        self.total_steps += 1
        reward, done = self.get_reward(reward, done)

        # Time limit reached: penalise and end the episode.
        if elapsed_time >= self.total_running_time:
            reward -= 20
            done = True

        observation = self._get_obs()

        self.predator_total_reward = reward
        self.obs = observation

        if self.render_mode == "human":
            self._render_frame()

        return observation, reward, done, truncated, info

    def render(self):
        """Return the current frame as an RGB array in ``"rgb_array"`` mode, else ``None``."""
        if self.render_mode == 'rgb_array':
            return self._render_frame()
        return None

    def _render_frame(self):
        """Draw the scene; show it (``"human"``) or return it as an (H, W, 3) array.

        NOTE: besides drawing, this method also calls ``turret.auto_destroy()``
        and destroys a bullet that overlaps a wall, so those updates only
        happen when a frame is rendered.
        """
        if self.window is None and self.render_mode == "human":
            pygame.init()
            pygame.display.init()
            self.window = pygame.display.set_mode((self.screen_width, self.screen_height))
            pygame.font.init()

        if self.clock is None and self.render_mode == "human":
            self.clock = pygame.time.Clock()

        screen = pygame.Surface((self.screen_width, self.screen_height))
        screen.fill((249, 245, 246))

        # for the predator
        predator = self.predator_agent
        pygame.draw.circle(screen, (223, 106, 106), predator.center, predator.radius)

        # for cast rays
        lengths, angles = self.cast_ray.cast_rays(predator, self.walls)

        for angle, length in zip(angles, lengths):
            end_point = (int(predator.center[0] + length * math.cos(math.radians(angle))),
                         int(predator.center[1] + length * math.sin(math.radians(angle))))

            pygame.draw.line(screen, (223, 106, 106), predator.center, end_point)

        # for turret
        pygame.draw.circle(screen, (82, 82, 78), self.turret.center, self.turret.radius)
        pygame.draw.line(screen, (82, 82, 78), self.turret.center, self.turret.rotate_turret(predator.center), 4)

        # for the bullet
        if len(self.bullet) != 0:
            bullet = self.bullet[0]
            bullet_rect = pygame.draw.circle(screen, (115, 147, 167), bullet.center, bullet.radius)
            self.turret.auto_destroy()
            # Destroy the bullet at most once, and only if auto_destroy() left it
            # in the list; several walls can overlap the bullet at the same time.
            if bullet in self.bullet and any(bullet_rect.colliderect(wall) for wall in self.walls):
                self.turret.destroy_bullet(bullet)

        for wall in LEVEL_5_WALLS.values():
            pygame.draw.rect(screen, (71, 151, 177), (wall['x'], wall['y'], wall['width'], wall['height']))

        if self.render_mode == "human":
            font = pygame.font.Font(None, 18)
            text_surface = font.render(f"Reward: {self.predator_total_reward: .5f} ", True, (0, 0, 0))
            text_rect = text_surface.get_rect()
            text_rect.center = (self.screen_width - 200, 10)

            screen.blit(text_surface, text_rect)
            self.window.blit(screen, screen.get_rect())
            pygame.event.pump()
            pygame.display.update()

            # frame-rate limiting is intentionally disabled:
            # self.clock.tick(self.metadata["render_fps"])

        else:
            # pygame stores pixels as (width, height, 3); swap to (height, width, 3)
            return np.transpose(
                np.array(pygame.surfarray.pixels3d(screen)), axes=(1, 0, 2)
            )

    def close(self):
        """Shut pygame down if a window was opened and forget the stale handles."""
        if self.window is not None:
            pygame.display.quit()
            pygame.font.quit()
            pygame.quit()
            # Drop the handles so a later render re-creates the window instead
            # of drawing to a closed display.
            self.window = None
            self.clock = None


In [None]:
# Shut down all pygame modules -- presumably to dispose of a window left open by an earlier run.
pygame.quit()

In [None]:
# Scratch check: rebuild the ray-angle list with the same 0-350 degree range and
# 10-degree step that CastRay.get_cast_ray_angles uses, and count the rays.
start_angle = 0  # first ray angle, in degrees
end_angle = 350  # last ray angle, in degrees (inclusive)
angle_step = 10  # One ray every 10 degrees
ray_angles = np.arange(start_angle, end_angle + angle_step, angle_step).tolist()
ray_angles = [angle%360 for angle in ray_angles]

# number of rays in the fan
len(ray_angles)

In [None]:
# Scratch check: a 42-element float32 vector, the same length as GameEnv.observation_space.
arr = np.array([800 for _ in range(42)], dtype=np.float32)
len(arr) 

In [None]:
# Smoke test: play one episode with uniformly random actions.
env = GameEnv('human')
env.reset()

done = False
total_reward = 0
try:
    while not done:
        action = env.action_space.sample()
        obs, reward, terminated, truncated, _ = env.step(action)
        # An episode is over when it is either terminated or truncated.
        done = terminated or truncated
        total_reward += reward
finally:
    # Release the pygame window even if a step raises.
    env.close()

# Per-step observations are no longer printed: one 42-value line per step floods the output.
print(env.total_steps)
print(total_reward)

In [None]:
from stable_baselines3 import DQN
from stable_baselines3.common.callbacks import CheckpointCallback

In [None]:
# Output locations for the 'Final_3' training run, relative to the working directory.
log_path = os.path.join('Training', 'Logs', 'Final_3')  # passed to DQN as tensorboard_log
model_path = os.path.join('Training', 'Models', 'Final_3')  # passed to CheckpointCallback as save_path
best_save_path = os.path.join('Training', 'Models', 'Final_3', 'best')  # not referenced by any other visible cell

In [None]:
# Checkpointing callback handed to model.learn(): files are written under
# model_path with the "rl_model" prefix, and the replay buffer and
# VecNormalize statistics are requested alongside each checkpoint.
checkpoint_callback = CheckpointCallback(
  save_freq=1000000,
  save_path=model_path,
  name_prefix="rl_model",
  save_replay_buffer=True,
  save_vecnormalize=True,
)

In [None]:
# Environment instance used for training; in 'human' render mode GameEnv.step() draws a frame on every step.
env = GameEnv('human')

In [None]:
import torch as th

In [None]:
# Network-architecture experiments. Only `new_arch` is used by the visible cells
# (it is passed as policy_kwargs when the DQN model is constructed);
# `policy_kwargs` and `net_arch` are not referenced by any other visible cell.
number_of_neurons = 256
# NOTE(review): the pi/vf keys look like an actor-critic layout -- confirm DQN accepts them before reusing.
policy_kwargs = dict(activation_fn=th.nn.ReLU, net_arch=dict(pi=[128, 128], vf=[128, 128]))
net_arch = [dict(pi=[128, 128, 128, 128], vf=[128, 128, 128, 128])]
# four entries of `number_of_neurons` each
new_arch = dict(net_arch=[number_of_neurons, number_of_neurons, number_of_neurons, number_of_neurons])

In [None]:
# DQN with the 'MlpPolicy' policy on `env`; `new_arch` supplies the policy kwargs and
# log_path is given as the TensorBoard log directory.
model = DQN('MlpPolicy', env, learning_rate=0.003, policy_kwargs=new_arch, verbose=1, tensorboard_log=log_path)

In [None]:
# Reset the environment, then train with total_timesteps=50,000,000,
# passing checkpoint_callback so checkpoints are written along the way.
env.reset()
model.learn(total_timesteps=50000000, callback=checkpoint_callback)

In [None]:
# Model archive and replay-buffer files for step 5,642,788 of the Final_3 run
# (presumably written by checkpoint_callback -- verify the files exist before loading).
load_path = os.path.join('Training', 'Models', 'Final_3', 'rl_model_5642788_steps.zip')
buffer_path = os.path.join('Training', 'Models', 'Final_3', 'rl_model_replay_buffer_5642788_steps.pkl')

In [None]:
# Restore the DQN model from the archive at load_path (no environment is passed here).
model = DQN.load(load_path)

In [None]:
# Load the saved replay buffer at buffer_path into the restored model.
model.load_replay_buffer(buffer_path)

In [None]:
# Attach `env` to the restored model, which was loaded without an environment.
model.set_env(env)

In [None]:
# A stray IPython source-inspection query (`model.reset ??`) left over from debugging
# was removed here: it is not valid Python and produces no result the notebook uses.

In [None]:
# Continue training the restored model on `env`, again with checkpointing.
# NOTE(review): learn() is called with its default reset_num_timesteps -- confirm whether
# the timestep counter (and hence the checkpoint file names) should instead continue
# from the loaded checkpoint.
env.reset()
model.learn(total_timesteps=50000000, callback=checkpoint_callback)

In [None]:
# Close the environment; GameEnv.close() shuts pygame down if a window was opened.
env.close()

In [None]:
%tensorboard