In [2]:
import os
import sys
import time

import numpy as np
import pygame
from gymnasium import Env
from gymnasium.spaces import Discrete, Dict, Box, MultiDiscrete, Tuple

from Agents.agent import Agent
# from Agents.fov_points import get_fov_points
# from Agents.overlap_detection import detect_overlapping_points
from Agents.RayCast import get_fov_rays
from Constants.constants import WHITE, RED, BLUE, SCREEN_WIDTH, SCREEN_HEIGHT, WALLS, WALLS2, FOV_RADIUS
from Walls.collision_detection import detect_collision
from Walls.wall_class import Walls

# Insert the parent of sys.path[0] into the module search path at index 1.
# NOTE(review): this statement executes after the project imports above
# (Agents, Constants, Walls), so it cannot influence how those imports are
# resolved — confirm whether it was meant to run before them.
sys.path.insert(1, os.path.join(sys.path[0], '..'))

In [27]:
class GameEnv(Env):
    """Single-predator pygame environment following the Gymnasium ``Env`` API.

    Observation
        A flat ``float32`` vector ``[x, y, angle, *fov_values]`` where the FOV
        values are whatever ``get_fov_rays`` returns for the agent position.
    Actions
        ``Discrete(5)``: rotate clockwise, rotate anti-clockwise, move front,
        move back and wait. The index-to-action mapping is presumably defined
        in ``Agent.step_update`` (not visible here) — TODO confirm.
    Episode end
        * the predator's x position passes the right edge of the right-most
          wall (reward ``+150``), or
        * ``total_running_time`` seconds of wall-clock time have elapsed since
          ``reset`` (reward ``-50``), or
        * the user closes the pygame window.
    """

    def __init__(self, render_mode='human'):
        """Create the spaces, the pygame window and the wall factory.

        Args:
            render_mode: ``'human'`` draws every step into the pygame window;
                any other value disables drawing.
        """
        super().__init__()

        # screen dimensions, used for rendering and as movement bounds
        self.screen_width = SCREEN_WIDTH
        self.screen_height = SCREEN_HEIGHT
        self.render_mode = render_mode

        # Length of the flat observation vector: 2 position values + 1 angle +
        # the flattened output of get_fov_rays.
        # NOTE(review): 219 assumes get_fov_rays yields 216 values — confirm.
        total_values = 219
        self.observation_space = Box(low=np.zeros(total_values, dtype=np.float32),
                                     high=self.screen_width * np.ones(total_values, dtype=np.float32),
                                     dtype=np.float32)

        # only one agent is trained, so only its five actions are exposed:
        # rotate clockwise, rotate anti-clockwise, move front, move back, wait
        self.action_space = Discrete(5)

        self.total_steps = 0
        self.number_of_predator = 1
        self.predator_agent = None
        self.predator_total_reward = 0
        self.obs = None

        # wall-clock episode timer: start stamp and budget in seconds
        self.start_time = 0
        self.total_running_time = 10

        # TODO: the pygame window should ideally be created lazily in render()
        pygame.init()
        self.screen = pygame.display.set_mode((self.screen_width, self.screen_height))
        pygame.display.set_caption('Multi Agent Environment(simple)')
        # True while the display exists; cleared when the user closes the
        # window or close() is called so no pygame display call is made after.
        self.window_open = True

        pygame.font.init()
        self.font = pygame.font.Font(None, 18)

        # wall factory and the wall objects built from it on reset()
        self.wall = Walls(pygame)
        self.walls = None

    def agent_init(self):
        """Create a fresh predator agent with index 0."""
        self.predator_agent = Agent('predator', 0)

    def flatten_list(self, nested_list):
        """Return the items of arbitrarily nested lists as one flat list.

        Only ``list`` instances are expanded; every other item (numbers,
        tuples, dicts, ...) is appended unchanged.
        """
        flattened_list = []
        for item in nested_list:
            if isinstance(item, list):
                flattened_list.extend(self.flatten_list(item))
            else:
                flattened_list.append(item)
        return flattened_list

    def _get_obs(self):
        """Build the current observation as a flat ``float32`` array.

        Layout: ``[x, y, angle, *get_fov_rays([x, y])]``. Returning an array
        of the declared dtype (instead of a Python list) keeps the observation
        consistent with ``observation_space``.
        """
        position = self.predator_agent.current_position
        agent_pos = [position[0], position[1]]
        raw_observation = [agent_pos, self.predator_agent.angle, get_fov_rays(agent_pos)]
        return np.asarray(self.flatten_list(raw_observation), dtype=np.float32)

    def _max_right(self):
        """Return the largest ``right`` edge among the walls, never below 0."""
        return max([0] + [wall.right for wall in self.walls])

    def reset(self, seed=None, options=None):
        """Start a new episode.

        Args:
            seed: optional seed forwarded to ``Env.reset`` to seed
                ``self.np_random``.
            options: accepted for Gymnasium API compatibility; unused.

        Returns:
            ``(observation, info)`` where ``info`` is an (empty) dict, as the
            Gymnasium API requires. The previous version returned the seed in
            place of the info dict.
        """
        super().reset(seed=seed)

        self.start_time = time.time()

        self.agent_init()
        self.wall.clear_walls()
        self.walls = self.wall.make_wall(WALLS2)

        self.total_steps = 0
        self.predator_total_reward = 0

        # place the predator at its initial position
        self.predator_agent.agent_reset(width=self.screen_width,
                                        height=self.screen_height,
                                        walls=self.walls)

        observation = self._get_obs()
        self.obs = observation

        return observation, {}

    def step(self, action):
        """Advance the environment by one action.

        Returns:
            ``(observation, reward, done, truncated, info)``. ``truncated`` is
            always ``False``; the wall-clock limit is reported through ``done``
            (NOTE(review): Gymnasium would normally signal a time limit via
            ``truncated`` — kept as-is to preserve the training signal).
        """
        done = False
        reward = 0
        truncated = False
        info = {}

        elapsed_time = time.time() - self.start_time

        # Handle the window being closed. Events are only polled while the
        # display exists, because pygame raises once it has been shut down.
        if self.window_open:
            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    done = True
                    self.close()
                    break

        self.predator_agent.step_update(action, range_x=self.screen_width, range_y=self.screen_height)
        self.predator_agent = detect_collision(self.predator_agent, self.walls)

        self.total_steps += 1

        # goal: the predator got past the right-most wall
        if self.predator_agent.current_position[0] > self._max_right():
            reward += 150
            done = True

        # wall-clock budget exhausted
        if elapsed_time >= self.total_running_time:
            reward -= 50
            done = True

        # small per-step bonus
        reward += 0.01

        # render() is a no-op once the window has been closed
        self.render()

        observation = self._get_obs()
        # accumulate (not overwrite) so this really is the episode total
        self.predator_total_reward += reward
        self.obs = observation

        return observation, reward, done, truncated, info

    def render(self):
        """Draw the predator and the walls when rendering in ``'human'`` mode."""
        if self.render_mode != 'human' or not self.window_open:
            return

        screen = self.screen
        screen.fill(WHITE)

        predator = self.predator_agent
        pygame.draw.circle(screen, RED, predator.center, predator.radius)
        pygame.draw.line(screen, RED, predator.center, predator.draw_direction_end, 5)

        for key, wall in WALLS2.items():
            pygame.draw.rect(screen, BLUE, (wall['x'], wall['y'], wall['width'], wall['height']))

        pygame.display.update()

    def close(self):
        """Shut pygame down; safe to call more than once."""
        self.window_open = False
        pygame.quit()

In [28]:
env = GameEnv()

In [21]:
res = env.reset()

In [22]:
from stable_baselines3 import DQN

In [16]:
# Output directories for this run: one under 'Logs' (passed to DQN as
# tensorboard_log) and one under 'Models'.
log_path, baseline_path = (
    os.path.join('Training', subdir, 'Level_01_DQN') for subdir in ('Logs', 'Models')
)

In [29]:
# Train a DQN agent with an MLP policy; TensorBoard scalars are written under log_path.
model = DQN('MlpPolicy', env, verbose=1, tensorboard_log=log_path)
# learn() resets the wrapped environment itself, so no manual env.reset() is needed first.
model.learn(total_timesteps=2000000)
# Persist the trained policy — baseline_path was defined for this run but never used,
# so the result of the long training run was previously lost with the kernel.
model.save(baseline_path)

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Logging to Training\Logs\Level_01_DQN\DQN_18
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 736      |
|    ep_rew_mean      | -42.6    |
|    exploration_rate | 0.986    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 73       |
|    time_elapsed     | 40       |
|    total_timesteps  | 2945     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 703      |
|    ep_rew_mean      | -18      |
|    exploration_rate | 0.973    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 73       |
|    time_elapsed     | 76       |
|    total_timesteps  | 5622     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean    

In [18]:
env.close()

In [86]:
# Scratch dict mixing string keys with a tuple key, used to try out merging below.
dict1 = dict([
    ('pos', [23542, 345]),
    ('angle', 340),
    ((123, 534), 0),
])

In [125]:
# One Tuple space per key 'point0' … 'point359': a 2-D float32 Box bounded by
# the screen size, paired with a Discrete(2).
dict2 = dict(
    (
        f'point{point}',
        Tuple((
            Box(low=np.array([0, 0], dtype=np.float32),
                high=np.array([SCREEN_WIDTH, SCREEN_HEIGHT], dtype=np.float32),
                dtype=np.float32),
            Discrete(2),
        )),
    )
    for point in range(360)
)

In [129]:
merged_dict = {**dict1, **dict2}

In [140]:
merged_dict['point0'].sample()

(array([240.3118 , 220.21739], dtype=float32), 1)

In [118]:
# Prototype of a dict-style observation space: the predator pose entries first,
# followed by one Discrete(2) entry per key 'point0' … 'point30'.
dict1 = {}
dict1['predator_position'] = Box(low=np.array([0, 0], dtype=np.float32),
                                 high=np.array([SCREEN_WIDTH, SCREEN_HEIGHT], dtype=np.float32),
                                 dtype=np.float32)
dict1['predator_angle'] = Discrete(360)

dict2 = dict((f'point{point}', Discrete(2)) for point in range(31))

# Merge into a new dict; dict2 entries come after (and would override) dict1 entries.
custom_obs_space = dict(dict1)
custom_obs_space.update(dict2)

In [147]:
# Sample FOV data: a list of (point, value) pairs.
fov = [([428., 301.], 0), ([428., 303.], 0), ([428., 304.], 0)]

# Keep only each pair's value, keyed by its position in the list.
result_dict = dict((f'point{position}', pair[1]) for position, pair in enumerate(fov))

In [148]:
result_dict

{'point0': 0, 'point1': 0, 'point2': 0}

In [150]:
custom_obs_space = { **dict1, **result_dict}

In [157]:
custom_obs_space


{'predator_position': Box(0.0, 600.0, (2,), float32),
 'predator_angle': Discrete(360),
 'point0': 0,
 'point1': 0,
 'point2': 0}

In [200]:
def flatten_list(nested_list):
    """Return the items of arbitrarily nested lists as one flat list, in order.

    Only ``list`` instances are expanded; any other item (numbers, tuples,
    dicts, ...) is appended unchanged.

    Args:
        nested_list: an iterable whose items may themselves be lists.

    Returns:
        A new flat list; the input is not modified.
    """
    flattened_list = []
    for item in nested_list:
        # The check must be against `list` itself: `type(list)` evaluates to
        # `type`, which no list instance matches, so nothing was ever flattened.
        if isinstance(item, list):
            flattened_list.extend(flatten_list(item))
        else:
            flattened_list.append(item)
    return flattened_list

In [204]:
list1 = [[234, 234], 24, [0,0,0,0,0,1,1,0,0,1,1,1,0,0,0,0,0]]

In [205]:
list1 = flatten_list(list1)

In [206]:
list1

[234, 234, 24, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0]

In [132]:
from Agents.overlap_detection import detect_overlapping_points

In [133]:
# Sample agent position as a two-element list — presumably [x, y] in screen
# pixels; TODO confirm against Agent.current_position.
agent_location = [328.8917, 301.3598]

# detect_overlapping_points comes from Agents.overlap_detection; its body is
# not visible in this notebook. In the captured output it yields a dict keyed
# by (x, y) integer tuples with 0 values.
fov = detect_overlapping_points(agent_location, WALLS)

print(fov)

{(428, 301): 0, (428, 303): 0, (428, 304): 0, (428, 306): 0, (428, 308): 0, (428, 310): 0, (428, 311): 0, (428, 313): 0, (427, 315): 0, (427, 317): 0, (427, 318): 0, (427, 320): 0, (426, 322): 0, (426, 323): 0, (425, 325): 0, (425, 327): 0, (425, 328): 0, (424, 330): 0, (423, 332): 0, (423, 333): 0, (422, 335): 0, (422, 337): 0, (421, 338): 0, (420, 340): 0, (420, 342): 0, (419, 343): 0, (418, 345): 0, (417, 346): 0, (417, 348): 0, (416, 349): 0, (415, 351): 0, (414, 352): 0, (413, 354): 0, (412, 355): 0, (411, 357): 0, (410, 358): 0, (409, 360): 0, (408, 361): 0, (407, 362): 0, (406, 364): 0, (405, 365): 0, (404, 366): 0, (403, 368): 0, (402, 369): 0, (400, 370): 0, (399, 372): 0, (398, 373): 0, (397, 374): 0, (395, 375): 0, (394, 376): 0, (393, 377): 0, (391, 379): 0, (390, 380): 0, (389, 381): 0, (387, 382): 0, (386, 383): 0, (384, 384): 0, (383, 385): 0, (381, 386): 0, (380, 387): 0, (378, 387): 0, (377, 388): 0, (375, 389): 0, (374, 390): 0, (372, 391): 0, (371, 391): 0, (369, 392

In [190]:
# Assemble [position, angle, overlap lookup] and flatten the nested lists in one call.
agent_pos = [234, 234]
angle = 30
value_list = detect_overlapping_points(agent_pos, WALLS)

observation = flatten_list([agent_pos, angle, value_list])

In [207]:
detect_overlapping_points(agent_pos, WALLS)

{(334, 234): 0,
 (333, 235): 0,
 (333, 237): 0,
 (333, 239): 0,
 (333, 240): 0,
 (333, 242): 0,
 (333, 244): 0,
 (333, 246): 0,
 (333, 247): 0,
 (332, 249): 0,
 (332, 251): 0,
 (332, 253): 0,
 (331, 254): 0,
 (331, 256): 0,
 (331, 258): 0,
 (330, 259): 0,
 (330, 261): 0,
 (329, 263): 0,
 (329, 264): 0,
 (328, 266): 0,
 (327, 268): 0,
 (327, 269): 0,
 (326, 271): 0,
 (326, 273): 0,
 (325, 274): 0,
 (324, 276): 0,
 (323, 277): 0,
 (323, 279): 0,
 (322, 280): 0,
 (321, 282): 0,
 (320, 284): 0,
 (319, 285): 0,
 (318, 286): 0,
 (317, 288): 0,
 (316, 289): 0,
 (315, 291): 0,
 (314, 292): 0,
 (313, 294): 0,
 (312, 295): 0,
 (311, 296): 0,
 (310, 298): 0,
 (309, 299): 0,
 (308, 300): 0,
 (307, 302): 0,
 (305, 303): 0,
 (304, 304): 0,
 (303, 305): 0,
 (302, 307): 0,
 (300, 308): 0,
 (299, 309): 0,
 (298, 310): 0,
 (296, 311): 0,
 (295, 312): 0,
 (294, 313): 0,
 (292, 314): 0,
 (291, 315): 0,
 (289, 316): 0,
 (288, 317): 0,
 (286, 318): 0,
 (285, 319): 0,
 (284, 320): 0,
 (282, 321): 0,
 (280, 3

In [191]:
observation

[234,
 234,
 30,
 {(334, 234): 0,
  (333, 235): 0,
  (333, 237): 0,
  (333, 239): 0,
  (333, 240): 0,
  (333, 242): 0,
  (333, 244): 0,
  (333, 246): 0,
  (333, 247): 0,
  (332, 249): 0,
  (332, 251): 0,
  (332, 253): 0,
  (331, 254): 0,
  (331, 256): 0,
  (331, 258): 0,
  (330, 259): 0,
  (330, 261): 0,
  (329, 263): 0,
  (329, 264): 0,
  (328, 266): 0,
  (327, 268): 0,
  (327, 269): 0,
  (326, 271): 0,
  (326, 273): 0,
  (325, 274): 0,
  (324, 276): 0,
  (323, 277): 0,
  (323, 279): 0,
  (322, 280): 0,
  (321, 282): 0,
  (320, 284): 0,
  (319, 285): 0,
  (318, 286): 0,
  (317, 288): 0,
  (316, 289): 0,
  (315, 291): 0,
  (314, 292): 0,
  (313, 294): 0,
  (312, 295): 0,
  (311, 296): 0,
  (310, 298): 0,
  (309, 299): 0,
  (308, 300): 0,
  (307, 302): 0,
  (305, 303): 0,
  (304, 304): 0,
  (303, 305): 0,
  (302, 307): 0,
  (300, 308): 0,
  (299, 309): 0,
  (298, 310): 0,
  (296, 311): 0,
  (295, 312): 0,
  (294, 313): 0,
  (292, 314): 0,
  (291, 315): 0,
  (289, 316): 0,
  (288, 317): 

In [199]:
env.close()

In [None]:
%tensorboard