In [1]:
import os
import sys
import time

import numpy as np
import pygame
from gymnasium import Env
from gymnasium.spaces import Discrete, Dict, Box, MultiDiscrete, Tuple

from Agents.agent import Agent
# from Agents.fov_points import get_fov_points
# from Agents.overlap_detection import detect_overlapping_points
# from Agents.RayCast import get_fov_rays
from Constants.constants import WHITE, RED, BLUE, SCREEN_WIDTH, SCREEN_HEIGHT, LEVEL_4_WALLS
from Walls.collision_detection import detect_collision
from Walls.wall_class import Walls
from Walls.Point_Ray import is_ray_blocked

sys.path.insert(1, os.path.join(sys.path[0], '..'))

pygame 2.5.2 (SDL 2.28.3, Python 3.11.5)
Hello from the pygame community. https://www.pygame.org/contribute.html


In [None]:

class GameEnv(Env):
    """Level-4 predator navigation task exposed through the Gymnasium ``Env`` API.

    One predator ``Agent`` moves on a ``SCREEN_WIDTH`` x ``SCREEN_HEIGHT`` field
    populated with the ``LEVEL_4_WALLS`` obstacles. It collects a shaped reward
    while ``is_ray_blocked`` returns a truthy value for the agent/goal pair and
    a large bonus for entering the goal area in the top-right corner.

    Observation space (``Dict``):
        predator_position: ``Box`` (x, y) inside the screen rectangle.
        predator_angle: ``Discrete(360)`` heading.
        destination_coordinates: ``Box`` (x, y) of the goal marker.

    Action space: ``Discrete(3)`` - rotate clockwise, rotate anti-clockwise,
    move forward. The index-to-move mapping is implemented by
    ``Agent.step_update`` and is not visible here.

    An episode ends when the agent enters the goal area (+200) or once
    ``total_running_time + 10`` wall-clock seconds have elapsed since
    ``reset`` (-100).
    """

    metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 4}

    def __init__(self, render_mode=None):
        """Create the environment.

        Args:
            render_mode: ``None`` (no rendering), ``"human"`` (pygame window,
                refreshed on every ``step``) or ``"rgb_array"`` (frames are
                returned by ``render``).
        """
        super().__init__()

        # defining the screen dimension for render purpose
        self.screen_width = SCREEN_WIDTH
        self.screen_height = SCREEN_HEIGHT

        assert render_mode is None or render_mode in self.metadata["render_modes"]
        self.render_mode = render_mode

        screen_low = np.array([0, 0], dtype=np.float32)
        screen_high = np.array([self.screen_width, self.screen_height], dtype=np.float32)
        self.observation_space = Dict({
            "predator_position": Box(low=screen_low, high=screen_high, dtype=np.float32),
            "predator_angle": Discrete(360),
            # to send only the points for which he has to cross
            "destination_coordinates": Box(low=screen_low, high=screen_high, dtype=np.float32),
        })

        # 3 actions: rotate clockwise, rotate anti-clockwise, move front
        self.action_space = Discrete(3)

        self.total_steps = 0
        self.predator_agent = Agent('predator', 0)
        # reward accumulated over the current episode (shown in the HUD)
        self.predator_total_reward = 0

        self.goal_coordinate = np.array([690, 110], dtype=np.float32)
        self.goal_area = {'x': SCREEN_WIDTH-150, 'y': 0, 'width': 150, 'height': 150}
        # number of steps in this episode on which the goal was reported visible
        self.total_seen = 0
        # refreshed by _get_info(); initialised here so rendering before the
        # first reset() does not raise AttributeError
        self.goal_seen = False

        self.obs = None

        # start the tick timer
        self.start_time = 0
        self.total_running_time = 10

        self.window = None
        self.clock = None

        # for the wall initializations
        self.wall = Walls(pygame)
        self.walls = None

    def _flatten_list(self, nested_list):
        """Return a flat list with the leaves of an arbitrarily nested list."""
        flattened_list = []
        for item in nested_list:
            if isinstance(item, list):
                # recurse through the method's real (underscored) name
                flattened_list.extend(self._flatten_list(item))
            else:
                flattened_list.append(item)
        return flattened_list

    def _get_obs(self):
        """Build the observation dict matching ``observation_space``."""
        # NOTE(review): Discrete(360) expects an integer angle in [0, 360);
        # confirm that Agent.angle honours this.
        observation = {
            "predator_position": self.predator_agent.current_position,
            "predator_angle": self.predator_agent.angle,
            "destination_coordinates": self.goal_coordinate,  # np.array of the goal to reach
        }
        return observation

    # to capture all the info
    def _get_info(self):
        """Refresh ``self.goal_seen`` and return the auxiliary info dict.

        ``distance`` is the Euclidean agent-to-goal distance while the goal is
        reported visible and the sentinel ``10000`` otherwise.
        """
        distance = 10000
        # NOTE(review): a truthy return of is_ray_blocked is treated as "goal
        # visible" everywhere in this class, despite the helper's name -
        # confirm against Walls.Point_Ray.
        self.goal_seen = is_ray_blocked(self.predator_agent.current_position, self.goal_coordinate, self.walls)
        if self.goal_seen:
            direction = self.goal_coordinate - self.predator_agent.current_position
            distance = np.linalg.norm(direction)

        info = {
            "goal_seen": self.goal_seen,
            "distance": distance,
            "vision_blocked": not self.goal_seen,
        }
        return info

    def get_reward(self, reward, done):
        """Add this step's reward terms to ``reward`` and update ``done``.

        While the goal is reported visible the agent earns an exponentially
        distance-shaped term plus a flat 0.005, and a one-off +50 the first
        time in the episode. Entering the goal area adds +200 and ends the
        episode.

        Returns:
            ``(reward, done)`` after applying the terms above.
        """
        curve = -0.09
        ascend = 0.009
        clamp = 10
        agent_pos = self.predator_agent.current_position

        if is_ray_blocked(agent_pos, self.goal_coordinate, self.walls):
            # the norm is sign-independent, so no abs() is needed on the offset
            distance = np.linalg.norm(self.goal_coordinate - agent_pos)
            reward += (ascend * np.exp(curve * distance) * clamp)
            reward += 0.005
            self.total_seen += 1
            if self.total_seen == 1:
                reward += 50

        area = self.goal_area
        # the goal area touches the right and top screen edges, so only its
        # left and bottom borders need checking
        if agent_pos[0] > area['x'] and agent_pos[1] < area['y'] + area['height']:
            done = True
            reward += 200

        return reward, done

    # the usual reset function
    def reset(self, seed=None, options=None, option=None):
        """Start a new episode and return ``(observation, info)``.

        Args:
            seed: forwarded to ``Env.reset`` to seed ``self.np_random``.
            options: accepted for Gymnasium API compatibility; unused.
            option: legacy misspelling of ``options`` kept so that existing
                keyword callers keep working; unused.
        """
        super().reset(seed=seed)
        self.start_time = time.time()

        self.wall.clear_walls()
        self.walls = self.wall.make_wall(LEVEL_4_WALLS)

        self.total_steps = 0
        self.predator_total_reward = 0
        self.total_seen = 0

        self.predator_agent.agent_reset(width=self.screen_width, height=self.screen_height)

        # all the variable values inside the observation space need to be sent
        # inside the observation variable; for this level it is a dictionary
        observation = self._get_obs()
        info = self._get_info()

        return observation, info

    def step(self, action):
        """Apply ``action`` and return ``(observation, reward, done, truncated, info)``.

        ``truncated`` is always ``False``: the wall-clock timeout is reported
        through ``done`` together with a -100 penalty.
        """
        done = False
        reward = 0
        truncated = False

        # sampled before the move is applied, as the timeout was always evaluated
        elapsed_time = time.time() - self.start_time

        self.predator_agent.step_update(action, range_x=self.screen_width, range_y=self.screen_height)
        self.predator_agent = detect_collision(self.predator_agent, self.walls)

        self.total_steps += 1
        reward, done = self.get_reward(reward, done)

        if elapsed_time >= self.total_running_time + 10:
            reward -= 100
            done = True

        # getting observation and info
        observation = self._get_obs()
        info = self._get_info()

        # accumulate: reset() zeroes this counter at the start of each episode
        self.predator_total_reward += reward
        self.obs = observation

        if self.render_mode == "human":
            self._render_frame()

        return observation, reward, done, truncated, info

    def render(self):
        """Return the current frame as an RGB array in ``"rgb_array"`` mode.

        In ``"human"`` mode the window is refreshed from ``step`` and this
        method returns ``None``.
        """
        if self.render_mode == 'rgb_array':
            return self._render_frame()

    def _render_frame(self):
        """Draw the scene; blit it to the window (human) or return it as an array."""
        if self.window is None and self.render_mode == "human":
            pygame.init()
            pygame.display.init()
            self.window = pygame.display.set_mode((self.screen_width, self.screen_height))
            pygame.font.init()

        if self.clock is None and self.render_mode == "human":
            self.clock = pygame.time.Clock()

        screen = pygame.Surface((self.screen_width, self.screen_height))
        screen.fill(WHITE)

        # predator body and heading indicator
        predator = self.predator_agent
        pygame.draw.circle(screen, RED, predator.center, predator.radius)
        pygame.draw.line(screen, RED, predator.center, predator.draw_direction_end, 5)

        # goal marker, plus a sight line while the goal is reported visible
        goalx, goaly = self.goal_coordinate
        goal = (int(goalx), int(goaly))
        pygame.draw.circle(screen, (255, 255, 50), goal, 40)
        if self.goal_seen:
            pygame.draw.line(screen, RED, predator.center, goal, 3)

        pygame.draw.rect(screen, (200, 200, 50), (self.goal_area['x'], self.goal_area['y'], self.goal_area['width'], self.goal_area['height']), 3)

        for key, wall in LEVEL_4_WALLS.items():
            pygame.draw.rect(screen, BLUE, (wall['x'], wall['y'], wall['width'], wall['height']))

        if self.render_mode == "human":
            font = pygame.font.Font(None, 18)
            text_surface = font.render(f"Reward: {self.predator_total_reward: .5f} ", True, (0, 0, 0))
            text_rect = text_surface.get_rect()
            text_rect.center = (self.screen_width - 200, 10)

            screen.blit(text_surface, text_rect)
            self.window.blit(screen, screen.get_rect())
            pygame.event.pump()
            pygame.display.update()

            # this part is to fix the fps of rendering
            # self.clock.tick(self.metadata["render_fps"])

        else:
            # surfarray is indexed (x, y, channel); swap to (row, column, channel)
            return np.transpose(
                np.array(pygame.surfarray.pixels3d(screen)), axes=(1, 0, 2)
            )

    def close(self):
        """Shut pygame down and forget the window so the env can render again."""
        if self.window is not None:
            pygame.display.quit()
            pygame.font.quit()
            pygame.quit()
            # drop the stale handles; _render_frame() recreates them on demand
            self.window = None
            self.clock = None

In [147]:
# Environment used for the manual checks and for training below; "human"
# render mode makes every step() draw to a pygame window.
env = GameEnv(render_mode="human")

In [97]:
# Smoke-test reset(); `res` holds the (observation, info) pair it returns.
res = env.reset()

In [98]:
# Scratch check of the exponential distance-shaping term
#     ASCEND * exp(CURVE * distance) * CLAMP
# for a sample agent position and target.
CURVE = -0.02
ASCEND = 0.038
CLAMP = 60

target1 = np.array([725,  75], dtype=np.float32)
agent_pos1 = np.array([600, 200], dtype=np.float32)

# straight-line separation between the sample agent and the target
direction1 = target1 - agent_pos1
distance1 = np.linalg.norm(direction1)
print(distance1)

# same left-to-right evaluation order as the one-line formula
decay1 = np.exp(CURVE * distance1)
rew = ASCEND * decay1 * CLAMP
print(rew)

176.7767
0.06644647021473574


In [99]:
# Random-policy smoke test: place the agent at (600, 200) after the reset and
# sample actions until the environment reports the end of the episode.
env.reset()
env.predator_agent.current_position = np.array([600, 200], dtype=np.float32)

total_reward = 0
done = False
traunc = False
while not (done or traunc):
    action = env.action_space.sample()
    step_result = env.step(action)
    reward, done, traunc = step_result[1], step_result[2], step_result[3]
    total_reward += reward

print(total_reward)


-68.56397693978732


In [103]:
# Tear down the pygame window opened by the human-mode rollout.
env.close()

In [66]:
from stable_baselines3 import DQN
from stable_baselines3.common.callbacks import EvalCallback, StopTrainingOnRewardThreshold

In [143]:
# Output locations for this run, all rooted under ./Training.
_TRAINING_DIR = 'Training'
_RUN_NAME = 'Level_04_DQN'

log_path = os.path.join(_TRAINING_DIR, 'Logs', _RUN_NAME)  # TensorBoard logs
baseline_path = os.path.join(_TRAINING_DIR, 'Models', _RUN_NAME)  # saved models
callback_basealine_path = os.path.join(_TRAINING_DIR, 'Models', 'callback_' + _RUN_NAME)

In [131]:
# Checkpoint to reload: best_model.zip inside the Level_04_DQN model directory.
load_path = os.path.join(
    os.path.join('Training', 'Models', 'Level_04_DQN'),
    'best_model.zip',
)

In [148]:
# Periodic evaluation with early stopping once the mean evaluation reward
# reaches the threshold.
#
# Evaluation runs on its own GameEnv instance: evaluating on the training
# `env` would reset the very episode the learner is collecting. The copy is
# wrapped in Monitor so episode statistics are recorded, and it uses no render
# mode so evaluation episodes do not open a window.
from stable_baselines3.common.monitor import Monitor

eval_env = Monitor(GameEnv())

stop_callback = StopTrainingOnRewardThreshold(reward_threshold=400, verbose=1)
eval_callback = EvalCallback(
    eval_env,
    callback_on_new_best=stop_callback,
    eval_freq=100000,
    best_model_save_path=baseline_path,
    verbose=1,
)

In [149]:
# DQN learner; 'MultiInputPolicy' is used because the observation space is a Dict.
model = DQN(
    policy='MultiInputPolicy',
    env=env,
    verbose=1,
    tensorboard_log=log_path,
)

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [150]:
# Long training run; eval_callback may stop it early.
env.reset()
model.learn(
    total_timesteps=30_000_000,
    callback=eval_callback,
)

Logging to Training\Logs\Level_04_DQN\DQN_5


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 6.15e+03 |
|    ep_rew_mean      | -100     |
|    exploration_rate | 0.992    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 307      |
|    time_elapsed     | 80       |
|    total_timesteps  | 24608    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 6.36e+03 |
|    ep_rew_mean      | -100     |
|    exploration_rate | 0.984    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 317      |
|    time_elapsed     | 160      |
|    total_timesteps  | 50845    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00552  |
|    n_updates        | 211      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean    

In [138]:
# Release the pygame window used while training.
env.close()

In [104]:
# Persist the current model weights under baseline_path.
model.save(baseline_path)

In [132]:
# Drop the in-memory model; a later cell re-creates it with DQN.load().
del model

In [129]:
from stable_baselines3.common.evaluation import evaluate_policy

In [None]:
# Checkpoint to reload for evaluation (best_model.zip in the Level_04_DQN
# model directory).
load_path = os.path.join('Training', 'Models', 'Level_04_DQN', 'best_model.zip')

In [133]:
# Restore the DQN model from the checkpoint at load_path.
model = DQN.load(load_path)

In [134]:
# Fresh window-rendering environment for watching the loaded policy, attached
# to the model so it can be used with it.
env = GameEnv(render_mode='human')
model.set_env(env)

In [136]:
# Roll out one episode with the loaded policy and print its return.
done = False
traunc = False
total_reward = 0
obs, info = env.reset()
# stop on termination or truncation so the loop cannot outlive the episode
while not (done or traunc):
    # predict() returns (action, recurrent_state); only the action belongs in
    # env.step(). deterministic=True evaluates the greedy policy rather than
    # the exploring one.
    action, _state = model.predict(obs, deterministic=True)
    obs, reward, done, traunc, info = env.step(int(action))
    total_reward += reward

print(total_reward)


KeyboardInterrupt: 

In [137]:
# Evaluate the loaded model on env for 10 episodes; the result is shown as
# the cell output.
evaluate_policy(model, env, n_eval_episodes=10)



KeyboardInterrupt: 

In [11]:
# Draft obstacle layout: wall rectangles keyed by id, plus gates that reference
# walls by id. Every wall currently shares the same placeholder rectangle.
# NOTE(review): the meaning of the 'b_w' / 'o_w' names and of 'gate_with' is
# not defined in this notebook - document them where they are consumed.
OBSTACLES = {
    'walls': {
        1: {'x': SCREEN_WIDTH // 2 - 25, 'y': 0, 'width': 50, 'height': 50, 'name': 'b_w'},
        2: {'x': SCREEN_WIDTH // 2 - 25, 'y': 0, 'width': 50, 'height': 50, 'name': 'b_w'},
        3: {'x': SCREEN_WIDTH // 2 - 25, 'y': 0, 'width': 50, 'height': 50, 'name': 'b_w'},
        4: {'x': SCREEN_WIDTH // 2 - 25, 'y': 0, 'width': 50, 'height': 50, 'name': 'b_w'},
        5: {'x': SCREEN_WIDTH // 2 - 25, 'y': 0, 'width': 50, 'height': 50, 'name': 'o_w', 'gate_with': 2, 'access': 'bottom_top', 'orientation': 'horizontal'},
        6: {'x': SCREEN_WIDTH // 2 - 25, 'y': 0, 'width': 50, 'height': 50, 'name': 'o_w', 'gate_with': 1},
        7: {'x': SCREEN_WIDTH // 2 - 25, 'y': 0, 'width': 50, 'height': 50, 'name': 'o_w', 'gate_with': 4},
        8: {'x': SCREEN_WIDTH // 2 - 25, 'y': 0, 'width': 50, 'height': 50, 'name': 'o_w', 'gate_with': 5},
        9: {'x': SCREEN_WIDTH // 2 - 25, 'y': 0, 'width': 50, 'height': 50, 'name': 'o_w', 'gate_with': 4},
    },
    'gates': {
        # key spelled 'orientation' to match the wall entries above
        1: {'type': 'left_right', 'left': 5, 'right': 6, 'orientation': 'vertical'},
        # NOTE(review): a 'bottom_top' gate with a 'right' side looks like a
        # copy-paste slip for 'bottom' - confirm before relying on it.
        2: {'type': 'bottom_top', 'top': 3, 'right': 4, 'orientation': 'horizontal'},
    }

}
# direction

In [12]:
# List every wall as an (id, spec) pair.
for wall in OBSTACLES['walls'].items():
    print(wall)

(1, {'x': 375, 'y': 0, 'width': 50, 'height': 50, 'name': 'b_w'})
(2, {'x': 375, 'y': 0, 'width': 50, 'height': 50, 'name': 'b_w'})
(3, {'x': 375, 'y': 0, 'width': 50, 'height': 50, 'name': 'b_w'})
(4, {'x': 375, 'y': 0, 'width': 50, 'height': 50, 'name': 'b_w'})
(5, {'x': 375, 'y': 0, 'width': 50, 'height': 50, 'name': 'o_w', 'gate_with': 2, 'access': 'bottom_top', 'orientation': 'horizontal'})
(6, {'x': 375, 'y': 0, 'width': 50, 'height': 50, 'name': 'o_w', 'gate_with': 1})
(7, {'x': 375, 'y': 0, 'width': 50, 'height': 50, 'name': 'o_w', 'gate_with': 4})
(8, {'x': 375, 'y': 0, 'width': 50, 'height': 50, 'name': 'o_w', 'gate_with': 5})
(9, {'x': 375, 'y': 0, 'width': 50, 'height': 50, 'name': 'o_w', 'gate_with': 4})


In [21]:
# Close the environment's pygame window once finished.
env.close()

In [None]:
%tensorboard