In [1]:
import numpy as np
from stable_baselines3 import PPO
from stable_baselines3.common.env_checker import check_env
from gymnasium import Env, spaces
import torch

Set up the environment: cell-state constants, the scan/stitch helpers, and the Gymnasium environment classes

In [2]:
# Cell-state codes used by both the hidden ground-truth map and the explored map.
#   UNEXPLORED (-2): cell has not been revealed by any scan yet
#   OBSTACLE   (-1): cell is blocked
#   SAFE       ( 0): cell is free
# The observation space is bounded by [UNEXPLORED, SAFE], so keep these ordered.
UNEXPLORED, OBSTACLE, SAFE = -2, -1, 0

# === Utility Functions ===
def drone_scan(drone_pos, scan_range, actual_env):
    """Read the square window of ``actual_env`` centred on ``drone_pos``.

    Args:
        drone_pos: ``(row, col)`` of the drone on the grid.
        scan_range: side length of the square window (e.g. 3 or 5).
        actual_env: 2-D array holding the ground-truth map.

    Returns:
        ``(window, (top, left))`` where ``window`` is a
        ``scan_range x scan_range`` array and ``(top, left)`` is the grid
        coordinate of ``window[0, 0]``. Cells of the window that fall outside
        the grid are left as ``UNEXPLORED``; ``top``/``left`` may therefore be
        negative.
    """
    reach = scan_range // 2
    n_rows, n_cols = actual_env.shape
    top = drone_pos[0] - reach
    left = drone_pos[1] - reach

    window = np.full((scan_range, scan_range), UNEXPLORED)

    # Clip the window to the part that actually overlaps the grid.
    row_lo, row_hi = max(top, 0), min(top + scan_range, n_rows)
    col_lo, col_hi = max(left, 0), min(left + scan_range, n_cols)
    if row_lo < row_hi and col_lo < col_hi:
        window[row_lo - top:row_hi - top, col_lo - left:col_hi - left] = (
            actual_env[row_lo:row_hi, col_lo:col_hi]
        )

    return window, (top, left)

def stitch_information(global_grid, local_info, top_left):
    """Merge one scan window into the explored map, in place.

    Merge rule for every window cell that lies inside the grid:
      * if the map cell is still ``UNEXPLORED``, copy the scanned value;
      * otherwise overwrite only when the scan reports ``SAFE``
        (a known cell is never changed to an obstacle).

    Args:
        global_grid: 2-D explored map; modified in place.
        local_info: 2-D scan window as returned by ``drone_scan``.
        top_left: grid coordinate of ``local_info[0, 0]`` (may be negative).

    Returns:
        The same ``global_grid`` object, after merging.
    """
    row0, col0 = top_left
    n_rows, n_cols = global_grid.shape
    patch_h, patch_w = local_info.shape[0], local_info.shape[1]

    # Grid-coordinate bounds of the overlap between window and map.
    row_lo, row_hi = max(row0, 0), min(row0 + patch_h, n_rows)
    col_lo, col_hi = max(col0, 0), min(col0 + patch_w, n_cols)
    if row_lo >= row_hi or col_lo >= col_hi:
        return global_grid

    known = global_grid[row_lo:row_hi, col_lo:col_hi]  # view: writes go to global_grid
    scanned = local_info[row_lo - row0:row_hi - row0, col_lo - col0:col_hi - col0]

    accept = (known == UNEXPLORED) | (scanned == SAFE)
    known[accept] = scanned[accept]
    return global_grid

# === Custom Environment ===
class DronePlacementEnv(Env):
    """Gymnasium environment where each action places one scanning drone on a grid.

    A hidden ground-truth map (``actual_env``) is sampled on every reset, each
    cell independently OBSTACLE (p=0.2) or SAFE (p=0.8). The observation is the
    explored map (``global_grid``), which starts fully UNEXPLORED and is filled
    in by drone scans.

    Action encoding (``Discrete(grid_size * grid_size * 2)``)::

        action = (x * grid_size + y) * 2 + drone_type

    ``drone_type`` 0 scans a 3x3 window and 1 scans a 5x5 window centred on
    ``(x, y)``.
    """

    def __init__(self, grid_size=10, max_steps=50):
        """Create the spaces and sample an initial map.

        Args:
            grid_size: side length of the square grid.
            max_steps: number of steps after which the episode is truncated.
        """
        super().__init__()
        self.grid_size = grid_size
        self.max_steps = max_steps
        self.current_step = 0

        self.action_space = spaces.Discrete(grid_size * grid_size * 2)
        self.observation_space = spaces.Box(low=UNEXPLORED, high=SAFE, shape=(grid_size, grid_size), dtype=np.int32)

        # Populate actual_env / global_grid so the env is usable right away.
        self.reset()

    def reset(self, *, seed=None, options=None):
        """Sample a new ground-truth map and clear the explored map.

        Args:
            seed: optional seed; forwarded to ``Env.reset`` which seeds
                ``self.np_random``.
            options: unused.

        Returns:
            ``(observation, info)`` with a fully UNEXPLORED observation and an
            empty info dict.
        """
        super().reset(seed=seed)
        # Draw from the env's own seeded generator (not the global np.random)
        # so that reset(seed=...) yields a reproducible map.
        self.actual_env = self.np_random.choice(
            [OBSTACLE, SAFE],
            size=(self.grid_size, self.grid_size),
            p=[0.2, 0.8],
        ).astype(np.int32)
        self.global_grid = np.full((self.grid_size, self.grid_size), UNEXPLORED, dtype=np.int32)
        self.current_step = 0
        return self.global_grid.copy(), {}

    def step(self, action):
        """Place one drone, merge its scan into the explored map, and score it.

        Reward = newly revealed cells - 0.2 (placement cost) - 0.2 extra for a
        5x5 drone, +0.3 if placed within two cells of any grid edge, +10 when
        the whole map becomes explored, or -5 if the step budget runs out first.

        Args:
            action: integer in ``[0, grid_size * grid_size * 2)``; values above
                the range are clamped to the last valid action.

        Returns:
            ``(observation, reward, terminated, truncated, info)``.

        Raises:
            ValueError: if the decoded position is outside the grid (e.g. for
                a negative action).
        """
        self.current_step += 1
        action = int(min(action, self.action_space.n - 1))
        x, within_row = divmod(action, self.grid_size * 2)
        y = within_row // 2
        drone_type = action % 2

        if not (0 <= x < self.grid_size and 0 <= y < self.grid_size):
            raise ValueError(f"Decoded position ({x}, {y}) is out of bounds!")

        scan_range = 3 if drone_type == 0 else 5
        local_info, top_left = drone_scan((x, y), scan_range, self.actual_env)
        prev_unexplored = np.sum(self.global_grid == UNEXPLORED)
        self.global_grid = stitch_information(self.global_grid, local_info, top_left)
        new_unexplored = np.sum(self.global_grid == UNEXPLORED)

        reward = float(prev_unexplored - new_unexplored - 0.2 - 0.2 * drone_type)
        if x <= 1 or x >= self.grid_size - 2 or y <= 1 or y >= self.grid_size - 2:
            reward += 0.3

        terminated = bool(new_unexplored == 0)
        truncated = bool(self.current_step >= self.max_steps)
        # Completion takes precedence: finishing on the last allowed step still
        # earns the bonus rather than the timeout penalty.
        if terminated:
            reward += 10
        elif truncated:
            reward -= 5

        return self.global_grid.copy(), reward, terminated, truncated, {}

    def render(self):
        """Print the current explored map to stdout."""
        print(self.global_grid)

# === Small Map Env ===
class SmallDroneEnv(DronePlacementEnv):
    """Preset of ``DronePlacementEnv``: a 6x6 grid with an 8-step budget."""

    def __init__(self):
        preset = {"grid_size": 6, "max_steps": 8}
        super().__init__(**preset)

In [3]:
def run_trained_model():
    """Load the saved PPO policy and play one episode, printing every step.

    Loads ``ppo_large_model`` from the working directory, rolls it out on a
    fresh default ``DronePlacementEnv`` and, for each step, prints the decoded
    drone placement, the reward and the explored map. Stops with a distinct
    message when the map is fully explored or when the step budget is reached.
    """
    model = PPO.load("ppo_large_model")
    env = DronePlacementEnv()
    obs, _ = env.reset()
    # Same decoding as DronePlacementEnv.step, derived from the env instead of
    # a hard-coded 20 so it stays correct for other grid sizes.
    actions_per_row = env.grid_size * 2

    for step in range(env.max_steps):
        action, _ = model.predict(obs)
        action = int(action)  # predict() returns a numpy value; use a plain int
        obs, reward, terminated, truncated, _ = env.step(action)
        x, within_row = divmod(action, actions_per_row)
        y = within_row // 2
        drone_type = "3x3" if action % 2 == 0 else "5x5"
        print(f"Step {step}: Placed {drone_type} drone at ({x}, {y}), reward: {reward:.2f}")
        env.render()
        # Report success only on real completion; truncation has its own message.
        if terminated:
            print("🎉 Mission Complete: All cells explored!")
            break
        if truncated:
            print("⚠️ Max steps reached.")
            break

# === Entry Point ===
# Run the demo episode when this code is executed directly (the guard also
# passes when the cell is run inside a notebook kernel).
if __name__ == "__main__":
    run_trained_model()



Step 0: Placed 5x5 drone at (2, 7), reward: 24.60
[[-2 -2 -2 -2 -2  0  0  0  0  0]
 [-2 -2 -2 -2 -2  0  0  0  0  0]
 [-2 -2 -2 -2 -2  0  0 -1  0 -1]
 [-2 -2 -2 -2 -2  0 -1  0  0  0]
 [-2 -2 -2 -2 -2 -1 -1  0  0  0]
 [-2 -2 -2 -2 -2 -2 -2 -2 -2 -2]
 [-2 -2 -2 -2 -2 -2 -2 -2 -2 -2]
 [-2 -2 -2 -2 -2 -2 -2 -2 -2 -2]
 [-2 -2 -2 -2 -2 -2 -2 -2 -2 -2]
 [-2 -2 -2 -2 -2 -2 -2 -2 -2 -2]]
Step 1: Placed 5x5 drone at (7, 1), reward: 19.90
[[-2 -2 -2 -2 -2  0  0  0  0  0]
 [-2 -2 -2 -2 -2  0  0  0  0  0]
 [-2 -2 -2 -2 -2  0  0 -1  0 -1]
 [-2 -2 -2 -2 -2  0 -1  0  0  0]
 [-2 -2 -2 -2 -2 -1 -1  0  0  0]
 [ 0  0  0  0 -2 -2 -2 -2 -2 -2]
 [ 0  0  0 -1 -2 -2 -2 -2 -2 -2]
 [ 0  0  0  0 -2 -2 -2 -2 -2 -2]
 [ 0 -1 -1  0 -2 -2 -2 -2 -2 -2]
 [ 0 -1  0  0 -2 -2 -2 -2 -2 -2]]
Step 2: Placed 5x5 drone at (1, 7), reward: -0.10
[[-2 -2 -2 -2 -2  0  0  0  0  0]
 [-2 -2 -2 -2 -2  0  0  0  0  0]
 [-2 -2 -2 -2 -2  0  0 -1  0 -1]
 [-2 -2 -2 -2 -2  0 -1  0  0  0]
 [-2 -2 -2 -2 -2 -1 -1  0  0  0]
 [ 0  0  0  0 -2 -2 -2 