In [9]:
'''
git add .
git commit -m "update recorded" 
git push
'''

'\ngit add .\ngit commit -m "update recorded" \ngit push\n'

In [10]:
import numpy as np

# Constants representing the state of each cell
UNEXPLORED = -2
OBSTACLE = -1
SAFE = 0

# Initialize the global grid status as unexplored
global_grid = np.full((10, 10), UNEXPLORED)

def drone_scan(drone_pos, scan_range, actual_env):
    """
    Perform a local scan around a drone position.

    Args:
        drone_pos (tuple): Drone coordinates (x, y).
        scan_range (int): The range of the drone's field of view (e.g., 3 or 5).
        actual_env (np.array): The actual environment (10x10 matrix).

    Returns:
        tuple: local scanned information and top-left position of the scan.
    """
    half_range = scan_range // 2
    local_info = np.full((scan_range, scan_range), UNEXPLORED)

    for i in range(scan_range):
        for j in range(scan_range):
            global_x = drone_pos[0] - half_range + i
            global_y = drone_pos[1] - half_range + j

            # Check boundaries
            if 0 <= global_x < 10 and 0 <= global_y < 10:
                local_info[i, j] = actual_env[global_x, global_y]
                #print(f"⚠️ Warning: Drone scan out of bounds at ({global_x}, {global_y})")

    return local_info, (drone_pos[0] - half_range, drone_pos[1] - half_range)

def stitch_information(global_grid, local_info, top_left):
    """
    Merge local drone scans into the global coverage grid.

    Args:
        global_grid (np.array): Current global grid state.
        local_info (np.array): Local scan result from a drone.
        top_left (tuple): Top-left coordinate of the local scan in the global grid.

    Returns:
        np.array: Updated global grid.
    """
    # find top left corner of local scan for better aligning and stitching
    x_offset, y_offset = top_left

    for i in range(local_info.shape[0]):
        for j in range(local_info.shape[1]):
            x, y = x_offset + i, y_offset + j
            if 0 <= x < 10 and 0 <= y < 10:
                # Update global grid only if it's unexplored
                if global_grid[x, y] == UNEXPLORED:
                    global_grid[x, y] = local_info[i, j]
                # If conflicting info, prioritize safe information
                elif global_grid[x, y] != local_info[i, j]:
                    if local_info[i, j] == SAFE:
                        global_grid[x, y] = SAFE
    return global_grid

In [11]:
# Example actual environment (randomly generated obstacles and safe zones when searching)
actual_env = np.random.choice([OBSTACLE, SAFE], size=(10, 10), p=[0.2, 0.8])

Training to put drones

In [12]:
from gymnasium import Env, spaces
import numpy as np

class DronePlacementEnv(Env):
    def __init__(self):
        super().__init__()
        self.grid_size = 10
        self.max_steps = 50
        self.current_step = 0

        # 10 x 10 grid × 2 drone types = 200 possible actions
        self.action_space = spaces.Discrete(200)
        self.observation_space = spaces.Box(low=UNEXPLORED, high=SAFE,shape=(self.grid_size, self.grid_size), dtype=np.int32)

        self.reset()

    def reset(self, *, seed=None, options=None):
        super().reset(seed=seed)
        self.actual_env = np.random.choice([OBSTACLE, SAFE], size=(self.grid_size, self.grid_size), p=[0.2, 0.8]).astype(np.int32)

        self.global_grid = np.full((self.grid_size, self.grid_size), UNEXPLORED, dtype=np.int32)
        self.current_step = 0

        # ✅ 引导探索：预先把边缘随机几个点设为 SAFE
        edge_indices = list(range(self.grid_size))
        edge_cells = []

        # top and bottom rows
        for j in edge_indices:
            edge_cells.append((0, j))         # top row
            edge_cells.append((self.grid_size - 1, j))  # bottom row

        # left and right columns（去掉 corners 避免重复）
        for i in edge_indices[1:-1]:
            edge_cells.append((i, 0))         # left col
            edge_cells.append((i, self.grid_size - 1))  # right col

        # 随机选 10 个边缘格子填入已知 SAFE
        np.random.shuffle(edge_cells)
        for (x, y) in edge_cells[:10]:
            self.global_grid[x, y] = self.actual_env[x, y]

        # 明确暴露最底部几行地图信息（强行 agent 看得见）
        for row in [7, 8, 9]:
            self.global_grid[row, :] = self.actual_env[row, :]

        return self.global_grid.copy(), {}


    def step(self, action):
        self.current_step += 1

        x = action // 20
        y = (action % 20) // 2
        drone_type = action % 2
        scan_range = 3 if drone_type == 0 else 5

        local_info, top_left = drone_scan((x, y), scan_range, self.actual_env)
        prev_unexplored = np.sum(self.global_grid == UNEXPLORED)
        self.global_grid = stitch_information(self.global_grid, local_info, top_left)
        new_unexplored = np.sum(self.global_grid == UNEXPLORED)

        # reward = new coverage - drone penalty - large drone penalty
        reward = float(prev_unexplored - new_unexplored - 0.2 - 0.2 * drone_type)

        # 边缘探索奖励：如果 drone 靠近边缘，就给鼓励
        if x <= 1 or x >= 8 or y <= 1 or y >= 8:
            reward += 0.3

        terminated = bool(new_unexplored == 0)
        truncated = bool(self.current_step >= self.max_steps)

        # 奖励完成地图探索
        if terminated:
            reward += 10  # 成功覆盖全图奖励
        elif truncated and np.sum(self.global_grid == UNEXPLORED) > 0:
            reward -= 5   # 达到最大步数却没扫完，惩罚


        for row in [7, 8, 9]:
            if np.all(self.global_grid[row, :] != UNEXPLORED):
                reward += 3  # 每覆盖一整行奖励 3 分



        return self.global_grid.copy(), reward, terminated, truncated, {}

    def render(self):
        print(self.global_grid)


In [13]:
# train a small grid 6*6, then the transfer to 10*10 grid

class SmallDroneEnv(DronePlacementEnv):
    def __init__(self):
        super().__init__()
        self.grid_size = 6
        self.max_steps = 8  # 适合小地图的 max steps
        self.action_space = spaces.Discrete(self.grid_size * self.grid_size * 2)
        self.observation_space = spaces.Box(low=UNEXPLORED, high=SAFE, shape=(6, 6), dtype=np.int32)

def train_on_small_map():
    env = SmallDroneEnv()
    check_env(env)
    model = PPO("MlpPolicy", env, verbose=1)
    model.learn(total_timesteps=50000)
    model.save("ppo_small_model")
    return model


In [14]:
from stable_baselines3 import PPO
from stable_baselines3.common.env_checker import check_env

def train_with_ppo():
    env = DronePlacementEnv()
    check_env(env)
    model = PPO.load("ppo_small_model", env=env, verbose=1)
    model.learn(total_timesteps=1e5)
    model.save("ppo_large_model")
    return model


In [15]:
def run_trained_model():
    model = PPO.load("ppo_drone_model")
    env = DronePlacementEnv()
    obs, _ = env.reset()

    print("=== DRONE DEPLOYMENT SEQUENCE ===")
    for step in range(env.max_steps):
        action, _ = model.predict(obs)
        obs, reward, terminated, truncated, _ = env.step(action)

        x = action // 20
        y = (action % 20) // 2
        drone_type = "3x3" if action % 2 == 0 else "5x5"
        print(f"Step {step}: Placed {drone_type} drone at ({x}, {y}), reward: {reward:.2f}")

        env.render()

        if terminated:
            print("🎉 Mission Complete: All cells explored!")
            break
        if truncated:
            print("⚠️ Max steps reached.")
            break

In [16]:
model = PPO.load("ppo_drone_model")

env = DronePlacementEnv()
obs, _ = env.reset()

for step in range(env.max_steps):  # 最多放 max_steps 架 drone
    action, _ = model.predict(obs)

    # 解码动作
    x = action // 20
    y = (action % 20) // 2
    drone_type = action % 2
    scan_range = 3 if drone_type == 0 else 5
    pos = (x, y)

    # 模拟扫描 + 拼接
    local_info, top_left = drone_scan(pos, scan_range, env.actual_env)
    global_grid = stitch_information(env.global_grid, local_info, top_left)

    # 真正执行一步
    obs, reward, terminated, truncated, _ = env.step(action)

    print(f"Step {step}: Placed {scan_range}x{scan_range} drone at {pos}, reward = {reward:.2f}")
    env.render()

    if terminated:
        print("🎉 部署完成：所有格子已覆盖！")
        break
    if truncated:
        print("⚠️ 达到最大步数")
        break

print("Final Global Grid after Stitching:")
print(env.global_grid)




Step 0: Placed 3x3 drone at (np.int64(0), np.int64(6)), reward = 9.10
[[-2 -2 -2 -2 -2  0 -1  0 -1 -2]
 [-2 -2 -2 -2 -2  0  0  0 -2 -1]
 [-2 -2 -2 -2 -2 -2 -2 -2 -2 -2]
 [-2 -2 -2 -2 -2 -2 -2 -2 -2 -2]
 [-1 -2 -2 -2 -2 -2 -2 -2 -2 -2]
 [ 0 -2 -2 -2 -2 -2 -2 -2 -2 -2]
 [-1 -2 -2 -2 -2 -2 -2 -2 -2  0]
 [ 0  0  0 -1  0  0  0  0  0  0]
 [-1  0  0 -1  0  0  0  0  0  0]
 [ 0  0  0 -1 -1  0  0  0 -1  0]]
Step 1: Placed 3x3 drone at (np.int64(3), np.int64(3)), reward = 8.80
[[-2 -2 -2 -2 -2  0 -1  0 -1 -2]
 [-2 -2 -2 -2 -2  0  0  0 -2 -1]
 [-2 -2 -1 -1 -1 -2 -2 -2 -2 -2]
 [-2 -2  0  0  0 -2 -2 -2 -2 -2]
 [-1 -2  0  0  0 -2 -2 -2 -2 -2]
 [ 0 -2 -2 -2 -2 -2 -2 -2 -2 -2]
 [-1 -2 -2 -2 -2 -2 -2 -2 -2  0]
 [ 0  0  0 -1  0  0  0  0  0  0]
 [-1  0  0 -1  0  0  0  0  0  0]
 [ 0  0  0 -1 -1  0  0  0 -1  0]]
Step 2: Placed 5x5 drone at (np.int64(3), np.int64(7)), reward = 8.60
[[-2 -2 -2 -2 -2  0 -1  0 -1 -2]
 [-2 -2 -2 -2 -2  0  0  0 -1 -1]
 [-2 -2 -1 -1 -1  0  0 -1  0  0]
 [-2 -2  0  0  0  0  0  0 -1 