In [3]:
import cv2
import numpy as np
from collections import deque
from ultralytics import YOLO
import time

def iou(boxA, boxB):
    """
    Compute the Intersection over Union (IoU) of two bounding boxes.

    Both boxes use the (x, y, w, h) layout: top-left corner followed by
    width and height.

    Returns:
        intersection area divided by union area, or 0.0 when the union
        area is not positive (e.g. two zero-sized boxes).
    """
    ax, ay, aw, ah = boxA
    bx, by, bw, bh = boxB

    # Extent of the overlap along each axis; clamped to zero for boxes
    # that do not touch.
    overlap_w = max(0, min(ax + aw, bx + bw) - max(ax, bx))
    overlap_h = max(0, min(ay + ah, by + bh) - max(ay, by))
    intersection = overlap_w * overlap_h

    # Union = sum of both areas minus the part counted twice.
    union = float(aw * ah + bw * bh - intersection)
    if union > 0:
        return intersection / union
    return 0.0

def union_box(boxA, boxB):
    """
    Return the smallest axis-aligned box that contains both input boxes.

    Inputs and output use the (x, y, w, h) layout: top-left corner
    followed by width and height.
    """
    ax, ay, aw, ah = boxA
    bx, by, bw, bh = boxB

    # Outer edges of the enclosing rectangle.
    left = min(ax, bx)
    top = min(ay, by)
    right = max(ax + aw, bx + bw)
    bottom = max(ay + ah, by + bh)

    # Convert the corner representation back to (x, y, w, h).
    return (left, top, right - left, bottom - top)

def merge_overlapping_boxes(boxes, iou_threshold=0.5):
    """
    Merge boxes whose IoU exceeds ``iou_threshold`` into their enclosing box.

    The pass is repeated until a full sweep performs no merge, so boxes
    that only start overlapping after an earlier merge are combined too.

    Args:
        boxes: sequence of (x, y, w, h) boxes.
        iou_threshold: two boxes are grouped when their IoU is strictly
            greater than this value.

    Returns:
        list of merged (x, y, w, h) boxes; an empty list for empty input.
    """
    if not boxes:
        return []

    # Work on a private list so the caller's sequence is never modified.
    pending = list(boxes)

    changed = True
    while changed:
        changed = False
        consumed = set()   # indices already absorbed into some group this sweep
        next_round = []

        for i, anchor in enumerate(pending):
            if i in consumed:
                continue

            # Collect every later, still-free box that overlaps the anchor.
            # Overlap is always measured against the anchor itself, not
            # against the growing merged box.
            group = [i]
            for j in range(i + 1, len(pending)):
                if j not in consumed and iou(anchor, pending[j]) > iou_threshold:
                    group.append(j)
                    changed = True

            # Fold the whole group into one enclosing box.
            combined = anchor
            for idx in group[1:]:
                combined = union_box(combined, pending[idx])

            next_round.append(combined)
            consumed.update(group)

        pending = next_round

    return pending


# ---------- 差分 + Canny + YOLO 核心函式 ----------
def image_process(img):
    """
    Build a gradient image using the Sobel operator.

    A 3-dimensional input is first converted to grayscale with
    cv2.COLOR_BGR2GRAY; any other input is used as-is. The horizontal and
    vertical Sobel responses (3x3 kernel, float32) are each passed through
    cv2.convertScaleAbs and then blended with equal weights.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) if len(img.shape) == 3 else img

    # (dx, dy) derivative orders: first the x-gradient, then the y-gradient.
    abs_gradients = []
    for dx, dy in ((1, 0), (0, 1)):
        gradient = cv2.Sobel(gray, cv2.CV_32F, dx, dy, ksize=3)
        abs_gradients.append(cv2.convertScaleAbs(gradient))

    grad_x, grad_y = abs_gradients
    return cv2.addWeighted(grad_x, 0.5, grad_y, 0.5, 0)

def generate_candidate_boxes(edge_img):
    """
    Turn an edge image (e.g. Canny output) into candidate bounding boxes.

    Only external contours are considered. Contours whose area is below
    ``min_area`` are discarded as noise.

    Returns:
        list of (x, y, w, h) tuples, one per retained contour.
    """
    min_area = 10  # tunable: contours smaller than this are treated as noise

    found, _ = cv2.findContours(edge_img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    return [
        tuple(cv2.boundingRect(outline))
        for outline in found
        if cv2.contourArea(outline) >= min_area
    ]

def calculate_overlap_ratio(box1, box2):
    """
    Return the overlap ratio (IoU) of two boxes.

    This is a thin wrapper that delegates to iou(); the two functions are
    interchangeable and could be consolidated into one.
    """
    return iou(box1, box2) # reuse the iou() function directly

# ---------- Refactored Object Detection Function for Single Frame ----------
def detect_objects_in_frame(depth_frame,
                            rgb_frame,
                            prev_processed_depth, # Previous frame after image_process
                            overlap_threshold=0.5,
                            iou_threshold_for_union=0.5,
                            yolo_model=None,
                            yolo_conf=0.99):
    """
    Detect objects in a single frame from depth differencing/Canny plus YOLO.

    Pipeline:
      1. Run image_process() on the depth frame, diff it against
         ``prev_processed_depth``, threshold the difference and box its
         contours ("Sobel boxes").
      2. Run Canny on the grayscale depth frame and box its contours.
      3. Keep every Canny box that overlaps some Sobel box with a ratio of
         at least ``overlap_threshold``.
      4. If ``yolo_model`` is given, add its detections on the RGB frame.
      5. Merge everything with merge_overlapping_boxes().

    Args:
        depth_frame: current depth image (2-D or 3-D array).
        rgb_frame: current colour image; a 2-D image is expanded to BGR.
        prev_processed_depth: earlier depth frame after image_process().
        overlap_threshold: minimum Canny/Sobel overlap ratio to keep a box.
        iou_threshold_for_union: IoU threshold used for the final merge.
        yolo_model: object exposing ``predict``; skipped when None.
        yolo_conf: confidence passed to ``yolo_model.predict``.

    Returns:
        (boxes, processed_frame): merged (x, y, w, h) boxes and the
        processed depth frame of the current input. Returns ``([], None)``
        when any of the three frame arguments is None.
    """
    if any(item is None for item in (depth_frame, rgb_frame, prev_processed_depth)):
        print("Warning: Received None frame in detect_objects_in_frame.")
        return [], None

    # YOLO is fed a 3-channel BGR image.
    if len(rgb_frame.shape) == 2:
        rgb_frame = cv2.cvtColor(rgb_frame, cv2.COLOR_GRAY2BGR)

    # Give image_process() a 3-channel view of the depth frame.
    depth_for_sobel = (
        cv2.cvtColor(depth_frame, cv2.COLOR_GRAY2BGR)
        if len(depth_frame.shape) == 2
        else depth_frame
    )

    # ---------------------------
    #   1) Frame differencing + Canny (on the depth image)
    # ---------------------------
    processed_frame = image_process(depth_for_sobel)

    frame_diff = cv2.absdiff(prev_processed_depth, processed_frame)
    if len(frame_diff.shape) == 3:
        frame_diff = cv2.cvtColor(frame_diff, cv2.COLOR_BGR2GRAY)
    diff_u8 = np.clip(frame_diff, 0, 255).astype(np.uint8)

    # Binarise the difference and box every sufficiently large blob.
    _, motion_mask = cv2.threshold(diff_u8, 20, 255, cv2.THRESH_BINARY)
    motion_contours, _ = cv2.findContours(motion_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    sobel_boxes = []
    for outline in motion_contours:
        if cv2.contourArea(outline) >= 20:
            sobel_boxes.append(cv2.boundingRect(outline))

    # Canny runs on the grayscale version of the original depth frame.
    if len(depth_frame.shape) == 3:
        gray_depth = cv2.cvtColor(depth_frame, cv2.COLOR_BGR2GRAY)
    else:
        gray_depth = depth_frame
    canny_boxes = generate_candidate_boxes(cv2.Canny(gray_depth, 50, 100))

    # Keep each Canny box once if at least one Sobel box overlaps it enough;
    # any() stops at the first match.
    final_diff_canny_boxes = [
        canny_box
        for canny_box in canny_boxes
        if any(
            calculate_overlap_ratio(canny_box, sobel_box) >= overlap_threshold
            for sobel_box in sobel_boxes
        )
    ]

    # ---------------------------
    #   2) YOLO detection (on the RGB image)
    # ---------------------------
    yolo_boxes = []
    if yolo_model is not None:
        for result in yolo_model.predict(rgb_frame, verbose=False, conf=yolo_conf):
            for box in result.boxes:
                # Corner format -> integer (x, y, w, h).
                x1, y1, x2, y2 = map(int, box.xyxy[0].cpu().numpy())
                w, h = x2 - x1, y2 - y1
                if w > 0 and h > 0:
                    yolo_boxes.append((x1, y1, w, h))

    # ---------------------------
    #   3) Union of the depth-derived boxes and the YOLO boxes
    # ---------------------------
    merged_final_boxes = merge_overlapping_boxes(
        final_diff_canny_boxes + yolo_boxes,
        iou_threshold=iou_threshold_for_union,
    )

    # The processed depth frame is handed back for the next differencing step.
    return merged_final_boxes, processed_frame

In [None]:
import cv2
import numpy as np
from collections import deque
from ultralytics import YOLO
import time

# ... (在這裡保留上面提供的 iou, union_box, merge_overlapping_boxes,
#          image_process, generate_candidate_boxes, calculate_overlap_ratio,
#          detect_objects_in_frame 這些輔助函數) ...



# ---------- Modified Background Estimation Function ----------

def background_estimation_with_object_weighting(
                            depth_video_path,
                            rgb_video_path,
                            yolo_model_path,
                            alpha_bg=0.05,      # learning rate for background pixels
                            alpha_fg=0.001,     # learning rate for foreground (object) pixels (low weight)
                            frame_gap=1,        # frame distance used for depth differencing
                            overlap_threshold=0.3, # Sobel/Canny box overlap threshold
                            iou_threshold_for_union=0.4, # IoU threshold for the final box merge
                            yolo_conf=0.6,      # YOLO confidence threshold
                            skip_frames_for_avg=10): # number of frames averaged to initialise the background (first frame excluded)
    """
    Estimate a static background from paired depth/RGB videos.

    The first frame (index 0) is skipped. The next ``skip_frames_for_avg``
    RGB frames are combined with a running mean to initialise the model.
    After that, every frame updates the model with a per-pixel learning
    rate: ``alpha_fg`` inside boxes returned by detect_objects_in_frame()
    and ``alpha_bg`` everywhere else, so detected objects contribute little.

    Side effects: prints progress messages, shows two OpenCV windows
    ("Estimated Background", "Detections on RGB"; press ``q`` to stop) and
    writes the final model to ``V3_estimated_background.png`` in the
    current working directory.

    Args:
        depth_video_path: path of the depth video.
        rgb_video_path: path of the RGB video.
        yolo_model_path: path of the YOLO weights passed to ``YOLO(...)``.
        alpha_bg: update weight for pixels outside detected boxes.
        alpha_fg: update weight for pixels inside detected boxes.
        frame_gap: how many frames back the differencing reference lies.
        overlap_threshold: forwarded to detect_objects_in_frame().
        iou_threshold_for_union: forwarded to detect_objects_in_frame().
        yolo_conf: forwarded to detect_objects_in_frame().
        skip_frames_for_avg: number of frames used for the initial average;
            values below 1 are treated as 1 so the model is always
            initialised.

    Returns:
        None. Returns early (after printing an error) if a video cannot be
        opened or the YOLO model cannot be loaded.
    """
    output_path = "V3_estimated_background.png"

    # With a non-positive count the original logic never created a model
    # and silently consumed the whole video; at least one frame is needed.
    init_frames = max(1, skip_frames_for_avg)
    # Frames spent in the initialisation phase, including the skipped first one.
    total_init_frames_to_process = init_frames + 1

    depth_cap = cv2.VideoCapture(depth_video_path)
    rgb_cap = cv2.VideoCapture(rgb_video_path)

    background_model = None
    windows_opened = False

    # try/finally guarantees both captures are released on every exit path,
    # including early returns and exceptions raised while processing.
    try:
        if not depth_cap.isOpened():
            print(f"錯誤：無法開啟深度影片 {depth_video_path}")
            return
        if not rgb_cap.isOpened():
            print(f"錯誤：無法開啟 RGB 影片 {rgb_video_path}")
            return

        # Basic video information, taken from the RGB stream (for logging only).
        width = int(rgb_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(rgb_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = rgb_cap.get(cv2.CAP_PROP_FPS)
        print(f"影片尺寸: {width}x{height}, FPS: {fps:.2f}")

        try:
            yolo_model = YOLO(yolo_model_path)
            print("YOLO 模型載入成功。")
        except Exception as e:
            print(f"錯誤：載入 YOLO 模型失敗 ({yolo_model_path}): {e}")
            return

        # --- Initialization ---
        # Holds the last frame_gap + 1 processed depth frames; element 0 is
        # the reference for differencing.
        processed_depth_buffer = deque(maxlen=frame_gap + 1)

        frame_count = 0        # index of the frame being processed (0-based)
        initial_avg_count = 0  # RGB frames already folded into the initial mean
        start_time = time.time()

        while True:
            ret_depth, depth_frame = depth_cap.read()
            ret_rgb, rgb_frame = rgb_cap.read()

            if not ret_depth or not ret_rgb:
                print(f"影片讀取完畢或發生錯誤 (幀索引 {frame_count})。")
                break

            # Bring the depth frame to the RGB frame size if they differ.
            if depth_frame.shape[:2] != rgb_frame.shape[:2]:
                print(f"幀 {frame_count}: 深度圖 ({depth_frame.shape[:2]}) 與 RGB 圖 ({rgb_frame.shape[:2]}) 尺寸不符，嘗試調整深度圖。")
                depth_frame = cv2.resize(depth_frame, (rgb_frame.shape[1], rgb_frame.shape[0]), interpolation=cv2.INTER_NEAREST)
                if depth_frame.shape[:2] != rgb_frame.shape[:2]:
                    print("錯誤：調整深度圖尺寸失敗。")
                    break

            # --- Process the current depth frame (needed in every phase) ---
            if len(depth_frame.shape) == 2:
                depth_frame_bgr = cv2.cvtColor(depth_frame, cv2.COLOR_GRAY2BGR)
            else:
                depth_frame_bgr = depth_frame
            processed_depth_buffer.append(image_process(depth_frame_bgr))

            # --- Background model initialisation (first frame skipped) ---
            if initial_avg_count < init_frames:
                if frame_count == 0:
                    print(f"幀索引 {frame_count} (第 1 幀): 跳過 RGB 平均，僅處理深度圖。")
                else:
                    frame_float = rgb_frame.astype(np.float32)
                    if background_model is None:
                        # First averaged frame seeds the model.
                        background_model = frame_float
                        print(f"幀索引 {frame_count} (第 {frame_count+1} 幀): 初始化背景模型 (使用此幀)。")
                    else:
                        # Running mean: with n frames already averaged, the
                        # new frame gets weight 1 / (n + 1).
                        beta = 1.0 / float(initial_avg_count + 1.0)
                        background_model = cv2.addWeighted(background_model, 1.0 - beta, frame_float, beta, 0)
                        print(f"幀索引 {frame_count} (第 {frame_count+1} 幀): 更新初始背景模型 (已平均 {initial_avg_count + 1} 幀)。")

                    initial_avg_count += 1
                    if initial_avg_count >= init_frames:
                        print(f"背景模型初始化完成 (使用索引 1 到 {frame_count} 的 {init_frames} 幀進行平均)。")

                # No object detection during initialisation.
                frame_count += 1
                continue

            # --- Initialisation complete: detect objects ---
            detected_boxes = []
            if len(processed_depth_buffer) > frame_gap:
                detected_boxes, _ = detect_objects_in_frame(
                    depth_frame,
                    rgb_frame,
                    processed_depth_buffer[0],  # oldest buffered frame = differencing reference
                    overlap_threshold=overlap_threshold,
                    iou_threshold_for_union=iou_threshold_for_union,
                    yolo_model=yolo_model,
                    yolo_conf=yolo_conf
                )
            else:
                print(f"幀索引 {frame_count}: Warning - Not enough frames in buffer for differencing, skipping detection.")

            # --- Update the background model with a per-pixel learning rate ---
            # Clip boxes against the decoded frame itself rather than the
            # capture metadata, which may not match the actual frame size.
            frame_h, frame_w = rgb_frame.shape[:2]
            update_mask = np.full((frame_h, frame_w), alpha_bg, dtype=np.float32)
            for (x, y, w, h) in detected_boxes:
                y1, y2 = max(0, y), min(frame_h, y + h)
                x1, x2 = max(0, x), min(frame_w, x + w)
                if y2 > y1 and x2 > x1:
                    update_mask[y1:y2, x1:x2] = alpha_fg

            update_mask_3channel = cv2.cvtColor(update_mask, cv2.COLOR_GRAY2BGR)
            current_frame_float = rgb_frame.astype(np.float32)
            background_model = (1.0 - update_mask_3channel) * background_model + update_mask_3channel * current_frame_float

            # --- Display ---
            cv2.imshow("Estimated Background", cv2.convertScaleAbs(background_model))

            # Debug view: detections drawn on a copy of the RGB frame.
            display_rgb_debug = rgb_frame.copy()
            for x, y, w, h in detected_boxes:
                cv2.rectangle(display_rgb_debug, (x, y), (x + w, y + h), (255, 0, 255), 2)
            cv2.imshow("Detections on RGB", display_rgb_debug)
            windows_opened = True

            frame_count += 1
            if frame_count % 100 == 0:
                elapsed_time = time.time() - start_time
                avg_fps = frame_count / elapsed_time if elapsed_time > 0 else 0
                print(f"已處理 {frame_count} 幀 (索引 {frame_count-1}), 平均 FPS: {avg_fps:.2f}")

            if cv2.waitKey(1) & 0xFF == ord('q'):
                print("用戶請求退出。")
                break

        # --- Summary ---
        total_time = time.time() - start_time
        # Frames handled during initialisation are excluded from the FPS figure.
        processed_frames_after_init = frame_count - total_init_frames_to_process
        avg_fps = processed_frames_after_init / total_time if total_time > 0 and processed_frames_after_init > 0 else 0
        print("-" * 30)
        print(f"處理完成。總幀數: {frame_count}")
        print(f"總耗時: {total_time:.2f} 秒")
        print(f"平均處理 FPS (排除初始化 {total_init_frames_to_process} 幀): {avg_fps:.2f}")
        print("-" * 30)
    finally:
        # --- Cleanup ---
        depth_cap.release()
        rgb_cap.release()
        if windows_opened:
            cv2.destroyAllWindows()

    if background_model is not None:
        final_bg_uint8 = cv2.convertScaleAbs(background_model)
        # Report the real file name, and only claim success if the write worked.
        if cv2.imwrite(output_path, final_bg_uint8):
            print(f"最終背景模型已保存為 {output_path}")
        else:
            print(f"錯誤：無法保存背景模型至 {output_path}")

# ---------- Main Execution Block ----------
# Script entry point: configure paths and tuning constants, then run the
# background estimation once.
if __name__ == "__main__":
    # ----- Change these to your own video and model paths -----
    depth_video_path = r"E:\論文\期刊\code\final_video\aligned_tests_cropped_output_depth.mp4"
    rgb_video_path   = r"E:\論文\期刊\code\final_video\aligned_tests_cropped_output_input.mp4"
    yolo_model_path  = r"E:\論文\期刊\code\YOLO\satellite3_train.pt"
    # ----- Tunable parameters -----
    BG_ALPHA = 0.03
    FG_ALPHA = 0.001
    FRAME_GAP = 1
    OVERLAP_THRESH = 0.3
    UNION_IOU_THRESH = 0.3
    YOLO_CONFIDENCE = 0.3
    # NOTE: this is the number of frames, starting from the second frame,
    # that are used for the initial average.
    # For example, a value of 10 averages the frames with index 1 to 10 (10 frames).
    INITIAL_AVG_FRAMES_COUNT = 15 # formerly the skip_frames parameter, renamed
    # ----- Run the background estimation -----
    background_estimation_with_object_weighting(
        depth_video_path=depth_video_path,
        rgb_video_path=rgb_video_path,
        yolo_model_path=yolo_model_path,
        alpha_bg=BG_ALPHA,
        alpha_fg=FG_ALPHA,
        frame_gap=FRAME_GAP,
        overlap_threshold=OVERLAP_THRESH,
        iou_threshold_for_union=UNION_IOU_THRESH,
        yolo_conf=YOLO_CONFIDENCE,
        skip_frames_for_avg=INITIAL_AVG_FRAMES_COUNT # uses the new parameter name
    )

影片尺寸: 1024x576, FPS: 15.00
YOLO 模型載入成功。
幀索引 0 (第 1 幀): 跳過 RGB 平均，僅處理深度圖。
幀索引 1 (第 2 幀): 初始化背景模型 (使用此幀)。
幀索引 2 (第 3 幀): 更新初始背景模型 (已平均 2 幀)。
幀索引 3 (第 4 幀): 更新初始背景模型 (已平均 3 幀)。
幀索引 4 (第 5 幀): 更新初始背景模型 (已平均 4 幀)。
幀索引 5 (第 6 幀): 更新初始背景模型 (已平均 5 幀)。
背景模型初始化完成 (使用索引 1 到 5 的 5 幀進行平均)。
影片讀取完畢或發生錯誤 (幀索引 60)。
------------------------------
處理完成。總幀數: 60
總耗時: 2.27 秒
平均處理 FPS (排除初始化 6 幀): 23.77
------------------------------
最終背景模型已保存為 estimated_background.png
