This script processes images to detect birds, crop them out, and generate segmentation masks of the detected birds, all through YOLOv8

In [1]:
import os
import cv2
import numpy as np
from tqdm import tqdm
from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor
from detectron2 import model_zoo
import matplotlib.pyplot as plt
from ultralytics import YOLO

In [2]:
input_dir = "../Mask_RCNN/input_frames"
cropped_dir = "output_cropped"
mask_dir = "output_mask"
os.makedirs(cropped_dir, exist_ok=True)
os.makedirs(mask_dir, exist_ok=True)

# YOLO setup (using the segme model instead of det)
yolo_model = YOLO('yolov8x-seg.pt') 

print("Cropping images")
for image_name in tqdm(os.listdir(input_dir)):
    image_path = os.path.join(input_dir, image_name)
    results = yolo_model.predict(image_path, conf=0.3, verbose=False)

    img = results[0].orig_img
    for i, box in enumerate(results[0].boxes.xyxy):  # Iterate through bounding boxes
        x1, y1, x2, y2 = map(int, box)
        cropped = img[y1:y2, x1:x2]
        cropped_filename = f"{os.path.splitext(image_name)[0]}_{i}.png"
        cv2.imwrite(os.path.join(cropped_dir, cropped_filename), cropped)

print("Segmenting cropped images with YOLOv8 segmentation")
for cropped_name in tqdm(os.listdir(cropped_dir)):
    if not cropped_name.lower().endswith('.png'): 
        continue

    cropped_path = os.path.join(cropped_dir, cropped_name)
    cropped_img = cv2.imread(cropped_path)

    results = yolo_model.predict(cropped_img, conf=0.3, verbose=False)

    # Check if any masks are detected
    if results[0].masks is None:
        continue  # No masks detected

    # Access mask data correctly
    masks = results[0].masks.data  

    # Create and save the masked images
    for i, mask in enumerate(masks):
        mask = mask.cpu().numpy().astype(bool) # Ensure mask is on CPU and convert to NumPy bool array

        # Check if mask dimensions match the cropped image dimensions
        if mask.shape != cropped_img.shape[:2]:
            print(f"Resizing mask from {mask.shape} to {cropped_img.shape[:2]}")
            mask_resized = cv2.resize(mask.astype(np.uint8), 
                                      (cropped_img.shape[1], cropped_img.shape[0]), 
                                      interpolation=cv2.INTER_NEAREST)
            mask_resized = mask_resized.astype(bool)
        else:
            mask_resized = mask

        # Verify the shapes after resizing
        assert mask_resized.shape == cropped_img.shape[:2], \
            f"Mask shape {mask_resized.shape} does not match image shape {cropped_img.shape[:2]}"

        # Create a mask with the same number of channels as the image
        mask_rgb = np.zeros_like(cropped_img)  # Black background
        mask_rgb[mask_resized] = cropped_img[mask_resized] # Apply mask to the image

        mask_filename = f"{os.path.splitext(cropped_name)[0]}_mask_{i}.png"
        cv2.imwrite(os.path.join(mask_dir, mask_filename), mask_rgb)

Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8x-seg.pt to 'yolov8x-seg.pt'...


100%|██████████| 137M/137M [00:04<00:00, 32.0MB/s] 


Cropping images


  0%|          | 0/10 [00:00<?, ?it/s][W NNPACK.cpp:64] Could not initialize NNPACK! Reason: Unsupported hardware.
100%|██████████| 10/10 [00:45<00:00,  4.50s/it]


Segmenting cropped images with YOLOv8 segmentation


  8%|▊         | 1/12 [00:06<01:16,  6.98s/it]

Resizing mask from (512, 640) to (350, 466)


 42%|████▏     | 5/12 [00:24<00:29,  4.15s/it]

Resizing mask from (352, 640) to (341, 662)


 50%|█████     | 6/12 [00:29<00:25,  4.29s/it]

Resizing mask from (640, 352) to (407, 215)


 58%|█████▊    | 7/12 [00:37<00:28,  5.62s/it]

Resizing mask from (640, 640) to (602, 619)


 67%|██████▋   | 8/12 [00:46<00:25,  6.43s/it]

Resizing mask from (640, 640) to (408, 410)


 75%|███████▌  | 9/12 [00:52<00:19,  6.44s/it]

Resizing mask from (512, 640) to (418, 543)


 83%|████████▎ | 10/12 [01:00<00:13,  6.69s/it]

Resizing mask from (640, 576) to (413, 372)


 92%|█████████▏| 11/12 [01:05<00:06,  6.28s/it]

Resizing mask from (640, 416) to (501, 316)


100%|██████████| 12/12 [01:09<00:00,  5.75s/it]

Resizing mask from (640, 288) to (787, 335)



