This script processes images to detect birds, crop them out, and generate segmentation masks of the detected birds, all through YOLOv8

In [1]:
import os
import cv2
import numpy as np
from tqdm import tqdm
from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor
from detectron2 import model_zoo
import matplotlib.pyplot as plt
from ultralytics import YOLO

In [5]:
input_dir = "../Mask_RCNN/input_frames"

yolo_version = 'x' # can be 's', 'm', 'l', 'x' x is quite slower 
yolo_model = YOLO(f'yolo11{yolo_version}-seg.pt')  
cropped_dir = f"output_cropped_{yolo_version}"
mask_dir = f"output_mask_{yolo_version}"
os.makedirs(cropped_dir, exist_ok=True)
os.makedirs(mask_dir, exist_ok=True)

print("Cropping images")
for image_name in tqdm(os.listdir(input_dir)):
    image_path = os.path.join(input_dir, image_name)
    # Perform prediction on the full image
    results = yolo_model.predict(image_path, conf=0.3, verbose=False)

    # Retrieve the original image
    img = results[0].orig_img
    for i, box in enumerate(results[0].boxes.xyxy):  # Iterate through bounding boxes
        # Extract bounding box coordinates
        x1, y1, x2, y2 = map(int, box)
        cropped = img[y1:y2, x1:x2]  # Crop the region of interest
        cropped_filename = f"{os.path.splitext(image_name)[0]}_{i}.png"
        # Save the cropped image
        cv2.imwrite(os.path.join(cropped_dir, cropped_filename), cropped)

print("Segmenting cropped images with YOLOv11 segmentation")
for cropped_name in tqdm(os.listdir(cropped_dir)):
    if not cropped_name.lower().endswith('.png'): 
        continue

    cropped_path = os.path.join(cropped_dir, cropped_name)
    cropped_img = cv2.imread(cropped_path)

    # Perform prediction on the cropped image
    results = yolo_model.predict(cropped_img, conf=0.3, verbose=False)

    # Check if any masks are detected
    if results[0].masks is None:
        continue  # Skip if no masks are found

    # Access mask data
    masks = results[0].masks.data  

    # Create and save masked images
    for i, mask in enumerate(masks):
        mask = mask.cpu().numpy().astype(bool)  # Convert mask to NumPy boolean array

        # Ensure mask dimensions match the cropped image dimensions
        if mask.shape != cropped_img.shape[:2]:
            print(f"Resizing mask from {mask.shape} to {cropped_img.shape[:2]}")
            mask_resized = cv2.resize(mask.astype(np.uint8), 
                                      (cropped_img.shape[1], cropped_img.shape[0]), 
                                      interpolation=cv2.INTER_NEAREST)
            mask_resized = mask_resized.astype(bool)
        else:
            mask_resized = mask

        # Verify the resized mask dimensions
        assert mask_resized.shape == cropped_img.shape[:2], \
            f"Mask shape {mask_resized.shape} does not match image shape {cropped_img.shape[:2]}"

        # Create a mask with the same number of channels as the cropped image
        mask_rgb = np.zeros_like(cropped_img)  # Black background
        mask_rgb[mask_resized] = cropped_img[mask_resized]  # Apply the mask to the image

        # Save the masked image
        mask_filename = f"{os.path.splitext(cropped_name)[0]}_mask_{i}.png"
        cv2.imwrite(os.path.join(mask_dir, mask_filename), mask_rgb)


Cropping images


100%|██████████| 10/10 [00:40<00:00,  4.02s/it]


Segmenting cropped images with YOLOv11 segmentation


 11%|█         | 1/9 [00:05<00:44,  5.57s/it]

Resizing mask from (480, 640) to (343, 463)


 33%|███▎      | 3/9 [00:14<00:27,  4.66s/it]

Resizing mask from (256, 640) to (238, 602)


 56%|█████▌    | 5/9 [00:26<00:22,  5.62s/it]

Resizing mask from (640, 640) to (583, 603)


 67%|██████▋   | 6/9 [00:34<00:18,  6.25s/it]

Resizing mask from (640, 640) to (420, 418)


 89%|████████▉ | 8/9 [00:45<00:05,  5.90s/it]

Resizing mask from (640, 448) to (473, 320)


100%|██████████| 9/9 [00:48<00:00,  5.44s/it]

Resizing mask from (640, 288) to (776, 336)



