This script processes images to detect birds, crop them out, and generate segmentation masks of the detected birds.

* Load images from the input directory
* Use YOLO to detect objects in the images
* Crop the detected objects
* Use Mask R-CNN to generate segmentation masks for the cropped images
* Apply each mask to its cropped image (masked pixels kept, everything else black) and save the result


In [5]:
import os
import cv2
import numpy as np
from tqdm import tqdm
from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor
from detectron2 import model_zoo
import matplotlib.pyplot as plt
from ultralytics import YOLO

Functional: using YOLO for object detection (OD) and Mask R-CNN for mask segmentation

In [6]:
# Folder layout: frames are read from input_dir, YOLO crops are written to
# cropped_dir, and the masked crops are written to mask_dir.
input_dir = "../Mask_RCNN/input_frames"
cropped_dir = "output_cropped"
mask_dir = "output_mask"

# Create both output folders up front; an already existing folder is fine.
for out_dir in (cropped_dir, mask_dir):
    os.makedirs(out_dir, exist_ok=True)

# YOLO setup
yolo_model = YOLO('yolov8x.pt')  # 'yolov8x.pt' for higher accuracy, 'yolov8n.pt' for lower accuracy

# Optional class filter forwarded to YOLO. None keeps every detected class
# (the previous behaviour); with COCO-pretrained weights, [14] keeps only "bird".
yolo_classes = None

# Only files with these extensions are sent to YOLO, so stray entries in
# input_dir (sub-directories, .DS_Store, notebook checkpoints) cannot abort the run.
image_extensions = ('.bmp', '.jpeg', '.jpg', '.png', '.tif', '.tiff', '.webp')

print("Cropping imgs with YOLO")
# sorted() gives a reproducible processing order; os.listdir order is arbitrary.
for image_name in tqdm(sorted(os.listdir(input_dir))):
    image_path = os.path.join(input_dir, image_name)
    if not os.path.isfile(image_path) or not image_name.lower().endswith(image_extensions):
        continue

    results = yolo_model.predict(image_path, conf=0.3, classes=yolo_classes, verbose=False)
    if not results:
        continue

    img = results[0].orig_img
    stem = os.path.splitext(image_name)[0]
    for i, box in enumerate(results[0].boxes.xyxy):  # Iterate through bounding boxes
        x1, y1, x2, y2 = map(int, box)
        cropped = img[y1:y2, x1:x2]
        if cropped.size == 0:
            # Truncating the coordinates to int can collapse a very thin box
            # to zero width/height; cv2.imwrite rejects empty images.
            continue
        # The detection index i keeps crops from the same frame distinct.
        cropped_filename = f"{stem}_{i}.png"
        cv2.imwrite(os.path.join(cropped_dir, cropped_filename), cropped)

# Mask RCNN setup
# The same model-zoo entry supplies both the architecture config and the
# pretrained checkpoint, so it is named once and reused.
mask_rcnn_yaml = "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file(mask_rcnn_yaml))
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(mask_rcnn_yaml)
cfg.MODEL.DEVICE = "cpu"  # Run on CPU
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.3  # Confidence threshold

# Predictor built from the finished config; it is called on one image at a time below.
predictor = DefaultPredictor(cfg)

print("Segmentating cropped imgs with MaskRCNN")
# sorted() gives a reproducible processing order; os.listdir order is arbitrary.
for cropped_name in tqdm(sorted(os.listdir(cropped_dir))):
    if not cropped_name.lower().endswith('.png'):  # take only png files
        continue

    cropped_path = os.path.join(cropped_dir, cropped_name)
    cropped_img = cv2.imread(cropped_path)
    if cropped_img is None:
        # cv2.imread returns None instead of raising when a file cannot be
        # decoded; skip it rather than crash inside the predictor.
        tqdm.write(f"Skipping unreadable image: {cropped_path}")
        continue

    outputs = predictor(cropped_img)
    # One mask per detected instance, moved to the CPU as a NumPy array.
    masks = outputs['instances'].pred_masks.cpu().numpy()

    # Create and save the mask-applied images (nothing is written when no
    # instance was detected in the crop).
    stem = os.path.splitext(cropped_name)[0]
    for i, mask in enumerate(masks):
        masked_img = np.zeros_like(cropped_img)  # black background
        # Copy only the masked pixels; channels stay in OpenCV's BGR order,
        # which is what cv2.imwrite expects.
        masked_img[mask] = cropped_img[mask]
        mask_filename = f"{stem}_mask_{i}.png"
        cv2.imwrite(os.path.join(mask_dir, mask_filename), masked_img)


Cropping imgs with YOLO


100%|██████████| 10/10 [00:35<00:00,  3.58s/it]


Segmentating cropped imgs with MaskRCNN


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]
100%|██████████| 11/11 [01:29<00:00,  8.14s/it]
