# Auto-Generating COCO Annotations for Instance Segmentation using FastSAM

Instance segmentation requires high-quality annotations, but manual annotation is time-consuming and expensive. This notebook automates the annotation process by leveraging FastSAM, a lightweight and efficient segmentation model, to generate masks from images.

The key steps in this pipeline include:



1.   Mask Generation with FastSAM - Detects object masks quickly.
2.   Post-processing - Reduces errors, removes false detections, and refines results.
3.   COCO JSON Conversion - Converts masks into COCO format for training deep learning models.

## Import required libraries and set up the environment

In [None]:
# Download a sample image (image_3.jpg) into the current working directory.
!curl -O https://raw.githubusercontent.com/tensorflow/models/master/official/\
projects/waste_identification_ml/pre_processing/config/sample_images/image_3.jpg

In [None]:
import os
# Record the directory the notebook started in; the %cd commands below are
# expressed relative to it.
HOME = os.getcwd()

%cd {HOME}

# Clone the FastSAM repo and install the required libraries.
!git clone https://github.com/CASIA-IVA-Lab/FastSAM.git
!pip install -r FastSAM/requirements.txt
!pip install git+https://github.com/openai/CLIP.git

# Switch into the cloned repository.
# NOTE(review): presumably this is what lets the later `from fastsam import ...`
# resolve from the working directory — confirm.
%cd {HOME}/FastSAM

/content
Cloning into 'FastSAM'...
remote: Enumerating objects: 1329, done.[K
remote: Counting objects: 100% (401/401), done.[K
remote: Compressing objects: 100% (93/93), done.[K
remote: Total 1329 (delta 346), reused 308 (delta 308), pack-reused 928 (from 1)[K
Receiving objects: 100% (1329/1329), 72.56 MiB | 17.01 MiB/s, done.
Resolving deltas: 100% (542/542), done.
Collecting gradio==3.35.2 (from -r FastSAM/requirements.txt (line 15))
  Downloading gradio-3.35.2-py3-none-any.whl.metadata (15 kB)
Collecting aiofiles (from gradio==3.35.2->-r FastSAM/requirements.txt (line 15))
  Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)
Collecting fastapi (from gradio==3.35.2->-r FastSAM/requirements.txt (line 15))
  Downloading fastapi-0.115.8-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio==3.35.2->-r FastSAM/requirements.txt (line 15))
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client>=0.2.7 (from gradio==3.35.2->-r FastSAM/

In [None]:
import torch
import os
from fastsam import FastSAM, FastSAMPrompt
import matplotlib.pyplot as plt
import numpy as np
from typing import Union
import cv2
import pandas as pd
import skimage

# Use the first CUDA device when CUDA is available; otherwise fall back to CPU.
DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(f"DEVICE = {DEVICE}")

DEVICE = cuda:0


In [None]:
#@title Utils

# Region properties requested from skimage.measure.regionprops_table for each
# mask in extract_properties; both are needed to compute the axis ratio there.
_PROPERTIES = (
    "major_axis_length",
    "minor_axis_length",
)


def masks_to_bool(masks: Union[np.ndarray, torch.Tensor]) -> np.ndarray:
  """Convert masks to boolean format.

  Args:
      masks: Input masks, either as a NumPy array (including ndarray
        subclasses) or a PyTorch tensor.

  Returns:
      Boolean masks where non-zero values become True and zeros become False.
  """
  # isinstance (instead of an exact type comparison) also routes ndarray
  # subclasses through the NumPy path rather than calling the tensor-only
  # `.cpu()` on them.
  if isinstance(masks, np.ndarray):
    return masks.astype(bool)
  # detach() lets tensors that track gradients be converted; cpu() copies
  # device tensors to host memory before the NumPy conversion.
  return masks.detach().cpu().numpy().astype(bool)


def plot_boolean_masks(masks: np.ndarray, masks_per_row: int = 5):
    """Plots boolean masks in a grid format with a fixed number of masks per row.

    Nothing is drawn when `masks` contains no masks.

    Args:
        masks: Boolean masks, stacked along the first axis.
        masks_per_row: Number of masks to display per row.
    """
    num_masks = masks.shape[0]  # Total number of masks
    if num_masks == 0:
        # Zero masks would request a grid with zero rows, which cannot be
        # created; there is nothing to show anyway.
        return

    # Ceiling division: the last row may be only partially filled.
    num_rows = (num_masks + masks_per_row - 1) // masks_per_row

    # squeeze=False guarantees a 2-D array of Axes even for a 1x1 grid, where
    # plt.subplots would otherwise return a bare Axes object with no flatten().
    _, axes = plt.subplots(
        num_rows,
        masks_per_row,
        figsize=(masks_per_row * 3, num_rows * 3),
        squeeze=False,
    )

    for i, ax in enumerate(axes.flatten()):
        if i < num_masks:
            ax.imshow(masks[i])  # Display mask
            ax.axis("off")  # Hide axis labels
            ax.set_title(f"Mask {i+1}")  # Set title
        else:
            ax.axis("off")  # Hide empty subplots

    plt.tight_layout()
    plt.show()


def extract_properties(masks: np.ndarray) -> pd.DataFrame:
    """Extracts properties of masks and computes additional ratio features.

    Each mask is converted to a 0/1 integer image and measured with
    skimage.measure.regionprops_table using the properties in _PROPERTIES.
    The per-mask tables are concatenated and an "axis_ratio" column
    (major_axis_length / minor_axis_length) is appended.

    Args:
        masks: Boolean masks, iterated along the first axis.

    Returns:
        A DataFrame with one column per entry of _PROPERTIES plus
        "axis_ratio". When `masks` is empty, an empty DataFrame with those
        columns is returned.
    """
    if len(masks) == 0:
        # pd.concat raises on an empty list, so build the empty result
        # explicitly with the same columns the non-empty path produces.
        return pd.DataFrame(
            {name: pd.Series(dtype=float) for name in (*_PROPERTIES, "axis_ratio")}
        )

    # NOTE(review): a mask without any foreground pixel presumably contributes
    # no row, in which case row indices would not line up with mask indices —
    # confirm against callers.
    dataframes = [
        pd.DataFrame(
            skimage.measure.regionprops_table(
                np.where(mask, 1, 0), properties=_PROPERTIES
            )
        )
        for mask in masks
    ]

    features = pd.concat(dataframes, ignore_index=True)
    features["axis_ratio"] = features["major_axis_length"] / features["minor_axis_length"]
    return features

def _is_contained(mask1: np.ndarray, mask2: np.ndarray):
  """Check if mask1 is entirely contained within mask2.

  Args:
    mask1: The first mask.
    mask2: The second mask.

  Returns:
    True if mask1 is entirely contained within mask2, False otherwise.
  """
  return np.array_equal(np.logical_and(mask1, mask2), mask1)


def _calculate_iou(mask1: np.ndarray, mask2: np.ndarray) -> float:
  """Calculate the intersection over union (IoU) between two masks.

  Args:
    mask1: The first mask.
    mask2: The second mask.

  Returns:
    The intersection over union (IoU) between the two masks.
  """
  intersection = np.logical_and(mask1, mask2).sum()
  union = np.logical_or(mask1, mask2).sum()
  return intersection / union if union != 0 else 0


def filter_masks(masks: np.ndarray, iou_threshold: float = 0.8) -> np.ndarray:
  """Filter the overlapping masks.

  Masks are visited from largest to smallest area. A mask is dropped when, for
  any mask sorted before it, its intersection over union (IoU) with that mask
  exceeds `iou_threshold` or it is entirely contained within that mask.

  Args:
    masks: The masks to filter, stacked along the first axis.
    iou_threshold: The threshold for the intersection over union (IoU) between
      two masks.

  Returns:
    Unique masks, ordered by descending area. An empty input yields an empty
    array that keeps the input's per-mask shape and dtype.
  """
  masks = np.asarray(masks)
  if len(masks) == 0:
    # Preserve shape and dtype instead of collapsing to a 1-D float array.
    return masks.copy()

  # Calculate the area for each mask
  areas = np.array([np.sum(mask) for mask in masks])

  # Sort the masks based on area in descending order
  sorted_indices = np.argsort(areas)[::-1]
  sorted_masks = masks[sorted_indices]

  # Each mask is checked against every mask sorted before it, including ones
  # that were themselves dropped; any() stops at the first match.
  kept = [
      i
      for i, mask in enumerate(sorted_masks)
      if not any(
          _calculate_iou(mask, larger) > iou_threshold
          or _is_contained(mask, larger)
          for larger in sorted_masks[:i]
      )
  ]

  # Integer-array indexing returns a new array of the surviving masks.
  return sorted_masks[kept]