<a href="https://colab.research.google.com/drive/1rQe5kzkChFvRA-9sikOmuurIqwcH77gn" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>


# FiftyOne Workshop - Agriculture
# Improve Coffee Dataset quality with SAM2 in FiftyOne

## 🏆 Learning Objectives
- Understand how to apply the SAM2 segmentation model.
- Learn how to integrate SAM2 with FiftyOne.
- Visualize segmentation results using FiftyOne.
- Improve dataset quality using FiftyOne's uniqueness features.

## Requirements
### Knowledge
- Understanding of image segmentation.
- Familiarity with deep learning-based annotation tools.
### Installation
Run the following commands to install necessary dependencies:
```bash
git clone https://github.com/facebookresearch/sam2.git && cd sam2
pip install -e .
pip install fiftyone
pip install "umap-learn>=0.5"
```

In [None]:
!pip install sam2
!pip install fiftyone
!pip install umap-learn


## 1. Loading the Dataset - Using HuggingFace Hub

```
import fiftyone as fo # base library and app
import fiftyone.utils.huggingface as fouh # Hugging Face integration
dataset_ = fouh.load_from_hub("pjramg/my_colombian_coffe_FO", persistent=True, overwrite=True)

# Define the new dataset name
dataset_name = "coffee_FO"

# Check if the dataset exists
if dataset_name in fo.list_datasets():
    print(f"Dataset '{dataset_name}' exists. Loading...")
    dataset = fo.load_dataset(dataset_name)
else:
    print(f"Dataset '{dataset_name}' does not exist. Creating a new one...")
    # Clone the dataset with a new name and make it persistent
    dataset = dataset_.clone(dataset_name, persistent=True)

```

In [None]:
import torch

def get_device():
    """Pick the torch device string to run inference on.

    Preference order is CUDA, then Apple MPS (when this torch build exposes
    the backend and reports it available), then CPU.

    Returns:
        One of ``"cuda"``, ``"mps"`` or ``"cpu"``.
    """
    if torch.cuda.is_available():
        return "cuda"

    # Older torch builds have no `torch.backends.mps` attribute at all
    mps_backend = getattr(torch.backends, "mps", None)
    if mps_backend is not None and mps_backend.is_available():
        return "mps"

    return "cpu"

# Resolve the device once via get_device() and report which one was picked.
DEVICE = get_device()

print(f"Using device: {DEVICE}")

In [None]:
import fiftyone as fo
import fiftyone.utils.huggingface as fouh
from fiftyone.utils.coco import COCODetectionDatasetImporter

import gdown

# Download the zipped coffee dataset from Google Drive into the working
# directory as `coffee_original.zip`.

url = "https://drive.google.com/uc?id=1TMeeIzj8EyocVyXmOgKSLYE3vTLc2gPe" # original dataset archive
gdown.download(url, output="coffee_original.zip", quiet=False)

In [None]:
!unzip coffee_original.zip

In [None]:
# import fiftyone as fo # base library and app
# import fiftyone.utils.huggingface as fouh # Hugging Face integration
# dataset_ = fouh.load_from_hub("pjramg/my_colombian_coffe_FO", persistent=True, overwrite=True)

# Name under which the imported dataset is stored in FiftyOne's database
dataset_name = "coffee_original"

# Reuse the dataset if it was already imported; otherwise build it from the
# unzipped COCO export.
if dataset_name in fo.list_datasets():
    print(f"Dataset '{dataset_name}' exists. Loading...")
    dataset = fo.load_dataset(dataset_name)
else:
    print(f"Dataset '{dataset_name}' does not exist. Creating a new one...")
    # Import the COCO-format annotations. The dataset must be created under
    # `dataset_name` and be persistent, otherwise the existence check above
    # can never find it on a later run.
    dataset = fo.Dataset.from_dir(
        dataset_type=fo.types.COCODetectionDataset,
        dataset_dir="./colombian_coffee",
        data_path="images/default",
        labels_path="annotations/instances_default.json",
        label_types="segmentations",
        label_field="categories",
        name=dataset_name,
        persistent=True,
        include_id=True,
        overwrite=True,
    )

In [None]:
# Print the dataset summary to sanity-check the import
print(dataset)

## 4. Find Unique Images

This section shows how to use uniqueness detection, similarity search, and embedding visualizations for agricultural AI.


In [None]:
import fiftyone.brain as fob

# Build an image-similarity index stored under the brain key "img_sim", then
# ask it for 100 unique samples; the selected IDs are read later from
# `results.unique_ids`.
results = fob.compute_similarity(dataset, brain_key="img_sim")
results.find_unique(100)

In [None]:
# Compute an embeddings visualization for the dataset, stored under the brain
# key "img_vis"
vis_results = fob.compute_visualization(dataset, brain_key="img_vis")


In [None]:
import fiftyone.brain as fob

# Score how unique each sample is
# (presumably written to a `uniqueness` sample field — TODO confirm field name)
fob.compute_uniqueness(dataset)

In [None]:
# Launch the FiftyOne App for the full dataset with auto=False, then open it
# explicitly in a browser tab
session = fo.launch_app(dataset, auto=False)
session.open_tab()

In [None]:
# Restrict the dataset to the samples chosen by `results.find_unique(...)`
# and show that view in the running App session
unique_view = dataset.select(results.unique_ids)
session.view = unique_view

print(unique_view)

# Export the view to disk in FiftyOneDataset format so it can be re-imported
# as a standalone dataset
export_dir = "unique_images_wo_annotations"
unique_view.export(
    export_dir=export_dir,
    dataset_type=fo.types.FiftyOneDataset,
)

In [None]:
dataset_name_unique = "unique_images_wo_annotations"
export_dir = "unique_images_wo_annotations"

# Import the exported unique samples as their own persistent dataset.
# `Dataset.from_dir` takes the dataset name as `name=`; the previous
# `dataset_name=` keyword is not a from_dir parameter, so the dataset did not
# receive the intended name. `overwrite=True` keeps the cell re-runnable now
# that the name is actually applied.
dataset_unique_images_wo_annotations = fo.Dataset.from_dir(
    dataset_dir=export_dir,
    dataset_type=fo.types.FiftyOneDataset,
    name=dataset_name_unique,
    persistent=True,
    overwrite=True,
)

## 5. Pre-annotate the 100 unique samples with SAM2

In [None]:
import fiftyone.zoo as foz
# Load the SAM2 zoo model (name indicates the Hiera-tiny image variant)
model = foz.load_zoo_model("segment-anything-2-hiera-tiny-image-torch")

# Full automatic segmentations, stored on each sample in the `auto` field
dataset_unique_images_wo_annotations.apply_model(model, label_field="auto")


In [None]:
# Launch the App on the unique-samples dataset to inspect the `auto` predictions
session = fo.launch_app(dataset_unique_images_wo_annotations, auto=False)
# Uncomment to open the App in a separate browser tab:
#session.open_tab()

## 6. Assign labels to the auto-generated detections

In [None]:
import fiftyone as fo
import numpy as np
import torch
import torchvision.transforms as transforms
from PIL import Image
from torchvision.models import resnet18
from sklearn.metrics.pairwise import cosine_similarity

# Load dataset
#dataset = fo.load_dataset("coffee_FO")

# Declare the `polished_auto` Detections field up front if the dataset does
# not have it yet
unique_schema = dataset_unique_images_wo_annotations.get_field_schema()
if "polished_auto" not in unique_schema:
    dataset_unique_images_wo_annotations.add_sample_field(
        "polished_auto",
        fo.EmbeddedDocumentField,
        embedded_doc_type=fo.Detections,
    )

# Load a pre-trained feature extractor (ResNet18)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = resnet18(pretrained=True)
# Replace the ImageNet classification head with an identity so the network
# returns the 512-d pooled features rather than 1000 class logits. This is
# what a "feature extractor" should produce, and it matches the (0, 512)
# fallback shape used for the ground-truth embeddings below.
model.fc = torch.nn.Identity()
model = model.eval().to(device)

# Define preprocessing for the bounding box patches: resize to the 224x224
# input size and normalize with the standard ImageNet channel statistics
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

def extract_patch(sample, bbox):
    """Crop the region of a sample's image covered by a bounding box.

    Args:
        sample: object exposing a ``filepath`` attribute pointing at the image.
        bbox: ``(x, y, w, h)`` with all values relative to the image size
            (fractions of width/height).

    Returns:
        The crop passed through the module-level ``transform``, with a leading
        batch dimension added and moved to ``device``.
    """
    # Close the file handle deterministically; `convert` returns an
    # independent in-memory copy that outlives the `with` block.
    with Image.open(sample.filepath) as raw_image:
        image = raw_image.convert("RGB")
    img_w, img_h = image.size

    # Convert relative bounding box to absolute pixel coordinates
    x, y, w, h = bbox
    abs_x, abs_y = int(x * img_w), int(y * img_h)
    # int() truncation turns sub-pixel boxes into 0-pixel crops, which cannot
    # be resized; keep at least one pixel in each dimension.
    abs_w = max(1, int(w * img_w))
    abs_h = max(1, int(h * img_h))

    # Crop and preprocess
    patch = image.crop((abs_x, abs_y, abs_x + abs_w, abs_y + abs_h))
    return transform(patch).unsqueeze(0).to(device)  # Add batch dimension

def compute_embedding(image_patch):
    """Run the module-level ``model`` on a preprocessed patch.

    Args:
        image_patch: input accepted by ``model`` (as produced by
            ``extract_patch``).

    Returns:
        The model output moved to the CPU and flattened to a 1-D NumPy array.
    """
    # Inference only: no gradients needed
    with torch.no_grad():
        output = model(image_patch)

    host_array = output.cpu().numpy()
    return host_array.flatten()

def compute_iou(boxA, boxB):
    """Return the Intersection over Union of two boxes.

    Args:
        boxA: box as ``[x, y, width, height]``.
        boxB: box as ``[x, y, width, height]``.

    Returns:
        Intersection area divided by union area, or ``0`` when the union has
        no area.
    """
    ax, ay, aw, ah = boxA[0], boxA[1], boxA[2], boxA[3]
    bx, by, bw, bh = boxB[0], boxB[1], boxB[2], boxB[3]

    # Corners of the overlapping rectangle (may be inverted if disjoint)
    left, top = max(ax, bx), max(ay, by)
    right, bottom = min(ax + aw, bx + bw), min(ay + ah, by + bh)

    # Clamp at zero so disjoint boxes contribute no overlap
    overlap = max(0, right - left) * max(0, bottom - top)
    total = aw * ah + bw * bh - overlap

    if total > 0:
        return overlap / total
    return 0

# Step 1: Extract ground truth information from the whole dataset (Embeddings)
ground_truth_boxes = []
y_positions = []
gt_embeddings = []
gt_labels = []

for sample in dataset:
    if sample.categories_segmentations and sample.categories_segmentations.detections:
        for det in sample.categories_segmentations.detections:
            bbox = det.bounding_box
            ground_truth_boxes.append(bbox)
            y_positions.append(bbox[1])  # Store Y positions
            image_patch = extract_patch(sample, bbox)
            gt_embeddings.append(compute_embedding(image_patch))
            gt_labels.append(det.label)

# Convert embeddings list to NumPy array
gt_embeddings = np.array(gt_embeddings) if gt_embeddings else np.empty((0, 512))

# Compute size and Y-axis constraints: accepted areas lie within 1.5 standard
# deviations of the mean ground-truth box area
box_areas = [w * h for _, _, w, h in ground_truth_boxes]
if box_areas:
    avg_box_area = np.mean(box_areas)
    std_box_area = np.std(box_areas)
    lower_size_limit = max(0, avg_box_area - 1.5 * std_box_area)
    upper_size_limit = avg_box_area + 1.5 * std_box_area
else:
    # np.mean([]) is NaN, and every comparison against NaN is False, which
    # would silently reject all detections. Fall back to an unconstrained
    # size range, mirroring the permissive Y-range defaults below.
    print("Warning: no ground-truth boxes found; size filter is disabled.")
    avg_box_area = std_box_area = 0.0
    lower_size_limit, upper_size_limit = 0.0, float("inf")
min_y_gt = min(y_positions) if y_positions else 0
max_y_gt = max(y_positions) if y_positions else 1

# Step 2: Filter auto-generated bounding boxes
for sample in dataset_unique_images_wo_annotations:
    # Samples without any automatic detections are left untouched
    if not (sample.auto and sample.auto.detections):
        continue

    valid_detections = []
    for candidate in sample.auto.detections:
        _, top, box_w, box_h = candidate.bounding_box
        ratio = box_w / box_h if box_h > 0 else 1

        # Reject on the first failed constraint: size, vertical position,
        # then width/height ratio
        if not (lower_size_limit <= box_w * box_h <= upper_size_limit):
            continue
        if not (min_y_gt <= top <= max_y_gt):
            continue
        if not (0.25 <= ratio <= 0.8):
            continue
        valid_detections.append(candidate)

    # Step 3: Assign labels using embeddings (nearest ground-truth patch by
    # cosine similarity)
    for kept in valid_detections:
        patch_embedding = compute_embedding(extract_patch(sample, kept.bounding_box))

        if len(gt_embeddings) > 0:
            scores = cosine_similarity([patch_embedding], gt_embeddings)[0]
            kept.label = gt_labels[np.argmax(scores)]
        else:
            kept.label = "unknown"  # This should not happen

    # Save filtered detections in `polished_auto`
    sample["polished_auto"] = fo.Detections(detections=valid_detections)
    sample.save()

print("Filtering and label assignment completed for `polished_auto`.")


In [None]:
# Print both dataset summaries to compare the original dataset with the
# unique-samples dataset after label assignment
print(dataset)
print(dataset_unique_images_wo_annotations)

In [None]:
# Relaunch the App on the unique-samples dataset to review `polished_auto`
session = fo.launch_app(dataset_unique_images_wo_annotations, auto=False)

In [None]:
# Step 1: Duplicate `polished_auto` into `polished_auto_export`
dataset1 = dataset_unique_images_wo_annotations._dataset  # get the base dataset

export_field_missing = (
    "polished_auto_export"
    not in dataset_unique_images_wo_annotations.get_field_schema()
)
if export_field_missing:
    dataset1.add_sample_field(
        "polished_auto_export",
        fo.EmbeddedDocumentField,
        embedded_doc_type=fo.Detections,
    )

for sample in dataset1:
    polished = sample["polished_auto"]
    # Copy populated labels so the export field is independent of the source;
    # otherwise store None so the field is still set on every sample
    sample["polished_auto_export"] = polished.copy() if polished else None
    sample.save()

print("Duplicated `polished_auto` to `polished_auto_export`.")

# Step 2: Clean `polished_auto_export` to remove `score` and `confidence`
def clean_detections(sample, label_field):
    """Strip score/confidence data from a sample's detections, in place.

    For every detection in ``sample[label_field]``, the ``"score"`` and
    ``"confidence"`` keys are removed from its ``attributes`` mapping (when it
    has one), the ``score`` and ``confidence`` attributes are deleted (when
    present), and ``iscrowd`` is set to ``0`` for COCO export compatibility.

    Args:
        sample: mapping-like sample holding the labels.
        label_field: name of the field containing an object with a
            ``detections`` list.

    Returns:
        The same ``sample`` object (unchanged if the field is empty).
    """
    unwanted = ("score", "confidence")

    labels = sample[label_field]
    if not (labels and labels.detections):
        return sample

    for det in labels.detections:
        if hasattr(det, "attributes"):
            for key in unwanted:
                det.attributes.pop(key, None)
        for attr_name in unwanted:
            if hasattr(det, attr_name):
                delattr(det, attr_name)
        det["iscrowd"] = 0  # Ensure compatibility with COCO format

    return sample

# Apply the cleaning function to the export field of every sample and persist
# the change
for sample in dataset1:
    clean_detections(sample, "polished_auto_export")
    sample.save()

print("Cleaned `polished_auto_export` to remove conflicting fields.")

# Step 3: Export the dataset (COCO, CVAT and FiftyOne formats, in the cells below)

In [None]:
# Clone the unique-samples dataset and print the clone's summary
new_dataset= dataset_unique_images_wo_annotations.clone()
print(new_dataset)

In [None]:
# Launch the App once more to review the dataset before exporting
session = fo.launch_app(dataset_unique_images_wo_annotations, auto=False)
# Uncomment to open the App in a separate browser tab:
#session.open_tab()

In [None]:
# Step 3a: Export dataset in COCO format
export_dir = "100_unique_coffee_coco"
dataset_unique_images_wo_annotations.export(
    export_dir=export_dir,
    dataset_type=fo.types.COCODetectionDataset,
    label_field="polished_auto_export",  # Use cleaned duplicate field
    # `export_media` is the documented export() flag; `include_media` is not
    # an export() parameter.
    export_media=True,  # Export images along with annotations
)

In [None]:
# Step 3b: Export dataset in CVAT format
export_dir = "100_unique_coffee_cvat"
dataset_unique_images_wo_annotations.export(
    export_dir=export_dir,
    dataset_type=fo.types.CVATImageDataset,
    label_field="polished_auto_export",  # Use cleaned duplicate field
    # `export_media` is the documented export() flag; `include_media` is not
    # an export() parameter.
    export_media=True,  # Export images along with annotations
)

In [None]:
# Step 3c: Export dataset in FiftyOne format
export_dir = "100_unique_coffee_FO"
dataset_unique_images_wo_annotations.export(
    export_dir=export_dir,
    dataset_type=fo.types.FiftyOneDataset,
    label_field="polished_auto_export",  # Use cleaned duplicate field
    # `export_media` is the documented export() flag; `include_media` is not
    # an export() parameter.
    export_media=True,  # Export images along with annotations
)

### Optional: send images to CVAT to fix annotations

In [None]:
!fiftyone plugins download \
    https://github.com/voxel51/fiftyone-plugins \
    --plugin-names @voxel51/annotation

In [None]:
import os

import fiftyone as fo

# NOTE: `!export VAR=...` runs in a throwaway subshell, so the variables never
# reach this Python kernel. Configure the CVAT backend from Python instead.
# Replace the placeholder values with your own credentials before running.
cvat_settings = {
    # API host of CVAT Cloud (the www.cvat.ai address is the marketing site)
    "url": "https://app.cvat.ai",
    "username": "your-username",
    "password": "your-password",
    "email": "your-email",  # if applicable
}

for setting, value in cvat_settings.items():
    # Visible to subprocesses and to any later (re)load of the FiftyOne config
    os.environ[f"FIFTYONE_CVAT_{setting.upper()}"] = value
    # NOTE(review): FiftyOne presumably parsed its annotation config when it
    # was first imported, so mirror the values into the live config as well;
    # confirm against the FiftyOne CVAT integration docs.
    fo.annotation_config.backends.setdefault("cvat", {})[setting] = value

In [None]:
# The full workflow would send all 100 unique samples to CVAT; to keep this
# example small, randomly select 5 samples to upload
unique_5_view = dataset_unique_images_wo_annotations.take(5)

# A unique identifier for this annotation run
anno_key = "segs_run4"

# Upload the samples with their `auto` labels as instance segmentations,
# restrict the label choices to the four ripeness classes, and launch CVAT
anno_results = unique_5_view.annotate(
    anno_key,
    label_field="auto",
    label_type="instances",
    classes=["immature", "mature", "overmature", "semimature"],
    launch_editor=True,
)

![Image](https://github.com/user-attachments/assets/498d632a-c93a-41d7-82da-a81d6c29bbdf)

## Next Steps
- Fine-tune the SAM2 model for improved segmentation.
- Integrate additional annotation tools with FiftyOne.
- Explore active learning workflows for improving dataset quality.

In [None]:
# Mount Google Drive (only available when running inside Google Colab)
from google.colab import drive
drive.mount('/content/drive')

# Save the unique-samples dataset to Drive in FiftyOneDataset format
dataset_unique_images_wo_annotations.export(export_dir="/content/drive/MyDrive/coffee_dataset_sam2_FO", dataset_type=fo.types.FiftyOneDataset)