This notebook batch-segments every image of a dataset loaded from the Hugging Face Hub with SAM 2.1 (via Ultralytics) and saves the segmented images to a local directory (`output/<split>/`).

In [2]:
# uncomment and run this in colab to install required packages
# !pip install ultralytics --quiet
# !pip install datasets --quiet
# !pip install tqdm --quiet

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m905.3/905.3 kB[0m [31m42.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m28.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m10.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m179.3/179.3 kB[0m [31m16.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m12.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m194.1/194.1 kB[0m [31m17.8 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
gcsfs 2024.10.0 requires fsspec==2024.10.0, but you have fsspec 2024.9.0 which is incomp

In [3]:
import numpy as np
from PIL import Image
from tqdm import tqdm
from datasets import load_dataset

from ultralytics import SAM
import os

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.


In [4]:
# Hugging Face Hub repository id of the dataset to segment.
DATASET_ID = "e1010101/tongue-images-384"

# Load all splits; the bare expression on the last line displays the split summary.
dataset = load_dataset(DATASET_ID)
dataset

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md:   0%|          | 0.00/639 [00:00<?, ?B/s]

train-00000-of-00003.parquet:   0%|          | 0.00/109M [00:00<?, ?B/s]

train-00001-of-00003.parquet:   0%|          | 0.00/108M [00:00<?, ?B/s]

train-00002-of-00003.parquet:   0%|          | 0.00/109M [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/95.4M [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/46.3M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/746 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/214 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/106 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['image', 'labels', 'pixel_values'],
        num_rows: 746
    })
    validation: Dataset({
        features: ['image', 'labels', 'pixel_values'],
        num_rows: 214
    })
    test: Dataset({
        features: ['image', 'labels', 'pixel_values'],
        num_rows: 106
    })
})

In [5]:
# Checkpoint file name passed to Ultralytics' SAM wrapper.
# See https://docs.ultralytics.com/models/sam-2 for the list of the latest models.
SAM_WEIGHTS = "sam2.1_l.pt"

model = SAM(SAM_WEIGHTS)
model.info()

Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/sam2.1_l.pt to 'sam2.1_l.pt'...


100%|██████████| 428M/428M [00:01<00:00, 283MB/s]


Model summary: 854 layers, 224,446,642 parameters, 224,446,642 gradients


(854, 224446642, 224446642, 0.0)

In [6]:
def segment(image, points=None, labels=None):
    """Segment the prompted object in ``image`` and black out everything else.

    The module-level ``model`` is called with point prompts, the first
    returned mask is thresholded at 0.5, and every pixel outside that mask is
    set to zero.

    Args:
        image: A PIL image or an array-like of shape (H, W) or (H, W, C).
            Objects exposing ``convert`` (PIL images) are converted to RGB
            first; arrays are used as given.
        points: Point prompts forwarded to the model. Defaults to
            ``[[350, 320], [0, 0]]``.
        labels: Labels matching ``points`` (1 = foreground, 0 = background).
            Defaults to ``[1, 0]``.

    Returns:
        A numpy array with the same shape and dtype as the input pixels, with
        all pixels outside the mask zeroed.

    Raises:
        ValueError: If the model returns no mask, or the mask's height/width
            does not match the image.
    """
    # NOTE(review): the Ultralytics SAM docs describe a flat list of points as
    # one prompt *per object*; a negative point for the same object is written
    # with nested lists (points=[[[350, 320], [0, 0]]], labels=[[1, 0]]).
    # Confirm against the installed version whether the (0, 0) background
    # point actually constrains the first mask. The defaults below are kept
    # identical to the previously hard-coded values.
    if points is None:
        points = [[350, 320], [0, 0]]
    if labels is None:
        labels = [1, 0]

    # Palette, grayscale or RGBA PIL images would otherwise yield arrays that
    # are not 3-channel; normalise them up front.
    if hasattr(image, "convert"):
        image = image.convert("RGB")
    image_np = np.array(image)

    # Segmentation
    results = model(image_np, points=points, labels=labels)

    # Get the first mask from the results, failing loudly if there is none
    masks = results[0].masks if results else None
    if masks is None or len(masks.data) == 0:
        raise ValueError("segmentation model returned no mask for the given prompts")
    mask = masks.data[0].cpu().numpy()

    # Masking
    binary_mask = mask > 0.5
    if binary_mask.shape != image_np.shape[:2]:
        raise ValueError(
            f"mask shape {binary_mask.shape} does not match image shape {image_np.shape[:2]}"
        )
    if image_np.ndim == 3:
        # Add a trailing axis so the mask broadcasts over any number of channels.
        binary_mask = binary_mask[:, :, np.newaxis]

    return image_np * binary_mask

In [7]:
# Show the training split's columns and row count before processing.
dataset["train"]

Dataset({
    features: ['image', 'labels', 'pixel_values'],
    num_rows: 746
})

In [None]:
# Root directory for the results; one sub-directory is created per split.
OUTPUT_DIR = "output"
splits = ['train', 'validation', 'test']

for split in splits:
    split_dir = os.path.join(OUTPUT_DIR, split)
    os.makedirs(split_dir, exist_ok=True)

    ds = dataset[split]

    # Only the 'image' column is used below. Iterating an image-only view
    # avoids decoding the unused 'labels' and 'pixel_values' columns for
    # every row. Row order (and therefore idx) is unchanged.
    images_only = ds.select_columns(["image"])

    for idx, item in tqdm(enumerate(images_only), total=len(images_only), desc=f"Processing {split}"):
        # Perform segmentation
        result = segment(item['image'])

        # Convert the numpy array to PIL Image
        segmented_image = Image.fromarray(result.astype(np.uint8))

        # Save the image; idx is the row's position within the split
        output_path = os.path.join(split_dir, f"image_{idx}.png")
        segmented_image.save(output_path)

print("Saved all segmented images!")

Every image is segmented with the same fixed point prompts, so some results may be poor. It is recommended to manually browse through the output and remove any poorly segmented images.