**Code reference:** https://www.appsloveworld.com/python/1383/how-to-extract-foreground-objects-from-coco-dataset-or-open-images-v6-dataset?expand_article=1

In [1]:
import os
import cv2 as cv
import numpy as np

In [2]:
def extract_classwise_instances(samples, output_dir, label_field, ext=".png"):
    """Save every segmented object as a 4-channel cut-out, grouped by class.

    For each detection the image is cropped to the detection's mask extent,
    the mask is appended as an alpha channel (255 inside the object, 0
    outside) and the result is written to ``<output_dir>/<label>/<det.id><ext>``.

    Args:
        samples: collection exposing ``iter_samples(progress=True)``; each
            sample provides ``filepath`` and ``sample[label_field].detections``.
        output_dir: root directory for the per-class folders; created on
            demand, including missing parents.
        label_field: name of the sample field holding the detections.
        ext: file extension (with leading dot) of the written images.

    Samples whose image cannot be read and detections without a mask are
    skipped instead of aborting the whole run.
    """
    print("Extracting object instances...")
    for sample in samples.iter_samples(progress=True):
        img = cv.imread(sample.filepath)
        if img is None:
            # cv.imread reports a missing/unreadable file by returning None.
            print(f"Skipping unreadable image: {sample.filepath}")
            continue
        img_h, img_w = img.shape[:2]

        labels = sample[label_field]
        if labels is None:
            continue

        for det in labels.detections:
            mask = det.mask
            if mask is None:
                # No instance mask available, so there is no alpha to build.
                continue

            # The box's top-left corner is stored as a fraction of the image
            # size; the crop's height/width come from the mask itself.
            x, y, _, _ = det.bounding_box
            x = int(x * img_w)
            y = int(y * img_h)
            h, w = mask.shape
            crop = img[y:y+h, x:x+w, :]

            # Slicing silently clips at the image border; trim the mask to the
            # same extent so both arrays can be concatenated.
            crop_h, crop_w = crop.shape[:2]
            if crop_h == 0 or crop_w == 0:
                continue
            alpha = mask[:crop_h, :crop_w].astype(np.uint8) * 255
            alpha = np.expand_dims(alpha, 2)
            cutout = np.concatenate((crop, alpha), axis=2)

            label_dir = os.path.join(output_dir, det.label)
            # makedirs also creates missing parents and tolerates reruns.
            os.makedirs(label_dir, exist_ok=True)
            output_filepath = os.path.join(label_dir, det.id + ext)
            cv.imwrite(output_filepath, cutout)

In [3]:
def save_composite(samples, output_dir, label_field, ext=".png"):
    """Save each full image once, filed under the class of its first detection.

    The image is written unchanged to
    ``<output_dir>/<first_det.label>/<first_det.id><ext>``.

    Args:
        samples: collection exposing ``iter_samples(progress=True)``; each
            sample provides ``filepath`` and ``sample[label_field].detections``.
        output_dir: root directory for the per-class folders; created on
            demand, including missing parents.
        label_field: name of the sample field holding the detections.
        ext: file extension (with leading dot) of the written images.

    Samples without detections, or whose image cannot be read, are skipped.
    """
    print("Saving composite images...")
    for sample in samples.iter_samples(progress=True):
        labels = sample[label_field]
        detections = labels.detections if labels is not None else []
        if not detections:
            # Without a detection there is no class folder or file name to
            # use; writing to the bare output_dir path would be an error.
            continue

        img = cv.imread(sample.filepath)
        if img is None:
            # cv.imread reports a missing/unreadable file by returning None.
            print(f"Skipping unreadable image: {sample.filepath}")
            continue

        # Only the first detection decides where the image goes, so class
        # folders are created only when a file is actually written into them.
        first_det = detections[0]
        label_dir = os.path.join(output_dir, first_det.label)
        os.makedirs(label_dir, exist_ok=True)
        output_filepath = os.path.join(label_dir, first_det.id + ext)
        cv.imwrite(output_filepath, img)

In [4]:
# %pip installs into the environment of the running kernel, whereas `!pip`
# runs in a subshell that may resolve to a different interpreter.
# FiftyOne is pinned to the release the recorded outputs were produced with.
%pip install fiftyone==0.22.0
%pip install fiftyone-db-ubuntu2204

Collecting fiftyone
  Downloading fiftyone-0.22.0-py3-none-any.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m60.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting aiofiles (from fiftyone)
  Downloading aiofiles-23.2.1-py3-none-any.whl (15 kB)
Collecting argcomplete (from fiftyone)
  Downloading argcomplete-3.1.2-py3-none-any.whl (41 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.5/41.5 kB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting boto3 (from fiftyone)
  Downloading boto3-1.28.55-py3-none-any.whl (135 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m135.8/135.8 kB[0m [31m18.0 MB/s[0m eta [36m0:00:00[0m
Collecting dacite<1.8.0,>=1.6.0 (from fiftyone)
  Downloading dacite-1.7.0-py3-none-any.whl (12 kB)
Collecting Deprecated (from fiftyone)
  Downloading Deprecated-1.2.14-py2.py3-none-any.whl (9.6 kB)
Collecting ftfy (from fiftyone)
  Downloading ftfy-6.1.1-py3-none-any.whl (53 

In [5]:
import fiftyone as fo
import fiftyone.zoo as foz
from fiftyone import ViewField as F

Migrating database to v0.22.0


INFO:fiftyone.migrations.runner:Migrating database to v0.22.0


In [6]:
# Remove any dataset already registered under this name so that a dataset
# with the same name can be created again.
dataset_name = "coco-image-example"
existing_names = fo.list_datasets()
if dataset_name in existing_names:
    fo.delete_dataset(dataset_name)

In [7]:
# Object classes of interest, and the sample field their labels are stored in.
classes = ["horse", "airplane"]
label_field = "ground_truth"

In [8]:
# Load up to 500 shuffled samples of the COCO-2017 train split that contain
# the requested classes, with segmentation labels stored in `label_field`.
# A fixed seed makes the shuffled subset identical on every fresh run.
dataset = foz.load_zoo_dataset(
    "coco-2017",
    split="train",
    label_types=["segmentations"],
    classes=classes,
    max_samples=500,
    shuffle=True,
    seed=51,
    label_field=label_field,
    dataset_name=dataset_name,
)

Downloading split 'train' to '/root/fiftyone/coco-2017/train' if necessary


INFO:fiftyone.zoo.datasets:Downloading split 'train' to '/root/fiftyone/coco-2017/train' if necessary


Downloading annotations to '/root/fiftyone/coco-2017/tmp-download/annotations_trainval2017.zip'


INFO:fiftyone.utils.coco:Downloading annotations to '/root/fiftyone/coco-2017/tmp-download/annotations_trainval2017.zip'


 100% |██████|    1.9Gb/1.9Gb [17.2s elapsed, 0s remaining, 121.9Mb/s]      


INFO:eta.core.utils: 100% |██████|    1.9Gb/1.9Gb [17.2s elapsed, 0s remaining, 121.9Mb/s]      


Extracting annotations to '/root/fiftyone/coco-2017/raw/instances_train2017.json'


INFO:fiftyone.utils.coco:Extracting annotations to '/root/fiftyone/coco-2017/raw/instances_train2017.json'


Downloading 500 images


INFO:fiftyone.utils.coco:Downloading 500 images


 100% |██████████████████| 500/500 [54.9s elapsed, 0s remaining, 8.2 images/s]      


INFO:eta.core.utils: 100% |██████████████████| 500/500 [54.9s elapsed, 0s remaining, 8.2 images/s]      


Writing annotations for 500 downloaded samples to '/root/fiftyone/coco-2017/train/labels.json'


INFO:fiftyone.utils.coco:Writing annotations for 500 downloaded samples to '/root/fiftyone/coco-2017/train/labels.json'


Dataset info written to '/root/fiftyone/coco-2017/info.json'


INFO:fiftyone.zoo.datasets:Dataset info written to '/root/fiftyone/coco-2017/info.json'


Loading 'coco-2017' split 'train'


INFO:fiftyone.zoo.datasets:Loading 'coco-2017' split 'train'


 100% |█████████████████| 500/500 [5.7s elapsed, 0s remaining, 89.0 samples/s]       


INFO:eta.core.utils: 100% |█████████████████| 500/500 [5.7s elapsed, 0s remaining, 89.0 samples/s]       


Dataset 'coco-image-example' created


INFO:fiftyone.zoo.datasets:Dataset 'coco-image-example' created


In [9]:
# Keep only the labels whose class is one of the requested `classes`.
is_requested_class = F("label").is_in(classes)
view = dataset.filter_labels(label_field, is_requested_class)

In [10]:
# Root folders for the per-object cut-outs and for the full images; both are
# created up front and left untouched if they already exist.
foreground_output_dir = "/data/foreground_dataset"
composite_output_dir = "/data/composite_dataset"
for output_root in (foreground_output_dir, composite_output_dir):
    os.makedirs(output_root, exist_ok=True)

In [11]:
# Write one cut-out per detection of the filtered view, grouped by class.
extract_classwise_instances(
    samples=view,
    output_dir=foreground_output_dir,
    label_field=label_field,
)

Extracting object instances...
 100% |█████████████████| 500/500 [8.4s elapsed, 0s remaining, 60.4 samples/s]       


INFO:eta.core.utils: 100% |█████████████████| 500/500 [8.4s elapsed, 0s remaining, 60.4 samples/s]       


In [12]:
# Write the full images of the filtered view into per-class folders.
save_composite(
    samples=view,
    output_dir=composite_output_dir,
    label_field=label_field,
)

Saving composite images...
 100% |█████████████████| 500/500 [12.7s elapsed, 0s remaining, 39.0 samples/s]      


INFO:eta.core.utils: 100% |█████████████████| 500/500 [12.7s elapsed, 0s remaining, 39.0 samples/s]      


## **Turn preprocessed images into a custom dataset**

In [13]:
import torch
from torchvision import transforms, datasets

In [14]:
# Per-channel statistics for normalisation (these match the ImageNet values
# quoted in the torchvision documentation).
channel_mean = [0.485, 0.456, 0.406]
channel_std = [0.229, 0.224, 0.225]

# Convert each image to a tensor, then normalise it channel-wise.
# NOTE(review): no resize/crop step here, so output tensors keep each image's
# own size — confirm this is intended before batching.
data_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=channel_mean, std=channel_std),
])

In [15]:
# Index the extracted cut-outs; each sub-folder name becomes a class.
# NOTE(review): ImageFolder's default loader presumably converts images to
# RGB, which would discard the PNG alpha channel — confirm that is intended.
foreground_root = '/data/foreground_dataset'
foreground_dataset = datasets.ImageFolder(
    foreground_root,
    transform=data_transform,
)

In [18]:
# Shuffled mini-batches of four cut-outs, loaded by a single worker process.
# NOTE(review): the cut-outs differ in size and the transform does not resize,
# so default batch collation will likely fail on iteration — confirm.
fore_dataset_loader = torch.utils.data.DataLoader(
    dataset=foreground_dataset,
    shuffle=True,
    batch_size=4,
    num_workers=1,
)

In [19]:
# Index the full images; each sub-folder name becomes a class.
composite_root = '/data/composite_dataset'
composite_dataset = datasets.ImageFolder(
    composite_root,
    transform=data_transform,
)

In [20]:
# Shuffled mini-batches of four full images, loaded by a single worker process.
# NOTE(review): assumes all images in a batch share one size — the transform
# does not resize, so confirm before iterating.
composite_dataset_loader = torch.utils.data.DataLoader(
    dataset=composite_dataset,
    shuffle=True,
    batch_size=4,
    num_workers=1,
)

In [21]:
# Show the class names each dataset discovered, foreground first.
for loader in (fore_dataset_loader, composite_dataset_loader):
    print(loader.dataset.classes)

['airplane', 'horse']
['airplane', 'horse']
