In [None]:
!pip install fiftyone

Collecting fiftyone
  Downloading fiftyone-1.4.1-py3-none-any.whl.metadata (23 kB)
Collecting aiofiles (from fiftyone)
  Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)
Collecting argcomplete (from fiftyone)
  Downloading argcomplete-3.6.2-py3-none-any.whl.metadata (16 kB)
Collecting boto3 (from fiftyone)
  Downloading boto3-1.37.28-py3-none-any.whl.metadata (6.7 kB)
Collecting dacite<1.8.0,>=1.6.0 (from fiftyone)
  Downloading dacite-1.7.0-py3-none-any.whl.metadata (14 kB)
Collecting ftfy (from fiftyone)
  Downloading ftfy-6.3.1-py3-none-any.whl.metadata (7.3 kB)
Collecting hypercorn>=0.13.2 (from fiftyone)
  Downloading hypercorn-0.17.3-py3-none-any.whl.metadata (5.4 kB)
Collecting kaleido!=0.2.1.post1 (from fiftyone)
  Downloading kaleido-0.2.1-py2.py3-none-manylinux1_x86_64.whl.metadata (15 kB)
Collecting mongoengine~=0.29.1 (from fiftyone)
  Downloading mongoengine-0.29.1-py3-none-any.whl.metadata (6.7 kB)
Collecting motor~=3.6.0 (from fiftyone)
  Downloading motor-3

In [None]:
import os
import shutil
import gc
import fiftyone as fo
import fiftyone.zoo as foz
import fiftyone.types as fot


# Open Images class names to fetch; one sub-dataset is exported per class.
classes = ["Dog", "Cat", "Deer", "Bear", "Bird", "Person", "Car", "Truck", "Airplane"]

base_export_dir = "/content/dataset"

# exist_ok makes the cell safely re-runnable and avoids the check-then-create race.
os.makedirs(base_export_dir, exist_ok=True)


def _replace_dir(src, dst):
    """Rename directory ``src`` to ``dst``, deleting a stale ``dst`` first.

    Does nothing when ``src`` does not exist.
    """
    if not os.path.exists(src):
        return
    if os.path.exists(dst):
        shutil.rmtree(dst)
    os.rename(src, dst)


for cls in classes:
    print(f"Downloading Open Images subset for class '{cls}'...")

    dataset = foz.load_zoo_dataset(
        "open-images-v6",
        split="train",
        classes=[cls],
        max_samples=500,  # try with fewer samples for testing
        shuffle=True,
    )

    # Every class yields a dataset with the same name (see the logged
    # "Dataset 'open-images-v6-train-500' created"), so the dataset must be
    # deleted even when the export or a rename fails; otherwise a leftover
    # dataset would presumably collide with the next load (TODO confirm).
    try:
        export_dir = os.path.join(base_export_dir, cls)

        dataset.export(
            export_dir=export_dir,
            dataset_type=fot.VOCDetectionDataset,
            label_field="detections",
            export_media="move",
        )

        # Rename the exported "data"/"labels" folders to the
        # "images"/"annotations" layout expected by the training code.
        _replace_dir(os.path.join(export_dir, "data"), os.path.join(export_dir, "images"))
        _replace_dir(os.path.join(export_dir, "labels"), os.path.join(export_dir, "annotations"))

        print(f"Exported and organized dataset for '{cls}' at {export_dir}")
    finally:
        # Delete the dataset and free memory
        dataset.delete()
        gc.collect()

print("Dataset processing complete.")



Downloading Open Images subset for class 'Dog'...
Downloading split 'train' to '/root/fiftyone/open-images-v6/train' if necessary


INFO:fiftyone.zoo.datasets:Downloading split 'train' to '/root/fiftyone/open-images-v6/train' if necessary


Downloading 'https://storage.googleapis.com/openimages/2018_04/train/train-images-boxable-with-rotation.csv' to '/root/fiftyone/open-images-v6/train/metadata/image_ids.csv'


INFO:fiftyone.utils.openimages:Downloading 'https://storage.googleapis.com/openimages/2018_04/train/train-images-boxable-with-rotation.csv' to '/root/fiftyone/open-images-v6/train/metadata/image_ids.csv'


 100% |██████|    4.8Gb/4.8Gb [15.5s elapsed, 0s remaining, 344.7Mb/s]      


INFO:eta.core.utils: 100% |██████|    4.8Gb/4.8Gb [15.5s elapsed, 0s remaining, 344.7Mb/s]      


Downloading 'https://storage.googleapis.com/openimages/v5/class-descriptions-boxable.csv' to '/root/fiftyone/open-images-v6/train/metadata/classes.csv'


INFO:fiftyone.utils.openimages:Downloading 'https://storage.googleapis.com/openimages/v5/class-descriptions-boxable.csv' to '/root/fiftyone/open-images-v6/train/metadata/classes.csv'


Downloading 'https://storage.googleapis.com/openimages/v6/oidv6-attributes-description.csv' to '/root/fiftyone/open-images-v6/train/metadata/attributes.csv'


INFO:fiftyone.utils.openimages:Downloading 'https://storage.googleapis.com/openimages/v6/oidv6-attributes-description.csv' to '/root/fiftyone/open-images-v6/train/metadata/attributes.csv'


Downloading 'https://storage.googleapis.com/openimages/v5/classes-segmentation.txt' to '/root/fiftyone/open-images-v6/train/metadata/segmentation_classes.csv'


INFO:fiftyone.utils.openimages:Downloading 'https://storage.googleapis.com/openimages/v5/classes-segmentation.txt' to '/root/fiftyone/open-images-v6/train/metadata/segmentation_classes.csv'


Downloading 'https://storage.googleapis.com/openimages/2018_04/bbox_labels_600_hierarchy.json' to '/tmp/tmpcqgetmc3/metadata/hierarchy.json'


INFO:fiftyone.utils.openimages:Downloading 'https://storage.googleapis.com/openimages/2018_04/bbox_labels_600_hierarchy.json' to '/tmp/tmpcqgetmc3/metadata/hierarchy.json'


Downloading 'https://storage.googleapis.com/openimages/v5/train-annotations-human-imagelabels-boxable.csv' to '/root/fiftyone/open-images-v6/train/labels/classifications.csv'


INFO:fiftyone.utils.openimages:Downloading 'https://storage.googleapis.com/openimages/v5/train-annotations-human-imagelabels-boxable.csv' to '/root/fiftyone/open-images-v6/train/labels/classifications.csv'


Downloading 'https://storage.googleapis.com/openimages/v6/oidv6-train-annotations-bbox.csv' to '/root/fiftyone/open-images-v6/train/labels/detections.csv'


INFO:fiftyone.utils.openimages:Downloading 'https://storage.googleapis.com/openimages/v6/oidv6-train-annotations-bbox.csv' to '/root/fiftyone/open-images-v6/train/labels/detections.csv'


Downloading 'https://storage.googleapis.com/openimages/v6/oidv6-train-annotations-vrd.csv' to '/root/fiftyone/open-images-v6/train/labels/relationships.csv'


INFO:fiftyone.utils.openimages:Downloading 'https://storage.googleapis.com/openimages/v6/oidv6-train-annotations-vrd.csv' to '/root/fiftyone/open-images-v6/train/labels/relationships.csv'


Downloading 'https://storage.googleapis.com/openimages/v5/train-annotations-object-segmentation.csv' to '/root/fiftyone/open-images-v6/train/labels/segmentations.csv'


INFO:fiftyone.utils.openimages:Downloading 'https://storage.googleapis.com/openimages/v5/train-annotations-object-segmentation.csv' to '/root/fiftyone/open-images-v6/train/labels/segmentations.csv'


Downloading 'https://storage.googleapis.com/openimages/v5/train-masks/train-masks-3.zip' to '/root/fiftyone/open-images-v6/train/labels/masks/3.zip'


INFO:fiftyone.utils.openimages:Downloading 'https://storage.googleapis.com/openimages/v5/train-masks/train-masks-3.zip' to '/root/fiftyone/open-images-v6/train/labels/masks/3.zip'


Downloading 'https://storage.googleapis.com/openimages/v5/train-masks/train-masks-b.zip' to '/root/fiftyone/open-images-v6/train/labels/masks/B.zip'


INFO:fiftyone.utils.openimages:Downloading 'https://storage.googleapis.com/openimages/v5/train-masks/train-masks-b.zip' to '/root/fiftyone/open-images-v6/train/labels/masks/B.zip'


Downloading 'https://storage.googleapis.com/openimages/v5/train-masks/train-masks-c.zip' to '/root/fiftyone/open-images-v6/train/labels/masks/C.zip'


INFO:fiftyone.utils.openimages:Downloading 'https://storage.googleapis.com/openimages/v5/train-masks/train-masks-c.zip' to '/root/fiftyone/open-images-v6/train/labels/masks/C.zip'


Downloading 'https://storage.googleapis.com/openimages/v5/train-masks/train-masks-6.zip' to '/root/fiftyone/open-images-v6/train/labels/masks/6.zip'


INFO:fiftyone.utils.openimages:Downloading 'https://storage.googleapis.com/openimages/v5/train-masks/train-masks-6.zip' to '/root/fiftyone/open-images-v6/train/labels/masks/6.zip'


Downloading 'https://storage.googleapis.com/openimages/v5/train-masks/train-masks-5.zip' to '/root/fiftyone/open-images-v6/train/labels/masks/5.zip'


INFO:fiftyone.utils.openimages:Downloading 'https://storage.googleapis.com/openimages/v5/train-masks/train-masks-5.zip' to '/root/fiftyone/open-images-v6/train/labels/masks/5.zip'


Downloading 'https://storage.googleapis.com/openimages/v5/train-masks/train-masks-d.zip' to '/root/fiftyone/open-images-v6/train/labels/masks/D.zip'


INFO:fiftyone.utils.openimages:Downloading 'https://storage.googleapis.com/openimages/v5/train-masks/train-masks-d.zip' to '/root/fiftyone/open-images-v6/train/labels/masks/D.zip'


Downloading 'https://storage.googleapis.com/openimages/v5/train-masks/train-masks-4.zip' to '/root/fiftyone/open-images-v6/train/labels/masks/4.zip'


INFO:fiftyone.utils.openimages:Downloading 'https://storage.googleapis.com/openimages/v5/train-masks/train-masks-4.zip' to '/root/fiftyone/open-images-v6/train/labels/masks/4.zip'


Downloading 'https://storage.googleapis.com/openimages/v5/train-masks/train-masks-e.zip' to '/root/fiftyone/open-images-v6/train/labels/masks/E.zip'


INFO:fiftyone.utils.openimages:Downloading 'https://storage.googleapis.com/openimages/v5/train-masks/train-masks-e.zip' to '/root/fiftyone/open-images-v6/train/labels/masks/E.zip'


Downloading 'https://storage.googleapis.com/openimages/v5/train-masks/train-masks-2.zip' to '/root/fiftyone/open-images-v6/train/labels/masks/2.zip'


INFO:fiftyone.utils.openimages:Downloading 'https://storage.googleapis.com/openimages/v5/train-masks/train-masks-2.zip' to '/root/fiftyone/open-images-v6/train/labels/masks/2.zip'


Downloading 'https://storage.googleapis.com/openimages/v5/train-masks/train-masks-a.zip' to '/root/fiftyone/open-images-v6/train/labels/masks/A.zip'


INFO:fiftyone.utils.openimages:Downloading 'https://storage.googleapis.com/openimages/v5/train-masks/train-masks-a.zip' to '/root/fiftyone/open-images-v6/train/labels/masks/A.zip'


Downloading 'https://storage.googleapis.com/openimages/v5/train-masks/train-masks-1.zip' to '/root/fiftyone/open-images-v6/train/labels/masks/1.zip'


INFO:fiftyone.utils.openimages:Downloading 'https://storage.googleapis.com/openimages/v5/train-masks/train-masks-1.zip' to '/root/fiftyone/open-images-v6/train/labels/masks/1.zip'


Downloading 'https://storage.googleapis.com/openimages/v5/train-masks/train-masks-7.zip' to '/root/fiftyone/open-images-v6/train/labels/masks/7.zip'


INFO:fiftyone.utils.openimages:Downloading 'https://storage.googleapis.com/openimages/v5/train-masks/train-masks-7.zip' to '/root/fiftyone/open-images-v6/train/labels/masks/7.zip'


Downloading 'https://storage.googleapis.com/openimages/v5/train-masks/train-masks-9.zip' to '/root/fiftyone/open-images-v6/train/labels/masks/9.zip'


INFO:fiftyone.utils.openimages:Downloading 'https://storage.googleapis.com/openimages/v5/train-masks/train-masks-9.zip' to '/root/fiftyone/open-images-v6/train/labels/masks/9.zip'


Downloading 'https://storage.googleapis.com/openimages/v5/train-masks/train-masks-0.zip' to '/root/fiftyone/open-images-v6/train/labels/masks/0.zip'


INFO:fiftyone.utils.openimages:Downloading 'https://storage.googleapis.com/openimages/v5/train-masks/train-masks-0.zip' to '/root/fiftyone/open-images-v6/train/labels/masks/0.zip'


Downloading 'https://storage.googleapis.com/openimages/v5/train-masks/train-masks-f.zip' to '/root/fiftyone/open-images-v6/train/labels/masks/F.zip'


INFO:fiftyone.utils.openimages:Downloading 'https://storage.googleapis.com/openimages/v5/train-masks/train-masks-f.zip' to '/root/fiftyone/open-images-v6/train/labels/masks/F.zip'


Downloading 'https://storage.googleapis.com/openimages/v5/train-masks/train-masks-8.zip' to '/root/fiftyone/open-images-v6/train/labels/masks/8.zip'


INFO:fiftyone.utils.openimages:Downloading 'https://storage.googleapis.com/openimages/v5/train-masks/train-masks-8.zip' to '/root/fiftyone/open-images-v6/train/labels/masks/8.zip'


Downloading 500 images


INFO:fiftyone.utils.openimages:Downloading 500 images


 100% |███████████████████| 500/500 [18.4s elapsed, 0s remaining, 30.3 files/s]      


INFO:eta.core.utils: 100% |███████████████████| 500/500 [18.4s elapsed, 0s remaining, 30.3 files/s]      


Dataset info written to '/root/fiftyone/open-images-v6/info.json'


INFO:fiftyone.zoo.datasets:Dataset info written to '/root/fiftyone/open-images-v6/info.json'


Loading 'open-images-v6' split 'train'


INFO:fiftyone.zoo.datasets:Loading 'open-images-v6' split 'train'


 100% |█████████████████| 500/500 [29.4s elapsed, 0s remaining, 14.4 samples/s]      


INFO:eta.core.utils: 100% |█████████████████| 500/500 [29.4s elapsed, 0s remaining, 14.4 samples/s]      


Dataset 'open-images-v6-train-500' created


INFO:fiftyone.zoo.datasets:Dataset 'open-images-v6-train-500' created


 100% |█████████████████| 500/500 [3.1s elapsed, 0s remaining, 154.3 samples/s]      


INFO:eta.core.utils: 100% |█████████████████| 500/500 [3.1s elapsed, 0s remaining, 154.3 samples/s]      


Exported and organized dataset for 'Dog' at /content/dataset/Dog
Downloading Open Images subset for class 'Cat'...
Downloading split 'train' to '/root/fiftyone/open-images-v6/train' if necessary


INFO:fiftyone.zoo.datasets:Downloading split 'train' to '/root/fiftyone/open-images-v6/train' if necessary


Downloading 500 images


INFO:fiftyone.utils.openimages:Downloading 500 images


 100% |███████████████████| 500/500 [18.2s elapsed, 0s remaining, 31.0 files/s]      


INFO:eta.core.utils: 100% |███████████████████| 500/500 [18.2s elapsed, 0s remaining, 31.0 files/s]      


Dataset info written to '/root/fiftyone/open-images-v6/info.json'


INFO:fiftyone.zoo.datasets:Dataset info written to '/root/fiftyone/open-images-v6/info.json'


Loading 'open-images-v6' split 'train'


INFO:fiftyone.zoo.datasets:Loading 'open-images-v6' split 'train'


 100% |█████████████████| 500/500 [20.9s elapsed, 0s remaining, 26.4 samples/s]      


INFO:eta.core.utils: 100% |█████████████████| 500/500 [20.9s elapsed, 0s remaining, 26.4 samples/s]      


Dataset 'open-images-v6-train-500' created


INFO:fiftyone.zoo.datasets:Dataset 'open-images-v6-train-500' created


 100% |█████████████████| 500/500 [2.3s elapsed, 0s remaining, 225.4 samples/s]      


INFO:eta.core.utils: 100% |█████████████████| 500/500 [2.3s elapsed, 0s remaining, 225.4 samples/s]      


Exported and organized dataset for 'Cat' at /content/dataset/Cat
Downloading Open Images subset for class 'Deer'...
Downloading split 'train' to '/root/fiftyone/open-images-v6/train' if necessary


INFO:fiftyone.zoo.datasets:Downloading split 'train' to '/root/fiftyone/open-images-v6/train' if necessary


No segmentations exist for classes ['Deer']
You can view the available segmentation classes via `get_segmentation_classes()`


You can view the available segmentation classes via `get_segmentation_classes()`


Downloading 500 images


INFO:fiftyone.utils.openimages:Downloading 500 images


 100% |███████████████████| 500/500 [18.6s elapsed, 0s remaining, 29.1 files/s]      


INFO:eta.core.utils: 100% |███████████████████| 500/500 [18.6s elapsed, 0s remaining, 29.1 files/s]      


Dataset info written to '/root/fiftyone/open-images-v6/info.json'


INFO:fiftyone.zoo.datasets:Dataset info written to '/root/fiftyone/open-images-v6/info.json'


Loading 'open-images-v6' split 'train'


INFO:fiftyone.zoo.datasets:Loading 'open-images-v6' split 'train'


No segmentations exist for classes ['Deer']
You can view the available segmentation classes via `get_segmentation_classes()`


You can view the available segmentation classes via `get_segmentation_classes()`


 100% |█████████████████| 500/500 [3.7s elapsed, 0s remaining, 149.4 samples/s]      


INFO:eta.core.utils: 100% |█████████████████| 500/500 [3.7s elapsed, 0s remaining, 149.4 samples/s]      


Dataset 'open-images-v6-train-500' created


INFO:fiftyone.zoo.datasets:Dataset 'open-images-v6-train-500' created


 100% |█████████████████| 500/500 [2.0s elapsed, 0s remaining, 315.9 samples/s]      


INFO:eta.core.utils: 100% |█████████████████| 500/500 [2.0s elapsed, 0s remaining, 315.9 samples/s]      


Exported and organized dataset for 'Deer' at /content/dataset/Deer
Downloading Open Images subset for class 'Bear'...
Downloading split 'train' to '/root/fiftyone/open-images-v6/train' if necessary


INFO:fiftyone.zoo.datasets:Downloading split 'train' to '/root/fiftyone/open-images-v6/train' if necessary


Only found 353 (<500) samples matching your requirements




Downloading 353 images


INFO:fiftyone.utils.openimages:Downloading 353 images


 100% |███████████████████| 353/353 [12.9s elapsed, 0s remaining, 30.3 files/s]      


INFO:eta.core.utils: 100% |███████████████████| 353/353 [12.9s elapsed, 0s remaining, 30.3 files/s]      


Dataset info written to '/root/fiftyone/open-images-v6/info.json'


INFO:fiftyone.zoo.datasets:Dataset info written to '/root/fiftyone/open-images-v6/info.json'


Loading 'open-images-v6' split 'train'


INFO:fiftyone.zoo.datasets:Loading 'open-images-v6' split 'train'


 100% |█████████████████| 353/353 [8.6s elapsed, 0s remaining, 96.2 samples/s]       


INFO:eta.core.utils: 100% |█████████████████| 353/353 [8.6s elapsed, 0s remaining, 96.2 samples/s]       


Dataset 'open-images-v6-train-500' created


INFO:fiftyone.zoo.datasets:Dataset 'open-images-v6-train-500' created


 100% |█████████████████| 353/353 [1.4s elapsed, 0s remaining, 268.2 samples/s]         


INFO:eta.core.utils: 100% |█████████████████| 353/353 [1.4s elapsed, 0s remaining, 268.2 samples/s]         


Exported and organized dataset for 'Bear' at /content/dataset/Bear
Downloading Open Images subset for class 'Bird'...
Downloading split 'train' to '/root/fiftyone/open-images-v6/train' if necessary


INFO:fiftyone.zoo.datasets:Downloading split 'train' to '/root/fiftyone/open-images-v6/train' if necessary


Downloading 500 images


INFO:fiftyone.utils.openimages:Downloading 500 images


 100% |███████████████████| 500/500 [18.2s elapsed, 0s remaining, 29.8 files/s]      


INFO:eta.core.utils: 100% |███████████████████| 500/500 [18.2s elapsed, 0s remaining, 29.8 files/s]      


Dataset info written to '/root/fiftyone/open-images-v6/info.json'


INFO:fiftyone.zoo.datasets:Dataset info written to '/root/fiftyone/open-images-v6/info.json'


Loading 'open-images-v6' split 'train'


INFO:fiftyone.zoo.datasets:Loading 'open-images-v6' split 'train'


 100% |█████████████████| 500/500 [24.3s elapsed, 0s remaining, 16.1 samples/s]      


INFO:eta.core.utils: 100% |█████████████████| 500/500 [24.3s elapsed, 0s remaining, 16.1 samples/s]      


Dataset 'open-images-v6-train-500' created


INFO:fiftyone.zoo.datasets:Dataset 'open-images-v6-train-500' created


 100% |█████████████████| 500/500 [2.2s elapsed, 0s remaining, 235.1 samples/s]      


INFO:eta.core.utils: 100% |█████████████████| 500/500 [2.2s elapsed, 0s remaining, 235.1 samples/s]      


Exported and organized dataset for 'Bird' at /content/dataset/Bird
Downloading Open Images subset for class 'Person'...
Downloading split 'train' to '/root/fiftyone/open-images-v6/train' if necessary


INFO:fiftyone.zoo.datasets:Downloading split 'train' to '/root/fiftyone/open-images-v6/train' if necessary


Downloading 500 images


INFO:fiftyone.utils.openimages:Downloading 500 images


 100% |███████████████████| 500/500 [18.4s elapsed, 0s remaining, 29.5 files/s]      


INFO:eta.core.utils: 100% |███████████████████| 500/500 [18.4s elapsed, 0s remaining, 29.5 files/s]      


Dataset info written to '/root/fiftyone/open-images-v6/info.json'


INFO:fiftyone.zoo.datasets:Dataset info written to '/root/fiftyone/open-images-v6/info.json'


Loading 'open-images-v6' split 'train'


INFO:fiftyone.zoo.datasets:Loading 'open-images-v6' split 'train'


 100% |█████████████████| 500/500 [43.3s elapsed, 0s remaining, 8.1 samples/s]       


INFO:eta.core.utils: 100% |█████████████████| 500/500 [43.3s elapsed, 0s remaining, 8.1 samples/s]       


Dataset 'open-images-v6-train-500' created


INFO:fiftyone.zoo.datasets:Dataset 'open-images-v6-train-500' created


 100% |█████████████████| 500/500 [4.3s elapsed, 0s remaining, 114.5 samples/s]      


INFO:eta.core.utils: 100% |█████████████████| 500/500 [4.3s elapsed, 0s remaining, 114.5 samples/s]      


Exported and organized dataset for 'Person' at /content/dataset/Person
Downloading Open Images subset for class 'Car'...
Downloading split 'train' to '/root/fiftyone/open-images-v6/train' if necessary


INFO:fiftyone.zoo.datasets:Downloading split 'train' to '/root/fiftyone/open-images-v6/train' if necessary


Downloading 500 images


INFO:fiftyone.utils.openimages:Downloading 500 images


 100% |███████████████████| 500/500 [18.5s elapsed, 0s remaining, 31.1 files/s]      


INFO:eta.core.utils: 100% |███████████████████| 500/500 [18.5s elapsed, 0s remaining, 31.1 files/s]      


Dataset info written to '/root/fiftyone/open-images-v6/info.json'


INFO:fiftyone.zoo.datasets:Dataset info written to '/root/fiftyone/open-images-v6/info.json'


Loading 'open-images-v6' split 'train'


INFO:fiftyone.zoo.datasets:Loading 'open-images-v6' split 'train'


 100% |█████████████████| 500/500 [51.9s elapsed, 0s remaining, 10.2 samples/s]     


INFO:eta.core.utils: 100% |█████████████████| 500/500 [51.9s elapsed, 0s remaining, 10.2 samples/s]     


Dataset 'open-images-v6-train-500' created


INFO:fiftyone.zoo.datasets:Dataset 'open-images-v6-train-500' created


 100% |█████████████████| 500/500 [4.6s elapsed, 0s remaining, 110.3 samples/s]      


INFO:eta.core.utils: 100% |█████████████████| 500/500 [4.6s elapsed, 0s remaining, 110.3 samples/s]      


Exported and organized dataset for 'Car' at /content/dataset/Car
Downloading Open Images subset for class 'Truck'...
Downloading split 'train' to '/root/fiftyone/open-images-v6/train' if necessary


INFO:fiftyone.zoo.datasets:Downloading split 'train' to '/root/fiftyone/open-images-v6/train' if necessary


Downloading 500 images


INFO:fiftyone.utils.openimages:Downloading 500 images


 100% |███████████████████| 500/500 [18.8s elapsed, 0s remaining, 28.5 files/s]      


INFO:eta.core.utils: 100% |███████████████████| 500/500 [18.8s elapsed, 0s remaining, 28.5 files/s]      


Dataset info written to '/root/fiftyone/open-images-v6/info.json'


INFO:fiftyone.zoo.datasets:Dataset info written to '/root/fiftyone/open-images-v6/info.json'


Loading 'open-images-v6' split 'train'


INFO:fiftyone.zoo.datasets:Loading 'open-images-v6' split 'train'


 100% |█████████████████| 500/500 [51.9s elapsed, 0s remaining, 9.3 samples/s]       


INFO:eta.core.utils: 100% |█████████████████| 500/500 [51.9s elapsed, 0s remaining, 9.3 samples/s]       


Dataset 'open-images-v6-train-500' created


INFO:fiftyone.zoo.datasets:Dataset 'open-images-v6-train-500' created


 100% |█████████████████| 500/500 [4.2s elapsed, 0s remaining, 120.7 samples/s]      


INFO:eta.core.utils: 100% |█████████████████| 500/500 [4.2s elapsed, 0s remaining, 120.7 samples/s]      


Exported and organized dataset for 'Truck' at /content/dataset/Truck
Downloading Open Images subset for class 'Airplane'...
Downloading split 'train' to '/root/fiftyone/open-images-v6/train' if necessary


INFO:fiftyone.zoo.datasets:Downloading split 'train' to '/root/fiftyone/open-images-v6/train' if necessary


Downloading 500 images


INFO:fiftyone.utils.openimages:Downloading 500 images


 100% |███████████████████| 500/500 [18.5s elapsed, 0s remaining, 30.5 files/s]      


INFO:eta.core.utils: 100% |███████████████████| 500/500 [18.5s elapsed, 0s remaining, 30.5 files/s]      


Dataset info written to '/root/fiftyone/open-images-v6/info.json'


INFO:fiftyone.zoo.datasets:Dataset info written to '/root/fiftyone/open-images-v6/info.json'


Loading 'open-images-v6' split 'train'


INFO:fiftyone.zoo.datasets:Loading 'open-images-v6' split 'train'


 100% |█████████████████| 500/500 [26.3s elapsed, 0s remaining, 17.5 samples/s]      


INFO:eta.core.utils: 100% |█████████████████| 500/500 [26.3s elapsed, 0s remaining, 17.5 samples/s]      


Dataset 'open-images-v6-train-500' created


INFO:fiftyone.zoo.datasets:Dataset 'open-images-v6-train-500' created


 100% |█████████████████| 500/500 [2.4s elapsed, 0s remaining, 212.4 samples/s]      


INFO:eta.core.utils: 100% |█████████████████| 500/500 [2.4s elapsed, 0s remaining, 212.4 samples/s]      


Exported and organized dataset for 'Airplane' at /content/dataset/Airplane
Dataset processing complete.


In [None]:
# Import necessary libraries
import os
import torch
import torchvision
import torchvision.transforms as T
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import xml.etree.ElementTree as ET
import json

#########################################
# 1. Define the Classes Mapping
#########################################
# Mapping from string class names to integer labels.
# (0 is reserved for background; our classes start at 1.)
# Class name -> integer label, numbered from 1 in declaration order
# (label 0 stays reserved for background).
classes_map = {
    class_name: class_id
    for class_id, class_name in enumerate(
        ("Dog", "Cat", "Deer", "Bear", "Bird", "Person", "Car", "Truck", "Airplane"),
        start=1,
    )
}
# Inverse lookup used at inference time: integer label -> class name.
reverse_classes_map = dict(zip(classes_map.values(), classes_map.keys()))

#########################################
# 2. Define a Custom Dataset Class to Combine Multiple Datasets
#########################################
class CustomDataset(Dataset):
    """
    Object-detection dataset that merges several dataset roots into one.

    Each root must follow this structure:

        dataset_root/
            images/       -> image files
            annotations/  -> annotation files (XML, JSON, or text)

    Pairing:
      Images and annotations are matched by file stem ("0001.jpg" <-> "0001.xml"),
      so a missing or extra file cannot shift every later pair. Only when no
      stems match at all does the loader fall back to pairing by sorted position.

    Annotation parsing:
      - XML: expects an <annotation> element with <object>/<name> and <bndbox>.
      - JSON: expects a dict with key "objects" holding list of objects.
      - Text: each line should be "xmin ymin xmax ymax label".

    Labels:
      ``classes_map`` converts string class names into integer labels. A name that
      is not in the map is accepted only if it is itself an integer string; boxes
      whose class cannot be resolved are skipped rather than labelled 0, because
      0 is the background label and must not be attached to a ground-truth box.

    Each item is ``(image, target, meta)`` where ``target`` holds "boxes"
    (float32, shape (N, 4)) and "labels" (int64, shape (N,)), and ``meta`` holds
    the image and annotation paths to help track errors.
    """

    def __init__(self, roots, transforms=None, classes_map=None):
        """
        Args:
            roots: a dataset root path or a list of them.
            transforms: optional callable applied to the PIL image only.
            classes_map: optional dict mapping class names to integer labels.
        """
        # Ensure roots is a list
        if isinstance(roots, str):
            roots = [roots]
        self.samples = []  # list of tuples: (img_path, ann_path)
        self.transforms = transforms
        self.classes_map = classes_map

        # Gather image/annotation pairs from each dataset root.
        for root in roots:
            images_dir = os.path.join(root, "images")
            annotations_dir = os.path.join(root, "annotations")
            if not os.path.isdir(images_dir) or not os.path.isdir(annotations_dir):
                print(f"Warning: Missing 'images' or 'annotations' in {root}. Skipping this root.")
                continue
            self.samples.extend(self._pair_files(images_dir, annotations_dir))
        print(f"Total samples loaded: {len(self.samples)}")

    @staticmethod
    def _list_files(directory):
        """Return the sorted names of regular files in ``directory`` (sub-directories are ignored)."""
        return sorted(
            entry for entry in os.listdir(directory)
            if os.path.isfile(os.path.join(directory, entry))
        )

    @staticmethod
    def _pair_files(images_dir, annotations_dir):
        """Return ``(img_path, ann_path)`` tuples for files that share a stem."""
        imgs = CustomDataset._list_files(images_dir)
        anns = CustomDataset._list_files(annotations_dir)

        # If two annotation files share a stem, the later one in sort order wins.
        ann_by_stem = {os.path.splitext(ann)[0]: ann for ann in anns}
        pairs = []
        for img in imgs:
            stem = os.path.splitext(img)[0]
            if stem in ann_by_stem:
                pairs.append((img, ann_by_stem[stem]))

        if not pairs and imgs and anns:
            # No shared stems at all: keep the legacy behaviour of pairing by
            # sorted position so differently named datasets still load.
            print(f"Warning: no matching file names in {images_dir}; pairing by sorted order.")
            pairs = list(zip(imgs, anns))
        elif len(pairs) < len(imgs) or len(pairs) < len(anns):
            print(
                f"Warning: {len(imgs) - len(pairs)} image(s) and "
                f"{len(anns) - len(pairs)} annotation(s) without a counterpart "
                f"in {os.path.dirname(images_dir)}; skipped."
            )

        return [
            (os.path.join(images_dir, img), os.path.join(annotations_dir, ann))
            for img, ann in pairs
        ]

    def _resolve_label(self, name):
        """Map a class name to its integer label, or return None if it cannot be resolved."""
        if self.classes_map and name in self.classes_map:
            return self.classes_map[name]
        try:
            return int(name)
        except (TypeError, ValueError):
            return None

    def parse_annotation(self, ann_path):
        """
        Parse one annotation file chosen by extension (.xml, .json, anything else = text).

        Returns:
            ``(boxes, labels)``: ``boxes`` is a list of ``[xmin, ymin, xmax, ymax]``
            floats and ``labels`` the matching list of integer labels. Objects
            whose class cannot be resolved are omitted from both lists.
        """
        boxes = []
        labels = []
        ext = os.path.splitext(ann_path)[1].lower()
        if ext == '.xml':
            tree = ET.parse(ann_path)
            root_elem = tree.getroot()
            for obj in root_elem.findall('object'):
                label = self._resolve_label(obj.find('name').text)
                if label is None:
                    continue
                bndbox = obj.find('bndbox')
                boxes.append([
                    float(bndbox.find('xmin').text),
                    float(bndbox.find('ymin').text),
                    float(bndbox.find('xmax').text),
                    float(bndbox.find('ymax').text),
                ])
                labels.append(label)
        elif ext == '.json':
            with open(ann_path, 'r') as f:
                data = json.load(f)
            for obj in data.get("objects", []):
                label = self._resolve_label(obj.get("name", ""))
                if label is None:
                    continue
                bndbox = obj.get("bndbox", {})
                boxes.append([
                    float(bndbox.get("xmin", 0)),
                    float(bndbox.get("ymin", 0)),
                    float(bndbox.get("xmax", 0)),
                    float(bndbox.get("ymax", 0)),
                ])
                labels.append(label)
        else:
            with open(ann_path, 'r') as f:
                for line in f:
                    parts = line.strip().split()
                    if len(parts) != 5:
                        continue
                    xmin, ymin, xmax, ymax = map(float, parts[:4])
                    label = int(parts[4])
                    boxes.append([xmin, ymin, xmax, ymax])
                    labels.append(label)
        return boxes, labels

    @staticmethod
    def _build_target(boxes, labels):
        """Convert parsed lists into the tensor dict used as the detection target."""
        return {
            # An explicit (N, 4) shape keeps an image without boxes at (0, 4)
            # instead of the (0,) tensor that as_tensor([]) would produce.
            "boxes": torch.as_tensor(boxes, dtype=torch.float32).reshape(len(boxes), 4),
            "labels": torch.as_tensor(labels, dtype=torch.int64),
        }

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, target, meta)``."""
        img_path, ann_path = self.samples[idx]
        # The context manager closes the file handle once the pixels are converted.
        with Image.open(img_path) as raw_img:
            img = raw_img.convert("RGB")
        boxes, labels = self.parse_annotation(ann_path)

        target = self._build_target(boxes, labels)

        # Return meta info along with image and target
        meta = {"img_path": img_path, "ann_path": ann_path}

        if self.transforms is not None:
            img = self.transforms(img)

        return img, target, meta

    def __len__(self):
        """Number of (image, annotation) pairs found across all roots."""
        return len(self.samples)

#########################################
# 3. Define Data Transforms
#########################################
def get_transform(train):
    """
    Build the image transform pipeline passed to ``CustomDataset``.

    Args:
        train: accepted for interface compatibility; it currently does not
            change the pipeline (see the note below).

    Returns:
        A ``T.Compose`` that converts a PIL image to a tensor.
    """
    # No RandomHorizontalFlip here: CustomDataset calls `self.transforms(img)` on
    # the image only, so a random flip would mirror the pixels while the target
    # boxes stay in their original coordinates, corrupting about half of the
    # training samples. A flip must be applied jointly to image and boxes.
    return T.Compose([T.ToTensor()])

#########################################
# 4. Prepare the Combined Dataset and DataLoader
#########################################
# Define dataset roots (one per class). (Assuming on Colab under /content/dataset)
# One exported directory per class, all under the Colab dataset folder.
dataset_roots = [
    f"/content/dataset/{class_dir}"
    for class_dir in (
        "Dog", "Cat", "Deer", "Bear", "Bird", "Person", "Car", "Truck", "Airplane",
    )
]

# Merge every per-class root into a single training dataset.
dataset = CustomDataset(
    dataset_roots,
    classes_map=classes_map,
    transforms=get_transform(train=True),
)

# Update the collate function to handle the extra meta info.
def collate_fn(batch):
    """Regroup a batch of ``(image, target, meta)`` samples into three parallel lists."""
    # Transpose the list of samples into per-field columns.
    images, targets, metas = tuple(zip(*batch))
    return [*images], [*targets], [*metas]

# Shuffled loader yielding batches of two samples, regrouped by collate_fn.
data_loader = DataLoader(
    dataset,
    collate_fn=collate_fn,
    num_workers=4,
    shuffle=True,
    batch_size=2,
)

#########################################
# 5. Build the Object Detection Model
#########################################
def get_model(num_classes):
    """Create a Faster R-CNN (ResNet-50 FPN) detector with a new box-prediction head.

    Args:
        num_classes: Number of outputs for the new head. The caller in this
            notebook passes ``1 + len(classes_map)`` (background included).

    Returns:
        The torchvision detection model whose ``roi_heads.box_predictor`` has
        been replaced by a ``FastRCNNPredictor`` sized for ``num_classes``.
    """
    # `pretrained=True` is deprecated since torchvision 0.13; `weights="DEFAULT"`
    # selects the pretrained COCO weights without the deprecation warning.
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights="DEFAULT")
    # Reuse the input width of the stock classifier so the new head plugs into
    # the existing ROI features.
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    return model

# One output per entry in classes_map plus one for the background class.
num_classes = len(classes_map) + 1
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = get_model(num_classes)
model.to(device)

#########################################
# 6. Define the Optimizer and Learning Rate Scheduler
#########################################
# Hand only the parameters that still require gradients to the optimizer.
params = list(filter(lambda p: p.requires_grad, model.parameters()))
optimizer = torch.optim.SGD(
    params,
    lr=0.005,
    momentum=0.9,
    weight_decay=0.0005,
)
# StepLR multiplies the learning rate by `gamma` every `step_size` scheduler steps.
lr_scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer,
    step_size=3,
    gamma=0.1,
)

#########################################
# 7. Training Loop with Exception Handling
#########################################
error_log = []  # One dict per sample that could not be processed (paths + error text).


def _batch_to_device(images, targets, device):
    """Move a list of image tensors and a list of target dicts onto `device`."""
    images_on_device = [img.to(device) for img in images]
    targets_on_device = [{k: v.to(device) for k, v in t.items()} for t in targets]
    return images_on_device, targets_on_device


num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    iteration = 0
    for images, targets, metas in data_loader:
        # First attempt: run the whole batch in one forward pass.
        try:
            images_device, targets_device = _batch_to_device(images, targets, device)
            loss_dict = model(images_device, targets_device)
            loss = sum(loss_dict.values())
        except Exception as batch_error:
            # The batch failed as a whole. Report why, then probe every sample
            # on its own so only the offending ones are dropped and logged.
            print(f"Batch failed ({batch_error}); checking samples individually.")
            images_device = []
            targets_device = []
            for image, target, meta in zip(images, targets, metas):
                try:
                    img_i = image.to(device)
                    target_i = {k: v.to(device) for k, v in target.items()}
                    # The probe output is discarded, so skip autograd bookkeeping.
                    with torch.no_grad():
                        model([img_i], [target_i])
                    # Keep the device copies so the retry needs no second transfer.
                    images_device.append(img_i)
                    targets_device.append(target_i)
                except Exception as ex:
                    error_info = {
                        "img_path": meta["img_path"],
                        "ann_path": meta["ann_path"],
                        "error": str(ex)
                    }
                    error_log.append(error_info)
                    print(f"Error processing sample: {error_info}")
            if not images_device:
                print("Skipping entire batch due to errors.")
                continue
            # Retry with the samples that passed the probe, this time with gradients.
            loss_dict = model(images_device, targets_device)
            loss = sum(loss_dict.values())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if iteration % 10 == 0:
            print(f"Epoch {epoch}, Iteration {iteration}, Loss: {loss.item():.4f}")
        iteration += 1

    # The scheduler is stepped once per epoch, after all batches.
    lr_scheduler.step()
    print(f"Epoch {epoch} completed.")

print("Training complete.")
print(f"Total error samples encountered: {len(error_log)}")
if error_log:
    print("Error details:")
    for err in error_log:
        print(err)

# Save the trained model weights.
torch.save(model.state_dict(), "fasterrcnn_model.pth")

#########################################
# 8. Inference: Object Detection Demo on Sample Images
#########################################
def predict_and_plot(image_path, model, device, threshold=0.5):
    """Run the detector on one image file and draw the detections scoring above `threshold`.

    Args:
        image_path: Path of the image to open.
        model: Detection model. It is called with a list holding one image
            tensor and must return a list whose first item has ``'boxes'``,
            ``'scores'`` and ``'labels'`` entries.
        device: Device the image tensor is moved to before inference.
        threshold: Detections whose score is not strictly greater than this
            value are not drawn.

    Side effects:
        Puts ``model`` into evaluation mode (it is not switched back) and shows
        a matplotlib figure. Class names come from the module-level
        ``reverse_classes_map``; unknown label ids are shown as "N/A".
    """
    model.eval()  # Switch to evaluation mode before inference.
    img = Image.open(image_path).convert("RGB")
    to_tensor = T.Compose([T.ToTensor()])
    batch = [to_tensor(img).to(device)]

    with torch.no_grad():
        detections = model(batch)[0]

    fig, ax = plt.subplots(1, figsize=(12, 8))
    ax.imshow(img)

    # Draw one rectangle and one caption per sufficiently confident detection.
    for idx, box in enumerate(detections['boxes']):
        score = detections['scores'][idx].item()
        if not score > threshold:
            continue
        # Bring the coordinates to the CPU as plain numbers for matplotlib.
        xmin, ymin, xmax, ymax = box.cpu().numpy()
        ax.add_patch(
            patches.Rectangle(
                (xmin, ymin),
                xmax - xmin,
                ymax - ymin,
                linewidth=2,
                edgecolor='r',
                facecolor='none',
            )
        )
        label_str = reverse_classes_map.get(detections['labels'][idx].item(), "N/A")
        ax.text(xmin, ymin, f"{label_str} {score:.2f}", color='yellow', fontsize=12)
    plt.axis('off')
    plt.show()



# Example usage: Run inference on random sample images.
# Assuming your combined dataset is in the variable 'dataset' (an instance of CustomDataset)
# and that dataset.samples is a list of (img_path, ann_path) tuples.
# Collect the image path (first element) of every entry in dataset.samples.
all_img_paths = [sample[0] for sample in dataset.samples]

# Preview up to 10 random images. random.sample raises ValueError when asked
# for more items than the population holds, so cap the request at the dataset size.
num_preview_images = min(10, len(all_img_paths))
random_img_paths = random.sample(all_img_paths, num_preview_images)

# Run inference on each selected image.
for img_path in random_img_paths:
    print(f"Processing: {img_path}")
    predict_and_plot(img_path, model, device)



Total samples loaded: 4353
Epoch 0, Iteration 0, Loss: 2.5979
Epoch 0, Iteration 10, Loss: 2.3398
Epoch 0, Iteration 20, Loss: 0.2880
Epoch 0, Iteration 30, Loss: 0.5760
Epoch 0, Iteration 40, Loss: 0.5795
Epoch 0, Iteration 50, Loss: 0.3633
Epoch 0, Iteration 60, Loss: 0.5078
Epoch 0, Iteration 70, Loss: 0.9142
Epoch 0, Iteration 80, Loss: 0.8824
Epoch 0, Iteration 90, Loss: 0.3628
Epoch 0, Iteration 100, Loss: 0.2542
Epoch 0, Iteration 110, Loss: 1.1353
Epoch 0, Iteration 120, Loss: 0.2646
Epoch 0, Iteration 130, Loss: 0.3074
Epoch 0, Iteration 140, Loss: 1.2325
Epoch 0, Iteration 150, Loss: 0.4288
Epoch 0, Iteration 160, Loss: 0.2887
Epoch 0, Iteration 170, Loss: 0.8840
Epoch 0, Iteration 180, Loss: 0.6128
Epoch 0, Iteration 190, Loss: 1.1333
Epoch 0, Iteration 200, Loss: 1.3912
Epoch 0, Iteration 210, Loss: 0.5892
Epoch 0, Iteration 220, Loss: 0.2279
Epoch 0, Iteration 230, Loss: 0.2421
Epoch 0, Iteration 240, Loss: 0.5662
Epoch 0, Iteration 250, Loss: 0.8340
Epoch 0, Iteration 260

FileNotFoundError: [Errno 2] No such file or directory: '/content/dataset/Dog/images/sample.jpg'

In [None]:
import random
import matplotlib.pyplot as plt

# Function to perform inference and plot a single image.
def predict_and_plot(image_path, model, device, threshold=0.5):
    """Run the detector on one image file and draw the detections scoring above `threshold`.

    Args:
        image_path: Path of the image to open.
        model: Detection model. It is called with a list holding one image
            tensor and must return a list whose first item has ``'boxes'``,
            ``'scores'`` and ``'labels'`` entries.
        device: Device the image tensor is moved to before inference.
        threshold: Detections whose score is not strictly greater than this
            value are not drawn.

    Side effects:
        Puts ``model`` into evaluation mode (it is not switched back) and shows
        a matplotlib figure. Class names come from the module-level
        ``reverse_classes_map``; unknown label ids are shown as "N/A".
    """
    model.eval()  # Switch to evaluation mode before inference.
    img = Image.open(image_path).convert("RGB")
    to_tensor = T.Compose([T.ToTensor()])
    batch = [to_tensor(img).to(device)]

    with torch.no_grad():
        detections = model(batch)[0]

    fig, ax = plt.subplots(1, figsize=(12, 8))
    ax.imshow(img)

    # Draw one rectangle and one caption per sufficiently confident detection.
    for idx, box in enumerate(detections['boxes']):
        score = detections['scores'][idx].item()
        if not score > threshold:
            continue
        # Bring the coordinates to the CPU as plain numbers for matplotlib.
        xmin, ymin, xmax, ymax = box.cpu().numpy()
        ax.add_patch(
            patches.Rectangle(
                (xmin, ymin),
                xmax - xmin,
                ymax - ymin,
                linewidth=2,
                edgecolor='r',
                facecolor='none',
            )
        )
        label_str = reverse_classes_map.get(detections['labels'][idx].item(), "N/A")
        ax.text(xmin, ymin, f"{label_str} {score:.2f}", color='yellow', fontsize=12)
    plt.axis('off')
    plt.show()

# Assuming your combined dataset is in the variable 'dataset' (an instance of CustomDataset)
# and that dataset.samples is a list of (img_path, ann_path) tuples.
# Collect the image path (first element) of every entry in dataset.samples.
all_img_paths = [sample[0] for sample in dataset.samples]

# Preview up to 10 random images. random.sample raises ValueError when asked
# for more items than the population holds, so cap the request at the dataset size.
num_preview_images = min(10, len(all_img_paths))
random_img_paths = random.sample(all_img_paths, num_preview_images)

# Run inference on each selected image.
for img_path in random_img_paths:
    print(f"Processing: {img_path}")
    predict_and_plot(img_path, model, device)


Output hidden; open in https://colab.research.google.com to view.