In [11]:
import fiftyone as fo
import fiftyone.zoo as foz

classes_to_keep = [
    "person",
    "bicycle",
    "car",
    "motorcycle",
    "airplane",
]

train_dataset_name = "coco_tra_1000"
if train_dataset_name in fo.list_datasets():
    # Delete the existing dataset
    fo.Dataset(train_dataset_name).delete()

val_dataset_name = "coco_vali_100"
if val_dataset_name in fo.list_datasets():
    # Delete the existing dataset
    fo.Dataset(val_dataset_name).delete()

train_dataset = foz.load_zoo_dataset(
    "coco-2017",
    label_types=["detections"],
    split="train",
    classes=classes_to_keep,
    max_samples=1000,
    shuffle=True,
    seed=123,
    only_matching=True,
    num_workers=10,
    dataset_name=train_dataset_name,
)

val_dataset = foz.load_zoo_dataset(
    "coco-2017",
    label_types=["detections"],
    split="validation",
    classes=classes_to_keep,
    max_samples=100,
    shuffle=True,
    seed=123,
    only_matching=True,
    num_workers=10,
    dataset_name=val_dataset_name,
)


Downloading split 'train' to '/home/yipeng/fiftyone/coco-2017/train' if necessary
Downloading annotations to '/home/yipeng/fiftyone/coco-2017/tmp-download/annotations_trainval2017.zip'
 100% |██████|    1.9Gb/1.9Gb [17.3s elapsed, 0s remaining, 198.4Mb/s]      
Extracting annotations to '/home/yipeng/fiftyone/coco-2017/raw/instances_train2017.json'
Downloading 1000 images
 100% |████████████████| 1000/1000 [1.1m elapsed, 0s remaining, 16.6 images/s]      
Writing annotations for 1000 downloaded samples to '/home/yipeng/fiftyone/coco-2017/train/labels.json'
Dataset info written to '/home/yipeng/fiftyone/coco-2017/info.json'
Loading 'coco-2017' split 'train'
 100% |███████████████| 1000/1000 [2.0s elapsed, 0s remaining, 510.8 samples/s]      
Dataset 'coco_tr_1000' created
Downloading split 'validation' to '/home/yipeng/fiftyone/coco-2017/validation' if necessary
Found annotations at '/home/yipeng/fiftyone/coco-2017/raw/instances_val2017.json'
Downloading 100 images
 100% |██████████████

In [12]:
import os
import cv2 as cv

output_dir = "resized_dataset"
os.makedirs(output_dir, exist_ok=True)

resized_image_size = (224, 224)

def resize_image_and_annotations(sample):

    # Resize image
    image_path = sample.filepath
    image = cv.imread(image_path)
    height, width = image.shape[:2]
    image = cv.resize(image, resized_image_size)

    # Update the file path
    resized_image_path = os.path.join(output_dir, os.path.basename(image_path))
    sample.filepath = resized_image_path

    # Save the resized image
    cv.imwrite(resized_image_path, image)

    # Update the bounding boxes
    scale_x = resized_image_size[0] / float(width)
    scale_y = resized_image_size[1] / float(height)

    for det in sample["ground_truth"].detections:
        x1, y1, x2, y2 = det.bounding_box
        x1 = x1 * scale_x
        x2 = x2 * scale_x
        y1 = y1 * scale_y
        y2 = y2 * scale_y
        det.bounding_box = [x1, y1, x2, y2]

    sample.save()

for sample in train_dataset:
    resize_image_and_annotations(sample)

for sample in val_dataset:
    resize_image_and_annotations(sample)



In [19]:
import tensorflow as tf
import numpy as np

def convert_to_tf_dataset(fiftyone_dataset):
    images = []
    bboxes = []
    labels = []

    for sample in fiftyone_dataset:
        image = cv.imread(sample.filepath)
        image = cv.cvtColor(image, cv.COLOR_BGR2RGB)
        images.append(image)

        sample_bboxes = []
        sample_labels = []
        for det in sample["ground_truth"].detections:
            x1, y1, x2, y2 = det.bounding_box
            sample_bboxes.append([y1, x1, y2, x2])  # Convert to [ymin, xmin, ymax, xmax] format
            sample_labels.append(classes_to_keep.index(det.label) + 1)
        bboxes.append(sample_bboxes)
        labels.append(sample_labels)

    images = np.array(images)
    bboxes = tf.keras.preprocessing.sequence.pad_sequences(bboxes, padding="post", dtype='float32')
    labels = tf.keras.preprocessing.sequence.pad_sequences(labels, padding="post", dtype='float32')

    return tf.data.Dataset.from_tensor_slices((images, {"bbox_output": bboxes, "class_output": labels}))



train_tf_dataset = convert_to_tf_dataset(train_dataset)
val_tf_dataset = convert_to_tf_dataset(val_dataset)


In [23]:
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import Input, Conv2D, Reshape
from tensorflow.keras.models import Model

def create_mobilenetv2_object_detection_model(num_classes, input_shape=(224, 224, 3)):
    backbone = MobileNetV2(weights="imagenet", include_top=False, input_shape=input_shape)

    x = backbone.output
    x = Conv2D(num_classes, (1, 1), activation="sigmoid", name="class_predictions")(x)
    bbox_output = Conv2D(4, (1, 1), name="bbox_predictions")(x)

    class_output = Reshape((-1, num_classes), name='class_output')(x)
    bbox_output = Reshape((-1, 4), name='bbox_output')(bbox_output)

    return Model(inputs=backbone.input, outputs=[class_output, bbox_output])


num_classes = len(classes_to_keep) + 1
object_detection_model = create_mobilenetv2_object_detection_model(num_classes)


In [24]:
object_detection_model.summary()


Model: "model_4"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_5 (InputLayer)           [(None, 224, 224, 3  0           []                               
                                )]                                                                
                                                                                                  
 Conv1 (Conv2D)                 (None, 112, 112, 32  864         ['input_5[0][0]']                
                                )                                                                 
                                                                                                  
 bn_Conv1 (BatchNormalization)  (None, 112, 112, 32  128         ['Conv1[0][0]']                  
                                )                                                           

In [25]:
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import BinaryCrossentropy, Huber

def loss_fn(y_true, y_pred):
    bbox_true = y_true[0]
    bbox_pred = y_pred["bbox_output"]
    label_true = y_true[1]
    label_pred = y_pred["class_output"]

    bbox_loss = tf.reduce_sum(tf.abs(bbox_true - bbox_pred), axis=-1)
    label_loss = tf.keras.losses.sparse_categorical_crossentropy(label_true, label_pred, from_logits=True)

    return {"bbox_output": bbox_loss, "class_output": label_loss}




optimizer = Adam(learning_rate=1e-4)

object_detection_model.compile(optimizer=optimizer, loss=loss_fn)

train_tf_dataset = train_tf_dataset.batch(16).repeat()
val_tf_dataset = val_tf_dataset.batch(16)

history = object_detection_model.fit(train_tf_dataset, epochs=10, steps_per_epoch=len(train_dataset) // 16, validation_data=val_tf_dataset, validation_steps=len(val_dataset) // 16)



Epoch 1/10


ValueError: in user code:

    File "/home/yipeng/miniconda3/envs/tf/lib/python3.9/site-packages/keras/engine/training.py", line 1249, in train_function  *
        return step_function(self, iterator)
    File "/home/yipeng/miniconda3/envs/tf/lib/python3.9/site-packages/keras/engine/training.py", line 1233, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/home/yipeng/miniconda3/envs/tf/lib/python3.9/site-packages/keras/engine/training.py", line 1222, in run_step  **
        outputs = model.train_step(data)
    File "/home/yipeng/miniconda3/envs/tf/lib/python3.9/site-packages/keras/engine/training.py", line 1023, in train_step
        y_pred = self(x, training=True)
    File "/home/yipeng/miniconda3/envs/tf/lib/python3.9/site-packages/keras/utils/traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "/home/yipeng/miniconda3/envs/tf/lib/python3.9/site-packages/keras/engine/input_spec.py", line 295, in assert_input_compatibility
        raise ValueError(

    ValueError: Input 0 of layer "model_4" is incompatible with the layer: expected shape=(None, 224, 224, 3), found shape=(None, None, 224, 224, 3)
