In [6]:
import fiftyone as fo
import fiftyone.zoo as foz

# Class names passed as the `classes` filter to both zoo loads below.
classes_to_keep = ["person", "bicycle", "car", "motorcycle", "airplane"]

# Arguments that are identical for the train and validation loads; only the
# split name and the sample cap differ between the two calls.
_zoo_kwargs = dict(
    label_types=["detections"],
    classes=classes_to_keep,
    shuffle=True,
    seed=1,
    only_matching=True,
    num_workers=10,
)

train_dataset = foz.load_zoo_dataset(
    "coco-2017", split="train", max_samples=1000, **_zoo_kwargs
)

val_dataset = foz.load_zoo_dataset(
    "coco-2017", split="validation", max_samples=100, **_zoo_kwargs
)


Downloading split 'train' to '/home/yipeng/fiftyone/coco-2017/train' if necessary
Found annotations at '/home/yipeng/fiftyone/coco-2017/raw/instances_train2017.json'
Sufficient images already downloaded
Existing download of split 'train' is sufficient
Loading 'coco-2017' split 'train'
 100% |███████████████| 1000/1000 [1.9s elapsed, 0s remaining, 527.3 samples/s]      
Dataset 'coco-2017-train-1000' created
Downloading split 'validation' to '/home/yipeng/fiftyone/coco-2017/validation' if necessary
Found annotations at '/home/yipeng/fiftyone/coco-2017/raw/instances_val2017.json'
Sufficient images already downloaded
Existing download of split 'validation' is sufficient
Loading 'coco-2017' split 'validation'
 100% |█████████████████| 100/100 [202.8ms elapsed, 0s remaining, 493.2 samples/s]     
Dataset 'coco-2017-validation-100' created


In [5]:
from PIL import Image
import os
from concurrent.futures import ThreadPoolExecutor

# Target size handed to PIL's Image.resize for every image.
resized_image_size = (256, 256)

# Directory that receives the resized copies; created up front so the worker
# threads can write into it without racing to create it.
output_dir = "resized_dataset"
os.makedirs(output_dir, exist_ok=True)

def resize_image_and_annotations(sample):
    """Resize one sample's image and repoint the sample at the resized copy.

    The image at ``sample.filepath`` is resized to ``resized_image_size`` and
    written into ``output_dir`` under its original base name. The sample's
    ``filepath`` is then updated to the new location and the sample is saved.

    Bounding boxes are intentionally left untouched: FiftyOne stores
    ``Detection.bounding_box`` as *relative* ``[top-left-x, top-left-y, width,
    height]`` values in ``[0, 1]``, so they stay valid when the image is
    rescaled. Multiplying them by the pixel resize ratio would corrupt every
    box.

    Args:
        sample: object exposing ``filepath`` and ``save()`` (a FiftyOne sample
            in this notebook).

    Returns:
        None. Images that cannot be read are reported with a warning and the
        sample is left unmodified.
    """
    image_path = sample.filepath
    resized_image_path = os.path.join(output_dir, os.path.basename(image_path))

    try:
        # Image.open is lazy: a truncated/corrupt file only fails once pixel
        # data is decoded, which happens inside resize(). Keeping both calls in
        # the try block catches that case, and the context manager closes the
        # underlying file handle.
        with Image.open(image_path) as image:
            resized = image.resize(resized_image_size)
    except IOError:
        print(f"Warning: Unable to read image {image_path}. Skipping.")
        return

    # Write the file before touching the sample so that a failed save never
    # leaves the sample pointing at a non-existent image.
    resized.save(resized_image_path)

    # NOTE(review): if sample.metadata already holds the original width/height
    # it is stale after this point -- confirm whether it needs recomputing.
    sample.filepath = resized_image_path
    sample.save()

# Number of threads to use for parallel processing
num_threads = 8

# Resize images and annotations for the train dataset, then the validation
# dataset. Each dataset gets its own pool, which is drained and shut down
# before the next one starts.
for _fo_dataset in (train_dataset, val_dataset):
    with ThreadPoolExecutor(max_workers=num_threads) as executor:
        # executor.map only re-raises a worker's exception when its result is
        # retrieved. Materialising the iterator makes failures visible here
        # instead of being silently discarded.
        list(executor.map(resize_image_and_annotations, _fo_dataset))





In [None]:
import numpy as np
import cv2
import tensorflow as tf

# def dataset_generator(fiftyone_dataset):
#     for sample in fiftyone_dataset:
#         # Load image
#         image = cv2.imread(sample.filepath)
#         image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
#         image = image.astype(np.float32) / 255.0
#
#         # Get ground truth bounding boxes and labels
#         bboxes = []
#         labels = []
#         for det in sample["ground_truth"].detections:
#             x1, y1, x2, y2 = det.bounding_box
#             label = det.label
#             class_id = fiftyone_dataset.info["classes"][label]
#
#             bboxes.append([y1, x1, y2, x2])  # Format: [ymin, xmin, ymax, xmax]
#             labels.append(class_id)
#
#         # Format target dictionary
#         target = {
#             "bounding_boxes": np.array(bboxes, dtype=np.float32),
#             "class_labels": np.array(labels, dtype=np.int64),
#         }
#
#         yield image, target

def dataset_generator(fiftyone_dataset):
    """Yield ``(image, target)`` pairs for every readable sample of a dataset.

    Args:
        fiftyone_dataset: iterable of samples exposing ``filepath`` and a
            ``"ground_truth"`` field with a ``detections`` list. Its
            ``info["classes"]`` entry is indexed by label name.

    Yields:
        tuple: ``(image, target)`` where ``image`` is the RGB image as float32
        scaled to ``[0, 1]`` and ``target`` is a dict with

        * ``"bounding_boxes"``: float32 array of shape ``(N, 4)`` in
          ``[ymin, xmin, ymax, xmax]`` order, in the same relative units as
          the stored boxes;
        * ``"class_labels"``: float32 one-hot array of shape
          ``(N, num_classes)``.

    Notes:
        ``num_classes`` is read from the notebook's global namespace, so the
        cell that defines it must have run before the generator is consumed.
    """
    for sample in fiftyone_dataset:
        # Load image; cv2.imread signals failure by returning None, not by raising.
        image = cv2.imread(sample.filepath)
        if image is None:
            print(f"Warning: Unable to read image {sample.filepath}. Skipping.")
            continue
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = image.astype(np.float32) / 255.0

        # Get ground truth bounding boxes and labels
        ground_truth = sample["ground_truth"]
        detections = ground_truth.detections if ground_truth is not None else []

        bboxes = []
        labels = []
        for det in detections:
            # FiftyOne stores boxes as [top-left-x, top-left-y, width, height];
            # the bottom-right corner has to be derived from the extent.
            x, y, w, h = det.bounding_box
            # NOTE(review): assumes info["classes"] maps a label name to an
            # integer id in [0, num_classes) -- confirm against the dataset.
            class_id = fiftyone_dataset.info["classes"][det.label]

            bboxes.append([y, x, y + h, x + w])  # Format: [ymin, xmin, ymax, xmax]
            labels.append(class_id)

        # reshape keeps the (N, 4) layout even when a sample has no boxes.
        target_bbox = np.array(bboxes, dtype=np.float32).reshape(-1, 4)
        target_cls = np.eye(num_classes, dtype=np.float32)[np.array(labels, dtype=np.int64)]

        yield image, {"bounding_boxes": target_bbox, "class_labels": target_cls}



In [None]:
output_signature = (
    tf.TensorSpec(shape=(256, 256, 3), dtype=tf.float32),
    {
        "bounding_boxes": tf.TensorSpec(shape=(None, 4), dtype=tf.float32),
        # dataset_generator one-hot encodes the labels with np.eye, so each
        # element is a 2-D (num_boxes, num_classes) array, not a 1-D id vector.
        "class_labels": tf.TensorSpec(shape=(None, None), dtype=tf.float32),
    },
)


def _as_tf_dataset(fiftyone_dataset):
    """Wrap ``dataset_generator`` over ``fiftyone_dataset`` in a tf.data.Dataset.

    The source dataset is captured as a function argument. A bare
    ``lambda: dataset_generator(train_dataset)`` would look the global name up
    only when the generator first runs, by which time the name has been
    rebound to the tf.data.Dataset itself.
    """
    return tf.data.Dataset.from_generator(
        lambda: dataset_generator(fiftyone_dataset),
        output_signature=output_signature,
    )


# NOTE: these rebind the FiftyOne datasets to tf.data pipelines, so this cell
# must be run exactly once after the datasets are loaded.
train_dataset = _as_tf_dataset(train_dataset)
val_dataset = _as_tf_dataset(val_dataset)


In [None]:
BATCH_SIZE = 32
auto=tf.data.AUTOTUNE

# Images carry a variable number of boxes, so plain .batch() cannot stack them;
# padded_batch pads each component to the longest element in the batch.
# cache() comes before repeat() so the data is stored once rather than twice;
# the element order seen by the batching step is unchanged.
train_dataset = train_dataset.cache().repeat(2).padded_batch(BATCH_SIZE).prefetch(auto)
val_dataset = val_dataset.cache().padded_batch(BATCH_SIZE).prefetch(auto)

In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, Reshape, Concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2
from tensorflow.keras import layers

# Side length used for the square model input.
image_size = 256

# Width of the classification head. The original note says this count includes
# the background class.
# NOTE(review): classes_to_keep lists five object classes, so confirm whether a
# separate background slot is actually intended here.
num_classes = 5

def create_mobilenetv2_ssd_model(num_classes, input_shape=(256, 256, 3)):
    """Build a MobileNetV2 backbone with a box-regression and a class head.

    Despite the name, the heads are plain dense layers on globally pooled
    backbone features rather than SSD-style multi-scale anchor heads.

    Args:
        num_classes: width of the softmax class head; the box head predicts
            ``num_classes`` boxes of 4 sigmoid-activated values each.
        input_shape: input image shape passed to MobileNetV2. Defaults to the
            previously hard-coded ``(256, 256, 3)``.

    Returns:
        A Keras ``Model`` with two outputs, in order: ``"bounding_boxes"`` of
        shape ``(batch, num_classes, 4)`` and ``"class_labels"`` of shape
        ``(batch, num_classes)``.
    """
    # Load the MobileNetV2 model without the top layers
    base_model = MobileNetV2(weights="imagenet", include_top=False, input_shape=input_shape)

    # Pool the backbone feature map into one vector shared by both heads
    x = layers.GlobalAveragePooling2D()(base_model.output)

    # Bounding box output. The name must sit on the layer that produces the
    # model output (the Reshape), because Keras matches loss-dict keys against
    # output-layer names; naming only the Dense leaves the output unnamed.
    bbox_flat = layers.Dense(num_classes * 4, activation="sigmoid", name="bounding_boxes_flat")(x)
    bbox_output = layers.Reshape((-1, 4), name="bounding_boxes")(bbox_flat)

    # Class label output
    cls_output = layers.Dense(num_classes, activation="softmax", name="class_labels")(x)

    # Create the final model
    return Model(inputs=base_model.input, outputs=[bbox_output, cls_output])




In [None]:
# Build the detector with `num_classes` outputs per head and print its layers.
model = create_mobilenetv2_ssd_model(num_classes=num_classes)
model.summary()


In [None]:
import tensorflow as tf

# Focal loss alpha: weight applied to the positive term (negatives get 1 - ALPHA).
ALPHA = 0.25

# Focal loss gamma: exponent of the modulating factor.
GAMMA = 2.0

# Weight for the localization loss inside combined_loss.
LOC_WEIGHT = 1.0

def smooth_l1_loss(y_true, y_pred):
    """Smooth-L1 loss summed over the last axis.

    Each element contributes ``0.5 * d**2`` when ``|d| < 1`` and ``|d| - 0.5``
    otherwise, where ``d = y_true - y_pred``.

    Args:
        y_true: target values.
        y_pred: predicted values, broadcast-compatible with ``y_true``.

    Returns:
        Tensor with the last axis reduced by summation.
    """
    residual = tf.abs(y_true - y_pred)
    quadratic_branch = 0.5 * tf.square(residual)
    linear_branch = residual - 0.5
    per_element = tf.where(tf.less(residual, 1.0), quadratic_branch, linear_branch)
    return tf.reduce_sum(per_element, axis=-1)

def focal_loss(y_true, y_pred):
    """Focal loss summed over the last axis.

    Entries where ``y_true == 1`` contribute
    ``-ALPHA * (1 - p)**GAMMA * log(p)``; entries where ``y_true < 1``
    contribute ``-(1 - ALPHA) * p**GAMMA * log(1 - p)``.

    A small epsilon is added inside both logarithms. Without it a saturated
    prediction (``p == 0`` or ``p == 1``) evaluates ``log(0) = -inf``, and
    multiplying that by a zero mask or a zero modulating factor yields NaN,
    which poisons the whole sum.

    Args:
        y_true: target indicator values.
        y_pred: predicted probabilities, same shape as ``y_true``.

    Returns:
        Tensor with the last axis reduced by summation.
    """
    eps = 1e-7  # keeps both logarithms finite at p == 0 and p == 1
    pos_mask = tf.cast(tf.equal(y_true, 1), dtype=tf.float32)
    neg_mask = tf.cast(tf.less(y_true, 1), dtype=tf.float32)
    pos_loss = -ALPHA * tf.pow(1 - y_pred, GAMMA) * tf.math.log(y_pred + eps) * pos_mask
    neg_loss = -(1 - ALPHA) * tf.pow(y_pred, GAMMA) * tf.math.log(1 - y_pred + eps) * neg_mask
    return tf.reduce_sum(pos_loss + neg_loss, axis=-1)

def combined_loss(y_true_bbox, y_pred_bbox, y_true_cls, y_pred_cls):
    """Weighted sum of the localization and classification losses.

    Args:
        y_true_bbox: target boxes, passed to ``smooth_l1_loss``.
        y_pred_bbox: predicted boxes, passed to ``smooth_l1_loss``.
        y_true_cls: target class indicators, passed to ``focal_loss``.
        y_pred_cls: predicted class probabilities, passed to ``focal_loss``.

    Returns:
        ``LOC_WEIGHT * smooth_l1_loss(...) + focal_loss(...)``.
    """
    localization_term = LOC_WEIGHT * smooth_l1_loss(y_true_bbox, y_pred_bbox)
    classification_term = focal_loss(y_true_cls, y_pred_cls)
    return localization_term + classification_term

def smooth_l1_loss_wrapper(y_true, y_pred):
    """Return a Huber-based (delta = 1.0) loss callable.

    This is a factory: the ``y_true`` / ``y_pred`` arguments of the outer
    function are not used. The returned inner function is the one that takes
    ``(y_true, y_pred)`` and produces the loss value.

    Returns:
        A function ``smooth_l1_loss(y_true, y_pred)`` giving the mean of
        ``tf.keras.losses.huber`` over all of its outputs.
    """
    def smooth_l1_loss(y_true, y_pred):
        huber_values = tf.keras.losses.huber(y_true, y_pred, delta=1.0)
        return tf.reduce_mean(huber_values)

    return smooth_l1_loss

def focal_loss_wrapper(y_true, y_pred):
    """Return a classification loss callable.

    This is a factory: the ``y_true`` / ``y_pred`` arguments of the outer
    function are not used. Note that the returned function, although named
    ``focal_loss``, computes the mean categorical cross-entropy and applies no
    focal modulation.

    Returns:
        A function ``focal_loss(y_true, y_pred)`` giving the mean of
        ``tf.keras.losses.categorical_crossentropy``.
    """
    def focal_loss(y_true, y_pred):
        cross_entropy = tf.keras.losses.categorical_crossentropy(y_true, y_pred)
        return tf.reduce_mean(cross_entropy)

    return focal_loss


In [3]:
# Define a wrapper function for the combined_loss
def loss_wrapper(y_true_bbox, y_pred_bbox, y_true_cls, y_pred_cls):
    """Bind four tensors to ``combined_loss`` and return a two-argument loss.

    Args:
        y_true_bbox: target boxes forwarded to ``combined_loss``.
        y_pred_bbox: predicted boxes forwarded to ``combined_loss``.
        y_true_cls: target class indicators forwarded to ``combined_loss``.
        y_pred_cls: predicted class probabilities forwarded to ``combined_loss``.

    Returns:
        A function taking ``(y_true, y_pred)``, both ignored, that returns
        ``combined_loss`` evaluated on the four bound arguments.
    """
    # The inner function must not itself be called `combined_loss`: that name
    # would shadow the module-level function, and the inner function would
    # call itself with four arguments and raise TypeError.
    def _bound_combined_loss(y_true, y_pred):
        return combined_loss(y_true_bbox, y_pred_bbox, y_true_cls, y_pred_cls)

    # Keep the name that callers previously saw on the returned function.
    _bound_combined_loss.__name__ = "combined_loss"
    return _bound_combined_loss

# Compile the model
# The *_wrapper helpers are factories: calling one returns the real
# (y_true, y_pred) loss function, and their own two arguments are unused.
# Keras needs those returned functions, so the factories are invoked here with
# placeholders rather than being passed in directly.
model.compile(
    optimizer='adam',
    loss={
        'bounding_boxes': smooth_l1_loss_wrapper(None, None),
        'class_labels': focal_loss_wrapper(None, None),
    },
)

# Requires the model-building and dataset cells above to have been run in this
# kernel session; otherwise `model` is undefined.
model.fit(train_dataset, epochs=100, validation_data=val_dataset)

NameError: name 'model' is not defined