# KerasCV Object Detection Training
Reference guide (KerasCV object detection): https://keras.io/guides/keras_cv/object_detection_keras_cv/

Dataset (Pascal VOC in the TensorFlow Datasets catalog): https://www.tensorflow.org/datasets/catalog/voc

In [None]:
try:
    import keras_core as keras
except:
    !pip -q install keras_core
    import keras_core as keras

try:
    import keras_cv
except:
    !pip -q install keras_cv
    import keras_cv

import os
import resource
import tqdm

import numpy as np
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import optimizers

import tensorflow_datasets as tfds

from keras_cv import bounding_box
from keras_cv import visualization

import tensorflow as tf
# Report which GPUs TensorFlow can see.
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
print(tf.config.list_physical_devices('GPU'))

# Use the first GPU when at least one is visible, otherwise fall back to the CPU.
device_name = '/GPU:0' if len(tf.config.list_physical_devices('GPU')) > 0 else "/CPU:0"

# Transfer Learning

In [None]:
# Dataloaders.

def visualize_dataset(inputs, value_range, rows, cols, bounding_box_format, path=None):
    """Plot the first element of a dataset as a gallery with ground-truth boxes.

    Class names are taken from the module-level `class_mapping`, which must be
    defined before this function is called.

    Args:
        inputs: Dataset-like object exposing `take(1)`; its elements must be
            dicts with "images" and "bounding_boxes" entries.
        value_range: Forwarded unchanged as `value_range` to the gallery plot.
        rows: Number of gallery rows.
        cols: Number of gallery columns.
        bounding_box_format: Format of the boxes being drawn (e.g. "xywh").
        path: Optional output location. When given, it is forwarded as `path`
            to `plot_bounding_box_gallery` so the plot can be written to a
            file instead of only being shown. When omitted, the call is
            identical to the one made before this parameter existed.
    """
    batch = next(iter(inputs.take(1)))
    images, bounding_boxes = batch["images"], batch["bounding_boxes"]

    # Forward `path` only on request so the default call stays unchanged.
    extra_kwargs = {} if path is None else {"path": path}

    visualization.plot_bounding_box_gallery(
        images,
        value_range=value_range,
        rows=rows,
        cols=cols,
        y_true=bounding_boxes,
        scale=5,
        font_scale=0.7,
        bounding_box_format=bounding_box_format,
        class_mapping=class_mapping,
        **extra_kwargs,
    )


def unpackage_raw_tfds_inputs(inputs, bounding_box_format):
    """Repack one raw TFDS example into the images / bounding_boxes layout.

    Args:
        inputs: Example dict providing "image" and an "objects" dict with
            "bbox" and "label" entries.
        bounding_box_format: Target format the boxes are converted to; the
            source boxes are treated as "rel_yxyx".

    Returns:
        A dict with "images" (image cast to float32) and "bounding_boxes",
        itself a dict of float32 "classes" and "boxes".
    """
    raw_image = inputs["image"]
    annotations = inputs["objects"]

    # The image is handed to the converter alongside the "rel_yxyx" boxes.
    converted_boxes = keras_cv.bounding_box.convert_format(
        annotations["bbox"],
        images=raw_image,
        source="rel_yxyx",
        target=bounding_box_format,
    )

    float_classes = tf.cast(annotations["label"], dtype=tf.float32)
    float_boxes = tf.cast(converted_boxes, dtype=tf.float32)
    return {
        "images": tf.cast(raw_image, tf.float32),
        "bounding_boxes": {"classes": float_classes, "boxes": float_boxes},
    }


def load_pascal_voc(split, dataset, bounding_box_format):
    """Load a TFDS dataset split and repack every example for KerasCV.

    Args:
        split: Name of the split passed to `tfds.load` (e.g. "train").
        dataset: TFDS dataset name (e.g. "voc/2007").
        bounding_box_format: Box format passed on to
            `unpackage_raw_tfds_inputs` for every example.

    Returns:
        The loaded dataset with `unpackage_raw_tfds_inputs` mapped over it.
    """
    # https://www.tensorflow.org/datasets/catalog/voc

    def _repack(example):
        return unpackage_raw_tfds_inputs(
            example, bounding_box_format=bounding_box_format
        )

    # Input files are shuffled at load time; dataset info is not requested.
    raw_ds = tfds.load(dataset, split=split, with_info=False, shuffle_files=True)
    return raw_ds.map(_repack, num_parallel_calls=tf.data.AUTOTUNE)

In [None]:
# Pascal VOC classes, listed in label-id order (index == integer class label).
# The dataset defines exactly these 20 object classes; a trailing "Total" entry
# is not a class and would make `len(class_mapping)` request a 21st, never-used
# output class from the detectors.

class_ids = [
    "Aeroplane",
    "Bicycle",
    "Bird",
    "Boat",
    "Bottle",
    "Bus",
    "Car",
    "Cat",
    "Chair",
    "Cow",
    "Dining Table",
    "Dog",
    "Horse",
    "Motorbike",
    "Person",
    "Potted Plant",
    "Sheep",
    "Sofa",
    "Train",
    "Tvmonitor",
]

# Maps integer class label -> human-readable class name.
class_mapping = dict(enumerate(class_ids))

In [None]:
BATCH_SIZE = 8

# Load both splits of VOC 2007 with boxes converted to "xywh".
train_ds = load_pascal_voc(
    bounding_box_format="xywh", dataset="voc/2007", split="train"
)
eval_ds = load_pascal_voc(
    bounding_box_format="xywh", dataset="voc/2007", split="validation"
)

# Training data is shuffled (buffer of four batches' worth of examples) before
# batching; evaluation data is only batched. Both use ragged batches with
# drop_remainder=True.
train_ds = train_ds.shuffle(BATCH_SIZE * 4).ragged_batch(
    BATCH_SIZE, drop_remainder=True
)
eval_ds = eval_ds.ragged_batch(BATCH_SIZE, drop_remainder=True)

In [None]:
# The datasets are already batched at this point, so `len` counts batches.
print(
    "Train dataset length: {}.".format(len(train_ds)),
    "Evaluation dataset length: {}".format(len(eval_ds)),
    sep="\n",
)

In [None]:
"""
visualize_dataset(
    train_ds, bounding_box_format="xywh", value_range=(0, 255), rows=2, cols=2
)
""";

In [None]:
"""
visualize_dataset(
    eval_ds,
    bounding_box_format="xywh",
    value_range=(0, 255),
    rows=2,
    cols=2,
    # If you are not running your experiment on a local machine, you can also
    # make `visualize_dataset()` dump the plot to a file using `path`:
    # path="eval.png"
)
""";

In [None]:
# Training-time augmentation: a random horizontal flip followed by a jittered
# resize to 640x640. Both layers receive bounding_box_format="xywh"
# (presumably so the boxes are transformed together with the images).

augmenter = keras.Sequential(
    [
        keras_cv.layers.RandomFlip(bounding_box_format="xywh", mode="horizontal"),
        keras_cv.layers.JitteredResize(
            bounding_box_format="xywh",
            target_size=(640, 640),
            scale_factor=(0.75, 1.3),
        ),
    ]
)

# Apply the augmentation pipeline to every training batch.
train_ds = train_ds.map(augmenter, num_parallel_calls=tf.data.AUTOTUNE)

"""
visualize_dataset(
    train_ds, bounding_box_format="xywh", value_range=(0, 255), rows=2, cols=2
)
""";

In [None]:
# Evaluation batches are only resized to 640x640 (padding to keep the aspect
# ratio); no random augmentation is applied to them.
inference_resizing = keras_cv.layers.Resizing(
    height=640,
    width=640,
    pad_to_aspect_ratio=True,
    bounding_box_format="xywh",
)

eval_ds = eval_ds.map(inference_resizing, num_parallel_calls=tf.data.AUTOTUNE)

"""
visualize_dataset(
    eval_ds, bounding_box_format="xywh", value_range=(0, 255), rows=2, cols=2
)
""";

In [None]:
# Unpackage inputs from preprocessing to feed into the model.

def dict_to_tuple(inputs):
    """Split a preprocessed dict into an `(images, bounding_boxes)` pair.

    Args:
        inputs: Dict with "images" and "bounding_boxes" entries.

    Returns:
        A tuple of the images and the bounding boxes converted with
        `bounding_box.to_dense` using `max_boxes=32`.
    """
    images = inputs["images"]
    dense_boxes = bounding_box.to_dense(inputs["bounding_boxes"], max_boxes=32)
    return images, dense_boxes

# Turn both datasets into (images, boxes) tuples and prefetch them.
with tf.device(device_name):
    train_ds = train_ds.map(
        dict_to_tuple, num_parallel_calls=tf.data.AUTOTUNE
    ).prefetch(tf.data.AUTOTUNE)
    eval_ds = eval_ds.map(
        dict_to_tuple, num_parallel_calls=tf.data.AUTOTUNE
    ).prefetch(tf.data.AUTOTUNE)

In [None]:
# Prepare optimizer: SGD with momentum.

base_lr = 0.005

# Including a global_clipnorm is extremely important in object detection tasks
optimizer = optimizers.SGD(
    learning_rate=base_lr,
    momentum=0.9,
    global_clipnorm=10.0,
)

In [None]:
# https://keras.io/api/keras_cv/models/

# Detector architecture to build: "retinanet" or "yolo". The selector lives in
# its own variable so that `model` only ever refers to the detector object.
#model_name = "retinanet"
model_name = "yolo"

# Preset handed to `from_preset`; switch it together with `model_name`.
#preset = "resnet50_imagenet"
preset = "yolo_v8_xs_backbone_coco"

with tf.device(device_name):
    if model_name == "retinanet":
        model = keras_cv.models.RetinaNet.from_preset(
            preset,
            num_classes = len(class_mapping),
            bounding_box_format = "xywh",
        )

        classification_loss = "focal"
        box_loss = "smoothl1"

    elif model_name == "yolo":
        model = keras_cv.models.YOLOV8Detector.from_preset(
            preset,
            num_classes = len(class_mapping),
            bounding_box_format = "xywh",
        )

        classification_loss = 'binary_crossentropy'
        box_loss = 'ciou'

    else:
        # Fail with a clear message instead of reaching compile() without a
        # detector or losses.
        raise ValueError(
            f"Unsupported model_name {model_name!r}; expected 'retinanet' or 'yolo'."
        )

    # Compile model on device.
    model.compile(
        classification_loss = classification_loss,
        box_loss = box_loss,
        optimizer = optimizer,
        jit_compile = False,
    )

In [None]:
#print(model.backbone.summary())

In [None]:
epochs = 10

# Train on train_ds, using eval_ds as the validation data.
with tf.device(device_name):
    history = model.fit(x=train_ds, epochs=epochs, validation_data=eval_ds)

In [None]:
# One row of three panels: total loss, box loss and class loss, each showing
# the training curve followed by the validation curve against the epoch number.
plt.figure(figsize=(15, 3))

# (training key, validation key, y-axis label, legend entries) per panel.
loss_panels = [
    ("loss", "val_loss", "Loss", ["Loss", "Val loss"]),
    ("box_loss", "val_box_loss", "Box Loss", ["Box loss", "Val box loss"]),
    ("class_loss", "val_class_loss", "Class Loss", ["Class loss", "Val class loss"]),
]

for panel_index, (train_key, val_key, y_label, legend_labels) in enumerate(
    loss_panels, start=1
):
    plt.subplot(1, 3, panel_index)
    # history.epoch is zero-based; shift it so the axis starts at epoch 1.
    plt.plot([i + 1 for i in history.epoch], history.history[train_key], "-o")
    plt.plot([i + 1 for i in history.epoch], history.history[val_key], "-o")
    plt.grid(True)
    plt.xlabel("Epoch")
    plt.ylabel(y_label)
    plt.legend(legend_labels)

plt.show()

In [None]:
# Evaluate the trained model on eval_ds; return_dict=True asks for the results
# as a dict, which is then displayed as the cell output.
evaluation_results = model.evaluate(x=eval_ds, return_dict=True)
evaluation_results

In [None]:
# Calculate metrics on the trained model using eval_ds.

coco_metrics = keras_cv.metrics.BoxCOCOMetrics(
    evaluate_freq=1,
    bounding_box_format="xywh",
)
coco_metrics.reset_state()

# Feed the metric one evaluation batch at a time: predict on the selected
# device, then accumulate ground truth against the predictions.
for images, y_true in tqdm.tqdm(eval_ds):
    with tf.device(device_name):
        predictions = model.predict(images, verbose=False)
    coco_metrics.update_state(y_true, predictions)

metrics_result = coco_metrics.result(force=True)

In [None]:
# Display the COCO metrics returned by `coco_metrics.result(force = True)`.
metrics_result

In [None]:
# Take the first evaluation batch and run the trained model on its images.
x, y = next(iter(eval_ds))
y_pred = model.predict(x)

# Draw the predicted boxes over the images in a 2x4 gallery.
visualization.plot_bounding_box_gallery(
    x,
    y_pred=y_pred,
    bounding_box_format="xywh",
    class_mapping=class_mapping,
    value_range=(0, 255),
    rows=2,
    cols=4,
    scale=5,
    font_scale=0.7,
)