In [None]:
!pip install keras-cv keras-core

In [None]:
!pip uninstall -y keras-cv
!pip install git+https://github.com/ianstenbit/keras-cv.git@task-aligned-assignment

In [None]:
# Copyright 2022 The KerasCV Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Title: Train an Object Detection Model on Pascal VOC 2007 using KerasCV
Author: [lukewood](https://github.com/LukeWood), [tanzhenyu](https://github.com/tanzhenyu)
Date created: 2022/09/27
Last modified: 2023/03/29
Description: Use KerasCV to train a YOLOV8 detector on Pascal VOC 2007 and 2012.
"""
import resource
import sys

import tensorflow as tf
import tensorflow_datasets as tfds
import tqdm
from tensorflow import keras

import keras_cv

# Temporarily need PyCOCOCallback to verify
# a 1:1 comparison with the PyMetrics version.
from keras_cv.callbacks import PyCOCOCallback

# Raise the soft limit on open file descriptors up to the hard limit.
# (Presumably so the input pipeline does not run out of descriptors --
# TODO confirm.)
low, high = resource.getrlimit(resource.RLIMIT_NOFILE)
raised_nofile_limit = (high, high)
resource.setrlimit(resource.RLIMIT_NOFILE, raised_nofile_limit)

Using TensorFlow backend


In [None]:
# Authenticate the Colab user with Google Cloud. The `google.colab` package
# only exists on Colab runtimes, so guard the import to keep the notebook
# runnable elsewhere instead of aborting with an ImportError.
try:
    from google.colab import auth
except ImportError:
    auth = None

if auth is None:
    print("google.colab is unavailable; skipping interactive authentication.")
else:
    auth.authenticate_user()

In [None]:
# Choose a distribution strategy: attempt to connect to a TPU and use
# TPUStrategy; if that raises ValueError, fall back to MirroredStrategy.
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver.connect()
    strategy = tf.distribute.TPUStrategy(tpu)
except ValueError:
    # MirroredStrategy is best for a single machine with one or multiple GPUs
    strategy = tf.distribute.MirroredStrategy()

# BATCH_SIZE is multiplied by the replica count to get the global batch
# size; the base learning rate scales linearly with it (0.01 at 64).
BATCH_SIZE = 4
GLOBAL_BATCH_SIZE = strategy.num_replicas_in_sync * BATCH_SIZE
BASE_LR = 0.01 * GLOBAL_BATCH_SIZE / 64
print(f"Number of accelerators:  {strategy.num_replicas_in_sync}")
print(f"Global Batch Size:  {GLOBAL_BATCH_SIZE}")

# Square input resolution and the matching HWC image shape.
IMG_SIZE = 640
image_size = [IMG_SIZE] * 2 + [3]

# Training data: VOC 2007 train+validation followed by VOC 2012
# train+validation, both loaded with shuffled input files.
# Optional, currently disabled on every load: data_dir="gs://kerascv-dataset"
trainval_kwargs = dict(
    split="train+validation",
    with_info=False,
    shuffle_files=True,
)
voc2007_trainval = tfds.load("voc/2007", **trainval_kwargs)
voc2012_trainval = tfds.load("voc/2012", **trainval_kwargs)
train_ds = voc2007_trainval.concatenate(voc2012_trainval)

# Evaluation data: the VOC 2007 test split.
eval_ds = tfds.load("voc/2007", split="test", with_info=False)


def unpackage_tfds_inputs(inputs, bounding_box_format):
    """Repackage one TFDS example into an images / bounding_boxes dict.

    Args:
        inputs: example dict; reads `inputs["image"]`,
            `inputs["objects"]["bbox"]` and `inputs["objects"]["label"]`.
        bounding_box_format: KerasCV bounding box format the boxes are
            converted to (the source boxes are treated as `rel_yxyx`).

    Returns:
        A dict with `"images"` (the image cast to float32) and
        `"bounding_boxes"`, itself a dict holding float32 `"classes"` and
        float32 `"boxes"` in the requested format.
    """
    image = inputs["image"]
    objects = inputs["objects"]
    converted_boxes = keras_cv.bounding_box.convert_format(
        objects["bbox"],
        images=image,
        source="rel_yxyx",
        target=bounding_box_format,
    )
    return {
        "images": tf.cast(image, tf.float32),
        "bounding_boxes": {
            "classes": tf.cast(objects["label"], dtype=tf.float32),
            "boxes": tf.cast(converted_boxes, dtype=tf.float32),
        },
    }


def _unpackage_as_xywh(inputs):
    """Unpack a TFDS example with its boxes converted to `xywh`."""
    return unpackage_tfds_inputs(inputs, bounding_box_format="xywh")


# Apply the same unpacking to the training and evaluation datasets.
train_ds = train_ds.map(
    _unpackage_as_xywh, num_parallel_calls=tf.data.AUTOTUNE
)
eval_ds = eval_ds.map(
    _unpackage_as_xywh, num_parallel_calls=tf.data.AUTOTUNE
)

# Training-time augmentation: random horizontal flip followed by a jittered
# resize. The target size is taken from IMG_SIZE so the augmentation output
# cannot drift from the configured input resolution.
augmenter = keras.Sequential(
    layers=[
        keras_cv.layers.RandomFlip(
            mode="horizontal", bounding_box_format="xywh"
        ),
        keras_cv.layers.JitteredResize(
            target_size=(IMG_SIZE, IMG_SIZE),
            scale_factor=(0.8, 1.25),
            bounding_box_format="xywh",
        ),
    ]
)
# Batch into ragged tensors, then augment whole batches.
# - `Dataset.ragged_batch` replaces the deprecated
#   `tf.data.experimental.dense_to_ragged_batch`.
# - A dataset fed to a distribution strategy is batched with the *global*
#   batch size, which is also what BASE_LR is scaled by. With a single
#   replica this equals BATCH_SIZE.
train_ds = train_ds.ragged_batch(GLOBAL_BATCH_SIZE)
train_ds = train_ds.map(augmenter, num_parallel_calls=tf.data.AUTOTUNE)


def pad_fn(inputs):
    """Densify the bounding boxes of a batch to a fixed 32 boxes.

    Args:
        inputs: dict with a `"bounding_boxes"` entry; that entry is
            replaced in place by the result of
            `keras_cv.bounding_box.to_dense(..., max_boxes=32)`.

    Returns:
        The same `inputs` dict, with `"bounding_boxes"` densified.
    """
    dense_boxes = keras_cv.bounding_box.to_dense(
        inputs["bounding_boxes"], max_boxes=32
    )
    inputs["bounding_boxes"] = dense_boxes
    return inputs


# Finish the training pipeline: shuffle (the dataset is already batched at
# this point, so the buffer holds batches), densify boxes, then prefetch.
train_ds = (
    train_ds.shuffle(8 * strategy.num_replicas_in_sync)
    .map(pad_fn, num_parallel_calls=tf.data.AUTOTUNE)
    .prefetch(tf.data.AUTOTUNE)
)

# Evaluation pipeline: resize each image to the configured square input
# size (padding to keep the aspect ratio), batch, densify boxes, prefetch.
# - IMG_SIZE replaces the duplicated literal 640.
# - `Dataset.ragged_batch` replaces the deprecated
#   `tf.data.experimental.dense_to_ragged_batch`.
# - The global batch size is used, as expected for a dataset consumed under
#   a distribution strategy. With a single replica it equals BATCH_SIZE.
eval_resizing = keras_cv.layers.Resizing(
    IMG_SIZE, IMG_SIZE, pad_to_aspect_ratio=True, bounding_box_format="xywh"
)
eval_ds = eval_ds.map(
    eval_resizing,
    num_parallel_calls=tf.data.AUTOTUNE,
)
eval_ds = eval_ds.ragged_batch(GLOBAL_BATCH_SIZE)
eval_ds = eval_ds.map(pad_fn, num_parallel_calls=tf.data.AUTOTUNE)
eval_ds = eval_ds.prefetch(tf.data.AUTOTUNE)

Number of accelerators:  1
Global Batch Size:  4


Instructions for updating:
Use `tf.data.Dataset.ragged_batch` instead.


In [None]:
# Training length. Shared by the learning-rate schedule and `fit()` so the
# decay horizon always matches the number of epochs actually trained.
EPOCHS = 120
# NOTE(review): this bucket path looks user-specific -- point it at a
# location you can write to.
TENSORBOARD_LOG_DIR = "gs://ian-kerascv/yolov8-gpu-logs-v4"
CHECKPOINT_PATH = "./weights.h5"

# `cardinality()` is negative when the dataset size is unknown or infinite;
# that would silently yield a nonsensical decay horizon, so fail loudly.
steps_per_epoch = int(train_ds.cardinality())
if steps_per_epoch <= 0:
    raise ValueError(
        "train_ds must have a known, positive cardinality to size the "
        f"learning-rate schedule; got {steps_per_epoch}."
    )

# Build AND compile under the strategy scope so every variable the model,
# optimizer and compile step create is placed by the distribution strategy.
with strategy.scope():
    model = keras_cv.models.YOLOV8Detector(
        num_classes=20,
        backbone=keras_cv.models.YOLOV8Backbone.from_preset(
            "yolo_v8_m_backbone_coco"
        ),
        fpn_depth=2,
        bounding_box_format="xywh",
    )
    # Fine-tune the backbone too. Set before compile(): Keras only picks up
    # `trainable` changes made after compile() once the model is recompiled.
    model.backbone.trainable = True

    lr_schedule = keras.optimizers.schedules.PolynomialDecay(
        initial_learning_rate=BASE_LR,
        decay_steps=steps_per_epoch * EPOCHS,
    )
    optimizer = tf.keras.optimizers.SGD(
        learning_rate=lr_schedule,
        momentum=0.937,
        clipnorm=5.0,
        weight_decay=5e-4,
        use_ema=True,
        ema_momentum=0.9999,
    )

    model.compile(
        optimizer=optimizer,
        box_loss="ciou",
        classification_loss="binary_crossentropy",
    )

# COCO metrics on the eval set, TensorBoard logging, and best-only weight
# checkpoints.
callbacks = [
    keras_cv.callbacks.PyCOCOCallback(eval_ds, bounding_box_format="xywh"),
    keras.callbacks.TensorBoard(TENSORBOARD_LOG_DIR),
    keras.callbacks.ModelCheckpoint(
        CHECKPOINT_PATH, save_best_only=True, save_weights_only=True
    ),
]

history = model.fit(
    train_ds,
    validation_data=eval_ds,
    epochs=EPOCHS,
    callbacks=callbacks,
)

Epoch 1/120
   6/4138 [..............................] - ETA: 7:32 - loss: 543.9800 - box_loss: 2.9202 - class_loss: 541.0598



creating index...
index created!
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.04s).
Accumulating evaluation results...
DONE (t=0.06s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.005
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.010
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.004
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.006
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.008
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.009
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.009
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Av

In [None]:
from keras_cv import bounding_box, visualization


def visualize_detections(
    model, dataset, bounding_box_format, rows, cols, class_mapping=None
):
    """Plot ground truth and predicted boxes for one batch of `dataset`.

    Args:
        model: detector whose `predict` output is passed to
            `bounding_box.to_ragged` before plotting.
        dataset: batched dataset; only its first batch is used. Each batch
            may be an `(images, bounding_boxes)` pair or a dict with
            `"images"` and `"bounding_boxes"` keys.
        bounding_box_format: format of the boxes handed to the plotter.
        rows: number of gallery rows.
        cols: number of gallery columns.
        class_mapping: optional mapping from class id to display name. When
            omitted, the notebook-level `class_mapping` global is used if it
            exists.
    """
    batch = next(iter(dataset.take(1)))
    if isinstance(batch, dict):
        # Tuple-unpacking a dict would bind the *key strings* ("images",
        # "bounding_boxes") instead of the tensors, so index explicitly.
        images, y_true = batch["images"], batch["bounding_boxes"]
    else:
        images, y_true = batch

    if class_mapping is None:
        # Backward compatibility: earlier callers relied on a global.
        class_mapping = globals().get("class_mapping")

    y_pred = model.predict(images)
    y_pred = bounding_box.to_ragged(y_pred)
    visualization.plot_bounding_box_gallery(
        images,
        value_range=(0, 255),
        bounding_box_format=bounding_box_format,
        y_true=y_true,
        y_pred=y_pred,
        scale=4,
        rows=rows,
        cols=cols,
        show=True,
        font_scale=0.7,
        class_mapping=class_mapping,
    )

In [None]:
# Display names, in class-id order; the final "Total" entry is kept as an
# extra 21st name after the 20 object classes.
class_ids = [
    "Aeroplane", "Bicycle", "Bird", "Boat", "Bottle",
    "Bus", "Car", "Cat", "Chair", "Cow",
    "Dining Table", "Dog", "Horse", "Motorbike", "Person",
    "Potted Plant", "Sheep", "Sofa", "Train", "Tvmonitor",
    "Total",
]
# Map each integer class id (its position in the list) to its name.
class_mapping = dict(enumerate(class_ids))

In [None]:
# Install a non-max-suppression decoder on the model, rebuild the predict
# function (force=True), then plot a 2x2 gallery from the shuffled eval set.
nms_decoder = keras_cv.layers.MultiClassNonMaxSuppression(
    bounding_box_format="xywh",
    from_logits=False,
    confidence_threshold=0.3,
    iou_threshold=0.5,
)
model.prediction_decoder = nms_decoder
model.make_predict_function(force=True)

shuffled_eval_ds = eval_ds.shuffle(10)
visualize_detections(
    model, shuffled_eval_ds, bounding_box_format="xywh", rows=2, cols=2
)
# Keep a second reference to the model under the name `old_model`.
old_model = model

IndexError: ignored