keras-team · oarriaga · Oct 1, 2024 · Oct 1, 2024 · Oct 1, 2024 · Oct 1, 2024
diff --git a/keras_hub/api/layers/__init__.py b/keras_hub/api/layers/__init__.py
@@ -72,3 +72,6 @@
 from keras_hub.src.models.whisper.whisper_audio_converter import (
     WhisperAudioConverter,
 )
+from keras_hub.src.models.yolo_v8.yolo_v8_image_converter import (
+    YOLOV8ImageConverter,
+)
diff --git a/keras_hub/api/models/__init__.py b/keras_hub/api/models/__init__.py
@@ -372,4 +372,11 @@
     XLMRobertaTokenizer,
 )
 from keras_hub.src.models.xlnet.xlnet_backbone import XLNetBackbone
+from keras_hub.src.models.yolo_v8.yolo_v8_backbone import YOLOV8Backbone
+from keras_hub.src.models.yolo_v8.yolo_v8_detector import (
+    YOLOV8ImageObjectDetector,
+)
+from keras_hub.src.models.yolo_v8.yolo_v8_object_detector_preprocessor import (
+    YOLOV8ImageObjectDetectorPreprocessor,
+)
 from keras_hub.src.tokenizers.tokenizer import Tokenizer
diff --git a/keras_hub/src/models/yolo_v8/__init__.py b/keras_hub/src/models/yolo_v8/__init__.py
@@ -0,0 +1,10 @@
+from keras_hub.src.models.yolo_v8.yolo_v8_backbone import YOLOV8Backbone
+from keras_hub.src.models.yolo_v8.yolo_v8_detector import (
+    YOLOV8ImageObjectDetector,
+)
+from keras_hub.src.models.yolo_v8.yolo_v8_presets import backbone_presets
+from keras_hub.src.models.yolo_v8.yolo_v8_presets import detector_presets
+from keras_hub.src.utils.preset_utils import register_presets
+
+register_presets(backbone_presets, YOLOV8Backbone)
+register_presets(detector_presets, YOLOV8ImageObjectDetector)
diff --git a/keras_hub/src/models/yolo_v8/ciou_loss.py b/keras_hub/src/models/yolo_v8/ciou_loss.py
@@ -0,0 +1,109 @@
+import keras
+from keras import ops
+from keras.utils.bounding_boxes import compute_ciou
+
+
+class CIoULoss(keras.losses.Loss):
+    """Implements the Complete IoU (CIoU) Loss
+
+    CIoU loss is an extension of GIoU loss, which further improves the IoU
+    optimization for object detection. CIoU loss not only penalizes the
+    bounding box coordinates but also considers the aspect ratio and center
+    distance of the boxes. The length of the last dimension should be 4 to
+    represent the bounding boxes.
+
+    Args:
+        bounding_box_format: a case-insensitive string (for example, "xyxy").
+            Each bounding box is defined by these 4 values. For detailed
+            information on the supported formats, see the [Keras bounding box
+            documentation](https://github.com/keras-team/keras/blob/master/
+            keras/src/layers/preprocessing/image_preprocessing/
+            bounding_boxes/formats.py).
+        epsilon: (optional) float, a small value added to avoid division by
+            zero and stabilize calculations. Defaults 1e-07.
+
+    References:
+        - [CIoU paper](https://arxiv.org/pdf/2005.03572.pdf)
+
+    Example:
+    ```python
+    y_true = np.random.uniform(
+        size=(5, 10, 4),
+        low=0,
+        high=10)
+    y_pred = np.random.uniform(
+        size=(5, 10, 4),
+        low=0,
+        high=10)
+    loss = keras_hub.src.models.yolo_v8.ciou_loss.CIoULoss("xyxy")
+    loss(y_true, y_pred).numpy()
+    ```
+
+    Usage with the `compile()` API:
+    ```python
+    model.compile(optimizer="adam", loss=CIoULoss("xyxy"))
+    model.fit(y_true, y_pred)
+    ```
+    """
+
+    def __init__(
+        self, bounding_box_format, epsilon=1e-07, image_shape=None, **kwargs
+    ):
+        super().__init__(**kwargs)
+        box_formats = [
+            "xywh",
+            "center_xywh",
+            "center_yxhw",
+            "rel_xywh",
+            "xyxy",
+            "rel_xyxy",
+            "yxyx",
+            "rel_yxyx",
+        ]
+        if bounding_box_format not in box_formats:
+            raise ValueError(
+                f"Invalid bounding box format: '{bounding_box_format}'. "
+                f"Expected one of {box_formats}. "
+                "Ensure that the string format is correctly spelled."
+            )
+        self.bounding_box_format = bounding_box_format
+        self.epsilon = epsilon
+        self.image_shape = image_shape
+
+    def call(self, y_true, y_pred):
+        y_pred = ops.convert_to_tensor(y_pred)
+        y_true = ops.cast(y_true, y_pred.dtype)
+
+        if y_pred.shape[-1] != 4:
+            raise ValueError(
+                "CIoULoss expects y_pred.shape[-1] to be 4 to represent the "
+                f"bounding boxes. Received y_pred.shape[-1]={y_pred.shape[-1]}."
+            )
+
+        if y_true.shape[-1] != 4:
+            raise ValueError(
+                "CIoULoss expects y_true.shape[-1] to be 4 to represent the "
+                f"bounding boxes. Received y_true.shape[-1]={y_true.shape[-1]}."
+            )
+
+        if y_true.shape[-2] != y_pred.shape[-2]:
+            raise ValueError(
+                "CIoULoss expects number of boxes in y_pred to be equal to the "
+                "number of boxes in y_true. Received number of boxes in "
+                f"y_true={y_true.shape[-2]} and number of boxes in "
+                f"y_pred={y_pred.shape[-2]}."
+            )
+
+        ciou = compute_ciou(
+            y_true, y_pred, self.bounding_box_format, self.image_shape
+        )
+        return 1 - ciou
+
+    def get_config(self):
+        config = super().get_config()
+        config.update(
+            {
+                "epsilon": self.epsilon,
+            }
+        )
+        return config
diff --git a/keras_hub/src/models/yolo_v8/ciou_loss_test.py b/keras_hub/src/models/yolo_v8/ciou_loss_test.py
@@ -0,0 +1,74 @@
+import numpy as np
+from absl.testing import parameterized
+
+from keras_hub.src.models.yolo_v8.ciou_loss import CIoULoss
+from keras_hub.src.tests.test_case import TestCase
+
+
+class CIoUTest(TestCase):
+    def test_output_shape(self):
+        y_true = np.random.uniform(size=(2, 2, 4), low=0, high=10)
+        y_pred = np.random.uniform(size=(2, 2, 4), low=0, high=20)
+
+        ciou_loss = CIoULoss(bounding_box_format="xywh")
+
+        self.assertAllEqual(ciou_loss(y_true, y_pred).shape, ())
+
+    def test_output_shape_reduction_none(self):
+        y_true = np.random.uniform(size=(2, 2, 4), low=0, high=10)
+        y_pred = np.random.uniform(size=(2, 2, 4), low=0, high=20)
+
+        ciou_loss = CIoULoss(bounding_box_format="xyxy", reduction="none")
+
+        self.assertAllEqual(
+            [2, 2],
+            ciou_loss(y_true, y_pred).shape,
+        )
+
+    def test_output_shape_relative_formats(self):
+        y_true = [
+            [0.0, 0.0, 0.1, 0.1],
+            [0.0, 0.0, 0.2, 0.3],
+            [0.4, 0.5, 0.5, 0.6],
+            [0.2, 0.2, 0.3, 0.3],
+        ]
+
+        y_pred = [
+            [0.0, 0.0, 0.5, 0.6],
+            [0.0, 0.0, 0.7, 0.3],
+            [0.4, 0.5, 0.5, 0.6],
+            [0.2, 0.1, 0.3, 0.3],
+        ]
+
+        ciou_loss = CIoULoss(bounding_box_format="xyxy")
+
+        self.assertAllEqual(ciou_loss(y_true, y_pred).shape, ())
+
+    @parameterized.named_parameters(
+        ("xyxy", "xyxy"),
+        ("rel_xyxy", "rel_xyxy"),
+    )
+    def test_output_value(self, name):
+        y_true = [
+            [0, 0, 1, 1],
+            [0, 0, 2, 3],
+            [4, 5, 3, 6],
+            [2, 2, 3, 3],
+        ]
+
+        y_pred = [
+            [0, 0, 5, 6],
+            [0, 0, 7, 3],
+            [4, 5, 5, 6],
+            [2, 1, 3, 3],
+        ]
+        expected_loss = 1.03202
+        ciou_loss = CIoULoss(bounding_box_format="xyxy")
+        if name == "rel_xyxy":
+            scale_factor = 1 / 640.0
+            y_true = np.array(y_true) * scale_factor
+            y_pred = np.array(y_pred) * scale_factor
+
+        self.assertAllClose(
+            ciou_loss(y_true, y_pred), expected_loss, atol=0.005
+        )
diff --git a/keras_hub/src/models/yolo_v8/yolo_v8_backbone.py b/keras_hub/src/models/yolo_v8/yolo_v8_backbone.py
@@ -0,0 +1,136 @@
+from keras import ops
+from keras.layers import Input
+from keras.layers import MaxPooling2D
+
+from keras_hub.src.api_export import keras_hub_export
+from keras_hub.src.models.feature_pyramid_backbone import FeaturePyramidBackbone
+from keras_hub.src.models.yolo_v8.yolo_v8_layers import apply_conv_bn
+from keras_hub.src.models.yolo_v8.yolo_v8_layers import apply_CSP
+
+
+def apply_stem(x, stem_width, activation):
+    x = apply_conv_bn(x, stem_width // 2, 3, 2, activation, "stem_1")
+    x = apply_conv_bn(x, stem_width, 3, 2, activation, "stem_2")
+    return x
+
+
+def apply_fast_SPP(x, pool_size=5, activation="swish", name="spp_fast"):
+    input_channels = x.shape[-1]
+    hidden_channels = int(input_channels // 2)
+    x = apply_conv_bn(x, hidden_channels, 1, 1, activation, f"{name}_pre")
+    pool_kwargs = {"strides": 1, "padding": "same"}
+    p1 = MaxPooling2D(pool_size, **pool_kwargs, name=f"{name}_pool1")(x)
+    p2 = MaxPooling2D(pool_size, **pool_kwargs, name=f"{name}_pool2")(p1)
+    p3 = MaxPooling2D(pool_size, **pool_kwargs, name=f"{name}_pool3")(p2)
+    x = ops.concatenate([x, p1, p2, p3], axis=-1)
+    x = apply_conv_bn(x, input_channels, 1, 1, activation, f"{name}_output")
+    return x
+
+
+def apply_yolo_block(x, block_arg, channels, depth, block_depth, activation):
+    name = f"stack{block_arg + 1}"
+    if block_arg >= 1:
+        x = apply_conv_bn(x, channels, 3, 2, activation, f"{name}_downsample")
+    x = apply_CSP(x, -1, depth, True, 0.5, activation, f"{name}_c2f")
+    if block_arg == len(block_depth) - 1:
+        x = apply_fast_SPP(x, 5, activation, f"{name}_spp_fast")
+    return x
+
+
+def stackwise_yolo_blocks(x, stackwise_depth, stackwise_channels, activation):
+    pyramid_level_inputs = {"P1": get_tensor_input_name(x)}
+    iterator = enumerate(zip(stackwise_channels, stackwise_depth))
+    block_args = (stackwise_depth, activation)
+    for stack_arg, (channel, depth) in iterator:
+        x = apply_yolo_block(x, stack_arg, channel, depth, *block_args)
+        pyramid_level_inputs[f"P{stack_arg + 2}"] = get_tensor_input_name(x)
+    return x, pyramid_level_inputs
+
+
+def get_tensor_input_name(tensor):
+    return tensor._keras_history.operation.name
+
+
+def build_pyramid_outputs(model, level_to_layer_name):
+    pyramid_outputs = {}
+    for level_name, layer_name in level_to_layer_name.items():
+        pyramid_outputs[level_name] = model.get_layer(layer_name).output
+    return pyramid_outputs
+
+
+@keras_hub_export("keras_hub.models.YOLOV8Backbone")
+class YOLOV8Backbone(FeaturePyramidBackbone):
+    """Implements the YOLOV8 backbone for object detection.
+
+    This backbone is a variant of the `CSPDarkNetBackbone` architecture.
+
+    For transfer learning use cases, make sure to read the
+    [guide to transfer learning & fine-tuning](https://keras.io/guides/
+    transfer_learning/).
+
+    Args:
+        stackwise_channels: A list of int. The number of channels for each dark
+            level in the model.
+        stackwise_depth: A list of int. The depth for each dark level in the
+            model.
+        include_rescaling: bool. Rescale the inputs. If set to
+            True, inputs will be passed through a `Rescaling(1/255.0)` layer.
+        activation: str. The activation functions to use in the backbone to
+            use in the CSPDarkNet blocks. Defaults to "swish".
+        image_shape: optional shape tuple, defaults to `(None, None, 3)`.
+
+    Returns:
+        A `keras.Model` instance.
+
+    Examples:
+    ```python
+    input_data = tf.ones(shape=(8, 224, 224, 3))
+
+    # Pretrained backbone
+    model = keras_hub.models.YOLOV8Backbone.from_preset(
+        "yolo_v8_xs_backbone_coco"
+    )
+    output = model(input_data)
+
+    # Randomly initialized backbone with a custom config
+    model = keras_hub.models.YOLOV8Backbone(
+        stackwise_channels=[128, 256, 512, 1024],
+        stackwise_depth=[3, 9, 9, 3],
+    )
+    output = model(input_data)
+    ```
+    """
+
+    def __init__(
+        self,
+        stackwise_channels,
+        stackwise_depth,
+        activation="swish",
+        image_shape=(None, None, 3),
+        **kwargs,
+    ):
+        inputs = Input(shape=image_shape)
+        stem_width = stackwise_channels[0]
+        x = apply_stem(inputs, stem_width, activation)
+        x, pyramid_level_inputs = stackwise_yolo_blocks(
+            x, stackwise_depth, stackwise_channels, activation
+        )
+        super().__init__(inputs=inputs, outputs=x, **kwargs)
+        self.pyramid_level_inputs = pyramid_level_inputs
+        self.pyramid_outputs = build_pyramid_outputs(self, pyramid_level_inputs)
+        self.stackwise_channels = stackwise_channels
+        self.stackwise_depth = stackwise_depth
+        self.activation = activation
+        self.image_shape = image_shape
+
+    def get_config(self):
+        config = super().get_config()
+        config.update(
+            {
+                "image_shape": self.image_shape,
+                "stackwise_channels": self.stackwise_channels,
+                "stackwise_depth": self.stackwise_depth,
+                "activation": self.activation,
+            }
+        )
+        return config
diff --git a/keras_hub/src/models/yolo_v8/yolo_v8_backbone_test.py b/keras_hub/src/models/yolo_v8/yolo_v8_backbone_test.py
@@ -0,0 +1,45 @@
+import numpy as np
+import pytest
+
+from keras_hub.src.models.yolo_v8.yolo_v8_backbone import YOLOV8Backbone
+from keras_hub.src.tests.test_case import TestCase
+
+
+class YOLOV8BackboneTest(TestCase):
+    def setUp(self):
+        self.init_kwargs = {
+            "stackwise_channels": [64, 128, 256, 512],
+            "stackwise_depth": [1, 2, 2, 1],
+            "activation": "swish",
+            "image_shape": (32, 32, 3),
+        }
+        self.input_size = 32
+        self.input_data = np.ones(
+            (2, self.input_size, self.input_size, 3), dtype="float32"
+        )
+
+    def test_backbone_basics(self):
+        self.run_vision_backbone_test(
+            cls=YOLOV8Backbone,
+            init_kwargs=self.init_kwargs,
+            input_data=self.input_data,
+            expected_output_shape=(2, 1, 1, 512),
+            expected_pyramid_output_keys=["P1", "P2", "P3", "P4", "P5"],
+            expected_pyramid_image_sizes=[
+                (8, 8),
+                (8, 8),
+                (4, 4),
+                (2, 2),
+                (1, 1),
+            ],
+            run_mixed_precision_check=False,
+            run_data_format_check=False,
+        )
+
+    @pytest.mark.large
+    def test_saved_model(self):
+        self.run_model_saving_test(
+            cls=YOLOV8Backbone,
+            init_kwargs=self.init_kwargs,
+            input_data=self.input_data,
+        )