In [2]:
import os

import tensorflow as tf

import numpy as np

from PIL import Image

from pathlib import Path

import keras_cv

import cv2

import tqdm

from keras_cv import models, bounding_box, visualization, losses, callbacks

from keras_cv.models import YOLOV8Detector

import tensorflow_datasets as tfds



# Define constants

IMAGE_SIZE = (640, 640)

BATCH_SIZE = 3

NUM_CLASSES = 1

BOUNDING_BOX_FORMAT = "xywh"

PAD_TO_ASPECT_RATIO = True

LEARNING_RATE = 0.001

EPOCH = 1

GLOBAL_CLIPNORM = 10.0

TRAIN_IMAGES_DIR = Path("D:/Projects/DL/MonumentDetection/DL/train/images")
TRAIN_LABELS_DIR = Path("D:/Projects/DL/MonumentDetection/DL/train/labels")

VAL_IMAGES_DIR = Path("D:/Projects/DL/MonumentDetection/DL/val/images")
VAL_LABELS_DIR = Path("D:/Projects/DL/MonumentDetection/DL/val/labels")



def load_yolo_annotations(label_path, image_size):

    annotations = []

    with open(label_path, 'r') as file:

        for line in file:

            parts = line.strip().split(" ")

            if len(parts) != 5:

                continue

            class_id = int(parts[0])

            x_center = float(parts[1])

            y_center = float(parts[2])

            width = float(parts[3])

            height = float(parts[4])

            x_min = (x_center - width / 2) * image_size[0]

            y_min = (y_center - height / 2) * image_size[1]

            x_max = (x_center + width / 2) * image_size[0]

            y_max = (y_center + height / 2) * image_size[1]

            annotations.append([x_min, y_min, x_max, y_max, class_id])

    return np.array(annotations, dtype=np.float32)



def load_sample(image_path, labels_dir):

    image_path_str = tf.keras.backend.get_value(image_path).decode("utf-8")

    image = Image.open(image_path_str).resize(IMAGE_SIZE)

    image = np.array(image) / 255.0

    image_stem = Path(image_path_str).stem

    label_path = os.path.join(labels_dir, image_stem + ".txt")

    if not os.path.isfile(label_path):

        raise FileNotFoundError(f"Label file not found: {label_path}")

    annotations = load_yolo_annotations(label_path, IMAGE_SIZE)

    return image, annotations



def filter_empty_annotations(image, annotations):

    return tf.size(annotations) > 0



def pad_annotations(image, annotations, max_annotations=5):

    num_annotations = tf.shape(annotations)[0]

    annotations = tf.reshape(annotations, [num_annotations, 5])

    padding = [[0, max_annotations - num_annotations], [0, 0]]

    annotations = tf.pad(annotations, padding, constant_values=-1)

    boxes = annotations[:, :4]

    classes = tf.expand_dims(annotations[:, 4], axis=-1)

    return image, {'boxes': boxes, 'classes': classes}



def data_loader(images_dir, labels_dir, batch_size):

    image_paths = list(Path(images_dir).rglob("*.jpg")) + list(Path(images_dir).rglob("*.png"))

    if len(image_paths) == 0:

        raise ValueError(f"No images found in {images_dir}. Check your dataset path.")

    dataset = tf.data.Dataset.from_tensor_slices([str(p) for p in image_paths])

    def load_sample_with_shape(image_path):

        image, annotations = tf.py_function(

            lambda y: load_sample(y, labels_dir),

            [image_path],

            [tf.float32, tf.float32]

        )

        image.set_shape(IMAGE_SIZE + (3,))

        annotations.set_shape([None, 5])

        return image, annotations

    dataset = dataset.map(load_sample_with_shape, num_parallel_calls=tf.data.AUTOTUNE)

    dataset = dataset.filter(lambda image, annotations: tf.py_function(

        func=filter_empty_annotations,

        inp=[image, annotations],

        Tout=tf.bool)

    )

    dataset = dataset.map(lambda image, annotations: pad_annotations(image, annotations))

    dataset = dataset.batch(batch_size, drop_remainder=False).prefetch(tf.data.AUTOTUNE)

    return dataset



# Create datasets for training and validation

train_dataset = data_loader(TRAIN_IMAGES_DIR, TRAIN_LABELS_DIR, BATCH_SIZE)

val_dataset = data_loader(VAL_IMAGES_DIR, VAL_LABELS_DIR, BATCH_SIZE // 2)



class_ids = ["Nyatapola"]

class_mapping = dict(zip(range(len(class_ids)), class_ids))



backbone = keras_cv.models.YOLOV8Backbone.from_preset(

    "yolo_v8_s_backbone_coco"

)



optimizer = tf.keras.optimizers.Adam(

    learning_rate=LEARNING_RATE,

    global_clipnorm=GLOBAL_CLIPNORM,

)



yolo = keras_cv.models.YOLOV8Detector(

    num_classes=len(class_mapping),

    bounding_box_format="xyxy",

    backbone=backbone,

    fpn_depth=1,

)



# Realistic YOLO function replacements

def decode_regression_to_boxes(box_pred):

    # Assuming box_pred is of shape [batch_size, num_boxes, 4]

    return box_pred



def dist2bbox(pred_boxes, anchor_points):

    # Assuming pred_boxes are offsets, convert them to absolute positions using anchor_points

    return pred_boxes + anchor_points



def get_anchors(image_shape):

    height, width = image_shape[:2]

    strides = [8, 16, 32]  # Strides for different layers

    anchors = []

    stride_tensors = []

    for stride in strides:

        grid_x, grid_y = tf.meshgrid(tf.range(width // stride), tf.range(height // stride))

        anchor_points = tf.stack([grid_x, grid_y], axis=-1)

        anchor_points = tf.cast(anchor_points, dtype=tf.float32)

        anchor_points = tf.reshape(anchor_points, [-1, 2]) * stride

        anchors.append(anchor_points)

        stride_tensors.append(tf.fill([tf.shape(anchor_points)[0]], stride))

        

    return tf.concat(anchors, axis=0), tf.concat(stride_tensors, axis=0)



# Debugging function addition to compute_loss method

def compute_loss(self, x, y, y_pred, sample_weight=None, **kwargs):

    box_pred, cls_pred = y_pred["boxes"], y_pred["classes"]

    pred_boxes = decode_regression_to_boxes(box_pred)

    pred_scores = cls_pred

    anchor_points, stride_tensor = get_anchors(x.shape[1:])

    stride_tensor = tf.expand_dims(stride_tensor, axis=-1)

    gt_labels = y["classes"]

    mask_gt = tf.reduce_all(y["boxes"] > -1.0, axis=-1, keepdims=True)

    gt_bboxes = bounding_box.convert_format(

        y["boxes"],

        source=self.bounding_box_format,

        target="xyxy",

        images=x,

    )

    pred_bboxes = dist2bbox(pred_boxes, anchor_points)

    print(f"shape pred_scores: {tf.shape(pred_scores)}")

    print(f"shape pred_boxes: {tf.shape(pred_boxes)}")

    print(f"shape pred_bboxes: {tf.shape(pred_bboxes)}")

    print(f"shape anchor_points: {tf.shape(anchor_points)}")

    print(f"shape stride_tensor: {tf.shape(stride_tensor)}")

    print(f"shape gt_labels: {tf.shape(gt_labels)}")

    print(f"shape gt_bboxes: {tf.shape(gt_bboxes)}")

    print(f"gt_bboxes dtype: {gt_bboxes.dtype}")

    target_bboxes, target_scores, fg_mask = self.label_encoder(

        pred_scores,

        tf.cast(pred_bboxes * stride_tensor, gt_bboxes.dtype),

        anchor_points * stride_tensor,

        gt_labels,

        gt_bboxes,

        mask_gt,

    )

    target_bboxes /= stride_tensor

    target_scores_sum = tf.maximum(tf.reduce_sum(target_scores), 1)

    box_weight = tf.expand_dims(

        tf.reduce_sum(target_scores, axis=-1) * fg_mask, axis=-1,

    )

    y_true = {

        "box": target_bboxes * fg_mask[..., None],

        "class": target_scores,

    }

    y_pred = {

        "box": pred_bboxes * fg_mask[..., None],

        "class": pred_scores,

    }

    sample_weights = {

        "box": self.box_loss_weight * box_weight / target_scores_sum,

        "class": self.classification_loss_weight / target_scores_sum,

    }

    return super(YOLOV8Detector, self).compute_loss(

        x=x, y=y_true, y_pred=y_pred, sample_weight=sample_weights, **kwargs

    )



# Bind the new compute_loss method

YOLOV8Detector.compute_loss = compute_loss



# Compile and fit the model

yolo.compile(

    optimizer=optimizer, classification_loss="binary_crossentropy", box_loss="ciou"

)



yolo.fit(

    train_dataset,

    validation_data=val_dataset,

    epochs=EPOCH

)

ValueError: Dimensions must be equal, but are 64 and 2 for '{{node add}} = AddV2[T=DT_FLOAT](yolov8_detector_1/box_1/concat/concat, concat)' with input shapes: [?,?,64], [8400,2].