<a href="https://colab.research.google.com/github/juhumkwon/DataMining/blob/main/YOLO_v5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import tensorflow as tf
from tensorflow.keras import layers, Model
import numpy as np

# YOLOv5 Model - Backbone, Detection Head
class YOLOv5(Model):
    def __init__(self, S=7, B=2, C=20):
        super(YOLOv5, self).__init__()
        self.S = S  # Grid size
        self.B = B  # Number of bounding boxes
        self.C = C  # Number of classes

        # Backbone: Simple Convolutional Layers (CSPDarknet is simplified)
        self.conv1 = layers.Conv2D(64, (3, 3), strides=1, padding='same', activation='relu')
        self.pool1 = layers.MaxPooling2D(pool_size=(2, 2), strides=2)
        self.conv2 = layers.Conv2D(128, (3, 3), strides=1, padding='same', activation='relu')
        self.pool2 = layers.MaxPooling2D(pool_size=(2, 2), strides=2)
        self.conv3 = layers.Conv2D(256, (3, 3), strides=1, padding='same', activation='relu')
        self.pool3 = layers.MaxPooling2D(pool_size=(2, 2), strides=2)

        # YOLO Head (Detection Layer)
        self.detection_head = layers.Conv2D(S * S * (B * 5 + C), (1, 1), activation='linear')

    def call(self, inputs):
        x = self.conv1(inputs)
        x = self.pool1(x)
        x = self.conv2(x)
        x = self.pool2(x)
        x = self.conv3(x)
        x = self.pool3(x)

        # Detection output
        x = self.detection_head(x)

        return tf.reshape(x, (-1, self.S, self.S, self.B * 5 + self.C))

# Dummy dataset
def create_dataset(num_samples=1000, S=7, B=2, C=20):
    # Force NumPy arrays to reside on CPU memory
    with tf.device('/CPU:0'):
        images = np.random.rand(num_samples, 448, 448, 3).astype(np.float32)  # Random images
        labels = np.random.rand(num_samples, S, S, 5 * B + C).astype(np.float32)  # Random labels
    dataset = tf.data.Dataset.from_tensor_slices((images, labels))
    return dataset.batch(16)

# Loss function
def yolo_loss(y_true, y_pred, S=7, B=2, C=20):
    pred_box = y_pred[..., :5 * B]
    pred_class = y_pred[..., 5 * B:]
    true_box = y_true[..., :5 * B]
    true_class = y_true[..., 5 * B:]

    # Objectness loss (confidence)
    obj_loss = tf.reduce_mean(tf.square(true_box[..., 4::5] - pred_box[..., 4::5]))

    # Coordinate loss
    coord_loss = tf.reduce_mean(tf.square(true_box[..., :4] - pred_box[..., :4]))

    # Classification loss
    class_loss = tf.reduce_mean(tf.square(true_class - pred_class))

    total_loss = obj_loss + coord_loss + class_loss
    return total_loss

# Training function
def train_yolov5():
    S, B, C = 7, 2, 20
    model = YOLOv5(S, B, C)
    dataset = create_dataset()

    # Compile the model
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
                  loss=lambda y_true, y_pred: yolo_loss(y_true, y_pred, S, B, C))

    # Train the model
    print("Starting training...")
    model.fit(dataset, epochs=10)
    print("Training complete!")

# Post-processing: Non-Maximum Suppression (NMS)
def non_max_suppression(boxes, scores, iou_threshold=0.5):
    selected_indices = tf.image.non_max_suppression(boxes, scores, max_output_size=10, iou_threshold=iou_threshold)
    return tf.gather(boxes, selected_indices)

# Prediction Example
def predict_yolov5():
    S, B, C = 7, 2, 20
    model = YOLOv5(S, B, C)
    model.build(input_shape=(None, 448, 448, 3))

    # Generate a dummy image for prediction
    image = np.random.rand(1, 448, 448, 3).astype(np.float32)
    prediction = model(image)

    # Decode and apply NMS
    boxes = tf.random.uniform((10, 4))  # Random boxes (x_min, y_min, x_max, y_max)
    scores = tf.random.uniform((10,))  # Random confidence scores
    selected_boxes = non_max_suppression(boxes, scores)

    print("Selected Boxes (after NMS):", selected_boxes)

# Main Execution
if __name__ == "__main__":
    train_yolov5()
    predict_yolov5()


Starting training...
Epoch 1/10


InvalidArgumentError: Graph execution error:

Detected at node compile_loss/lambda/sub defined at (most recent call last):
<stack traces unavailable>
Incompatible shapes: [16,7,7,2] vs. [50176,7,7,2]
	 [[{{node compile_loss/lambda/sub}}]]
	tf2xla conversion failed while converting __inference_one_step_on_data_1757[]. Run with TF_DUMP_GRAPH_PREFIX=/path/to/dump/dir and --vmodule=xla_compiler=2 to obtain a dump of the compiled functions.
	 [[StatefulPartitionedCall]] [Op:__inference_one_step_on_iterator_1816]