In [None]:
# Mount Google Drive at /content/drive; the checkpoint folder below lives on it.
from google.colab import drive
drive.mount('/content/drive')
# Folder and base name that the ModelCheckpoint callback joins into its save path.
model_save_folder = "/content/drive/MyDrive/Colab_Notebooks/saved_models"
model_name = "Global_Wheat_detection_yolov8_trial_2"

Mounted at /content/drive


# import module

In [None]:
!pip install --upgrade -q git+https://github.com/keras-team/keras-cv
!pip install kaggle --upgrade
import os

os.environ['KAGGLE_USERNAME']='datamollu'
os.environ['KAGGLE_KEY']='a6470a9f718e911c4975eaae221b47c0'

!kaggle competitions download -c global-wheat-detection
!unzip -qq "global-wheat-detection.zip"

  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m950.8/950.8 kB[0m [31m17.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for keras-cv (pyproject.toml) ... [?25l[?25hdone
Downloading global-wheat-detection.zip to /content
100% 606M/607M [00:29<00:00, 19.1MB/s]
100% 607M/607M [00:29<00:00, 21.3MB/s]


In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.optimizers.schedules import CosineDecay
from tensorflow.keras.callbacks import Callback
import keras_cv
from keras_cv import bounding_box
from keras_cv import visualization
from keras_cv.api_export import keras_cv_export
from keras_cv.backend import ops
from keras_cv.metrics.coco import compute_pycoco_metrics
from keras_cv.models.object_detection.__internal__ import unpack_input
from keras_cv.utils.conditional_imports import assert_pycocotools_installed
import keras_core

import os
import sys
from tqdm.auto import tqdm
import pycocotools
import gc

import numpy as np
import pandas as pd


import seaborn as sns
import cv2
import matplotlib.pyplot as plt
import glob

Using TensorFlow backend


# CONFIG

In [None]:
class CFG:
    """Central place for every tunable value used by the notebook."""

    # ---- preprocessing -------------------------------------------------
    IMAGE_SIZE = (640, 640)      # (height, width) passed to the resize ops
    BATCH_SIZE = 8
    BOX_FORMAT = "rel_xyxy"      # bounding-box format string handed to keras_cv

    # ---- cosine decay schedule -----------------------------------------
    DECAY_STEPS = 1000
    COSINE_INITIAL_LR = 1e-6
    WARMUP_STEPS = 800
    WARMUP_TARGET = 1e-3
    DECAY_TARGET = 1e-6
    USE_COSINE_DECAY_WITH_WARMUP = "Y"   # "Y" enables the schedule

    # ---- validation ----------------------------------------------------
    SPLIT_RATIO = 0.2            # fraction of images held out for validation

    # ---- training ------------------------------------------------------
    EPOCHS = 5
    NUM_CLASSES = 1
    CLASSES = ["wheat"]
    LEARNING_RATE = 2e-3         # constant LR used when the schedule is disabled
    LOSS_WEIGHTS = dict(box_loss=7.5, classification_loss=0.5)
    FINE_TUNE_EPOCHS = 1
    FINE_TUNE_LR = 0.01
    DO_FINE_TUNE = "Y"           # "Y" runs the frozen-backbone stage first

    # ---- augmentation --------------------------------------------------
    USE_DATA_AUGMENTATION = "Y"
    JITTERING_SCALE_FACTOR = (0.85, 1.15)
    HORIZONTAL_FLIP_RATE = 0.3
    VERTICAL_FLIP_RATE = 0.3
    HEIGHT_ZOOM_BOUND = (-0.15, 0.15)
    WIDTH_ZOOM_BOUND = (-0.15, 0.15)
    COLOR_DEGENERATE_FACTOR = (0.2, 0.5)
    SATURATION_BOUND = (0.2, 0.5)
    MIXUP_ALPHA = 0.5

    # ---- optimizer (no settings yet) -----------------------------------

    # ---- metrics -------------------------------------------------------
    IOU_THRESHHOLDS = np.arange(0.05, 0.95, 0.05)

In [None]:
# set random seed
# The same fixed seed (220) is applied through Keras' helper and to NumPy's
# legacy global RNG so repeated runs start from the same random state.
keras.utils.set_random_seed(220)
np.random.seed(220)

# load dataset

In [None]:
class preprocessing:
    """Load the wheat-detection annotations and expose them as per-image lists.

    On construction the class reads ``train.csv`` and ``sample_submission.csv``
    from ``/content``, lists the ``*.jpg`` files under ``/content/train``,
    parses the ``bbox`` strings (``[xmin, ymin, width, height]``) into numeric
    columns and stores the result in ``self.all_train_images``.

    Note that images without any annotation receive a placeholder
    ``[0,0,0,0]`` box whose area is 0, so the area filter below removes them:
    only images with at least one accepted box remain in the table.
    """

    # Column order used whenever a box is emitted (xmin, ymin, xmax, ymax).
    _BOX_COLUMNS = ["bbox_xmin", "bbox_ymin", "bbox_xmax", "bbox_ymax"]

    def __init__(self):
        self.BASE_DIR = "/content"
        self.TEST_IMAGE_DIR = "/content/test"
        self.TRAIN_IMAGE_DIR = "/content/train"
        self.train_df = pd.read_csv(os.path.join(self.BASE_DIR, "train.csv"))
        self.submission = pd.read_csv(os.path.join(self.BASE_DIR, "sample_submission.csv"))

        self.train_image_paths = glob.glob(os.path.join(self.TRAIN_IMAGE_DIR, "*.jpg"))
        # The image id is the file name without directory and extension.
        image_ids = [
            os.path.splitext(os.path.basename(path))[0]
            for path in self.train_image_paths
        ]
        frame = pd.DataFrame({"image_id": image_ids})
        # Merge all train images with the bounding boxes dataframe
        frame = frame.merge(self.train_df, on="image_id", how="left")

        # Images with no annotation get a zero-size placeholder box.
        frame["bbox"] = frame["bbox"].fillna("[0,0,0,0]")
        bbox_items = frame["bbox"].str.split(",", expand=True)
        frame["bbox_xmin"] = bbox_items[0].str.strip("[ ").astype(float)
        frame["bbox_ymin"] = bbox_items[1].str.strip(" ").astype(float)
        frame["bbox_width"] = bbox_items[2].str.strip(" ").astype(float)
        frame["bbox_height"] = bbox_items[3].str.strip(" ]").astype(float)

        # Drop boxes that are too large or too small (area in square pixels).
        # .copy() makes the filtered table independent of the unfiltered one so
        # the column assignments below do not write into a slice.
        area = frame["bbox_width"] * frame["bbox_height"]
        frame = frame[(area < 17000) & (area > 5)].copy()

        # Add the xmax / ymax corner columns.
        frame["bbox_xmax"] = frame["bbox_xmin"] + frame["bbox_width"]
        frame["bbox_ymax"] = frame["bbox_ymin"] + frame["bbox_height"]

        if CFG.BOX_FORMAT == "rel_xyxy":
            # Relative format: divide each corner by the image size taken from
            # the `width` / `height` columns of train.csv.
            frame["bbox_xmin"] = frame["bbox_xmin"] / frame["width"]
            frame["bbox_ymin"] = frame["bbox_ymin"] / frame["height"]
            frame["bbox_xmax"] = frame["bbox_xmax"] / frame["width"]
            frame["bbox_ymax"] = frame["bbox_ymax"] / frame["height"]

        self.all_train_images = frame

    def parse_bboxes(self, image_id):
        """Return ``(boxes, classes)`` for one image.

        ``boxes`` is a list of ``[xmin, ymin, xmax, ymax]`` lists and
        ``classes`` is a list of ``0`` (the single class id), one per box.
        Both lists are empty when ``image_id`` is not in the table.
        """
        mask = self.all_train_images["image_id"] == image_id
        boxes = self.all_train_images.loc[mask, self._BOX_COLUMNS].values.tolist()
        classes = [0] * len(boxes)
        return boxes, classes

    def get_list(self):
        """Return parallel lists ``(image_paths, bboxes, classes)``, one entry per image.

        Images appear in order of first occurrence in ``self.all_train_images``.
        The table is grouped once instead of being rescanned for every image.
        """
        image_paths = []
        bboxes = []
        classes = []

        # sort=False keeps first-appearance order, matching `.unique()`.
        grouped = self.all_train_images.groupby("image_id", sort=False)
        for image_id, rows in tqdm(grouped):
            image_paths.append(os.path.join(self.TRAIN_IMAGE_DIR, image_id + ".jpg"))
            boxes = rows[self._BOX_COLUMNS].values.tolist()
            bboxes.append(boxes)
            classes.append([0] * len(boxes))

        return image_paths, bboxes, classes

In [None]:
# Build the annotation table, then flatten it into three parallel per-image
# lists (image path, boxes, class ids) that the tf.data pipeline is built from.
pp = preprocessing()
image_paths, bboxes, classes = pp.get_list()

  0%|          | 0/3371 [00:00<?, ?it/s]

# train

In [None]:
# Pair every image path with its class ids and boxes. The per-image lists have
# different lengths, hence the ragged constants.
data = tf.data.Dataset.from_tensor_slices(
    (
        tf.ragged.constant(image_paths),
        tf.ragged.constant(classes),
        tf.ragged.constant(bboxes),
    )
)

# Size of the validation split: SPLIT_RATIO of the distinct images, rounded down.
num_images = len(pp.all_train_images["image_id"].unique())
num_val = int(num_images * CFG.SPLIT_RATIO)
print(f"validation size : {num_val}")

# The first `num_val` elements form the validation set, the rest the training set.
train_data = data.skip(num_val)
val_data = data.take(num_val)

validation size : 674


In [None]:
# `data` has already been built from these objects, so drop the Python-side
# copies and ask the garbage collector to reclaim them.
del pp, image_paths, bboxes, classes
gc.collect()

161

In [None]:
def load_image(image_path):
    """Read a JPEG from `image_path`, decode it to 3 channels and resize it to CFG.IMAGE_SIZE."""
    raw_bytes = tf.io.read_file(image_path)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    return tf.image.resize(decoded, CFG.IMAGE_SIZE)


def load_dataset(image_path, classes, bbox):
    """Turn one (path, classes, boxes) element into the dict layout used downstream.

    Returns ``{"images": float32 image, "bounding_boxes": {"classes", "boxes"}}``;
    class ids are cast to float32 and the boxes are passed through unchanged.
    """
    images = tf.cast(load_image(image_path), tf.float32)
    class_ids = tf.cast(classes, dtype=tf.float32)
    return {
        "images": images,
        "bounding_boxes": {"classes": class_ids, "boxes": bbox},
    }

In [None]:
def _jittered_resize():
    """Build the JitteredResize layer that closes both augmentation variants."""
    return keras_cv.layers.JitteredResize(
        target_size=CFG.IMAGE_SIZE,
        scale_factor=CFG.JITTERING_SCALE_FACTOR,
        bounding_box_format=CFG.BOX_FORMAT,
    )


# Layers that were tried and left out:
# keras_cv.layers.RandomFlip(mode="horizontal", rate=CFG.HORIZONTAL_FLIP_RATE, bounding_box_format=CFG.BOX_FORMAT),
# keras_cv.layers.RandomFlip(mode="vertical", rate=CFG.VERTICAL_FLIP_RATE ,bounding_box_format=CFG.BOX_FORMAT),
#   -> layers missing from the official docs have no GPU support yet; do not use them
# keras_cv.layers.CutMix(alpha=1.0, seed=None)
#   -> supported for segmentation, but not for this task
# keras_cv.layers.GridMask()
#   -> runs, but does not remove the bounding boxes of the masked-out regions
augmentation_layers = []
if CFG.USE_DATA_AUGMENTATION == "Y":
    augmentation_layers += [
        keras_cv.layers.RandomColorDegeneration(factor=CFG.COLOR_DEGENERATE_FACTOR),
        keras_cv.layers.MixUp(alpha=CFG.MIXUP_ALPHA),
        keras_cv.layers.RandomSaturation(factor = CFG.SATURATION_BOUND),
    ]
# The resize layer is created last in both variants, as before.
augmentation_layers.append(_jittered_resize())

augmenter = keras.Sequential(layers=augmentation_layers)

In [None]:
def _batched_pipeline(source, transform):
    """Decode, shuffle, ragged-batch and then apply `transform` to every batch of `source`."""
    return (
        source.map(load_dataset, num_parallel_calls=tf.data.AUTOTUNE)
        .shuffle(CFG.BATCH_SIZE * 4)
        .ragged_batch(CFG.BATCH_SIZE, drop_remainder=True)
        .map(transform, num_parallel_calls=tf.data.AUTOTUNE)
    )


# Training batches go through the augmentation stack.
train_ds = _batched_pipeline(train_data, augmenter)

# Validation batches only go through the resize layer.
resizing = keras_cv.layers.JitteredResize(
    target_size=CFG.IMAGE_SIZE,
    scale_factor=CFG.JITTERING_SCALE_FACTOR,
    bounding_box_format=CFG.BOX_FORMAT,
)

val_ds = _batched_pipeline(val_data, resizing)

In [None]:
# Plot one batch of images together with its ground-truth boxes.
def visualize_dataset(inputs, value_range, rows, cols, bounding_box_format):
    """Draw a rows x cols gallery from the first batch of `inputs`.

    `inputs` must yield dicts with "images" and "bounding_boxes" keys;
    `value_range` and `bounding_box_format` are forwarded to the plotter.
    """
    batch = next(iter(inputs.take(1)))
    visualization.plot_bounding_box_gallery(
        batch["images"],
        value_range=value_range,
        rows=rows,
        cols=cols,
        y_true=batch["bounding_boxes"],
        scale=5,
        font_scale=0.7,
        bounding_box_format=bounding_box_format,
        class_mapping={0 : "wheat"},
    )

# Show a 2x2 gallery for the training pipeline first, then for validation.
for dataset_to_show in (train_ds, val_ds):
    visualize_dataset(
        dataset_to_show,
        bounding_box_format="rel_xyxy",
        value_range=(0, 255),
        rows=2,
        cols=2,
    )

Output hidden; open in https://colab.research.google.com to view.

In [None]:
def dict_to_tuple(inputs):
    """Convert a batch dict into the ``(images, bounding_boxes)`` pair passed to ``fit``."""
    images = inputs["images"]
    boxes = inputs["bounding_boxes"]
    return images, boxes


# Switch both pipelines from dict elements to (images, boxes) tuples and prefetch.
# NOTE: this rebinds train_ds / val_ds, so the cell must not be run twice
# without re-running the cell that builds the pipelines.
train_ds = train_ds.map(
    dict_to_tuple, num_parallel_calls=tf.data.AUTOTUNE
).prefetch(tf.data.AUTOTUNE)

val_ds = val_ds.map(
    dict_to_tuple, num_parallel_calls=tf.data.AUTOTUNE
).prefetch(tf.data.AUTOTUNE)

# Model definition

In [None]:
# Backbone: the "m" (medium) YOLOv8 preset with COCO-pretrained weights.
backbone = keras_cv.models.YOLOV8Backbone.from_preset(
    "yolo_v8_m_backbone_coco"
)

# Detector head on top of the backbone. The class count and box format come
# from CFG so they cannot drift from the values the data pipeline uses.
yolo = keras_cv.models.YOLOV8Detector(
    num_classes=CFG.NUM_CLASSES,
    bounding_box_format=CFG.BOX_FORMAT,
    backbone=backbone,
    fpn_depth=4,
)

Downloading data from https://storage.googleapis.com/keras-cv/models/yolov8/coco/yolov8_m_backbone.h5


In [None]:
# class EvaluateCOCOMetricsCallback(keras.callbacks.Callback):
#     def __init__(self, data, save_path):
#         super().__init__()
#         self.data = data
#         self.metrics = keras_cv.metrics.BoxCOCOMetrics(
#             bounding_box_format=CFG.BOX_FORMAT,
#             evaluate_freq=1e9,
#         )

#         self.save_path = save_path
#         self.best_map = -1.0

#     def on_epoch_end(self, epoch, logs):
#         self.metrics.reset_state()
#         for batch in self.data:
#             images, y_true = batch[0], batch[1]
#             y_pred = self.model.predict(images, verbose=0)
#             self.metrics.update_state(y_true, y_pred) # the update itself runs fine...

#         metrics = self.metrics.result(force=True) # ...so why does computing the result raise an error?
#         logs.update(metrics)

#         current_map = metrics["MaP"]
#         # if current_map > self.best_map:
#         #     self.best_map = current_map
#         #     self.model.save(self.save_path)  # Save the model when mAP improves

#         return logs

# The callback above reportedly cannot be used because it is being reworked upstream:
# https://github.com/keras-team/keras-cv/issues/1994

# This one does not work either:
# coco_metrics_callback = keras_cv.callbacks.PyCOCOCallback(val_ds, bounding_box_format="rel_xyxy")
# Metrics are skipped for now.

# TUNING THE DETECTOR

In [None]:
class GarbageCollectorCallback(tf.keras.callbacks.Callback):
    """Keras callback that forces a garbage-collection pass after every epoch."""

    def on_epoch_end(self, epoch, logs=None):
        # `epoch` and `logs` are unused; the hook only triggers the collector.
        gc.collect()

In [None]:
# freezing
# Optional first stage: train with the backbone frozen, then unfreeze it.
if CFG.DO_FINE_TUNE == "Y":
    callbacks = [GarbageCollectorCallback()]

    # Only the layers outside the backbone are updated during this stage.
    backbone.trainable = False

    optimizer = tf.keras.optimizers.AdamW(learning_rate=CFG.FINE_TUNE_LR)

    yolo.compile(
        optimizer=optimizer,
        box_loss="ciou",
        classification_loss="binary_crossentropy",
        box_loss_weight=7.5,
        classification_loss_weight=0.5,
    )

    yolo.fit(
        train_ds,
        validation_data=val_ds,
        epochs=CFG.FINE_TUNE_EPOCHS,
        callbacks=callbacks,
    )

    # Unfreeze again; the model is recompiled before the main training run.
    backbone.trainable = True



In [None]:
# freeze all batchnorm layers
# Freeze every layer of `yolo` whose name ends in "bn" (the batch-norm layers).
# NOTE(review): only the layers listed directly in `yolo.layers` are visited;
# confirm whether batch-norm layers nested inside sub-models should be frozen too.
bn_layers = [candidate for candidate in yolo.layers if candidate.name.endswith("bn")]
for layer in bn_layers:
    print(f"layer :{layer.name} has been frozen.")
    layer.trainable = False

layer :pa_fpn_p4p5_pre_bn has been frozen.
layer :pa_fpn_p4p5_pre_0_1_bn has been frozen.
layer :pa_fpn_p4p5_pre_0_2_bn has been frozen.
layer :pa_fpn_p4p5_pre_1_1_bn has been frozen.
layer :pa_fpn_p4p5_pre_1_2_bn has been frozen.
layer :pa_fpn_p4p5_pre_2_1_bn has been frozen.
layer :pa_fpn_p4p5_pre_2_2_bn has been frozen.
layer :pa_fpn_p4p5_pre_3_1_bn has been frozen.
layer :pa_fpn_p4p5_pre_3_2_bn has been frozen.
layer :pa_fpn_p4p5_output_bn has been frozen.
layer :pa_fpn_p3p4p5_pre_bn has been frozen.
layer :pa_fpn_p3p4p5_pre_0_1_bn has been frozen.
layer :pa_fpn_p3p4p5_pre_0_2_bn has been frozen.
layer :pa_fpn_p3p4p5_pre_1_1_bn has been frozen.
layer :pa_fpn_p3p4p5_pre_1_2_bn has been frozen.
layer :pa_fpn_p3p4p5_pre_2_1_bn has been frozen.
layer :pa_fpn_p3p4p5_pre_2_2_bn has been frozen.
layer :pa_fpn_p3p4p5_pre_3_1_bn has been frozen.
layer :pa_fpn_p3p4p5_pre_3_2_bn has been frozen.
layer :pa_fpn_p3p4p5_output_bn has been frozen.
layer :pa_fpn_p3p4p5_downsample1_bn has been froze

In [None]:
# Learning rate: either warm-up followed by cosine decay, or a constant value.
if CFG.USE_COSINE_DECAY_WITH_WARMUP == "Y":
    # CosineDecay's `alpha` is a *fraction* of the rate the decay starts from
    # (the warm-up target here), not an absolute learning rate. CFG.DECAY_TARGET
    # is expressed as an absolute rate like CFG.WARMUP_TARGET, so it is
    # converted to a fraction; the schedule then ends at CFG.DECAY_TARGET.
    lr = tf.keras.optimizers.schedules.CosineDecay(
        initial_learning_rate=CFG.COSINE_INITIAL_LR,
        decay_steps=CFG.DECAY_STEPS,
        alpha=CFG.DECAY_TARGET / CFG.WARMUP_TARGET,
        warmup_target=CFG.WARMUP_TARGET,
        warmup_steps=CFG.WARMUP_STEPS,
    )
else:
    lr = CFG.LEARNING_RATE

# Optimizer for the main training run.
optimizer = tf.keras.optimizers.AdamW(
    learning_rate=lr
)

In [None]:
# Callbacks for the main run: early stopping, best-only checkpointing to the
# Drive folder, and a garbage-collection pass after each epoch.
checkpoint_path = os.path.join(model_save_folder, model_name)

early_stopping = tf.keras.callbacks.EarlyStopping(patience=2)
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path,
    save_best_only=True,
)

callbacks = [early_stopping, checkpoint, GarbageCollectorCallback()]

In [None]:
# Compile for the main training run with the scheduled optimizer.
# NOTE(review): classification_loss_weight is 0.7 here, while CFG.LOSS_WEIGHTS
# and the frozen-backbone stage both use 0.5 - confirm which value is intended.
yolo.compile(
    optimizer=optimizer,
    box_loss="ciou",
    classification_loss="binary_crossentropy",
    box_loss_weight=7.5,
    classification_loss_weight=0.7
) # Not sure about other models, but this one seems to sum the cross-entropy loss over all boxes.

In [None]:
# Main training run: CFG.EPOCHS epochs with the callbacks defined above.
# The History object returned by fit() is left as the cell's displayed output.
yolo.fit(
    train_ds,
    validation_data = val_ds,
    epochs=CFG.EPOCHS,
    callbacks=callbacks
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.src.callbacks.History at 0x7e7facafad10>

# inference

In [None]:
def visualize_detections(
    model,
    dataset,
    bounding_box_format,
    max_boxes=100,
    iou_threshold=0.6,
    score_threshold=0.5,
):
    """Plot ground truth and filtered predictions for the first batch of `dataset`.

    Args:
        model: detector whose ``predict`` output contains "boxes",
            "confidence" and "classes" entries.
        dataset: dataset yielding ``(images, y_true)`` batches.
        bounding_box_format: box format string forwarded to the plotter.
        max_boxes: maximum number of boxes kept per image by NMS.
        iou_threshold: IoU above which overlapping boxes are suppressed.
        score_threshold: predictions scoring below this value are discarded.

    The three NMS parameters default to the values that were previously
    hard-coded, so existing calls behave as before.
    """
    images, y_true = next(iter(dataset.take(1)))
    y_pred = model.predict(images)
    y_pred = bounding_box.to_ragged(y_pred)
    # Clamp box coordinates into [0, 1] before filtering and plotting.
    y_pred["boxes"] = tf.clip_by_value(y_pred["boxes"], 0.0, 1.0)

    kept_boxes, kept_confidence, kept_classes = [], [], []
    for idx, (boxes, scores) in enumerate(zip(y_pred["boxes"], y_pred["confidence"])):
        # Per-image NMS; `keep` holds the indices of the surviving boxes.
        keep = tf.image.non_max_suppression(
            boxes,
            scores,
            max_output_size=max_boxes,
            iou_threshold=iou_threshold,
            score_threshold=score_threshold,
        ).numpy()

        kept_boxes.append(list(boxes.numpy()[keep, :]))
        kept_confidence.append(list(scores.numpy()[keep]))
        kept_classes.append(list(y_pred["classes"][idx].numpy()[keep]))

    # Rebuild the prediction dict from the filtered, variable-length lists.
    y_pred = {
        "boxes": tf.ragged.constant(kept_boxes),
        "confidence": tf.ragged.constant(kept_confidence),
        "classes": tf.ragged.constant(kept_classes),
    }
    visualization.plot_bounding_box_gallery(
        images,
        value_range=(0, 255),
        bounding_box_format=bounding_box_format,
        y_true=y_true,
        y_pred=y_pred,
        scale=4,
        rows=2,
        cols=2,
        show=True,
        font_scale=0.7,
        class_mapping={0:"wheat"},
    )


# Draw ten prediction galleries from the validation pipeline.
num_galleries = 10
for gallery_index in range(num_galleries):
    visualize_detections(yolo, dataset=val_ds, bounding_box_format="rel_xyxy")

Output hidden; open in https://colab.research.google.com to view.