Initialize the workspace: install the dependencies and clear leftover files from previous runs.

In [None]:
!pip install -U ipywidgets pyyaml sahi shapely tqdm ultralytics

In [None]:
!rm -rf /kaggle/working/*

Monkey-patch ultralytics weighted fitness function (prioritize mAP50). See [the original source file](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/metrics.py).

Note that subprocesses are unaffected by this patch.

In [None]:
import numpy
import ultralytics.utils.metrics


class Metric(ultralytics.utils.metrics.Metric):
    """Subclass of the ultralytics ``Metric`` whose fitness favours mAP@0.5."""

    def fitness(self):
        """Return the model fitness as a weighted sum of the mean results."""
        # One weight per entry of mean_results(): [P, R, mAP@0.5, mAP@0.5:0.95].
        weights = [0.0, 0.0, 0.6, 0.4]
        weighted = numpy.array(self.mean_results()) * weights
        return weighted.sum()


# Rebind the module attribute so later lookups of ``ultralytics.utils.metrics.Metric``
# in this process resolve to the subclass above.
ultralytics.utils.metrics.Metric = Metric

Setup global configurations.

In [None]:
import io
import itertools
import random
import re
import shutil
from pathlib import Path
from typing import Any, Iterable, List, Tuple, TypeVar

import torch
import yaml
from sahi import AutoDetectionModel
from tqdm import tqdm
from ultralytics import YOLO
from sahi.predict import PredictionResult, get_prediction
from ultralytics.nn.tasks import DetectionModel
from ultralytics.utils.loss import v8DetectionLoss
from ultralytics.utils.tal import make_anchors
from ultralytics.models.yolo.detect import DetectionTrainer


# Root directory under which the Kaggle inputs used below are located.
KAGGLE_INPUT = Path("/kaggle/input")
# Competition dataset: /kaggle/input/soict-hackathon-2024
KAGGLE_DATASET = KAGGLE_INPUT / "soict-hackathon-2024"
# Optional pre-trained checkpoints (classifier.pt, daytime.pt, nighttime.pt):
# /kaggle/input/sh2024-models/pytorch/map50-0.9/3
KAGGLE_MODEL = KAGGLE_INPUT / "sh2024-models" / "pytorch" / "map50-0.9" / "3"
# Output directory; generated datasets and prediction files are written here.
KAGGLE_WORKSPACE = Path("/kaggle/working")
# Value passed as ``imgsz`` to every train and predict call in this notebook.
IMAGE_SIZE = 640

See available [train settings](https://docs.ultralytics.com/modes/train/#train-settings) and [augmentation arguments](https://docs.ultralytics.com/modes/train/#augmentation-settings-and-hyperparameters).

In [None]:
# Keyword overrides forwarded to ``model.train`` for the day/night classifier.
CLASSIFY_ARGS = {
    "epochs": 3,
    "imgsz": IMAGE_SIZE,
    "hsv_v": 0,
}
# Keyword overrides forwarded to ``model.train`` for both object detectors
# (daytime and nighttime share the same settings).
DETECT_ARGS = {
    "epochs": 50,
    "imgsz": IMAGE_SIZE,
    "hsv_v": 0,
    "degrees": 10,
    "shear": 10,
    "mosaic": 0,
}

Transform dataset format for classification. See [supported dataset formats](https://docs.ultralytics.com/datasets/classify).

We perform operations within a function in order not to pollute the global namespace.

In [None]:
def make_classification_dataset() -> Path:
    """Build a day/night classification dataset made of symlinks.

    The layout is ``<workspace>/classification/{train,test,val}/{daytime,nighttime}``.
    Every file of the source ``daytime`` / ``nighttime`` folders is assigned at
    random to one split with an 80/10/10 weighting and symlinked there.

    The source folders also hold the ``.txt`` detection labels next to the
    images; those are skipped so that only images end up in the class folders
    and the printed sizes count images only.

    Any previous ``classification`` directory is removed first.

    Returns:
        The root directory of the generated dataset.
    """
    target = KAGGLE_WORKSPACE / "classification"
    shutil.rmtree(target, ignore_errors=True)

    splits = ("train", "test", "val")
    ratio = (0.8, 0.1, 0.1)  # weights aligned with ``splits``
    dataset = KAGGLE_DATASET / "train_20241023"

    for label in ("daytime", "nighttime"):
        split_dirs = [target / split / label for split in splits]
        for split_dir in split_dirs:
            split_dir.mkdir(parents=True)

        for file in dataset.joinpath(label).iterdir():
            if file.suffix == ".txt":
                # Detection label file, not an image.
                continue

            destination = random.choices(split_dirs, weights=ratio, k=1)[0]
            destination.joinpath(file.name).symlink_to(file)

        sizes = ", ".join(str(sum(1 for _ in split_dir.iterdir())) for split_dir in split_dirs)
        print(f"{label.capitalize()} dataset sizes:", sizes)

    return target

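Train the classification model, or reuse `classifier.pt` from the Kaggle model input when it is available. Note that the training below runs in-process; launching it through a shell command instead would let the subprocess free its memory once training completes, thus avoiding memory exhaustion.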

In [None]:
def train_classifier() -> None:
    """Provide ``runs/classify/train/weights/best.pt`` for the day/night classifier.

    When ``classifier.pt`` exists in ``KAGGLE_MODEL`` it is copied to that
    location, so later cells can load it as if it had just been trained.
    Otherwise a ``yolo11n-cls.pt`` model is trained on a freshly generated
    classification dataset using ``CLASSIFY_ARGS``.
    """
    classifier_path = KAGGLE_MODEL / "classifier.pt"
    if classifier_path.is_file():
        # Standard-library calls instead of shelling out: they raise on failure
        # and do not depend on how the path is quoted.
        weights_dir = Path("runs/classify/train/weights")
        weights_dir.mkdir(parents=True, exist_ok=True)
        shutil.copy(classifier_path, weights_dir / "best.pt")
        return

    model = YOLO("yolo11n-cls.pt")
    model.train(data=make_classification_dataset(), **CLASSIFY_ARGS)


train_classifier()

Transform dataset format for object detection. See [supported dataset formats](https://docs.ultralytics.com/datasets/detect/).

In [None]:
def make_object_detection_dataset(
    *,
    source: Path,
    target: Path,
) -> Path:
    """Arrange ``source`` as a detection dataset under ``target`` using symlinks.

    Files are expected to be named ``cam_<id>_<5 digits>...``. Cameras with an
    id below 8 go to the ``train`` split and all others to ``val``. ``.jpg``
    files are linked under ``images/<split>`` and ``.txt`` files under
    ``labels/<split>``; other suffixes are ignored. Files whose name does not
    follow the camera pattern are skipped and reported, rather than aborting
    the whole run.

    Any previous ``target`` directory is removed first.

    Args:
        source: Directory holding the images and their label files side by side.
        target: Directory in which the dataset and ``data.yaml`` are created.

    Returns:
        The path of the generated ``data.yaml``.
    """
    shutil.rmtree(target, ignore_errors=True)

    images_train = target / "images" / "train"
    images_val = target / "images" / "val"
    labels_train = target / "labels" / "train"
    labels_val = target / "labels" / "val"
    subdirs = (images_train, images_val, labels_train, labels_val)

    for subdir in subdirs:
        subdir.mkdir(parents=True)

    camera_pattern = re.compile(r"^cam_(\d+)_\d{5}")
    skipped = 0
    for file in source.iterdir():
        match = camera_pattern.search(file.stem)
        if match is None:
            # Not a camera frame or label: there is no camera id to split on.
            skipped += 1
            continue

        if int(match.group(1)) < 8:
            image_dir, label_dir = images_train, labels_train
        else:
            image_dir, label_dir = images_val, labels_val

        if file.suffix == ".jpg":
            image_dir.joinpath(file.name).symlink_to(file)
        elif file.suffix == ".txt":
            label_dir.joinpath(file.name).symlink_to(file)

    if skipped:
        print(f"Skipped {skipped} file(s) in {source} not named like cam_<id>_<5 digits>")

    for subdir in subdirs:
        print(f"Size of {subdir}: {sum(1 for _ in subdir.iterdir())}")

    data_yaml = target / "data.yaml"
    with data_yaml.open("w", encoding="utf-8") as config:
        print(f"Writing to {data_yaml}")
        config.write(
            yaml.dump(
                {
                    "path": str(target),
                    "train": str(images_train),
                    "val": str(images_val),
                    "names": {
                        0: "motorbike",
                        1: "car",
                        2: "coach",
                        3: "container",
                    },
                },
            ),
        )

    return data_yaml


# Build the daytime dataset first, then the nighttime one; each call returns
# the path of the data.yaml it wrote.
daytime_yaml, nighttime_yaml = (
    make_object_detection_dataset(
        source=KAGGLE_DATASET / "train_20241023" / period,
        target=KAGGLE_WORKSPACE / period,
    )
    for period in ("daytime", "nighttime")
)

In [None]:
!cat {daytime_yaml}

In [None]:
!cat {nighttime_yaml}

Customize loss function.

In [None]:
class CustomDetectionLoss(v8DetectionLoss):
    """Detection loss that multiplies the classification loss of class index 0 by 10.

    NOTE(review): apart from that weighting, the body looks like a copy of the
    parent's ``__call__`` — confirm against the installed ultralytics version
    whenever the library is upgraded.
    """

    def __call__(self, preds, batch):
        """Calculate the sum of the loss for box, cls and dfl multiplied by batch size.

        Args:
            preds: Feature maps, or a tuple whose second element holds them.
            batch: Mapping that provides ``batch_idx``, ``cls`` and ``bboxes``.

        Returns:
            A pair ``(total, parts)``: ``total`` is the sum of the three gained
            losses multiplied by the batch size, and ``parts`` is the detached
            3-element tensor ``[box, cls, dfl]``.
        """
        loss = torch.zeros(3, device=self.device)  # box, cls, dfl
        # When preds is a tuple, the feature maps are its second element.
        feats = preds[1] if isinstance(preds, tuple) else preds
        # Flatten each feature map to (batch, self.no, -1), concatenate along the last
        # axis, then split the channels into box-distribution and class-score parts.
        pred_distri, pred_scores = torch.cat([xi.view(feats[0].shape[0], self.no, -1) for xi in feats], 2).split(
            (self.reg_max * 4, self.nc), 1
        )

        # Move the channel axis last.
        pred_scores = pred_scores.permute(0, 2, 1).contiguous()
        pred_distri = pred_distri.permute(0, 2, 1).contiguous()

        dtype = pred_scores.dtype
        batch_size = pred_scores.shape[0]
        imgsz = torch.tensor(feats[0].shape[2:], device=self.device, dtype=dtype) * self.stride[0]  # image size (h,w)
        anchor_points, stride_tensor = make_anchors(feats, self.stride, 0.5)

        # Targets
        targets = torch.cat((batch["batch_idx"].view(-1, 1), batch["cls"].view(-1, 1), batch["bboxes"]), 1)
        targets = self.preprocess(targets.to(self.device), batch_size, scale_tensor=imgsz[[1, 0, 1, 0]])
        gt_labels, gt_bboxes = targets.split((1, 4), 2)  # cls, xyxy
        # Marks rows whose box coordinates sum to more than zero.
        mask_gt = gt_bboxes.sum(2, keepdim=True).gt_(0.0)

        # Pboxes
        pred_bboxes = self.bbox_decode(anchor_points, pred_distri)  # xyxy, (b, h*w, 4)
        # Disabled experiment: blend a DFL-based confidence into the assigner scores.
        # dfl_conf = pred_distri.view(batch_size, -1, 4, self.reg_max).detach().softmax(-1)
        # dfl_conf = (dfl_conf.amax(-1).mean(-1) + dfl_conf.amax(-1).amin(-1)) / 2

        _, target_bboxes, target_scores, fg_mask, _ = self.assigner(
            # pred_scores.detach().sigmoid() * 0.8 + dfl_conf.unsqueeze(-1) * 0.2,
            pred_scores.detach().sigmoid(),
            (pred_bboxes.detach() * stride_tensor).type(gt_bboxes.dtype),
            anchor_points * stride_tensor,
            gt_labels,
            gt_bboxes,
            mask_gt,
        )

        # Clamp the normaliser to at least 1 so the divisions below stay finite.
        target_scores_sum = max(target_scores.sum(), 1)

        # Cls loss
        cls_loss = self.bce(pred_scores, target_scores.to(dtype))
        # Up-weight the classification loss of class index 0 by 10x.
        # NOTE(review): this was previously described as the "background class", but the
        # data.yaml written by make_object_detection_dataset maps index 0 to "motorbike"
        # and the model is built with nc=4 — confirm which class is meant to be boosted.
        cls_loss[:, :, 0] *= 10
        loss[1] = cls_loss.sum() / target_scores_sum  # BCE

        # Bbox loss
        # Skipped when the assigner selected no foreground anchors; box and dfl stay 0.
        if fg_mask.sum():
            target_bboxes /= stride_tensor
            loss[0], loss[2] = self.bbox_loss(
                pred_distri, pred_bboxes, anchor_points, target_bboxes, target_scores, target_scores_sum, fg_mask
            )

        loss[0] *= self.hyp.box  # box gain
        loss[1] *= self.hyp.cls  # cls gain
        loss[2] *= self.hyp.dfl  # dfl gain

        return loss.sum() * batch_size, loss.detach()  # loss(box, cls, dfl)


class CustomModel(DetectionModel):
    """``DetectionModel`` that uses ``CustomDetectionLoss`` as its criterion."""

    def init_criterion(self) -> CustomDetectionLoss:
        """Create the loss object bound to this model."""
        criterion = CustomDetectionLoss(self)
        return criterion


class CustomTrainer(DetectionTrainer):
    """``DetectionTrainer`` that builds a ``CustomModel`` with 4 classes."""

    def get_model(self, cfg: Any = None, weights: Any = None, verbose: bool = True) -> CustomModel:
        """Construct the model and load ``weights`` into it when they are given."""
        model = CustomModel(cfg, nc=4, verbose=verbose)
        if weights is None:
            return model

        model.load(weights)
        return model

Train daytime object detection model.

In [None]:
def train_daytime() -> None:
    """Train the daytime detector on ``daytime_yaml`` with ``CustomTrainer``.

    Starts from ``daytime.pt`` in ``KAGGLE_MODEL`` when that file exists and
    from ``yolo11s.pt`` otherwise.

    NOTE: a later cell loads this run from ``runs/detect/train/weights/best.pt``.
    """
    checkpoint = KAGGLE_MODEL / "daytime.pt"
    pretrained = checkpoint if checkpoint.is_file() else "yolo11s.pt"
    YOLO(pretrained).train(trainer=CustomTrainer, data=daytime_yaml, **DETECT_ARGS)


train_daytime()

Train nighttime object detection model.

In [None]:
def train_nighttime() -> None:
    """Train the nighttime detector on ``nighttime_yaml`` with ``CustomTrainer``.

    Starts from ``nighttime.pt`` in ``KAGGLE_MODEL`` when that file exists and
    from ``yolo11s.pt`` otherwise.

    NOTE: a later cell loads this run from ``runs/detect/train2/weights/best.pt``.
    """
    checkpoint = KAGGLE_MODEL / "nighttime.pt"
    pretrained = checkpoint if checkpoint.is_file() else "yolo11s.pt"
    YOLO(pretrained).train(trainer=CustomTrainer, data=nighttime_yaml, **DETECT_ARGS)


train_nighttime()

Remove downloaded models, if any.

In [None]:
!rm -f yolo11n.pt yolo11n-cls.pt yolo11s.pt

There are 2 prediction methods: with and without [SAHI sliced inference](https://docs.ultralytics.com/guides/sahi-tiled-inference).

In [None]:
# Day/night classifier: trained above, or copied from KAGGLE_MODEL by train_classifier().
classifier = YOLO("runs/classify/train/weights/best.pt", task="classify")
# Plain detectors. The "train" / "train2" run directories are assumed to belong to
# train_daytime() and train_nighttime() respectively, i.e. the cells ran in that order
# on a clean runs/detect directory — TODO confirm before reusing an old workspace.
daytime = YOLO("runs/detect/train/weights/best.pt", task="detect")
nighttime = YOLO("runs/detect/train2/weights/best.pt", task="detect")
# The same two checkpoints wrapped for SAHI.
# NOTE(review): the detectors start from yolo11s.pt while model_type is "yolov8";
# presumably SAHI's loader accepts both — confirm for the installed SAHI version.
daytime_sahi = AutoDetectionModel.from_pretrained(
    model_type="yolov8",
    model_path="runs/detect/train/weights/best.pt",
)
nighttime_sahi = AutoDetectionModel.from_pretrained(
    model_type="yolov8",
    model_path="runs/detect/train2/weights/best.pt",
)

Classify daytime and nighttime images first.

In [None]:
daytime_images: List[Path] = []
nighttime_images: List[Path] = []
for file in tqdm(KAGGLE_DATASET.joinpath("public test").iterdir()):
    # One image per call: keeping the results of the whole test set may not fit in memory.
    result = classifier.predict(file, imgsz=IMAGE_SIZE, verbose=False)[0]
    prob = result.probs.data
    # Index 0 is treated as daytime and index 1 as nighttime; a tie goes to nighttime.
    bucket = daytime_images if prob[0] > prob[1] else nighttime_images
    bucket.append(file)

print(f"Found {len(daytime_images)} daytime images and {len(nighttime_images)} nighttime images")

Predict with [SAHI sliced inference](https://docs.ultralytics.com/guides/sahi-tiled-inference).

In [None]:
def write_sahi(writer: io.TextIOWrapper, file: Path, result: PredictionResult) -> None:
    """Write one line per detection of ``result`` to ``writer``.

    Each line reads ``<file name> <class id> <cx> <cy> <w> <h> <score>``. The
    box centre and size are divided by the image width (x, w) or height (y, h).
    """
    image_w = result.image_width
    image_h = result.image_height
    for prediction in result.object_prediction_list:
        box = prediction.bbox
        centerx = (box.minx + box.maxx) / (2 * image_w)
        centery = (box.miny + box.maxy) / (2 * image_h)
        width = (box.maxx - box.minx) / image_w
        height = (box.maxy - box.miny) / image_h
        line = f"{file.name} {prediction.category.id} {centerx} {centery} {width} {height} {prediction.score.value}\n"
        writer.write(line)


# Daytime images go through the daytime model, then nighttime images through the
# nighttime model; all detections are collected in a single predict-sahi.txt.
# NOTE(review): get_prediction looks like SAHI's full-image inference call, with
# get_sliced_prediction being the tiled variant — confirm which one is intended here.
with KAGGLE_WORKSPACE.joinpath("predict-sahi.txt").open("w", encoding="utf-8") as writer:
    for image_paths, sahi_model in (
        (daytime_images, daytime_sahi),
        (nighttime_images, nighttime_sahi),
    ):
        for file in tqdm(image_paths):
            prediction = get_prediction(str(file), sahi_model)
            write_sahi(writer, file, prediction)

Predict without [SAHI sliced inference](https://docs.ultralytics.com/guides/sahi-tiled-inference).

In [None]:
T = TypeVar("T")


def write(file: Path, writer: io.TextIOWrapper) -> None:
    """Copy every line of ``file`` to ``writer``, prefixed with ``<file stem>.jpg``."""
    prefix = f"{file.stem}.jpg "
    with file.open("r") as reader:
        for row in reader:
            writer.write(prefix + row)


def batched(iterable: Iterable[T], n: int) -> Iterable[Tuple[T, ...]]:
    """Yield consecutive tuples of up to ``n`` items taken from ``iterable``.

    The last tuple may be shorter; nothing is yielded for an empty iterable.

    Raises:
        ValueError: If ``n`` is smaller than 1 (raised when iteration starts).
    """
    if n < 1:
        raise ValueError("n < 1")

    source = iter(iterable)
    while True:
        chunk = tuple(itertools.islice(source, n))
        if not chunk:
            return

        yield chunk


# Run each detector over its own image list, 10 images per call. The label files
# requested with save_txt / save_conf are the only output that is used afterwards.
for detector, image_paths in ((daytime, daytime_images), (nighttime, nighttime_images)):
    for files in batched(image_paths, 10):
        predictions = detector.predict(
            files,
            imgsz=IMAGE_SIZE,
            stream=True,
            save_conf=True,
            save_txt=True,
            verbose=False,
        )
        # Consume the results without keeping them.
        for _ in predictions:
            pass

# Merge the per-image label files into a single predict.txt.
# Assumes the daytime labels were saved under "predict" and the nighttime ones
# under "predict2" — TODO confirm when either image list can be empty.
detect = KAGGLE_WORKSPACE / "runs" / "detect"
target = KAGGLE_WORKSPACE / "predict.txt"
with target.open("w", encoding="utf-8") as writer:
    for run_name in ("predict", "predict2"):
        for file in detect.joinpath(run_name, "labels").iterdir():
            write(file, writer)