In [1]:
import logging
from collections import deque
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Union

import cv2
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import yaml
from ultralytics import YOLO

2024-11-11 10:44:58.354340: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-11-11 10:44:58.361697: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-11-11 10:44:58.371517: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-11-11 10:44:58.374403: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-11-11 10:44:58.381822: I tensorflow/core/platform/cpu_feature_guar

## Configurations


In [2]:
logging.basicConfig(
    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
)

### Detection


In [3]:
@dataclass
class TrainingConfig:
    epochs: int = 100
    image_size: int = 250
    batch_size: int = 16
    device: str = "0"
    patience: int = 20
    dropout: float = 0.2
    augment: bool = True
    mosaic: float = 1.0
    mixup: float = 0.3
    copy_paste: float = 0.3
    degrees: float = 45.0
    translate: float = 0.2
    scale: float = 0.5
    shear: float = 10.0
    perspective: float = 0.0005
    flipud: float = 0.5
    fliplr: float = 0.5
    hsv_h: float = 0.015
    hsv_s: float = 0.7
    hsv_v: float = 0.4

In [4]:
@dataclass
class DetectionConfig:
    conf_threshold: float = 0.30
    iou_threshold: float = 0.60
    min_area: int = 500
    max_overlap_ratio: float = 0.8
    temporal_window: int = 3
    enable_nms: bool = True
    enable_temporal: bool = True
    enable_size_filter: bool = True

In [26]:
@dataclass
class ConsistencyConfig:
    num_inference_passes: int = 3
    confidence_threshold: float = 0.3
    consistency_threshold: float = 0.7

    enable_tta: bool = True
    tta_scales: List[float] = None
    tta_flips: bool = True

    min_detection_votes: int = 2
    calibrate_confidence: bool = True
    confidence_scaling_factor: float = 1.2
    min_relative_size: float = 0.01
    max_relative_size: float = 0.8

    def __post_init__(self):
        if self.tta_scales is None:
            self.tta_scales = [0.9, 1.0, 1.1]

## Detection Tracker


In [6]:
class DetectionTracker:
    def __init__(self, window_size: int = 3):
        self.window_size = window_size
        self.detection_history = deque(maxlen=window_size)

    def update(self, detections: np.ndarray) -> np.ndarray:
        """
        Updates the detection history with the new detections and returns the
        filtered detections.

        Args:
            detections (np.ndarray): The detections to be added to the history.

        Returns:
            np.ndarray: The filtered detections.
        """
        if len(detections) == 0:
            return detections

        self.detection_history.append(detections)

        if len(self.detection_history) < 2:
            return detections

        weights = np.linspace(0.5, 1.0, len(self.detection_history))
        weights /= np.sum(weights)

        smoothed = np.zeros_like(detections)
        for i, (det, w) in enumerate(zip(self.detection_history, weights)):
            if det.shape == detections.shape:
                smoothed += w * det

        return smoothed

## YOLO


In [7]:
class YOLOBase:
    def __init__(
        self,
        model_path: Union[str, Path],
        detection_config: Optional[DetectionConfig] = None,
    ):
        self.model_path = Path(model_path)
        self.detection_config = detection_config or DetectionConfig()
        self.tracker = DetectionTracker(self.detection_config.temporal_window)
        self.model = None

    def _load_model(self, weights_path: Optional[Path] = None) -> bool:
        try:
            if weights_path is not None and weights_path.exists():
                self.model = YOLO(str(self.model_path))
                self.model.load(str(weights_path))
                logging.info(
                    f"Loaded base model from {self.model_path} with weights from {weights_path}"
                )

            else:
                self.model = YOLO(str(self.model_path))
                logging.info(f"Loaded model from {self.model_path}")

            return True

        except Exception as e:
            logging.error(f"Failed to load model: {e}")
            return False

    def _compute_intersection(self, box1: np.ndarray, box2: np.ndarray) -> float:
        """
        Computes the intersection area between two bounding boxes.

        Args:
            box1 (np.ndarray): The first bounding box.
            box2 (np.ndarray): The second bounding box.

        Returns:
            float: The intersection area.
        """
        x1 = max(box1[0], box2[0])
        y1 = max(box1[1], box2[1])
        x2 = min(box1[2], box2[2])
        y2 = min(box1[3], box2[3])

        return max(0, x2 - x1) * max(0, y2 - y1)

    def post_process_detection(
        self, boxes: np.ndarray, scores: np.ndarray, class_ids: np.ndarray
    ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
        """
        Post-processes the raw detections by applying NMS and size filtering.

        Args:
            boxes (np.ndarray): The detected bounding boxes.
            scores (np.ndarray): The detection scores.
            class_ids (np.ndarray): The detected class IDs.

        Returns:
            Tuple[np.ndarray, np.ndarray, np.ndarray]: The post-processed detections.
        """

        if len(boxes) == 0:
            return boxes, scores, class_ids

        if self.detection_config.enable_nms:
            indices = cv2.dnn.NMSBoxes(
                boxes.tolist(),
                scores.tolist(),
                self.detection_config.conf_threshold,
                self.detection_config.iou_threshold,
            )
            boxes = boxes[indices.flatten()]
            scores = scores[indices.flatten()]
            class_ids = class_ids[indices.flatten()]

        if self.detection_config.enable_size_filter:
            areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
            mask = areas >= self.detection_config.min_area
            boxes = boxes[mask]
            scores = scores[mask]
            class_ids = class_ids[mask]

        final_boxes, final_scores, final_class_ids = [], [], []
        for idx in range(len(boxes)):
            overlap_too_high = False
            box_area = (boxes[idx, 2] - boxes[idx, 0]) * (boxes[idx, 3] - boxes[idx, 1])

            for j in range(len(final_boxes)):
                intersection = self._compute_intersection(boxes[idx], final_boxes[j])
                if intersection / box_area > self.detection_config.max_overlap_ratio:
                    overlap_too_high = True
                    break

            if not overlap_too_high:
                final_boxes.append(boxes[idx])
                final_scores.append(scores[idx])
                final_class_ids.append(class_ids[idx])

        return np.array(final_boxes), np.array(final_scores), np.array(final_class_ids)

## Trainer


In [None]:
class YOLOTrainer(YOLOBase):
    def __init__(
        self,
        model_path: Union[str, Path] = "yolov8n.pt",
        training_config: Optional[TrainingConfig] = None,
        detection_config: Optional[DetectionConfig] = None,
    ):
        super().__init__(model_path, detection_config)

        self.training_config = training_config or TrainingConfig()
        self._load_model()

    def train(
        self,
        data_yaml_path: Union[str, Path],
        epochs: Optional[int] = 50,
        image_size: Optional[int] = 320,
        batch_size: Optional[int] = 16,
    ):
        data_yaml_path = Path(data_yaml_path).resolve()
        data_dir = data_yaml_path.parent

        train_path = (data_dir / "train" / "images").resolve()
        val_path = (data_dir / "valid" / "images").resolve()
        self._verify_data_paths(train_path, val_path)

        data_config = self._create_data_config(train_path, val_path)
        with open(data_yaml_path, "w") as f:
            yaml.safe_dump(data_config, f, sort_keys=False)

        self.results = self.model.train(
            data=data_yaml_path,
            epochs=epochs or self.training_config.epochs,
            imgsz=image_size or self.training_config.image_size,
            batch=batch_size or self.training_config.batch_size,
            device=self.training_config.device,
            patience=self.training_config.patience,
            save=True,
            plots=True,
            augment=self.training_config.augment,
            dropout=self.training_config.dropout,
            **{
                k: v
                for k, v in vars(self.training_config).items()
                if k in ["mosaic", "mixup", "copy_paste"]
            },
        )

        return self.model, self.results

    def _create_data_config(self, train_path: Path, val_path: Path) -> dict:
        """Create YOLO training configuration"""
        config = {
            "train": str(train_path),
            "val": str(val_path),
            "nc": 1,
            "names": ["cow"],
        }

        augment_params = {
            k: v
            for k, v in vars(self.training_config).items()
            if k
            in [
                "mosaic",
                "mixup",
                "copy_paste",
                "degrees",
                "translate",
                "scale",
                "shear",
                "perspective",
                "flipud",
                "fliplr",
                "hsv_h",
                "hsv_s",
                "hsv_v",
            ]
        }
        config.update(augment_params)
        return config

    def _verify_data_paths(self, train_path: Path, val_path: Path):
        """Verify that data paths exist"""
        if not train_path.exists():
            raise FileNotFoundError(f"Training directory not found: {train_path}")

        if not val_path.exists():
            raise FileNotFoundError(f"Validation directory not found: {val_path}")

## Inference Preparation


In [119]:
class YOLOInference:
    def __init__(
        self, model_path: str = "yolov8m.pt", weights_path: Optional[str] = None
    ):
        """
        Initialize YOLO model with optional custom weights
        """
        if weights_path:
            self.model = YOLO(weights_path)

        else:
            self.model = YOLO(model_path)

    def predict_image(
        self,
        image_path: str,
        save_path: Optional[str] = None,
        conf_threshold: float = 0.3,
        iou_threshold: float = 0.7,
    ) -> Tuple[np.ndarray, Any]:
        image = cv2.imread(image_path)
        if image is None:
            raise ValueError(f"Could not read image: {image_path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Run inference with NMS parameters
        results = self.model(
            image, verbose=False, conf=conf_threshold, iou=iou_threshold, max_det=20
        )[0]

        # Draw boxes
        annotated_image = image.copy()
        if results.boxes is not None and len(results.boxes) > 0:
            boxes = results.boxes.cpu().numpy()

            # Sort boxes by confidence
            confidences = [float(box.conf[0]) for box in boxes]
            sorted_indices = np.argsort(confidences)[::-1]

            for idx in sorted_indices:
                box = boxes[idx]
                # Get coordinates and confidence
                x1, y1, x2, y2 = map(int, box.xyxy[0])
                confidence = float(box.conf[0])

                # Draw box
                cv2.rectangle(annotated_image, (x1, y1), (x2, y2), (0, 255, 0), 2)

                # Create label
                label = f"{confidence:.2f}"
                font = cv2.FONT_HERSHEY_SIMPLEX
                font_scale = 0.6
                thickness = 2

                # Get label size
                (label_width, label_height), baseline = cv2.getTextSize(
                    label, font, font_scale, thickness
                )

                # Draw label background
                label_patch = (
                    np.ones(
                        (label_height + 2 * baseline, label_width + 2 * baseline, 3),
                        dtype=np.uint8,
                    )
                    * 255
                )
                cv2.putText(
                    label_patch,
                    label,
                    (baseline, label_height),
                    font,
                    font_scale,
                    (0, 0, 0),
                    thickness,
                )

                # Calculate label position
                label_y = max(y1 - label_height - baseline, 0)
                label_x = max(x1, 0)

                # Add label to image using alpha blending
                if (
                    label_y + label_height <= image.shape[0]
                    and label_x + label_width <= image.shape[1]
                ):
                    alpha = 0.7
                    label_height, label_width = label_patch.shape[:2]
                    roi = annotated_image[
                        label_y : label_y + label_height,
                        label_x : label_x + label_width,
                    ]
                    try:
                        cv2.addWeighted(label_patch, alpha, roi, 1 - alpha, 0, roi)
                    except Exception:
                        # Fallback to simpler label if blending fails
                        cv2.putText(
                            annotated_image,
                            label,
                            (label_x, label_y + label_height),
                            font,
                            font_scale,
                            (0, 0, 0),
                            thickness,
                        )

        # Save if path provided
        if save_path:
            save_dir = Path(save_path).parent
            save_dir.mkdir(parents=True, exist_ok=True)
            cv2.imwrite(save_path, cv2.cvtColor(annotated_image, cv2.COLOR_RGB2BGR))

        return annotated_image, results

In [100]:
def create_inference_pipeline(
    base_model_path: str = "yolov8m.pt",
    weights_path: str = "runs/detect/train29/weights/last.pt",
    **config_kwargs,
) -> YOLOInference:
    """Creates an inference pipeline with specified weights."""
    consistency_config = ConsistencyConfig(
        **{k: v for k, v in config_kwargs.items() if hasattr(ConsistencyConfig, k)}
    )

    detection_config = DetectionConfig(
        **{k: v for k, v in config_kwargs.items() if hasattr(DetectionConfig, k)}
    )

    return YOLOInference(
        model_path=base_model_path,
        weights_path=weights_path,
        consistency_config=consistency_config,
        detection_config=detection_config,
    )

## Main Execution


In [101]:
detection_config = DetectionConfig(
    conf_threshold=0.3, iou_threshold=0.6, min_area=500, temporal_window=3
)

training_config = TrainingConfig(epochs=50, image_size=640, batch_size=16, dropout=0.2)

In [None]:
trainer = YOLOTrainer(
    model_path="yolov8x.pt",
    training_config=training_config,
    detection_config=detection_config,
)

2024-11-11 10:45:01,292 - INFO - Loaded model from yolov8m.pt


### Train Loop


In [14]:
model, results = trainer.train("../../data/yolo/data.yaml")

New https://pypi.org/project/ultralytics/8.3.29 available 😃 Update with 'pip install -U ultralytics'
Ultralytics 8.3.28 🚀 Python-3.10.13 torch-2.4.1.post302 CUDA:0 (NVIDIA GeForce RTX 3060 Laptop GPU, 6144MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8m.pt, data=/home/carlos/developer/cow-project/data/yolo/data.yaml, epochs=50, time=None, patience=20, batch=16, imgsz=320, save=True, save_period=-1, cache=False, device=0, workers=8, project=None, name=train36, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.2, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=True, agnostic_nms=False, class

[34m[1mtrain: [0mScanning /home/carlos/developer/cow-project/data/yolo/train/labels.cache... 6469 images, 1218 backgrounds, 0 corrupt: 100%|██████████| 7283/7283 [00:00<?, ?it/s]


[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1, 4.0), tile_grid_size=(8, 8))


  check_for_updates()
[34m[1mval: [0mScanning /home/carlos/developer/cow-project/data/yolo/valid/labels.cache... 1618 images, 144 backgrounds, 0 corrupt: 100%|██████████| 1658/1658 [00:00<?, ?it/s]


Plotting labels to runs/detect/train36/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.002, momentum=0.9) with parameter groups 77 weight(decay=0.0), 84 weight(decay=0.0005), 83 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added ✅
Image sizes 320 train, 320 val
Using 8 dataloader workers
Logging results to [1mruns/detect/train36[0m
Starting training for 50 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/50       2.4G       1.24      1.117      1.393         47        320: 100%|██████████| 456/456 [01:20<00:00,  5.64it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:08<00:00,  6.15it/s]


                   all       1658       6636      0.844      0.865      0.901      0.645

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/50      2.18G      1.242      1.045      1.397         29        320: 100%|██████████| 456/456 [01:13<00:00,  6.16it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:08<00:00,  6.16it/s]

                   all       1658       6636      0.893      0.898      0.929      0.676






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       3/50      2.18G      1.213      1.012      1.385         28        320: 100%|██████████| 456/456 [01:14<00:00,  6.15it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:11<00:00,  4.63it/s]


                   all       1658       6636      0.868      0.898      0.894      0.634

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       4/50      2.19G      1.155     0.9598      1.352         25        320: 100%|██████████| 456/456 [01:13<00:00,  6.20it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:09<00:00,  5.70it/s]


                   all       1658       6636      0.891      0.868       0.92      0.677

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       5/50      2.17G      1.117       0.92      1.325         11        320: 100%|██████████| 456/456 [01:15<00:00,  6.07it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:12<00:00,  4.04it/s]

                   all       1658       6636      0.909      0.925      0.944      0.726






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       6/50      2.19G      1.084     0.8903      1.309         20        320: 100%|██████████| 456/456 [01:15<00:00,  6.00it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:09<00:00,  5.51it/s]

                   all       1658       6636      0.917      0.911      0.943      0.733






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       7/50      2.18G      1.064     0.8738        1.3         16        320: 100%|██████████| 456/456 [01:18<00:00,  5.79it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:12<00:00,  4.24it/s]

                   all       1658       6636      0.912       0.93      0.945      0.734






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       8/50      2.19G      1.037     0.8416      1.284         44        320: 100%|██████████| 456/456 [01:15<00:00,  6.01it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:09<00:00,  5.70it/s]

                   all       1658       6636      0.916      0.928       0.95       0.74






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       9/50      2.17G       1.02     0.8248      1.269         17        320: 100%|██████████| 456/456 [01:14<00:00,  6.15it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:08<00:00,  6.20it/s]

                   all       1658       6636      0.917      0.925      0.948      0.744






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      10/50      2.18G      1.008     0.8178      1.265         24        320: 100%|██████████| 456/456 [01:16<00:00,  5.97it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:08<00:00,  6.04it/s]

                   all       1658       6636      0.914      0.927      0.946      0.747






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      11/50      2.19G     0.9923     0.7995      1.256         27        320: 100%|██████████| 456/456 [01:13<00:00,  6.19it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:08<00:00,  6.14it/s]

                   all       1658       6636      0.927      0.928      0.944      0.746






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      12/50      2.18G     0.9893      0.798      1.256         47        320: 100%|██████████| 456/456 [01:14<00:00,  6.12it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:11<00:00,  4.59it/s]

                   all       1658       6636      0.919       0.93      0.954      0.757






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      13/50      2.19G     0.9729     0.7832      1.245         23        320: 100%|██████████| 456/456 [01:14<00:00,  6.11it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:08<00:00,  6.14it/s]

                   all       1658       6636      0.923      0.933      0.951      0.766






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      14/50      2.17G     0.9606     0.7685       1.24         25        320: 100%|██████████| 456/456 [01:14<00:00,  6.14it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:08<00:00,  6.11it/s]

                   all       1658       6636       0.92      0.939      0.953      0.762






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      15/50      2.18G     0.9481     0.7618      1.232         32        320: 100%|██████████| 456/456 [01:14<00:00,  6.10it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:08<00:00,  6.04it/s]

                   all       1658       6636      0.918      0.937      0.953      0.767






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      16/50      2.19G     0.9438     0.7544      1.227         53        320: 100%|██████████| 456/456 [01:14<00:00,  6.09it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:08<00:00,  6.20it/s]

                   all       1658       6636      0.922      0.939      0.955      0.776






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      17/50      2.17G     0.9375     0.7481      1.221         38        320: 100%|██████████| 456/456 [01:14<00:00,  6.12it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:11<00:00,  4.61it/s]

                   all       1658       6636      0.917      0.943      0.955      0.774






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      18/50      2.19G     0.9222     0.7361      1.214         32        320: 100%|██████████| 456/456 [01:15<00:00,  6.01it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:09<00:00,  5.69it/s]

                   all       1658       6636      0.922      0.946      0.957      0.782






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      19/50      2.18G     0.9215     0.7369       1.21         32        320: 100%|██████████| 456/456 [01:14<00:00,  6.13it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:11<00:00,  4.61it/s]

                   all       1658       6636      0.916      0.945      0.954      0.783






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      20/50      2.19G     0.9121     0.7296      1.207         44        320: 100%|██████████| 456/456 [01:13<00:00,  6.21it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:08<00:00,  6.36it/s]

                   all       1658       6636      0.922      0.942      0.955      0.776






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      21/50      2.19G     0.9098     0.7266      1.205         27        320: 100%|██████████| 456/456 [01:14<00:00,  6.09it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:08<00:00,  6.10it/s]


                   all       1658       6636      0.919      0.951      0.957      0.782

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      22/50      2.18G     0.9026     0.7143        1.2         24        320: 100%|██████████| 456/456 [01:18<00:00,  5.80it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:08<00:00,  6.05it/s]

                   all       1658       6636      0.913      0.946      0.951       0.78






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      23/50      2.17G     0.8929      0.711      1.197         11        320: 100%|██████████| 456/456 [01:15<00:00,  6.07it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:08<00:00,  6.24it/s]

                   all       1658       6636      0.918      0.952      0.955       0.79






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      24/50      2.19G     0.8886     0.7075      1.192         11        320: 100%|██████████| 456/456 [01:15<00:00,  6.04it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:11<00:00,  4.46it/s]

                   all       1658       6636      0.921      0.946      0.954      0.781






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      25/50      2.19G     0.8902     0.7016      1.196         12        320: 100%|██████████| 456/456 [01:13<00:00,  6.18it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:08<00:00,  6.23it/s]

                   all       1658       6636      0.922      0.951      0.958      0.792






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      26/50      2.18G     0.8781        0.7      1.188         44        320: 100%|██████████| 456/456 [01:14<00:00,  6.16it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:11<00:00,  4.59it/s]

                   all       1658       6636       0.92      0.949      0.956      0.788






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      27/50      2.17G     0.8669      0.686      1.178         16        320: 100%|██████████| 456/456 [01:14<00:00,  6.15it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:08<00:00,  5.96it/s]

                   all       1658       6636      0.916      0.949      0.956      0.793






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      28/50      2.17G       0.87     0.6933      1.179         41        320: 100%|██████████| 456/456 [01:14<00:00,  6.16it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:08<00:00,  6.08it/s]

                   all       1658       6636      0.917      0.944      0.957      0.796






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      29/50      2.18G     0.8608     0.6855      1.175         35        320: 100%|██████████| 456/456 [01:17<00:00,  5.88it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:08<00:00,  6.16it/s]

                   all       1658       6636      0.916      0.951      0.953      0.793






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      30/50      2.19G     0.8514     0.6812      1.175         41        320: 100%|██████████| 456/456 [01:16<00:00,  5.97it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:08<00:00,  6.20it/s]

                   all       1658       6636      0.917      0.944      0.956        0.8






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      31/50      2.19G     0.8491     0.6715       1.17         22        320: 100%|██████████| 456/456 [01:14<00:00,  6.12it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:11<00:00,  4.59it/s]

                   all       1658       6636      0.918      0.952      0.958      0.797






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      32/50      2.19G     0.8487     0.6679      1.172         44        320: 100%|██████████| 456/456 [01:14<00:00,  6.11it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:08<00:00,  5.97it/s]

                   all       1658       6636      0.926      0.942      0.957      0.799






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      33/50      2.17G     0.8406     0.6683      1.165         31        320: 100%|██████████| 456/456 [01:15<00:00,  6.06it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:11<00:00,  4.52it/s]

                   all       1658       6636      0.921      0.948      0.959      0.802






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      34/50      2.18G     0.8357     0.6653      1.162         25        320: 100%|██████████| 456/456 [01:18<00:00,  5.82it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:08<00:00,  6.23it/s]

                   all       1658       6636      0.921      0.948      0.955        0.8






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      35/50      2.17G     0.8236     0.6563      1.158         30        320: 100%|██████████| 456/456 [01:13<00:00,  6.18it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:13<00:00,  3.83it/s]

                   all       1658       6636      0.921      0.946      0.957        0.8






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      36/50      2.16G     0.8231     0.6469      1.154         30        320: 100%|██████████| 456/456 [01:13<00:00,  6.18it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:08<00:00,  6.43it/s]

                   all       1658       6636      0.918       0.95      0.957      0.807






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      37/50      2.17G     0.8179     0.6524      1.157         47        320: 100%|██████████| 456/456 [01:14<00:00,  6.14it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:08<00:00,  6.32it/s]

                   all       1658       6636      0.926      0.947      0.958      0.808






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      38/50      2.19G     0.8135     0.6434      1.153         27        320: 100%|██████████| 456/456 [01:13<00:00,  6.16it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:11<00:00,  4.43it/s]

                   all       1658       6636      0.923      0.948      0.958      0.807






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      39/50      2.19G     0.8098     0.6443      1.147         30        320: 100%|██████████| 456/456 [01:14<00:00,  6.11it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:08<00:00,  6.21it/s]

                   all       1658       6636      0.919      0.952      0.957      0.806






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      40/50       2.2G     0.8071     0.6369      1.145         16        320: 100%|██████████| 456/456 [01:14<00:00,  6.13it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:08<00:00,  6.34it/s]

                   all       1658       6636      0.919      0.949      0.958      0.808





Closing dataloader mosaic
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1, 4.0), tile_grid_size=(8, 8))

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      41/50      2.16G     0.6687     0.5156      1.062         16        320: 100%|██████████| 456/456 [01:14<00:00,  6.12it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:12<00:00,  4.25it/s]

                   all       1658       6636      0.919      0.945      0.956      0.801






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      42/50      2.15G     0.6576     0.5107      1.058          7        320: 100%|██████████| 456/456 [01:14<00:00,  6.13it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:09<00:00,  5.35it/s]

                   all       1658       6636      0.917      0.951      0.957      0.806






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      43/50      2.15G     0.6442        0.5      1.047         19        320: 100%|██████████| 456/456 [01:15<00:00,  6.01it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:12<00:00,  4.29it/s]

                   all       1658       6636      0.921      0.946      0.957      0.803






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      44/50      2.15G     0.6367     0.4929      1.048         14        320: 100%|██████████| 456/456 [01:15<00:00,  6.02it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:08<00:00,  6.14it/s]

                   all       1658       6636       0.92      0.946      0.958      0.808






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      45/50      2.15G     0.6336     0.4867      1.043          6        320: 100%|██████████| 456/456 [01:15<00:00,  6.01it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:08<00:00,  6.23it/s]

                   all       1658       6636      0.922      0.942      0.959      0.809






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      46/50      2.16G     0.6257     0.4846      1.037         14        320: 100%|██████████| 456/456 [01:14<00:00,  6.09it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95):  17%|█▋        | 9/52 [00:01<00:06,  6.56it/s]



                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:11<00:00,  4.38it/s]

                   all       1658       6636       0.92      0.942      0.956      0.808






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      47/50      2.15G     0.6159     0.4808      1.034         13        320: 100%|██████████| 456/456 [01:18<00:00,  5.84it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:08<00:00,  5.78it/s]

                   all       1658       6636      0.921      0.945      0.959       0.81






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      48/50      2.16G     0.6114     0.4801      1.032         15        320: 100%|██████████| 456/456 [01:13<00:00,  6.19it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:11<00:00,  4.37it/s]

                   all       1658       6636      0.922      0.948      0.959      0.812






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      49/50      2.16G     0.6074     0.4724      1.027         13        320: 100%|██████████| 456/456 [01:14<00:00,  6.09it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:08<00:00,  6.15it/s]

                   all       1658       6636      0.915      0.954      0.959      0.813






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      50/50      2.16G     0.6011     0.4665      1.026         12        320: 100%|██████████| 456/456 [01:15<00:00,  6.05it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:08<00:00,  5.88it/s]

                   all       1658       6636      0.925      0.943       0.96      0.814






50 epochs completed in 1.190 hours.
Optimizer stripped from runs/detect/train36/weights/last.pt, 52.0MB
Optimizer stripped from runs/detect/train36/weights/best.pt, 52.0MB

Validating runs/detect/train36/weights/best.pt...
Ultralytics 8.3.28 🚀 Python-3.10.13 torch-2.4.1.post302 CUDA:0 (NVIDIA GeForce RTX 3060 Laptop GPU, 6144MiB)
Model summary (fused): 218 layers, 25,840,339 parameters, 0 gradients, 78.7 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:17<00:00,  2.93it/s]


                   all       1658       6636      0.924      0.942       0.96      0.806
Speed: 0.0ms preprocess, 4.0ms inference, 0.0ms loss, 0.8ms postprocess per image
Results saved to [1mruns/detect/train36[0m


## Inference


In [120]:
detector = YOLOInference("yolov8x.pt", "runs/detect/train36/weights/best.pt")

In [121]:
image, results = detector.predict_image(
    "../../data/day/4/2024-05-17-11-20-03_jpg.rf.d22ac7c5629960c1b011ed3a501cf535.jpg",
    save_path="output.jpg",
)

print(f"Found {len(results.boxes) if results.boxes is not None else 0} detections")

Found 6 detections


In [104]:
import random

In [None]:
def visualize_samples_per_class(
    detector: YOLOInference,
    data_dir: str = "../../data",
    samples_per_class: int = 10,
    figsize: tuple = None,
):
    """
    Visualize detection results for samples from each class
    """
    class_dirs = sorted([d for d in Path(data_dir).glob("day/*") if d.is_dir()])
    n_classes = len(class_dirs)

    if figsize is None:
        figsize = (20, 4 * len(class_dirs))

    fig = plt.figure(figsize=figsize)
    for class_idx, class_dir in enumerate(class_dirs):
        print(f"Processing class {class_dir.name}")

        all_samples = list(class_dir.rglob("*.jpg"))
        if not all_samples:
            continue

        selected_samples = random.sample(all_samples, min(10, len(all_samples)))
        for sample_idx, sample in enumerate(selected_samples):
            try:
                image, results = detector.predict_image(str(sample))

                ax = fig.add_subplot(n_classes, 10, class_idx * 10 + sample_idx + 1)

                ax.imshow(image)
                ax.set_title(f"Class {class_dir.name}\n{len(results.boxes)} dets")
                ax.axis("off")

            except Exception as e:
                print(f"Error processing {sample}: {e}")
                continue

    plt.tight_layout()
    return fig

In [123]:
from tqdm import tqdm

In [129]:
output_dir = Path("outputs_inference")
output_dir.mkdir(exist_ok=True)

In [130]:
for class_dir in tqdm(list(Path("../../data/day").glob("*"))):
    if not class_dir.is_dir():
        continue

    class_output = output_dir / class_dir.name
    class_output.mkdir(exist_ok=True)

    for sample in class_dir.rglob("*.jpg"):
        image, results = detector.predict_image(
            str(sample), save_path=str(class_output / sample.name)
        )

100%|██████████| 13/13 [04:30<00:00, 20.82s/it]
