In [1]:
import logging
from collections import deque
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Union

import cv2
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import yaml
from ultralytics import YOLO

2024-11-19 15:07:13.963137: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-11-19 15:07:14.134557: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1732050434.196491    3996 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1732050434.212581    3996 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-11-19 15:07:14.357835: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instr

## Configurations


In [2]:
logging.basicConfig(
    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
)

### Detection


In [3]:
@dataclass
class TrainingConfig:
    epochs: int = 100
    image_size: int = 250
    batch_size: int = 16
    device: str = "0"
    patience: int = 20
    dropout: float = 0.2
    augment: bool = True
    mosaic: float = 1.0
    mixup: float = 0.3
    copy_paste: float = 0.3
    degrees: float = 45.0
    translate: float = 0.2
    scale: float = 0.5
    shear: float = 10.0
    perspective: float = 0.0005
    flipud: float = 0.5
    fliplr: float = 0.5
    hsv_h: float = 0.015
    hsv_s: float = 0.7
    hsv_v: float = 0.4

In [4]:
@dataclass
class DetectionConfig:
    conf_threshold: float = 0.30
    iou_threshold: float = 0.60
    min_area: int = 500
    max_overlap_ratio: float = 0.8
    temporal_window: int = 3
    enable_nms: bool = True
    enable_temporal: bool = True
    enable_size_filter: bool = True

In [5]:
@dataclass
class ConsistencyConfig:
    num_inference_passes: int = 3
    confidence_threshold: float = 0.3
    consistency_threshold: float = 0.7

    enable_tta: bool = True
    tta_scales: List[float] = None
    tta_flips: bool = True

    min_detection_votes: int = 2
    calibrate_confidence: bool = True
    confidence_scaling_factor: float = 1.2
    min_relative_size: float = 0.01
    max_relative_size: float = 0.8

    def __post_init__(self):
        if self.tta_scales is None:
            self.tta_scales = [0.9, 1.0, 1.1]

## Detection Tracker


In [6]:
class DetectionTracker:
    def __init__(self, window_size: int = 3):
        self.window_size = window_size
        self.detection_history = deque(maxlen=window_size)

    def update(self, detections: np.ndarray) -> np.ndarray:
        """
        Updates the detection history with the new detections and returns the
        filtered detections.

        Args:
            detections (np.ndarray): The detections to be added to the history.

        Returns:
            np.ndarray: The filtered detections.
        """
        if len(detections) == 0:
            return detections

        self.detection_history.append(detections)

        if len(self.detection_history) < 2:
            return detections

        weights = np.linspace(0.5, 1.0, len(self.detection_history))
        weights /= np.sum(weights)

        smoothed = np.zeros_like(detections)
        for i, (det, w) in enumerate(zip(self.detection_history, weights)):
            if det.shape == detections.shape:
                smoothed += w * det

        return smoothed

## YOLO


In [7]:
class YOLOBase:
    def __init__(
        self,
        model_path: Union[str, Path],
        detection_config: Optional[DetectionConfig] = None,
    ):
        self.model_path = Path(model_path)
        self.detection_config = detection_config or DetectionConfig()
        self.tracker = DetectionTracker(self.detection_config.temporal_window)
        self.model = None

    def _load_model(self, weights_path: Optional[Path] = None) -> bool:
        try:
            if weights_path is not None and weights_path.exists():
                self.model = YOLO(str(self.model_path))
                self.model.load(str(weights_path))
                logging.info(
                    f"Loaded base model from {self.model_path} with weights from {weights_path}"
                )

            else:
                self.model = YOLO(str(self.model_path))
                logging.info(f"Loaded model from {self.model_path}")

            return True

        except Exception as e:
            logging.error(f"Failed to load model: {e}")
            return False

    def _compute_intersection(self, box1: np.ndarray, box2: np.ndarray) -> float:
        """
        Computes the intersection area between two bounding boxes.

        Args:
            box1 (np.ndarray): The first bounding box.
            box2 (np.ndarray): The second bounding box.

        Returns:
            float: The intersection area.
        """
        x1 = max(box1[0], box2[0])
        y1 = max(box1[1], box2[1])
        x2 = min(box1[2], box2[2])
        y2 = min(box1[3], box2[3])

        return max(0, x2 - x1) * max(0, y2 - y1)

    def post_process_detection(
        self, boxes: np.ndarray, scores: np.ndarray, class_ids: np.ndarray
    ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
        """
        Post-processes the raw detections by applying NMS and size filtering.

        Args:
            boxes (np.ndarray): The detected bounding boxes.
            scores (np.ndarray): The detection scores.
            class_ids (np.ndarray): The detected class IDs.

        Returns:
            Tuple[np.ndarray, np.ndarray, np.ndarray]: The post-processed detections.
        """

        if len(boxes) == 0:
            return boxes, scores, class_ids

        if self.detection_config.enable_nms:
            indices = cv2.dnn.NMSBoxes(
                boxes.tolist(),
                scores.tolist(),
                self.detection_config.conf_threshold,
                self.detection_config.iou_threshold,
            )
            boxes = boxes[indices.flatten()]
            scores = scores[indices.flatten()]
            class_ids = class_ids[indices.flatten()]

        if self.detection_config.enable_size_filter:
            areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
            mask = areas >= self.detection_config.min_area
            boxes = boxes[mask]
            scores = scores[mask]
            class_ids = class_ids[mask]

        final_boxes, final_scores, final_class_ids = [], [], []
        for idx in range(len(boxes)):
            overlap_too_high = False
            box_area = (boxes[idx, 2] - boxes[idx, 0]) * (boxes[idx, 3] - boxes[idx, 1])

            for j in range(len(final_boxes)):
                intersection = self._compute_intersection(boxes[idx], final_boxes[j])
                if intersection / box_area > self.detection_config.max_overlap_ratio:
                    overlap_too_high = True
                    break

            if not overlap_too_high:
                final_boxes.append(boxes[idx])
                final_scores.append(scores[idx])
                final_class_ids.append(class_ids[idx])

        return np.array(final_boxes), np.array(final_scores), np.array(final_class_ids)

## Trainer


In [8]:
class YOLOTrainer(YOLOBase):
    def __init__(
        self,
        model_path: Union[str, Path] = "yolov8n.pt",
        training_config: Optional[TrainingConfig] = None,
        detection_config: Optional[DetectionConfig] = None,
    ):
        super().__init__(model_path, detection_config)

        self.training_config = training_config or TrainingConfig()
        self._load_model()

    def train(
        self,
        data_yaml_path: Union[str, Path],
        epochs: Optional[int] = 50,
        image_size: Optional[int] = 320,
        batch_size: Optional[int] = 16,
    ):
        data_yaml_path = Path(data_yaml_path).resolve()
        data_dir = data_yaml_path.parent

        train_path = (data_dir / "train" / "images").resolve()
        val_path = (data_dir / "valid" / "images").resolve()
        self._verify_data_paths(train_path, val_path)

        data_config = self._create_data_config(train_path, val_path)
        with open(data_yaml_path, "w") as f:
            yaml.safe_dump(data_config, f, sort_keys=False)

        self.results = self.model.train(
            data=data_yaml_path,
            epochs=epochs or self.training_config.epochs,
            imgsz=image_size or self.training_config.image_size,
            batch=batch_size or self.training_config.batch_size,
            device=self.training_config.device,
            patience=self.training_config.patience,
            save=True,
            plots=True,
            augment=self.training_config.augment,
            dropout=self.training_config.dropout,
            **{
                k: v
                for k, v in vars(self.training_config).items()
                if k in ["mosaic", "mixup", "copy_paste"]
            },
        )

        return self.model, self.results

    def _create_data_config(self, train_path: Path, val_path: Path) -> dict:
        """Create YOLO training configuration"""
        config = {
            "train": str(train_path),
            "val": str(val_path),
            "nc": 1,
            "names": ["cow"],
        }

        augment_params = {
            k: v
            for k, v in vars(self.training_config).items()
            if k
            in [
                "mosaic",
                "mixup",
                "copy_paste",
                "degrees",
                "translate",
                "scale",
                "shear",
                "perspective",
                "flipud",
                "fliplr",
                "hsv_h",
                "hsv_s",
                "hsv_v",
            ]
        }
        config.update(augment_params)
        return config

    def _verify_data_paths(self, train_path: Path, val_path: Path):
        """Verify that data paths exist"""
        if not train_path.exists():
            raise FileNotFoundError(f"Training directory not found: {train_path}")

        if not val_path.exists():
            raise FileNotFoundError(f"Validation directory not found: {val_path}")

## Inference Preparation


In [9]:
class YOLOInference:
    def __init__(
        self, model_path: str = "yolov8m.pt", weights_path: Optional[str] = None
    ):
        """
        Initialize YOLO model with optional custom weights
        """
        if weights_path:
            self.model = YOLO(weights_path)

        else:
            self.model = YOLO(model_path)

    def predict_image(
        self,
        image_path: str,
        save_path: Optional[str] = None,
        conf_threshold: float = 0.3,
        iou_threshold: float = 0.7,
    ) -> Tuple[np.ndarray, Any]:
        image = cv2.imread(image_path)
        if image is None:
            raise ValueError(f"Could not read image: {image_path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Run inference with NMS parameters
        results = self.model(
            image, verbose=False, conf=conf_threshold, iou=iou_threshold, max_det=20
        )[0]

        # Draw boxes
        annotated_image = image.copy()
        if results.boxes is not None and len(results.boxes) > 0:
            boxes = results.boxes.cpu().numpy()

            # Sort boxes by confidence
            confidences = [float(box.conf[0]) for box in boxes]
            sorted_indices = np.argsort(confidences)[::-1]

            for idx in sorted_indices:
                box = boxes[idx]
                # Get coordinates and confidence
                x1, y1, x2, y2 = map(int, box.xyxy[0])
                confidence = float(box.conf[0])

                # Draw box
                cv2.rectangle(annotated_image, (x1, y1), (x2, y2), (0, 255, 0), 2)

                # Create label
                label = f"{confidence:.2f}"
                font = cv2.FONT_HERSHEY_SIMPLEX
                font_scale = 0.6
                thickness = 2

                # Get label size
                (label_width, label_height), baseline = cv2.getTextSize(
                    label, font, font_scale, thickness
                )

                # Draw label background
                label_patch = (
                    np.ones(
                        (label_height + 2 * baseline, label_width + 2 * baseline, 3),
                        dtype=np.uint8,
                    )
                    * 255
                )
                cv2.putText(
                    label_patch,
                    label,
                    (baseline, label_height),
                    font,
                    font_scale,
                    (0, 0, 0),
                    thickness,
                )

                # Calculate label position
                label_y = max(y1 - label_height - baseline, 0)
                label_x = max(x1, 0)

                # Add label to image using alpha blending
                if (
                    label_y + label_height <= image.shape[0]
                    and label_x + label_width <= image.shape[1]
                ):
                    alpha = 0.7
                    label_height, label_width = label_patch.shape[:2]
                    roi = annotated_image[
                        label_y : label_y + label_height,
                        label_x : label_x + label_width,
                    ]
                    try:
                        cv2.addWeighted(label_patch, alpha, roi, 1 - alpha, 0, roi)
                    except Exception:
                        # Fallback to simpler label if blending fails
                        cv2.putText(
                            annotated_image,
                            label,
                            (label_x, label_y + label_height),
                            font,
                            font_scale,
                            (0, 0, 0),
                            thickness,
                        )

        # Save if path provided
        if save_path:
            save_dir = Path(save_path).parent
            save_dir.mkdir(parents=True, exist_ok=True)
            cv2.imwrite(save_path, cv2.cvtColor(annotated_image, cv2.COLOR_RGB2BGR))

        return annotated_image, results

In [10]:
def create_inference_pipeline(
    base_model_path: str = "yolov8m.pt",
    weights_path: str = "runs/detect/train29/weights/last.pt",
    **config_kwargs,
) -> YOLOInference:
    """Creates an inference pipeline with specified weights."""
    consistency_config = ConsistencyConfig(
        **{k: v for k, v in config_kwargs.items() if hasattr(ConsistencyConfig, k)}
    )

    detection_config = DetectionConfig(
        **{k: v for k, v in config_kwargs.items() if hasattr(DetectionConfig, k)}
    )

    return YOLOInference(
        model_path=base_model_path,
        weights_path=weights_path,
        consistency_config=consistency_config,
        detection_config=detection_config,
    )

In [11]:
class YOLOEvaluator:
    def __init__(self, detector: YOLOInference):
        self.detector = detector

    def calculate_metrics(
        self,
        ground_truth_path: str,
        iou_threshold: float = 0.5,
        conf_threshold: float = 0.3,
    ) -> Dict[str, float]:
        """
        Calculate precision, recall, and F1 score for the model.

        Args:
            ground_truth_path (str): The path to the ground truth directory.
            iou_threshold (float): The IoU threshold for matching detections.
            conf_threshold (float): The confidence threshold for filtering detections.

        Returns:
            Dict[str, float]: The calculated metrics.
        """

        true_positives, false_positives, false_negatives = 0, 0, 0
        gt_dir = Path(ground_truth_path)
        image_files = list(gt_dir.glob("**/*.jpg"))

        for image_file in image_files:
            label_file = image_file.with_suffix(".txt")
            if not label_file.exists():
                continue

            gt_boxes = self._read_yolo_labels(label_file)
            _, results = self.detector.predict_image(
                str(image_file),
                conf_threshold=conf_threshold,
                iou_threshold=iou_threshold,
            )

            if results.boxes is None:
                false_negatives += len(gt_boxes)
                continue

            pred_boxes = results.boxes.cpu().numpy()
            matched_gt = set()

            for pred in pred_boxes:
                pred_box = pred.xyxy[0]
                best_iou = 0
                best_gt_idx = -1

                for gt_idx, gt_box in enumerate(gt_boxes):
                    if gt_idx in matched_gt:
                        continue

                    iou = self._calculate_iou(pred_box, gt_box)
                    if iou > best_iou:
                        best_iou = iou
                        best_gt_idx = gt_idx

                if best_iou > iou_threshold:
                    true_positives += 1
                    matched_gt.add(best_gt_idx)

                else:
                    false_positives += 1

            false_negatives += len(gt_boxes) - len(matched_gt)

        precision = (
            true_positives / (true_positives + false_positives)
            if (true_positives + false_positives) > 0
            else 0
        )
        recall = (
            true_positives / (true_positives + false_negatives)
            if (true_positives + false_negatives) > 0
            else 0
        )
        f1_score = (
            2 * (precision * recall) / (precision + recall)
            if (precision + recall) > 0
            else 0
        )

        return {
            "precision": precision,
            "recall": recall,
            "f1_score": f1_score,
            "true_positives": true_positives,
            "false_positives": false_positives,
            "false_negatives": false_negatives,
        }

    def _read_yolo_labels(self, label_file: Path) -> List[np.ndarray]:
        boxes = []

        if label_file.exists():
            with open(label_file) as f:
                for line in f:
                    class_id, x_center, y_center, width, height = map(
                        float, line.strip().split()
                    )

                    x1 = x_center - width / 2
                    y1 = y_center - height / 2

                    x2 = x_center + width / 2
                    y2 = y_center + height / 2

                    boxes.append(np.array([x1, y1, x2, y2]))

        return boxes

    def _calculate_iou(self, box1: np.ndarray, box2: np.ndarray) -> float:
        x1 = max(box1[0], box2[0])
        y1 = max(box1[1], box2[1])
        x2 = min(box1[2], box2[2])
        y2 = min(box1[3], box2[3])

        intersection = max(0, x2 - x1) * max(0, y2 - y1)
        area1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
        area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])

        union = area1 + area2 - intersection
        return intersection / union if union > 0 else 0

## Main Execution


In [12]:
detection_config = DetectionConfig(
    conf_threshold=0.3, iou_threshold=0.6, min_area=500, temporal_window=3
)

training_config = TrainingConfig(epochs=50, image_size=640, batch_size=16, dropout=0.2)

In [13]:
trainer = YOLOTrainer(
    model_path="yolov8x.pt",
    training_config=training_config,
    detection_config=detection_config,
)

Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8x.pt to 'yolov8x.pt'...


100%|██████████| 131M/131M [00:14<00:00, 9.56MB/s] 
2024-11-19 15:07:37,432 - INFO - Loaded model from yolov8x.pt


### Train Loop


In [14]:
model, results = trainer.train("../../data/yolo/data.yaml")

Ultralytics 8.3.34 🚀 Python-3.10.13 torch-2.4.1.post302 CUDA:0 (NVIDIA GeForce RTX 3060 Laptop GPU, 6144MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8x.pt, data=/home/carlos/developer/cow-project/data/yolo/data.yaml, epochs=50, time=None, patience=20, batch=16, imgsz=320, save=True, save_period=-1, cache=False, device=0, workers=8, project=None, name=train, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.2, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=True, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False

100%|██████████| 5.35M/5.35M [00:00<00:00, 8.98MB/s]


[34m[1mAMP: [0mchecks passed ✅


[34m[1mtrain: [0mScanning /home/carlos/developer/cow-project/data/yolo/train/labels... 6469 images, 1218 backgrounds, 0 corrupt: 100%|██████████| 7283/7283 [00:07<00:00, 921.64it/s]


[34m[1mtrain: [0mNew cache created: /home/carlos/developer/cow-project/data/yolo/train/labels.cache
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1, 4.0), tile_grid_size=(8, 8))


  check_for_updates()
[34m[1mval: [0mScanning /home/carlos/developer/cow-project/data/yolo/valid/labels... 1618 images, 144 backgrounds, 0 corrupt: 100%|██████████| 1658/1658 [00:02<00:00, 749.74it/s]

[34m[1mval: [0mNew cache created: /home/carlos/developer/cow-project/data/yolo/valid/labels.cache





Plotting labels to runs/detect/train/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.002, momentum=0.9) with parameter groups 97 weight(decay=0.0), 104 weight(decay=0.0005), 103 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added ✅
Image sizes 320 train, 320 val
Using 8 dataloader workers
Logging results to [1mruns/detect/train[0m
Starting training for 50 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/50      4.95G      1.238      1.124      1.467         47        320: 100%|██████████| 456/456 [03:19<00:00,  2.28it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:17<00:00,  3.04it/s]

                   all       1658       6636      0.842       0.83      0.885      0.609






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/50      4.57G      1.254      1.073      1.488         29        320: 100%|██████████| 456/456 [02:46<00:00,  2.74it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:12<00:00,  4.03it/s]

                   all       1658       6636      0.877      0.862      0.893       0.64






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       3/50      4.55G      1.204      1.011      1.458         28        320: 100%|██████████| 456/456 [02:45<00:00,  2.76it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:13<00:00,  3.98it/s]


                   all       1658       6636      0.897      0.898      0.925      0.677

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       4/50      4.51G      1.139     0.9545      1.421         25        320: 100%|██████████| 456/456 [02:45<00:00,  2.75it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:13<00:00,  3.99it/s]

                   all       1658       6636      0.897      0.885      0.928      0.686






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       5/50      4.55G        1.1     0.9071      1.395         11        320: 100%|██████████| 456/456 [02:46<00:00,  2.73it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:12<00:00,  4.01it/s]

                   all       1658       6636      0.905       0.91      0.941      0.728






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       6/50      4.55G      1.053     0.8669      1.365         20        320: 100%|██████████| 456/456 [02:47<00:00,  2.72it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:15<00:00,  3.29it/s]


                   all       1658       6636      0.911      0.925      0.946      0.735

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       7/50      4.56G       1.04     0.8577      1.359         16        320: 100%|██████████| 456/456 [02:44<00:00,  2.77it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:15<00:00,  3.28it/s]

                   all       1658       6636      0.904      0.933      0.949      0.736






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       8/50      4.54G      1.012     0.8228      1.339         44        320: 100%|██████████| 456/456 [02:47<00:00,  2.72it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:12<00:00,  4.08it/s]

                   all       1658       6636      0.902      0.934      0.944      0.736






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       9/50      4.54G     0.9952     0.8047       1.33         17        320: 100%|██████████| 456/456 [02:48<00:00,  2.71it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:12<00:00,  4.06it/s]

                   all       1658       6636      0.917      0.929      0.952      0.758






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      10/50      4.54G     0.9832      0.794      1.321         24        320: 100%|██████████| 456/456 [02:47<00:00,  2.72it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:12<00:00,  4.10it/s]

                   all       1658       6636      0.923      0.934      0.953      0.761






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      11/50      4.56G     0.9671      0.778       1.31         27        320: 100%|██████████| 456/456 [02:47<00:00,  2.72it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:15<00:00,  3.32it/s]

                   all       1658       6636      0.917      0.929      0.943      0.749






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      12/50       4.5G     0.9628     0.7719      1.311         47        320: 100%|██████████| 456/456 [02:44<00:00,  2.77it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:15<00:00,  3.30it/s]


                   all       1658       6636      0.917      0.938      0.954      0.764

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      13/50      4.56G     0.9488     0.7593        1.3         23        320: 100%|██████████| 456/456 [02:44<00:00,  2.77it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:12<00:00,  4.08it/s]


                   all       1658       6636      0.918      0.937      0.952      0.764

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      14/50      4.51G     0.9348     0.7482      1.292         25        320: 100%|██████████| 456/456 [02:47<00:00,  2.72it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:12<00:00,  4.18it/s]

                   all       1658       6636       0.91      0.947      0.951      0.765






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      15/50      4.52G     0.9195     0.7387      1.281         32        320: 100%|██████████| 456/456 [02:48<00:00,  2.71it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:12<00:00,  4.14it/s]

                   all       1658       6636      0.916      0.941      0.953      0.777






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      16/50      4.52G      0.916     0.7284      1.276         53        320: 100%|██████████| 456/456 [02:48<00:00,  2.71it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:12<00:00,  4.19it/s]


                   all       1658       6636      0.916      0.938      0.955       0.78

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      17/50      4.54G     0.9044     0.7198       1.27         38        320: 100%|██████████| 456/456 [02:46<00:00,  2.74it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:15<00:00,  3.33it/s]

                   all       1658       6636       0.92       0.94      0.951      0.777






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      18/50       4.5G     0.8935     0.7105      1.262         32        320: 100%|██████████| 456/456 [02:47<00:00,  2.72it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:12<00:00,  4.04it/s]


                   all       1658       6636      0.924      0.937      0.955      0.782

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      19/50      4.55G     0.8922     0.7069       1.26         32        320: 100%|██████████| 456/456 [02:48<00:00,  2.71it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:13<00:00,  3.85it/s]

                   all       1658       6636      0.917      0.946      0.952      0.783






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      20/50       4.5G      0.882     0.6983      1.253         44        320: 100%|██████████| 456/456 [02:47<00:00,  2.72it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:12<00:00,  4.06it/s]


                   all       1658       6636      0.918      0.947      0.955      0.788

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      21/50       4.5G     0.8829     0.6966      1.253         27        320: 100%|██████████| 456/456 [02:47<00:00,  2.72it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:15<00:00,  3.29it/s]

                   all       1658       6636      0.928      0.946      0.958      0.787






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      22/50       4.5G     0.8684     0.6855      1.246         24        320: 100%|██████████| 456/456 [02:44<00:00,  2.77it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:15<00:00,  3.32it/s]


                   all       1658       6636      0.921      0.947      0.956      0.791

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      23/50      4.55G      0.861     0.6826      1.241         11        320: 100%|██████████| 456/456 [02:48<00:00,  2.71it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:12<00:00,  4.07it/s]

                   all       1658       6636      0.918      0.951      0.954      0.788






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      24/50      4.57G     0.8599     0.6799      1.241         11        320: 100%|██████████| 456/456 [02:48<00:00,  2.71it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:12<00:00,  4.07it/s]

                   all       1658       6636      0.922       0.94      0.955      0.789






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      25/50      4.55G     0.8589     0.6712       1.24         12        320: 100%|██████████| 456/456 [02:48<00:00,  2.71it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:14<00:00,  3.70it/s]

                   all       1658       6636       0.92      0.946      0.957      0.792






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      26/50      4.56G     0.8477      0.673      1.234         44        320: 100%|██████████| 456/456 [02:48<00:00,  2.71it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:15<00:00,  3.29it/s]

                   all       1658       6636      0.914      0.949      0.956      0.795






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      27/50      4.55G     0.8363      0.657      1.222         16        320: 100%|██████████| 456/456 [02:46<00:00,  2.75it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:15<00:00,  3.28it/s]

                   all       1658       6636       0.92      0.946      0.957        0.8






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      28/50      4.57G     0.8404     0.6611      1.225         41        320: 100%|██████████| 456/456 [02:49<00:00,  2.69it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:12<00:00,  4.15it/s]


                   all       1658       6636      0.928      0.937      0.959        0.8

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      29/50      4.55G     0.8279     0.6531      1.221         35        320: 100%|██████████| 456/456 [02:50<00:00,  2.68it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:12<00:00,  4.08it/s]

                   all       1658       6636      0.917      0.952      0.955      0.798






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      30/50      4.55G     0.8222     0.6515       1.22         41        320: 100%|██████████| 456/456 [02:50<00:00,  2.68it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:13<00:00,  3.86it/s]

                   all       1658       6636      0.919       0.94      0.953      0.797






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      31/50      4.49G      0.815     0.6446      1.211         22        320: 100%|██████████| 456/456 [02:50<00:00,  2.68it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:16<00:00,  3.24it/s]


                   all       1658       6636      0.922      0.943      0.958      0.805

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      32/50      4.56G     0.8151      0.637      1.212         44        320: 100%|██████████| 456/456 [02:45<00:00,  2.75it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:17<00:00,  3.05it/s]

                   all       1658       6636      0.919      0.948      0.957      0.802






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      33/50      4.53G     0.8099      0.635      1.203         31        320: 100%|██████████| 456/456 [02:47<00:00,  2.73it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:12<00:00,  4.15it/s]

                   all       1658       6636      0.921      0.951      0.956      0.805






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      34/50      4.55G     0.8012     0.6337      1.198         25        320: 100%|██████████| 456/456 [02:51<00:00,  2.66it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:12<00:00,  4.15it/s]


                   all       1658       6636      0.927      0.945      0.957      0.805

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      35/50      4.53G     0.7905     0.6254      1.195         30        320: 100%|██████████| 456/456 [02:46<00:00,  2.73it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95):  63%|██████▎   | 33/52 [00:08<00:04,  4.23it/s]



                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:16<00:00,  3.14it/s]


                   all       1658       6636      0.917      0.948      0.956      0.801

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      36/50      4.55G     0.7888     0.6162       1.19         30        320: 100%|██████████| 456/456 [02:46<00:00,  2.73it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95):  23%|██▎       | 12/52 [00:02<00:09,  4.10it/s]



                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:16<00:00,  3.20it/s]

                   all       1658       6636      0.918      0.949      0.957      0.808






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      37/50      4.51G     0.7845     0.6188       1.19         47        320: 100%|██████████| 456/456 [02:51<00:00,  2.66it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:12<00:00,  4.13it/s]

                   all       1658       6636      0.919      0.951      0.956      0.808






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      38/50      4.51G     0.7754     0.6114      1.183         27        320: 100%|██████████| 456/456 [02:51<00:00,  2.65it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:13<00:00,  3.88it/s]


                   all       1658       6636      0.919      0.951      0.959       0.81

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      39/50      4.54G     0.7708     0.6096       1.18         30        320: 100%|██████████| 456/456 [02:51<00:00,  2.66it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:13<00:00,  3.80it/s]

                   all       1658       6636      0.919      0.954      0.956      0.809






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      40/50      4.52G     0.7703     0.6039      1.181         16        320: 100%|██████████| 456/456 [02:51<00:00,  2.65it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:17<00:00,  3.02it/s]

                   all       1658       6636      0.924      0.948      0.958      0.812





Closing dataloader mosaic
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1, 4.0), tile_grid_size=(8, 8))

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      41/50      4.55G     0.6385     0.4959      1.099         16        320: 100%|██████████| 456/456 [02:45<00:00,  2.75it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:16<00:00,  3.19it/s]


                   all       1658       6636      0.921      0.947      0.956      0.811

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      42/50      4.55G     0.6289     0.4903       1.09          7        320: 100%|██████████| 456/456 [02:49<00:00,  2.68it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:12<00:00,  4.18it/s]


                   all       1658       6636       0.92      0.953      0.957       0.81

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      43/50       4.5G     0.6188     0.4807      1.083         19        320: 100%|██████████| 456/456 [02:49<00:00,  2.69it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:12<00:00,  4.18it/s]

                   all       1658       6636      0.921      0.953      0.956      0.803






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      44/50      4.55G     0.6085     0.4735      1.078         14        320: 100%|██████████| 456/456 [02:48<00:00,  2.70it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:12<00:00,  4.17it/s]


                   all       1658       6636      0.922      0.948      0.955      0.807

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      45/50      4.55G     0.5985     0.4689      1.073          6        320: 100%|██████████| 456/456 [02:48<00:00,  2.70it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:12<00:00,  4.17it/s]

                   all       1658       6636      0.921      0.948      0.959      0.812






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      46/50      4.51G     0.5943     0.4658      1.066         14        320: 100%|██████████| 456/456 [02:48<00:00,  2.71it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:12<00:00,  4.13it/s]

                   all       1658       6636      0.919       0.95      0.957      0.812






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      47/50      4.55G     0.5874     0.4584      1.061         13        320: 100%|██████████| 456/456 [02:49<00:00,  2.69it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:12<00:00,  4.11it/s]


                   all       1658       6636      0.923      0.949      0.957      0.811

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      48/50      4.55G     0.5811     0.4581      1.062         15        320: 100%|██████████| 456/456 [02:44<00:00,  2.77it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:17<00:00,  3.01it/s]

                   all       1658       6636      0.922      0.949      0.957      0.813






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      49/50      4.55G     0.5735     0.4493      1.058         13        320: 100%|██████████| 456/456 [02:44<00:00,  2.77it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:16<00:00,  3.23it/s]

                   all       1658       6636       0.92      0.951      0.956      0.811






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      50/50      4.55G     0.5694     0.4451      1.055         12        320: 100%|██████████| 456/456 [02:44<00:00,  2.78it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:16<00:00,  3.24it/s]


                   all       1658       6636      0.918      0.951      0.957      0.814

50 epochs completed in 2.556 hours.
Optimizer stripped from runs/detect/train/weights/last.pt, 136.7MB
Optimizer stripped from runs/detect/train/weights/best.pt, 136.7MB

Validating runs/detect/train/weights/best.pt...
Ultralytics 8.3.34 🚀 Python-3.10.13 torch-2.4.1.post302 CUDA:0 (NVIDIA GeForce RTX 3060 Laptop GPU, 6144MiB)
Model summary (fused): 268 layers, 68,124,531 parameters, 0 gradients, 257.4 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:26<00:00,  1.96it/s]


                   all       1658       6636      0.919      0.945      0.958       0.81
Speed: 0.0ms preprocess, 9.8ms inference, 0.0ms loss, 0.7ms postprocess per image
Results saved to [1mruns/detect/train[0m


## Inference


In [16]:
detector = YOLOInference("yolov8x.pt", "runs/detect/train/weights/best.pt")

In [17]:
image, results = detector.predict_image(
    "../../data/extended/day/5/2024-05-13-18-20-03_jpg.rf.c5fbf2f48caf77a7b952411adc0cdf39.jpg",
    save_path="output.jpg",
)

print(f"Found {len(results.boxes) if results.boxes is not None else 0} detections")

Found 5 detections


In [18]:
import random

In [19]:
def visualize_samples_per_class(
    detector: YOLOInference,
    data_dir: str = "../../data",
    samples_per_class: int = 10,
    figsize: tuple = None,
):
    """
    Visualize detection results for samples from each class
    """
    class_dirs = sorted([d for d in Path(data_dir).glob("day/*") if d.is_dir()])
    n_classes = len(class_dirs)

    if figsize is None:
        figsize = (20, 4 * len(class_dirs))

    fig = plt.figure(figsize=figsize)
    for class_idx, class_dir in enumerate(class_dirs):
        print(f"Processing class {class_dir.name}")

        all_samples = list(class_dir.rglob("*.jpg"))
        if not all_samples:
            continue

        selected_samples = random.sample(all_samples, min(10, len(all_samples)))
        for sample_idx, sample in enumerate(selected_samples):
            try:
                image, results = detector.predict_image(str(sample))

                ax = fig.add_subplot(n_classes, 10, class_idx * 10 + sample_idx + 1)

                ax.imshow(image)
                ax.set_title(f"Class {class_dir.name}\n{len(results.boxes)} dets")
                ax.axis("off")

            except Exception as e:
                print(f"Error processing {sample}: {e}")
                continue

    plt.tight_layout()
    return fig

In [20]:
from tqdm import tqdm

In [21]:
output_dir = Path("outputs_inference")
output_dir.mkdir(exist_ok=True)

In [22]:
for class_dir in tqdm(list(Path("../../data/extended/day/").glob("*"))):
    if not class_dir.is_dir():
        continue

    class_output = output_dir / class_dir.name
    class_output.mkdir(exist_ok=True)

    for sample in class_dir.rglob("*.jpg"):
        image, results = detector.predict_image(
            str(sample), save_path=str(class_output / sample.name)
        )

100%|██████████| 13/13 [02:15<00:00, 10.45s/it]


In [23]:
for class_dir in tqdm(list(Path("../../data/extended/night/").glob("*"))):
    if not class_dir.is_dir():
        continue

    class_output = output_dir / class_dir.name
    class_output.mkdir(exist_ok=True)

    for sample in class_dir.rglob("*.jpg"):
        image, results = detector.predict_image(
            str(sample), save_path=str(class_output / sample.name)
        )

 91%|█████████ | 10/11 [01:35<00:09,  9.69s/it]



100%|██████████| 11/11 [01:43<00:00,  9.40s/it]
