In [1]:
!pip install ultralytics torch torchvision torchaudio opencv-python pyyaml numpy



In [2]:
# train_yolov8s_weighted.py
"""
Train YOLOv8s with:
 - imgsz = 768
 - epochs = 100
 - class balancing via a weighted dataloader (oversamples images with minority classes)
Works locally and on Lightning AI.
"""

import os
import numpy as np
import yaml
from ultralytics import YOLO
import ultralytics.data.build as build  # we'll monkey-patch build.YOLODataset
import random
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


# ----------------------
# Helper: robust extraction of class ids from a label object
# ----------------------
def extract_classes_from_label(lbl):
    """
    lbl may be:
      - a dict-like with key 'cls' (older/newer Ultralytics variants),
      - or a numpy array with class index in the first column (common).
    This helper returns a 1D numpy array of ints (possibly length 0).
    """
    try:
        # if dict-like and holds 'cls'
        if isinstance(lbl, dict) and 'cls' in lbl:
            cls = np.array(lbl['cls']).reshape(-1).astype(int)
            return cls
    except Exception:
        pass

    # fallback: try treat as numpy array
    try:
        arr = np.array(lbl)
        if arr.size == 0:
            return np.array([], dtype=int)
        # assume first column is class index
        cls = arr[:, 0].astype(int)
        return cls
    except Exception:
        # last-resort: empty
        return np.array([], dtype=int)


# ----------------------
# Weighted dataset that samples images according to per-image probabilities
# (based on inverted class frequencies)
# ----------------------
def create_weighted_dataset_class():
    """Return a YOLOWeightedDataset class that subclasses the installed YOLODataset."""
    BaseYOLODataset = build.YOLODataset  # original

    class YOLOWeightedDataset(BaseYOLODataset):
        def __init__(self, *args, **kwargs):
            # Keep same signature as original
            super().__init__(*args, **kwargs)

            # Determine if training mode: older implementations expose self.prefix
            prefix = getattr(self, "prefix", "") or ""
            self.train_mode = "train" in prefix or "train" in kwargs.get("mode", "")

            # Compute class instance counts across the dataset
            self._count_instances()

            # Class weights: inverse of frequency (simple)
            # Avoid division by zero (replace zeros by 1)
            counts = np.array(self.counts, dtype=float)
            counts[counts == 0] = 1.0
            class_weights = counts.sum() / counts  # more weight for rarer classes
            self.class_weights = class_weights

            # Aggregation function to combine multiple labels in an image (mean by default)
            self.agg_func = np.mean

            # Per-image weights & probabilities
            self.weights = self._calculate_weights()
            self.probabilities = self._calculate_probabilities()

            logger.info(f"Detected class counts: {self.counts.tolist()}")
            logger.info(f"Computed class_weights (rarer -> larger): {self.class_weights.tolist()}")

        def _count_instances(self):
            # self.labels is provided by the base YOLODataset (list of label arrays/dicts)
            nclasses = len(self.data.get("names", []))
            counts = np.zeros(nclasses, dtype=int)
            for lbl in self.labels:
                cls_ids = extract_classes_from_label(lbl)
                for c in cls_ids:
                    if 0 <= int(c) < nclasses:
                        counts[int(c)] += 1
            # avoid zero counts
            counts = np.where(counts == 0, 0, counts)
            self.counts = counts

        def _calculate_weights(self):
            weights = []
            for lbl in self.labels:
                cls_ids = extract_classes_from_label(lbl)
                if cls_ids.size == 0:
                    weights.append(1.0)
                    continue
                # aggregate per-class weights for this image
                w = float(self.agg_func(self.class_weights[cls_ids]))
                weights.append(max(w, 1e-6))
            return weights

        def _calculate_probabilities(self):
            total = float(sum(self.weights))
            if total <= 0:
                # fallback to uniform
                return [1.0 / len(self.weights)] * len(self.weights)
            return [w / total for w in self.weights]

        def __getitem__(self, index):
            # During training: sample an index according to probabilities
            if self.train_mode:
                idx = np.random.choice(len(self.labels), p=self.probabilities)
                return self.transforms(self.get_image_and_label(idx))
            else:
                # validation/test: behave normally
                return self.transforms(self.get_image_and_label(index))

    return YOLOWeightedDataset


# ----------------------
# Main training routine
# ----------------------
def main():
    # 1. Monkey-patch YOLODataset with weighted version BEFORE calling model.train()
    YOLOWeightedDataset = create_weighted_dataset_class()
    build.YOLODataset = YOLOWeightedDataset
    logger.info("Patched ultralytics.data.build.YOLODataset -> YOLOWeightedDataset (weighted sampling enabled)")

    # 2. Load dataset path from data.yaml to log / sanity-check (optional)
    data_yaml = "data.yaml"
    if not os.path.exists(data_yaml):
        raise FileNotFoundError(f"{data_yaml} not found. Place it next to this script.")
    with open(data_yaml, "r") as f:
        data_cfg = yaml.safe_load(f)

    logger.info(f"Using dataset: {data_cfg}")

    # 3. Build model (YOLOv8s pretrained)
    #    Use .pt pretrained weights (good transfer) - change to 'yolov8s.yaml' to train from scratch
    model = YOLO("yolov8s.pt")

    # 4. Train
    model.train(
        data=data_yaml,
        epochs=100,
        imgsz=768,
        batch=-1,     # auto batch size (let ultralytics pick based on GPU)
        project="ppe_runs",
        name="yolov8s_weighted_768_ep100",
        device=0,     # use GPU (Lightning will provide one)
        seed=42,
        # optionally keep default augmentations; remove if you don't want them:
        # mosaic=1.0, mixup=0.2, copy_paste=0.2,
    )

    # 5. Final evaluation on test split
    logger.info("Evaluating on test split...")
    results = model.val(split="test")
    logger.info(f"Validation summary: {results}")

    # done
    logger.info("Training finished. Results are in the ppe_runs folder.")


if __name__ == "__main__":
    # make training deterministic-ish
    random.seed(42)
    np.random.seed(42)
    main()


INFO:__main__:Patched ultralytics.data.build.YOLODataset -> YOLOWeightedDataset (weighted sampling enabled)
INFO:__main__:Using dataset: {'path': '.', 'train': 'train/images', 'val': 'valid/images', 'test': 'test/images', 'nc': 5, 'names': ['boots', 'gloves', 'helmet', 'human', 'vest'], 'roboflow': {'workspace': 'hx-hezqh', 'project': 'ppe-detection-yfmym', 'version': 1, 'license': 'CC BY 4.0', 'url': 'https://universe.roboflow.com/hx-hezqh/ppe-detection-yfmym/dataset/1'}}


[KDownloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8s.pt to 'yolov8s.pt': 100% ━━━━━━━━━━━━ 21.5/21.5MB 273.0MB/s 0.1s
Ultralytics 8.3.189 🚀 Python-3.10.10 torch-2.5.1+cu121 CUDA:0 (Tesla T4, 15102MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=-1, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=data.yaml, degrees=0.0, deterministic=True, device=0, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=100, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=768, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8s.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=yolov8s_we

INFO:__main__:Detected class counts: [2369, 76, 1329, 1375, 1823]
INFO:__main__:Computed class_weights (rarer -> larger): [2.94301392992824, 91.73684210526316, 5.246049661399549, 5.070545454545455, 3.8244651673066374]


[34m[1mAutoBatch: [0mComputing optimal batch size for imgsz=768 at 60.0% CUDA memory utilization.
[34m[1mAutoBatch: [0mCUDA:0 (Tesla T4) 14.75G total, 0.13G reserved, 0.12G allocated, 14.49G free
      Params      GFLOPs  GPU_mem (GB)  forward (ms) backward (ms)                   input                  output
    11137535       41.26         0.891         44.71         318.3        (1, 3, 768, 768)                    list
    11137535       82.53         1.325         23.74         108.3        (2, 3, 768, 768)                    list
    11137535       165.1         2.070         26.75         84.01        (4, 3, 768, 768)                    list
    11137535       330.1         3.429         51.57         124.7        (8, 3, 768, 768)                    list
    11137535       660.2         6.229         104.9         186.9       (16, 3, 768, 768)                    list
[34m[1mAutoBatch: [0mUsing batch-size 23 for CUDA:0 8.96G/14.75G (61%) ✅
[34m[1mtrain: [0mFast image a

INFO:__main__:Detected class counts: [2369, 76, 1329, 1375, 1823]
INFO:__main__:Computed class_weights (rarer -> larger): [2.94301392992824, 91.73684210526316, 5.246049661399549, 5.070545454545455, 3.8244651673066374]


[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 1436.1±592.2 MB/s, size: 73.7 KB)
[K[34m[1mval: [0mScanning /teamspace/studios/this_studio/valid/labels... 420 images, 0 backgrounds, 0 corrupt: 100% ━━━━━━━━━━━━ 420/420 1678.3it/s 0.3s
[34m[1mval: [0mNew cache created: /teamspace/studios/this_studio/valid/labels.cache


INFO:__main__:Detected class counts: [593, 41, 320, 376, 467]
INFO:__main__:Computed class_weights (rarer -> larger): [3.03035413153457, 43.829268292682926, 5.615625, 4.779255319148936, 3.84796573875803]


Plotting labels to ppe_runs/yolov8s_weighted_768_ep100/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.001111, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005390625), 63 bias(decay=0.0)
Image sizes 768 train, 768 val
Using 4 dataloader workers
Logging results to [1mppe_runs/yolov8s_weighted_768_ep100[0m
Starting training for 100 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
[K      1/100      7.03G      1.137      1.972      1.401         11        768: 100% ━━━━━━━━━━━━ 69/69 2.0it/s 34.8s
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ━━━━━━━━━━━━ 10/10 2.2it/s 4.6s
                   all        420       1797      0.742      0.659      0.715      0.473

      Epoch    GPU_mem   box_loss   cls_l

INFO:__main__:Evaluating on test split...


Ultralytics 8.3.189 🚀 Python-3.10.10 torch-2.5.1+cu121 CUDA:0 (Tesla T4, 15102MiB)
Model summary (fused): 72 layers, 11,127,519 parameters, 0 gradients, 28.4 GFLOPs
[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 1272.6±416.2 MB/s, size: 61.2 KB)
[K[34m[1mval: [0mScanning /teamspace/studios/this_studio/test/labels.cache... 211 images, 0 backgrounds, 0 corrupt: 100% ━━━━━━━━━━━━ 211/211 455246.0it/s 0.0s0s


INFO:__main__:Detected class counts: [286, 16, 165, 178, 232]
INFO:__main__:Computed class_weights (rarer -> larger): [3.0664335664335662, 54.8125, 5.315151515151515, 4.926966292134831, 3.7801724137931036]


[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ━━━━━━━━━━━━ 10/10 1.9it/s 5.2s
                   all        211        877      0.956      0.963      0.977      0.816
                 boots        146        286       0.98      0.997      0.995      0.794
                gloves          9         16      0.934      0.882      0.936      0.677
                helmet        146        165      0.946      0.988      0.975      0.779
                 human        158        178      0.956       0.97      0.988      0.931
                  vest        182        232      0.966      0.978      0.991      0.897
Speed: 2.6ms preprocess, 14.2ms inference, 0.0ms loss, 1.2ms postprocess per image
Results saved to [1mppe_runs/yolov8s_weighted_768_ep1002[0m


INFO:__main__:Validation summary: ultralytics.utils.metrics.DetMetrics object with attributes:

ap_class_index: array([0, 1, 2, 3, 4])
box: ultralytics.utils.metrics.Metric object
confusion_matrix: <ultralytics.utils.metrics.ConfusionMatrix object at 0x7f0d78ccd3f0>
curves: ['Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)']
curves_results: [[array([          0,    0.001001,    0.002002,    0.003003,    0.004004,    0.005005,    0.006006,    0.007007,    0.008008,    0.009009,     0.01001,    0.011011,    0.012012,    0.013013,    0.014014,    0.015015,    0.016016,    0.017017,    0.018018,    0.019019,     0.02002,    0.021021,    0.022022,    0.023023,
          0.024024,    0.025025,    0.026026,    0.027027,    0.028028,    0.029029,     0.03003,    0.031031,    0.032032,    0.033033,    0.034034,    0.035035,    0.036036,    0.037037,    0.038038,    0.039039,     0.04004,    0.041041,    0.042042,    0.043043,    0.044044,    0.045045, 