In [None]:
#!/usr/bin/env python
"""
Fine-tune YOLOv8 on custom crowd detection dataset.

Author: Никита
Date: 2025-11-06
"""

import os
from pathlib import Path
from typing import Optional
from ultralytics import YOLO
import torch


def train_model(
    data_yaml: str = "dataset/data.yaml",
    model_name: str = "yolov8s.pt",
    epochs: int = 100,
    imgsz: int = 640,
    batch: int = 16,
    name: str = "crowd_finetune",
    use_cpu: bool = False,
    project: str = "runs/detect",
    exist_ok: bool = True
) -> str:
    """
    Fine-tune a YOLOv8 model on a custom dataset.

    Args:
        data_yaml (str): Path to dataset configuration (data.yaml).
        model_name (str): Pretrained YOLOv8 model (e.g., 'yolov8s.pt').
        epochs (int): Number of training epochs.
        imgsz (int): Image size for training.
        batch (int): Batch size.
        name (str): Name of the training run.
        use_cpu (bool): Force training on CPU.
        project (str): Directory to save training results.
        exist_ok (bool): Allow overwriting existing run.

    Returns:
        str: Path to the best model weights (best.pt) of this run.

    Raises:
        FileNotFoundError: If data.yaml is missing, or if training finished
            without producing best.pt in the run directory.
    """
    data_path = Path(data_yaml)
    if not data_path.exists():
        raise FileNotFoundError(f"Dataset config not found: {data_path}")

    # A missing '.pt' file is not treated as an error here: loading is left
    # to YOLO(), which is expected to fetch known pretrained weights by name.
    # Only tell the user that a download is likely to happen.
    if model_name.endswith('.pt') and not Path(model_name).exists():
        print(f"Pretrained weights {model_name} not found locally. "
              f"Will download from Ultralytics hub...")

    # Auto device selection: first CUDA device when available, else CPU.
    if use_cpu:
        device = 'cpu'
        print("CPU forced via use_cpu=True")
    else:
        device = 0 if torch.cuda.is_available() else 'cpu'
        print(f"Using device: {'CUDA' if device == 0 else 'CPU'}")

    print(f"Loading model: {model_name}")
    model = YOLO(model_name)

    print(f"Starting training: {name}")
    print(f"  Dataset: {data_yaml}")
    print(f"  Epochs: {epochs}, Batch: {batch}, ImgSz: {imgsz}")

    model.train(
        data=str(data_path),
        epochs=epochs,
        imgsz=imgsz,
        batch=batch,
        name=name,
        project=project,
        exist_ok=exist_ok,
        patience=20,
        device=device,
        optimizer='AdamW',
        lr0=0.001,
        cos_lr=True,
        augment=True,
        cache='disk',  # Avoid OOM on large datasets
        plots=True,
        save=True,
        verbose=True
    )

    # Prefer the directory the trainer actually wrote to: with exist_ok=False
    # the run directory may be auto-incremented, so project/name could point
    # at an older run. Fall back to project/name if the trainer does not
    # expose save_dir.
    trainer = getattr(model, "trainer", None)
    save_dir = getattr(trainer, "save_dir", None)
    run_dir = Path(save_dir) if save_dir else Path(project) / name
    best_pt = run_dir / "weights" / "best.pt"
    last_pt = run_dir / "weights" / "last.pt"

    if not best_pt.exists():
        raise FileNotFoundError(f"Best model not saved: {best_pt}")

    print("\n" + "="*50)
    print("TRAINING COMPLETED SUCCESSFULLY!")
    print(f"Run: {run_dir}")
    print(f"Best model: {best_pt}")
    print(f"Last model: {last_pt}")
    print("="*50)

    return str(best_pt)


def main() -> None:
    """Entry point: fine-tune with the default crowd-detection settings.

    Runs train_model() with the hard-coded configuration below and prints
    the path of the best weights it returns.
    """
    best_model_path = train_model(
        data_yaml="dataset/data.yaml",
        model_name="yolov8s.pt",
        epochs=100,
        imgsz=640,
        batch=16,
        name="crowd_finetune_v1",
        use_cpu=False  # Set to True for CPU-only systems
    )
    # The message was stored as mojibake (UTF-8 bytes decoded with the wrong
    # codec); restored to the intended Russian text
    # ("Done! The best model has been saved").
    print(f"\nГотово! Лучшая модель сохранена: {best_model_path}")


# Run training only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

  from .autonotebook import tqdm as notebook_tqdm


Using device: cpu
Training: yolov8s.pt, epochs=100, batch=16, imgsz=640
New https://pypi.org/project/ultralytics/8.3.225 available 😃 Update with 'pip install -U ultralytics'
Ultralytics YOLOv8.0.226 🚀 Python-3.10.7 torch-1.12.1+cu116 CPU (Intel Core(TM) i3-7320 4.10GHz)
[34m[1mengine\trainer: [0mtask=detect, mode=train, model=yolov8s.pt, data=dataset/data.yaml, epochs=100, patience=20, batch=16, imgsz=640, save=True, save_period=-1, cache=disk, device=cpu, workers=8, project=None, name=crowd_finetune, exist_ok=True, pretrained=True, optimizer=AdamW, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=True, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=True, agnostic_nms=Fals

[34m[1mtrain: [0mScanning D:\DataScience\test_for_new_work\dataset\labels\train.cache... 567 images, 0 backgrounds, 0 corrupt: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 567/567 [00:00<?, ?it/s][0m
[34m[1mtrain: [0mCaching images (3.3GB disk): 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 567/567 [00:44<00:00, 12.65it/s][0m


[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01), CLAHE(p=0.01, clip_limit=(1, 4.0), tile_grid_size=(8, 8))


[34m[1mval: [0mScanning D:\DataScience\test_for_new_work\dataset\labels\val... 141 images, 0 backgrounds, 0 corrupt: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 141/141 [00:02<00:00, 58.00it/s][0m


[34m[1mval: [0mNew cache created: D:\DataScience\test_for_new_work\dataset\labels\val.cache


[34m[1mval: [0mCaching images (0.8GB disk): 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 141/141 [00:07<00:00, 18.55it/s][0m
