## Builting Functions

### Cuda Check

In [1]:
import torch

# Print the CUDA version that PyTorch was built with
print("Built CUDA Version:", torch.version.cuda)

# Check if CUDA is available on the current system
if torch.cuda.is_available():
    # Print the CUDA runtime version (compiled version)
    print("CUDA Runtime Version:", torch._C._cuda_getCompiledVersion())
    
    # Print the name of the first available GPU
    print("GPU Name:", torch.cuda.get_device_name(0))
else:
    # Inform the user if CUDA is not available
    print("CUDA is not available.")


Built CUDA Version: 12.4
CUDA Runtime Version: 12040
GPU Name: NVIDIA GeForce RTX 3050 OEM


### Mapping Classes and their colors with class id 

In [2]:
# Mapping of class IDs to class names and their corresponding RGB color codes
class_id_to_name = {
    0:  ('unlabeled', [28, 42, 168]),         # Background or unclassified area
    1:  ('pool', [0, 50, 89]),                # Swimming pool
    2:  ('vegetation', [107, 142, 35]),       # Trees, grass, or bushes
    3:  ('roof', [70, 70, 70]),               # Building rooftops
    4:  ('wall', [102, 102, 156]),            # Building walls
    5:  ('window', [254, 228, 12]),           # Windows
    6:  ('person', [255, 22, 96]),            # People
    7:  ('dog', [102, 51, 0]),                # Dogs
    8:  ('car', [9, 143, 150]),               # Cars
    9:  ('bicycle', [119, 11, 32]),           # Bicycles
    10: ('tree', [51, 51, 0]),                # Trees
    11: ('truck', [160, 160, 60]),            # Trucks (added)
    12: ('bus', [200, 80, 80]),               # Buses (added)
    13: ('vehicle', [20, 80, 80]),            # General vehicle category (added)
}


### Install and Import packages

In [3]:
# Install NumPy - fundamental package for numerical computations
# !pip install numpy

# Install OpenCV - library for computer vision tasks
# !pip install opencv-python

# Install Pillow - image processing library
# !pip install pillow

# Install Matplotlib - plotting and visualization library
# !pip install matplotlib

# Install tqdm - progress bar utility
# !pip install tqdm

# Install scikit-learn - machine learning tools
# !pip install scikit-learn

# Install PyTorch and TorchVision - deep learning framework and its vision tools
# !pip install torch torchvision

# Install Ultralytics - YOLO model implementation and training tools
# !pip install ultralytics


In [4]:
# Standard library imports
import os                     # Operating system interfaces
import gc                     # Garbage collection interface
import json                   # Working with JSON data
import shutil                 # File operations like copy, move, etc.
import zipfile                # Extracting zip archives
import random                 # Random number generation
from glob import glob         # Pattern matching for file paths
from pathlib import Path      # Object-oriented file path handling
from collections import defaultdict  # Dictionary with default value support
import xml.etree.ElementTree as ET  # Parsing XML files

# Scientific computing and data manipulation
import numpy as np            # Numerical operations
import pandas as pd           # Data analysis and manipulation
from sklearn.model_selection import train_test_split  # Train-test split

# Image processing and visualization
import cv2                    # OpenCV for computer vision
from PIL import Image, ImageDraw, ImageFont  # PIL for image handling
import matplotlib.pyplot as plt              # Plotting library
import matplotlib.patches as mpatches        # Drawing patches on plots

# Progress bar utility
from tqdm.auto import tqdm    # Progress bars for loops

# PyTorch and related imports
import torch
import torch.nn as nn         # Neural network modules
from torch.utils.data import DataLoader       # Efficient data loading
import torchvision.models as models           # Pretrained models
import torchvision.transforms as transforms   # Image transformations
import torchvision.models.segmentation as segmentation  # Segmentation models

# YOLO from Ultralytics
from ultralytics import YOLO  # YOLO object detection models

# Pandas library
import pandas as pd

# Google Drive downloader
import gdown                  # Downloading files from Google Drive

# Environment configuration
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"   # Avoids OpenMP duplicate library error

# Set device for computation (GPU if available, else CPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


  from .autonotebook import tqdm as notebook_tqdm


### Dataset Download Functions

These functions allow you to download and extract datasets from Google Drive shared URLs. They handle the download and extraction of ZIP files for the **Semantic Drone Dataset** and the **UAVDT Dataset**.

#### `semantic_drone_dataset_download`

This function downloads and extracts the **Semantic Drone Dataset** from a Google Drive URL.

#### `uavdt_dataset_download`
This function downloads and extracts the UAVDT Dataset from a Google Drive URL.


In [5]:
def semantic_drone_dataset_download(gdrive_url, extract_to="extracted"):
    """
    Downloads and extracts the Semantic Drone Dataset from a Google Drive URL.
    
    Parameters:
        gdrive_url (str): The shared Google Drive link to the ZIP file.
        extract_to (str): Directory to extract contents into. Default is 'extracted'.
    """
    # Extract the file ID from the Google Drive shareable URL
    file_id = gdrive_url.split("/d/")[1].split("/")[0]
    download_url = f"https://drive.google.com/uc?id={file_id}"

    # Create the output folder if it doesn't exist
    os.makedirs(extract_to, exist_ok=True)

    zip_path = os.path.join(extract_to, "downloaded.zip")

    print("[INFO] Downloading ZIP from Google Drive...")
    gdown.download(download_url, zip_path, quiet=False)

    print("[INFO] Extracting ZIP...")
    # Extract contents of the ZIP file
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(extract_to)

    # Delete the ZIP file after extraction to save space
    os.remove(zip_path)

    print(f"[DONE] Extracted files to: {extract_to}")


def uavdt_dataset_download(gdrive_url, extract_to="extracted"):
    """
    Downloads and extracts the UAVDT Dataset from a Google Drive URL.
    
    Parameters:
        gdrive_url (str): The shared Google Drive link to the ZIP file.
        extract_to (str): Directory to extract contents into. Default is 'extracted'.
    """
    # Extract the file ID from the Google Drive shareable URL
    file_id = gdrive_url.split("/d/")[1].split("/")[0]
    download_url = f"https://drive.google.com/uc?id={file_id}"

    # Create the output folder if it doesn't exist
    os.makedirs(extract_to, exist_ok=True)

    zip_path = os.path.join(extract_to, "downloaded.zip")

    print("[+] Downloading ZIP from Google Drive...")
    gdown.download(download_url, zip_path, quiet=False)

    print("[+] Extracting ZIP...")
    # Extract contents of the ZIP file
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(extract_to)

    # Delete the ZIP file after extraction to save space
    os.remove(zip_path)

    print(f"[+] Extracted files to: {extract_to}")


### YOLO Dataset Conversion

#### Semantic Dorne Datasets

1. **`parse_yolo_style_bbox_from_xml`**: Parses XML annotations and converts polygon objects to YOLO-style bounding boxes.
2. **`save_yolo_format`**: Saves bounding boxes in YOLO format (normalized coordinates: `<class_id> <x_center> <y_center> <width> <height>`).
3. **`convert_fulldataset_yolo_only`**: Converts a full dataset of images and XML annotations to YOLO format and saves them to the specified output directory.


In [6]:
# ----------------------------
# Parse polygon and convert to YOLO bbox
# ----------------------------

# Parses XML annotation and converts polygon objects to YOLO-style bounding boxes
def parse_yolo_style_bbox_from_xml(xml_path, class_id_to_name):
    tree = ET.parse(xml_path)
    root = tree.getroot()
    bboxes = []
    for obj in root.findall('object'):
        class_name = obj.find('name').text
        if class_name in [value[0] for value in class_id_to_name.values()]:
            polygon = obj.find('polygon')
            if polygon is not None:
                # Extract points from polygon
                points = polygon.findall('pt')
                coords = [(float(pt.find('x').text), float(pt.find('y').text)) for pt in points]
                # Convert polygon to bounding box
                x_min = min(coord[0] for coord in coords)
                y_min = min(coord[1] for coord in coords)
                x_max = max(coord[0] for coord in coords)
                y_max = max(coord[1] for coord in coords)
                bboxes.append(((x_min, y_min), (x_max, y_max), class_name))
    return bboxes


# ----------------------------
# Save YOLO-format txt
# ----------------------------

# Saves the bounding boxes in YOLO format: <class_id> <x_center> <y_center> <width> <height>
def save_yolo_format(image_id, bboxes, image_width, image_height, output_path, class_id_to_name):
    with open(output_path, 'w') as f:
        for (x_min, y_min), (x_max, y_max), class_name in bboxes:
            class_id = next(cid for cid, (name, _) in class_id_to_name.items() if name == class_name)
            x_center = (x_min + x_max) / 2 / image_width
            y_center = (y_min + y_max) / 2 / image_height
            width = (x_max - x_min) / image_width
            height = (y_max - y_min) / image_height
            # Write to file with six decimal precision
            f.write(f"{class_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}\n")


# ----------------------------
# Convert dataset (YOLO only)
# ----------------------------

# Converts the full dataset by extracting YOLO-style annotations and saving them
def convert_fulldataset_yolo_only(dataset_path, output_dir, class_id_to_name):
    # Get list of image IDs (without extension)
    image_ids = [img.split('.')[0] for img in os.listdir(f"{dataset_path}/images") if img.endswith(".jpg")]

    # Create output folders
    os.makedirs(f"{output_dir}/images", exist_ok=True)
    os.makedirs(f"{output_dir}/labels", exist_ok=True)

    for image_id in tqdm(image_ids, desc="Converting to YOLO"):
        img_path = f"{dataset_path}/images/{image_id}.jpg"
        bbox_xml_path = f"{dataset_path}/gt/bounding_box/label_me_xml/{image_id}.xml"
        semantic_xml_path = f"{dataset_path}/gt/semantic/label_me_xml/{image_id}.xml"

        if not os.path.exists(img_path):
            print(f"[WARNING] Image not found: {img_path}, skipping...")
            continue

        try:
            # Parse bounding box and semantic annotations
            bboxes1 = parse_yolo_style_bbox_from_xml(bbox_xml_path, class_id_to_name)
            bboxes2 = parse_yolo_style_bbox_from_xml(semantic_xml_path, class_id_to_name)
            all_bboxes = bboxes1 + bboxes2
        except Exception as e:
            print(f"[WARNING] Skipping image {image_id} due to parse error: {e}")
            continue

        try:
            # Load image
            image = Image.open(img_path)
            image_np = np.array(image)
        except Exception as e:
            print(f"[WARNING] Could not load image {image_id}: {e}")
            continue

        # Save image to output directory
        image.save(f"{output_dir}/images/{image_id}.jpg")

        # Save YOLO-format labels to output directory
        yolo_annotation_path = f"{output_dir}/labels/{image_id}.txt"
        save_yolo_format(image_id, all_bboxes, image_np.shape[1], image_np.shape[0], yolo_annotation_path, class_id_to_name)

    print("[+] YOLO-format annotation conversion complete!")


#### UAVDT Datasets

1. **`convert_dataset`**: Converts UAVDT annotation files to YOLO format, mapping original class IDs to extended IDs and saving them with normalized bounding box coordinates.
2. **`copy_split_sequences`**: Splits the dataset into training and validation sets, copying the corresponding images and YOLO-format label files to separate directories.


In [7]:
# 🧠 Mapping UAVDT class IDs to extended class IDs used in the combined dataset
uavdt_to_extended = {
    0: 8,   # car
    1: 11,  # truck
    2: 12,  # bus
    3: 13   # other vehicle
}

# === Function to convert a single annotation file to YOLO format ===
def convert_annotation(anno_path, label_path, image_path, stats):
    if not os.path.exists(image_path):
        stats["missing_image"] += 1
        return

    try:
        img = cv2.imread(image_path)
        height, width = img.shape[:2]
    except:
        stats["missing_image"] += 1
        return

    with open(anno_path, 'r') as fin, open(label_path, 'w') as fout:
        for line in fin:
            parts = line.strip().split(',')
            if len(parts) < 8:
                stats["malformed"] += 1
                continue

            try:
                # Parse bounding box and class info
                x, y, w, h = map(float, parts[0:4])
                original_cls = int(parts[5])

                # Skip classes not in our mapping
                if original_cls not in uavdt_to_extended:
                    stats["skipped"][original_cls] += 1
                    continue

                # Convert to new class ID
                cls = uavdt_to_extended[original_cls]

                # Convert to YOLO format (normalized center_x, center_y, width, height)
                x_center = (x + w / 2) / width
                y_center = (y + h / 2) / height
                w /= width
                h /= height

                # Validate normalized coordinates
                if not (0 <= x_center <= 1 and 0 <= y_center <= 1 and w > 0 and h > 0):
                    stats["skipped"][cls] += 1
                    continue

                # Write label line
                fout.write(f"{cls} {x_center:.6f} {y_center:.6f} {w:.6f} {h:.6f}\n")
                stats["converted"] += 1
            except Exception:
                stats["malformed"] += 1
                continue

            stats["total"] += 1

# === Step 1: Convert all UAVDT annotations to YOLO format ===
def convert_dataset(root_dir):
    # Find all annotation files inside any Mxxxx/annotations/ directory
    annotation_paths = glob(os.path.join(root_dir, "M*/annotations/*.txt"))
    total_files = len(annotation_paths)

    # Stats for tracking issues and progress
    stats = {
        "total": 0,
        "converted": 0,
        "malformed": 0,
        "missing_image": 0,
        "skipped": defaultdict(int)
    }

    print(f"🔄 Converting {total_files} annotation files to YOLO format...")

    for anno_path in tqdm(annotation_paths, desc="Converting", unit="file"):
        # Get sequence directory (e.g., M0101)
        sequence_dir = os.path.dirname(os.path.dirname(anno_path))
        file_name = os.path.basename(anno_path)

        # Output label directory
        label_dir = os.path.join(sequence_dir, "labels")
        os.makedirs(label_dir, exist_ok=True)

        # Output label file path
        label_path = os.path.join(label_dir, file_name)

        # Corresponding image path
        image_name = file_name.replace(".txt", ".jpg")
        image_path = os.path.join(sequence_dir, "images", image_name)

        # Perform the actual conversion
        convert_annotation(anno_path, label_path, image_path, stats)

    # Print summary of the conversion process
    print("\nConversion complete.")
    print(f"Total boxes:     {stats['total']}")
    print(f"Converted boxes: {stats['converted']}")
    print(f"Skipped boxes:   {sum(stats['skipped'].values())}")
    for cls, count in sorted(stats["skipped"].items()):
        print(f"   - Skipped class {cls}: {count}")
    print(f"Malformed lines: {stats['malformed']}")
    print(f"Missing images:  {stats['missing_image']}")

# === Step 2: Split dataset into train/val and copy files ===
def copy_split_sequences(src_root, dst_root, train_ratio=0.8):
    # Find all sequences (Mxxxx folders)
    all_sequences = sorted(glob(os.path.join(src_root, "M*")))

    # Split into training and validation sequences
    train_seqs, val_seqs = train_test_split(all_sequences, train_size=train_ratio, random_state=42)

    # Copy files into respective folders
    for split_name, split_list in zip(['train', 'val'], [train_seqs, val_seqs]):
        for seq_path in tqdm(split_list, desc=f"Copying {split_name}"):
            images_src = os.path.join(seq_path, "images")
            labels_src = os.path.join(seq_path, "labels")

            images_dst = os.path.join(dst_root, split_name, "images")
            labels_dst = os.path.join(dst_root, split_name, "labels")

            os.makedirs(images_dst, exist_ok=True)
            os.makedirs(labels_dst, exist_ok=True)

            # Copy image files
            for img_file in glob(os.path.join(images_src, "*.jpg")):
                shutil.copy(img_file, os.path.join(images_dst, os.path.basename(img_file)))

            # Copy label files
            for label_file in glob(os.path.join(labels_src, "*.txt")):
                shutil.copy(label_file, os.path.join(labels_dst, os.path.basename(label_file)))

    print("\n[+] Dataset split into 'train/' and 'val/' folders with images and YOLO-format labels.")


#### Convert into train and Val Sets

This script converts UAVDT annotations to YOLO format, maps class IDs, normalizes bounding boxes, and splits the dataset into training and validation sets, organizing images and label files into respective directories.


In [8]:
import os
import shutil
import random
from tqdm import tqdm

# Function to move files from source directories to target directories
def move_files(file_list, 
               source_image_dir, 
               source_annotation_dir,
               target_image_dir, 
               target_annotation_dir):
    
    # Create target directories if they don't exist
    os.makedirs(target_image_dir, exist_ok=True)
    os.makedirs(target_annotation_dir, exist_ok=True)

    # Loop through each file in the provided list and move the corresponding image and annotation
    for image_id in tqdm(file_list, desc=f"Moving to {os.path.basename(os.path.dirname(target_image_dir))}"):
        # Construct paths for the image and annotation
        image_path = os.path.join(source_image_dir, f"{image_id}.jpg")
        annotation_path = os.path.join(source_annotation_dir, f"{image_id}.txt")

        # Construct target paths for the image and annotation
        target_image_path = os.path.join(target_image_dir, f"{image_id}.jpg")
        target_annotation_path = os.path.join(target_annotation_dir, f"{image_id}.txt")

        # Check if both the image and annotation files exist, then copy them to target directories
        if os.path.exists(image_path) and os.path.exists(annotation_path):
            shutil.copy(image_path, target_image_path)
            shutil.copy(annotation_path, target_annotation_path)

# Function to split the dataset into training and validation sets, and move the files
def split_and_move_dataset(source_base_dir="./datasets/semantic_yolo",
                           target_base_dir="./datasets/new_dataset_yolo_split",
                           split_ratio=0.8,
                           seed=42):
    
    # Set the random seed for reproducibility
    random.seed(seed)

    # Define paths for the image and label directories
    image_dir = os.path.join(source_base_dir, "images")
    label_dir = os.path.join(source_base_dir, "labels")

    # Get all image IDs (file names without extensions) from the image directory
    image_ids = [os.path.splitext(f)[0] for f in os.listdir(image_dir) if f.endswith(".jpg")]
    
    # Shuffle the image IDs to randomize the split
    random.shuffle(image_ids)

    # Determine the split index based on the split ratio
    split_idx = int(len(image_ids) * split_ratio)

    # Split the image IDs into training and validation sets
    train_ids = image_ids[:split_idx]
    val_ids = image_ids[split_idx:]

    # Move the training images and annotations to the target directories
    move_files(train_ids,
               source_image_dir=image_dir,
               source_annotation_dir=label_dir,
               target_image_dir=os.path.join(target_base_dir, "train/images"),
               target_annotation_dir=os.path.join(target_base_dir, "train/labels"))

    # Move the validation images and annotations to the target directories
    move_files(val_ids,
               source_image_dir=image_dir,
               source_annotation_dir=label_dir,
               target_image_dir=os.path.join(target_base_dir, "val/images"),
               target_annotation_dir=os.path.join(target_base_dir, "val/labels"))

    # Print the summary of the dataset split
    print(f"\n[✓] Dataset split completed: {len(train_ids)} train / {len(val_ids)} val samples")


#### Normalize Labels

This script normalizes bounding box coordinates in YOLO label files to the [0, 1] range based on the corresponding image dimensions.


In [9]:
from PIL import Image
import os
from tqdm import tqdm

def normalize_label_file(label_file, img_width, img_height):
    """
    Normalize the label coordinates in a label file to ensure they are within [0, 1] range.
    The label file is updated with the normalized values.
    """
    with open(label_file, 'r') as f:
        lines = f.readlines()
    
    with open(label_file, 'w') as f:
        for line in lines:
            # Split the line by spaces to get the class and coordinates
            parts = line.strip().split()
            class_id = int(parts[0])
            x_center, y_center, width, height = map(float, parts[1:])
            
            # Normalize the coordinates to be within the range [0, 1]
            x_center = min(1.0, max(0.0, x_center))
            y_center = min(1.0, max(0.0, y_center))
            width = min(1.0, max(0.0, width))
            height = min(1.0, max(0.0, height))

            # Write the normalized values back to the file
            f.write(f"{class_id} {x_center} {y_center} {width} {height}\n")


def get_image_size(img_path):
    """
    Get the width and height of the image to normalize the coordinates properly.
    This function uses PIL (Pillow) to open the image and return its dimensions.
    """
    with Image.open(img_path) as img:
        return img.size  # returns (width, height)


def normalize_all_labels(labels_dir, img_dir):
    """
    Normalize all label files in the specified directory.
    It reads each label file, gets the corresponding image size, and normalizes the label coordinates.
    """
    for label_file in tqdm(os.listdir(labels_dir)):  # Iterate over all files in the labels directory
       
        if label_file.endswith('.txt'):  # Process only label files
            label_path = os.path.join(labels_dir, label_file)
            img_path = os.path.join(img_dir, label_file.replace('.txt', '.jpg'))  # Assuming JPG images
            
            if os.path.exists(img_path):
                # Get the image dimensions to normalize the labels
                img_width, img_height = get_image_size(img_path)
                normalize_label_file(label_path, img_width, img_height)
            else:
                # Warning if the corresponding image is missing
                print(f"Warning: Image for label {label_file} not found!")
    
    print("Normalization Complete")


### Training v8 model functions

This function trains a YOLO model from scratch using a specified base model variant and dataset configuration, with various data augmentation techniques and hyperparameters for effective training.


In [10]:
from ultralytics import YOLO

def train_yolo_from_scratch(data_yaml, epochs, imgsz, batch, name, model_variant="yolov8n.pt"):
    """
    Train YOLO model from scratch using a specified base model variant.
    
    Parameters:
    - data_yaml (str): Path to the YAML file containing dataset configuration.
    - epochs (int): Number of training epochs.
    - imgsz (int): Image size for training.
    - batch (int): Batch size.
    - name (str): Name for the training session, which will be used for saving results.
    - model_variant (str): Base model to start from (default is 'yolov8n.pt').
    
    The function initializes a YOLO model from the specified variant, 
    then trains it on the given dataset configuration with data augmentation 
    and hyperparameters tailored for the task.
    """
    print(f"[+] Training from scratch using base model: {model_variant}")
    
    # Initialize the model with the specified base model variant
    model = YOLO(model_variant)

    # Clear memory
    gc.collect()
    torch.cuda.empty_cache()

    # Start the training process with the provided parameters
    model.train(
        data=data_yaml,            # Path to dataset YAML configuration
        epochs=epochs,             # Number of epochs for training
        imgsz=imgsz,               # Image size to resize input images during training
        batch=batch,               # Batch size
        name=name,                 # Name for the experiment (saved in 'runs/train')
        project="runs/train",      # Directory where training results will be saved
        augment=True,              # Whether to apply data augmentation
        degrees=10,                # Rotation degrees for augmentation
        scale=0.5,                 # Scale factor for augmentation
        flipud=0.2,                # Probability of flipping the image upside down
        fliplr=0.5,                # Probability of flipping the image left-right
        hsv_h=0.015,               # Hue shift for HSV augmentation
        hsv_s=0.7,                 # Saturation shift for HSV augmentation
        hsv_v=0.4,                 # Value shift for HSV augmentation
        mosaic=1.0,                # Mosaic augmentation probability
        mixup=0.2,                 # Mixup augmentation probability
        lr0=0.01,                  # Initial learning rate
        lrf=0.01,                  # Final learning rate (multiplied by lr0)
        verbose=True,              # Whether to print training logs
        patience=15                # Number of epochs with no improvement before stopping
    )


### Fine-Tuning YOLO Model

This function fine-tunes a pre-trained YOLO model on a new dataset, applying data augmentation and adjusting hyperparameters like learning rate and weight decay to optimize performance for the new task.


In [11]:
from ultralytics import YOLO
import torch
import gc

def fine_tune_yolo(data_yaml, epochs, imgsz, batch, name, base_model_path):
    """
    Fine-tune a pre-trained YOLO model on a new dataset.

    Parameters:
    - data_yaml (str): Path to the dataset YAML file containing the configuration.
    - epochs (int): Number of epochs for fine-tuning.
    - imgsz (int): Image size for training.
    - batch (int): Batch size for training.
    - name (str): The name of the fine-tuning experiment, which will be used for saving.
    - base_model_path (str): Path to the pre-trained YOLO model that will be fine-tuned.
    
    The function loads the pre-trained YOLO model from the specified path, performs garbage 
    collection and memory clearing to avoid CUDA memory issues, and then starts the fine-tuning process 
    on the new dataset with specific settings, such as a lower learning rate and data augmentations.
    """
    print(f"[+] Fine-tuning model from: {base_model_path}")
    
    # Load the pre-trained model from the specified base model path
    model = YOLO(base_model_path)

    # Perform garbage collection and clear CUDA memory to avoid out-of-memory errors
    gc.collect()
    torch.cuda.empty_cache()

    # Fine-tune the model with the specified parameters
    model.train(
        data=data_yaml,           # Path to dataset YAML configuration
        epochs=epochs,            # Number of epochs for fine-tuning
        imgsz=imgsz,              # Image size for resizing input images
        batch=batch,              # Batch size for training
        name=name,                # Name for the fine-tuning experiment (saved in 'runs/train')
        project="runs/train",     # Directory where the results of the fine-tuning will be stored
        weight_decay=0.0005,      # Weight decay parameter to prevent overfitting
        augment=True,             # Enable data augmentation
        degrees=10,               # Rotate images by up to 10 degrees
        scale=0.5,                # Scale images by 50% during augmentation
        flipud=0.2,               # 20% chance of flipping images vertically
        fliplr=0.5,               # 50% chance of flipping images horizontally
        hsv_h=0.015,              # Adjust hue by +/- 1.5% during augmentation
        hsv_s=0.7,                # Adjust saturation by +/- 70% during augmentation
        hsv_v=0.4,                # Adjust brightness by +/- 40% during augmentation
        mosaic=1.0,               # Apply mosaic augmentation with 100% probability
        mixup=0.2,                # Mixup augmentation with 20% probability
        patience=20,              # Early stopping patience (wait 10 epochs without improvement)
        verbose=True,             # Display detailed training logs
    )


### YOLO Model Evaluation and Metrics Extraction

This script provides functions for evaluating a YOLO model on a dataset, extracting per-class mAP@0.5:0.95 metrics, and saving them to a JSON file. Below are the key functions:

- **load_yolo_model**: Loads a pre-trained YOLO model from the specified path.
- **run_model_validation**: Runs the validation for the loaded YOLO model and returns the results.
- **extract_per_class_metrics**: Extracts the mAP@0.5:0.95 metrics for each class from the model validation results.
- **save_metrics_to_json**: Saves the extracted metrics in a JSON file for easy access and further analysis.
- **evaluate_and_save_metrics**: A high-level function that loads the model, validates it, extracts metrics, and saves them to a JSON file.


In [12]:
import json
from ultralytics import YOLO

def load_yolo_model(model_path):
    """
    Loads a pre-trained YOLO model from the specified path.

    Parameters:
    - model_path (str): Path to the pre-trained YOLO model file.

    Returns:
    - YOLO model object.
    """
    return YOLO(model_path)

def run_model_validation(model):
    """
    Runs the model validation and returns the results.

    Parameters:
    - model (YOLO): The YOLO model to be validated.

    Returns:
    - results (Result object): Validation results from the model.
    """
    return model.val()

def extract_per_class_metrics(results):
    """
    Extracts mAP@0.5:0.95 for each class from the validation results.

    Parameters:
    - results (Result object): Validation results from the YOLO model.

    Returns:
    - per_class_metrics (dict): Dictionary containing per-class mAP@0.5:0.95 values.
    """
    per_class_metrics = {}
    if hasattr(results.box, 'maps') and results.box.maps is not None:
        maps = results.box.maps  # NumPy array of mAP@0.5:0.95 for each class
        for i, name in results.names.items():
            per_class_metrics[name] = {
                "class_id": i,
                "mAP@0.5:0.95": round(float(maps[i]), 4)  # Round to 4 decimal places
            }
    else:
        print("[-] No per-class mAP@0.5:0.95 data found.")
    return per_class_metrics

def save_metrics_to_json(metrics, output_path):
    """
    Saves the per-class metrics to a JSON file.

    Parameters:
    - metrics (dict): The metrics to be saved, typically containing per-class mAP values.
    - output_path (str): Path to the output JSON file where the metrics will be saved.
    """
    with open(output_path, "w") as f:
        json.dump(metrics, f, indent=4)  # Save as JSON with indents for readability
    print(f"[+] Saved per-class metrics to {output_path}")

def evaluate_and_save_metrics(model_path, output_json_path="per_class_metrics.json"):
    """
    Evaluates the YOLO model and saves the per-class mAP metrics to a JSON file.

    Parameters:
    - model_path (str): Path to the pre-trained YOLO model.
    - output_json_path (str): Path to save the output JSON file with per-class mAP values.
    """
    model = load_yolo_model(model_path)  # Load the YOLO model
    results = run_model_validation(model)  # Run validation
    metrics = extract_per_class_metrics(results)  # Extract per-class metrics
    save_metrics_to_json(metrics, output_json_path)  # Save metrics to JSON


### Print Per-Class mAP Metrics

This function loads the per-class mAP@0.5:0.95 metrics from a JSON file and prints them in a tabular format.

- **print_per_class_metrics**: 
    - **Parameters**: 
        - `json_path` (str): The path to the JSON file containing the per-class metrics (default is `"per_class_metrics.json"`).
    - **Functionality**:
        - Loads the metrics from the provided JSON file.
        - Prints a header and iterates through each class to display the class name, class ID, and corresponding mAP value.


In [13]:
def print_per_class_metrics(json_path="per_class_metrics.json"):
    """
    Prints the per-class mAP@0.5:0.95 metrics from a JSON file.

    Parameters:
    - json_path (str): Path to the JSON file containing the per-class metrics.
    """
    # Load the metrics from the specified JSON file
    with open(json_path, "r") as f:
        metrics = json.load(f)
    
    # Print the header for the table
    print("Per-Class mAP@0.5:0.95 Metrics:\n")
    print(f"{'Class Name':<15} {'Class ID':<10} {'mAP@0.5:0.95':<15}")
    print("-" * 40)
    
    # Loop through each class in the metrics and print the results
    for name, data in metrics.items():
        print(f"{name:<15} {data['class_id']:<10} {data['mAP@0.5:0.95']:<15}")


### Find Best YOLO Model

- **find_best_model(base_dir='runs_yolo/')**: 
    - Searches for the most recently modified `best.pt` file in the given directory and subdirectories.
    - Returns the path to the latest `best.pt` file.
    - Raises `FileNotFoundError` if no `best.pt` file is found.


In [14]:
from pathlib import Path

def find_best_model(base_dir='runs_yolo/'):
    """
    Searches for the 'best.pt' model file in the given directory and its subdirectories.

    Parameters:
    - base_dir (str): The directory where the search will start. Defaults to 'runs_yolo/'.

    Returns:
    - str: Path to the 'best.pt' model file.
    
    Raises:
    - FileNotFoundError: If no 'best.pt' file is found in the directory.
    """
    # Use Path.rglob to recursively search for all 'best.pt' files in the directory
    best_paths = list(Path(base_dir).rglob('best.pt'))
    
    # Check if any 'best.pt' file was found
    if not best_paths:
        raise FileNotFoundError("No 'best.pt' file found in the 'runs/' directory.")
    
    # Optionally, sort the found files by their last modified time (descending)
    best_paths.sort(key=lambda p: p.stat().st_mtime, reverse=True)
    
    # Print the path of the most recently modified 'best.pt' file
    print(f"[+] Found best.pt at: {best_paths[0]}")
    
    # Return the path to the 'best.pt' file
    return str(best_paths[0])


### Prediction on Videos

1. **Frame Processing**  

2. **Video Loop & Saving**  


In [15]:
# ========== FRAME PROCESSING ==========

import cv2

def process_frame_1(frame, yolo_model, w, h, class_id_to_name, conf_threshold=0.5):
    """
    Process a single frame from a video, run YOLO model inference, and annotate the frame.

    Parameters:
    - frame: The video frame to process.
    - yolo_model: The YOLO model used for object detection.
    - w, h: The width and height of the frame (image size).
    - class_id_to_name: A dictionary mapping class IDs to class names and colors.
    - conf_threshold: The confidence threshold for filtering detections.

    Returns:
    - annotated: The annotated frame with bounding boxes and labels.
    - boxes: The bounding box coordinates for each detected object.
    - class_ids: The class IDs for each detected object.
    """
    annotated = frame.copy()  # Make a copy of the original frame for annotation
    results = yolo_model(annotated, verbose=False)[0]  # Run YOLO inference on the frame
    boxes = results.boxes.xyxy.cpu().numpy()  # Get bounding box coordinates (x1, y1, x2, y2)
    class_ids = results.boxes.cls.cpu().numpy()  # Get class IDs for detected objects
    confidences = results.boxes.conf.cpu().numpy()  # Confidence scores for each box

    for box, cls_id, confidence in zip(boxes, class_ids, confidences):
        if confidence > conf_threshold:  # Filter detections based on confidence
            x1, y1, x2, y2 = map(int, box)  # Convert box coordinates to integers
            class_name, color = class_id_to_name[int(cls_id)]  # Get class name and color based on class ID
            cv2.rectangle(annotated, (x1, y1), (x2, y2), color, 2)  # Draw bounding box
            cv2.putText(annotated, f"{class_name} {confidence:.2f}", (x1, max(y1 - 10, 10)),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 255, 0), 2)  # Add label and confidence text

    return annotated, boxes, class_ids


# ========== VIDEO CAPTURE ==========

def setup_video_capture_1(video_path):
    """
    Set up video capture for a given video file and return video properties.

    Parameters:
    - video_path: Path to the video file.

    Returns:
    - cap: OpenCV video capture object.
    - total_frames: Total number of frames in the video.
    - fps: Frames per second of the video.
    - w, h: Width and height of the video frames.
    """
    cap = cv2.VideoCapture(video_path)  # Open the video file
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))  # Get total number of frames
    fps = cap.get(cv2.CAP_PROP_FPS)  # Get frames per second
    w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))  # Get frame width
    h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))  # Get frame height
    return cap, total_frames, fps, w, h


# ========== MAIN FUNCTION ==========

def videos_predictions(yolo_weights_path, class_id_to_name, video_dir='videos', output_base='./datatsets/opt', max_frames=None):
    """
    Process multiple video files, run YOLO inference, and save the results.

    Parameters:
    - yolo_weights_path: Path to the YOLO weights file.
    - class_id_to_name: A dictionary mapping class IDs to class names and colors.
    - video_dir: Directory containing the input video files.
    - output_base: Base directory where output images, labels, and videos will be saved.
    - max_frames: The maximum number of frames to process per video. If None, all frames will be processed.
    """
    yolo_model = YOLO(yolo_weights_path)  # Load the YOLO model

    # Set up output directories for images, labels, and output video
    image_out_dir = os.path.join(output_base, 'images')
    label_out_dir = os.path.join(output_base, 'labels')
    output_video_dir = os.path.join(output_base, 'output')

    # Create output directories if they don't exist
    os.makedirs(image_out_dir, exist_ok=True)
    os.makedirs(label_out_dir, exist_ok=True)
    os.makedirs(output_video_dir, exist_ok=True)

    # Iterate over video files in the video directory
    for video_file in tqdm(sorted(os.listdir(video_dir))):
        if not video_file.lower().endswith(".mp4"):
            continue  # Skip non-video files

        video_id = os.path.splitext(video_file)[0]  # Extract video ID (filename without extension)
        video_path = os.path.join(video_dir, video_file)  # Full path to the video file
        output_video_path = os.path.join(output_video_dir, f"{video_id}.mp4")  # Path for output video

        print(f"========== STARTED: {video_id} ==========")
        cap, total_frames, fps, w, h = setup_video_capture_1(video_path)  # Set up video capture
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Video writer codec
        writer = cv2.VideoWriter(output_video_path, fourcc, fps, (w, h))  # Video writer setup

        frame_count = 0
        pbar = tqdm(total=max_frames if max_frames else total_frames, desc=video_id)  # Progress bar

        while True:
            ret, frame = cap.read()
            if not ret or (max_frames and frame_count >= max_frames):
                break  # Exit if no frame is read or max_frames is reached

            # Process the current frame
            annotated_bgr, boxes, class_ids = process_frame_1(frame, yolo_model, w, h, class_id_to_name)

            # Save original frame as image
            img_filename = f'{video_id}_{frame_count:04d}.jpg'
            img_path = os.path.join(image_out_dir, img_filename)
            cv2.imwrite(img_path, frame)

            # Save YOLO-format label file
            label_filename = f'{video_id}_{frame_count:04d}.txt'
            label_path = os.path.join(label_out_dir, label_filename)
            with open(label_path, 'w') as f:
                for box, cls_id in zip(boxes, class_ids):
                    x1, y1, x2, y2 = box
                    w_box = x2 - x1
                    h_box = y2 - y1
                    cx = x1 + w_box / 2
                    cy = y1 + h_box / 2
                    f.write(f"{int(cls_id)} {cx/w:.6f} {cy/h:.6f} {w_box/w:.6f} {h_box/h:.6f}\n")

            writer.write(annotated_bgr)  # Write the annotated frame to the output video
            frame_count += 1
            pbar.update(1)  # Update progress bar

        cap.release()  # Release the video capture object
        writer.release()  # Release the video writer object
        pbar.close()  # Close the progress bar
        print(f"DONE: {video_id} — Processed {frame_count} frames")  # Print processing summary


### YOLO Video Inference 

1. **Frame Processing with Filtering**  
   `process_frame()` runs YOLO inference on each frame, filters by confidence and specific class IDs (`underrepresented_class_ids`), and draws bounding boxes with labels.

2. **Batch Video Processing & Saving**  
   `process_all_videos()` loops through videos, processes each frame, and saves:
   - Original frame (`.jpg`)
   - YOLO label (`.txt`)
   - Annotated output video (`.mp4`)  
   It also tracks total frames, bounding boxes, and labels.


In [16]:
import cv2
import os
import numpy as np
from ultralytics import YOLO
from tqdm import tqdm
import gc
import torch

# ========== FRAME PROCESSING ==========
def process_frame(frame, yolo_model, w, h, class_id_to_name, valid_class_ids, conf_threshold=0.5):
    """
    Process each frame, run inference, and annotate with bounding boxes and class labels.
    
    Parameters:
    - frame: Input video frame
    - yolo_model: YOLO model for inference
    - w, h: Width and height of the frame
    - class_id_to_name: Mapping of class IDs to class names and colors
    - valid_class_ids: List of valid class IDs to filter
    - conf_threshold: Confidence threshold for valid detections
    
    Returns:
    - annotated: Annotated frame with bounding boxes and labels
    - filtered_boxes: List of bounding boxes for valid detections
    - filtered_ids: List of class IDs for valid detections
    """
    annotated = frame.copy()
    results = yolo_model(annotated, verbose=False)[0]

    # Filter detections based on confidence
    mask = results.boxes.conf > conf_threshold
    boxes = results.boxes.xyxy[mask].cpu().numpy()
    class_ids = results.boxes.cls[mask].cpu().numpy()
    confs = results.boxes.conf[mask].cpu().numpy()

    filtered_boxes, filtered_ids = [], []

    # Process each detection
    for box, cls_id, conf in zip(boxes, class_ids, confs):
        if int(cls_id) in valid_class_ids:
            filtered_boxes.append(box)
            filtered_ids.append(cls_id)

            x1, y1, x2, y2 = map(int, box)
            class_name, color = class_id_to_name[int(cls_id)]
            label = f"{class_name} {conf:.2f}"
            cv2.rectangle(annotated, (x1, y1), (x2, y2), color, 2)
            cv2.putText(annotated, label, (x1, max(y1 - 10, 10)),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 255, 0), 2)

    return annotated, filtered_boxes, filtered_ids

# ========== VIDEO CAPTURE ==========
def setup_video_capture(video_path):
    """
    Setup video capture and retrieve video metadata.
    
    Parameters:
    - video_path: Path to the video file
    
    Returns:
    - cap: VideoCapture object
    - total_frames: Total number of frames in the video
    - fps: Frames per second
    - w, h: Width and height of the video frames
    """
    cap = cv2.VideoCapture(video_path)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    return cap, total_frames, fps, w, h

# ========== MAIN FUNCTION ==========
def process_all_videos(yolo_weights_path, class_id_to_name, underrepresented_class_ids,
                       video_dir='videos', output_base='./datasets/opt', conf_threshold=0.5):
    """
    Process all videos in the specified directory, running YOLO model inference and saving
    frames, bounding boxes, and labels.
    
    Parameters:
    - yolo_weights_path: Path to the YOLO weights file
    - class_id_to_name: Mapping of class IDs to class names and colors
    - underrepresented_class_ids: List of class IDs that need special attention
    - video_dir: Directory containing the video files
    - output_base: Base directory for saving outputs
    - conf_threshold: Confidence threshold for valid detections
    
    Returns:
    None
    """
    print("[+] Using Model", yolo_weights_path)
    yolo_model = YOLO(yolo_weights_path)

    # Output directories for images, labels, and video
    image_out_dir = os.path.join(output_base, 'images')
    label_out_dir = os.path.join(output_base, 'labels')
    output_video_dir = os.path.join(output_base, 'output')

    os.makedirs(image_out_dir, exist_ok=True)
    os.makedirs(label_out_dir, exist_ok=True)
    os.makedirs(output_video_dir, exist_ok=True)

    total_frame_count = 0
    total_bounding_boxes = 0
    total_label_files = 0

    # Process each video
    for video_file in tqdm(sorted(os.listdir(video_dir))):
        if not video_file.lower().endswith(".mp4"):
            continue

        video_id = os.path.splitext(video_file)[0]
        video_path = os.path.join(video_dir, video_file)
        output_video_path = os.path.join(output_video_dir, f"{video_id}.mp4")

        print(f"\n========== STARTED: {video_id} ==========")
        cap, total_frames, fps, w, h = setup_video_capture(video_path)
        writer = cv2.VideoWriter(output_video_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))

        frame_count = 0
        current_index = 0

        pbar = tqdm(total=total_frames, desc=f"{video_id} (all frames)")

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # Annotate the frame
            annotated_bgr, boxes, class_ids = process_frame(
                frame, yolo_model, w, h, class_id_to_name, underrepresented_class_ids, conf_threshold=conf_threshold)

            if boxes:  # Only save if there are valid detections
                img_filename = f'{video_id}_{frame_count:04d}.jpg'
                img_path = os.path.join(image_out_dir, img_filename)
                cv2.imwrite(img_path, frame)

                # Save YOLO-format label
                label_filename = f'{video_id}_{frame_count:04d}.txt'
                label_path = os.path.join(label_out_dir, label_filename)
                with open(label_path, 'w') as f:
                    for box, cls_id in zip(boxes, class_ids):
                        x1, y1, x2, y2 = box
                        w_box = x2 - x1
                        h_box = y2 - y1
                        cx = x1 + w_box / 2
                        cy = y1 + h_box / 2
                        f.write(f"{int(cls_id)} {cx/w:.6f} {cy/h:.6f} {w_box/w:.6f} {h_box/h:.6f}\n")

                total_label_files += 1
                total_bounding_boxes += len(boxes)

                writer.write(annotated_bgr)
                frame_count += 1

            current_index += 1
            pbar.update(1)

        cap.release()
        writer.release()
        pbar.close()
        print(f"DONE: {video_id} — Processed {frame_count} meaningful frames")
        total_frame_count += frame_count

        gc.collect()
        torch.cuda.empty_cache()

    print(f"\n[+] DONE: Total Processed {total_frame_count} meaningful frames")
    print(f"[+] Total Bounding Boxes Detected: {total_bounding_boxes}")
    print(f"[+] Total Label Files Created: {total_label_files}")


### Identify Rare Classes from YOLO Labels

1. **Class Frequency Counting**  
   The function scans all `.txt` label files in `label_dir` to count the occurrences of each class ID.

2. **Rare Class Detection**  
   Class IDs with instance counts below `rare_threshold` are identified as rare and returned.

In [17]:
import os
from collections import defaultdict

def get_rare_class_ids(label_dir, class_id_to_name, rare_threshold=1000):
    """
    Identify rare class IDs based on the number of instances in the label files.
    
    Parameters:
    - label_dir: Directory containing the label files
    - class_id_to_name: Mapping of class IDs to class names
    - rare_threshold: Threshold for class instances to be considered rare
    
    Returns:
    - rare_class_ids: Set of class IDs that have fewer instances than the threshold
    """
    # Initialize a dictionary to count occurrences of each class
    class_counts = defaultdict(int)

    # Iterate through all label files in the specified directory
    for label_file in os.listdir(label_dir):
        if not label_file.endswith('.txt'):  # Only consider .txt label files
            continue
        with open(os.path.join(label_dir, label_file), 'r') as f:
            # Count occurrences of each class in the label file
            for line in f:
                parts = line.strip().split()  # Split each line by spaces
                if len(parts) >= 1:  # Ensure there's at least one part (class ID)
                    cls_id = int(parts[0])  # Extract class ID
                    class_counts[cls_id] += 1  # Increment the count for this class

    # Print the count for each class
    print("Class-wise instance counts:")
    total_count = 0  # Variable to track the total count of instances

    # Iterate over the class IDs and print the count for each class
    for cls_id in sorted(class_counts.keys()):
        name = class_id_to_name.get(cls_id, ("Unknown", []))[0]  # Get the class name
        count = class_counts[cls_id]
        print(f"Class {cls_id:2d} ({name:10s}): {count} instances")
        total_count += count  # Update the total count of instances

    print("Total Count:", total_count)

    # Identify classes with fewer instances than the threshold
    rare_class_ids = {cls_id for cls_id, count in class_counts.items() if count < rare_threshold}
    print(f"\nRare class IDs (threshold < {rare_threshold}): {rare_class_ids}")

    return rare_class_ids


### Merge Two YOLO Datasets

1. **Dataset Copy with Prefix Renaming**  
   Images and labels from two YOLO datasets (`source1`, `source2`) are copied to a `destination` directory, each renamed with a unique prefix (`orig_`, `pred_`) to avoid filename clashes.

2. **Folder Structure Preserved**  
   Ensures YOLO folder structure (`images/`, `labels/`) is maintained in the destination and handles missing label files with warnings.

In [18]:
import os
import shutil

def merge_yolo_datasets(source1, source2, destination):
    """
    Merges two YOLO datasets by copying the images and label files from two sources
    into a destination directory, with appropriate renaming and handling of duplicate labels.
    
    Parameters:
    - source1: Path to the first source dataset
    - source2: Path to the second source dataset
    - destination: Path to the destination directory where merged dataset will be saved
    """
    # Define subfolders for images and labels in both source datasets
    img1_dir = os.path.join(source1, 'images')
    lbl1_dir = os.path.join(source1, 'labels')
    img2_dir = os.path.join(source2, 'images')
    lbl2_dir = os.path.join(source2, 'labels')
    dst_img_dir = os.path.join(destination, 'images')
    dst_lbl_dir = os.path.join(destination, 'labels')

    # Create destination folders for images and labels if they don't exist
    os.makedirs(dst_img_dir, exist_ok=True)
    os.makedirs(dst_lbl_dir, exist_ok=True)

    def copy_files(src_img_dir, src_lbl_dir, prefix):
        """
        Copies image and label files from source directories to destination,
        renaming them with a given prefix to distinguish the datasets.
        
        Parameters:
        - src_img_dir: Source directory containing image files
        - src_lbl_dir: Source directory containing label files
        - prefix: Prefix to append to filenames to distinguish the source dataset
        """
        # Iterate through the image files in the source directory
        for filename in sorted(os.listdir(src_img_dir)):
            if not filename.lower().endswith('.jpg'):
                continue
            base = os.path.splitext(filename)[0]  # Get base filename without extension

            # Copy image file with the prefix
            new_img_name = f"{prefix}_{base}.jpg"
            shutil.copy(os.path.join(src_img_dir, filename),
                        os.path.join(dst_img_dir, new_img_name))

            # Copy corresponding label file, if it exists
            label_file = base + ".txt"
            if os.path.exists(os.path.join(src_lbl_dir, label_file)):
                new_lbl_name = f"{prefix}_{base}.txt"
                shutil.copy(os.path.join(src_lbl_dir, label_file),
                            os.path.join(dst_lbl_dir, new_lbl_name))
            else:
                print(f"Warning: No label for {filename}")

    # Merge the first (original) dataset
    print("[+] Merging original dataset...")
    copy_files(img1_dir, lbl1_dir, prefix="orig")

    # Merge the second (predicted) dataset
    print("[+] Merging predicted video dataset...")
    copy_files(img2_dir, lbl2_dir, prefix="pred")

    # Print completion message
    print(f"\n[+] Merge complete! Merged dataset saved at: {destination}")


### Print YOLOv8 Training Metrics from `results.csv`

1. **Automatic CSV Discovery & Analysis**  
   Recursively searches for `results.csv` in the given directory and loads it into a DataFrame to analyze training progress and outcomes.

2. **Final Epoch Summary**  
   Calculates total training and validation loss and prints key metrics like Precision, Recall, and mAP from the last epoch.


In [19]:
import os
import pandas as pd

def find_results_csv(directory):
    """Find the results.csv file in the specified directory."""
    # Traverse the directory to find 'results.csv'
    for root, dirs, files in os.walk(directory):
        if 'results.csv' in files:
            return os.path.join(root, 'results.csv')  # Return the full path if found
    return None  # Return None if 'results.csv' is not found

def load_results_csv(results_csv_path):
    """Load the results CSV into a pandas DataFrame."""
    # Load the CSV file into a DataFrame and return it
    return pd.read_csv(results_csv_path)

def calculate_total_epochs(df):
    """Calculate the total number of epochs from the DataFrame."""
    # Return the maximum epoch value from the DataFrame
    return df['epoch'].max()

def calculate_training_loss(epoch_data):
    """Calculate the total training loss from the given epoch data."""
    # Extract training loss components from the epoch data
    train_box_loss = epoch_data['train/box_loss']
    train_cls_loss = epoch_data['train/cls_loss']
    train_dfl_loss = epoch_data['train/dfl_loss']
    # Return the sum of the training losses
    return train_box_loss + train_cls_loss + train_dfl_loss

def calculate_validation_loss(epoch_data):
    """Calculate the total validation loss from the given epoch data."""
    # Extract validation loss components from the epoch data
    val_box_loss = epoch_data['val/box_loss']
    val_cls_loss = epoch_data['val/cls_loss']
    val_dfl_loss = epoch_data['val/dfl_loss']
    # Return the sum of the validation losses
    return val_box_loss + val_cls_loss + val_dfl_loss

def print_final_metrics(df):
    """Print the final metrics for the last epoch."""
    # Extract the data for the last epoch
    final_epoch_data = df.iloc[-1]

    # Calculate total training and validation loss
    train_loss = calculate_training_loss(final_epoch_data)
    val_loss = calculate_validation_loss(final_epoch_data)

    # Print the training metrics for the last epoch
    print("\n========== Final Training Metrics ==========")
    print(f"Training Loss: {train_loss:.6f}")
    print(f"Precision: {final_epoch_data['metrics/precision(B)']:.6f}")
    print(f"Recall: {final_epoch_data['metrics/recall(B)']:.6f}")
    print(f"mAP@0.5: {final_epoch_data['metrics/mAP50(B)']:.6f}")
    print(f"mAP@0.5:0.95: {final_epoch_data['metrics/mAP50-95(B)']:.6f}")

    # Print the validation metrics for the last epoch
    print("\n========== Final Validation Metrics ==========")
    print(f"Validation Loss: {val_loss:.6f}")

def print_csv_metrics(directory):
    """Main function to process and print final metrics."""
    # Find the 'results.csv' file in the given directory
    results_csv_path = find_results_csv(directory)
    
    if not results_csv_path:
        # If no 'results.csv' is found, print an error and return
        print("Error: 'results.csv' file not found in the specified directory.")
        return

    # Print the path to the found 'results.csv' file
    print(f"Found results.csv at: {results_csv_path}")

    # Load the results from the CSV file into a DataFrame
    df = load_results_csv(results_csv_path)

    # Get the total number of epochs from the DataFrame
    total_epochs = calculate_total_epochs(df)
    print(f"Total number of epochs: {total_epochs}")

    # Print the final training and validation metrics for the last epoch
    print_final_metrics(df)


### Compare Class-wise mAP@0.5:0.95 Before vs After Retraining

1. **Side-by-Side Metric Comparison**  
   Reads two JSON files containing class-wise mAP@0.5:0.95 values (before & after retraining) and compares them.

2. **Visual Trend Highlighting**  
   Displays changes in mAP with colored indicators:
   - 🟢 **Increase** for performance gain
   - 🔴 **Decrease** for performance drop


In [20]:
import json

def compare_maps(json_path1, json_path2):
    with open(json_path1, 'r') as f1, open(json_path2, 'r') as f2:
        metrics1 = json.load(f1)
        metrics2 = json.load(f2)

    print("\nmAP@0.5:0.95 Differences Before and After Retraning:\n")
    print(f"{'Class':<15} {'Before':<10} {'After':<10} {'Diff':<10} {'Trend'}")
    print("-" * 60)

    for class_name in metrics1:
        map1 = metrics1[class_name].get("mAP@0.5:0.95", 0)
        map2 = metrics2.get(class_name, {}).get("mAP@0.5:0.95", 0)

        diff = map2 - map1
        if abs(diff) > 1e-6:
            if diff > 0:
                trend = f"\033[92m Increase\033[0m"  # Green for increase
            else:
                trend = f"\033[91m Decrease\033[0m"  # Red for decrease

            # Printing with colors
            print(f"{class_name:<15} {map1:<10.4f} {map2:<10.4f} {diff:<10.4f} {trend}")


In [21]:
def compare_final_metrics(csv1_path, csv2_path):
    df1 = pd.read_csv(csv1_path)
    df2 = pd.read_csv(csv2_path)

    last1 = df1.iloc[-1]
    last2 = df2.iloc[-1]

    metrics_to_compare = {
        "train/box_loss": "Box Loss (Train)",
        "train/cls_loss": "Cls Loss (Train)",
        "train/dfl_loss": "DFL Loss (Train)",
        "metrics/precision(B)": "Precision",
        "metrics/recall(B)": "Recall",
        "metrics/mAP50(B)": "mAP@0.5",
        "metrics/mAP50-95(B)": "mAP@0.5:0.95",
        "val/box_loss": "Box Loss (Val)",
        "val/cls_loss": "Cls Loss (Val)",
        "val/dfl_loss": "DFL Loss (Val)"
    }

    print("Changes in Metrics Before and After Retraning:\n")
    print(f"{'Metric':<25} {'Before':<10} {'After':<10} {'Diff':<10} {'Trend'}")
    print("-" * 65)

    for key, label in metrics_to_compare.items():
        val1 = last1[key]
        val2 = last2[key]
        diff = val2 - val1
        if abs(diff) > 1e-6:
            # If increase, color green; if decrease, color red
            if diff > 0:
                trend = f"\033[92m Increase\033[0m"  # Green
            else:
                trend = f"\033[91m Decrease\033[0m"  # Red

            # Printing with colors
            print(f"{label:<25} {val1:<10.5f} {val2:<10.5f} {diff:<10.5f} {trend}")


## Calling Functions

### Download and Extract the Semantic Drone Dataset and UAVDT Dataset

In [22]:
# Google Drive URL for the Semantic Drone Dataset
gdrive_url = "https://drive.google.com/file/d/1UppumYqYOi-kto6BWPfFxwJK2Eph46oY/view?usp=sharing"
# Call the function to download and extract the Semantic Drone Dataset
semantic_drone_dataset_download(gdrive_url, extract_to="datasets")

# Google Drive URL for the UAVDT Dataset
gdrive_url = "https://drive.google.com/file/d/12cbrTaBAMIsuU-mwAA7IgDk9wSLC9cC-/view?usp=sharing"
# Call the function to download and extract the UAVDT Dataset
uavdt_dataset_download(gdrive_url, extract_to="datasets")


[INFO] Downloading ZIP from Google Drive...


Downloading...
From (original): https://drive.google.com/uc?id=1UppumYqYOi-kto6BWPfFxwJK2Eph46oY
From (redirected): https://drive.google.com/uc?id=1UppumYqYOi-kto6BWPfFxwJK2Eph46oY&confirm=t&uuid=12f3c5a5-b005-41d1-9be7-e42ba5e0e6fd
To: /home/ssl49/Desktop/Automated-Labeling-for-Aerial-Images-main/Automated_SegmentAndYolo/datasets/downloaded.zip
100%|██████████| 4.14G/4.14G [01:25<00:00, 48.3MB/s]


[INFO] Extracting ZIP...
[DONE] Extracted files to: datasets
[+] Downloading ZIP from Google Drive...


Downloading...
From (original): https://drive.google.com/uc?id=12cbrTaBAMIsuU-mwAA7IgDk9wSLC9cC-
From (redirected): https://drive.google.com/uc?id=12cbrTaBAMIsuU-mwAA7IgDk9wSLC9cC-&confirm=t&uuid=676b43ef-96c6-4948-95a4-a3c0ea0f68fe
To: /home/ssl49/Desktop/Automated-Labeling-for-Aerial-Images-main/Automated_SegmentAndYolo/datasets/downloaded.zip
100%|██████████| 5.21G/5.21G [00:57<00:00, 91.4MB/s]


[+] Extracting ZIP...
[+] Extracted files to: datasets


### Semantic Drone Dataset to YOLO Format Conversion

In [23]:
# Path to the Semantic Drone Dataset training set
dataset_path = "./datasets/semantic_drone_dataset/training_set" 

# Output directory where the YOLO formatted dataset will be saved
output_dir = "./datasets/semantic_yolo"

# Call the function to convert the full dataset into YOLO format
# The function converts annotations and images from the Semantic Drone Dataset into YOLO format
convert_fulldataset_yolo_only(dataset_path, output_dir, class_id_to_name)


Converting to YOLO:  75%|███████▌  | 301/400 [01:03<00:20,  4.84it/s]



Converting to YOLO: 100%|██████████| 400/400 [01:25<00:00,  4.70it/s]

[+] YOLO-format annotation conversion complete!





### UAVDT-2024 and Semnatic Dataset Processing

In [24]:
# UAVDT-2024 Dataset Processing

# Path to the source UAVDT-2024 dataset
source_root = "./datasets/UAVDT-2024"

# Output directory where the new YOLO format dataset will be saved
output_root = "./datasets/new_dataset_yolo_split"

# Convert the UAVDT-2024 dataset into YOLO format
# The function processes the dataset and converts annotations and images into YOLO format
convert_dataset(source_root)

# Split the dataset into training and validation sets with a ratio of 80:20
# This function copies the relevant sequences into the respective directories for training and validation
copy_split_sequences(source_root, output_root, train_ratio=0.8)


# Semantic Drone Datasets Processing

# Split and move the Semantic Drone dataset into training and validation sets
# The function handles the splitting of the dataset and moves the images and annotations into separate directories
split_and_move_dataset()


🔄 Converting 30946 annotation files to YOLO format...


Converting: 100%|██████████| 30946/30946 [01:02<00:00, 494.03file/s]



Conversion complete.
Total boxes:     868139
Converted boxes: 868139
Skipped boxes:   0
Malformed lines: 0
Missing images:  0


Copying train: 100%|██████████| 37/37 [00:03<00:00, 10.54it/s]
Copying val: 100%|██████████| 10/10 [00:00<00:00, 10.64it/s]



[+] Dataset split into 'train/' and 'val/' folders with images and YOLO-format labels.


Moving to train: 100%|██████████| 319/319 [00:00<00:00, 639.11it/s]
Moving to val: 100%|██████████| 80/80 [00:00<00:00, 625.41it/s]


[✓] Dataset split completed: 319 train / 80 val samples





### Dataset Label Normalization for Training and Validation

In [25]:
# Set your paths for the training dataset
dataset_path = "./datasets/new_dataset_yolo_split/train"

# Directory where the images are stored in the training dataset
image_dir = os.path.join(dataset_path, "images")

# Directory where the label files are stored in the training dataset
annotations_dir = os.path.join(dataset_path, "labels")

# Normalize all label files in the training dataset by adjusting coordinates
# This function ensures that the labels follow the expected YOLO format (normalized coordinates)
normalize_all_labels(annotations_dir, image_dir)

# Set your paths for the validation dataset
dataset_path = "./datasets/new_dataset_yolo_split/val"

# Directory where the images are stored in the validation dataset
image_dir = os.path.join(dataset_path, "images")

# Directory where the label files are stored in the validation dataset
annotations_dir = os.path.join(dataset_path, "labels")

# Normalize all label files in the validation dataset
normalize_all_labels(annotations_dir, image_dir)


100%|██████████| 2354/2354 [00:00<00:00, 9593.15it/s]


Normalization Complete


100%|██████████| 1181/1181 [00:00<00:00, 5482.03it/s]

Normalization Complete





### Identifying Rare Classes in the Training Dataset

In [26]:
# Set the path to the training labels directory
labels_dir = './datasets/new_dataset_yolo_split/train/labels'

# Get the list of rare class IDs by analyzing the label files in the specified directory
# The function `get_rare_class_ids` will count the number of occurrences of each class
# and return those with occurrences below the specified threshold (in this case, 3000)
rare_class_ids = get_rare_class_ids(label_dir=labels_dir, class_id_to_name=class_id_to_name, rare_threshold=3000)


Class-wise instance counts:
Class  1 (pool      ): 30 instances
Class  2 (vegetation): 5528 instances
Class  3 (roof      ): 280 instances
Class  4 (wall      ): 954 instances
Class  5 (window    ): 376 instances
Class  6 (person    ): 2475 instances
Class  7 (dog       ): 25 instances
Class  8 (car       ): 35045 instances
Class  9 (bicycle   ): 222 instances
Class 10 (tree      ): 417 instances
Class 11 (truck     ): 129 instances
Class 12 (bus       ): 86 instances
Class 13 (vehicle   ): 577 instances
Total Count: 46144

Rare class IDs (threshold < 3000): {1, 3, 4, 5, 6, 7, 9, 10, 11, 12, 13}


### Clean Up Directories and Train YOLOv8 from Scratch

In [27]:
import shutil
import os

# List of folders to delete
folders_to_delete = ['./datasets/semantic_yolo', './datasets/new_dataset_yolo', './datasets/uavdt-processed', './runs', "./metrics"]

# Iterate over each folder path in the list
for folder_path in folders_to_delete:
    # Check if the folder exists
    if os.path.exists(folder_path):
        # Delete the folder and its contents
        shutil.rmtree(folder_path)
        print(f"✅ Deleted folder: {folder_path}")
    else:
        # If the folder doesn't exist, print a warning message
        print(f"⚠️ Folder does not exist: {folder_path}")


✅ Deleted folder: ./datasets/semantic_yolo
⚠️ Folder does not exist: ./datasets/new_dataset_yolo
⚠️ Folder does not exist: ./datasets/uavdt-processed
⚠️ Folder does not exist: ./runs
⚠️ Folder does not exist: ./metrics


In [28]:
# Train YOLOv8 from scratch with the specified parameters
train_yolo_from_scratch(
    data_yaml="yolov8.yaml",  # Path to the YAML file that contains dataset and class configuration
    epochs=50,               # Number of epochs to train the model
    imgsz=720,                # Image size (height and width) for training
    batch=8,                  # Batch size for training
    name="yolov8",            # Name of the training run (used for saving checkpoints, logs, etc.)
    model_variant="yolov8n.pt"  # The base YOLOv8 model variant to start training (options: yolov8n.pt, yolov8s.pt, yolov8m.pt, etc.)
)

[+] Training from scratch using base model: yolov8n.pt
Ultralytics 8.3.109 🚀 Python-3.10.13 torch-2.6.0+cu124 CUDA:0 (NVIDIA GeForce RTX 3050 OEM, 7957MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8n.pt, data=yolov8.yaml, epochs=50, time=None, patience=15, batch=8, imgsz=720, save=True, save_period=-1, cache=False, device=None, workers=8, project=runs/train, name=yolov8, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=True, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False, save

[34m[1mtrain: [0mScanning /home/ssl49/Desktop/Automated-Labeling-for-Aerial-Images-main/Automated_SegmentAndYolo/datasets/new_dataset_yolo_split/train/labels.cache... 2354 images, 3 backgrounds, 0 corrupt: 100%|██████████| 2354/2354 [00:00<?, ?it/s]


[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))


[34m[1mval: [0mScanning /home/ssl49/Desktop/Automated-Labeling-for-Aerial-Images-main/Automated_SegmentAndYolo/datasets/new_dataset_yolo_split/val/labels.cache... 1181 images, 0 backgrounds, 0 corrupt: 100%|██████████| 1181/1181 [00:00<?, ?it/s]


Plotting labels to runs/train/yolov8/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.000556, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
Image sizes 736 train, 736 val
Using 8 dataloader workers
Logging results to [1mruns/train/yolov8[0m
Starting training for 50 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/50       2.6G      1.721       2.26      1.118        123        736: 100%|██████████| 295/295 [00:34<00:00,  8.47it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:07<00:00, 10.51it/s]


                   all       1181      70393      0.601      0.107     0.0824     0.0402

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/50      3.04G      1.451      1.167      1.021         73        736: 100%|██████████| 295/295 [00:33<00:00,  8.68it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:07<00:00,  9.65it/s]


                   all       1181      70393      0.472      0.124      0.104     0.0521

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       3/50      3.07G      1.355      1.016      1.003         30        736: 100%|██████████| 295/295 [00:33<00:00,  8.70it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:08<00:00,  8.61it/s]


                   all       1181      70393      0.344      0.179      0.122      0.068

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       4/50      3.07G      1.299      0.924     0.9825         64        736: 100%|██████████| 295/295 [00:34<00:00,  8.67it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:09<00:00,  8.18it/s]


                   all       1181      70393      0.388      0.154      0.156     0.0822

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       5/50      3.07G       1.26     0.8831     0.9766         46        736: 100%|██████████| 295/295 [00:33<00:00,  8.75it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:08<00:00,  8.50it/s]


                   all       1181      70393      0.224      0.245      0.162       0.09

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       6/50      3.07G      1.222     0.8343     0.9641         62        736: 100%|██████████| 295/295 [00:33<00:00,  8.75it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:09<00:00,  7.99it/s]


                   all       1181      70393      0.277      0.208      0.175     0.0937

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       7/50      3.57G      1.193     0.8095     0.9591         73        736: 100%|██████████| 295/295 [00:33<00:00,  8.75it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:08<00:00,  8.50it/s]


                   all       1181      70393       0.22      0.254      0.193      0.113

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       8/50      3.59G      1.166      0.778     0.9512         55        736: 100%|██████████| 295/295 [00:33<00:00,  8.78it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:08<00:00,  9.03it/s]


                   all       1181      70393      0.545        0.2      0.222      0.127

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       9/50      3.61G      1.159     0.7717     0.9529         65        736: 100%|██████████| 295/295 [00:33<00:00,  8.75it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:08<00:00,  8.45it/s]


                   all       1181      70393      0.517      0.218      0.217      0.125

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      10/50      3.62G       1.13     0.7457     0.9455         37        736: 100%|██████████| 295/295 [00:33<00:00,  8.82it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:07<00:00,  9.41it/s]


                   all       1181      70393      0.544      0.225      0.225      0.136

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      11/50      3.62G      1.124     0.7397     0.9443        123        736: 100%|██████████| 295/295 [00:33<00:00,  8.73it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:07<00:00,  9.98it/s]


                   all       1181      70393      0.483       0.22      0.225      0.136

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      12/50      3.62G      1.106     0.7303     0.9424         33        736: 100%|██████████| 295/295 [00:33<00:00,  8.75it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:07<00:00,  9.51it/s]


                   all       1181      70393      0.492      0.232      0.218      0.123

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      13/50      3.62G      1.106      0.719     0.9422         73        736: 100%|██████████| 295/295 [00:33<00:00,  8.77it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:08<00:00,  8.67it/s]


                   all       1181      70393      0.553      0.236      0.232      0.137

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      14/50      3.62G      1.086     0.6931      0.934        131        736: 100%|██████████| 295/295 [00:33<00:00,  8.80it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:08<00:00,  8.48it/s]


                   all       1181      70393       0.49      0.238      0.232      0.136

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      15/50      4.13G      1.064     0.6791     0.9273         36        736: 100%|██████████| 295/295 [00:33<00:00,  8.76it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:08<00:00,  8.26it/s]


                   all       1181      70393      0.465      0.225      0.202      0.121

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      16/50      2.68G      1.076     0.6885     0.9301        151        736: 100%|██████████| 295/295 [00:33<00:00,  8.76it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:07<00:00, 10.11it/s]

                   all       1181      70393      0.493      0.241      0.244      0.145






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      17/50      3.74G       1.05     0.6702     0.9242         69        736: 100%|██████████| 295/295 [00:33<00:00,  8.75it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:07<00:00, 10.12it/s]

                   all       1181      70393      0.501      0.221      0.223      0.127






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      18/50      3.74G      1.038     0.6617     0.9256         90        736: 100%|██████████| 295/295 [00:33<00:00,  8.77it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:08<00:00,  9.17it/s]

                   all       1181      70393      0.518      0.218      0.222      0.127






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      19/50      3.75G      1.028      0.656     0.9229         52        736: 100%|██████████| 295/295 [00:33<00:00,  8.75it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:07<00:00, 10.04it/s]

                   all       1181      70393      0.588      0.244      0.225      0.128






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      20/50      3.77G      1.023     0.6449     0.9211         86        736: 100%|██████████| 295/295 [00:33<00:00,  8.76it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:08<00:00,  8.76it/s]


                   all       1181      70393       0.56      0.239      0.213       0.13

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      21/50      3.78G      1.001     0.6296     0.9153         75        736: 100%|██████████| 295/295 [00:33<00:00,  8.79it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:07<00:00, 10.47it/s]


                   all       1181      70393      0.545      0.213      0.215      0.126

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      22/50      3.78G      1.019     0.6443     0.9216         49        736: 100%|██████████| 295/295 [00:33<00:00,  8.76it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:07<00:00,  9.64it/s]

                   all       1181      70393      0.491      0.247      0.246      0.147






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      23/50      3.78G      1.005     0.6333     0.9192         56        736: 100%|██████████| 295/295 [00:33<00:00,  8.75it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:08<00:00,  8.39it/s]

                   all       1181      70393      0.512      0.225      0.233      0.132






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      24/50      3.78G     0.9939     0.6287     0.9161         37        736: 100%|██████████| 295/295 [00:33<00:00,  8.74it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:07<00:00,  9.46it/s]


                   all       1181      70393      0.555      0.237      0.256      0.157

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      25/50      3.78G     0.9776     0.6099     0.9089         41        736: 100%|██████████| 295/295 [00:33<00:00,  8.78it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:07<00:00,  9.73it/s]

                   all       1181      70393      0.603       0.23       0.24      0.145






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      26/50      3.79G     0.9793     0.6187     0.9115         64        736: 100%|██████████| 295/295 [00:33<00:00,  8.73it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:08<00:00,  9.02it/s]

                   all       1181      70393      0.572      0.273      0.236      0.141






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      27/50      3.79G     0.9739     0.6119     0.9107         52        736: 100%|██████████| 295/295 [00:33<00:00,  8.77it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:07<00:00,  9.39it/s]

                   all       1181      70393       0.64       0.23      0.243      0.149






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      28/50      3.79G      0.971     0.6028     0.9074         61        736: 100%|██████████| 295/295 [00:33<00:00,  8.78it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:08<00:00,  8.98it/s]

                   all       1181      70393      0.561      0.228      0.251      0.159






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      29/50      3.79G     0.9591     0.5911     0.9036        133        736: 100%|██████████| 295/295 [00:33<00:00,  8.76it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:08<00:00,  9.08it/s]

                   all       1181      70393       0.63      0.235      0.244      0.152






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      30/50      3.79G     0.9424     0.5816     0.9009         80        736: 100%|██████████| 295/295 [00:33<00:00,  8.79it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:08<00:00,  8.65it/s]


                   all       1181      70393       0.55      0.247      0.272      0.172

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      31/50      3.79G     0.9371     0.5884      0.903         83        736: 100%|██████████| 295/295 [00:33<00:00,  8.78it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:08<00:00,  9.10it/s]


                   all       1181      70393      0.577       0.24      0.271      0.166

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      32/50      3.79G     0.9425     0.5833     0.9025        155        736: 100%|██████████| 295/295 [00:33<00:00,  8.79it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:07<00:00, 10.05it/s]

                   all       1181      70393      0.539       0.28      0.259      0.161






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      33/50      3.79G     0.9437     0.5874     0.9012        159        736: 100%|██████████| 295/295 [00:33<00:00,  8.76it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:07<00:00,  9.27it/s]

                   all       1181      70393       0.42      0.284       0.25      0.147






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      34/50      3.79G     0.9416     0.5903      0.906        145        736: 100%|██████████| 295/295 [00:33<00:00,  8.77it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:08<00:00,  9.24it/s]

                   all       1181      70393      0.438      0.276       0.26       0.16






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      35/50      3.79G     0.9281     0.5769     0.8972         41        736: 100%|██████████| 295/295 [00:33<00:00,  8.76it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:07<00:00,  9.88it/s]

                   all       1181      70393      0.617       0.26      0.262      0.159






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      36/50      3.79G      0.928     0.5808     0.9019         77        736: 100%|██████████| 295/295 [00:33<00:00,  8.75it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:07<00:00, 10.23it/s]


                   all       1181      70393      0.546      0.282      0.266      0.164

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      37/50      3.79G     0.9039      0.561      0.895         50        736: 100%|██████████| 295/295 [00:33<00:00,  8.77it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:07<00:00,  9.87it/s]

                   all       1181      70393      0.575      0.267      0.263      0.165






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      38/50      3.79G     0.9134     0.5673     0.8985         72        736: 100%|██████████| 295/295 [00:33<00:00,  8.75it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:08<00:00,  8.89it/s]

                   all       1181      70393       0.46      0.288      0.252      0.155






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      39/50      3.79G     0.9068     0.5612     0.8952        147        736: 100%|██████████| 295/295 [00:33<00:00,  8.73it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:07<00:00, 10.10it/s]

                   all       1181      70393      0.546      0.281      0.292      0.182






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      40/50      3.79G     0.9094     0.5605     0.8969         55        736: 100%|██████████| 295/295 [00:33<00:00,  8.83it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:07<00:00,  9.81it/s]

                   all       1181      70393      0.577       0.27       0.29      0.185





Closing dataloader mosaic
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      41/50      3.79G     0.8151     0.5055     0.8761         21        736: 100%|██████████| 295/295 [00:31<00:00,  9.38it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:06<00:00, 11.58it/s]


                   all       1181      70393       0.64      0.257       0.28       0.18

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      42/50      3.79G     0.7934     0.4914     0.8714         22        736: 100%|██████████| 295/295 [00:31<00:00,  9.41it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:07<00:00, 10.31it/s]

                   all       1181      70393      0.557      0.265      0.252      0.161






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      43/50      3.79G     0.7768     0.4774     0.8689         33        736: 100%|██████████| 295/295 [00:31<00:00,  9.43it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:06<00:00, 11.01it/s]


                   all       1181      70393       0.62      0.276      0.271      0.169

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      44/50      3.79G     0.7696     0.4743     0.8662         26        736: 100%|██████████| 295/295 [00:31<00:00,  9.41it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:06<00:00, 11.23it/s]


                   all       1181      70393      0.614      0.278      0.263      0.163

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      45/50      3.79G     0.7696     0.4765     0.8678         10        736: 100%|██████████| 295/295 [00:31<00:00,  9.42it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:06<00:00, 11.63it/s]


                   all       1181      70393      0.597      0.277      0.295      0.181

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      46/50      3.79G     0.7524     0.4666     0.8634         67        736: 100%|██████████| 295/295 [00:31<00:00,  9.41it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:06<00:00, 11.35it/s]


                   all       1181      70393      0.566      0.284      0.289      0.179

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      47/50      3.79G     0.7466     0.4633     0.8593         69        736: 100%|██████████| 295/295 [00:31<00:00,  9.42it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:06<00:00, 11.19it/s]


                   all       1181      70393      0.565      0.288       0.29      0.182

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      48/50      3.79G     0.7477     0.4634     0.8614         23        736: 100%|██████████| 295/295 [00:31<00:00,  9.42it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:06<00:00, 11.06it/s]

                   all       1181      70393      0.628      0.285      0.285      0.182






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      49/50       3.8G     0.7353     0.4538     0.8608         28        736: 100%|██████████| 295/295 [00:31<00:00,  9.43it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:06<00:00, 10.59it/s]


                   all       1181      70393      0.635      0.276       0.28      0.175

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      50/50       3.8G     0.7333     0.4544     0.8601         14        736: 100%|██████████| 295/295 [00:31<00:00,  9.42it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:06<00:00, 11.01it/s]


                   all       1181      70393      0.644      0.268      0.282      0.177

50 epochs completed in 0.572 hours.
Optimizer stripped from runs/train/yolov8/weights/last.pt, 6.2MB
Optimizer stripped from runs/train/yolov8/weights/best.pt, 6.2MB

Validating runs/train/yolov8/weights/best.pt...
Ultralytics 8.3.109 🚀 Python-3.10.13 torch-2.6.0+cu124 CUDA:0 (NVIDIA GeForce RTX 3050 OEM, 7957MiB)
Model summary (fused): 72 layers, 3,008,378 parameters, 0 gradients, 8.1 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:18<00:00,  4.07it/s]


                   all       1181      70393      0.529      0.263        0.3      0.194
                  pool          9          9      0.776      0.889      0.892      0.704
            vegetation         75       1780      0.307      0.222      0.163     0.0853
                  roof         42         79     0.0781      0.684      0.554      0.463
                  wall         62        255      0.123        0.2      0.124     0.0817
                window         38        139      0.711      0.245      0.281      0.172
                person         75        637      0.799      0.276      0.411      0.204
                   dog          6         12          1          0      0.016    0.00695
                   car       1115      53669      0.751      0.259      0.434      0.213
               bicycle         32         53      0.492      0.264       0.34      0.112
                  tree         38        108      0.865       0.37      0.521       0.37
                 truc

### Evaluate YOLOv8 Model and Print Metrics

In [29]:
# Define the path to the YOLOv8 training run directory
yolov8 = './runs/train/yolov8'

# Find the best model based on the training run (usually the best model is the one with the lowest validation loss)
best_pt_path = find_best_model(yolov8)

# Evaluate the best model and save the performance metrics
evaluate_and_save_metrics(best_pt_path)


[+] Found best.pt at: runs/train/yolov8/weights/best.pt
Ultralytics 8.3.109 🚀 Python-3.10.13 torch-2.6.0+cu124 CUDA:0 (NVIDIA GeForce RTX 3050 OEM, 7957MiB)


Model summary (fused): 72 layers, 3,008,378 parameters, 0 gradients, 8.1 GFLOPs


[34m[1mval: [0mScanning /home/ssl49/Desktop/Automated-Labeling-for-Aerial-Images-main/Automated_SegmentAndYolo/datasets/new_dataset_yolo_split/val/labels.cache... 1181 images, 0 backgrounds, 0 corrupt: 100%|██████████| 1181/1181 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:10<00:00,  7.09it/s]


                   all       1181      70393      0.577       0.27       0.29      0.186
                  pool          9          9      0.757      0.889      0.889      0.731
            vegetation         75       1780      0.155      0.254      0.149     0.0798
                  roof         42         79     0.0743      0.696      0.527      0.377
                  wall         62        255      0.067      0.208     0.0843     0.0513
                window         38        139      0.527      0.201      0.243      0.152
                person         75        637       0.82      0.262      0.369      0.188
                   dog          6         12          1          0    0.00567    0.00295
                   car       1115      53669      0.796      0.289      0.491      0.246
               bicycle         32         53      0.477       0.31      0.326      0.103
                  tree         38        108      0.869      0.389      0.512      0.356
                 truc

In [30]:
# Print per-class metrics from the specified JSON file
print_per_class_metrics("per_class_metrics.json")


Per-Class mAP@0.5:0.95 Metrics:

Class Name      Class ID   mAP@0.5:0.95   
----------------------------------------
unlabeled       0          0.1855         
pool            1          0.7305         
vegetation      2          0.0798         
roof            3          0.3768         
wall            4          0.0513         
window          5          0.1524         
person          6          0.1884         
dog             7          0.0029         
car             8          0.2459         
bicycle         9          0.1027         
tree            10         0.3563         
truck           11         0.0291         
bus             12         0.0            
vehicle         13         0.0955         


In [31]:
# Print the metrics from the CSV file found in the specified directory
print_csv_metrics(yolov8)


Found results.csv at: ./runs/train/yolov8/results.csv
Total number of epochs: 50

Training Loss: 2.047820
Precision: 0.643800
Recall: 0.268230
mAP@0.5: 0.282050
mAP@0.5:0.95: 0.177310

Validation Loss: 5.496680


### Clean Up Folders and Process Videos for YOLO Predictions

In [32]:
import shutil
import os

# List of folders to delete
folders_to_delete = ['./datasets/new-videos-predicted-yolo', "./datasets/merged_yolo_dataset", "./datasets/split_videos_dataset"]

# Loop through each folder path in the list
for folder_path in folders_to_delete:
    # Check if the folder exists
    if os.path.exists(folder_path):
        # If it exists, delete the folder and its contents
        shutil.rmtree(folder_path)
        print(f"✅ Deleted folder: {folder_path}")
    else:
        # If the folder does not exist, print a warning message
        print(f"⚠️ Folder does not exist: {folder_path}")


✅ Deleted folder: ./datasets/new-videos-predicted-yolo
✅ Deleted folder: ./datasets/merged_yolo_dataset
✅ Deleted folder: ./datasets/split_videos_dataset


In [33]:
# Process all videos and generate predictions
process_all_videos(best_pt_path, class_id_to_name, list(rare_class_ids),
                   video_dir='videos', output_base='./datasets/new-videos-predicted-yolo')

[+] Using Model runs/train/yolov8/weights/best.pt


  0%|          | 0/8 [00:00<?, ?it/s]




v1 (all frames): 100%|██████████| 642/642 [00:13<00:00, 49.29it/s]
 12%|█▎        | 1/8 [00:13<01:33, 13.36s/it]

DONE: v1 — Processed 106 meaningful frames



v12 (all frames): 100%|██████████| 897/897 [00:54<00:00, 16.35it/s]
 25%|██▌       | 2/8 [01:08<03:47, 37.87s/it]

DONE: v12 — Processed 863 meaningful frames



v2 (all frames): 100%|██████████| 175/175 [00:03<00:00, 48.80it/s]
 38%|███▊      | 3/8 [01:12<01:51, 22.28s/it]

DONE: v2 — Processed 31 meaningful frames



v3 (all frames): 100%|██████████| 176/176 [00:01<00:00, 147.60it/s]
 50%|█████     | 4/8 [01:13<00:55, 13.99s/it]

DONE: v3 — Processed 76 meaningful frames



v4 (all frames): 100%|██████████| 253/253 [00:03<00:00, 79.38it/s]
 62%|██████▎   | 5/8 [01:16<00:30, 10.13s/it]

DONE: v4 — Processed 114 meaningful frames



v5 (all frames): 100%|██████████| 1114/1114 [00:14<00:00, 76.76it/s]
 75%|███████▌  | 6/8 [01:31<00:23, 11.68s/it]

DONE: v5 — Processed 34 meaningful frames



v6 (all frames): 100%|██████████| 483/483 [00:11<00:00, 43.41it/s]
 88%|████████▊ | 7/8 [01:42<00:11, 11.55s/it]

DONE: v6 — Processed 118 meaningful frames



v8 (all frames): 100%|██████████| 949/949 [00:14<00:00, 67.12it/s]
100%|██████████| 8/8 [01:56<00:00, 14.61s/it]

DONE: v8 — Processed 320 meaningful frames

[+] DONE: Total Processed 1662 meaningful frames
[+] Total Bounding Boxes Detected: 3212
[+] Total Label Files Created: 1662





In [34]:
# Folder with YOLO label files
label_dir = './datasets/new-videos-predicted-yolo/labels'

# Get the rare class IDs from the label files based on the threshold
get_rare_class_ids(label_dir=label_dir, class_id_to_name=class_id_to_name ,rare_threshold=0)


Class-wise instance counts:
Class  3 (roof      ): 553 instances
Class  4 (wall      ): 38 instances
Class  5 (window    ): 6 instances
Class  6 (person    ): 421 instances
Class  9 (bicycle   ): 116 instances
Class 10 (tree      ): 567 instances
Class 13 (vehicle   ): 1511 instances
Total Count: 3212

Rare class IDs (threshold < 0): set()


set()

### Split and Merge new Datasets

In [35]:
split_and_move_dataset(source_base_dir="./datasets/new-videos-predicted-yolo",
                           target_base_dir="./datasets/split_videos_dataset",
                           split_ratio=0.5,
                           seed=42)


Moving to train: 100%|██████████| 831/831 [00:00<00:00, 871.88it/s]
Moving to val: 100%|██████████| 831/831 [00:00<00:00, 872.83it/s]


[✓] Dataset split completed: 831 train / 831 val samples





In [36]:
merge_yolo_datasets(
    source1='./datasets/new_dataset_yolo_split/train',  # Path to the first source dataset (YOLO format)
    source2='./datasets/split_videos_dataset/train',    # Path to the second source dataset (YOLO format)
    destination='./datasets/merged_yolo_dataset'        # Path where the merged dataset will be saved
)


[+] Merging original dataset...
[+] Merging predicted video dataset...

[+] Merge complete! Merged dataset saved at: ./datasets/merged_yolo_dataset


### Identify Rare Classes in the Merged YOLO Dataset

In [37]:
# Folder with YOLO label files
label_dir = './datasets/merged_yolo_dataset/labels'  # Path to the directory containing YOLO label files

# Get rare class IDs in the dataset based on the given threshold
rare_class_ids = get_rare_class_ids(
    label_dir=label_dir,                # Path to the label directory
    class_id_to_name=class_id_to_name,  # Mapping of class IDs to class names
    rare_threshold=0                    # Set the threshold for class frequency; here, we are considering all classes
)


Class-wise instance counts:
Class  1 (pool      ): 30 instances
Class  2 (vegetation): 5528 instances
Class  3 (roof      ): 558 instances
Class  4 (wall      ): 971 instances
Class  5 (window    ): 378 instances
Class  6 (person    ): 2708 instances
Class  7 (dog       ): 25 instances
Class  8 (car       ): 35045 instances
Class  9 (bicycle   ): 269 instances
Class 10 (tree      ): 718 instances
Class 11 (truck     ): 129 instances
Class 12 (bus       ): 86 instances
Class 13 (vehicle   ): 1327 instances
Total Count: 47772

Rare class IDs (threshold < 0): set()


### Clean Up Fine-Tune YOLOv8 Folders and Fine-Tune the Model

In [38]:
import shutil
import os
import glob

# Match all folders starting with 'fine-tune-yolov8' inside './runs/train/'
folders_to_delete = glob.glob('./runs/train/fine-tune-yolov8*')  # Find all folders starting with 'fine-tune-yolov8'

# Iterate through the matched folders
for folder_path in folders_to_delete:
    if os.path.isdir(folder_path):  # Check if the path is a valid directory
        shutil.rmtree(folder_path)  # Delete the folder and its contents
        print(f"✅ Deleted folder: {folder_path}")  # Print a success message
    else:
        print(f"⚠️ Not a directory or doesn't exist: {folder_path}")  # Print a warning if the folder doesn't exist or isn't a directory


In [39]:
fine_tune_yolo(
    data_yaml="yolo_retrain.yaml",        # Path to the updated dataset YAML file containing information like class names, train/val paths, etc.
    epochs=100,                             # Number of epochs for fine-tuning
    imgsz=720,                             # Image size to be used for training (720x720 pixels in this case)
    batch=16,                              # Batch size used during training (16 images per batch)
    name="fine-tune-yolov8",               # Name for this fine-tuning experiment
    base_model_path=best_pt_path          # Path to the pre-trained YOLO model (the best model from previous training)
)


[+] Fine-tuning model from: runs/train/yolov8/weights/best.pt
Ultralytics 8.3.109 🚀 Python-3.10.13 torch-2.6.0+cu124 CUDA:0 (NVIDIA GeForce RTX 3050 OEM, 7957MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=runs/train/yolov8/weights/best.pt, data=yolo_retrain.yaml, epochs=100, time=None, patience=20, batch=16, imgsz=720, save=True, save_period=-1, cache=False, device=None, workers=8, project=runs/train, name=fine-tune-yolov8, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=True, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_fra

[34m[1mtrain: [0mScanning /home/ssl49/Desktop/Automated-Labeling-for-Aerial-Images-main/Automated_SegmentAndYolo/datasets/merged_yolo_dataset/labels... 3185 images, 3 backgrounds, 0 corrupt: 100%|██████████| 3185/3185 [00:00<00:00, 4315.52it/s]

[34m[1mtrain: [0mNew cache created: /home/ssl49/Desktop/Automated-Labeling-for-Aerial-Images-main/Automated_SegmentAndYolo/datasets/merged_yolo_dataset/labels.cache
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))



[34m[1mval: [0mScanning /home/ssl49/Desktop/Automated-Labeling-for-Aerial-Images-main/Automated_SegmentAndYolo/datasets/split_videos_dataset/val/labels... 831 images, 0 backgrounds, 0 corrupt: 100%|██████████| 831/831 [00:00<00:00, 2897.66it/s]


[34m[1mval: [0mNew cache created: /home/ssl49/Desktop/Automated-Labeling-for-Aerial-Images-main/Automated_SegmentAndYolo/datasets/split_videos_dataset/val/labels.cache
Plotting labels to runs/train/fine-tune-yolov8/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.000556, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
Image sizes 736 train, 736 val
Using 8 dataloader workers
Logging results to [1mruns/train/fine-tune-yolov8[0m
Starting training for 100 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      1/100      4.73G     0.9302     0.6727     0.9148          2        736: 100%|██████████| 200/200 [00:43<00:00,  4.58it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:03<00:00,  7.96it/s]

                   all        831       1584      0.623      0.524       0.52      0.352






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      2/100      4.74G     0.9542     0.6458     0.9131         31        736: 100%|██████████| 200/200 [00:43<00:00,  4.63it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:02<00:00,  8.87it/s]

                   all        831       1584       0.53      0.573      0.516      0.365






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      3/100      5.39G     0.9488     0.6346     0.9083         36        736: 100%|██████████| 200/200 [00:42<00:00,  4.70it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:03<00:00,  7.76it/s]

                   all        831       1584      0.626      0.546      0.554      0.388






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      4/100      5.52G     0.9766     0.6496     0.9173         59        736: 100%|██████████| 200/200 [00:43<00:00,  4.64it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:03<00:00,  8.43it/s]

                   all        831       1584      0.457      0.484      0.537      0.372






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      5/100      5.23G     0.9852     0.6613     0.9194          8        736: 100%|██████████| 200/200 [00:42<00:00,  4.68it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:03<00:00,  7.89it/s]

                   all        831       1584      0.687      0.483      0.569      0.397






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      6/100       4.9G     0.9806     0.6485     0.9189         24        736: 100%|██████████| 200/200 [00:42<00:00,  4.66it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:03<00:00,  8.24it/s]

                   all        831       1584      0.685      0.512      0.586      0.402






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      7/100      6.05G     0.9828     0.6508     0.9205          8        736: 100%|██████████| 200/200 [00:42<00:00,  4.67it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:03<00:00,  8.48it/s]

                   all        831       1584      0.665      0.551      0.559      0.405






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      8/100      5.32G     0.9773     0.6335     0.9123         26        736: 100%|██████████| 200/200 [00:42<00:00,  4.67it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:02<00:00,  8.86it/s]

                   all        831       1584      0.702      0.546      0.566      0.401






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      9/100      5.22G     0.9615     0.6236      0.914         40        736: 100%|██████████| 200/200 [00:42<00:00,  4.69it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:03<00:00,  8.32it/s]

                   all        831       1584      0.419      0.672      0.568      0.405






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     10/100      5.35G     0.9676     0.6361     0.9146        125        736: 100%|██████████| 200/200 [00:42<00:00,  4.68it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:03<00:00,  8.29it/s]

                   all        831       1584      0.637      0.577      0.602      0.414






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     11/100      4.31G     0.9537     0.6254     0.9111         28        736: 100%|██████████| 200/200 [00:43<00:00,  4.64it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:03<00:00,  8.20it/s]

                   all        831       1584      0.671      0.551      0.579       0.41






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     12/100      5.71G      0.953     0.6194     0.9108         13        736: 100%|██████████| 200/200 [00:42<00:00,  4.69it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:03<00:00,  8.29it/s]

                   all        831       1584      0.667      0.541      0.577       0.41






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     13/100      6.08G     0.9549     0.6183     0.9113         22        736: 100%|██████████| 200/200 [00:42<00:00,  4.68it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:02<00:00,  8.72it/s]

                   all        831       1584      0.574      0.613      0.537      0.381






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     14/100      5.59G      0.945     0.6062     0.9049         27        736: 100%|██████████| 200/200 [00:42<00:00,  4.67it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:03<00:00,  8.34it/s]

                   all        831       1584      0.708      0.527      0.567      0.397






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     15/100      4.69G     0.9441     0.6198     0.9098         56        736: 100%|██████████| 200/200 [00:42<00:00,  4.67it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:03<00:00,  8.33it/s]

                   all        831       1584       0.72      0.569      0.592      0.407






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     16/100     0.398G     0.9349     0.5997      0.904         25        736: 100%|██████████| 200/200 [00:42<00:00,  4.68it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:03<00:00,  8.39it/s]

                   all        831       1584      0.657      0.596      0.569      0.424






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     17/100      5.72G     0.9323     0.6079      0.909         53        736: 100%|██████████| 200/200 [00:42<00:00,  4.67it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:03<00:00,  8.38it/s]

                   all        831       1584      0.637      0.602      0.593       0.41






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     18/100      5.29G     0.9442      0.615     0.9088         10        736: 100%|██████████| 200/200 [00:42<00:00,  4.68it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:03<00:00,  8.25it/s]

                   all        831       1584      0.516      0.696      0.625      0.426






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     19/100      5.75G     0.9187     0.5924     0.9017         32        736: 100%|██████████| 200/200 [00:42<00:00,  4.67it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:03<00:00,  8.53it/s]

                   all        831       1584      0.691      0.568      0.606      0.434






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     20/100      5.79G     0.9258     0.6016      0.909         68        736: 100%|██████████| 200/200 [00:42<00:00,  4.70it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:03<00:00,  8.20it/s]

                   all        831       1584      0.509      0.579      0.593      0.438






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     21/100      4.21G     0.9278      0.596     0.9054         36        736: 100%|██████████| 200/200 [00:42<00:00,  4.66it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:03<00:00,  8.66it/s]

                   all        831       1584      0.675      0.567      0.604       0.43






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     22/100       4.6G      0.907     0.5896     0.9017          2        736: 100%|██████████| 200/200 [00:42<00:00,  4.69it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:02<00:00,  8.89it/s]

                   all        831       1584      0.651      0.588      0.587      0.433






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     23/100      5.21G      0.911     0.5823     0.9023          8        736: 100%|██████████| 200/200 [00:42<00:00,  4.68it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:03<00:00,  8.43it/s]

                   all        831       1584      0.669      0.618      0.587      0.426






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     24/100      6.18G     0.9114     0.5904     0.9037         20        736: 100%|██████████| 200/200 [00:42<00:00,  4.69it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:02<00:00,  8.71it/s]

                   all        831       1584      0.742      0.536      0.601      0.448






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     25/100      6.14G      0.916      0.584     0.9012         16        736: 100%|██████████| 200/200 [00:42<00:00,  4.67it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:02<00:00,  8.68it/s]

                   all        831       1584      0.648      0.612      0.616      0.454






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     26/100      5.91G     0.9132      0.589     0.9021        291        736: 100%|██████████| 200/200 [00:42<00:00,  4.66it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:03<00:00,  8.47it/s]

                   all        831       1584      0.684      0.581      0.595      0.438






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     27/100      6.01G     0.8995       0.58     0.8996         11        736: 100%|██████████| 200/200 [00:42<00:00,  4.69it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:03<00:00,  8.44it/s]

                   all        831       1584       0.66      0.592      0.586      0.424






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     28/100       6.2G     0.8986     0.5764     0.9002          4        736: 100%|██████████| 200/200 [00:42<00:00,  4.65it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:03<00:00,  8.15it/s]

                   all        831       1584      0.702      0.566      0.608      0.432






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     29/100      5.04G      0.903     0.5784     0.9016         21        736: 100%|██████████| 200/200 [00:42<00:00,  4.68it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:03<00:00,  8.61it/s]

                   all        831       1584      0.624      0.617      0.605      0.444






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     30/100      5.35G     0.8928     0.5694     0.8978         32        736: 100%|██████████| 200/200 [00:42<00:00,  4.66it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:03<00:00,  8.31it/s]

                   all        831       1584      0.497      0.645      0.609      0.435






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     31/100      5.09G     0.8901     0.5672     0.8978         18        736: 100%|██████████| 200/200 [00:42<00:00,  4.68it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:02<00:00,  8.68it/s]

                   all        831       1584       0.73      0.578      0.603      0.431






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     32/100      5.38G     0.8833     0.5594     0.8957         25        736: 100%|██████████| 200/200 [00:42<00:00,  4.68it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:03<00:00,  8.34it/s]

                   all        831       1584      0.726      0.589      0.629      0.459






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     33/100      5.66G     0.8842     0.5635     0.8992          6        736: 100%|██████████| 200/200 [00:42<00:00,  4.68it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:03<00:00,  8.31it/s]


                   all        831       1584      0.702      0.583      0.649      0.457

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     34/100      3.93G     0.8924     0.5737     0.9001         27        736: 100%|██████████| 200/200 [00:42<00:00,  4.66it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:02<00:00,  8.68it/s]

                   all        831       1584      0.489       0.58      0.601      0.452






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     35/100      4.18G      0.879     0.5599     0.8949         39        736: 100%|██████████| 200/200 [00:42<00:00,  4.66it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:03<00:00,  8.57it/s]

                   all        831       1584       0.75      0.547      0.627      0.456






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     36/100      4.57G      0.884     0.5627     0.8966         59        736: 100%|██████████| 200/200 [00:42<00:00,  4.66it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:03<00:00,  8.60it/s]

                   all        831       1584      0.502      0.612      0.606      0.447






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     37/100      5.04G     0.8772       0.56     0.8934         28        736: 100%|██████████| 200/200 [00:42<00:00,  4.66it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:02<00:00,  9.11it/s]

                   all        831       1584      0.565      0.588      0.644      0.483






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     38/100      4.72G     0.8669     0.5533     0.8925         36        736: 100%|██████████| 200/200 [00:42<00:00,  4.68it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:02<00:00,  8.67it/s]

                   all        831       1584       0.51      0.615      0.614      0.452






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     39/100      5.27G     0.8641     0.5516     0.8914         67        736: 100%|██████████| 200/200 [00:42<00:00,  4.68it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:03<00:00,  8.61it/s]

                   all        831       1584      0.644      0.594      0.609      0.435






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     40/100      5.63G     0.8639      0.549      0.892         11        736: 100%|██████████| 200/200 [00:42<00:00,  4.67it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:02<00:00,  8.89it/s]

                   all        831       1584      0.706      0.609      0.638      0.465






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     41/100      5.22G     0.8693     0.5556     0.8937         16        736: 100%|██████████| 200/200 [00:43<00:00,  4.65it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:03<00:00,  8.42it/s]

                   all        831       1584       0.72      0.583      0.643      0.469






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     42/100      5.73G     0.8628     0.5441     0.8896          8        736: 100%|██████████| 200/200 [00:42<00:00,  4.66it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:03<00:00,  8.56it/s]

                   all        831       1584      0.537      0.629      0.638       0.48






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     43/100      4.56G     0.8525     0.5376     0.8877         29        736: 100%|██████████| 200/200 [00:42<00:00,  4.69it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:03<00:00,  8.55it/s]

                   all        831       1584      0.674      0.599      0.645      0.478






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     44/100      6.34G     0.8413     0.5346     0.8863         23        736: 100%|██████████| 200/200 [00:42<00:00,  4.67it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:03<00:00,  8.43it/s]

                   all        831       1584      0.685      0.597      0.644      0.476






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     45/100      5.57G     0.8437     0.5367     0.8888          9        736: 100%|██████████| 200/200 [00:42<00:00,  4.68it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:03<00:00,  8.64it/s]

                   all        831       1584      0.485      0.754      0.659      0.489






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     46/100      5.37G     0.8489     0.5371     0.8882          5        736: 100%|██████████| 200/200 [00:42<00:00,  4.69it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:03<00:00,  8.36it/s]

                   all        831       1584      0.545      0.747       0.67       0.51






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     47/100      6.27G     0.8462     0.5375     0.8875         68        736: 100%|██████████| 200/200 [00:42<00:00,  4.68it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:02<00:00,  8.69it/s]

                   all        831       1584      0.576      0.584      0.649      0.465






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     48/100      4.25G     0.8401     0.5322     0.8858         15        736: 100%|██████████| 200/200 [00:42<00:00,  4.70it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:03<00:00,  8.18it/s]

                   all        831       1584      0.452      0.641      0.617       0.46






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     49/100      4.31G     0.8392     0.5969     0.8875          1        736: 100%|██████████| 200/200 [00:42<00:00,  4.66it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:03<00:00,  8.23it/s]

                   all        831       1584      0.729      0.572      0.653      0.483






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     50/100       5.5G     0.8351     0.5305     0.8876         12        736: 100%|██████████| 200/200 [00:42<00:00,  4.70it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:03<00:00,  8.40it/s]

                   all        831       1584      0.531       0.61      0.665      0.493






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     51/100      5.34G     0.8492     0.5363     0.8861         19        736: 100%|██████████| 200/200 [00:42<00:00,  4.67it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:03<00:00,  8.60it/s]

                   all        831       1584      0.742      0.571      0.682      0.502






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     52/100       4.1G     0.8341     0.5293     0.8864         43        736: 100%|██████████| 200/200 [00:42<00:00,  4.70it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:02<00:00,  8.87it/s]

                   all        831       1584      0.717       0.59      0.682      0.514






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     53/100      5.91G     0.8317     0.5242      0.883         46        736: 100%|██████████| 200/200 [00:42<00:00,  4.68it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:02<00:00,  8.90it/s]

                   all        831       1584      0.685      0.601      0.663      0.495






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     54/100      5.94G     0.8316     0.5265     0.8861         16        736: 100%|██████████| 200/200 [00:43<00:00,  4.65it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:03<00:00,  8.26it/s]

                   all        831       1584      0.717      0.594      0.639      0.475






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     55/100         5G     0.8231     0.5184     0.8832         52        736: 100%|██████████| 200/200 [00:42<00:00,  4.69it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:03<00:00,  8.62it/s]

                   all        831       1584      0.537      0.724      0.687      0.478






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     56/100      5.91G     0.8383     0.5319     0.8878         51        736: 100%|██████████| 200/200 [00:42<00:00,  4.67it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:03<00:00,  8.58it/s]

                   all        831       1584      0.696      0.633      0.666      0.502






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     57/100      4.15G     0.8422     0.5354     0.8897         64        736: 100%|██████████| 200/200 [00:42<00:00,  4.67it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:03<00:00,  8.62it/s]

                   all        831       1584      0.724       0.59      0.653      0.493






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     58/100      5.35G     0.8297     0.5286     0.8861         55        736: 100%|██████████| 200/200 [00:42<00:00,  4.67it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:03<00:00,  8.44it/s]

                   all        831       1584      0.768      0.574      0.663       0.51






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     59/100      5.35G     0.8122     0.5126     0.8797         64        736: 100%|██████████| 200/200 [00:42<00:00,  4.68it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:03<00:00,  8.45it/s]

                   all        831       1584      0.727       0.56      0.621      0.469






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     60/100      5.15G     0.8432     0.5457      0.896         10        736: 100%|██████████| 200/200 [00:43<00:00,  4.64it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:03<00:00,  8.30it/s]

                   all        831       1584      0.738      0.584      0.643      0.478






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     61/100      3.92G     0.8145      0.513     0.8795         47        736: 100%|██████████| 200/200 [00:42<00:00,  4.67it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:03<00:00,  8.18it/s]

                   all        831       1584      0.575      0.611      0.663      0.504






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     62/100      5.29G     0.8282     0.5237     0.8846         15        736: 100%|██████████| 200/200 [00:42<00:00,  4.68it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:03<00:00,  8.33it/s]

                   all        831       1584      0.714      0.612      0.655      0.496






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     63/100      6.15G     0.8131     0.5084     0.8788         13        736: 100%|██████████| 200/200 [00:42<00:00,  4.66it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:03<00:00,  8.37it/s]

                   all        831       1584      0.715      0.628       0.66      0.501






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     64/100      6.06G     0.8007     0.5026     0.8781         70        736: 100%|██████████| 200/200 [00:42<00:00,  4.72it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:03<00:00,  8.23it/s]

                   all        831       1584      0.541      0.744      0.703      0.514






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     65/100      5.98G     0.8142     0.5175     0.8816         12        736: 100%|██████████| 200/200 [00:42<00:00,  4.67it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:02<00:00,  9.07it/s]

                   all        831       1584      0.585      0.617      0.653      0.492






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     66/100      5.77G      0.799     0.4981     0.8751         32        736: 100%|██████████| 200/200 [00:42<00:00,  4.71it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:03<00:00,  8.45it/s]

                   all        831       1584      0.724      0.591      0.666      0.509






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     67/100      4.59G     0.8089     0.5111       0.88         12        736: 100%|██████████| 200/200 [00:43<00:00,  4.64it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:02<00:00,  8.70it/s]

                   all        831       1584      0.604      0.719      0.703      0.529






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     68/100      4.48G      0.809     0.5105     0.8808         11        736: 100%|██████████| 200/200 [00:42<00:00,  4.67it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:03<00:00,  8.09it/s]

                   all        831       1584      0.583      0.601      0.682      0.511






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     69/100      5.37G     0.8249     0.5266      0.884          1        736: 100%|██████████| 200/200 [00:42<00:00,  4.68it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:03<00:00,  8.30it/s]

                   all        831       1584       0.51      0.717      0.685      0.513






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     70/100      4.13G     0.7942     0.5008     0.8779         25        736: 100%|██████████| 200/200 [00:42<00:00,  4.71it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:03<00:00,  8.45it/s]

                   all        831       1584      0.767      0.582      0.689      0.523






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     71/100      5.76G     0.8054     0.5081     0.8803         20        736: 100%|██████████| 200/200 [00:42<00:00,  4.66it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:02<00:00,  8.69it/s]

                   all        831       1584      0.467      0.732      0.675      0.514






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     72/100      5.05G     0.8023     0.5065     0.8782         11        736: 100%|██████████| 200/200 [00:43<00:00,  4.65it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:03<00:00,  8.43it/s]

                   all        831       1584      0.614      0.716      0.713      0.543






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     73/100      5.76G     0.7983     0.5042     0.8793         13        736: 100%|██████████| 200/200 [00:42<00:00,  4.68it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:02<00:00,  8.91it/s]

                   all        831       1584      0.761      0.609      0.678       0.52






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     74/100      5.93G     0.7932     0.4962     0.8771         10        736: 100%|██████████| 200/200 [00:42<00:00,  4.70it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:03<00:00,  8.16it/s]

                   all        831       1584      0.547      0.762      0.732      0.549






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     75/100      4.81G     0.7899     0.4959     0.8765         84        736: 100%|██████████| 200/200 [00:42<00:00,  4.66it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:02<00:00,  9.13it/s]

                   all        831       1584      0.597      0.686       0.69      0.535






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     76/100       4.5G     0.8024     0.5042     0.8784         60        736: 100%|██████████| 200/200 [00:42<00:00,  4.69it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:03<00:00,  8.37it/s]

                   all        831       1584       0.55      0.798      0.748      0.577






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     77/100       4.6G     0.7931     0.5028     0.8812         24        736: 100%|██████████| 200/200 [00:42<00:00,  4.69it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:03<00:00,  8.55it/s]

                   all        831       1584      0.638      0.708      0.749      0.576






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     78/100      5.08G     0.7921     0.4967     0.8787         22        736: 100%|██████████| 200/200 [00:42<00:00,  4.69it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:02<00:00,  8.87it/s]

                   all        831       1584      0.599      0.685      0.733      0.563






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     79/100      5.99G     0.7943     0.4966     0.8747          2        736: 100%|██████████| 200/200 [00:43<00:00,  4.65it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:02<00:00,  8.76it/s]

                   all        831       1584      0.678      0.731      0.752      0.562






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     80/100      5.13G     0.7789     0.4876     0.8718         17        736: 100%|██████████| 200/200 [00:42<00:00,  4.67it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:03<00:00,  8.33it/s]

                   all        831       1584      0.678      0.652      0.708      0.532






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     81/100      6.21G      0.783     0.4971     0.8767         72        736: 100%|██████████| 200/200 [00:42<00:00,  4.68it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:03<00:00,  8.28it/s]

                   all        831       1584      0.627      0.666      0.687      0.515






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     82/100       4.4G     0.7776     0.4984      0.873          1        736: 100%|██████████| 200/200 [00:42<00:00,  4.69it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:02<00:00,  8.96it/s]

                   all        831       1584      0.639      0.748      0.702      0.549






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     83/100      5.69G     0.7729     0.4859     0.8708         84        736: 100%|██████████| 200/200 [00:42<00:00,  4.68it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:03<00:00,  8.20it/s]

                   all        831       1584       0.78      0.585      0.689      0.534






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     84/100      4.45G     0.7821     0.4879     0.8731         67        736: 100%|██████████| 200/200 [00:42<00:00,  4.67it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:02<00:00,  8.87it/s]

                   all        831       1584      0.574      0.686      0.671      0.531






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     85/100      5.91G     0.7757     0.4879     0.8724         45        736: 100%|██████████| 200/200 [00:42<00:00,  4.67it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:03<00:00,  8.56it/s]

                   all        831       1584      0.633      0.729      0.734      0.573






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     86/100      5.81G     0.7736     0.4882     0.8742         29        736: 100%|██████████| 200/200 [00:42<00:00,  4.66it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:03<00:00,  8.49it/s]

                   all        831       1584      0.618      0.664      0.713      0.556






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     87/100      5.44G     0.7721     0.4822     0.8707         49        736: 100%|██████████| 200/200 [00:42<00:00,  4.69it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:03<00:00,  8.66it/s]

                   all        831       1584      0.576       0.71      0.711      0.546






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     88/100      5.58G     0.7685     0.4875     0.8725         21        736: 100%|██████████| 200/200 [00:42<00:00,  4.69it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:03<00:00,  8.34it/s]

                   all        831       1584      0.712      0.635      0.708      0.533






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     89/100      5.64G     0.7558     0.4763     0.8679          2        736: 100%|██████████| 200/200 [00:42<00:00,  4.69it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:03<00:00,  8.48it/s]

                   all        831       1584      0.591      0.686      0.699      0.536






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     90/100      5.32G     0.7564     0.4751     0.8691         15        736: 100%|██████████| 200/200 [00:42<00:00,  4.69it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:03<00:00,  8.27it/s]

                   all        831       1584      0.549       0.69      0.707      0.543





Closing dataloader mosaic
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     91/100      4.88G     0.6725     0.4167     0.8492          8        736: 100%|██████████| 200/200 [00:40<00:00,  4.95it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:03<00:00,  8.35it/s]

                   all        831       1584      0.584      0.716      0.691      0.541






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     92/100      4.68G     0.6603     0.4088     0.8452          9        736: 100%|██████████| 200/200 [00:39<00:00,  5.00it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:02<00:00,  9.11it/s]

                   all        831       1584      0.668      0.704      0.717      0.551






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     93/100      3.93G     0.6385     0.4009     0.8434          1        736: 100%|██████████| 200/200 [00:39<00:00,  5.03it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:02<00:00,  8.80it/s]

                   all        831       1584      0.519      0.726      0.673      0.515






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     94/100      5.08G      0.638     0.3942      0.845          7        736: 100%|██████████| 200/200 [00:39<00:00,  5.02it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:02<00:00,  8.93it/s]

                   all        831       1584       0.78      0.581      0.693      0.531






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     95/100      5.26G     0.6386     0.3938      0.842         11        736: 100%|██████████| 200/200 [00:39<00:00,  5.03it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:02<00:00,  8.75it/s]

                   all        831       1584      0.558      0.765      0.717      0.556






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     96/100      4.89G     0.6292     0.3891     0.8436         15        736: 100%|██████████| 200/200 [00:39<00:00,  5.02it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:02<00:00,  9.25it/s]

                   all        831       1584      0.768      0.591       0.71      0.548
[34m[1mEarlyStopping: [0mTraining stopped early as no improvement observed in last 20 epochs. Best results observed at epoch 76, best model saved as best.pt.
To update EarlyStopping(patience=20) pass a new patience value, i.e. `patience=300` or use `patience=0` to disable EarlyStopping.






96 epochs completed in 1.228 hours.
Optimizer stripped from runs/train/fine-tune-yolov8/weights/last.pt, 6.2MB
Optimizer stripped from runs/train/fine-tune-yolov8/weights/best.pt, 6.2MB

Validating runs/train/fine-tune-yolov8/weights/best.pt...
Ultralytics 8.3.109 🚀 Python-3.10.13 torch-2.6.0+cu124 CUDA:0 (NVIDIA GeForce RTX 3050 OEM, 7957MiB)
Model summary (fused): 72 layers, 3,008,378 parameters, 0 gradients, 8.1 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 26/26 [00:05<00:00,  4.47it/s]


                   all        831       1584      0.714      0.686       0.76      0.566
                  roof        197        275      0.668      0.705       0.72      0.535
                  wall         21         21      0.538      0.857      0.805      0.635
                window          4          4          1      0.369      0.683      0.422
                person        159        188       0.64      0.755      0.711      0.515
               bicycle         65         69      0.675      0.754      0.798      0.489
                  tree        255        266      0.719      0.759      0.832      0.738
               vehicle        390        761      0.758      0.604      0.773      0.631
Speed: 0.1ms preprocess, 5.6ms inference, 0.0ms loss, 0.3ms postprocess per image
Results saved to [1mruns/train/fine-tune-yolov8[0m


### Process and Compare Metrics for Fine-Tuned YOLOv8 Model

In [40]:
new_path = './runs/train/fine-tune-yolov8'  # Path to the folder where the fine-tuned YOLO model's training results are stored
print_csv_metrics(new_path)  # This will process and print the final metrics from the 'results.csv' file in the specified folder


Found results.csv at: ./runs/train/fine-tune-yolov8/results.csv
Total number of epochs: 96

Training Loss: 1.861850
Precision: 0.767980
Recall: 0.590860
mAP@0.5: 0.710060
mAP@0.5:0.95: 0.547570

Validation Loss: 2.526340


In [41]:
# Find the best model (the one with the best performance) from the fine-tuned YOLOv8 training results
best_pt_path = find_best_model(new_path)

# Evaluate the best model and save the performance metrics in a JSON file
evaluate_and_save_metrics(best_pt_path, output_json_path="per_class_metrics_retrain.json")


[+] Found best.pt at: runs/train/fine-tune-yolov8/weights/best.pt
Ultralytics 8.3.109 🚀 Python-3.10.13 torch-2.6.0+cu124 CUDA:0 (NVIDIA GeForce RTX 3050 OEM, 7957MiB)


Model summary (fused): 72 layers, 3,008,378 parameters, 0 gradients, 8.1 GFLOPs


[34m[1mval: [0mScanning /home/ssl49/Desktop/Automated-Labeling-for-Aerial-Images-main/Automated_SegmentAndYolo/datasets/split_videos_dataset/val/labels.cache... 831 images, 0 backgrounds, 0 corrupt: 100%|██████████| 831/831 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 52/52 [00:04<00:00, 11.97it/s]


                   all        831       1584      0.554        0.8      0.751      0.584
                  roof        197        275      0.482       0.72      0.678      0.534
                  wall         21         21      0.428      0.891      0.864      0.722
                window          4          4      0.697       0.75      0.621      0.422
                person        159        188      0.567      0.819      0.718      0.538
               bicycle         65         69      0.514      0.841      0.756      0.448
                  tree        255        266      0.558      0.805      0.831      0.744
               vehicle        390        761      0.634      0.774      0.792       0.68
Speed: 0.3ms preprocess, 3.0ms inference, 0.0ms loss, 0.3ms postprocess per image
Results saved to [1mruns/detect/val2[0m
[+] Saved per-class metrics to per_class_metrics_retrain.json


In [42]:
# Print the per-class metrics from the saved JSON file
print_per_class_metrics("per_class_metrics_retrain.json")


Per-Class mAP@0.5:0.95 Metrics:

Class Name      Class ID   mAP@0.5:0.95   
----------------------------------------
unlabeled       0          0.5839         
pool            1          0.5839         
vegetation      2          0.5839         
roof            3          0.534          
wall            4          0.7216         
window          5          0.4224         
person          6          0.5378         
dog             7          0.5839         
car             8          0.5839         
bicycle         9          0.4481         
tree            10         0.7437         
truck           11         0.5839         
bus             12         0.5839         
vehicle         13         0.6795         


In [43]:
# Define paths for the fine-tuned and original YOLOv8 model results
new_path = './runs/train/fine-tune-yolov8'
old_path = './runs/train/yolov8'

# Find the results.csv files for both models
results_csv_path = find_results_csv(new_path)  # Fine-tuned YOLOv8
results_csv_path_1 = find_results_csv(old_path)  # Original YOLOv8

# Compare the final epoch metrics between the two models
compare_final_metrics(results_csv_path_1, results_csv_path)

Changes in Metrics Before and After Retraning:

Metric                    Before     After      Diff       Trend
-----------------------------------------------------------------
Box Loss (Train)          0.73327    0.62922    -0.10405   [91m Decrease[0m
Cls Loss (Train)          0.45441    0.38908    -0.06533   [91m Decrease[0m
DFL Loss (Train)          0.86014    0.84355    -0.01659   [91m Decrease[0m
Precision                 0.64380    0.76798    0.12418    [92m Increase[0m
Recall                    0.26823    0.59086    0.32263    [92m Increase[0m
mAP@0.5                   0.28205    0.71006    0.42801    [92m Increase[0m
mAP@0.5:0.95              0.17731    0.54757    0.37026    [92m Increase[0m
Box Loss (Val)            2.02042    0.74277    -1.27765   [91m Decrease[0m
Cls Loss (Val)            2.42410    0.88854    -1.53556   [91m Decrease[0m
DFL Loss (Val)            1.05216    0.89503    -0.15713   [91m Decrease[0m


In [44]:
compare_maps("per_class_metrics.json", "per_class_metrics_retrain.json")


mAP@0.5:0.95 Differences Before and After Retraning:

Class           Before     After      Diff       Trend
------------------------------------------------------------
unlabeled       0.1855     0.5839     0.3984     [92m Increase[0m
pool            0.7305     0.5839     -0.1466    [91m Decrease[0m
vegetation      0.0798     0.5839     0.5041     [92m Increase[0m
roof            0.3768     0.5340     0.1572     [92m Increase[0m
wall            0.0513     0.7216     0.6703     [92m Increase[0m
window          0.1524     0.4224     0.2700     [92m Increase[0m
person          0.1884     0.5378     0.3494     [92m Increase[0m
dog             0.0029     0.5839     0.5810     [92m Increase[0m
car             0.2459     0.5839     0.3380     [92m Increase[0m
bicycle         0.1027     0.4481     0.3454     [92m Increase[0m
tree            0.3563     0.7437     0.3874     [92m Increase[0m
truck           0.0291     0.5839     0.5548     [92m Increase[0m
bus          

### Generate Predictions for Fine-Tuned and Original YOLOv8 Models

In [45]:
# Set paths for the fine-tuned and original YOLOv8 models
new_path = './runs/train/fine-tune-yolov8'
old_path = './runs/train/yolov8'

# Find the best model (checkpoint) from the fine-tuned YOLOv8 run
best_pt_path_retrain = find_best_model(new_path)

# Find the best model (checkpoint) from the original YOLOv8 run
best_pt_path = find_best_model(old_path)

[+] Found best.pt at: runs/train/fine-tune-yolov8/weights/best.pt
[+] Found best.pt at: runs/train/yolov8/weights/best.pt


In [46]:
videos_predictions(best_pt_path, class_id_to_name, video_dir='videos', output_base='./datasets/final_output', max_frames=None)

  0%|          | 0/8 [00:00<?, ?it/s]



v1: 100%|██████████| 642/642 [00:36<00:00, 17.62it/s]
 12%|█▎        | 1/8 [00:36<04:15, 36.51s/it]

DONE: v1 — Processed 642 frames


v12: 100%|██████████| 897/897 [00:59<00:00, 14.97it/s]
 25%|██▌       | 2/8 [01:36<05:01, 50.32s/it]

DONE: v12 — Processed 897 frames


v2: 100%|██████████| 175/175 [00:10<00:00, 16.82it/s]
 38%|███▊      | 3/8 [01:46<02:40, 32.11s/it]

DONE: v2 — Processed 175 frames


v3: 100%|██████████| 176/176 [00:01<00:00, 112.37it/s]
 50%|█████     | 4/8 [01:48<01:20, 20.06s/it]

DONE: v3 — Processed 176 frames


v4: 100%|██████████| 253/253 [00:04<00:00, 51.11it/s]
 62%|██████▎   | 5/8 [01:53<00:43, 14.62s/it]

DONE: v4 — Processed 253 frames


v5: 100%|██████████| 1114/1114 [01:09<00:00, 16.02it/s]
 75%|███████▌  | 6/8 [03:03<01:06, 33.31s/it]

DONE: v5 — Processed 1114 frames


v6: 100%|██████████| 483/483 [00:27<00:00, 17.83it/s]
 88%|████████▊ | 7/8 [03:30<00:31, 31.31s/it]

DONE: v6 — Processed 483 frames


v8: 100%|██████████| 949/949 [00:28<00:00, 33.50it/s]
100%|██████████| 8/8 [03:58<00:00, 29.83s/it]

DONE: v8 — Processed 949 frames





In [47]:
videos_predictions(best_pt_path_retrain, class_id_to_name, video_dir='videos', output_base='./datasets/final_output_retrain', max_frames=None)

  0%|          | 0/8 [00:00<?, ?it/s]



v1: 100%|██████████| 642/642 [00:35<00:00, 18.04it/s]
 12%|█▎        | 1/8 [00:35<04:09, 35.63s/it]

DONE: v1 — Processed 642 frames


v12: 100%|██████████| 897/897 [00:57<00:00, 15.72it/s]
 25%|██▌       | 2/8 [01:32<04:49, 48.27s/it]

DONE: v12 — Processed 897 frames


v2: 100%|██████████| 175/175 [00:10<00:00, 16.85it/s]
 38%|███▊      | 3/8 [01:43<02:34, 30.99s/it]

DONE: v2 — Processed 175 frames


v3: 100%|██████████| 176/176 [00:01<00:00, 121.79it/s]
 50%|█████     | 4/8 [01:44<01:17, 19.33s/it]

DONE: v3 — Processed 176 frames


v4: 100%|██████████| 253/253 [00:04<00:00, 51.01it/s]
 62%|██████▎   | 5/8 [01:49<00:42, 14.15s/it]

DONE: v4 — Processed 253 frames


v5: 100%|██████████| 1114/1114 [01:09<00:00, 16.13it/s]
 75%|███████▌  | 6/8 [02:58<01:05, 32.85s/it]

DONE: v5 — Processed 1114 frames


v6: 100%|██████████| 483/483 [00:27<00:00, 17.86it/s]
 88%|████████▊ | 7/8 [03:25<00:30, 30.98s/it]

DONE: v6 — Processed 483 frames


v8: 100%|██████████| 949/949 [00:27<00:00, 34.54it/s]
100%|██████████| 8/8 [03:53<00:00, 29.17s/it]

DONE: v8 — Processed 949 frames



