## Builting Functions

### Cuda Check

In [1]:
import torch

# Print the CUDA version that PyTorch was built with
print("Built CUDA Version:", torch.version.cuda)

# Check if CUDA is available on the current system
if torch.cuda.is_available():
    # Print the CUDA runtime version (compiled version)
    print("CUDA Runtime Version:", torch._C._cuda_getCompiledVersion())
    
    # Print the name of the first available GPU
    print("GPU Name:", torch.cuda.get_device_name(0))
else:
    # Inform the user if CUDA is not available
    print("CUDA is not available.")


Built CUDA Version: None
CUDA is not available.


### Mapping Classes and their colors with class id 

In [2]:
# Mapping of class IDs to class names and their corresponding RGB color codes
class_id_to_name = {
    0:  ('unlabeled', [28, 42, 168]),         # Background or unclassified area
    1:  ('pool', [0, 50, 89]),                # Swimming pool
    2:  ('vegetation', [107, 142, 35]),       # Trees, grass, or bushes
    3:  ('roof', [70, 70, 70]),               # Building rooftops
    4:  ('wall', [102, 102, 156]),            # Building walls
    5:  ('window', [254, 228, 12]),           # Windows
    6:  ('person', [255, 22, 96]),            # People
    7:  ('dog', [102, 51, 0]),                # Dogs
    8:  ('car', [9, 143, 150]),               # Cars
    9:  ('bicycle', [119, 11, 32]),           # Bicycles
    10: ('tree', [51, 51, 0]),                # Trees
    11: ('truck', [160, 160, 60]),            # Trucks (added)
    12: ('bus', [200, 80, 80]),               # Buses (added)
    13: ('vehicle', [20, 80, 80]),            # General vehicle category (added)
}


### Install and Import packages

In [3]:
# Install NumPy - fundamental package for numerical computations
# !pip install numpy

# Install OpenCV - library for computer vision tasks
# !pip install opencv-python

# Install Pillow - image processing library
# !pip install pillow

# Install Matplotlib - plotting and visualization library
# !pip install matplotlib

# Install tqdm - progress bar utility
# !pip install tqdm

# Install scikit-learn - machine learning tools
# !pip install scikit-learn

# Install PyTorch and TorchVision - deep learning framework and its vision tools
# !pip install torch torchvision

# Install Ultralytics - YOLO model implementation and training tools
# !pip install ultralytics


In [4]:
# Standard library imports
import os                     # Operating system interfaces
import gc                     # Garbage collection interface
import json                   # Working with JSON data
import shutil                 # File operations like copy, move, etc.
import zipfile                # Extracting zip archives
import random                 # Random number generation
from glob import glob         # Pattern matching for file paths
from pathlib import Path      # Object-oriented file path handling
from collections import defaultdict  # Dictionary with default value support
import xml.etree.ElementTree as ET  # Parsing XML files

# Scientific computing and data manipulation
import numpy as np            # Numerical operations
import pandas as pd           # Data analysis and manipulation
from sklearn.model_selection import train_test_split  # Train-test split

# Image processing and visualization
import cv2                    # OpenCV for computer vision
from PIL import Image, ImageDraw, ImageFont  # PIL for image handling
import matplotlib.pyplot as plt              # Plotting library
import matplotlib.patches as mpatches        # Drawing patches on plots

# Progress bar utility
from tqdm.auto import tqdm    # Progress bars for loops

# PyTorch and related imports
import torch
import torch.nn as nn         # Neural network modules
from torch.utils.data import DataLoader       # Efficient data loading
import torchvision.models as models           # Pretrained models
import torchvision.transforms as transforms   # Image transformations
import torchvision.models.segmentation as segmentation  # Segmentation models

# YOLO from Ultralytics
from ultralytics import YOLO  # YOLO object detection models

# Pandas library
import pandas as pd

# Google Drive downloader
import gdown                  # Downloading files from Google Drive

# Environment configuration
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"   # Avoids OpenMP duplicate library error

# Set device for computation (GPU if available, else CPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


### Dataset Download Functions

These functions allow you to download and extract datasets from Google Drive shared URLs. They handle the download and extraction of ZIP files for the **Semantic Drone Dataset** and the **UAVDT Dataset**.

#### `semantic_drone_dataset_download`

This function downloads and extracts the **Semantic Drone Dataset** from a Google Drive URL.

#### `uavdt_dataset_download`
This function downloads and extracts the UAVDT Dataset from a Google Drive URL.


In [5]:
def semantic_drone_dataset_download(gdrive_url, extract_to="extracted"):
    """
    Downloads and extracts the Semantic Drone Dataset from a Google Drive URL.
    
    Parameters:
        gdrive_url (str): The shared Google Drive link to the ZIP file.
        extract_to (str): Directory to extract contents into. Default is 'extracted'.
    """
    # Extract the file ID from the Google Drive shareable URL
    file_id = gdrive_url.split("/d/")[1].split("/")[0]
    download_url = f"https://drive.google.com/uc?id={file_id}"

    # Create the output folder if it doesn't exist
    os.makedirs(extract_to, exist_ok=True)

    zip_path = os.path.join(extract_to, "downloaded.zip")

    print("[INFO] Downloading ZIP from Google Drive...")
    gdown.download(download_url, zip_path, quiet=False)

    print("[INFO] Extracting ZIP...")
    # Extract contents of the ZIP file
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(extract_to)

    # Delete the ZIP file after extraction to save space
    os.remove(zip_path)

    print(f"[DONE] Extracted files to: {extract_to}")


def uavdt_dataset_download(gdrive_url, extract_to="extracted"):
    """
    Downloads and extracts the UAVDT Dataset from a Google Drive URL.
    
    Parameters:
        gdrive_url (str): The shared Google Drive link to the ZIP file.
        extract_to (str): Directory to extract contents into. Default is 'extracted'.
    """
    # Extract the file ID from the Google Drive shareable URL
    file_id = gdrive_url.split("/d/")[1].split("/")[0]
    download_url = f"https://drive.google.com/uc?id={file_id}"

    # Create the output folder if it doesn't exist
    os.makedirs(extract_to, exist_ok=True)

    zip_path = os.path.join(extract_to, "downloaded.zip")

    print("[+] Downloading ZIP from Google Drive...")
    gdown.download(download_url, zip_path, quiet=False)

    print("[+] Extracting ZIP...")
    # Extract contents of the ZIP file
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(extract_to)

    # Delete the ZIP file after extraction to save space
    os.remove(zip_path)

    print(f"[+] Extracted files to: {extract_to}")


### YOLO Dataset Conversion

#### Semantic Dorne Datasets

1. **`parse_yolo_style_bbox_from_xml`**: Parses XML annotations and converts polygon objects to YOLO-style bounding boxes.
2. **`save_yolo_format`**: Saves bounding boxes in YOLO format (normalized coordinates: `<class_id> <x_center> <y_center> <width> <height>`).
3. **`convert_fulldataset_yolo_only`**: Converts a full dataset of images and XML annotations to YOLO format and saves them to the specified output directory.


In [6]:
# ----------------------------
# Parse polygon and convert to YOLO bbox
# ----------------------------

# Parses XML annotation and converts polygon objects to YOLO-style bounding boxes
def parse_yolo_style_bbox_from_xml(xml_path, class_id_to_name):
    tree = ET.parse(xml_path)
    root = tree.getroot()
    bboxes = []
    for obj in root.findall('object'):
        class_name = obj.find('name').text
        if class_name in [value[0] for value in class_id_to_name.values()]:
            polygon = obj.find('polygon')
            if polygon is not None:
                # Extract points from polygon
                points = polygon.findall('pt')
                coords = [(float(pt.find('x').text), float(pt.find('y').text)) for pt in points]
                # Convert polygon to bounding box
                x_min = min(coord[0] for coord in coords)
                y_min = min(coord[1] for coord in coords)
                x_max = max(coord[0] for coord in coords)
                y_max = max(coord[1] for coord in coords)
                bboxes.append(((x_min, y_min), (x_max, y_max), class_name))
    return bboxes


# ----------------------------
# Save YOLO-format txt
# ----------------------------

# Saves the bounding boxes in YOLO format: <class_id> <x_center> <y_center> <width> <height>
def save_yolo_format(image_id, bboxes, image_width, image_height, output_path, class_id_to_name):
    with open(output_path, 'w') as f:
        for (x_min, y_min), (x_max, y_max), class_name in bboxes:
            class_id = next(cid for cid, (name, _) in class_id_to_name.items() if name == class_name)
            x_center = (x_min + x_max) / 2 / image_width
            y_center = (y_min + y_max) / 2 / image_height
            width = (x_max - x_min) / image_width
            height = (y_max - y_min) / image_height
            # Write to file with six decimal precision
            f.write(f"{class_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}\n")


# ----------------------------
# Convert dataset (YOLO only)
# ----------------------------

# Converts the full dataset by extracting YOLO-style annotations and saving them
def convert_fulldataset_yolo_only(dataset_path, output_dir, class_id_to_name):
    # Get list of image IDs (without extension)
    image_ids = [img.split('.')[0] for img in os.listdir(f"{dataset_path}/images") if img.endswith(".jpg")]

    # Create output folders
    os.makedirs(f"{output_dir}/images", exist_ok=True)
    os.makedirs(f"{output_dir}/labels", exist_ok=True)

    for image_id in tqdm(image_ids, desc="Converting to YOLO"):
        img_path = f"{dataset_path}/images/{image_id}.jpg"
        bbox_xml_path = f"{dataset_path}/gt/bounding_box/label_me_xml/{image_id}.xml"
        semantic_xml_path = f"{dataset_path}/gt/semantic/label_me_xml/{image_id}.xml"

        if not os.path.exists(img_path):
            print(f"[WARNING] Image not found: {img_path}, skipping...")
            continue

        try:
            # Parse bounding box and semantic annotations
            bboxes1 = parse_yolo_style_bbox_from_xml(bbox_xml_path, class_id_to_name)
            bboxes2 = parse_yolo_style_bbox_from_xml(semantic_xml_path, class_id_to_name)
            all_bboxes = bboxes1 + bboxes2
        except Exception as e:
            print(f"[WARNING] Skipping image {image_id} due to parse error: {e}")
            continue

        try:
            # Load image
            image = Image.open(img_path)
            image_np = np.array(image)
        except Exception as e:
            print(f"[WARNING] Could not load image {image_id}: {e}")
            continue

        # Save image to output directory
        image.save(f"{output_dir}/images/{image_id}.jpg")

        # Save YOLO-format labels to output directory
        yolo_annotation_path = f"{output_dir}/labels/{image_id}.txt"
        save_yolo_format(image_id, all_bboxes, image_np.shape[1], image_np.shape[0], yolo_annotation_path, class_id_to_name)

    print("[+] YOLO-format annotation conversion complete!")


#### UAVDT Datasets

1. **`convert_dataset`**: Converts UAVDT annotation files to YOLO format, mapping original class IDs to extended IDs and saving them with normalized bounding box coordinates.
2. **`copy_split_sequences`**: Splits the dataset into training and validation sets, copying the corresponding images and YOLO-format label files to separate directories.


In [7]:
# 🧠 Mapping UAVDT class IDs to extended class IDs used in the combined dataset
uavdt_to_extended = {
    0: 8,   # car
    1: 11,  # truck
    2: 12,  # bus
    3: 13   # other vehicle
}

# === Function to convert a single annotation file to YOLO format ===
def convert_annotation(anno_path, label_path, image_path, stats):
    if not os.path.exists(image_path):
        stats["missing_image"] += 1
        return

    try:
        img = cv2.imread(image_path)
        height, width = img.shape[:2]
    except:
        stats["missing_image"] += 1
        return

    with open(anno_path, 'r') as fin, open(label_path, 'w') as fout:
        for line in fin:
            parts = line.strip().split(',')
            if len(parts) < 8:
                stats["malformed"] += 1
                continue

            try:
                # Parse bounding box and class info
                x, y, w, h = map(float, parts[0:4])
                original_cls = int(parts[5])

                # Skip classes not in our mapping
                if original_cls not in uavdt_to_extended:
                    stats["skipped"][original_cls] += 1
                    continue

                # Convert to new class ID
                cls = uavdt_to_extended[original_cls]

                # Convert to YOLO format (normalized center_x, center_y, width, height)
                x_center = (x + w / 2) / width
                y_center = (y + h / 2) / height
                w /= width
                h /= height

                # Validate normalized coordinates
                if not (0 <= x_center <= 1 and 0 <= y_center <= 1 and w > 0 and h > 0):
                    stats["skipped"][cls] += 1
                    continue

                # Write label line
                fout.write(f"{cls} {x_center:.6f} {y_center:.6f} {w:.6f} {h:.6f}\n")
                stats["converted"] += 1
            except Exception:
                stats["malformed"] += 1
                continue

            stats["total"] += 1

# === Step 1: Convert all UAVDT annotations to YOLO format ===
def convert_dataset(root_dir):
    # Find all annotation files inside any Mxxxx/annotations/ directory
    annotation_paths = glob(os.path.join(root_dir, "M*/annotations/*.txt"))
    total_files = len(annotation_paths)

    # Stats for tracking issues and progress
    stats = {
        "total": 0,
        "converted": 0,
        "malformed": 0,
        "missing_image": 0,
        "skipped": defaultdict(int)
    }

    print(f"🔄 Converting {total_files} annotation files to YOLO format...")

    for anno_path in tqdm(annotation_paths, desc="Converting", unit="file"):
        # Get sequence directory (e.g., M0101)
        sequence_dir = os.path.dirname(os.path.dirname(anno_path))
        file_name = os.path.basename(anno_path)

        # Output label directory
        label_dir = os.path.join(sequence_dir, "labels")
        os.makedirs(label_dir, exist_ok=True)

        # Output label file path
        label_path = os.path.join(label_dir, file_name)

        # Corresponding image path
        image_name = file_name.replace(".txt", ".jpg")
        image_path = os.path.join(sequence_dir, "images", image_name)

        # Perform the actual conversion
        convert_annotation(anno_path, label_path, image_path, stats)

    # Print summary of the conversion process
    print("\nConversion complete.")
    print(f"Total boxes:     {stats['total']}")
    print(f"Converted boxes: {stats['converted']}")
    print(f"Skipped boxes:   {sum(stats['skipped'].values())}")
    for cls, count in sorted(stats["skipped"].items()):
        print(f"   - Skipped class {cls}: {count}")
    print(f"Malformed lines: {stats['malformed']}")
    print(f"Missing images:  {stats['missing_image']}")

# === Step 2: Split dataset into train/val and copy files ===
def copy_split_sequences(src_root, dst_root, train_ratio=0.8):
    # Find all sequences (Mxxxx folders)
    all_sequences = sorted(glob(os.path.join(src_root, "M*")))

    # Split into training and validation sequences
    train_seqs, val_seqs = train_test_split(all_sequences, train_size=train_ratio, random_state=42)

    # Copy files into respective folders
    for split_name, split_list in zip(['train', 'val'], [train_seqs, val_seqs]):
        for seq_path in tqdm(split_list, desc=f"Copying {split_name}"):
            images_src = os.path.join(seq_path, "images")
            labels_src = os.path.join(seq_path, "labels")

            images_dst = os.path.join(dst_root, split_name, "images")
            labels_dst = os.path.join(dst_root, split_name, "labels")

            os.makedirs(images_dst, exist_ok=True)
            os.makedirs(labels_dst, exist_ok=True)

            # Copy image files
            for img_file in glob(os.path.join(images_src, "*.jpg")):
                shutil.copy(img_file, os.path.join(images_dst, os.path.basename(img_file)))

            # Copy label files
            for label_file in glob(os.path.join(labels_src, "*.txt")):
                shutil.copy(label_file, os.path.join(labels_dst, os.path.basename(label_file)))

    print("\n[+] Dataset split into 'train/' and 'val/' folders with images and YOLO-format labels.")


#### Convert into train and Val Sets

This script converts UAVDT annotations to YOLO format, maps class IDs, normalizes bounding boxes, and splits the dataset into training and validation sets, organizing images and label files into respective directories.


In [8]:
import os
import shutil
import random
from tqdm import tqdm

# Function to move files from source directories to target directories
def move_files(file_list, 
               source_image_dir, 
               source_annotation_dir,
               target_image_dir, 
               target_annotation_dir):
    
    # Create target directories if they don't exist
    os.makedirs(target_image_dir, exist_ok=True)
    os.makedirs(target_annotation_dir, exist_ok=True)

    # Loop through each file in the provided list and move the corresponding image and annotation
    for image_id in tqdm(file_list, desc=f"Moving to {os.path.basename(os.path.dirname(target_image_dir))}"):
        # Construct paths for the image and annotation
        image_path = os.path.join(source_image_dir, f"{image_id}.jpg")
        annotation_path = os.path.join(source_annotation_dir, f"{image_id}.txt")

        # Construct target paths for the image and annotation
        target_image_path = os.path.join(target_image_dir, f"{image_id}.jpg")
        target_annotation_path = os.path.join(target_annotation_dir, f"{image_id}.txt")

        # Check if both the image and annotation files exist, then copy them to target directories
        if os.path.exists(image_path) and os.path.exists(annotation_path):
            shutil.copy(image_path, target_image_path)
            shutil.copy(annotation_path, target_annotation_path)

# Function to split the dataset into training and validation sets, and move the files
def split_and_move_dataset(source_base_dir="./datasets/semantic_yolo",
                           target_base_dir="./datasets/new_dataset_yolo_split",
                           split_ratio=0.8,
                           seed=42):
    
    # Set the random seed for reproducibility
    random.seed(seed)

    # Define paths for the image and label directories
    image_dir = os.path.join(source_base_dir, "images")
    label_dir = os.path.join(source_base_dir, "labels")

    # Get all image IDs (file names without extensions) from the image directory
    image_ids = [os.path.splitext(f)[0] for f in os.listdir(image_dir) if f.endswith(".jpg")]
    
    # Shuffle the image IDs to randomize the split
    random.shuffle(image_ids)

    # Determine the split index based on the split ratio
    split_idx = int(len(image_ids) * split_ratio)

    # Split the image IDs into training and validation sets
    train_ids = image_ids[:split_idx]
    val_ids = image_ids[split_idx:]

    # Move the training images and annotations to the target directories
    move_files(train_ids,
               source_image_dir=image_dir,
               source_annotation_dir=label_dir,
               target_image_dir=os.path.join(target_base_dir, "train/images"),
               target_annotation_dir=os.path.join(target_base_dir, "train/labels"))

    # Move the validation images and annotations to the target directories
    move_files(val_ids,
               source_image_dir=image_dir,
               source_annotation_dir=label_dir,
               target_image_dir=os.path.join(target_base_dir, "val/images"),
               target_annotation_dir=os.path.join(target_base_dir, "val/labels"))

    # Print the summary of the dataset split
    print(f"\n[✓] Dataset split completed: {len(train_ids)} train / {len(val_ids)} val samples")


#### Normalize Labels

This script normalizes bounding box coordinates in YOLO label files to the [0, 1] range based on the corresponding image dimensions.


In [9]:
from PIL import Image
import os
from tqdm import tqdm

def normalize_label_file(label_file, img_width, img_height):
    """
    Normalize the label coordinates in a label file to ensure they are within [0, 1] range.
    The label file is updated with the normalized values.
    """
    with open(label_file, 'r') as f:
        lines = f.readlines()
    
    with open(label_file, 'w') as f:
        for line in lines:
            # Split the line by spaces to get the class and coordinates
            parts = line.strip().split()
            class_id = int(parts[0])
            x_center, y_center, width, height = map(float, parts[1:])
            
            # Normalize the coordinates to be within the range [0, 1]
            x_center = min(1.0, max(0.0, x_center))
            y_center = min(1.0, max(0.0, y_center))
            width = min(1.0, max(0.0, width))
            height = min(1.0, max(0.0, height))

            # Write the normalized values back to the file
            f.write(f"{class_id} {x_center} {y_center} {width} {height}\n")


def get_image_size(img_path):
    """
    Get the width and height of the image to normalize the coordinates properly.
    This function uses PIL (Pillow) to open the image and return its dimensions.
    """
    with Image.open(img_path) as img:
        return img.size  # returns (width, height)


def normalize_all_labels(labels_dir, img_dir):
    """
    Normalize all label files in the specified directory.
    It reads each label file, gets the corresponding image size, and normalizes the label coordinates.
    """
    for label_file in tqdm(os.listdir(labels_dir)):  # Iterate over all files in the labels directory
       
        if label_file.endswith('.txt'):  # Process only label files
            label_path = os.path.join(labels_dir, label_file)
            img_path = os.path.join(img_dir, label_file.replace('.txt', '.jpg'))  # Assuming JPG images
            
            if os.path.exists(img_path):
                # Get the image dimensions to normalize the labels
                img_width, img_height = get_image_size(img_path)
                normalize_label_file(label_path, img_width, img_height)
            else:
                # Warning if the corresponding image is missing
                print(f"Warning: Image for label {label_file} not found!")
    
    print("Normalization Complete")


### Training v8 model functions

This function trains a YOLO model from scratch using a specified base model variant and dataset configuration, with various data augmentation techniques and hyperparameters for effective training.


In [10]:
from ultralytics import YOLO

def train_yolo_from_scratch(data_yaml, epochs, imgsz, batch, name, model_variant="yolov8n.pt"):
    """
    Train YOLO model from scratch using a specified base model variant.
    
    Parameters:
    - data_yaml (str): Path to the YAML file containing dataset configuration.
    - epochs (int): Number of training epochs.
    - imgsz (int): Image size for training.
    - batch (int): Batch size.
    - name (str): Name for the training session, which will be used for saving results.
    - model_variant (str): Base model to start from (default is 'yolov8n.pt').
    
    The function initializes a YOLO model from the specified variant, 
    then trains it on the given dataset configuration with data augmentation 
    and hyperparameters tailored for the task.
    """
    print(f"[+] Training from scratch using base model: {model_variant}")
    
    # Initialize the model with the specified base model variant
    model = YOLO(model_variant)

    # Clear memory
    gc.collect()
    torch.cuda.empty_cache()

    # Start the training process with the provided parameters
    model.train(
        data=data_yaml,            # Path to dataset YAML configuration
        epochs=epochs,             # Number of epochs for training
        imgsz=imgsz,               # Image size to resize input images during training
        batch=batch,               # Batch size
        name=name,                 # Name for the experiment (saved in 'runs/train')
        project="runs/train",      # Directory where training results will be saved
        augment=True,              # Whether to apply data augmentation
        degrees=10,                # Rotation degrees for augmentation
        scale=0.5,                 # Scale factor for augmentation
        flipud=0.2,                # Probability of flipping the image upside down
        fliplr=0.5,                # Probability of flipping the image left-right
        hsv_h=0.015,               # Hue shift for HSV augmentation
        hsv_s=0.7,                 # Saturation shift for HSV augmentation
        hsv_v=0.4,                 # Value shift for HSV augmentation
        mosaic=1.0,                # Mosaic augmentation probability
        mixup=0.2,                 # Mixup augmentation probability
        lr0=0.01,                  # Initial learning rate
        lrf=0.01,                  # Final learning rate (multiplied by lr0)
        verbose=True,              # Whether to print training logs
        patience=15                # Number of epochs with no improvement before stopping
    )


### Fine-Tuning YOLO Model

This function fine-tunes a pre-trained YOLO model on a new dataset, applying data augmentation and adjusting hyperparameters like learning rate and weight decay to optimize performance for the new task.


In [11]:
from ultralytics import YOLO
import torch
import gc

def fine_tune_yolo(data_yaml, epochs, imgsz, batch, name, base_model_path):
    """
    Fine-tune a pre-trained YOLO model on a new dataset.

    Parameters:
    - data_yaml (str): Path to the dataset YAML file containing the configuration.
    - epochs (int): Number of epochs for fine-tuning.
    - imgsz (int): Image size for training.
    - batch (int): Batch size for training.
    - name (str): The name of the fine-tuning experiment, which will be used for saving.
    - base_model_path (str): Path to the pre-trained YOLO model that will be fine-tuned.
    
    The function loads the pre-trained YOLO model from the specified path, performs garbage 
    collection and memory clearing to avoid CUDA memory issues, and then starts the fine-tuning process 
    on the new dataset with specific settings, such as a lower learning rate and data augmentations.
    """
    print(f"[+] Fine-tuning model from: {base_model_path}")
    
    # Load the pre-trained model from the specified base model path
    model = YOLO(base_model_path)

    # Perform garbage collection and clear CUDA memory to avoid out-of-memory errors
    gc.collect()
    torch.cuda.empty_cache()

    # Fine-tune the model with the specified parameters
    model.train(
        data=data_yaml,           # Path to dataset YAML configuration
        epochs=epochs,            # Number of epochs for fine-tuning
        imgsz=imgsz,              # Image size for resizing input images
        batch=batch,              # Batch size for training
        name=name,                # Name for the fine-tuning experiment (saved in 'runs/train')
        project="runs/train",     # Directory where the results of the fine-tuning will be stored
        weight_decay=0.0005,      # Weight decay parameter to prevent overfitting
        augment=True,             # Enable data augmentation
        degrees=10,               # Rotate images by up to 10 degrees
        scale=0.5,                # Scale images by 50% during augmentation
        flipud=0.2,               # 20% chance of flipping images vertically
        fliplr=0.5,               # 50% chance of flipping images horizontally
        hsv_h=0.015,              # Adjust hue by +/- 1.5% during augmentation
        hsv_s=0.7,                # Adjust saturation by +/- 70% during augmentation
        hsv_v=0.4,                # Adjust brightness by +/- 40% during augmentation
        mosaic=1.0,               # Apply mosaic augmentation with 100% probability
        mixup=0.2,                # Mixup augmentation with 20% probability
        patience=10,              # Early stopping patience (wait 10 epochs without improvement)
        verbose=True,             # Display detailed training logs
    )


### YOLO Model Evaluation and Metrics Extraction

This script provides functions for evaluating a YOLO model on a dataset, extracting per-class mAP@0.5:0.95 metrics, and saving them to a JSON file. Below are the key functions:

- **load_yolo_model**: Loads a pre-trained YOLO model from the specified path.
- **run_model_validation**: Runs the validation for the loaded YOLO model and returns the results.
- **extract_per_class_metrics**: Extracts the mAP@0.5:0.95 metrics for each class from the model validation results.
- **save_metrics_to_json**: Saves the extracted metrics in a JSON file for easy access and further analysis.
- **evaluate_and_save_metrics**: A high-level function that loads the model, validates it, extracts metrics, and saves them to a JSON file.


In [12]:
import json
from ultralytics import YOLO

def load_yolo_model(model_path):
    """
    Loads a pre-trained YOLO model from the specified path.

    Parameters:
    - model_path (str): Path to the pre-trained YOLO model file.

    Returns:
    - YOLO model object.
    """
    return YOLO(model_path)

def run_model_validation(model):
    """
    Runs the model validation and returns the results.

    Parameters:
    - model (YOLO): The YOLO model to be validated.

    Returns:
    - results (Result object): Validation results from the model.
    """
    return model.val()

def extract_per_class_metrics(results):
    """
    Extracts mAP@0.5:0.95 for each class from the validation results.

    Parameters:
    - results (Result object): Validation results from the YOLO model.

    Returns:
    - per_class_metrics (dict): Dictionary containing per-class mAP@0.5:0.95 values.
    """
    per_class_metrics = {}
    if hasattr(results.box, 'maps') and results.box.maps is not None:
        maps = results.box.maps  # NumPy array of mAP@0.5:0.95 for each class
        for i, name in results.names.items():
            per_class_metrics[name] = {
                "class_id": i,
                "mAP@0.5:0.95": round(float(maps[i]), 4)  # Round to 4 decimal places
            }
    else:
        print("[-] No per-class mAP@0.5:0.95 data found.")
    return per_class_metrics

def save_metrics_to_json(metrics, output_path):
    """
    Saves the per-class metrics to a JSON file.

    Parameters:
    - metrics (dict): The metrics to be saved, typically containing per-class mAP values.
    - output_path (str): Path to the output JSON file where the metrics will be saved.
    """
    with open(output_path, "w") as f:
        json.dump(metrics, f, indent=4)  # Save as JSON with indents for readability
    print(f"[+] Saved per-class metrics to {output_path}")

def evaluate_and_save_metrics(model_path, output_json_path="per_class_metrics.json"):
    """
    Evaluates the YOLO model and saves the per-class mAP metrics to a JSON file.

    Parameters:
    - model_path (str): Path to the pre-trained YOLO model.
    - output_json_path (str): Path to save the output JSON file with per-class mAP values.
    """
    model = load_yolo_model(model_path)  # Load the YOLO model
    results = run_model_validation(model)  # Run validation
    metrics = extract_per_class_metrics(results)  # Extract per-class metrics
    save_metrics_to_json(metrics, output_json_path)  # Save metrics to JSON


### Print Per-Class mAP Metrics

This function loads the per-class mAP@0.5:0.95 metrics from a JSON file and prints them in a tabular format.

- **print_per_class_metrics**: 
    - **Parameters**: 
        - `json_path` (str): The path to the JSON file containing the per-class metrics (default is `"per_class_metrics.json"`).
    - **Functionality**:
        - Loads the metrics from the provided JSON file.
        - Prints a header and iterates through each class to display the class name, class ID, and corresponding mAP value.


In [13]:
def print_per_class_metrics(json_path="per_class_metrics.json"):
    """
    Prints the per-class mAP@0.5:0.95 metrics from a JSON file.

    Parameters:
    - json_path (str): Path to the JSON file containing the per-class metrics.
    """
    # Load the metrics from the specified JSON file
    with open(json_path, "r") as f:
        metrics = json.load(f)
    
    # Print the header for the table
    print("Per-Class mAP@0.5:0.95 Metrics:\n")
    print(f"{'Class Name':<15} {'Class ID':<10} {'mAP@0.5:0.95':<15}")
    print("-" * 40)
    
    # Loop through each class in the metrics and print the results
    for name, data in metrics.items():
        print(f"{name:<15} {data['class_id']:<10} {data['mAP@0.5:0.95']:<15}")


### Find Best YOLO Model

- **find_best_model(base_dir='runs_yolo/')**: 
    - Searches for the most recently modified `best.pt` file in the given directory and subdirectories.
    - Returns the path to the latest `best.pt` file.
    - Raises `FileNotFoundError` if no `best.pt` file is found.


In [14]:
from pathlib import Path

def find_best_model(base_dir='runs_yolo/'):
    """
    Searches for the 'best.pt' model file in the given directory and its subdirectories.

    Parameters:
    - base_dir (str): The directory where the search will start. Defaults to 'runs_yolo/'.

    Returns:
    - str: Path to the 'best.pt' model file.
    
    Raises:
    - FileNotFoundError: If no 'best.pt' file is found in the directory.
    """
    # Use Path.rglob to recursively search for all 'best.pt' files in the directory
    best_paths = list(Path(base_dir).rglob('best.pt'))
    
    # Check if any 'best.pt' file was found
    if not best_paths:
        raise FileNotFoundError("No 'best.pt' file found in the 'runs/' directory.")
    
    # Optionally, sort the found files by their last modified time (descending)
    best_paths.sort(key=lambda p: p.stat().st_mtime, reverse=True)
    
    # Print the path of the most recently modified 'best.pt' file
    print(f"[+] Found best.pt at: {best_paths[0]}")
    
    # Return the path to the 'best.pt' file
    return str(best_paths[0])


### Prediction on Videos

1. **Frame Processing**  

2. **Video Loop & Saving**  


In [15]:
# ========== FRAME PROCESSING ==========

import cv2

def process_frame_1(frame, yolo_model, w, h, class_id_to_name, conf_threshold=0.8):
    """
    Process a single frame from a video, run YOLO model inference, and annotate the frame.

    Parameters:
    - frame: The video frame to process.
    - yolo_model: The YOLO model used for object detection.
    - w, h: The width and height of the frame (image size).
    - class_id_to_name: A dictionary mapping class IDs to class names and colors.
    - conf_threshold: The confidence threshold for filtering detections.

    Returns:
    - annotated: The annotated frame with bounding boxes and labels.
    - boxes: The bounding box coordinates for each detected object.
    - class_ids: The class IDs for each detected object.
    """
    annotated = frame.copy()  # Make a copy of the original frame for annotation
    results = yolo_model(annotated, verbose=False)[0]  # Run YOLO inference on the frame
    boxes = results.boxes.xyxy.cpu().numpy()  # Get bounding box coordinates (x1, y1, x2, y2)
    class_ids = results.boxes.cls.cpu().numpy()  # Get class IDs for detected objects
    confidences = results.boxes.conf.cpu().numpy()  # Confidence scores for each box

    for box, cls_id, confidence in zip(boxes, class_ids, confidences):
        if confidence > conf_threshold:  # Filter detections based on confidence
            x1, y1, x2, y2 = map(int, box)  # Convert box coordinates to integers
            class_name, color = class_id_to_name[int(cls_id)]  # Get class name and color based on class ID
            cv2.rectangle(annotated, (x1, y1), (x2, y2), color, 2)  # Draw bounding box
            cv2.putText(annotated, f"{class_name} {confidence:.2f}", (x1, max(y1 - 10, 10)),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 255, 0), 2)  # Add label and confidence text

    return annotated, boxes, class_ids


# ========== VIDEO CAPTURE ==========

def setup_video_capture_1(video_path):
    """
    Set up video capture for a given video file and return video properties.

    Parameters:
    - video_path: Path to the video file.

    Returns:
    - cap: OpenCV video capture object.
    - total_frames: Total number of frames in the video.
    - fps: Frames per second of the video.
    - w, h: Width and height of the video frames.
    """
    cap = cv2.VideoCapture(video_path)  # Open the video file
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))  # Get total number of frames
    fps = cap.get(cv2.CAP_PROP_FPS)  # Get frames per second
    w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))  # Get frame width
    h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))  # Get frame height
    return cap, total_frames, fps, w, h


# ========== MAIN FUNCTION ==========

def videos_predictions(yolo_weights_path, class_id_to_name, video_dir='videos', output_base='./datatsets/opt', max_frames=None):
    """
    Process multiple video files, run YOLO inference, and save the results.

    Parameters:
    - yolo_weights_path: Path to the YOLO weights file.
    - class_id_to_name: A dictionary mapping class IDs to class names and colors.
    - video_dir: Directory containing the input video files.
    - output_base: Base directory where output images, labels, and videos will be saved.
    - max_frames: The maximum number of frames to process per video. If None, all frames will be processed.
    """
    yolo_model = YOLO(yolo_weights_path)  # Load the YOLO model

    # Set up output directories for images, labels, and output video
    image_out_dir = os.path.join(output_base, 'images')
    label_out_dir = os.path.join(output_base, 'labels')
    output_video_dir = os.path.join(output_base, 'output')

    # Create output directories if they don't exist
    os.makedirs(image_out_dir, exist_ok=True)
    os.makedirs(label_out_dir, exist_ok=True)
    os.makedirs(output_video_dir, exist_ok=True)

    # Iterate over video files in the video directory
    for video_file in tqdm(sorted(os.listdir(video_dir))):
        if not video_file.lower().endswith(".mp4"):
            continue  # Skip non-video files

        video_id = os.path.splitext(video_file)[0]  # Extract video ID (filename without extension)
        video_path = os.path.join(video_dir, video_file)  # Full path to the video file
        output_video_path = os.path.join(output_video_dir, f"{video_id}.mp4")  # Path for output video

        print(f"========== STARTED: {video_id} ==========")
        cap, total_frames, fps, w, h = setup_video_capture_1(video_path)  # Set up video capture
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Video writer codec
        writer = cv2.VideoWriter(output_video_path, fourcc, fps, (w, h))  # Video writer setup

        frame_count = 0
        pbar = tqdm(total=max_frames if max_frames else total_frames, desc=video_id)  # Progress bar

        while True:
            ret, frame = cap.read()
            if not ret or (max_frames and frame_count >= max_frames):
                break  # Exit if no frame is read or max_frames is reached

            # Process the current frame
            annotated_bgr, boxes, class_ids = process_frame_1(frame, yolo_model, w, h, class_id_to_name)

            # Save original frame as image
            img_filename = f'{video_id}_{frame_count:04d}.jpg'
            img_path = os.path.join(image_out_dir, img_filename)
            cv2.imwrite(img_path, frame)

            # Save YOLO-format label file
            label_filename = f'{video_id}_{frame_count:04d}.txt'
            label_path = os.path.join(label_out_dir, label_filename)
            with open(label_path, 'w') as f:
                for box, cls_id in zip(boxes, class_ids):
                    x1, y1, x2, y2 = box
                    w_box = x2 - x1
                    h_box = y2 - y1
                    cx = x1 + w_box / 2
                    cy = y1 + h_box / 2
                    f.write(f"{int(cls_id)} {cx/w:.6f} {cy/h:.6f} {w_box/w:.6f} {h_box/h:.6f}\n")

            writer.write(annotated_bgr)  # Write the annotated frame to the output video
            frame_count += 1
            pbar.update(1)  # Update progress bar

        cap.release()  # Release the video capture object
        writer.release()  # Release the video writer object
        pbar.close()  # Close the progress bar
        print(f"DONE: {video_id} — Processed {frame_count} frames")  # Print processing summary


### YOLO Video Inference 

1. **Frame Processing with Filtering**  
   `process_frame()` runs YOLO inference on each frame, filters by confidence and specific class IDs (`underrepresented_class_ids`), and draws bounding boxes with labels.

2. **Batch Video Processing & Saving**  
   `process_all_videos()` loops through videos, processes each frame, and saves:
   - Original frame (`.jpg`)
   - YOLO label (`.txt`)
   - Annotated output video (`.mp4`)  
   It also tracks total frames, bounding boxes, and labels.


In [16]:
# from ultralytics import YOLO
# import cv2
# import os
# import random

# def process_frame(frame, yolo_model, w, h, class_id_to_name,
#                   rare_class_ids=None, conf_threshold=0.8, common_keep_ratio=0.3):
#     """
#     Run YOLO inference on a single frame and annotate results with prioritization of rare classes.

#     Returns:
#         annotated_frame, list_of_boxes, list_of_class_ids
#     """
#     results = yolo_model.predict(frame, verbose=False)[0]
#     boxes = []
#     class_ids = []

#     for box in results.boxes:
#         cls_id = int(box.cls[0])
#         conf = float(box.conf[0])

#         if conf < conf_threshold:
#             continue

#         if rare_class_ids is not None:
#             if cls_id in rare_class_ids:
#                 keep = True
#             else:
#                 keep = random.random() < common_keep_ratio
#             if not keep:
#                 continue

#         x1, y1, x2, y2 = map(int, box.xyxy[0])
#         boxes.append((x1, y1, x2, y2))
#         class_ids.append(cls_id)

#         label, color = class_id_to_name.get(cls_id, (f"class_{cls_id}", (255, 255, 255)))
#         cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
#         cv2.putText(frame, label, (x1, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

#     return frame, boxes, class_ids


# def videos_predictions_rare(yolo_weights_path, class_id_to_name, video_dir='videos', output_base='./datasets/opt',
#                             rare_class_ids=None, max_frames=None, conf_threshold=0.8, common_keep_ratio=0.3):
#     """
#     Run YOLO predictions on videos, prioritize rare classes, and save results using a flat filename structure.
#     """
#     yolo_model = YOLO(yolo_weights_path)
#     video_files = [f for f in os.listdir(video_dir) if f.endswith('.mp4')]

#     image_dir = os.path.join(output_base, 'images')
#     label_dir = os.path.join(output_base, 'labels')
#     output_video_dir = os.path.join(output_base, 'output')

#     os.makedirs(image_dir, exist_ok=True)
#     os.makedirs(label_dir, exist_ok=True)
#     os.makedirs(output_video_dir, exist_ok=True)

#     for video_file in video_files:
#         video_id = os.path.splitext(video_file)[0]
#         print(f"Processing video: {video_id}")
#         output_video_path = os.path.join(output_video_dir, f"{video_id}.mp4")

#         cap = cv2.VideoCapture(os.path.join(video_dir, video_file))
#         w, h = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
#         fps = cap.get(cv2.CAP_PROP_FPS)
#         fourcc = cv2.VideoWriter_fourcc(*'mp4v')
#         out = cv2.VideoWriter(output_video_path, fourcc, fps, (w, h))

#         frame_count = 0
#         while cap.isOpened():
#             ret, frame = cap.read()
#             if not ret or (max_frames and frame_count >= max_frames):
#                 break

#             annotated, boxes, class_ids = process_frame(
#                 frame, yolo_model, w, h, class_id_to_name,
#                 rare_class_ids=rare_class_ids,
#                 conf_threshold=conf_threshold,
#                 common_keep_ratio=common_keep_ratio
#             )

#             frame_name = f"{video_id}_{frame_count:05d}"
#             frame_path = os.path.join(image_dir, f"{frame_name}.jpg")
#             label_path = os.path.join(label_dir, f"{frame_name}.txt")

#             cv2.imwrite(frame_path, annotated)
#             out.write(annotated)

#             with open(label_path, "w") as f:
#                 for box, cls_id in zip(boxes, class_ids):
#                     x1, y1, x2, y2 = box
#                     x_center = ((x1 + x2) / 2) / w
#                     y_center = ((y1 + y2) / 2) / h
#                     width = (x2 - x1) / w
#                     height = (y2 - y1) / h
#                     f.write(f"{cls_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}\n")

#             frame_count += 1

#         cap.release()
#         out.release()
#         print(f"Finished: {video_id}, Total Frames: {frame_count}")


In [17]:
from ultralytics import YOLO
import cv2
import os
import random
import numpy as np


def frame_difference_significant(frame1, frame2, threshold=30):
    """
    Returns True if the pixel-wise difference between frames is significant.
    """
    if frame1 is None or frame2 is None:
        return True
    diff = cv2.absdiff(frame1, frame2)
    diff_gray = cv2.cvtColor(diff, cv2.COLOR_BGR2GRAY)
    score = np.mean(diff_gray)
    return score > threshold


def process_frame(frame, yolo_model, w, h, class_id_to_name,
                  rare_class_ids=None, conf_threshold=0.8, common_keep_ratio=0.3):
    results = yolo_model.predict(frame, verbose=False)[0]
    boxes, class_ids = [], []

    for box in results.boxes:
        cls_id = int(box.cls[0])
        conf = float(box.conf[0])
        if conf < conf_threshold:
            continue

        if rare_class_ids is not None:
            if cls_id not in rare_class_ids and random.random() > common_keep_ratio:
                continue

        x1, y1, x2, y2 = map(int, box.xyxy[0])
        boxes.append((x1, y1, x2, y2))
        class_ids.append(cls_id)

        label, color = class_id_to_name.get(cls_id, (f"class_{cls_id}", (255, 255, 255)))
        cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
        cv2.putText(frame, label, (x1, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

    return frame, boxes, class_ids


def videos_predictions_pixelwise(
    yolo_weights_path,
    class_id_to_name,
    video_dir='videos',
    output_base='./datasets/opt',
    rare_class_ids=None,
    max_frames=None,
    conf_threshold=0.8,
    common_keep_ratio=0.3,
    pixel_diff_threshold=30
):
    yolo_model = YOLO(yolo_weights_path)
    video_files = [f for f in os.listdir(video_dir) if f.endswith('.mp4')]

    image_dir = os.path.join(output_base, 'images')
    label_dir = os.path.join(output_base, 'labels')
    output_video_dir = os.path.join(output_base, 'output')

    os.makedirs(image_dir, exist_ok=True)
    os.makedirs(label_dir, exist_ok=True)
    os.makedirs(output_video_dir, exist_ok=True)

    for video_file in video_files:
        video_id = os.path.splitext(video_file)[0]
        print(f"Processing video: {video_id}")
        output_video_path = os.path.join(output_video_dir, f"{video_id}.mp4")

        cap = cv2.VideoCapture(os.path.join(video_dir, video_file))
        w, h = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = cap.get(cv2.CAP_PROP_FPS)
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_video_path, fourcc, fps, (w, h))

        last_saved_frame = None
        frame_count = 0
        saved_count = 0

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret or (max_frames and saved_count >= max_frames):
                break

            if frame_difference_significant(last_saved_frame, frame, threshold=pixel_diff_threshold):
                annotated, boxes, class_ids = process_frame(
                    frame, yolo_model, w, h, class_id_to_name,
                    rare_class_ids=rare_class_ids,
                    conf_threshold=conf_threshold,
                    common_keep_ratio=common_keep_ratio
                )

                if boxes:
                    frame_name = f"{video_id}_{saved_count:05d}"
                    frame_path = os.path.join(image_dir, f"{frame_name}.jpg")
                    label_path = os.path.join(label_dir, f"{frame_name}.txt")

                    cv2.imwrite(frame_path, annotated)
                    out.write(annotated)

                    with open(label_path, "w") as f:
                        for box, cls_id in zip(boxes, class_ids):
                            x1, y1, x2, y2 = box
                            x_center = ((x1 + x2) / 2) / w
                            y_center = ((y1 + y2) / 2) / h
                            width = (x2 - x1) / w
                            height = (y2 - y1) / h
                            f.write(f"{cls_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}\n")

                    last_saved_frame = frame.copy()
                    saved_count += 1

            frame_count += 1

        cap.release()
        out.release()
        print(f"Finished: {video_id}, Total Saved Frames: {saved_count}")


### Identify Rare Classes from YOLO Labels

1. **Class Frequency Counting**  
   The function scans all `.txt` label files in `label_dir` to count the occurrences of each class ID.

2. **Rare Class Detection**  
   Class IDs with instance counts below `rare_threshold` are identified as rare and returned.

In [18]:
import os
from collections import defaultdict

def get_rare_class_ids(label_dir, class_id_to_name, rare_threshold=1000):
    """
    Identify rare class IDs based on the number of instances in the label files.
    
    Parameters:
    - label_dir: Directory containing the label files
    - class_id_to_name: Mapping of class IDs to class names
    - rare_threshold: Threshold for class instances to be considered rare
    
    Returns:
    - rare_class_ids: Set of class IDs that have fewer instances than the threshold
    """
    # Initialize a dictionary to count occurrences of each class
    class_counts = defaultdict(int)

    # Iterate through all label files in the specified directory
    for label_file in os.listdir(label_dir):
        if not label_file.endswith('.txt'):  # Only consider .txt label files
            continue
        with open(os.path.join(label_dir, label_file), 'r') as f:
            # Count occurrences of each class in the label file
            for line in f:
                parts = line.strip().split()  # Split each line by spaces
                if len(parts) >= 1:  # Ensure there's at least one part (class ID)
                    cls_id = int(parts[0])  # Extract class ID
                    class_counts[cls_id] += 1  # Increment the count for this class

    # Print the count for each class
    print("Class-wise instance counts:")
    total_count = 0  # Variable to track the total count of instances

    # Iterate over the class IDs and print the count for each class
    for cls_id in sorted(class_counts.keys()):
        name = class_id_to_name.get(cls_id, ("Unknown", []))[0]  # Get the class name
        count = class_counts[cls_id]
        print(f"Class {cls_id:2d} ({name:10s}): {count} instances")
        total_count += count  # Update the total count of instances

    print("Total Count:", total_count)

    # Identify classes with fewer instances than the threshold
    rare_class_ids = {cls_id for cls_id, count in class_counts.items() if count < rare_threshold}
    print(f"\nRare class IDs (threshold < {rare_threshold}): {rare_class_ids}")

    return rare_class_ids


### Merge Two YOLO Datasets

1. **Dataset Copy with Prefix Renaming**  
   Images and labels from two YOLO datasets (`source1`, `source2`) are copied to a `destination` directory, each renamed with a unique prefix (`orig_`, `pred_`) to avoid filename clashes.

2. **Folder Structure Preserved**  
   Ensures YOLO folder structure (`images/`, `labels/`) is maintained in the destination and handles missing label files with warnings.

In [19]:
import os
import shutil

def merge_yolo_datasets(source1, source2, destination):
    """
    Merges two YOLO datasets by copying the images and label files from two sources
    into a destination directory, with appropriate renaming and handling of duplicate labels.
    
    Parameters:
    - source1: Path to the first source dataset
    - source2: Path to the second source dataset
    - destination: Path to the destination directory where merged dataset will be saved
    """
    # Define subfolders for images and labels in both source datasets
    img1_dir = os.path.join(source1, 'images')
    lbl1_dir = os.path.join(source1, 'labels')
    img2_dir = os.path.join(source2, 'images')
    lbl2_dir = os.path.join(source2, 'labels')
    dst_img_dir = os.path.join(destination, 'images')
    dst_lbl_dir = os.path.join(destination, 'labels')

    # Create destination folders for images and labels if they don't exist
    os.makedirs(dst_img_dir, exist_ok=True)
    os.makedirs(dst_lbl_dir, exist_ok=True)

    def copy_files(src_img_dir, src_lbl_dir, prefix):
        """
        Copies image and label files from source directories to destination,
        renaming them with a given prefix to distinguish the datasets.
        
        Parameters:
        - src_img_dir: Source directory containing image files
        - src_lbl_dir: Source directory containing label files
        - prefix: Prefix to append to filenames to distinguish the source dataset
        """
        # Iterate through the image files in the source directory
        for filename in sorted(os.listdir(src_img_dir)):
            if not filename.lower().endswith('.jpg'):
                continue
            base = os.path.splitext(filename)[0]  # Get base filename without extension

            # Copy image file with the prefix
            new_img_name = f"{prefix}_{base}.jpg"
            shutil.copy(os.path.join(src_img_dir, filename),
                        os.path.join(dst_img_dir, new_img_name))

            # Copy corresponding label file, if it exists
            label_file = base + ".txt"
            if os.path.exists(os.path.join(src_lbl_dir, label_file)):
                new_lbl_name = f"{prefix}_{base}.txt"
                shutil.copy(os.path.join(src_lbl_dir, label_file),
                            os.path.join(dst_lbl_dir, new_lbl_name))
            else:
                print(f"Warning: No label for {filename}")

    # Merge the first (original) dataset
    print("[+] Merging original dataset...")
    copy_files(img1_dir, lbl1_dir, prefix="orig")

    # Merge the second (predicted) dataset
    print("[+] Merging predicted video dataset...")
    copy_files(img2_dir, lbl2_dir, prefix="pred")

    # Print completion message
    print(f"\n[+] Merge complete! Merged dataset saved at: {destination}")


### Print YOLOv8 Training Metrics from `results.csv`

1. **Automatic CSV Discovery & Analysis**  
   Recursively searches for `results.csv` in the given directory and loads it into a DataFrame to analyze training progress and outcomes.

2. **Final Epoch Summary**  
   Calculates total training and validation loss and prints key metrics like Precision, Recall, and mAP from the last epoch.


In [20]:
import os
import pandas as pd

def find_results_csv(directory):
    """Find the results.csv file in the specified directory."""
    # Traverse the directory to find 'results.csv'
    for root, dirs, files in os.walk(directory):
        if 'results.csv' in files:
            return os.path.join(root, 'results.csv')  # Return the full path if found
    return None  # Return None if 'results.csv' is not found

def load_results_csv(results_csv_path):
    """Load the results CSV into a pandas DataFrame."""
    # Load the CSV file into a DataFrame and return it
    return pd.read_csv(results_csv_path)

def calculate_total_epochs(df):
    """Calculate the total number of epochs from the DataFrame."""
    # Return the maximum epoch value from the DataFrame
    return df['epoch'].max()

def calculate_training_loss(epoch_data):
    """Calculate the total training loss from the given epoch data."""
    # Extract training loss components from the epoch data
    train_box_loss = epoch_data['train/box_loss']
    train_cls_loss = epoch_data['train/cls_loss']
    train_dfl_loss = epoch_data['train/dfl_loss']
    # Return the sum of the training losses
    return train_box_loss + train_cls_loss + train_dfl_loss

def calculate_validation_loss(epoch_data):
    """Calculate the total validation loss from the given epoch data."""
    # Extract validation loss components from the epoch data
    val_box_loss = epoch_data['val/box_loss']
    val_cls_loss = epoch_data['val/cls_loss']
    val_dfl_loss = epoch_data['val/dfl_loss']
    # Return the sum of the validation losses
    return val_box_loss + val_cls_loss + val_dfl_loss

def print_final_metrics(df):
    """Print the final metrics for the last epoch."""
    # Extract the data for the last epoch
    final_epoch_data = df.iloc[-1]

    # Calculate total training and validation loss
    train_loss = calculate_training_loss(final_epoch_data)
    val_loss = calculate_validation_loss(final_epoch_data)

    # Print the training metrics for the last epoch
    print("\n========== Final Training Metrics ==========")
    print(f"Training Loss: {train_loss:.6f}")
    print(f"Precision: {final_epoch_data['metrics/precision(B)']:.6f}")
    print(f"Recall: {final_epoch_data['metrics/recall(B)']:.6f}")
    print(f"mAP@0.5: {final_epoch_data['metrics/mAP50(B)']:.6f}")
    print(f"mAP@0.5:0.95: {final_epoch_data['metrics/mAP50-95(B)']:.6f}")

    # Print the validation metrics for the last epoch
    print("\n========== Final Validation Metrics ==========")
    print(f"Validation Loss: {val_loss:.6f}")

def print_csv_metrics(directory):
    """Main function to process and print final metrics."""
    # Find the 'results.csv' file in the given directory
    results_csv_path = find_results_csv(directory)
    
    if not results_csv_path:
        # If no 'results.csv' is found, print an error and return
        print("Error: 'results.csv' file not found in the specified directory.")
        return

    # Print the path to the found 'results.csv' file
    print(f"Found results.csv at: {results_csv_path}")

    # Load the results from the CSV file into a DataFrame
    df = load_results_csv(results_csv_path)

    # Get the total number of epochs from the DataFrame
    total_epochs = calculate_total_epochs(df)
    print(f"Total number of epochs: {total_epochs}")

    # Print the final training and validation metrics for the last epoch
    print_final_metrics(df)


### Compare Class-wise mAP@0.5:0.95 Before vs After Retraining

1. **Side-by-Side Metric Comparison**  
   Reads two JSON files containing class-wise mAP@0.5:0.95 values (before & after retraining) and compares them.

2. **Visual Trend Highlighting**  
   Displays changes in mAP with colored indicators:
   - 🟢 **Increase** for performance gain
   - 🔴 **Decrease** for performance drop


In [21]:
import json

def compare_maps(json_path1, json_path2):
    with open(json_path1, 'r') as f1, open(json_path2, 'r') as f2:
        metrics1 = json.load(f1)
        metrics2 = json.load(f2)

    print("\nmAP@0.5:0.95 Differences Before and After Retraning:\n")
    print(f"{'Class':<15} {'Before':<10} {'After':<10} {'Diff':<10} {'Trend'}")
    print("-" * 60)

    for class_name in metrics1:
        map1 = metrics1[class_name].get("mAP@0.5:0.95", 0)
        map2 = metrics2.get(class_name, {}).get("mAP@0.5:0.95", 0)

        diff = map2 - map1
        if abs(diff) > 1e-6:
            if diff > 0:
                trend = f"\033[92m Increase\033[0m"  # Green for increase
            else:
                trend = f"\033[91m Decrease\033[0m"  # Red for decrease

            # Printing with colors
            print(f"{class_name:<15} {map1:<10.4f} {map2:<10.4f} {diff:<10.4f} {trend}")


In [22]:
def compare_final_metrics(csv1_path, csv2_path):
    df1 = pd.read_csv(csv1_path)
    df2 = pd.read_csv(csv2_path)

    last1 = df1.iloc[-1]
    last2 = df2.iloc[-1]

    metrics_to_compare = {
        "train/box_loss": "Box Loss (Train)",
        "train/cls_loss": "Cls Loss (Train)",
        "train/dfl_loss": "DFL Loss (Train)",
        "metrics/precision(B)": "Precision",
        "metrics/recall(B)": "Recall",
        "metrics/mAP50(B)": "mAP@0.5",
        "metrics/mAP50-95(B)": "mAP@0.5:0.95",
        "val/box_loss": "Box Loss (Val)",
        "val/cls_loss": "Cls Loss (Val)",
        "val/dfl_loss": "DFL Loss (Val)"
    }

    print("Changes in Metrics Before and After Retraning:\n")
    print(f"{'Metric':<25} {'Before':<10} {'After':<10} {'Diff':<10} {'Trend'}")
    print("-" * 65)

    for key, label in metrics_to_compare.items():
        val1 = last1[key]
        val2 = last2[key]
        diff = val2 - val1
        if abs(diff) > 1e-6:
            # If increase, color green; if decrease, color red
            if diff > 0:
                trend = f"\033[92m Increase\033[0m"  # Green
            else:
                trend = f"\033[91m Decrease\033[0m"  # Red

            # Printing with colors
            print(f"{label:<25} {val1:<10.5f} {val2:<10.5f} {diff:<10.5f} {trend}")


## Calling Functions

### Download and Extract the Semantic Drone Dataset and UAVDT Dataset

In [23]:
# Google Drive URL for the Semantic Drone Dataset
gdrive_url = "https://drive.google.com/file/d/1UppumYqYOi-kto6BWPfFxwJK2Eph46oY/view?usp=sharing"
# Call the function to download and extract the Semantic Drone Dataset
semantic_drone_dataset_download(gdrive_url, extract_to="datasets")

# Google Drive URL for the UAVDT Dataset
gdrive_url = "https://drive.google.com/file/d/12cbrTaBAMIsuU-mwAA7IgDk9wSLC9cC-/view?usp=sharing"
# Call the function to download and extract the UAVDT Dataset
uavdt_dataset_download(gdrive_url, extract_to="datasets")


[INFO] Downloading ZIP from Google Drive...


Downloading...
From (original): https://drive.google.com/uc?id=1UppumYqYOi-kto6BWPfFxwJK2Eph46oY
From (redirected): https://drive.google.com/uc?id=1UppumYqYOi-kto6BWPfFxwJK2Eph46oY&confirm=t&uuid=844e59b2-09f2-47f5-86a0-9aca1313fca8
To: c:\Users\sande\OneDrive\Desktop\IITTP\2_Asgn\Sem2\2_dl\SemProject\Automated_SegmentAndYolo\datasets\downloaded.zip
100%|██████████| 4.14G/4.14G [02:39<00:00, 25.9MB/s]


[INFO] Extracting ZIP...
[DONE] Extracted files to: datasets
[+] Downloading ZIP from Google Drive...


Downloading...
From (original): https://drive.google.com/uc?id=12cbrTaBAMIsuU-mwAA7IgDk9wSLC9cC-
From (redirected): https://drive.google.com/uc?id=12cbrTaBAMIsuU-mwAA7IgDk9wSLC9cC-&confirm=t&uuid=d85acaa0-e2d9-4591-b4b3-05bb0c52a921
To: c:\Users\sande\OneDrive\Desktop\IITTP\2_Asgn\Sem2\2_dl\SemProject\Automated_SegmentAndYolo\datasets\downloaded.zip
100%|██████████| 5.21G/5.21G [03:24<00:00, 25.5MB/s]


[+] Extracting ZIP...
[+] Extracted files to: datasets


### Semantic Drone Dataset to YOLO Format Conversion

In [24]:
# Path to the Semantic Drone Dataset training set
dataset_path = "./datasets/semantic_drone_dataset/training_set" 

# Output directory where the YOLO formatted dataset will be saved
output_dir = "./datasets/semantic_yolo"

# Call the function to convert the full dataset into YOLO format
# The function converts annotations and images from the Semantic Drone Dataset into YOLO format
convert_fulldataset_yolo_only(dataset_path, output_dir, class_id_to_name)


Converting to YOLO:  65%|██████▌   | 261/400 [03:24<01:49,  1.27it/s]



Converting to YOLO: 100%|██████████| 400/400 [05:27<00:00,  1.22it/s]

[+] YOLO-format annotation conversion complete!





### UAVDT-2024 and Semnatic Dataset Processing

In [25]:
# UAVDT-2024 Dataset Processing

# Path to the source UAVDT-2024 dataset
source_root = "./datasets/UAVDT-2024"

# Output directory where the new YOLO format dataset will be saved
output_root = "./datasets/new_dataset_yolo_split"

# Convert the UAVDT-2024 dataset into YOLO format
# The function processes the dataset and converts annotations and images into YOLO format
convert_dataset(source_root)

# Split the dataset into training and validation sets with a ratio of 80:20
# This function copies the relevant sequences into the respective directories for training and validation
copy_split_sequences(source_root, output_root, train_ratio=0.8)


# Semantic Drone Datasets Processing

# Split and move the Semantic Drone dataset into training and validation sets
# The function handles the splitting of the dataset and moves the images and annotations into separate directories
split_and_move_dataset()


🔄 Converting 30946 annotation files to YOLO format...


Converting: 100%|██████████| 30946/30946 [03:06<00:00, 166.06file/s]



Conversion complete.
Total boxes:     868139
Converted boxes: 868139
Skipped boxes:   0
Malformed lines: 0
Missing images:  0


Copying train: 100%|██████████| 37/37 [01:03<00:00,  1.72s/it]
Copying val: 100%|██████████| 10/10 [00:18<00:00,  1.90s/it]



[+] Dataset split into 'train/' and 'val/' folders with images and YOLO-format labels.


Moving to train: 100%|██████████| 319/319 [00:03<00:00, 104.06it/s]
Moving to val: 100%|██████████| 80/80 [00:00<00:00, 122.35it/s]


[✓] Dataset split completed: 319 train / 80 val samples





### Dataset Label Normalization for Training and Validation

In [26]:
# Set your paths for the training dataset
dataset_path = "./datasets/new_dataset_yolo_split/train"

# Directory where the images are stored in the training dataset
image_dir = os.path.join(dataset_path, "images")

# Directory where the label files are stored in the training dataset
annotations_dir = os.path.join(dataset_path, "labels")

# Normalize all label files in the training dataset by adjusting coordinates
# This function ensures that the labels follow the expected YOLO format (normalized coordinates)
normalize_all_labels(annotations_dir, image_dir)

# Set your paths for the validation dataset
dataset_path = "./datasets/new_dataset_yolo_split/val"

# Directory where the images are stored in the validation dataset
image_dir = os.path.join(dataset_path, "images")

# Directory where the label files are stored in the validation dataset
annotations_dir = os.path.join(dataset_path, "labels")

# Normalize all label files in the validation dataset
normalize_all_labels(annotations_dir, image_dir)


100%|██████████| 2354/2354 [00:04<00:00, 522.92it/s]


Normalization Complete


100%|██████████| 1181/1181 [00:02<00:00, 444.89it/s]

Normalization Complete





### Identifying Rare Classes in the Training Dataset

In [27]:
# Set the path to the training labels directory
labels_dir = './datasets/new_dataset_yolo_split/train/labels'

# Get the list of rare class IDs by analyzing the label files in the specified directory
# The function `get_rare_class_ids` will count the number of occurrences of each class
# and return those with occurrences below the specified threshold (in this case, 3000)
rare_class_ids = get_rare_class_ids(label_dir=labels_dir, class_id_to_name=class_id_to_name, rare_threshold=3000)


Class-wise instance counts:
Class  1 (pool      ): 30 instances
Class  2 (vegetation): 6178 instances
Class  3 (roof      ): 300 instances
Class  4 (wall      ): 989 instances
Class  5 (window    ): 448 instances
Class  6 (person    ): 2589 instances
Class  7 (dog       ): 23 instances
Class  8 (car       ): 35054 instances
Class  9 (bicycle   ): 205 instances
Class 10 (tree      ): 464 instances
Class 11 (truck     ): 129 instances
Class 12 (bus       ): 86 instances
Class 13 (vehicle   ): 577 instances
Total Count: 47072

Rare class IDs (threshold < 3000): {1, 3, 4, 5, 6, 7, 9, 10, 11, 12, 13}


### Clean Up Directories and Train YOLOv8 from Scratch

In [None]:
import shutil
import os

# List of folders to delete
folders_to_delete = ['./datasets/semantic_yolo', './datasets/new_dataset_yolo', './datasets/uavdt-processed', './runs', "./metrics"]

# Iterate over each folder path in the list
for folder_path in folders_to_delete:
    # Check if the folder exists
    if os.path.exists(folder_path):
        # Delete the folder and its contents
        shutil.rmtree(folder_path)
        print(f"✅ Deleted folder: {folder_path}")
    else:
        # If the folder doesn't exist, print a warning message
        print(f"⚠️ Folder does not exist: {folder_path}")


In [None]:
# Train YOLOv8 from scratch with the specified parameters
train_yolo_from_scratch(
    data_yaml="yolov8.yaml",  # Path to the YAML file that contains dataset and class configuration
    epochs=100,               # Number of epochs to train the model
    imgsz=720,                # Image size (height and width) for training
    batch=8,                  # Batch size for training
    name="yolov8",            # Name of the training run (used for saving checkpoints, logs, etc.)
    model_variant="yolov8n.pt"  # The base YOLOv8 model variant to start training (options: yolov8n.pt, yolov8s.pt, yolov8m.pt, etc.)
)

### Evaluate YOLOv8 Model and Print Metrics

In [None]:
# Define the path to the YOLOv8 training run directory
yolov8 = './runs/train/yolov8'

# Find the best model based on the training run (usually the best model is the one with the lowest validation loss)
best_pt_path = find_best_model(yolov8)

# Evaluate the best model and save the performance metrics
evaluate_and_save_metrics(best_pt_path)


[+] Found best.pt at: runs/train/yolov8/weights/best.pt
Ultralytics 8.3.109 🚀 Python-3.10.13 torch-2.6.0+cu124 CUDA:0 (NVIDIA GeForce RTX 3050 OEM, 7957MiB)


Model summary (fused): 72 layers, 3,008,378 parameters, 0 gradients, 8.1 GFLOPs


[34m[1mval: [0mScanning /home/ssl49/Desktop/Automated-Labeling-for-Aerial-Images-main/Automated_SegmentAndYolo/datasets/new_dataset_yolo_split/val/labels.cache... 1181 images, 0 backgrounds, 0 corrupt: 100%|██████████| 1181/1181 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:10<00:00,  6.85it/s]


                   all       1181      70393      0.485      0.304       0.33      0.207
                  pool          9          9      0.793      0.778      0.864      0.779
            vegetation         75       1780       0.18      0.324      0.233      0.127
                  roof         42         79     0.0648      0.709      0.507      0.368
                  wall         62        255     0.0882      0.196      0.125     0.0696
                window         38        139      0.701      0.223      0.331      0.187
                person         75        637      0.778      0.295      0.368      0.193
                   dog          6         12      0.777      0.167       0.25     0.0788
                   car       1115      53669       0.75      0.342      0.497      0.235
               bicycle         32         53      0.356      0.377      0.334      0.128
                  tree         38        108      0.796      0.519      0.577      0.394
                 truc

In [None]:
# Print per-class metrics from the specified JSON file
print_per_class_metrics("per_class_metrics.json")


Per-Class mAP@0.5:0.95 Metrics:

Class Name      Class ID   mAP@0.5:0.95   
----------------------------------------
unlabeled       0          0.2072         
pool            1          0.7787         
vegetation      2          0.1271         
roof            3          0.3684         
wall            4          0.0696         
window          5          0.1872         
person          6          0.1933         
dog             7          0.0788         
car             8          0.2354         
bicycle         9          0.1279         
tree            10         0.3937         
truck           11         0.0407         
bus             12         0.0            
vehicle         13         0.093          


In [None]:
# Print the metrics from the CSV file found in the specified directory
print_csv_metrics(yolov8)


Found results.csv at: ./runs/train/yolov8/results.csv
Total number of epochs: 96

Training Loss: 2.205500
Precision: 0.553850
Recall: 0.292360
mAP@0.5: 0.324740
mAP@0.5:0.95: 0.201400

Validation Loss: 5.510860


### Clean Up Folders and Process Videos for YOLO Predictions

In [None]:
import shutil
import os

# List of folders to delete
folders_to_delete = ['./datasets/new-videos-predicted-yolo', "./datasets/merged_yolo_dataset", "./datasets/split_videos_dataset"]

# Loop through each folder path in the list
for folder_path in folders_to_delete:
    # Check if the folder exists
    if os.path.exists(folder_path):
        # If it exists, delete the folder and its contents
        shutil.rmtree(folder_path)
        print(f"✅ Deleted folder: {folder_path}")
    else:
        # If the folder does not exist, print a warning message
        print(f"⚠️ Folder does not exist: {folder_path}")


✅ Deleted folder: ./datasets/new-videos-predicted-yolo
✅ Deleted folder: ./datasets/merged_yolo_dataset
✅ Deleted folder: ./datasets/split_videos_dataset


In [None]:
# # # Process all videos and generate predictions
# process_all_videos(best_pt_path, class_id_to_name, list(rare_class_ids),
#                    video_dir='videos', output_base='./datasets/new-videos-predicted-yolo')

# videos_predictions(best_pt_path, class_id_to_name, video_dir='videos', output_base='./datasets/new-videos-predicted-yolo', max_frames=None)

# videos_predictions_rare(
#     yolo_weights_path=best_pt_path,
#     class_id_to_name=class_id_to_name,
#     video_dir='videos',
#     output_base='./datasets/new-videos-predicted-yolo',
#     rare_class_ids=list(rare_class_ids),
#     max_frames=None,
#     conf_threshold=0.5,
#     common_keep_ratio=0.3  # Keep only 30% of common detections
# )

videos_predictions_pixelwise(
    yolo_weights_path=best_pt_path,
    class_id_to_name=class_id_to_name,
    video_dir="videos",
    output_base="./datasets/new-videos-predicted-yolo",
    rare_class_ids=list(rare_class_ids),
    max_frames=None,
    conf_threshold=0.5,
    common_keep_ratio=0.3,
    pixel_diff_threshold=15
)

v2:  84%|████████▍ | 147/175 [04:46<00:54,  1.95s/it]


Processing video: v2
Finished: v2, Total Saved Frames: 16
Processing video: v12
Finished: v12, Total Saved Frames: 235
Processing video: v3
Finished: v3, Total Saved Frames: 11
Processing video: v1
Finished: v1, Total Saved Frames: 1
Processing video: v4
Finished: v4, Total Saved Frames: 13
Processing video: v5
Finished: v5, Total Saved Frames: 98
Processing video: v6
Finished: v6, Total Saved Frames: 60
Processing video: v8
Finished: v8, Total Saved Frames: 108


In [None]:
# Folder with YOLO label files
label_dir = './datasets/new-videos-predicted-yolo/labels'

# Get the rare class IDs from the label files based on the threshold
get_rare_class_ids(label_dir=label_dir, class_id_to_name=class_id_to_name ,rare_threshold=0)


Class-wise instance counts:
Class  2 (vegetation): 285 instances
Class  3 (roof      ): 46 instances
Class  4 (wall      ): 4 instances
Class  5 (window    ): 2 instances
Class  6 (person    ): 72 instances
Class  8 (car       ): 1452 instances
Class  9 (bicycle   ): 3 instances
Class 10 (tree      ): 223 instances
Class 11 (truck     ): 2 instances
Class 13 (vehicle   ): 89 instances
Total Count: 2178

Rare class IDs (threshold < 0): set()


set()

### Split and Merge new Datasets

In [None]:
split_and_move_dataset(source_base_dir="./datasets/new-videos-predicted-yolo",
                           target_base_dir="./datasets/split_videos_dataset",
                           split_ratio=0.8,
                           seed=42)


Moving to train: 100%|██████████| 433/433 [00:00<00:00, 1550.30it/s]
Moving to val: 100%|██████████| 109/109 [00:00<00:00, 1543.06it/s]


[✓] Dataset split completed: 433 train / 109 val samples





In [None]:
merge_yolo_datasets(
    source1='./datasets/new_dataset_yolo_split/train',  # Path to the first source dataset (YOLO format)
    source2='./datasets/split_videos_dataset/train',    # Path to the second source dataset (YOLO format)
    destination='./datasets/merged_yolo_dataset'        # Path where the merged dataset will be saved
)


[+] Merging original dataset...
[+] Merging predicted video dataset...

[+] Merge complete! Merged dataset saved at: ./datasets/merged_yolo_dataset


### Identify Rare Classes in the Merged YOLO Dataset

In [None]:
# Folder with YOLO label files
label_dir = './datasets/merged_yolo_dataset/labels'  # Path to the directory containing YOLO label files

# Get rare class IDs in the dataset based on the given threshold
rare_class_ids = get_rare_class_ids(
    label_dir=label_dir,                # Path to the label directory
    class_id_to_name=class_id_to_name,  # Mapping of class IDs to class names
    rare_threshold=0                    # Set the threshold for class frequency; here, we are considering all classes
)


Class-wise instance counts:
Class  1 (pool      ): 30 instances
Class  2 (vegetation): 5758 instances
Class  3 (roof      ): 313 instances
Class  4 (wall      ): 958 instances
Class  5 (window    ): 378 instances
Class  6 (person    ): 2532 instances
Class  7 (dog       ): 25 instances
Class  8 (car       ): 36216 instances
Class  9 (bicycle   ): 225 instances
Class 10 (tree      ): 607 instances
Class 11 (truck     ): 131 instances
Class 12 (bus       ): 86 instances
Class 13 (vehicle   ): 642 instances
Total Count: 47901

Rare class IDs (threshold < 0): set()


### Clean Up Fine-Tune YOLOv8 Folders and Fine-Tune the Model

In [None]:
import shutil
import os
import glob

# Match all folders starting with 'fine-tune-yolov8' inside './runs/train/'
folders_to_delete = glob.glob('./runs/train/fine-tune-yolov8*')  # Find all folders starting with 'fine-tune-yolov8'

# Iterate through the matched folders
for folder_path in folders_to_delete:
    if os.path.isdir(folder_path):  # Check if the path is a valid directory
        shutil.rmtree(folder_path)  # Delete the folder and its contents
        print(f"✅ Deleted folder: {folder_path}")  # Print a success message
    else:
        print(f"⚠️ Not a directory or doesn't exist: {folder_path}")  # Print a warning if the folder doesn't exist or isn't a directory


✅ Deleted folder: ./runs/train/fine-tune-yolov8


In [None]:
fine_tune_yolo(
    data_yaml="yolo_retrain.yaml",        # Path to the updated dataset YAML file containing information like class names, train/val paths, etc.
    epochs=11,                             # Number of epochs for fine-tuning
    imgsz=720,                             # Image size to be used for training (720x720 pixels in this case)
    batch=8,                              # Batch size used during training (16 images per batch)
    name="fine-tune-yolov8",               # Name for this fine-tuning experiment
    base_model_path=best_pt_path          # Path to the pre-trained YOLO model (the best model from previous training)
)


[+] Fine-tuning model from: runs/train/yolov8/weights/best.pt
Ultralytics 8.3.109 🚀 Python-3.10.13 torch-2.6.0+cu124 CUDA:0 (NVIDIA GeForce RTX 3050 OEM, 7957MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=runs/train/yolov8/weights/best.pt, data=yolo_retrain.yaml, epochs=11, time=None, patience=10, batch=8, imgsz=720, save=True, save_period=-1, cache=False, device=None, workers=8, project=runs/train, name=fine-tune-yolov8, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=True, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frame

[34m[1mtrain: [0mScanning /home/ssl49/Desktop/Automated-Labeling-for-Aerial-Images-main/Automated_SegmentAndYolo/datasets/merged_yolo_dataset/labels... 2787 images, 3 backgrounds, 0 corrupt: 100%|██████████| 2787/2787 [00:00<00:00, 6015.79it/s]

[34m[1mtrain: [0mNew cache created: /home/ssl49/Desktop/Automated-Labeling-for-Aerial-Images-main/Automated_SegmentAndYolo/datasets/merged_yolo_dataset/labels.cache
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))



[34m[1mval: [0mScanning /home/ssl49/Desktop/Automated-Labeling-for-Aerial-Images-main/Automated_SegmentAndYolo/datasets/split_videos_dataset/val/labels... 109 images, 0 backgrounds, 0 corrupt: 100%|██████████| 109/109 [00:00<00:00, 4273.34it/s]

[34m[1mval: [0mNew cache created: /home/ssl49/Desktop/Automated-Labeling-for-Aerial-Images-main/Automated_SegmentAndYolo/datasets/split_videos_dataset/val/labels.cache





Plotting labels to runs/train/fine-tune-yolov8/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.000556, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
Image sizes 736 train, 736 val
Using 8 dataloader workers
Logging results to [1mruns/train/fine-tune-yolov8[0m
Starting training for 11 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/11      4.52G     0.8471     0.5782     0.8889         44        736: 100%|██████████| 349/349 [00:41<00:00,  8.38it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 7/7 [00:00<00:00, 16.52it/s]

                   all        109        421      0.281      0.407      0.253      0.188





Closing dataloader mosaic
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/11      3.65G     0.7591     0.5012     0.8665         20        736: 100%|██████████| 349/349 [00:38<00:00,  9.10it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 7/7 [00:00<00:00, 16.86it/s]


                   all        109        421      0.377      0.509      0.422      0.327

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       3/11      3.67G     0.7646     0.4868      0.868         68        736: 100%|██████████| 349/349 [00:37<00:00,  9.19it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 7/7 [00:00<00:00, 16.38it/s]

                   all        109        421      0.439      0.576      0.483      0.348






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       4/11      3.69G     0.7515     0.4709     0.8642         42        736: 100%|██████████| 349/349 [00:37<00:00,  9.19it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 7/7 [00:00<00:00, 16.11it/s]

                   all        109        421      0.473      0.638      0.564       0.44






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       5/11       3.7G     0.7305     0.4578     0.8595         60        736: 100%|██████████| 349/349 [00:37<00:00,  9.19it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 7/7 [00:00<00:00, 16.89it/s]

                   all        109        421      0.602      0.601      0.595      0.458






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       6/11      3.72G     0.7291     0.4526     0.8617         20        736: 100%|██████████| 349/349 [00:37<00:00,  9.19it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 7/7 [00:00<00:00, 16.84it/s]

                   all        109        421      0.596      0.623      0.614       0.47






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       7/11      3.74G     0.7139      0.443     0.8581         53        736: 100%|██████████| 349/349 [00:37<00:00,  9.19it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 7/7 [00:00<00:00, 17.08it/s]

                   all        109        421      0.541      0.718      0.624      0.493






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       8/11      3.75G     0.7047     0.4377     0.8553         30        736: 100%|██████████| 349/349 [00:37<00:00,  9.20it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 7/7 [00:00<00:00, 17.06it/s]

                   all        109        421      0.628      0.729       0.69      0.549






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       9/11      3.76G     0.6984     0.4319     0.8553         57        736: 100%|██████████| 349/349 [00:37<00:00,  9.19it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 7/7 [00:00<00:00, 17.07it/s]

                   all        109        421      0.659      0.702      0.704      0.567






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      10/11      3.79G     0.6806     0.4219     0.8496         63        736: 100%|██████████| 349/349 [00:37<00:00,  9.19it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 7/7 [00:00<00:00, 16.21it/s]

                   all        109        421      0.643      0.679      0.726      0.594






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      11/11       3.8G     0.6657     0.4137     0.8509         48        736: 100%|██████████| 349/349 [00:37<00:00,  9.19it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 7/7 [00:00<00:00, 16.98it/s]

                   all        109        421      0.638      0.703      0.734      0.613






11 epochs completed in 0.119 hours.
Optimizer stripped from runs/train/fine-tune-yolov8/weights/last.pt, 6.3MB
Optimizer stripped from runs/train/fine-tune-yolov8/weights/best.pt, 6.3MB

Validating runs/train/fine-tune-yolov8/weights/best.pt...
Ultralytics 8.3.109 🚀 Python-3.10.13 torch-2.6.0+cu124 CUDA:0 (NVIDIA GeForce RTX 3050 OEM, 7957MiB)
Model summary (fused): 72 layers, 3,008,378 parameters, 0 gradients, 8.1 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 7/7 [00:01<00:00,  6.87it/s]


                   all        109        421      0.724      0.667      0.747      0.608
            vegetation         44         55      0.844      0.687      0.783      0.643
                  roof         12         13      0.472      0.483       0.47      0.354
                person         15         15      0.873        0.6       0.76      0.597
                   car         74        281      0.929      0.925      0.963       0.78
                  tree         24         33      0.391      0.848      0.763       0.67
               vehicle         16         24      0.837      0.458      0.741      0.601
Speed: 0.3ms preprocess, 6.6ms inference, 0.0ms loss, 0.3ms postprocess per image
Results saved to [1mruns/train/fine-tune-yolov8[0m


### Process and Compare Metrics for Fine-Tuned YOLOv8 Model

In [None]:
new_path = './runs/train/fine-tune-yolov8'  # Path to the folder where the fine-tuned YOLO model's training results are stored
print_csv_metrics(new_path)  # This will process and print the final metrics from the 'results.csv' file in the specified folder


Found results.csv at: ./runs/train/fine-tune-yolov8/results.csv
Total number of epochs: 11

Training Loss: 1.930220
Precision: 0.638380
Recall: 0.703310
mAP@0.5: 0.733940
mAP@0.5:0.95: 0.613070

Validation Loss: 2.457630


In [None]:
# Find the best model (the one with the best performance) from the fine-tuned YOLOv8 training results
best_pt_path = find_best_model(new_path)

# Evaluate the best model and save the performance metrics in a JSON file
evaluate_and_save_metrics(best_pt_path, output_json_path="per_class_metrics_retrain.json")


[+] Found best.pt at: runs/train/fine-tune-yolov8/weights/best.pt
Ultralytics 8.3.109 🚀 Python-3.10.13 torch-2.6.0+cu124 CUDA:0 (NVIDIA GeForce RTX 3050 OEM, 7957MiB)


Model summary (fused): 72 layers, 3,008,378 parameters, 0 gradients, 8.1 GFLOPs


[34m[1mval: [0mScanning /home/ssl49/Desktop/Automated-Labeling-for-Aerial-Images-main/Automated_SegmentAndYolo/datasets/split_videos_dataset/val/labels.cache... 109 images, 0 backgrounds, 0 corrupt: 100%|██████████| 109/109 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 7/7 [00:01<00:00,  5.88it/s]


                   all        109        421      0.639      0.705      0.734      0.614
            vegetation         44         55      0.656        0.8       0.77      0.652
                  roof         12         13      0.472      0.538      0.476      0.394
                person         15         15      0.744      0.582      0.711      0.546
                   car         74        281      0.925      0.918      0.969      0.801
                  tree         24         33      0.283      0.848      0.737      0.653
               vehicle         16         24      0.753      0.542       0.74      0.639
Speed: 0.8ms preprocess, 3.3ms inference, 0.0ms loss, 0.6ms postprocess per image
Results saved to [1mruns/detect/val4[0m
[+] Saved per-class metrics to per_class_metrics_retrain.json


In [None]:
# Print the per-class metrics from the saved JSON file
print_per_class_metrics("per_class_metrics_retrain.json")


Per-Class mAP@0.5:0.95 Metrics:

Class Name      Class ID   mAP@0.5:0.95   
----------------------------------------
unlabeled       0          0.7056         
pool            1          0.7056         
vegetation      2          0.7456         
roof            3          0.6348         
wall            4          0.7056         
window          5          0.7056         
person          6          0.8149         
dog             7          0.7056         
car             8          0.8543         
bicycle         9          0.7056         
tree            10         0.4793         
truck           11         0.7056         
bus             12         0.7056         
vehicle         13         0.7046         


In [None]:
# Define paths for the fine-tuned and original YOLOv8 model results
new_path = './runs/train/fine-tune-yolov8'
old_path = './runs/train/yolov8'

# Find the results.csv files for both models
results_csv_path = find_results_csv(new_path)  # Fine-tuned YOLOv8
results_csv_path_1 = find_results_csv(old_path)  # Original YOLOv8

# Compare the final epoch metrics between the two models
compare_final_metrics(results_csv_path_1, results_csv_path)

Changes in Metrics Before and After Retraning:

Metric                    Before     After      Diff       Trend
-----------------------------------------------------------------
Box Loss (Train)          0.73327    0.98916    0.25589    [92m Increase[0m
Cls Loss (Train)          0.45441    0.70747    0.25306    [92m Increase[0m
DFL Loss (Train)          0.86014    0.94087    0.08073    [92m Increase[0m
Precision                 0.64380    0.62733    -0.01647   [91m Decrease[0m
Recall                    0.26823    0.53795    0.26972    [92m Increase[0m
mAP@0.5                   0.28205    0.55362    0.27157    [92m Increase[0m
mAP@0.5:0.95              0.17731    0.39938    0.22207    [92m Increase[0m
Box Loss (Val)            2.02042    0.99684    -1.02358   [91m Decrease[0m
Cls Loss (Val)            2.42410    0.97288    -1.45122   [91m Decrease[0m
DFL Loss (Val)            1.05216    1.01282    -0.03934   [91m Decrease[0m


In [None]:
compare_maps("per_class_metrics.json", "per_class_metrics_retrain.json")


mAP@0.5:0.95 Differences Before and After Retraning:

Class           Before     After      Diff       Trend
------------------------------------------------------------
unlabeled       0.1855     0.0715     -0.1140    [91m Decrease[0m
pool            0.7305     0.0715     -0.6590    [91m Decrease[0m
vegetation      0.0798     0.0208     -0.0590    [91m Decrease[0m
roof            0.3768     0.0081     -0.3687    [91m Decrease[0m
wall            0.0513     0.0715     0.0202     [92m Increase[0m
window          0.1524     0.0715     -0.0809    [91m Decrease[0m
person          0.1884     0.0166     -0.1718    [91m Decrease[0m
dog             0.0029     0.0715     0.0686     [92m Increase[0m
car             0.2459     0.1107     -0.1352    [91m Decrease[0m
bicycle         0.1027     0.0715     -0.0312    [91m Decrease[0m
tree            0.3563     0.0636     -0.2927    [91m Decrease[0m
truck           0.0291     0.0715     0.0424     [92m Increase[0m
bus          

### Generate Predictions for Fine-Tuned and Original YOLOv8 Models

In [None]:
# Set paths for the fine-tuned and original YOLOv8 models
new_path = './runs/train/fine-tune-yolov8'
old_path = './runs/train/yolov8'

# Find the best model (checkpoint) from the fine-tuned YOLOv8 run
best_pt_path_retrain = find_best_model(new_path)

# Find the best model (checkpoint) from the original YOLOv8 run
best_pt_path = find_best_model(old_path)

[+] Found best.pt at: runs/train/fine-tune-yolov8/weights/best.pt
[+] Found best.pt at: runs/train/yolov8/weights/best.pt


In [None]:
videos_predictions(best_pt_path, class_id_to_name, video_dir='videos', output_base='./datasets/final_output', max_frames=None)

  0%|          | 0/8 [00:00<?, ?it/s]







[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A

DONE: v1 — Processed 642 frames






[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A

DONE: v12 — Processed 897 frames






[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A

DONE: v2 — Processed 175 frames






[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



v3: 100%|██████████| 176/176 [00:01<00:00, 115.44it/s]
 50%|█████     | 4/8 [01:41<01:15, 18.83s/it]

DONE: v3 — Processed 176 frames






[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



v4: 100%|██████████| 253/253 [00:05<00:00, 49.96it/s]
 62%|██████▎   | 5/8 [01:46<00:41, 13.87s/it]

DONE: v4 — Processed 253 frames






[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A[A[A[A



[A

In [None]:
videos_predictions(best_pt_path_retrain, class_id_to_name, video_dir='videos', output_base='./datasets/final_output_retrain', max_frames=None)

  0%|          | 0/8 [00:00<?, ?it/s]



v1: 100%|██████████| 642/642 [00:36<00:00, 17.73it/s]
 12%|█▎        | 1/8 [00:36<04:13, 36.27s/it]

DONE: v1 — Processed 642 frames


v12: 100%|██████████| 897/897 [00:58<00:00, 15.32it/s]
 25%|██▌       | 2/8 [01:34<04:56, 49.43s/it]

DONE: v12 — Processed 897 frames


v2: 100%|██████████| 175/175 [00:10<00:00, 16.61it/s]
 38%|███▊      | 3/8 [01:45<02:38, 31.69s/it]

DONE: v2 — Processed 175 frames


v3: 100%|██████████| 176/176 [00:01<00:00, 124.04it/s]
 50%|█████     | 4/8 [01:46<01:18, 19.74s/it]

DONE: v3 — Processed 176 frames


v4: 100%|██████████| 253/253 [00:04<00:00, 50.65it/s]
 62%|██████▎   | 5/8 [01:51<00:43, 14.43s/it]

DONE: v4 — Processed 253 frames


v5: 100%|██████████| 1114/1114 [01:09<00:00, 15.92it/s]
 75%|███████▌  | 6/8 [03:02<01:06, 33.36s/it]

DONE: v5 — Processed 1114 frames


v6: 100%|██████████| 483/483 [00:27<00:00, 17.36it/s]
 88%|████████▊ | 7/8 [03:29<00:31, 31.58s/it]

DONE: v6 — Processed 483 frames


v8: 100%|██████████| 949/949 [00:27<00:00, 34.24it/s]
100%|██████████| 8/8 [03:57<00:00, 29.71s/it]

DONE: v8 — Processed 949 frames



