## Builting Functions

### Initial Setup

In [1]:
import torch

# Print the CUDA version PyTorch is built with
print("Built CUDA Version:", torch.version.cuda)

# Print the CUDA version runtime (if CUDA is available)
if torch.cuda.is_available():
    print("CUDA Runtime Version:", torch._C._cuda_getCompiledVersion())
    print("GPU Name:", torch.cuda.get_device_name(0))
else:
    print("CUDA is not available.")


Built CUDA Version: 12.4
CUDA Runtime Version: 12040
GPU Name: NVIDIA GeForce RTX 3050 OEM


In [None]:
class_id_to_name = {
    0:  ('road', [28, 42, 168]),
    1:  ('pool', [0, 50, 89]),
    2:  ('vegetation', [107, 142, 35]),
    3:  ('roof', [70, 70, 70]),
    4:  ('wall', [102, 102, 156]),
    5:  ('window', [254, 228, 12]),
    6:  ('person', [255, 22, 96]),
    7:  ('dog', [102, 51, 0]),
    8:  ('car', [9, 143, 150]),
    9:  ('bicycle', [119, 11, 32]),
    10: ('tree', [51, 51, 0]),
    11: ('truck', [160, 160, 60]),   # added truck
    12: ('bus', [200, 80, 80]),      # added bus
    13: ('vehicle', [20, 80, 80]),      # added bus
}

### Install packages

In [None]:
# !pip install numpy
# !pip install opencv-python
# !pip install pillow
# !pip install matplotlib
# !pip install tqdm
# !pip install scikit-learn
# !pip install torch torchvision
# !pip install ultralytics
# !pip install opencv-python

In [4]:
# !pip uninstall torch torchvision torchaudio
# !pip cache purge  # clean out pip's install cache
# !pip install torch torchvision torchaudio --force-reinstall

In [5]:
# Core packages
import os
import shutil
import json
import zipfile
import xml.etree.ElementTree as ET
from pathlib import Path

# Math and array handling
import numpy as np
from sklearn.model_selection import train_test_split

# Image and visualization
import cv2
from PIL import Image, ImageDraw
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches

# Progress bar
from tqdm.auto import tqdm

# Deep Learning Frameworks
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.models.segmentation as segmentation
import torchvision.transforms as transforms

# Object Detection and Segmentation
from ultralytics import YOLO

from torch.utils.data import DataLoader
import gc

os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"

# Automatically use GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

%matplotlib inline

import random
import gdown

from collections import defaultdict
from glob import glob

from PIL import Image

# save this as split_uavdt_train_val.py

import os
import shutil
from glob import glob
from sklearn.model_selection import train_test_split
from tqdm.auto import tqdm

import os
from PIL import Image

import os
import random
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import cv2

import os
import random
import numpy as np
from PIL import Image, ImageDraw, ImageFont
import matplotlib.pyplot as plt
from ultralytics import YOLO

import os
import pandas as pd
# from .autonotebook import tqdm as notebook_tqdm

import os
from collections import defaultdict

  from .autonotebook import tqdm as notebook_tqdm


### Download Datsets 

In [6]:
def semantic_drone_dataset_download(gdrive_url, extract_to="extracted"):
    # Convert shared drive URL to direct download URL
    file_id = gdrive_url.split("/d/")[1].split("/")[0]
    download_url = f"https://drive.google.com/uc?id={file_id}"

    # Create output folder
    os.makedirs(extract_to, exist_ok=True)

    zip_path = os.path.join(extract_to, "downloaded.zip")

    print("[INFO] Downloading ZIP from Google Drive...")
    gdown.download(download_url, zip_path, quiet=False)

    print("[INFO] Extracting ZIP...")
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(extract_to)

    # Optionally, remove the ZIP file after extraction
    os.remove(zip_path)

    print(f"[DONE] Extracted files to: {extract_to}")

def uavdt_dataset_download(gdrive_url, extract_to="extracted"):
    # Convert shared drive URL to direct download URL
    file_id = gdrive_url.split("/d/")[1].split("/")[0]
    download_url = f"https://drive.google.com/uc?id={file_id}"

    # Create output folder
    os.makedirs(extract_to, exist_ok=True)

    zip_path = os.path.join(extract_to, "downloaded.zip")

    print("[INFO] Downloading ZIP from Google Drive...")
    gdown.download(download_url, zip_path, quiet=False)

    print("[INFO] Extracting ZIP...")
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(extract_to)

    # Optionally, remove the ZIP file after extraction
    os.remove(zip_path)

    print(f"[DONE] Extracted files to: {extract_to}")



### Convert Two Datsets into yolo format

In [7]:
# ----------------------------
# Parse polygon and convert to YOLO bbox
# ----------------------------
# Semantic drone datasets 
def parse_yolo_style_bbox_from_xml(xml_path, class_id_to_name):
    tree = ET.parse(xml_path)
    root = tree.getroot()
    bboxes = []
    for obj in root.findall('object'):
        class_name = obj.find('name').text
        if class_name in [value[0] for value in class_id_to_name.values()]:
            polygon = obj.find('polygon')
            if polygon is not None:
                points = polygon.findall('pt')
                coords = [(float(pt.find('x').text), float(pt.find('y').text)) for pt in points]
                x_min = min(coord[0] for coord in coords)
                y_min = min(coord[1] for coord in coords)
                x_max = max(coord[0] for coord in coords)
                y_max = max(coord[1] for coord in coords)
                bboxes.append(((x_min, y_min), (x_max, y_max), class_name))
    return bboxes


# ----------------------------
# Save YOLO-format txt
# ----------------------------
def save_yolo_format(image_id, bboxes, image_width, image_height, output_path, class_id_to_name):
    with open(output_path, 'w') as f:
        for (x_min, y_min), (x_max, y_max), class_name in bboxes:
            class_id = next(cid for cid, (name, _) in class_id_to_name.items() if name == class_name)
            x_center = (x_min + x_max) / 2 / image_width
            y_center = (y_min + y_max) / 2 / image_height
            width = (x_max - x_min) / image_width
            height = (y_max - y_min) / image_height
            f.write(f"{class_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}\n")


# ----------------------------
# Convert dataset (YOLO only)
# ----------------------------
def convert_fulldataset_yolo_only(dataset_path, output_dir, class_id_to_name):
    image_ids = [img.split('.')[0] for img in os.listdir(f"{dataset_path}/images") if img.endswith(".jpg")]

    os.makedirs(f"{output_dir}/images", exist_ok=True)
    os.makedirs(f"{output_dir}/labels", exist_ok=True)

    for image_id in tqdm(image_ids, desc="Converting to YOLO"):
        img_path = f"{dataset_path}/images/{image_id}.jpg"
        bbox_xml_path = f"{dataset_path}/gt/bounding_box/label_me_xml/{image_id}.xml"
        semantic_xml_path = f"{dataset_path}/gt/semantic/label_me_xml/{image_id}.xml"

        if not os.path.exists(img_path):
            print(f"[WARNING] Image not found: {img_path}, skipping...")
            continue

        try:
            bboxes1 = parse_yolo_style_bbox_from_xml(bbox_xml_path, class_id_to_name)
            bboxes2 = parse_yolo_style_bbox_from_xml(semantic_xml_path, class_id_to_name)
            all_bboxes = bboxes1 + bboxes2
        except Exception as e:
            print(f"[WARNING] Skipping image {image_id} due to parse error: {e}")
            continue

        try:
            image = Image.open(img_path)
            image_np = np.array(image)
        except Exception as e:
            print(f"[WARNING] Could not load image {image_id}: {e}")
            continue

        # Save image
        image.save(f"{output_dir}/images/{image_id}.jpg")

        # Save YOLO labels
        yolo_annotation_path = f"{output_dir}/labels/{image_id}.txt"
        save_yolo_format(image_id, all_bboxes, image_np.shape[1], image_np.shape[0], yolo_annotation_path, class_id_to_name)

    print("✅ YOLO-format annotation conversion complete!")

In [8]:
import os
import shutil
import cv2
from glob import glob
from collections import defaultdict
from sklearn.model_selection import train_test_split

# 🧠 Map UAVDT class to extended class_id_to_name
uavdt_to_extended = {
    0: 8,   # car
    1: 11,  # truck
    2: 12,  # bus
    3: 13
}

# === Function to Convert Single Annotation to YOLO Format ===
def convert_annotation(anno_path, label_path, image_path, stats):
    if not os.path.exists(image_path):
        stats["missing_image"] += 1
        return

    try:
        img = cv2.imread(image_path)
        height, width = img.shape[:2]
    except:
        stats["missing_image"] += 1
        return

    with open(anno_path, 'r') as fin, open(label_path, 'w') as fout:
        for line in fin:
            parts = line.strip().split(',')
            if len(parts) < 8:
                stats["malformed"] += 1
                continue

            try:
                x, y, w, h = map(float, parts[0:4])
                original_cls = int(parts[5])

                # 🔁 Convert original class to extended class
                if original_cls not in uavdt_to_extended:
                    stats["skipped"][original_cls] += 1
                    continue

                cls = uavdt_to_extended[original_cls]

                x_center = (x + w / 2) / width
                y_center = (y + h / 2) / height
                w /= width
                h /= height

                if not (0 <= x_center <= 1 and 0 <= y_center <= 1 and w > 0 and h > 0):
                    stats["skipped"][cls] += 1
                    continue

                fout.write(f"{cls} {x_center:.6f} {y_center:.6f} {w:.6f} {h:.6f}\n")
                stats["converted"] += 1
            except Exception:
                stats["malformed"] += 1
                continue

            stats["total"] += 1

# === Step 1: Convert UAVDT annotations to YOLO format ===
def convert_dataset(root_dir):
    annotation_paths = glob(os.path.join(root_dir, "M*/annotations/*.txt"))
    total_files = len(annotation_paths)

    stats = {
        "total": 0,
        "converted": 0,
        "malformed": 0,
        "missing_image": 0,
        "skipped": defaultdict(int)
    }

    print(f"🔄 Converting {total_files} annotation files to YOLO format...")

    for anno_path in tqdm(annotation_paths, desc="Converting", unit="file"):
        sequence_dir = os.path.dirname(os.path.dirname(anno_path))  # Mxxxx
        file_name = os.path.basename(anno_path)

        label_dir = os.path.join(sequence_dir, "labels")
        os.makedirs(label_dir, exist_ok=True)

        label_path = os.path.join(label_dir, file_name)

        # Construct image path
        image_name = file_name.replace(".txt", ".jpg")
        image_path = os.path.join(sequence_dir, "images", image_name)

        convert_annotation(anno_path, label_path, image_path, stats)

    print("\nConversion complete.")
    print(f"Total boxes:     {stats['total']}")
    print(f"Converted boxes: {stats['converted']}")
    print(f"Skipped boxes:   {sum(stats['skipped'].values())}")
    for cls, count in sorted(stats["skipped"].items()):
        print(f"   - Skipped class {cls}: {count}")
    print(f"Malformed lines: {stats['malformed']}")
    print(f"Missing images: {stats['missing_image']}")

# === Step 2: Copy to train/val structure ===
def copy_split_sequences(src_root, dst_root, train_ratio=0.8):
    all_sequences = sorted(glob(os.path.join(src_root, "M*")))
    train_seqs, val_seqs = train_test_split(all_sequences, train_size=train_ratio, random_state=42)

    for split_name, split_list in zip(['train', 'val'], [train_seqs, val_seqs]):
        for seq_path in tqdm(split_list, desc=f"Copying {split_name}"):
            images_src = os.path.join(seq_path, "images")
            labels_src = os.path.join(seq_path, "labels")

            images_dst = os.path.join(dst_root, split_name, "images")
            labels_dst = os.path.join(dst_root, split_name, "labels")

            os.makedirs(images_dst, exist_ok=True)
            os.makedirs(labels_dst, exist_ok=True)

            for img_file in glob(os.path.join(images_src, "*.jpg")):
                shutil.copy(img_file, os.path.join(images_dst, os.path.basename(img_file)))

            for label_file in glob(os.path.join(labels_src, "*.txt")):
                shutil.copy(label_file, os.path.join(labels_dst, os.path.basename(label_file)))

    print("\nDataset split into 'train/' and 'val/' with images and YOLO labels.")


### Convert into train and Val sets

In [9]:
# Semantic Drone Datasets
def move_files(file_list, 
               source_image_dir, 
               source_annotation_dir,
               target_image_dir, 
               target_annotation_dir):
    
    os.makedirs(target_image_dir, exist_ok=True)
    os.makedirs(target_annotation_dir, exist_ok=True)

    for image_id in tqdm(file_list, desc=f"Moving to {os.path.basename(os.path.dirname(target_image_dir))}"):
        image_path = os.path.join(source_image_dir, f"{image_id}.jpg")
        annotation_path = os.path.join(source_annotation_dir, f"{image_id}.txt")

        target_image_path = os.path.join(target_image_dir, f"{image_id}.jpg")
        target_annotation_path = os.path.join(target_annotation_dir, f"{image_id}.txt")

        if os.path.exists(image_path):
            shutil.copy(image_path, target_image_path)
        else:
            print(f"[Warning] Missing image: {image_path}")

        if os.path.exists(annotation_path):
            shutil.copy(annotation_path, target_annotation_path)
        else:
            print(f"[Warning] Missing annotation: {annotation_path}")

def split_and_move_dataset(source_base_dir="./datasets/semantic_yolo",
                           target_base_dir="./datasets/new_dataset_yolo_split",
                           split_ratio=0.8,
                           seed=42):
    random.seed(seed)

    image_dir = os.path.join(source_base_dir, "images")
    label_dir = os.path.join(source_base_dir, "labels")

    image_ids = [os.path.splitext(f)[0] for f in os.listdir(image_dir) if f.endswith(".jpg")]
    random.shuffle(image_ids)

    split_idx = int(len(image_ids) * split_ratio)
    train_ids = image_ids[:split_idx]
    val_ids = image_ids[split_idx:]

    # Train
    move_files(train_ids,
               source_image_dir=image_dir,
               source_annotation_dir=label_dir,
               target_image_dir=os.path.join(target_base_dir, "train/images"),
               target_annotation_dir=os.path.join(target_base_dir, "train/labels"))

    # Val
    move_files(val_ids,
               source_image_dir=image_dir,
               source_annotation_dir=label_dir,
               target_image_dir=os.path.join(target_base_dir, "val/images"),
               target_annotation_dir=os.path.join(target_base_dir, "val/labels"))

    print(f"\n[✓] Dataset split completed: {len(train_ids)} train / {len(val_ids)} val samples")


### Normalize Labels

In [10]:
def normalize_label_file(label_file, img_width, img_height):
    """
    Normalize the label coordinates in a label file to ensure they are within [0, 1] range.
    """
    with open(label_file, 'r') as f:
        lines = f.readlines()
    
    with open(label_file, 'w') as f:
        for line in lines:
            parts = line.strip().split()
            class_id = int(parts[0])
            x_center, y_center, width, height = map(float, parts[1:])
            
            # Normalize coordinates to ensure they are within the range [0, 1]
            x_center = min(1.0, max(0.0, x_center))
            y_center = min(1.0, max(0.0, y_center))
            width = min(1.0, max(0.0, width))
            height = min(1.0, max(0.0, height))

            # Write normalized values back to file
            f.write(f"{class_id} {x_center} {y_center} {width} {height}\n")


def get_image_size(img_path):
    """
    Get the width and height of the image to normalize the coordinates properly.
    """
    with Image.open(img_path) as img:
        return img.size  # returns (width, height)


def normalize_all_labels(labels_dir, img_dir):
    """
    Normalize all label files in the specified directory.
    """
    for label_file in tqdm(os.listdir(labels_dir)):
       
        if label_file.endswith('.txt'):  # Process only label files
            label_path = os.path.join(labels_dir, label_file)
            img_path = os.path.join(img_dir, label_file.replace('.txt', '.jpg'))  # Assuming JPG images
            if os.path.exists(img_path):
                # Get image dimensions to normalize the labels
                img_width, img_height = get_image_size(img_path)
                # print(f"Normalizing {label_file}...")
                normalize_label_file(label_path, img_width, img_height)
            else:
                print(f"Warning: Image for label {label_file} not found!")
    print("Normalize Complete")



### Training v8 model functions

In [11]:
def train_yolo(data_yaml, epochs, imgsz, batch, name, model, dir):
    if(model):
        model = YOLO(dir)
    else:
        model = YOLO("yolov8m.pt")

    model.train(
        data=data_yaml,
        epochs=epochs,
        imgsz=imgsz,
        batch=batch,
        name=name,
        project="runs/train",
        patience=30,  # Early stopping
        augment=True,  # Apply augmentations
        degrees=10,  # Image rotation
        scale=0.5,  # Scale range
        flipud=0.2,  # Vertical flip
        fliplr=0.5,  # Horizontal flip
        hsv_h=0.015,  # Hue augmentation
        hsv_s=0.7,  # Saturation augmentation
        hsv_v=0.4,  # Value augmentation
        mosaic=1.0,  # Mosaic augmentation
        mixup=0.2,  # Mixup augmentation
        lr0=0.01,  # Initial learning rate (you can tune this)
        lrf=0.01,  # Learning rate final factor (for cosine annealing)
        verbose=True  # Print progress
    )


### Print val metrics

In [12]:
import json
from ultralytics import YOLO

def load_yolo_model(model_path):
    return YOLO(model_path)

def run_model_validation(model):
    return model.val()

def extract_per_class_metrics(results):
    """
    Extracts mAP@0.5:0.95 per class from results.
    NOTE: Only mAP@0.5:0.95 is available via `results.box.maps`
    """
    per_class_metrics = {}
    if hasattr(results.box, 'maps') and results.box.maps is not None:
        maps = results.box.maps  # This is a NumPy array [num_classes]
        for i, name in results.names.items():
            per_class_metrics[name] = {
                "class_id": i,
                "mAP@0.5:0.95": round(float(maps[i]), 4)
            }
    else:
        print("⚠️ No per-class mAP@0.5:0.95 data found.")
    return per_class_metrics

def save_metrics_to_json(metrics, output_path):
    with open(output_path, "w") as f:
        json.dump(metrics, f, indent=4)
    print(f"✅ Saved per-class metrics to {output_path}")

def evaluate_and_save_metrics(model_path, output_json_path="per_class_metrics.json"):
    model = load_yolo_model(model_path)
    results = run_model_validation(model)
    metrics = extract_per_class_metrics(results)
    save_metrics_to_json(metrics, output_json_path)



In [13]:
import json

def print_per_class_metrics(json_path="per_class_metrics.json"):
    with open(json_path, "r") as f:
        metrics = json.load(f)
    
    print("📊 Per-Class mAP@0.5:0.95 Metrics:\n")
    print(f"{'Class Name':<15} {'Class ID':<10} {'mAP@0.5:0.95':<15}")
    print("-" * 40)
    
    for name, data in metrics.items():
        print(f"{name:<15} {data['class_id']:<10} {data['mAP@0.5:0.95']:<15}")


### Find best model path after training 

In [14]:
def find_best_model(base_dir='runs_yolo/'):
    best_paths = list(Path(base_dir).rglob('best.pt'))
    if not best_paths:
        raise FileNotFoundError("No 'best.pt' file found in the 'runs/' directory.")
    
    # Optionally, sort by latest modified time
    best_paths.sort(key=lambda p: p.stat().st_mtime, reverse=True)
    
    print(f"✅ Found best.pt at: {best_paths[0]}")
    return str(best_paths[0])


### Prediction on vdieos

In [15]:

# ========== FRAME PROCESSING ==========
def process_frame_1(frame, yolo_model, w, h, class_id_to_name):
    annotated = frame.copy()
    results = yolo_model(annotated, verbose=False)[0]
    boxes = results.boxes.xyxy.cpu().numpy()
    class_ids = results.boxes.cls.cpu().numpy()

    for box, cls_id in zip(boxes, class_ids):
        x1, y1, x2, y2 = map(int, box)
        class_name, color = class_id_to_name[int(cls_id)]
        cv2.rectangle(annotated, (x1, y1), (x2, y2), color, 2)
        cv2.putText(annotated, class_name, (x1, max(y1 - 10, 10)),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 255, 0), 2)

    return annotated, boxes, class_ids

# ========== VIDEO CAPTURE ==========
def setup_video_capture_1(video_path):
    cap = cv2.VideoCapture(video_path)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    return cap, total_frames, fps, w, h

# ========== MAIN FUNCTION ==========
def videos_predictions(yolo_weights_path, class_id_to_name, video_dir='videos', output_base='./datatsets/opt', max_frames=None):
    yolo_model = YOLO(yolo_weights_path)

    image_out_dir = os.path.join(output_base, 'images')
    label_out_dir = os.path.join(output_base, 'labels')
    output_video_dir = os.path.join(output_base, 'output')

    os.makedirs(image_out_dir, exist_ok=True)
    os.makedirs(label_out_dir, exist_ok=True)
    os.makedirs(output_video_dir, exist_ok=True)

    for video_file in tqdm(sorted(os.listdir(video_dir))):
        if not video_file.lower().endswith(".mp4"):
            continue

        video_id = os.path.splitext(video_file)[0]
        video_path = os.path.join(video_dir, video_file)
        output_video_path = os.path.join(output_video_dir, f"{video_id}.mp4")

        print(f"\n========== STARTED: {video_id} ==========")
        cap, total_frames, fps, w, h = setup_video_capture_1(video_path)
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        writer = cv2.VideoWriter(output_video_path, fourcc, fps, (w, h))

        frame_count = 0
        pbar = tqdm(total=max_frames if max_frames else total_frames, desc=video_id)

        while True:
            ret, frame = cap.read()
            if not ret or (max_frames and frame_count >= max_frames):
                break

            annotated_bgr, boxes, class_ids = process_frame_1(frame, yolo_model, w, h, class_id_to_name)

            # ✅ Save original image
            img_filename = f'{video_id}_{frame_count:04d}.jpg'
            img_path = os.path.join(image_out_dir, img_filename)
            cv2.imwrite(img_path, frame)

            # ✅ Save YOLO-format label
            label_filename = f'{video_id}_{frame_count:04d}.txt'
            label_path = os.path.join(label_out_dir, label_filename)
            with open(label_path, 'w') as f:
                for box, cls_id in zip(boxes, class_ids):
                    x1, y1, x2, y2 = box
                    w_box = x2 - x1
                    h_box = y2 - y1
                    cx = x1 + w_box / 2
                    cy = y1 + h_box / 2
                    f.write(f"{int(cls_id)} {cx/w:.6f} {cy/h:.6f} {w_box/w:.6f} {h_box/h:.6f}\n")

            writer.write(annotated_bgr)
            frame_count += 1
            pbar.update(1)

        cap.release()
        writer.release()
        pbar.close()
        print(f"DONE: {video_id} — Processed {frame_count} frames")


In [16]:
import os
import cv2
import numpy as np
from ultralytics import YOLO
from tqdm import tqdm

# ========== FRAME PROCESSING ==========
def process_frame(frame, yolo_model, w, h, class_id_to_name, valid_class_ids, conf_threshold=0.5):
    annotated = frame.copy()
    results = yolo_model(annotated, verbose=False)[0]

    # Filter by confidence
    mask = results.boxes.conf > conf_threshold
    boxes = results.boxes.xyxy[mask].cpu().numpy()
    class_ids = results.boxes.cls[mask].cpu().numpy()
    confs = results.boxes.conf[mask].cpu().numpy()

    filtered_boxes, filtered_ids, filtered_confs = [], [], []

    for box, cls_id, conf in zip(boxes, class_ids, confs):
        if int(cls_id) in valid_class_ids:
            filtered_boxes.append(box)
            filtered_ids.append(cls_id)
            filtered_confs.append(conf)

            x1, y1, x2, y2 = map(int, box)
            class_name, color = class_id_to_name[int(cls_id)]
            label = f"{class_name} {conf:.2f}"
            cv2.rectangle(annotated, (x1, y1), (x2, y2), color, 2)
            cv2.putText(annotated, label, (x1, max(y1 - 10, 10)),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 255, 0), 2)

    return annotated, filtered_boxes, filtered_ids

# ========== VIDEO CAPTURE ==========
def setup_video_capture(video_path):
    cap = cv2.VideoCapture(video_path)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    return cap, total_frames, fps, w, h

# ========== MAIN FUNCTION ==========
def process_all_videos(yolo_weights_path, class_id_to_name, underrepresented_class_ids,
                       video_dir='videos', output_base='./datasets/opt', max_frames=None):
    yolo_model = YOLO(yolo_weights_path)

    image_out_dir = os.path.join(output_base, 'images')
    label_out_dir = os.path.join(output_base, 'labels')
    output_video_dir = os.path.join(output_base, 'output')

    os.makedirs(image_out_dir, exist_ok=True)
    os.makedirs(label_out_dir, exist_ok=True)
    os.makedirs(output_video_dir, exist_ok=True)

    for video_file in tqdm(sorted(os.listdir(video_dir))):
        if not video_file.lower().endswith(".mp4"):
            continue

        video_id = os.path.splitext(video_file)[0]
        video_path = os.path.join(video_dir, video_file)
        output_video_path = os.path.join(output_video_dir, f"{video_id}.mp4")

        print(f"\n========== STARTED: {video_id} ==========")
        cap, total_frames, fps, w, h = setup_video_capture(video_path)
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        writer = cv2.VideoWriter(output_video_path, fourcc, fps, (w, h))

        frame_count = 0
        pbar = tqdm(total=max_frames if max_frames else total_frames, desc=video_id)

        while True:
            ret, frame = cap.read()
            if not ret or (max_frames and frame_count >= max_frames):
                break

            annotated_bgr, boxes, class_ids = process_frame(
                frame, yolo_model, w, h, class_id_to_name, underrepresented_class_ids)

            # ✅ Save original image
            img_filename = f'{video_id}_{frame_count:04d}.jpg'
            img_path = os.path.join(image_out_dir, img_filename)
            cv2.imwrite(img_path, frame)

            # ✅ Save YOLO-format label (only if boxes found)
            if boxes:
                label_filename = f'{video_id}_{frame_count:04d}.txt'
                label_path = os.path.join(label_out_dir, label_filename)
                with open(label_path, 'w') as f:
                    for box, cls_id in zip(boxes, class_ids):
                        x1, y1, x2, y2 = box
                        w_box = x2 - x1
                        h_box = y2 - y1
                        cx = x1 + w_box / 2
                        cy = y1 + h_box / 2
                        f.write(f"{int(cls_id)} {cx/w:.6f} {cy/h:.6f} {w_box/w:.6f} {h_box/h:.6f}\n")

            writer.write(annotated_bgr)
            frame_count += 1
            pbar.update(1)

        cap.release()
        writer.release()
        pbar.close()
        print(f"DONE: {video_id} — Processed {frame_count} frames")


### Get Class Instances

In [17]:
import os
from collections import defaultdict

def get_rare_class_ids(label_dir, class_id_to_name, rare_threshold=1000):
    class_counts = defaultdict(int)

    for label_file in os.listdir(label_dir):
        if not label_file.endswith('.txt'):
            continue
        with open(os.path.join(label_dir, label_file), 'r') as f:
            for line in f:
                parts = line.strip().split()
                if len(parts) >= 1:
                    cls_id = int(parts[0])
                    class_counts[cls_id] += 1

    print("🔍 Class-wise instance counts:")
    for cls_id in sorted(class_counts.keys()):
        name = class_id_to_name.get(cls_id, ("Unknown", []))[0]
        count = class_counts[cls_id]
        print(f"Class {cls_id:2d} ({name:10s}): {count} instances")

    rare_class_ids = {cls_id for cls_id, count in class_counts.items() if count < rare_threshold}
    print(f"\n✅ Rare class IDs (threshold < {rare_threshold}): {rare_class_ids}")

    return rare_class_ids

### Merge predciton and previous datatsets

In [18]:
import os
import shutil

def merge_yolo_datasets(source1, source2, destination):
    # Define subfolders
    img1_dir = os.path.join(source1, 'images')
    lbl1_dir = os.path.join(source1, 'labels')
    img2_dir = os.path.join(source2, 'images')
    lbl2_dir = os.path.join(source2, 'labels')
    dst_img_dir = os.path.join(destination, 'images')
    dst_lbl_dir = os.path.join(destination, 'labels')

    # Create destination folders
    os.makedirs(dst_img_dir, exist_ok=True)
    os.makedirs(dst_lbl_dir, exist_ok=True)

    def copy_files(src_img_dir, src_lbl_dir, prefix):
        for filename in sorted(os.listdir(src_img_dir)):
            if not filename.lower().endswith('.jpg'):
                continue
            base = os.path.splitext(filename)[0]

            # Image
            new_img_name = f"{prefix}_{base}.jpg"
            shutil.copy(os.path.join(src_img_dir, filename),
                        os.path.join(dst_img_dir, new_img_name))

            # Label
            label_file = base + ".txt"
            if os.path.exists(os.path.join(src_lbl_dir, label_file)):
                new_lbl_name = f"{prefix}_{base}.txt"
                shutil.copy(os.path.join(src_lbl_dir, label_file),
                            os.path.join(dst_lbl_dir, new_lbl_name))
            else:
                print(f"⚠️ No label for {filename}")

    print("🔁 Merging original dataset...")
    copy_files(img1_dir, lbl1_dir, prefix="orig")

    print("➕ Merging predicted video dataset...")
    copy_files(img2_dir, lbl2_dir, prefix="pred")

    print(f"\n✅ Merge complete! Merged dataset at: {destination}")

### Print Metrics 

In [19]:
def find_results_csv(directory):
    """Find the results.csv file in the specified directory."""
    for root, dirs, files in os.walk(directory):
        if 'results.csv' in files:
            return os.path.join(root, 'results.csv')
    return None

def load_results_csv(results_csv_path):
    """Load the results CSV into a pandas DataFrame."""
    return pd.read_csv(results_csv_path)

def calculate_total_epochs(df):
    """Calculate the total number of epochs from the DataFrame."""
    return df['epoch'].max()

def calculate_training_loss(epoch_data):
    """Calculate the total training loss from the given epoch data."""
    train_box_loss = epoch_data['train/box_loss']
    train_cls_loss = epoch_data['train/cls_loss']
    train_dfl_loss = epoch_data['train/dfl_loss']
    return train_box_loss + train_cls_loss + train_dfl_loss

def calculate_validation_loss(epoch_data):
    """Calculate the total validation loss from the given epoch data."""
    val_box_loss = epoch_data['val/box_loss']
    val_cls_loss = epoch_data['val/cls_loss']
    val_dfl_loss = epoch_data['val/dfl_loss']
    return val_box_loss + val_cls_loss + val_dfl_loss

def print_final_metrics(df):
    """Print the final metrics for the last epoch."""
    final_epoch_data = df.iloc[-1]

    # Calculate total training and validation loss
    train_loss = calculate_training_loss(final_epoch_data)
    val_loss = calculate_validation_loss(final_epoch_data)

    # Print overall metrics
    print("\n========== Final Training Metrics ==========")
    print(f"Training Loss: {train_loss:.6f}")
    print(f"Precision: {final_epoch_data['metrics/precision(B)']:.6f}")
    print(f"Recall: {final_epoch_data['metrics/recall(B)']:.6f}")
    print(f"mAP@0.5: {final_epoch_data['metrics/mAP50(B)']:.6f}")
    print(f"mAP@0.5:0.95: {final_epoch_data['metrics/mAP50-95(B)']:.6f}")

    print("\n========== Final Validation Metrics ==========")
    print(f"Validation Loss: {val_loss:.6f}")


def print_csv_metrics(directory):
    """Main function to process and print final metrics."""
    # Find the results.csv file
    results_csv_path = find_results_csv(directory)
    
    if not results_csv_path:
        print("Error: 'results.csv' file not found in the specified directory.")
        return

    print(f"Found results.csv at: {results_csv_path}")

    # Load results CSV
    df = load_results_csv(results_csv_path)

    # Get the total number of epochs
    total_epochs = calculate_total_epochs(df)
    print(f"Total number of epochs: {total_epochs}")

    # Print columns in the CSV
    # print("\n========== Columns in CSV ==========")
    # print(df.columns)

    # Print final metrics
    print_final_metrics(df)


## Calling Functions

### Download, convert, split and normalize datsets 

In [20]:
gdrive_url = "https://drive.google.com/file/d/1UppumYqYOi-kto6BWPfFxwJK2Eph46oY/view?usp=sharing"
semantic_drone_dataset_download(gdrive_url, extract_to="datasets")

gdrive_url = "https://drive.google.com/file/d/12cbrTaBAMIsuU-mwAA7IgDk9wSLC9cC-/view?usp=sharing"
uavdt_dataset_download(gdrive_url, extract_to="datasets")

[INFO] Downloading ZIP from Google Drive...


Downloading...
From (original): https://drive.google.com/uc?id=1UppumYqYOi-kto6BWPfFxwJK2Eph46oY
From (redirected): https://drive.google.com/uc?id=1UppumYqYOi-kto6BWPfFxwJK2Eph46oY&confirm=t&uuid=151eaff2-64e8-469c-a4e9-6b66550bf701
To: /home/ssl49/Desktop/Automated-Labeling-for-Aerial-Images-main/Automated_SegmentAndYolo/datasets/downloaded.zip
100%|██████████| 4.14G/4.14G [01:37<00:00, 42.6MB/s]


[INFO] Extracting ZIP...
[DONE] Extracted files to: datasets
[INFO] Downloading ZIP from Google Drive...


Downloading...
From (original): https://drive.google.com/uc?id=12cbrTaBAMIsuU-mwAA7IgDk9wSLC9cC-
From (redirected): https://drive.google.com/uc?id=12cbrTaBAMIsuU-mwAA7IgDk9wSLC9cC-&confirm=t&uuid=c4a86683-6152-470e-8ce5-c668311f3248
To: /home/ssl49/Desktop/Automated-Labeling-for-Aerial-Images-main/Automated_SegmentAndYolo/datasets/downloaded.zip
100%|██████████| 5.21G/5.21G [02:09<00:00, 40.1MB/s]


[INFO] Extracting ZIP...
[DONE] Extracted files to: datasets


In [21]:
dataset_path = "./datasets/semantic_drone_dataset/training_set"
output_dir = "./datasets/semantic_yolo"

convert_fulldataset_yolo_only(dataset_path, output_dir, class_id_to_name)

Converting to YOLO:  75%|███████▌  | 301/400 [01:01<00:20,  4.93it/s]



Converting to YOLO: 100%|██████████| 400/400 [01:22<00:00,  4.88it/s]

✅ YOLO-format annotation conversion complete!





In [22]:
#UAVDT-2024

source_root = "./datasets/UAVDT-2024"
output_root = "./datasets/new_dataset_yolo_split"

convert_dataset(source_root)
copy_split_sequences(source_root, output_root, train_ratio=0.8)


# Semantic dorne datasets
split_and_move_dataset()


🔄 Converting 30946 annotation files to YOLO format...


Converting: 100%|██████████| 30946/30946 [01:08<00:00, 453.50file/s]



Conversion complete.
Total boxes:     868139
Converted boxes: 868139
Skipped boxes:   0
Malformed lines: 0
Missing images: 0


Copying train: 100%|██████████| 37/37 [00:03<00:00, 11.07it/s]
Copying val: 100%|██████████| 10/10 [00:00<00:00, 12.09it/s]



Dataset split into 'train/' and 'val/' with images and YOLO labels.


Moving to train: 100%|██████████| 319/319 [00:00<00:00, 797.03it/s]
Moving to val: 100%|██████████| 80/80 [00:00<00:00, 708.98it/s]


[✓] Dataset split completed: 319 train / 80 val samples





In [23]:
# Set your paths
dataset_path = "./datasets/new_dataset_yolo_split/train"
image_dir = os.path.join(dataset_path, "images")
annotations_dir = os.path.join(dataset_path, "labels")

normalize_all_labels(annotations_dir, image_dir)

dataset_path = "./datasets/new_dataset_yolo_split/val"
image_dir = os.path.join(dataset_path, "images")
annotations_dir = os.path.join(dataset_path, "labels")

normalize_all_labels(annotations_dir, image_dir)

100%|██████████| 2354/2354 [00:00<00:00, 8143.05it/s]


Normalize Complete


100%|██████████| 1181/1181 [00:00<00:00, 5739.18it/s]

Normalize Complete





### Checking how many classes have how much instances

In [24]:
labels_dir = './datasets/new_dataset_yolo_split/train/labels'

rare_class_ids = get_rare_class_ids(label_dir=labels_dir, class_id_to_name=class_id_to_name ,rare_threshold=3000)

🔍 Class-wise instance counts:
Class  1 (pool      ): 30 instances
Class  2 (vegetation): 5528 instances
Class  3 (roof      ): 280 instances
Class  4 (wall      ): 954 instances
Class  5 (window    ): 376 instances
Class  6 (person    ): 2475 instances
Class  7 (dog       ): 25 instances
Class  8 (car       ): 35045 instances
Class  9 (bicycle   ): 222 instances
Class 10 (tree      ): 417 instances
Class 11 (truck     ): 129 instances
Class 12 (bus       ): 86 instances
Class 13 (vehicle   ): 577 instances

✅ Rare class IDs (threshold < 3000): {1, 3, 4, 5, 6, 7, 9, 10, 11, 12, 13}


### Training v8 model

In [25]:
import shutil
import os

# List of folders to delete
folders_to_delete = ['./datasets/semantic_yolo', './datasets/new_dataset_yolo', './datasets/uavdt-processed', './runs', "./metrics"]

for folder_path in folders_to_delete:
    if os.path.exists(folder_path):
        shutil.rmtree(folder_path)
        print(f"✅ Deleted folder: {folder_path}")
    else:
        print(f"⚠️ Folder does not exist: {folder_path}")


✅ Deleted folder: ./datasets/semantic_yolo
⚠️ Folder does not exist: ./datasets/new_dataset_yolo
⚠️ Folder does not exist: ./datasets/uavdt-processed
✅ Deleted folder: ./runs
⚠️ Folder does not exist: ./metrics


In [26]:
print("[+] Training Start")

gc.collect()
torch.cuda.empty_cache()

train_yolo("yolov8.yaml", 100, 720, 8, "yolov8", False, "")

     46/100      6.56G     0.6858     0.3894     0.8642         42        736: 100%|██████████| 295/295 [02:06<00:00,  2.33it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:15<00:00,  4.89it/s]


                   all       1181      70393      0.541      0.325      0.373      0.252

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     47/100      5.38G     0.6624     0.3682     0.8556         80        736: 100%|██████████| 295/295 [02:06<00:00,  2.33it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:16<00:00,  4.60it/s]


                   all       1181      70393      0.481      0.338      0.341      0.231

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     48/100      5.31G     0.6735      0.379     0.8593        101        736: 100%|██████████| 295/295 [02:06<00:00,  2.33it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:15<00:00,  4.83it/s]


                   all       1181      70393      0.562      0.341      0.419      0.294

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     49/100      5.66G     0.6691     0.3792     0.8593         15        736: 100%|██████████| 295/295 [02:06<00:00,  2.33it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:15<00:00,  4.67it/s]


                   all       1181      70393      0.583      0.308      0.348      0.218

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     50/100      6.52G     0.6626      0.377     0.8595         43        736: 100%|██████████| 295/295 [02:06<00:00,  2.33it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:15<00:00,  4.90it/s]


                   all       1181      70393      0.548      0.345      0.381      0.246

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     51/100      5.06G     0.6772     0.3823     0.8622         57        736: 100%|██████████| 295/295 [02:06<00:00,  2.33it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:15<00:00,  4.76it/s]


                   all       1181      70393       0.51      0.336      0.393      0.262

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     52/100      5.51G     0.6505     0.3647     0.8544         42        736: 100%|██████████| 295/295 [02:06<00:00,  2.33it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:16<00:00,  4.59it/s]


                   all       1181      70393      0.496      0.352      0.379       0.25

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     53/100      6.52G     0.6528     0.3686     0.8587        174        736: 100%|██████████| 295/295 [02:06<00:00,  2.33it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:15<00:00,  4.74it/s]


                   all       1181      70393      0.542       0.38      0.383      0.234

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     54/100      5.22G     0.6555     0.3692     0.8564        113        736: 100%|██████████| 295/295 [02:06<00:00,  2.32it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:15<00:00,  4.90it/s]


                   all       1181      70393      0.499      0.346      0.374      0.259

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     55/100      5.76G      0.651     0.3679     0.8556        134        736: 100%|██████████| 295/295 [02:06<00:00,  2.33it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:15<00:00,  4.90it/s]


                   all       1181      70393      0.497      0.321      0.377      0.266

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     56/100      5.49G       0.65     0.3654     0.8563         87        736: 100%|██████████| 295/295 [02:06<00:00,  2.33it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:14<00:00,  5.00it/s]


                   all       1181      70393      0.538      0.361      0.402      0.262

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     57/100      5.35G     0.6467     0.3617     0.8555         82        736: 100%|██████████| 295/295 [02:06<00:00,  2.33it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:15<00:00,  4.80it/s]


                   all       1181      70393      0.499      0.352      0.388      0.242

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     58/100      5.18G     0.6396     0.3563     0.8531         36        736: 100%|██████████| 295/295 [02:06<00:00,  2.33it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:14<00:00,  4.96it/s]


                   all       1181      70393      0.599      0.304        0.4      0.269

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     59/100      5.78G     0.6453     0.3663     0.8557         80        736: 100%|██████████| 295/295 [02:06<00:00,  2.33it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:15<00:00,  4.92it/s]


                   all       1181      70393      0.523      0.357       0.38      0.253

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     60/100      5.51G     0.6432     0.3613      0.855         53        736: 100%|██████████| 295/295 [02:06<00:00,  2.33it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:16<00:00,  4.42it/s]


                   all       1181      70393      0.544      0.322      0.378      0.254

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     61/100      5.95G     0.6325     0.3541     0.8511         23        736: 100%|██████████| 295/295 [02:06<00:00,  2.33it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:15<00:00,  4.77it/s]


                   all       1181      70393      0.552      0.353      0.394      0.267

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     62/100      5.71G     0.6203     0.3471     0.8497         80        736: 100%|██████████| 295/295 [02:06<00:00,  2.33it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:15<00:00,  4.77it/s]


                   all       1181      70393      0.515      0.331      0.378      0.263

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     63/100      5.82G     0.6241     0.3504     0.8496         19        736: 100%|██████████| 295/295 [02:06<00:00,  2.33it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:15<00:00,  4.79it/s]


                   all       1181      70393      0.521      0.327      0.384      0.263

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     64/100      6.09G     0.6268     0.3517     0.8511        125        736: 100%|██████████| 295/295 [02:06<00:00,  2.33it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:15<00:00,  4.88it/s]


                   all       1181      70393      0.498      0.353      0.381      0.266

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     65/100      5.31G     0.6085     0.3408     0.8462         61        736: 100%|██████████| 295/295 [02:06<00:00,  2.33it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:15<00:00,  4.73it/s]


                   all       1181      70393      0.607      0.359      0.412      0.279

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     66/100      5.31G     0.6085     0.3425     0.8463         34        736: 100%|██████████| 295/295 [02:06<00:00,  2.33it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:15<00:00,  4.77it/s]


                   all       1181      70393      0.506      0.359      0.362      0.245

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     67/100      4.79G     0.6146     0.3455     0.8493        122        736: 100%|██████████| 295/295 [02:06<00:00,  2.33it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:16<00:00,  4.56it/s]


                   all       1181      70393      0.493      0.355      0.399      0.272

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     68/100      5.41G     0.6189     0.3449     0.8491         29        736: 100%|██████████| 295/295 [02:06<00:00,  2.32it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:15<00:00,  4.77it/s]


                   all       1181      70393      0.514       0.35      0.392      0.268

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     69/100      5.44G     0.6076     0.3386     0.8452        121        736: 100%|██████████| 295/295 [02:06<00:00,  2.33it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:17<00:00,  4.34it/s]


                   all       1181      70393      0.521      0.356      0.391      0.275

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     70/100      6.06G     0.6145     0.3466     0.8498        119        736: 100%|██████████| 295/295 [02:06<00:00,  2.33it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:16<00:00,  4.52it/s]


                   all       1181      70393      0.553      0.362      0.404      0.273

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     71/100      5.46G     0.6077     0.3388     0.8464         34        736: 100%|██████████| 295/295 [02:06<00:00,  2.33it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:15<00:00,  4.64it/s]


                   all       1181      70393      0.521      0.341      0.367      0.242

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     72/100      5.51G     0.6034     0.3396     0.8473         89        736: 100%|██████████| 295/295 [02:06<00:00,  2.33it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:14<00:00,  4.93it/s]


                   all       1181      70393      0.462      0.365      0.376      0.256

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     73/100      5.08G     0.5989      0.334      0.846         52        736: 100%|██████████| 295/295 [02:06<00:00,  2.33it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:15<00:00,  4.87it/s]


                   all       1181      70393      0.592      0.325        0.4      0.275

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     74/100      5.73G     0.5965     0.3318     0.8432         48        736: 100%|██████████| 295/295 [02:06<00:00,  2.33it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:15<00:00,  4.72it/s]


                   all       1181      70393      0.574      0.371      0.392      0.268

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     75/100      5.74G     0.5939     0.3289     0.8433         68        736: 100%|██████████| 295/295 [02:06<00:00,  2.33it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:15<00:00,  4.71it/s]


                   all       1181      70393      0.589      0.328      0.399      0.273

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     76/100       5.3G     0.5991     0.3348     0.8435        136        736: 100%|██████████| 295/295 [02:06<00:00,  2.33it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:15<00:00,  4.76it/s]


                   all       1181      70393      0.479      0.367      0.388      0.263

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     77/100      6.18G     0.5926     0.3276     0.8423         17        736: 100%|██████████| 295/295 [02:06<00:00,  2.33it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:15<00:00,  4.86it/s]


                   all       1181      70393      0.557      0.347      0.406      0.279

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     78/100      5.62G     0.5837     0.3268     0.8417        175        736: 100%|██████████| 295/295 [02:06<00:00,  2.33it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:16<00:00,  4.49it/s]


                   all       1181      70393      0.593      0.332      0.402       0.28
[34m[1mEarlyStopping: [0mTraining stopped early as no improvement observed in last 30 epochs. Best results observed at epoch 48, best model saved as best.pt.
To update EarlyStopping(patience=30) pass a new patience value, i.e. `patience=300` or use `patience=0` to disable EarlyStopping.

78 epochs completed in 3.098 hours.
Optimizer stripped from runs/train/yolov8/weights/last.pt, 52.0MB
Optimizer stripped from runs/train/yolov8/weights/best.pt, 52.0MB

Validating runs/train/yolov8/weights/best.pt...
Ultralytics 8.3.109 🚀 Python-3.10.13 torch-2.6.0+cu124 CUDA:0 (NVIDIA GeForce RTX 3050 OEM, 7957MiB)
Model summary (fused): 92 layers, 25,847,866 parameters, 0 gradients, 78.7 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:35<00:00,  2.06it/s]


                   all       1181      70393      0.561      0.359      0.427        0.3
                  pool          9          9      0.771      0.889      0.852       0.78
            vegetation         75       1780      0.215      0.269      0.229      0.134
                  roof         42         79      0.124       0.81      0.678      0.572
                  wall         62        255      0.613      0.286      0.288       0.18
                window         38        139      0.547      0.345      0.323      0.202
                person         75        637      0.877      0.328      0.446      0.277
                   dog          6         12          1      0.397      0.735      0.491
                   car       1115      53669      0.754      0.386       0.55      0.306
               bicycle         32         53       0.69      0.358      0.466      0.235
                  tree         38        108      0.743      0.562      0.603      0.457
                 truc

### Print metrics

In [27]:
yolov8 = './runs/train/yolov8'
best_pt_path = find_best_model(yolov8)
evaluate_and_save_metrics(best_pt_path)

✅ Found best.pt at: runs/train/yolov8/weights/best.pt
Ultralytics 8.3.109 🚀 Python-3.10.13 torch-2.6.0+cu124 CUDA:0 (NVIDIA GeForce RTX 3050 OEM, 7957MiB)
Model summary (fused): 92 layers, 25,847,866 parameters, 0 gradients, 78.7 GFLOPs


[34m[1mval: [0mScanning /home/ssl49/Desktop/Automated-Labeling-for-Aerial-Images-main/Automated_SegmentAndYolo/datasets/new_dataset_yolo_split/val/labels.cache... 1181 images, 0 backgrounds, 0 corrupt: 100%|██████████| 1181/1181 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 74/74 [00:25<00:00,  2.87it/s]


                   all       1181      70393      0.562      0.342      0.418      0.294
                  pool          9          9      0.862      0.889      0.875      0.821
            vegetation         75       1780       0.16       0.27      0.204      0.122
                  roof         42         79      0.239       0.73      0.659      0.534
                  wall         62        255       0.39      0.227      0.213      0.133
                window         38        139      0.499      0.338      0.298      0.189
                person         75        637      0.881      0.305      0.395      0.256
                   dog          6         12          1      0.279      0.714      0.471
                   car       1115      53669      0.781      0.399      0.575       0.32
               bicycle         32         53        0.7      0.415      0.482      0.227
                  tree         38        108      0.839      0.556      0.624      0.454
                 truc

In [28]:
print_per_class_metrics("per_class_metrics.json")

📊 Per-Class mAP@0.5:0.95 Metrics:

Class Name      Class ID   mAP@0.5:0.95   
----------------------------------------
unlabeled       0          0.2943         
pool            1          0.8215         
vegetation      2          0.1216         
roof            3          0.5337         
wall            4          0.1327         
window          5          0.1891         
person          6          0.2558         
dog             7          0.4714         
car             8          0.3195         
bicycle         9          0.2275         
tree            10         0.4541         
truck           11         0.0            
bus             12         0.0            
vehicle         13         0.2993         


In [29]:
print_csv_metrics(yolov8)

Found results.csv at: ./runs/train/yolov8/results.csv
Total number of epochs: 78

Training Loss: 1.752180
Precision: 0.592850
Recall: 0.331840
mAP@0.5: 0.401860
mAP@0.5:0.95: 0.280340

Validation Loss: 5.269870


### Prediciton videso

In [30]:
import shutil
import os

# List of folders to delete
folders_to_delete = ['./datasets/new-videos-predicted-yolo', "./datasets/merged_yolo_dataset"]

for folder_path in folders_to_delete:
    if os.path.exists(folder_path):
        shutil.rmtree(folder_path)
        print(f"✅ Deleted folder: {folder_path}")
    else:
        print(f"⚠️ Folder does not exist: {folder_path}")


⚠️ Folder does not exist: ./datasets/new-videos-predicted-yolo
⚠️ Folder does not exist: ./datasets/merged_yolo_dataset


In [31]:
process_all_videos(best_pt_path, class_id_to_name, rare_class_ids, video_dir='videos', output_base='./datasets/new-videos-predicted-yolo', max_frames=400)

  0%|          | 0/9 [00:00<?, ?it/s]




v1: 100%|██████████| 400/400 [00:28<00:00, 14.25it/s]
 11%|█         | 1/9 [00:28<03:45, 28.13s/it]

DONE: v1 — Processed 400 frames



v10:  65%|██████▍   | 259/400 [00:18<00:10, 14.04it/s]
 22%|██▏       | 2/9 [00:46<02:37, 22.47s/it]

DONE: v10 — Processed 259 frames



v2:  44%|████▍     | 175/400 [00:12<00:15, 14.21it/s]
 33%|███▎      | 3/9 [00:58<01:47, 17.85s/it]

DONE: v2 — Processed 175 frames



v3:  44%|████▍     | 176/400 [00:04<00:05, 43.65it/s]
 44%|████▍     | 4/9 [01:03<01:01, 12.40s/it]

DONE: v3 — Processed 176 frames



v4:  63%|██████▎   | 253/400 [00:08<00:04, 29.84it/s]
 56%|█████▌    | 5/9 [01:11<00:43, 10.99s/it]

DONE: v4 — Processed 253 frames



v5: 100%|██████████| 400/400 [00:31<00:00, 12.66it/s]
 67%|██████▋   | 6/9 [01:43<00:54, 18.02s/it]

DONE: v5 — Processed 400 frames



v6: 100%|██████████| 400/400 [00:28<00:00, 14.00it/s]
 78%|███████▊  | 7/9 [02:11<00:42, 21.50s/it]

DONE: v6 — Processed 400 frames



v8: 100%|██████████| 400/400 [00:16<00:00, 23.83it/s]
 89%|████████▉ | 8/9 [02:28<00:20, 20.01s/it]

DONE: v8 — Processed 400 frames



v9: 100%|██████████| 400/400 [00:32<00:00, 12.28it/s]
100%|██████████| 9/9 [03:01<00:00, 20.15s/it]

DONE: v9 — Processed 400 frames





In [32]:
# Folder with YOLO label files
label_dir = './datasets/new-videos-predicted-yolo/labels'

get_rare_class_ids(label_dir=label_dir, class_id_to_name=class_id_to_name ,rare_threshold=0)


🔍 Class-wise instance counts:
Class  3 (roof      ): 74 instances
Class  4 (wall      ): 1020 instances
Class  5 (window    ): 166 instances
Class  6 (person    ): 11772 instances
Class  9 (bicycle   ): 285 instances
Class 10 (tree      ): 1463 instances
Class 13 (vehicle   ): 2 instances

✅ Rare class IDs (threshold < 0): set()


set()

### Merge previous and new prediction Ddatasets

In [33]:
merge_yolo_datasets(
    source1='./datasets/new_dataset_yolo_split/train',
    source2='./datasets/new-videos-predicted-yolo',
    destination='./datasets/merged_yolo_dataset'
)

🔁 Merging original dataset...
➕ Merging predicted video dataset...
⚠️ No label for v10_0000.jpg
⚠️ No label for v10_0001.jpg
⚠️ No label for v10_0002.jpg
⚠️ No label for v10_0003.jpg
⚠️ No label for v10_0004.jpg
⚠️ No label for v10_0005.jpg
⚠️ No label for v10_0006.jpg
⚠️ No label for v10_0007.jpg
⚠️ No label for v10_0008.jpg
⚠️ No label for v10_0009.jpg
⚠️ No label for v10_0010.jpg
⚠️ No label for v10_0011.jpg
⚠️ No label for v10_0047.jpg
⚠️ No label for v10_0054.jpg
⚠️ No label for v10_0064.jpg
⚠️ No label for v10_0066.jpg
⚠️ No label for v10_0067.jpg
⚠️ No label for v10_0068.jpg
⚠️ No label for v10_0070.jpg
⚠️ No label for v10_0074.jpg
⚠️ No label for v10_0075.jpg
⚠️ No label for v10_0076.jpg
⚠️ No label for v10_0077.jpg
⚠️ No label for v10_0078.jpg
⚠️ No label for v10_0079.jpg
⚠️ No label for v10_0080.jpg
⚠️ No label for v10_0081.jpg
⚠️ No label for v10_0082.jpg
⚠️ No label for v10_0083.jpg
⚠️ No label for v10_0084.jpg
⚠️ No label for v10_0085.jpg
⚠️ No label for v10_0086.jpg
⚠️ No

In [34]:
# Folder with YOLO label files
label_dir = './datasets/merged_yolo_dataset/labels'

rare_class_ids = get_rare_class_ids(label_dir=label_dir, class_id_to_name=class_id_to_name ,rare_threshold=0)



🔍 Class-wise instance counts:
Class  1 (pool      ): 30 instances
Class  2 (vegetation): 5528 instances
Class  3 (roof      ): 354 instances
Class  4 (wall      ): 1974 instances
Class  5 (window    ): 542 instances
Class  6 (person    ): 14247 instances
Class  7 (dog       ): 25 instances
Class  8 (car       ): 35045 instances
Class  9 (bicycle   ): 507 instances
Class 10 (tree      ): 1880 instances
Class 11 (truck     ): 129 instances
Class 12 (bus       ): 86 instances
Class 13 (vehicle   ): 579 instances

✅ Rare class IDs (threshold < 0): set()


## Retrain Model on predictions

In [35]:
import shutil
import os
import glob

# Match all folders starting with 'fine-tune-yolov8' inside './runs_yolo/train/'
folders_to_delete = glob.glob('./runs/train/fine-tune-yolov8*')

for folder_path in folders_to_delete:
    if os.path.isdir(folder_path):
        shutil.rmtree(folder_path)
        print(f"✅ Deleted folder: {folder_path}")
    else:
        print(f"⚠️ Not a directory or doesn't exist: {folder_path}")


In [36]:
train_yolo("yolo_retrain.yaml", 30, 720, 8, "fine-tune-yolov8", True, dir=best_pt_path)

Ultralytics 8.3.109 🚀 Python-3.10.13 torch-2.6.0+cu124 CUDA:0 (NVIDIA GeForce RTX 3050 OEM, 7957MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=runs/train/yolov8/weights/best.pt, data=yolo_retrain.yaml, epochs=30, time=None, patience=30, batch=8, imgsz=720, save=True, save_period=-1, cache=False, device=None, workers=8, project=runs/train, name=fine-tune-yolov8, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=True, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False, save_crop=False, sho

[34m[1mtrain: [0mScanning /home/ssl49/Desktop/Automated-Labeling-for-Aerial-Images-main/Automated_SegmentAndYolo/datasets/merged_yolo_dataset/labels... 4919 images, 301 backgrounds, 0 corrupt: 100%|██████████| 5217/5217 [00:01<00:00, 3948.16it/s]

[34m[1mtrain: [0mNew cache created: /home/ssl49/Desktop/Automated-Labeling-for-Aerial-Images-main/Automated_SegmentAndYolo/datasets/merged_yolo_dataset/labels.cache





[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))


[34m[1mval: [0mScanning /home/ssl49/Desktop/Automated-Labeling-for-Aerial-Images-main/Automated_SegmentAndYolo/datasets/new-videos-predicted-yolo/labels... 2565 images, 298 backgrounds, 0 corrupt: 100%|██████████| 2863/2863 [00:00<00:00, 3199.52it/s]

[34m[1mval: [0mNew cache created: /home/ssl49/Desktop/Automated-Labeling-for-Aerial-Images-main/Automated_SegmentAndYolo/datasets/new-videos-predicted-yolo/labels.cache





Plotting labels to runs/train/fine-tune-yolov8/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.000556, momentum=0.9) with parameter groups 77 weight(decay=0.0), 84 weight(decay=0.0005), 83 bias(decay=0.0)
Image sizes 736 train, 736 val
Using 8 dataloader workers
Logging results to [1mruns/train/fine-tune-yolov8[0m
Starting training for 30 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/30      5.64G     0.8068     0.6317     0.9179          4        736: 100%|██████████| 653/653 [04:42<00:00,  2.31it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 179/179 [00:31<00:00,  5.65it/s]


                   all       2863      14782      0.709      0.525      0.559      0.441

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/30      5.77G     0.8166     0.6045     0.9126         15        736: 100%|██████████| 653/653 [04:39<00:00,  2.34it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 179/179 [00:31<00:00,  5.66it/s]


                   all       2863      14782      0.481      0.615        0.6      0.477

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       3/30      5.27G     0.8363     0.6153     0.9133          6        736: 100%|██████████| 653/653 [04:38<00:00,  2.35it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 179/179 [00:31<00:00,  5.66it/s]


                   all       2863      14782      0.703      0.575      0.623      0.499

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       4/30      4.84G     0.8305     0.5945     0.9109         39        736: 100%|██████████| 653/653 [04:38<00:00,  2.35it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 179/179 [00:31<00:00,  5.66it/s]


                   all       2863      14782      0.577      0.746      0.719      0.573

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       5/30      5.57G     0.8277     0.5948     0.9095         25        736: 100%|██████████| 653/653 [04:38<00:00,  2.35it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 179/179 [00:31<00:00,  5.66it/s]


                   all       2863      14782      0.691      0.551      0.679      0.512

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       6/30      5.26G     0.8182     0.5886     0.9082         16        736: 100%|██████████| 653/653 [04:37<00:00,  2.35it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 179/179 [00:31<00:00,  5.66it/s]


                   all       2863      14782      0.735      0.532      0.607      0.485

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       7/30      5.46G     0.8112      0.575      0.904          3        736: 100%|██████████| 653/653 [04:38<00:00,  2.35it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 179/179 [00:31<00:00,  5.66it/s]


                   all       2863      14782      0.737      0.569      0.629      0.522

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       8/30      4.92G     0.8071     0.5779     0.9025         42        736: 100%|██████████| 653/653 [04:38<00:00,  2.35it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 179/179 [00:31<00:00,  5.66it/s]


                   all       2863      14782      0.678      0.579       0.62      0.517

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       9/30       5.1G     0.7949     0.5581     0.8976         13        736: 100%|██████████| 653/653 [04:38<00:00,  2.34it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 179/179 [00:31<00:00,  5.66it/s]


                   all       2863      14782      0.658      0.764      0.737      0.591

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      10/30      6.04G     0.7855     0.5535     0.8975         79        736: 100%|██████████| 653/653 [04:38<00:00,  2.35it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 179/179 [00:31<00:00,  5.66it/s]


                   all       2863      14782      0.697      0.587      0.673      0.532

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      11/30      5.28G     0.7805      0.553     0.8959         61        736: 100%|██████████| 653/653 [04:38<00:00,  2.35it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 179/179 [00:31<00:00,  5.67it/s]


                   all       2863      14782      0.628      0.766      0.789      0.668

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      12/30      4.97G     0.7701     0.5444     0.8961         42        736: 100%|██████████| 653/653 [04:38<00:00,  2.35it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 179/179 [00:31<00:00,  5.67it/s]


                   all       2863      14782      0.706      0.645      0.764      0.635

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      13/30       5.1G     0.7668     0.5379     0.8948         12        736: 100%|██████████| 653/653 [04:38<00:00,  2.35it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 179/179 [00:31<00:00,  5.67it/s]


                   all       2863      14782      0.727      0.681       0.74      0.615

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      14/30      5.18G     0.7545     0.5318     0.8906         52        736: 100%|██████████| 653/653 [04:38<00:00,  2.35it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 179/179 [00:31<00:00,  5.64it/s]


                   all       2863      14782      0.634      0.789      0.759       0.64

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      15/30      5.02G     0.7434     0.5207     0.8874          8        736: 100%|██████████| 653/653 [04:38<00:00,  2.35it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 179/179 [00:31<00:00,  5.66it/s]


                   all       2863      14782      0.682      0.766      0.803      0.685

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      16/30      5.19G      0.746      0.527     0.8872          2        736: 100%|██████████| 653/653 [05:02<00:00,  2.16it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 179/179 [00:40<00:00,  4.43it/s]


                   all       2863      14782      0.665      0.761      0.776       0.66

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      17/30      5.58G     0.7408     0.5223      0.888          3        736: 100%|██████████| 653/653 [04:38<00:00,  2.35it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 179/179 [00:31<00:00,  5.66it/s]


                   all       2863      14782      0.745      0.716      0.816      0.692

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      18/30      5.05G     0.7241     0.5042     0.8824         45        736: 100%|██████████| 653/653 [04:38<00:00,  2.35it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 179/179 [00:31<00:00,  5.67it/s]


                   all       2863      14782      0.649      0.798      0.776      0.669

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      19/30      5.12G     0.7066      0.495     0.8772         12        736: 100%|██████████| 653/653 [04:38<00:00,  2.35it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 179/179 [00:31<00:00,  5.66it/s]


                   all       2863      14782      0.727      0.752      0.799      0.691

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      20/30      5.17G     0.7042     0.4906     0.8764         21        736: 100%|██████████| 653/653 [04:38<00:00,  2.35it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 179/179 [00:31<00:00,  5.67it/s]


                   all       2863      14782      0.722      0.749       0.82      0.697
Closing dataloader mosaic
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      21/30      4.75G     0.6426     0.4572     0.8625         41        736: 100%|██████████| 653/653 [04:35<00:00,  2.37it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 179/179 [00:31<00:00,  5.67it/s]


                   all       2863      14782      0.727      0.715      0.809      0.675

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      22/30      5.18G     0.6277     0.4396     0.8584          8        736: 100%|██████████| 653/653 [04:35<00:00,  2.37it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 179/179 [00:31<00:00,  5.67it/s]


                   all       2863      14782      0.766      0.698      0.827      0.721

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      23/30      4.81G     0.6146     0.4353      0.854          2        736: 100%|██████████| 653/653 [04:35<00:00,  2.37it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 179/179 [00:31<00:00,  5.67it/s]


                   all       2863      14782      0.689       0.78      0.821      0.718

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      24/30       4.8G      0.605     0.4245     0.8521         10        736: 100%|██████████| 653/653 [04:35<00:00,  2.37it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 179/179 [00:31<00:00,  5.66it/s]


                   all       2863      14782      0.749      0.753      0.836      0.733

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      25/30      4.85G     0.5994     0.4181     0.8503         10        736: 100%|██████████| 653/653 [04:35<00:00,  2.37it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 179/179 [00:31<00:00,  5.67it/s]


                   all       2863      14782        0.7      0.835       0.84      0.722

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      26/30      4.93G     0.5884     0.4093     0.8466         10        736: 100%|██████████| 653/653 [04:35<00:00,  2.37it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 179/179 [00:31<00:00,  5.66it/s]


                   all       2863      14782      0.707      0.789      0.827      0.719

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      27/30      4.88G     0.5732     0.3994     0.8449          8        736: 100%|██████████| 653/653 [04:35<00:00,  2.37it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 179/179 [00:31<00:00,  5.67it/s]


                   all       2863      14782       0.72       0.82      0.835      0.718

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      28/30      5.26G      0.563     0.3953      0.845          8        736: 100%|██████████| 653/653 [04:35<00:00,  2.37it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 179/179 [00:31<00:00,  5.67it/s]


                   all       2863      14782      0.726      0.815      0.841      0.727

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      29/30      5.26G     0.5518     0.3872      0.841         11        736: 100%|██████████| 653/653 [04:35<00:00,  2.37it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 179/179 [00:31<00:00,  5.67it/s]


                   all       2863      14782       0.74      0.801      0.845      0.735

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      30/30      5.23G     0.5417     0.3864     0.8368          0        736: 100%|██████████| 653/653 [04:35<00:00,  2.37it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 179/179 [00:31<00:00,  5.67it/s]


                   all       2863      14782      0.817      0.756      0.851      0.744

30 epochs completed in 2.590 hours.
Optimizer stripped from runs/train/fine-tune-yolov8/weights/last.pt, 52.0MB
Optimizer stripped from runs/train/fine-tune-yolov8/weights/best.pt, 52.0MB

Validating runs/train/fine-tune-yolov8/weights/best.pt...
Ultralytics 8.3.109 🚀 Python-3.10.13 torch-2.6.0+cu124 CUDA:0 (NVIDIA GeForce RTX 3050 OEM, 7957MiB)
Model summary (fused): 92 layers, 25,847,866 parameters, 0 gradients, 78.7 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 179/179 [01:07<00:00,  2.64it/s]


                   all       2863      14782      0.774      0.735      0.851      0.748
                  roof         59         74      0.722      0.838      0.862      0.792
                  wall        856       1020       0.86      0.852      0.927      0.888
                window        162        166      0.819      0.693      0.842       0.72
                person       1870      11772       0.84      0.518      0.782      0.598
               bicycle        268        285      0.826      0.384      0.643      0.529
                  tree       1001       1463      0.857      0.864      0.906      0.863
               vehicle          2          2      0.491          1      0.995      0.846
Speed: 0.1ms preprocess, 22.3ms inference, 0.0ms loss, 0.2ms postprocess per image
Results saved to [1mruns/train/fine-tune-yolov8[0m


In [37]:
new_path = './runs/train/fine-tune-yolov8'
print_csv_metrics(new_path)

Found results.csv at: ./runs/train/fine-tune-yolov8/results.csv
Total number of epochs: 30

Training Loss: 1.764910
Precision: 0.817130
Recall: 0.755970
mAP@0.5: 0.851300
mAP@0.5:0.95: 0.744380

Validation Loss: 2.098990


In [38]:
import pandas as pd

def compare_final_metrics(csv1_path, csv2_path):
    # Load both result CSVs
    df1 = pd.read_csv(csv1_path)
    df2 = pd.read_csv(csv2_path)
    # print(df1.head())
    # Use the final row (last epoch)
    last1 = df1.iloc[-1]
    last2 = df2.iloc[-1]

    metrics_to_compare = {
        "train/box_loss": "Box Loss (Train)",
        "train/cls_loss": "Cls Loss (Train)",
        "train/dfl_loss": "DFL Loss (Train)",
        "metrics/precision(B)": "Precision",
        "metrics/recall(B)": "Recall",
        "metrics/mAP50(B)": "mAP@0.5",
        "metrics/mAP50-95(B)": "mAP@0.5:0.95",
        "val/box_loss": "Box Loss (Val)",
        "val/cls_loss": "Cls Loss (Val)",
        "val/dfl_loss": "DFL Loss (Val)"
    }

    print("🔍 Comparison of Final Epoch Metrics:\n")
    for key, label in metrics_to_compare.items():
        val1 = last1[key]
        val2 = last2[key]
        trend = "✅ Good Increase" if val2 > val1 else "❌ No Increase"
        print(f"{label:20s}: {val1:.5f} → {val2:.5f} | {trend}")

# Example usage


In [39]:
new_path = './runs/train/fine-tune-yolov8'
old_path = './runs/train/yolov8'

results_csv_path = find_results_csv(new_path)
results_csv_path_1 = find_results_csv(old_path)

compare_final_metrics(results_csv_path_1, results_csv_path)


🔍 Comparison of Final Epoch Metrics:

Box Loss (Train)    : 0.58367 → 0.54170 | ❌ No Increase
Cls Loss (Train)    : 0.32681 → 0.38641 | ✅ Good Increase
DFL Loss (Train)    : 0.84170 → 0.83680 | ❌ No Increase
Precision           : 0.59285 → 0.81713 | ✅ Good Increase
Recall              : 0.33184 → 0.75597 | ✅ Good Increase
mAP@0.5             : 0.40186 → 0.85130 | ✅ Good Increase
mAP@0.5:0.95        : 0.28034 → 0.74438 | ✅ Good Increase
Box Loss (Val)      : 1.78693 → 0.54063 | ❌ No Increase
Cls Loss (Val)      : 2.43818 → 0.70702 | ❌ No Increase
DFL Loss (Val)      : 1.04476 → 0.85134 | ❌ No Increase


In [40]:
best_pt_path = find_best_model(new_path)
evaluate_and_save_metrics(best_pt_path, output_json_path="per_class_metrics_retrain.json")

✅ Found best.pt at: runs/train/fine-tune-yolov8/weights/best.pt
Ultralytics 8.3.109 🚀 Python-3.10.13 torch-2.6.0+cu124 CUDA:0 (NVIDIA GeForce RTX 3050 OEM, 7957MiB)


Model summary (fused): 92 layers, 25,847,866 parameters, 0 gradients, 78.7 GFLOPs


[34m[1mval: [0mScanning /home/ssl49/Desktop/Automated-Labeling-for-Aerial-Images-main/Automated_SegmentAndYolo/datasets/new-videos-predicted-yolo/labels.cache... 2565 images, 298 backgrounds, 0 corrupt: 100%|██████████| 2863/2863 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 179/179 [00:51<00:00,  3.47it/s]


                   all       2863      14782      0.818      0.756      0.852      0.745
                  roof         59         74      0.681      0.838      0.848      0.792
                  wall        856       1020      0.895      0.835      0.932      0.892
                window        162        166      0.733      0.759      0.847      0.758
                person       1870      11772      0.808      0.587      0.792      0.635
               bicycle        268        285      0.748      0.418      0.642      0.528
                  tree       1001       1463      0.865      0.853      0.905      0.867
               vehicle          2          2      0.996          1      0.995      0.747
Speed: 0.2ms preprocess, 16.6ms inference, 0.0ms loss, 0.2ms postprocess per image
Results saved to [1mruns/detect/val2[0m
✅ Saved per-class metrics to per_class_metrics_retrain.json


In [41]:
print_per_class_metrics("per_class_metrics_retrain.json")

📊 Per-Class mAP@0.5:0.95 Metrics:

Class Name      Class ID   mAP@0.5:0.95   
----------------------------------------
road            0          0.7454         
pool            1          0.7454         
vegetation      2          0.7454         
roof            3          0.7915         
wall            4          0.8921         
window          5          0.7584         
person          6          0.6346         
dog             7          0.7454         
car             8          0.7454         
bicycle         9          0.5281         
tree            10         0.8665         
truck           11         0.7454         
bus             12         0.7454         
vehicle         13         0.7467         


In [42]:
import json

def compare_maps(json_path1, json_path2):
    with open(json_path1, 'r') as f1, open(json_path2, 'r') as f2:
        metrics1 = json.load(f1)
        metrics2 = json.load(f2)

    print("\n📊 Comparison of mAP@0.5:0.95 per class:\n")
    print(f"{'Class':<15} {'Before':<10} {'After':<10} {'Change'}")
    print("-" * 50)

    for class_name in metrics1:
        map1 = metrics1[class_name].get("mAP@0.5:0.95", 0)
        map2 = metrics2.get(class_name, {}).get("mAP@0.5:0.95", 0)

        if map2 > map1:
            status = "✅ Good increase"
        else:
            status = "❌ No increase"

        print(f"{class_name:<15} {map1:<10.4f} {map2:<10.4f} {status}")

# 🔧 Example usage:
compare_maps("per_class_metrics.json", "per_class_metrics_retrain.json")



📊 Comparison of mAP@0.5:0.95 per class:

Class           Before     After      Change
--------------------------------------------------
unlabeled       0.2943     0.0000     ❌ No increase
pool            0.8215     0.7454     ❌ No increase
vegetation      0.1216     0.7454     ✅ Good increase
roof            0.5337     0.7915     ✅ Good increase
wall            0.1327     0.8921     ✅ Good increase
window          0.1891     0.7584     ✅ Good increase
person          0.2558     0.6346     ✅ Good increase
dog             0.4714     0.7454     ✅ Good increase
car             0.3195     0.7454     ✅ Good increase
bicycle         0.2275     0.5281     ✅ Good increase
tree            0.4541     0.8665     ✅ Good increase
truck           0.0000     0.7454     ✅ Good increase
bus             0.0000     0.7454     ✅ Good increase
vehicle         0.2993     0.7467     ✅ Good increase


In [43]:
new_path = './runs/train/fine-tune-yolov8'
old_path = './runs/train/yolov8'

best_pt_path_retrain = find_best_model(new_path)
best_pt_path = find_best_model(old_path)



✅ Found best.pt at: runs/train/fine-tune-yolov8/weights/best.pt
✅ Found best.pt at: runs/train/yolov8/weights/best.pt


In [44]:
videos_predictions(best_pt_path, class_id_to_name, video_dir='videos', output_base='./datasets/final_output', max_frames=400)

  0%|          | 0/9 [00:00<?, ?it/s]




v1: 100%|██████████| 400/400 [00:28<00:00, 13.93it/s]
 11%|█         | 1/9 [00:28<03:50, 28.76s/it]

DONE: v1 — Processed 400 frames



v10:  65%|██████▍   | 259/400 [00:19<00:10, 13.28it/s]
 22%|██▏       | 2/9 [00:48<02:43, 23.36s/it]

DONE: v10 — Processed 259 frames



v2:  44%|████▍     | 175/400 [00:12<00:16, 13.65it/s]
 33%|███▎      | 3/9 [01:01<01:51, 18.57s/it]

DONE: v2 — Processed 175 frames



v3:  44%|████▍     | 176/400 [00:04<00:05, 42.94it/s]
 44%|████▍     | 4/9 [01:05<01:04, 12.86s/it]

DONE: v3 — Processed 176 frames



v4:  63%|██████▎   | 253/400 [00:08<00:05, 29.39it/s]
 56%|█████▌    | 5/9 [01:13<00:45, 11.33s/it]

DONE: v4 — Processed 253 frames



v5: 100%|██████████| 400/400 [00:35<00:00, 11.41it/s]
 67%|██████▋   | 6/9 [01:49<00:58, 19.42s/it]

DONE: v5 — Processed 400 frames



v6: 100%|██████████| 400/400 [00:42<00:00,  9.47it/s]
 78%|███████▊  | 7/9 [02:31<00:53, 26.90s/it]

DONE: v6 — Processed 400 frames



v8: 100%|██████████| 400/400 [00:32<00:00, 12.49it/s]
 89%|████████▉ | 8/9 [03:03<00:28, 28.54s/it]

DONE: v8 — Processed 400 frames



v9: 100%|██████████| 400/400 [00:40<00:00,  9.87it/s]
100%|██████████| 9/9 [03:44<00:00, 24.90s/it]

DONE: v9 — Processed 400 frames





In [45]:
videos_predictions(best_pt_path_retrain, class_id_to_name, video_dir='videos', output_base='./datasets/final_output_retrain', max_frames=400)

  0%|          | 0/9 [00:00<?, ?it/s]




v1: 100%|██████████| 400/400 [00:28<00:00, 13.98it/s]
 11%|█         | 1/9 [00:28<03:49, 28.66s/it]

DONE: v1 — Processed 400 frames



v10:  65%|██████▍   | 259/400 [00:18<00:10, 13.83it/s]
 22%|██▏       | 2/9 [00:47<02:39, 22.86s/it]

DONE: v10 — Processed 259 frames



v2:  44%|████▍     | 175/400 [00:12<00:16, 13.94it/s]
 33%|███▎      | 3/9 [01:00<01:49, 18.17s/it]

DONE: v2 — Processed 175 frames



v3:  44%|████▍     | 176/400 [00:03<00:05, 44.07it/s]
 44%|████▍     | 4/9 [01:04<01:02, 12.57s/it]

DONE: v3 — Processed 176 frames



v4:  63%|██████▎   | 253/400 [00:08<00:04, 29.85it/s]
 56%|█████▌    | 5/9 [01:12<00:44, 11.10s/it]

DONE: v4 — Processed 253 frames



v5: 100%|██████████| 400/400 [00:31<00:00, 12.61it/s]
 67%|██████▋   | 6/9 [01:44<00:54, 18.14s/it]

DONE: v5 — Processed 400 frames



v6: 100%|██████████| 400/400 [00:28<00:00, 13.96it/s]
 78%|███████▊  | 7/9 [02:13<00:43, 21.61s/it]

DONE: v6 — Processed 400 frames



v8: 100%|██████████| 400/400 [00:16<00:00, 23.72it/s]
 89%|████████▉ | 8/9 [02:29<00:20, 20.10s/it]

DONE: v8 — Processed 400 frames



v9: 100%|██████████| 400/400 [00:32<00:00, 12.13it/s]
100%|██████████| 9/9 [03:03<00:00, 20.34s/it]

DONE: v9 — Processed 400 frames



