In [None]:
import torch

# Report the CUDA toolkit version this PyTorch build was compiled against
print("Built CUDA Version:", torch.version.cuda)

# Only query GPU details when PyTorch reports a usable CUDA setup
if not torch.cuda.is_available():
    # Nothing else can be queried without CUDA
    print("CUDA is not available.")
else:
    # NOTE(review): the label says "Runtime", but the value comes from a private
    # helper whose name suggests the compiled version — confirm which is intended.
    print("CUDA Runtime Version:", torch._C._cuda_getCompiledVersion())

    # Device index 0 is the first GPU visible to this process
    print("GPU Name:", torch.cuda.get_device_name(0))


In [None]:
# Class table: the position in the list is the class ID, and each entry is
# (class name, [R, G, B] color code).
class_id_to_name = dict(enumerate([
    ('unlabeled', [28, 42, 168]),     # 0  - background or unclassified area
    ('pool', [0, 50, 89]),            # 1  - swimming pool
    ('vegetation', [107, 142, 35]),   # 2  - trees, grass, or bushes
    ('roof', [70, 70, 70]),           # 3  - building rooftops
    ('wall', [102, 102, 156]),        # 4  - building walls
    ('window', [254, 228, 12]),       # 5  - windows
    ('person', [255, 22, 96]),        # 6  - people
    ('dog', [102, 51, 0]),            # 7  - dogs
    ('car', [9, 143, 150]),           # 8  - cars
    ('bicycle', [119, 11, 32]),       # 9  - bicycles
    ('tree', [51, 51, 0]),            # 10 - trees
    ('truck', [160, 160, 60]),        # 11 - trucks (added)
    ('bus', [200, 80, 80]),           # 12 - buses (added)
    ('vehicle', [20, 80, 80]),        # 13 - general vehicle category (added)
]))


In [None]:
# Install NumPy - fundamental package for numerical computations
!pip install numpy

# Install OpenCV - library for computer vision tasks
!pip install opencv-python

# Install Pillow - image processing library
!pip install pillow

# Install Matplotlib - plotting and visualization library
!pip install matplotlib

# Install tqdm - progress bar utility
!pip install tqdm

# Install scikit-learn - machine learning tools
!pip install scikit-learn

# Install PyTorch and TorchVision - deep learning framework and its vision tools
!pip install torch torchvision

# Install Ultralytics - YOLO model implementation and training tools
!pip install ultralytics


In [None]:
# Standard library imports
import os                     # Operating system interfaces
import gc                     # Garbage collection interface
import json                   # Working with JSON data
import shutil                 # File operations like copy, move, etc.
import zipfile                # Extracting zip archives
import random                 # Random number generation
from glob import glob         # Pattern matching for file paths
from pathlib import Path      # Object-oriented file path handling
from collections import defaultdict  # Dictionary with default value support
import xml.etree.ElementTree as ET  # Parsing XML files

# Scientific computing and data manipulation
import numpy as np            # Numerical operations
import pandas as pd           # Data analysis and manipulation
from sklearn.model_selection import train_test_split  # Train-test split

# Image processing and visualization
import cv2                    # OpenCV for computer vision
from PIL import Image, ImageDraw, ImageFont  # PIL for image handling
import matplotlib.pyplot as plt              # Plotting library
import matplotlib.patches as mpatches        # Drawing patches on plots

# Progress bar utility
from tqdm.auto import tqdm    # Progress bars for loops

# PyTorch and related imports
import torch
import torch.nn as nn         # Neural network modules
from torch.utils.data import DataLoader       # Efficient data loading
import torchvision.models as models           # Pretrained models
import torchvision.transforms as transforms   # Image transformations
import torchvision.models.segmentation as segmentation  # Segmentation models

# YOLO from Ultralytics
from ultralytics import YOLO  # YOLO object detection models

# Pandas library
import pandas as pd

# Google Drive downloader
import gdown                  # Downloading files from Google Drive

# Environment configuration: tolerate a duplicated OpenMP runtime instead of aborting
os.environ.update(KMP_DUPLICATE_LIB_OK="TRUE")

# Pick the computation device: GPU when CUDA is usable, otherwise CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")


In [None]:
def _extract_gdrive_file_id(gdrive_url):
    """
    Return the file ID embedded in a Google Drive share URL.

    Accepts both the ``.../file/d/<id>/...`` form and the ``...?id=<id>`` form.

    Raises:
        ValueError: If no file ID can be found in ``gdrive_url``.
    """
    import re  # local import: only this helper needs it

    match = (re.search(r"/d/([A-Za-z0-9_-]+)", gdrive_url)
             or re.search(r"[?&]id=([A-Za-z0-9_-]+)", gdrive_url))
    if match is None:
        raise ValueError(f"Could not find a Google Drive file ID in URL: {gdrive_url!r}")
    return match.group(1)


def _download_and_extract_gdrive_zip(gdrive_url, extract_to, progress_tag, done_tag):
    """
    Download a ZIP from Google Drive, extract it into ``extract_to`` and delete the ZIP.

    ``progress_tag`` / ``done_tag`` are the log prefixes printed before each message.

    Raises:
        ValueError: If ``gdrive_url`` contains no file ID.
        RuntimeError: If the download did not produce a file.
    """
    # Resolve the ID first so a bad URL fails before anything touches the disk
    file_id = _extract_gdrive_file_id(gdrive_url)
    download_url = f"https://drive.google.com/uc?id={file_id}"

    # Create the output folder if it doesn't exist
    os.makedirs(extract_to, exist_ok=True)
    zip_path = os.path.join(extract_to, "downloaded.zip")

    print(f"{progress_tag} Downloading ZIP from Google Drive...")
    try:
        downloaded = gdown.download(download_url, zip_path, quiet=False)
        # A missing result or missing file means there is nothing to extract
        if downloaded is None or not os.path.exists(zip_path):
            raise RuntimeError(f"Download failed for Google Drive file: {download_url}")

        print(f"{progress_tag} Extracting ZIP...")
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(extract_to)
    finally:
        # Always remove the archive (also after a failed extraction) to save space
        if os.path.exists(zip_path):
            os.remove(zip_path)

    print(f"{done_tag} Extracted files to: {extract_to}")


def semantic_drone_dataset_download(gdrive_url, extract_to="extracted"):
    """
    Downloads and extracts the Semantic Drone Dataset from a Google Drive URL.

    Parameters:
        gdrive_url (str): The shared Google Drive link to the ZIP file.
        extract_to (str): Directory to extract contents into. Default is 'extracted'.

    Raises:
        ValueError: If the URL contains no Google Drive file ID.
        RuntimeError: If the download did not produce a file.
    """
    _download_and_extract_gdrive_zip(gdrive_url, extract_to, "[INFO]", "[DONE]")


def uavdt_dataset_download(gdrive_url, extract_to="extracted"):
    """
    Downloads and extracts the UAVDT Dataset from a Google Drive URL.

    Parameters:
        gdrive_url (str): The shared Google Drive link to the ZIP file.
        extract_to (str): Directory to extract contents into. Default is 'extracted'.

    Raises:
        ValueError: If the URL contains no Google Drive file ID.
        RuntimeError: If the download did not produce a file.
    """
    _download_and_extract_gdrive_zip(gdrive_url, extract_to, "[+]", "[+]")


In [None]:
# ----------------------------
# Parse polygon and convert to YOLO bbox
# ----------------------------

# Parses XML annotation and converts polygon objects to axis-aligned bounding boxes
def parse_yolo_style_bbox_from_xml(xml_path, class_id_to_name):
    """
    Read an XML annotation file and return one bounding box per known polygon object.

    Parameters:
        xml_path (str): Path to the XML annotation file.
        class_id_to_name (dict): Maps class ID -> (class name, color); only objects
            whose <name> matches one of these class names are kept.

    Returns:
        list: Entries of the form ((x_min, y_min), (x_max, y_max), class_name),
        in pixel coordinates as stored in the file.

    Objects without a <name>, without a <polygon>, or whose polygon has no <pt>
    children are skipped instead of aborting the whole file.
    """
    # Build the lookup once instead of once per object
    known_names = {value[0] for value in class_id_to_name.values()}

    root = ET.parse(xml_path).getroot()
    bboxes = []
    for obj in root.findall('object'):
        name_node = obj.find('name')
        if name_node is None or name_node.text not in known_names:
            continue
        class_name = name_node.text

        polygon = obj.find('polygon')
        if polygon is None:
            continue

        # Extract points from polygon
        coords = [(float(pt.find('x').text), float(pt.find('y').text))
                  for pt in polygon.findall('pt')]
        if not coords:
            # An empty polygon has no extent; min()/max() would raise on it
            continue

        # Convert polygon to its enclosing bounding box
        xs, ys = zip(*coords)
        bboxes.append(((min(xs), min(ys)), (max(xs), max(ys)), class_name))
    return bboxes


# ----------------------------
# Save YOLO-format txt
# ----------------------------

# Saves the bounding boxes in YOLO format: <class_id> <x_center> <y_center> <width> <height>
def save_yolo_format(image_id, bboxes, image_width, image_height, output_path, class_id_to_name):
    """
    Write bounding boxes to ``output_path`` as YOLO label lines.

    Parameters:
        image_id: Unused; kept so existing callers keep working.
        bboxes (list): Entries of ((x_min, y_min), (x_max, y_max), class_name) in pixels.
        image_width, image_height: Image size in pixels used for normalization.
        output_path (str): Label file to (over)write.
        class_id_to_name (dict): Maps class ID -> (class name, color).

    Raises:
        ValueError: If a box names a class that is not in ``class_id_to_name``.

    Boxes are clipped to the image so every written value lies in [0, 1]; boxes
    with no area left after clipping are dropped. All lines are built before
    the file is opened, so a failure never leaves a half-written label file.
    """
    # Name -> ID lookup built once; the first ID wins if a name appears twice
    name_to_id = {}
    for cid, (name, _) in class_id_to_name.items():
        name_to_id.setdefault(name, cid)

    lines = []
    for (x_min, y_min), (x_max, y_max), class_name in bboxes:
        if class_name not in name_to_id:
            raise ValueError(f"Unknown class name: {class_name!r}")
        class_id = name_to_id[class_name]

        # Clip the corners to the image so normalized values cannot leave [0, 1]
        x_min = min(max(x_min, 0), image_width)
        x_max = min(max(x_max, 0), image_width)
        y_min = min(max(y_min, 0), image_height)
        y_max = min(max(y_max, 0), image_height)
        if x_max <= x_min or y_max <= y_min:
            # Nothing of the box lies inside the image (or it has zero area)
            continue

        x_center = (x_min + x_max) / 2 / image_width
        y_center = (y_min + y_max) / 2 / image_height
        width = (x_max - x_min) / image_width
        height = (y_max - y_min) / image_height
        # Six decimal places per value
        lines.append(f"{class_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}\n")

    with open(output_path, 'w') as f:
        f.writelines(lines)


# ----------------------------
# Convert dataset (YOLO only)
# ----------------------------

# Converts the full dataset by extracting YOLO-style annotations and saving them
def convert_fulldataset_yolo_only(dataset_path, output_dir, class_id_to_name):
    """
    Convert every ``.jpg`` under ``<dataset_path>/images`` to a YOLO image/label pair.

    For each image, boxes are parsed from both
    ``gt/bounding_box/label_me_xml/<id>.xml`` and ``gt/semantic/label_me_xml/<id>.xml``,
    the image is re-saved to ``<output_dir>/images`` and the combined boxes are
    written to ``<output_dir>/labels/<id>.txt``.

    Parameters:
        dataset_path (str): Root folder of the source dataset split.
        output_dir (str): Destination root; ``images/`` and ``labels/`` are created inside.
        class_id_to_name (dict): Maps class ID -> (class name, color).

    Images whose annotations cannot be parsed, or that cannot be decoded, are
    skipped with a warning.
    """
    # Image IDs are the file names minus the extension. splitext (not split('.'))
    # keeps IDs that themselves contain dots intact; sorting makes the run order stable.
    image_ids = sorted(
        os.path.splitext(fname)[0]
        for fname in os.listdir(f"{dataset_path}/images")
        if fname.endswith(".jpg")
    )

    # Create output folders
    os.makedirs(f"{output_dir}/images", exist_ok=True)
    os.makedirs(f"{output_dir}/labels", exist_ok=True)

    for image_id in tqdm(image_ids, desc="Converting to YOLO"):
        img_path = f"{dataset_path}/images/{image_id}.jpg"
        bbox_xml_path = f"{dataset_path}/gt/bounding_box/label_me_xml/{image_id}.xml"
        semantic_xml_path = f"{dataset_path}/gt/semantic/label_me_xml/{image_id}.xml"

        if not os.path.exists(img_path):
            print(f"[WARNING] Image not found: {img_path}, skipping...")
            continue

        try:
            # Parse bounding box and semantic annotations
            bboxes1 = parse_yolo_style_bbox_from_xml(bbox_xml_path, class_id_to_name)
            bboxes2 = parse_yolo_style_bbox_from_xml(semantic_xml_path, class_id_to_name)
            all_bboxes = bboxes1 + bboxes2
        except Exception as e:
            print(f"[WARNING] Skipping image {image_id} due to parse error: {e}")
            continue

        try:
            # Open the image and force a full decode so corrupt files are caught here
            image = Image.open(img_path)
            image.load()
            image_width, image_height = image.size
        except Exception as e:
            print(f"[WARNING] Could not load image {image_id}: {e}")
            continue

        # Save image to output directory
        image.save(f"{output_dir}/images/{image_id}.jpg")

        # Save YOLO-format labels to output directory
        yolo_annotation_path = f"{output_dir}/labels/{image_id}.txt"
        save_yolo_format(image_id, all_bboxes, image_width, image_height, yolo_annotation_path, class_id_to_name)

    print("[+] YOLO-format annotation conversion complete!")


In [None]:
# Translation table from UAVDT class IDs (keys) to the extended class IDs used
# in the combined dataset (values).
uavdt_to_extended = dict([
    (0, 8),    # car
    (1, 11),   # truck
    (2, 12),   # bus
    (3, 13),   # other vehicle
])

# === Function to convert a single annotation file to YOLO format ===
def convert_annotation(anno_path, label_path, image_path, stats):
    """
    Convert one comma-separated annotation file into a YOLO label file.

    Each annotation line must have at least 8 comma-separated fields; fields
    0-3 are read as x, y, w, h in pixels and field 5 as the class ID.

    Parameters:
        anno_path (str): Source annotation file.
        label_path (str): YOLO label file to (over)write.
        image_path (str): Matching image; its size is used for normalization.
        stats (dict): Counters updated in place:
            "total"         - non-blank annotation lines seen,
            "converted"     - lines written as labels,
            "malformed"     - lines with too few or non-numeric fields,
            "missing_image" - files whose image is missing or unreadable,
            "skipped"       - per ORIGINAL class ID, lines dropped because the
                              class is unmapped or the box is invalid.
    """
    if not os.path.exists(image_path):
        stats["missing_image"] += 1
        return

    # cv2.imread signals an unreadable file by returning None
    try:
        img = cv2.imread(image_path)
    except Exception:
        img = None
    if img is None:
        stats["missing_image"] += 1
        return
    height, width = img.shape[:2]

    with open(anno_path, 'r') as fin, open(label_path, 'w') as fout:
        for line in fin:
            line = line.strip()
            if not line:
                # Blank lines carry no box and are not counted at all
                continue

            # Count every candidate box up front so that
            # total == converted + skipped + malformed
            stats["total"] += 1

            parts = line.split(',')
            if len(parts) < 8:
                stats["malformed"] += 1
                continue

            try:
                # Parse bounding box and class info
                x, y, w, h = map(float, parts[0:4])
                original_cls = int(parts[5])
            except ValueError:
                stats["malformed"] += 1
                continue

            # Skip classes not in our mapping
            if original_cls not in uavdt_to_extended:
                stats["skipped"][original_cls] += 1
                continue

            # Convert to new class ID
            cls = uavdt_to_extended[original_cls]

            # Convert to YOLO format (normalized center_x, center_y, width, height)
            x_center = (x + w / 2) / width
            y_center = (y + h / 2) / height
            w /= width
            h /= height

            # Validate normalized coordinates
            if not (0 <= x_center <= 1 and 0 <= y_center <= 1 and w > 0 and h > 0):
                # Keyed by the original class so all skip counts share one ID space
                stats["skipped"][original_cls] += 1
                continue

            # Write label line
            fout.write(f"{cls} {x_center:.6f} {y_center:.6f} {w:.6f} {h:.6f}\n")
            stats["converted"] += 1

# === Step 1: Convert all UAVDT annotations to YOLO format ===
def convert_dataset(root_dir):
    """
    Convert every ``M*/annotations/*.txt`` file under ``root_dir`` to a YOLO label.

    Labels are written to a ``labels/`` folder next to each sequence's
    ``annotations/`` folder, and the matching image is looked up in the
    sequence's ``images/`` folder. A summary of the counters collected by
    ``convert_annotation`` is printed at the end.
    """
    # Every annotation file inside any Mxxxx/annotations/ directory
    annotation_paths = glob(os.path.join(root_dir, "M*/annotations/*.txt"))

    # Counters shared with convert_annotation, which updates them in place
    stats = dict(
        total=0,
        converted=0,
        malformed=0,
        missing_image=0,
        skipped=defaultdict(int),
    )

    print(f"🔄 Converting {len(annotation_paths)} annotation files to YOLO format...")

    for anno_path in tqdm(annotation_paths, desc="Converting", unit="file"):
        # <sequence>/annotations/<file> -> sequence folder and bare file name
        annotations_dir, file_name = os.path.split(anno_path)
        sequence_dir = os.path.dirname(annotations_dir)

        # Labels live in a sibling "labels" folder of the sequence
        label_dir = os.path.join(sequence_dir, "labels")
        os.makedirs(label_dir, exist_ok=True)

        # The image shares the annotation's name, with .jpg in place of .txt
        image_path = os.path.join(sequence_dir, "images", file_name.replace(".txt", ".jpg"))

        convert_annotation(anno_path, os.path.join(label_dir, file_name), image_path, stats)

    # Summary of the conversion process
    skipped_by_class = stats["skipped"]
    print("\nConversion complete.")
    print(f"Total boxes:     {stats['total']}")
    print(f"Converted boxes: {stats['converted']}")
    print(f"Skipped boxes:   {sum(skipped_by_class.values())}")
    for cls, count in sorted(skipped_by_class.items()):
        print(f"   - Skipped class {cls}: {count}")
    print(f"Malformed lines: {stats['malformed']}")
    print(f"Missing images:  {stats['missing_image']}")

# === Step 2: Split dataset into train/val and copy files ===
def copy_split_sequences(src_root, dst_root, train_ratio=0.8):
    """
    Split the ``M*`` sequences under ``src_root`` into train/val and copy their files.

    Whole sequences are assigned to a split (fixed random_state of 42), then each
    sequence's ``images/*.jpg`` and ``labels/*.txt`` are copied into
    ``<dst_root>/<split>/images`` and ``<dst_root>/<split>/labels``.

    NOTE(review): files keep their base name in a flat destination folder, so
    equally named files from different sequences would overwrite each other —
    confirm the file names are unique across sequences.
    """
    # All sequence folders (Mxxxx), in a stable order
    all_sequences = sorted(glob(os.path.join(src_root, "M*")))

    # Sequence-level split into training and validation
    train_seqs, val_seqs = train_test_split(all_sequences, train_size=train_ratio, random_state=42)

    for split_name, split_list in (('train', train_seqs), ('val', val_seqs)):
        images_dst = os.path.join(dst_root, split_name, "images")
        labels_dst = os.path.join(dst_root, split_name, "labels")

        for seq_path in tqdm(split_list, desc=f"Copying {split_name}"):
            os.makedirs(images_dst, exist_ok=True)
            os.makedirs(labels_dst, exist_ok=True)

            # (source subfolder, file pattern, destination) — images first, then labels
            copy_plan = (
                ("images", "*.jpg", images_dst),
                ("labels", "*.txt", labels_dst),
            )
            for subfolder, pattern, destination in copy_plan:
                for src_file in glob(os.path.join(seq_path, subfolder, pattern)):
                    shutil.copy(src_file, os.path.join(destination, os.path.basename(src_file)))

    print("\n[+] Dataset split into 'train/' and 'val/' folders with images and YOLO-format labels.")


In [None]:
import os
import shutil
import random
from tqdm import tqdm

# Copies image/label pairs from the source directories into the target directories
def move_files(file_list, 
               source_image_dir, 
               source_annotation_dir,
               target_image_dir, 
               target_annotation_dir):
    """
    Copy the ``.jpg``/``.txt`` pair of every ID in ``file_list`` to the targets.

    Despite the name, files are copied, not moved: the sources stay in place.
    An ID is only copied when both its image and its annotation exist;
    incomplete pairs are skipped silently.
    """
    # Make sure both destinations exist
    for folder in (target_image_dir, target_annotation_dir):
        os.makedirs(folder, exist_ok=True)

    # The progress bar is labelled with the parent folder of the image target
    split_label = os.path.basename(os.path.dirname(target_image_dir))

    for image_id in tqdm(file_list, desc=f"Moving to {split_label}"):
        jpg_name = f"{image_id}.jpg"
        txt_name = f"{image_id}.txt"

        src_image = os.path.join(source_image_dir, jpg_name)
        src_label = os.path.join(source_annotation_dir, txt_name)

        # Skip IDs that are missing either half of the pair
        if not (os.path.exists(src_image) and os.path.exists(src_label)):
            continue

        shutil.copy(src_image, os.path.join(target_image_dir, jpg_name))
        shutil.copy(src_label, os.path.join(target_annotation_dir, txt_name))

# Function to split the dataset into training and validation sets, and copy the files
def split_and_move_dataset(source_base_dir="./datasets/semantic_yolo",
                           target_base_dir="./datasets/new_dataset_yolo_split",
                           split_ratio=0.8,
                           seed=42):
    """
    Randomly split the image/label pairs of a YOLO dataset into train and val.

    Parameters:
        source_base_dir (str): Folder containing ``images/`` and ``labels/``.
        target_base_dir (str): Destination root; ``train/`` and ``val/`` are filled.
        split_ratio (float): Fraction of images assigned to the training split.
        seed (int): Seed for the shuffle; also seeds the global ``random`` module.

    The split is reproducible for a given seed and set of file names.
    """
    # Set the random seed for reproducibility
    random.seed(seed)

    # Define paths for the image and label directories
    image_dir = os.path.join(source_base_dir, "images")
    label_dir = os.path.join(source_base_dir, "labels")

    # Get all image IDs (file names without extensions). os.listdir returns
    # entries in arbitrary, filesystem-dependent order, so sort first —
    # otherwise the seeded shuffle would not give the same split everywhere.
    image_ids = sorted(os.path.splitext(f)[0] for f in os.listdir(image_dir) if f.endswith(".jpg"))

    # Shuffle the image IDs to randomize the split
    random.shuffle(image_ids)

    # Determine the split index based on the split ratio
    split_idx = int(len(image_ids) * split_ratio)

    # Split the image IDs into training and validation sets
    train_ids = image_ids[:split_idx]
    val_ids = image_ids[split_idx:]

    # Copy the training images and annotations to the target directories
    move_files(train_ids,
               source_image_dir=image_dir,
               source_annotation_dir=label_dir,
               target_image_dir=os.path.join(target_base_dir, "train/images"),
               target_annotation_dir=os.path.join(target_base_dir, "train/labels"))

    # Copy the validation images and annotations to the target directories
    move_files(val_ids,
               source_image_dir=image_dir,
               source_annotation_dir=label_dir,
               target_image_dir=os.path.join(target_base_dir, "val/images"),
               target_annotation_dir=os.path.join(target_base_dir, "val/labels"))

    # Print the summary of the dataset split (counts are IDs assigned to each
    # split; move_files may skip IDs that lack a label file)
    print(f"\n[✓] Dataset split completed: {len(train_ids)} train / {len(val_ids)} val samples")


In [None]:
from PIL import Image
import os
from tqdm import tqdm

def normalize_label_file(label_file, img_width, img_height):
    """
    Clamp every coordinate in a YOLO label file to the [0, 1] range, in place.

    Parameters:
        label_file (str): Path of the label file to rewrite.
        img_width, img_height: Currently unused; kept so existing callers keep working.

    Raises:
        ValueError: If a non-blank line does not consist of an integer class ID
            followed by exactly four numbers. The file is left untouched in
            that case.

    Blank lines are dropped. Every line is parsed before the file is reopened
    for writing, so a malformed line can no longer wipe the label file.
    """
    with open(label_file, 'r') as f:
        lines = f.readlines()

    normalized_lines = []
    for line_no, line in enumerate(lines, start=1):
        # Split the line by whitespace to get the class and coordinates
        parts = line.strip().split()
        if not parts:
            continue
        if len(parts) != 5:
            raise ValueError(
                f"{label_file}, line {line_no}: expected 5 fields, got {len(parts)}")

        class_id = int(parts[0])
        # Clamp each value independently into [0, 1]
        x_center, y_center, width, height = (
            min(1.0, max(0.0, float(value))) for value in parts[1:])

        normalized_lines.append(f"{class_id} {x_center} {y_center} {width} {height}\n")

    # Only now touch the file: everything parsed successfully
    with open(label_file, 'w') as f:
        f.writelines(normalized_lines)


def get_image_size(img_path):
    """
    Return the pixel dimensions of the image at ``img_path`` as (width, height).

    The image is opened with PIL (Pillow) inside a context manager, so it is
    closed again before the function returns.
    """
    with Image.open(img_path) as opened:
        dimensions = opened.size  # (width, height)
    return dimensions


def normalize_all_labels(labels_dir, img_dir):
    """
    Run ``normalize_label_file`` on every ``.txt`` file in ``labels_dir``.

    For each label file the matching ``.jpg`` is looked up in ``img_dir``; its
    size is read and passed along. Labels without a matching image are
    reported with a warning and left as they are.
    """
    for label_file in tqdm(os.listdir(labels_dir)):
        # Only label files are of interest
        if not label_file.endswith('.txt'):
            continue

        # The image is assumed to share the label's name, with a .jpg extension
        img_path = os.path.join(img_dir, label_file.replace('.txt', '.jpg'))
        if not os.path.exists(img_path):
            print(f"Warning: Image for label {label_file} not found!")
            continue

        img_width, img_height = get_image_size(img_path)
        normalize_label_file(os.path.join(labels_dir, label_file), img_width, img_height)

    print("Normalization Complete")


## Calling Functions

In [None]:
# Download both source datasets. Each archive is extracted into the same
# "datasets" folder; the variable gdrive_url is reused for the second link.

# Google Drive share link for the Semantic Drone Dataset ZIP
gdrive_url = "https://drive.google.com/file/d/1UppumYqYOi-kto6BWPfFxwJK2Eph46oY/view?usp=sharing"
# Download the ZIP and extract it into ./datasets
semantic_drone_dataset_download(gdrive_url, extract_to="datasets")

# Google Drive share link for the UAVDT Dataset ZIP
gdrive_url = "https://drive.google.com/file/d/12cbrTaBAMIsuU-mwAA7IgDk9wSLC9cC-/view?usp=sharing"
# Download the ZIP and extract it into ./datasets
uavdt_dataset_download(gdrive_url, extract_to="datasets")


In [None]:
# Path to the Semantic Drone Dataset training set (expected to contain
# images/ and gt/.../label_me_xml/ as read by convert_fulldataset_yolo_only)
dataset_path = "./datasets/semantic_drone_dataset/training_set" 

# Output directory where the YOLO formatted dataset will be saved
# (images/ and labels/ subfolders are created inside it)
output_dir = "./datasets/semantic_yolo"

# Convert the full dataset into YOLO format: each image is re-saved and its
# XML polygon annotations are written as a YOLO label file, keeping only the
# classes named in class_id_to_name
convert_fulldataset_yolo_only(dataset_path, output_dir, class_id_to_name)


In [None]:
# UAVDT-2024 Dataset Processing

# Path to the source UAVDT-2024 dataset (root folder holding the M* sequences)
source_root = "./datasets/UAVDT-2024"

# Output directory where the train/val split in YOLO format will be saved
output_root = "./datasets/new_dataset_yolo_split"

# Convert the UAVDT-2024 annotations into YOLO label files.
# The labels are written inside source_root, next to each sequence's annotations.
convert_dataset(source_root)

# Split the sequences into training and validation sets with a ratio of 80:20
# and copy their images and labels into output_root/train and output_root/val
copy_split_sequences(source_root, output_root, train_ratio=0.8)


# Semantic Drone Datasets Processing

# Split the converted Semantic Drone dataset into training and validation sets.
# Called with its defaults, it reads ./datasets/semantic_yolo and copies into
# ./datasets/new_dataset_yolo_split — the same folder as output_root above —
# so both datasets end up in one combined train/val split.
split_and_move_dataset()


In [None]:
# Set your paths for the training dataset
dataset_path = "./datasets/new_dataset_yolo_split/train"

# Directory where the images are stored in the training dataset
image_dir = os.path.join(dataset_path, "images")

# Directory where the label files are stored in the training dataset
annotations_dir = os.path.join(dataset_path, "labels")

# Rewrite every training label file in place, clamping each coordinate value
# to the [0, 1] range; labels without a matching .jpg only trigger a warning
normalize_all_labels(annotations_dir, image_dir)

# Set your paths for the validation dataset
# (dataset_path, image_dir and annotations_dir are reassigned from here on)
dataset_path = "./datasets/new_dataset_yolo_split/val"

# Directory where the images are stored in the validation dataset
image_dir = os.path.join(dataset_path, "images")

# Directory where the label files are stored in the validation dataset
annotations_dir = os.path.join(dataset_path, "labels")

# Apply the same in-place clamping to all validation label files
normalize_all_labels(annotations_dir, image_dir)


In [None]:
# Set the path to the training labels directory
labels_dir = './datasets/new_dataset_yolo_split/train/labels'

# Get the list of rare class IDs by analyzing the label files in the specified directory.
# NOTE(review): `get_rare_class_ids` is not defined in the part of the notebook
# visible here — confirm it is defined before this cell runs on a fresh kernel.
# Presumably it counts the occurrences of each class and returns those below
# the given threshold (3000 here) — verify against its definition.
rare_class_ids = get_rare_class_ids(label_dir=labels_dir, class_id_to_name=class_id_to_name, rare_threshold=3000)
