In [1]:
import json
import os
from tqdm import tqdm

# --- CONFIGURATION (Updated with your paths) ---
# Root folder that holds both the Train and the Val split of the dataset.
BASE_PATH = r"C:\Users\ADMIN\Documents\Dev_Projects\Traffic_Infosys\UVH-26_dataset"

# TRAIN PATHS: annotation JSON, image tree, and output folder for YOLO labels
TRAIN_JSON, TRAIN_IMAGES_DIR, TRAIN_LABELS_DIR = (
    os.path.join(BASE_PATH, "UVH-26-Train", leaf)
    for leaf in ("UVH-26-MV-Train.json", "data", "yolo_labels")
)

# VAL PATHS: same layout as Train; the yolo_labels folder is created inside
# the Val folder as well so both splits stay structured the same way
VAL_JSON, VAL_IMAGES_DIR, VAL_LABELS_DIR = (
    os.path.join(BASE_PATH, "UVH-26-Val", leaf)
    for leaf in ("UVH-26-MV-Val.json", "data", "yolo_labels")
)

def create_image_map(root_dir):
    """
    Scans the folder to find which subfolder (e.g., '001', '002')
    each image lives in.

    Only files ending in .jpg, .jpeg or .png (case-insensitive) are recorded.
    The map is keyed by the bare filename, so two images with the same name in
    different subfolders cannot both be represented: the one visited last
    wins. Traversal is sorted so that the winner is the same on every run, and
    each collision is reported instead of being overwritten silently.

    Args:
        root_dir: Top-level image directory to walk recursively.

    Returns: dict { 'filename.jpg': 'relative/path/to/folder' }
        The folder is relative to root_dir; images directly inside root_dir
        map to '.'.
    """
    print(f"Scanning directory structure: {root_dir}...")
    image_map = {}
    duplicate_count = 0
    for root, dirs, files in os.walk(root_dir):
        # Sorting in place makes os.walk descend in a stable order, so the
        # result no longer depends on filesystem enumeration order.
        dirs.sort()
        # Get the folder name relative to the root (e.g., "001")
        rel_folder = os.path.relpath(root, root_dir)
        for file in sorted(files):
            if not file.lower().endswith(('.jpg', '.jpeg', '.png')):
                continue
            if file in image_map:
                duplicate_count += 1
                # Only list the first few collisions to keep the output short.
                if duplicate_count <= 5:
                    print(
                        f"Warning: duplicate filename '{file}' found in "
                        f"'{image_map[file]}' and '{rel_folder}'; using '{rel_folder}'."
                    )
            image_map[file] = rel_folder
    print(f"Found {len(image_map)} images.")
    if duplicate_count > 0:
        print(
            f"Warning: {duplicate_count} duplicate filename(s) across subfolders; "
            "only one folder is kept per filename."
        )
    return image_map

def convert_coco_to_yolo(json_path, image_root, output_label_root, subset_name):
    """
    Convert the annotations in a COCO-style JSON file into YOLO label files.

    One ``<image stem>.txt`` file is written per annotated image, inside
    ``output_label_root`` under the same relative subfolder in which the image
    was found below ``image_root``. Each line has the form
    ``class_id x_center y_center width height`` with coordinates normalised by
    the image width/height recorded in the JSON.

    Label files are written once, in overwrite mode, after all annotations
    have been grouped per image. Re-running the conversion therefore replaces
    existing labels instead of appending duplicate boxes to them.

    Args:
        json_path: Path to the annotation JSON. It must contain an ``images``
            list (``id``, ``file_name``, ``width``, ``height``) and an
            ``annotations`` list (``image_id``, ``bbox``, ``category_id``).
        image_root: Directory that is scanned recursively for the images.
        output_label_root: Directory that receives the label files.
        subset_name: Name used only in the progress messages.

    Returns:
        None. Progress and warnings are printed.
    """
    print(f"\n--- Processing {subset_name} ---")

    # 1. Load JSON
    print(f"Loading JSON: {json_path}")
    # JSON is UTF-8 by specification; do not depend on the platform default
    # encoding (which is a legacy code page on Windows).
    with open(json_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # 2. Map Image Filenames to their Subfolders
    # This ensures we put the label in the exact same subfolder structure as the image
    location_map = create_image_map(image_root)

    # 3. Process Annotations
    print("Generating Label Files...")

    # Create a quick lookup for image dimensions
    images_info = {img['id']: img for img in data['images']}

    # Track annotations that cannot be converted
    missing_count = 0   # image file not found on disk
    invalid_count = 0   # unknown image_id or non-positive image size

    # label file path -> label lines, in annotation order
    lines_by_label = {}

    for ann in tqdm(data['annotations']):
        img_info = images_info.get(ann['image_id'])
        if img_info is None:
            invalid_count += 1
            continue
        fname = img_info['file_name']

        # Check if we found this image on disk
        if fname not in location_map:
            missing_count += 1
            continue

        img_w = img_info['width']
        img_h = img_info['height']
        if img_w <= 0 or img_h <= 0:
            # Normalising would divide by zero (or flip signs); skip instead.
            invalid_count += 1
            continue

        # bbox is unpacked as top-left corner plus width/height
        x_min, y_min, w, h = ann['bbox']

        # Normalized XYWH
        x_center = (x_min + w / 2) / img_w
        y_center = (y_min + h / 2) / img_h
        width_norm = w / img_w
        height_norm = h / img_h

        # Class ID (UVH starts at 1, YOLO needs 0)
        # NOTE(review): assumes category ids are contiguous from 1 — confirm
        # against the "categories" section of the JSON.
        class_id = ann['category_id'] - 1

        # Mirror the image's subfolder (e.g., "001") below the label root
        label_name = os.path.splitext(fname)[0] + ".txt"
        label_path = os.path.join(output_label_root, location_map[fname], label_name)

        lines_by_label.setdefault(label_path, []).append(
            f"{class_id} {x_center:.6f} {y_center:.6f} {width_norm:.6f} {height_norm:.6f}\n"
        )

    # 4. Write each label file exactly once
    created_dirs = set()
    for label_path, lines in lines_by_label.items():
        target_dir = os.path.dirname(label_path)
        if target_dir not in created_dirs:
            os.makedirs(target_dir, exist_ok=True)
            created_dirs.add(target_dir)
        with open(label_path, "w") as f_out:
            f_out.writelines(lines)

    print(f"Done. Labels saved to: {output_label_root}")
    if missing_count > 0:
        print(f"Warning: {missing_count} annotations were skipped because the image file wasn't found.")
    if invalid_count > 0:
        print(f"Warning: {invalid_count} annotations were skipped because their image entry was missing or had an invalid size.")

# --- EXECUTE ---
if __name__ == "__main__":
    # Convert both splits in order: TRAIN first, then VAL.
    for job in (
        (TRAIN_JSON, TRAIN_IMAGES_DIR, TRAIN_LABELS_DIR, "TRAIN"),
        (VAL_JSON, VAL_IMAGES_DIR, VAL_LABELS_DIR, "VAL"),
    ):
        convert_coco_to_yolo(*job)


--- Processing TRAIN ---
Loading JSON: C:\Users\ADMIN\Documents\Dev_Projects\Traffic_Infosys\UVH-26_dataset\UVH-26-Train\UVH-26-MV-Train.json
Scanning directory structure: C:\Users\ADMIN\Documents\Dev_Projects\Traffic_Infosys\UVH-26_dataset\UVH-26-Train\data...
Found 21349 images.
Generating Label Files...


100%|████████████████████████████████████████████████████████████████████████| 252723/252723 [02:17<00:00, 1837.90it/s]


Done. Labels saved to: C:\Users\ADMIN\Documents\Dev_Projects\Traffic_Infosys\UVH-26_dataset\UVH-26-Train\yolo_labels

--- Processing VAL ---
Loading JSON: C:\Users\ADMIN\Documents\Dev_Projects\Traffic_Infosys\UVH-26_dataset\UVH-26-Val\UVH-26-MV-Val.json
Scanning directory structure: C:\Users\ADMIN\Documents\Dev_Projects\Traffic_Infosys\UVH-26_dataset\UVH-26-Val\data...
Found 5297 images.
Generating Label Files...


100%|██████████████████████████████████████████████████████████████████████████| 63497/63497 [00:34<00:00, 1833.63it/s]

Done. Labels saved to: C:\Users\ADMIN\Documents\Dev_Projects\Traffic_Infosys\UVH-26_dataset\UVH-26-Val\yolo_labels





In [3]:
import os
from pathlib import Path

# --- CONFIGURATION ---
BASE_PATH = r"C:\Users\ADMIN\Documents\Dev_Projects\Traffic_Infosys\UVH-26_dataset"

# Image/label folder pairs to check, keyed by the subset name used in the report
DATASETS = {
    subset: {
        "images": os.path.join(BASE_PATH, folder, "data"),
        "labels": os.path.join(BASE_PATH, folder, "yolo_labels"),
    }
    for subset, folder in (("TRAIN", "UVH-26-Train"), ("VALIDATION", "UVH-26-Val"))
}

# File extensions (lower-case, with the dot) that count as images
VALID_IMG_EXT = {'.bmp', '.jpeg', '.jpg', '.png'}

def check_pairs(subset_name, img_root, lbl_root):
    """
    Report whether every image under img_root has a label file under lbl_root.

    The label for an image is expected at the same relative path with the
    extension replaced by '.txt'. Files whose extension is not listed in
    VALID_IMG_EXT are ignored. Results are printed; nothing is returned.

    Args:
        subset_name: Name shown in the report header.
        img_root: Directory scanned recursively for images.
        lbl_root: Directory expected to mirror img_root with .txt files.
    """
    print(f"\n--- Checking {subset_name} SET ---")
    print(f"Images: {img_root}")
    print(f"Labels: {lbl_root}")

    if not os.path.exists(img_root):
        print(f"[ERROR] Image path does not exist: {img_root}")
        return

    # 1. Collect every image as a path relative to img_root (e.g., "001/image123.jpg")
    img_files = [
        os.path.relpath(os.path.join(folder, entry), img_root)
        for folder, _, entries in os.walk(img_root)
        for entry in entries
        if os.path.splitext(entry)[1].lower() in VALID_IMG_EXT
    ]
    total_images = len(img_files)
    print(f"Found {total_images} images.")

    # 2. Look up the matching label for each image
    missing_labels = []
    empty_labels = 0
    for rel_img in img_files:
        # "001\image.jpg" -> "<lbl_root>\001\image.txt"
        label_file = os.path.join(lbl_root, Path(rel_img).with_suffix('.txt'))
        if not os.path.exists(label_file):
            missing_labels.append(rel_img)
        elif os.path.getsize(label_file) == 0:
            # A zero-byte label file means no detections for this image
            empty_labels += 1

    # 3. Report Results
    missing_total = len(missing_labels)
    if missing_total == 0:
        print(f"SUCCESS: All {total_images} images have label files.")
    else:
        print(f" WARNING: {missing_total} images are MISSING label files.")
        # Show only the first 5 missing entries for debugging
        for missing_path in missing_labels[:5]:
            print(f"   Missing: {missing_path}")
        if missing_total > 5:
            print(f"   ... and {missing_total-5} more.")

    if empty_labels > 0:
        print(f" Note: {empty_labels} label files are empty (Background images with no objects). This is normal if intended.")

if __name__ == "__main__":
    # Check every configured subset in the order it is declared in DATASETS.
    for name in DATASETS:
        paths = DATASETS[name]
        check_pairs(name, paths["images"], paths["labels"])


--- Checking TRAIN SET ---
Images: C:\Users\ADMIN\Documents\Dev_Projects\Traffic_Infosys\UVH-26_dataset\UVH-26-Train\data
Labels: C:\Users\ADMIN\Documents\Dev_Projects\Traffic_Infosys\UVH-26_dataset\UVH-26-Train\yolo_labels
Found 21349 images.
SUCCESS: All 21349 images have label files.

--- Checking VALIDATION SET ---
Images: C:\Users\ADMIN\Documents\Dev_Projects\Traffic_Infosys\UVH-26_dataset\UVH-26-Val\data
Labels: C:\Users\ADMIN\Documents\Dev_Projects\Traffic_Infosys\UVH-26_dataset\UVH-26-Val\yolo_labels
Found 5297 images.
SUCCESS: All 5297 images have label files.
