<a href="https://colab.research.google.com/github/noorelhaj98-ship-it/pytorch-tutorial/blob/main/traffic.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [26]:
!pip install roboflow

from roboflow import Roboflow
rf = Roboflow(api_key="snA4vsNSH6S3ia5q9egX")
project = rf.workspace("traffic-tmfdb").project("vechiles-hq6la-r7fco")
version = project.version(2)
dataset = version.download("yolov8")


loading Roboflow workspace...
loading Roboflow project...


In [27]:
import os

images_dir = "/content/Vechiles-2/train/images"
labels_dir = "/content/Vechiles-2/train/labels"

image_exts = (".jpg", ".jpeg", ".png")

images = sorted([
    os.path.splitext(f)[0]
    for f in os.listdir(images_dir)
    if f.lower().endswith(image_exts)
])

labels = sorted([
    os.path.splitext(f)[0]
    for f in os.listdir(labels_dir)
    if f.lower().endswith(".txt")
])

print("Total images:", len(images))
print("Total labels:", len(labels))

image_set = set(images)
label_set = set(labels)

images_without_labels = image_set - label_set
labels_without_images = label_set - image_set

if not images_without_labels and not labels_without_images:
    print("\n‚úÖ RESULT: Every image has a matching label and vice versa")
else:
    print("\n‚ùå RESULT: Mismatches found")

    if images_without_labels:
        print("\nImages without labels:")
        for f in sorted(images_without_labels)[:10]:
            print(" ", f)

    if labels_without_images:
        print("\nLabels without images:")
        for f in sorted(labels_without_images)[:10]:
            print(" ", f)


Total images: 8041
Total labels: 8041

‚úÖ RESULT: Every image has a matching label and vice versa


In [28]:
from collections import Counter
import os

class_counter = Counter()

for label_name in os.listdir(labels_dir):
    if not label_name.endswith(".txt"):
        continue

    label_path = os.path.join(labels_dir, label_name)

    with open(label_path, "r") as f:
        for line in f:
            parts = line.strip().split()
            if not parts:
                continue
            class_id = int(parts[0])
            class_counter[class_id] += 1

print("\n=== CLASS DISTRIBUTION ===")
for cls in sorted(class_counter):
    print(f"Class {cls}: {class_counter[cls]}")



=== CLASS DISTRIBUTION ===
Class 0: 4237
Class 1: 12357
Class 2: 15656
Class 3: 21294


In [29]:
import os
import glob

deleted_images = 0
updated_labels = 0

for label_name in os.listdir(labels_dir):
    if not label_name.endswith(".txt"):
        continue

    label_path = os.path.join(labels_dir, label_name)

    with open(label_path, "r") as f:
        lines = f.readlines()

    if not lines:
        continue

    new_lines = []
    has_non_zero_class = False

    for line in lines:
        parts = line.strip().split()
        if not parts:
            continue

        class_id = int(parts[0])

        # ‚ùå remove class 0
        if class_id == 0:
            continue

        # üîÅ shift class ids down
        parts[0] = str(class_id - 1)
        new_lines.append(" ".join(parts) + "\n")
        has_non_zero_class = True

    base_name = os.path.splitext(label_name)[0]

    # CASE 1: image had ONLY class 0 ‚Üí delete image + label
    if not has_non_zero_class:
        os.remove(label_path)

        image_matches = glob.glob(os.path.join(images_dir, base_name + ".*"))
        for img_path in image_matches:
            os.remove(img_path)

        deleted_images += 1
        continue

    # CASE 2: mixed classes ‚Üí overwrite label file
    with open(label_path, "w") as f:
        f.writelines(new_lines)

    updated_labels += 1

print("=== CLASS 0 DELETION SUMMARY ===")
print(f"Deleted images (only class 0): {deleted_images}")
print(f"Updated label files: {updated_labels}")


=== CLASS 0 DELETION SUMMARY ===
Deleted images (only class 0): 296
Updated label files: 7745


In [30]:
import os
from PIL import Image


In [31]:
images_dir = "/content/Vechiles-2/train/images"  # change if needed


In [32]:
converted = 0
skipped = 0

for filename in os.listdir(images_dir):
    if filename.lower().endswith((".jpg", ".jpeg", ".png")):
        img_path = os.path.join(images_dir, filename)

        try:
            with Image.open(img_path) as img:
                # Check image mode
                if img.mode != "RGB":
                    img = img.convert("RGB")
                    img.save(img_path)
                    converted += 1
                else:
                    skipped += 1

        except Exception as e:
            print(f"‚ùå Error processing {filename}: {e}")

print(f"‚úÖ Converted to RGB: {converted}")
print(f"‚è≠Ô∏è Already RGB (skipped): {skipped}")


‚úÖ Converted to RGB: 0
‚è≠Ô∏è Already RGB (skipped): 7745


In [33]:
import os
from PIL import Image, ImageOps


In [34]:
fixed = 0
skipped = 0

for filename in os.listdir(images_dir):
    if filename.lower().endswith((".jpg", ".jpeg", ".png")):
        img_path = os.path.join(images_dir, filename)

        try:
            with Image.open(img_path) as img:
                # Apply EXIF-based auto orientation
                img_oriented = ImageOps.exif_transpose(img)

                # Save back only if something changed
                if img_oriented != img:
                    img_oriented.save(img_path)
                    fixed += 1
                else:
                    skipped += 1

        except Exception as e:
            print(f"‚ùå Error processing {filename}: {e}")

print(f"üîÑ Auto-oriented images: {fixed}")
print(f"‚è≠Ô∏è No orientation change needed: {skipped}")


üîÑ Auto-oriented images: 7745
‚è≠Ô∏è No orientation change needed: 0


In [35]:
import os
import cv2
import numpy as np
from collections import Counter


In [36]:
base_path = "/content/Vechiles-2/train"
images_dir = os.path.join(base_path, "images")
labels_dir = os.path.join(base_path, "labels")


In [37]:
image_sizes = []

for img_name in os.listdir(images_dir):
    if img_name.lower().endswith((".jpg", ".png", ".jpeg")):
        img = cv2.imread(os.path.join(images_dir, img_name))
        h, w, _ = img.shape
        image_sizes.append((w, h))

widths, heights = zip(*image_sizes)

print("=== IMAGE SIZE ANALYSIS ===")
print(f"Total images: {len(image_sizes)}")
print(f"Min size: {min(widths)}x{min(heights)}")
print(f"Max size: {max(widths)}x{max(heights)}")
print(f"Avg size: {int(np.mean(widths))}x{int(np.mean(heights))}")


=== IMAGE SIZE ANALYSIS ===
Total images: 7745
Min size: 640x640
Max size: 640x640
Avg size: 640x640


In [38]:
empty_labels = []

for label_name in os.listdir(labels_dir):
    label_path = os.path.join(labels_dir, label_name)
    if os.path.getsize(label_path) == 0:
        empty_labels.append(label_name)

print("\n=== EMPTY LABELS ===")
print(f"Empty label files: {len(empty_labels)}")



=== EMPTY LABELS ===
Empty label files: 0


In [39]:
box_areas = []
small_boxes = 0

for label_name in os.listdir(labels_dir):
    with open(os.path.join(labels_dir, label_name)) as f:
        for line in f:
            _, _, _, w, h = map(float, line.split())
            area = w * h
            box_areas.append(area)

            if area < 0.01:  # <1% of image
                small_boxes += 1

print("\n=== BOUNDING BOX ANALYSIS ===")
print(f"Total boxes: {len(box_areas)}")
print(f"Avg box area (%): {np.mean(box_areas)*100:.2f}")
print(f"Small boxes (<1%): {small_boxes}")



=== BOUNDING BOX ANALYSIS ===
Total boxes: 49307
Avg box area (%): 1.24
Small boxes (<1%): 32354


In [40]:
aspect_ratios = []

for label_name in os.listdir(labels_dir):
    with open(os.path.join(labels_dir, label_name)) as f:
        for line in f:
            _, _, _, w, h = map(float, line.split())
            if h > 0:
                aspect_ratios.append(w / h)

print("\n=== ASPECT RATIO ===")
print(f"Avg aspect ratio: {np.mean(aspect_ratios):.2f}")



=== ASPECT RATIO ===
Avg aspect ratio: 1.30
