In [None]:
import os
import json
import shutil
import random
from tqdm import tqdm
import pandas as pd

# Paths
# The project root defaults to the original author's local folder; set the
# BDD_EDA_ROOT environment variable to run the notebook on another machine
# without editing this cell.
ROOT = os.environ.get(
    "BDD_EDA_ROOT",
    r"C:\Users\niran\OneDrive\Desktop\Bosch\Assignment\EDA",
)

# Inputs: the training image folder and the label JSON that describes it.
IMG_DIR = os.path.join(ROOT, "data", "images", "train")
LABEL_JSON = os.path.join(ROOT, "data", "labels", "bdd100k_labels_images_train.json")

# Output root of the converted dataset.
# NOTE(review): the recorded outputs further down in this notebook show a
# folder named "yolo_dataset", so this cell looks like it was edited after the
# last full run - confirm which folder name is intended.
YOLO_ROOT = os.path.join(ROOT, "dataset")
os.makedirs(YOLO_ROOT, exist_ok=True)

# images/<split> and labels/<split> sub-folders for the two splits.
for sub in ["images/train", "images/val", "labels/train", "labels/val"]:
    os.makedirs(os.path.join(YOLO_ROOT, sub), exist_ok=True)

print("Folders ready.")

Folders ready.


In [9]:
# Load the whole label file into memory.
# The encoding is pinned because open() otherwise falls back to the platform's
# locale encoding (a legacy code page on Windows), which can mis-decode any
# non-ASCII text in the JSON.
with open(LABEL_JSON, "r", encoding="utf-8") as f:
    bdd_data = json.load(f)

print(f"Loaded {len(bdd_data)} annotations from train JSON.")

Loaded 69863 annotations from train JSON.


In [10]:
# File names actually present in the image folder (set for O(1) lookups).
available_images = set(os.listdir(IMG_DIR))

# Keep, in their original order, only the records whose image exists on disk.
filtered = [record for record in bdd_data if record["name"] in available_images]

print(f"Usable images: {len(filtered)}")

Usable images: 69863


In [12]:
# Deterministic 90/10 train/val split.
# A copy is shuffled instead of `filtered` itself: shuffling in place made the
# cell non-idempotent (re-running it reshuffled an already shuffled list and
# produced a different split). With the same seed, the first-run result is
# identical to the in-place version.
random.seed(42)
shuffled = list(filtered)
random.shuffle(shuffled)

split_idx = int(0.9 * len(shuffled))
train_items = shuffled[:split_idx]
val_items = shuffled[split_idx:]

# Every usable record must land in exactly one split.
assert len(train_items) + len(val_items) == len(filtered)

print(f"Train split: {len(train_items)}")
print(f"Val split:   {len(val_items)}")

Train split: 62876
Val split:   6987


In [13]:
# Categories that are converted; the position in this list is the class id.
DETECTION_CLASSES = [
    "bike",
    "bus",
    "car",
    "motor",
    "person",
    "rider",
    "traffic light",
    "traffic sign",
    "train",
    "truck",
]

# Category name -> integer class id (its index in DETECTION_CLASSES).
class_to_idx = dict(zip(DETECTION_CLASSES, range(len(DETECTION_CLASSES))))
class_to_idx

{'bike': 0,
 'bus': 1,
 'car': 2,
 'motor': 3,
 'person': 4,
 'rider': 5,
 'traffic light': 6,
 'traffic sign': 7,
 'train': 8,
 'truck': 9}

In [14]:
def bdd_to_yolo(item, save_label_path, img_w=1280, img_h=720, class_map=None):
    """Write the 2D boxes of one image record as a YOLO label file.

    Each kept box becomes one line ``<class_id> <cx> <cy> <w> <h>`` with the
    four numbers divided by the image size and printed with 6 decimals.
    Lines are joined with newlines (no trailing newline). The file is always
    written, so a record without usable boxes yields an empty label file.

    Args:
        item: Record dict. ``item["labels"]`` (optional, may be ``None``) is a
            list of dicts with a ``"category"`` and, for boxes, a ``"box2d"``
            dict holding ``x1``, ``y1``, ``x2``, ``y2``.
            Assumes these are pixel coordinates in an ``img_w`` x ``img_h``
            image - TODO confirm against the dataset.
        save_label_path: Destination path of the ``.txt`` label file.
        img_w: Image width used for normalisation. The default assumes
            1280-pixel-wide frames - confirm all images share that size.
        img_h: Image height used for normalisation (same caveat as ``img_w``).
        class_map: Optional mapping category name -> class id. Defaults to the
            notebook-level ``class_to_idx``.

    Labels are skipped when the category is not in ``class_map``, when there
    is no ``box2d``, or when the box has no area after clamping to the image.
    """
    if class_map is None:
        class_map = class_to_idx

    lines = []

    # `or []` also covers records that carry "labels": None.
    for label in item.get("labels") or []:
        cat = label.get("category")
        if cat not in class_map:
            continue

        box = label.get("box2d")
        if not box:
            continue

        # Order the corners so an inverted box cannot give a negative size.
        x1, x2 = sorted((box["x1"], box["x2"]))
        y1, y2 = sorted((box["y1"], box["y2"]))

        # Clamp to the image so every normalised value stays within [0, 1];
        # boxes that are already inside the image are left untouched.
        x1, x2 = max(0.0, x1), min(float(img_w), x2)
        y1, y2 = max(0.0, y1), min(float(img_h), y2)

        # Nothing left to learn from a zero-area (or fully outside) box.
        if x2 <= x1 or y2 <= y1:
            continue

        # YOLO format: class cx cy w h (normalized)
        cx = (x1 + x2) / 2 / img_w
        cy = (y1 + y2) / 2 / img_h
        w = (x2 - x1) / img_w
        h = (y2 - y1) / img_h

        lines.append(f"{class_map[cat]} {cx:.6f} {cy:.6f} {w:.6f} {h:.6f}")

    with open(save_label_path, "w") as f:
        f.write("\n".join(lines))

In [15]:
def process_items(items, split_name):
    """Copy the images of one split and write a YOLO label file for each.

    Args:
        items: Iterable of record dicts; each must have a ``"name"`` key with
            the image file name inside ``IMG_DIR``.
        split_name: Sub-folder name under ``images/`` and ``labels/`` in
            ``YOLO_ROOT`` (the notebook uses ``"train"`` and ``"val"``).

    For every record the image is copied to ``images/<split_name>/`` and
    ``bdd_to_yolo`` writes ``labels/<split_name>/<image stem>.txt``.
    """
    img_out_dir = os.path.join(YOLO_ROOT, "images", split_name)
    label_out_dir = os.path.join(YOLO_ROOT, "labels", split_name)

    # Create the targets here as well so the function works for any split
    # name, not only the folders prepared by the setup cell.
    os.makedirs(img_out_dir, exist_ok=True)
    os.makedirs(label_out_dir, exist_ok=True)

    for item in tqdm(items, desc=f"Processing {split_name}"):
        img = item["name"]
        src_img = os.path.join(IMG_DIR, img)
        dst_img = os.path.join(img_out_dir, img)

        # Copy image
        shutil.copy(src_img, dst_img)

        # Label: swap only the real extension. str.replace(".jpg", ".txt")
        # would rewrite every ".jpg" occurrence in the name and leave other
        # extensions (".JPG", ".png", ...) unchanged.
        stem, _ext = os.path.splitext(img)
        label_path = os.path.join(label_out_dir, stem + ".txt")
        bdd_to_yolo(item, label_path)


# Convert both splits, train first and then val.
for split_name, split_items in (("train", train_items), ("val", val_items)):
    process_items(split_items, split_name)

print("YOLO dataset creation complete.")

Processing train: 100%|██████████| 62876/62876 [09:51<00:00, 106.28it/s]
Processing val: 100%|██████████| 6987/6987 [01:06<00:00, 105.48it/s]

YOLO dataset creation complete.





In [17]:
# Write the dataset description file next to the images/ and labels/ folders.
yaml_path = os.path.join(YOLO_ROOT, "yolo_dataset.yaml")

# Build the class table from DETECTION_CLASSES so the ids in the YAML can
# never drift from the ids written into the label files.
names_block = "\n".join(
    f"  {idx}: {name}" for idx, name in enumerate(DETECTION_CLASSES)
)

# Single-quoted YAML scalar: backslashes stay literal (Windows paths) and the
# only character needing an escape is the quote itself, which is doubled.
yaml_root = "'" + YOLO_ROOT.replace("'", "''") + "'"

yaml_content = (
    f"path: {yaml_root}\n"
    "train: images/train\n"
    "val: images/val\n"
    "\n"
    "names:\n"
    f"{names_block}\n"
)

# UTF-8 is pinned so a non-ASCII character in the path is not written in the
# platform's locale encoding.
with open(yaml_path, "w", encoding="utf-8") as f:
    f.write(yaml_content)

yaml_path

'C:\\Users\\niran\\OneDrive\\Desktop\\Bosch\\Assignment\\EDA\\yolo_dataset\\yolo_dataset.yaml'

In [18]:
# Report the dataset root folder (YOLO_ROOT) that the cells above populate.
print("YOLO dataset ready at:", YOLO_ROOT)

YOLO dataset ready at: C:\Users\niran\OneDrive\Desktop\Bosch\Assignment\EDA\yolo_dataset
