In [18]:
# ================================
#   YOLOv11 + BDD100K Colab Notebook
# ================================

# ================================
# 1. Install Dependencies
# ================================
!pip install ultralytics --quiet
!pip install tqdm jsonlines --quiet
!pip install roboflow --quiet

print("Ultralytics + Dependencies Installed!")


Ultralytics + Dependencies Installed!


In [19]:
# ================================
# 2. Mount Google Drive
# ================================
# ROOT is the directory every later cell reads from / writes to.
import os

try:
    # google.colab is only importable inside a Colab runtime.
    from google.colab import drive
except ImportError:
    # Not running on Colab: keep the dataset in a local working directory.
    ROOT = "./BDD100K_YOLO"
else:
    # On Colab: persist everything on Google Drive so it survives the session.
    drive.mount('/content/drive')
    ROOT = "/content/drive/MyDrive/BDD100K_YOLO"

os.makedirs(ROOT, exist_ok=True)

print("Dataset root:", ROOT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Dataset root: /content/drive/MyDrive/BDD100K_YOLO


In [20]:
# ================================
# 3. Download BDD100K Dataset (Detection labels only)
# ================================
# Official Downloads:
# Images: https://bdd-data.berkeley.edu/
# Labels: https://bdd-data.berkeley.edu/

!wget http://128.32.162.150/bdd100k/bdd100k_images_100k.zip -O {ROOT}/bdd100k_images.zip
!wget http://128.32.162.150/bdd100k/bdd100k_labels.zip -O {ROOT}/bdd100k_labels.zip


--2025-12-08 18:55:50--  http://128.32.162.150/bdd100k/bdd100k_images_100k.zip
Connecting to 128.32.162.150:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 5669071832 (5.3G) [application/zip]
Saving to: ‚Äò/content/drive/MyDrive/BDD100K_YOLO/bdd100k_images.zip‚Äô


2025-12-08 19:04:31 (10.4 MB/s) - ‚Äò/content/drive/MyDrive/BDD100K_YOLO/bdd100k_images.zip‚Äô saved [5669071832/5669071832]

--2025-12-08 19:04:31--  http://128.32.162.150/bdd100k/bdd100k_labels.zip
Connecting to 128.32.162.150:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 189638612 (181M) [application/zip]
Saving to: ‚Äò/content/drive/MyDrive/BDD100K_YOLO/bdd100k_labels.zip‚Äô


2025-12-08 19:04:47 (11.6 MB/s) - ‚Äò/content/drive/MyDrive/BDD100K_YOLO/bdd100k_labels.zip‚Äô saved [189638612/189638612]



In [21]:
# Extract everything

# Folder Structure After Extraction:
# ROOT/bdd100k/images/100k/{train,val}
# ROOT/bdd100k/labels/det_20/{train,val}.json

print("Extracting images ...")
!unzip -q {ROOT}/bdd100k_images.zip -d {ROOT}/images_100k

print("Extracting labels ...")
!unzip -q {ROOT}/bdd100k_labels.zip -d {ROOT}/labels_100k

print("Extraction Complete!")



Extracting images ...
replace /content/drive/MyDrive/BDD100K_YOLO/images_100k/100k/train/6a9b44bd-91e97262.jpg? [y]es, [n]o, [A]ll, [N]one, [r]ename: Extracting labels ...
replace /content/drive/MyDrive/BDD100K_YOLO/labels_100k/100k/train/6866acb3-cf17e759.json? [y]es, [n]o, [A]ll, [N]one, [r]ename: Extraction Complete!


In [22]:
# Debugging aid (disabled): converts a single BDD100K annotation JSON into a YOLO
# label file, to check that annotations are written correctly.
# The whole snippet is wrapped in a string literal, so none of it executes; because
# the string is the last expression of the cell, it is only echoed as the cell output.
"""
import os
import json

classes = {
    "person": 0, "rider": 1, "car": 2, "bus": 3, "truck": 4,
    "bike": 5, "motorcycle": 6, "traffic light": 7,
    "traffic sign": 8, "train": 9
}
json_path = f"/content/BDD100K_YOLO/labels/train/0000f77c-6257be58.json"
with open(json_path, 'r') as jf:
  ann = json.load(jf)
  #shutil.copy(img_path, f"{out_img_dir}/{img_name}")

# Height/width fallback
  h = ann.get("height", 720)
  w = ann.get("width", 1280)

# Open output label file
  out_lbl_path = f"/content/BDD100K_YOLO/0000f77c-6257be58.txt"
  with open(out_lbl_path, "w") as out_f:

    frame_objects = ann.get("frames", [{}])[0].get("objects", [])

    # Loop through annotations
    for obj in frame_objects:
        cls_name = obj.get("category")

        # Skip unknown classes
        if cls_name not in classes:
            continue

        # Bounding box
        box = obj.get("box2d", None)
        if not box:
            continue

        x1, y1, x2, y2 = box["x1"], box["y1"], box["x2"], box["y2"]

        # Convert to YOLO format
        xc = (x1 + x2) / 2 / w
        yc = (y1 + y2) / 2 / h
        bw = (x2 - x1) / w
        bh = (y2 - y1) / h

        out_f.write(f"{classes[cls_name]} {xc} {yc} {bw} {bh}\n")
"""

'\nimport os\nimport json\n\nclasses = {\n    "person": 0, "rider": 1, "car": 2, "bus": 3, "truck": 4,\n    "bike": 5, "motorcycle": 6, "traffic light": 7,\n    "traffic sign": 8, "train": 9\n}\njson_path = f"/content/BDD100K_YOLO/labels/train/0000f77c-6257be58.json"\nwith open(json_path, \'r\') as jf:\n  ann = json.load(jf)\n  #shutil.copy(img_path, f"{out_img_dir}/{img_name}")\n\n# Height/width fallback\n  h = ann.get("height", 720)\n  w = ann.get("width", 1280)\n\n# Open output label file\n  out_lbl_path = f"/content/BDD100K_YOLO/0000f77c-6257be58.txt"\n  with open(out_lbl_path, "w") as out_f:\n\n    frame_objects = ann.get("frames", [{}])[0].get("objects", [])\n\n    # Loop through annotations\n    for obj in frame_objects:\n        cls_name = obj.get("category")\n\n        # Skip unknown classes\n        if cls_name not in classes:\n            continue\n\n        # Bounding box\n        box = obj.get("box2d", None)\n        if not box:\n            continue\n\n        x1, y1, x2,

In [24]:
import os
import json
import shutil
from tqdm import tqdm

def _yolo_label_lines(ann, classes):
    """Build the YOLO label lines for one per-image annotation dict.

    Every returned line has the form "<class_idx> <xc> <yc> <bw> <bh>" followed by
    a newline, where the box centre and size are normalised by the image size.
    Objects are dropped when their category is not in ``classes``, when they have
    no ``box2d``, or when the box has no area left after clamping to the image.

    Args:
        ann (dict): parsed annotation JSON of a single image.
        classes (dict): category name -> class index mapping.

    Returns:
        list[str]: one label line per kept object (possibly empty).
    """
    # Image size fallback. `or` (instead of a .get default) also covers an
    # explicit 0 / None, which would otherwise divide by zero below.
    h = ann.get("height") or 720
    w = ann.get("width") or 1280

    # "frames" may be missing or an empty list; both mean "no objects".
    frames = ann.get("frames") or [{}]
    frame_objects = frames[0].get("objects") or []

    lines = []
    for obj in frame_objects:
        cls_name = obj.get("category")

        # Skip categories that are not part of the training classes
        if cls_name not in classes:
            continue

        # Skip objects without a 2D box (other annotation types)
        box = obj.get("box2d")
        if not box:
            continue

        # Clamp the corners to the image: YOLO label values are expected to be
        # normalised to [0, 1], and boxes may reach past the image border.
        x1 = min(max(box["x1"], 0), w)
        y1 = min(max(box["y1"], 0), h)
        x2 = min(max(box["x2"], 0), w)
        y2 = min(max(box["y2"], 0), h)

        # Drop inverted or zero-area boxes; they carry no training signal.
        if x2 <= x1 or y2 <= y1:
            continue

        # Corner format -> normalised centre/size format
        xc = (x1 + x2) / 2 / w
        yc = (y1 + y2) / 2 / h
        bw = (x2 - x1) / w
        bh = (y2 - y1) / h

        lines.append(f"{classes[cls_name]} {xc} {yc} {bw} {bh}\n")

    return lines


def convert_split(split, IMG_DIR, JSON_DIR, OUT_DIR, classes):
    """
    Convert per-image JSON annotations of one split to YOLO TXT format.

    For every ``*.jpg`` in ``IMG_DIR/split`` that has a same-named ``*.json`` in
    ``JSON_DIR/split``, the image is copied to ``OUT_DIR/split/images`` and a label
    file is written to ``OUT_DIR/split/labels``. Images without an annotation file
    are reported and skipped. An image whose annotation contains no usable object
    still gets an (empty) label file.

    Args:
        split (str): "train", "val", or "test"
        IMG_DIR (str): path to image directory containing JPGs
        JSON_DIR (str): path to directory containing JSON files (one per image)
        OUT_DIR (str): output directory root to save YOLO images/labels
        classes (dict): category name -> class index mapping

    Raises:
        FileNotFoundError: if ``IMG_DIR/split`` does not exist.
    """

    print(f"\nüîÑ Converting split: {split}")

    # Make output directories
    out_img_dir = f"{OUT_DIR}/{split}/images"
    out_lbl_dir = f"{OUT_DIR}/{split}/labels"
    os.makedirs(out_img_dir, exist_ok=True)
    os.makedirs(out_lbl_dir, exist_ok=True)

    # Get all images for this split (sorted for a reproducible processing order)
    image_list = sorted(f for f in os.listdir(f"{IMG_DIR}/{split}") if f.endswith('.jpg'))

    for img_name in tqdm(image_list):

        # Strip only the trailing ".jpg"; str.replace would also rewrite a
        # ".jpg" that happens to occur in the middle of the file name.
        stem = img_name[: -len(".jpg")]

        img_path = f"{IMG_DIR}/{split}/{img_name}"
        json_path = f"{JSON_DIR}/{split}/{stem}.json"

        # The annotation is expected to be named like the image
        if not os.path.exists(json_path):
            print(f"‚ö†Ô∏è Missing annotation: {json_path} (skipping)")
            continue

        # Load annotation JSON
        with open(json_path, 'r') as jf:
            ann = json.load(jf)

        # Copy image to YOLO dataset
        shutil.copy(img_path, f"{out_img_dir}/{img_name}")

        # Write the label file next to it (same stem, .txt)
        with open(f"{out_lbl_dir}/{stem}.txt", "w") as out_f:
            out_f.writelines(_yolo_label_lines(ann, classes))

    print(f"‚úÖ Completed {split}")


# ========================
# Example Usage
# ========================

# Category name -> YOLO class index. The indices match the `names` section
# written to bdd100k.yaml.
classes = {
    "person": 0, "rider": 1, "car": 2, "bus": 3, "truck": 4,
    "bike": 5, "motorcycle": 6, "traffic light": 7,
    "traffic sign": 8, "train": 9,
    # NOTE(review): BDD100K label releases are believed to differ in how they spell
    # these two categories ("bike"/"motor" vs "bicycle"/"motorcycle"). The aliases map
    # both spellings onto the same index — confirm against the "category" values
    # actually present in the JSON files.
    "bicycle": 5, "motor": 6,
}

# All locations are derived from ROOT so they follow the dataset root set when
# mounting Drive instead of repeating the absolute path.
IMG_DIR = f"{ROOT}/images_100k/100k"      # contains train/, val/, test/
JSON_DIR = f"{ROOT}/labels_100k/100k"     # contains train/, val/, test/
OUT_DIR = f"{ROOT}/dataset"

# NOTE(review): images without an annotation file are skipped by convert_split, so a
# split whose labels are not shipped ends up empty — verify this for "test".
for split in ("train", "val", "test"):
    convert_split(split, IMG_DIR, JSON_DIR, OUT_DIR, classes)



üîÑ Converting split: train


  0%|          | 1/70000 [00:00<58:28, 19.95it/s]


KeyboardInterrupt: 

In [None]:
# ================================
# 5. Create YAML File for Training
# ================================
# Class names in index order; the position in this list is the class index.
class_names = [
    "person", "rider", "car", "bus", "truck",
    "bike", "motorcycle", "traffic light", "traffic sign", "train",
]
names_block = "".join(f"  {idx}: {label}\n" for idx, label in enumerate(class_names))

# Dataset description: root path, per-split image folders (relative to the
# root) and the index -> name table. The leading blank line and the trailing
# whitespace-only line are kept so the written file stays exactly the same.
yaml_text = (
    f"\npath: {ROOT}/dataset\n"
    "\n"
    "train: train/images\n"
    "val: val/images\n"
    "test: test/images\n"
    "\n"
    "names:\n"
    f"{names_block}"
    "\n"
    "    "
)

yaml_path = f"{ROOT}/bdd100k.yaml"
with open(yaml_path, "w") as f:
    f.write(yaml_text)

print("bdd100k.yaml created!")



In [None]:
from ultralytics import YOLO
import os, json, shutil, zipfile
from tqdm import tqdm

# ===========================================
# 1) Train YOLOv11 + Save Fine-Tuned Model
# ===========================================

# Project folder (relative to the working directory) that receives the
# training run as well as the metric/plot files written by later cells.
save_dir = "yolo11n_bdd100k"
os.makedirs(save_dir, exist_ok=True)

# Start from the pretrained nano checkpoint
model = YOLO("yolo11n.pt")

results = model.train(
    data=yaml_path,          # dataset description written in the YAML cell
    imgsz=640,
    epochs=5,
    batch=32,
    device=0,
    workers=2,
    project=save_dir,        # save training logs here
    name="train_run",
    save=True,
    save_period=1            # save checkpoint every epoch
)

# Report the folder the trainer actually used: when "train_run" already exists from
# an earlier run, the run name may be auto-incremented, so the fixed path would be stale.
# Falls back to the expected path if the trainer does not expose save_dir.
run_dir = getattr(getattr(model, "trainer", None), "save_dir", None) or f"{save_dir}/train_run"

print("‚úî Training Completed!")
print(f"‚úî Saved fine-tuned weights at: {run_dir}/weights/")

In [None]:
# ===========================================
# 1) Save fine-tuned YOLO model
# ===========================================

# Stored under ROOT (the Drive-backed dataset root) so the weights outlive the
# Colab session; derived from ROOT rather than repeating the absolute path.
model_path = f"{ROOT}/yolo_finetuned_bdd100k.pt"
model.save(model_path)
print(f"‚úî Fine-tuned model saved at: {model_path}")

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# ===========================================
# 2) Generate Validation Performance Metrics
# ===========================================

print("\nüìä Generating Evaluation Metrics...")

# Run validation; the returned metrics object exposes the box metrics read below
metrics = model.val()
box_stats = metrics.box


# ===========================================
# 3) Write metrics to a readable text file
# ===========================================

metrics_path = f"{save_dir}/metrics_summary.txt"

with open(metrics_path, "w") as f:
    # Headline numbers first
    f.writelines([
        "===== YOLO Validation Metrics =====\n\n",
        f"mAP@50:       {box_stats.map50:.4f}\n",
        f"mAP@50-95:    {box_stats.map:.4f}\n",
        f"Precision:    {box_stats.mp:.4f}\n",
        f"Recall:       {box_stats.mr:.4f}\n\n",
    ])

    # Then one line per class, when the metrics object provides class indices
    if hasattr(box_stats, "ap_class_index"):
        f.write("===== Per-Class AP =====\n")
        f.writelines(
            f"class {cls_idx}: AP = {ap:.4f}\n"
            for cls_idx, ap in zip(box_stats.ap_class_index, box_stats.ap)
        )

print("‚úî Metrics written to:", metrics_path)


# ===========================================
# 4) Print summary metrics to notebook output
# ===========================================

print("\n===== VAL METRICS =====")
for label, value in (
    ("mAP50:     ", box_stats.map50),
    ("mAP50-95:  ", box_stats.map),
    ("Precision: ", box_stats.mp),
    ("Recall:    ", box_stats.mr),
):
    print(f"{label}{value:.4f}")


# ===========================================
# 5) Save Confusion Matrix Image
# ===========================================

# A missing attribute and an attribute set to None are treated the same way
conf_matrix = getattr(metrics, "confusion_matrix", None)

if conf_matrix is None:
    print("‚ö† No confusion matrix found (possibly empty dataset or configuration).")
else:
    conf_matrix.plot(normalize=True, save_dir=save_dir)
    print(f"‚úî Confusion matrix saved in {save_dir}")



In [None]:
# ===========================================
# 3) Plot Train/Val Loss & Accuracy Curves
# ===========================================

print("\nüìâ Generating Training Curves...")

# Per-epoch log written by the training run
csv_path = f"{save_dir}/train_run/results.csv"

df = pd.read_csv(csv_path)
# NOTE: some Ultralytics releases are reported to pad the CSV header names with
# spaces; stripping them keeps the column lookups below working either way.
df.columns = df.columns.str.strip()


def _plot_curves(frame, series, title, ylabel, out_path):
    """Plot (column, legend label) pairs of `frame` against its 'epoch' column,
    save the figure to `out_path` and display it."""
    fig, ax = plt.subplots(figsize=(14, 8))
    for column, label in series:
        ax.plot(frame['epoch'], frame[column], label=label)
    ax.set_title(title)
    ax.set_xlabel("Epoch")
    ax.set_ylabel(ylabel)
    ax.legend()
    ax.grid(True)
    fig.savefig(out_path)
    plt.show()


# Loss curves (box + classification, train vs. validation)
_plot_curves(
    df,
    [
        ('train/box_loss', 'Train Box Loss'),
        ('train/cls_loss', 'Train Cls Loss'),
        ('val/box_loss', 'Val Box Loss'),
        ('val/cls_loss', 'Val Cls Loss'),
    ],
    title="YOLOv11 Loss Curves",
    ylabel="Loss",
    out_path=f"{save_dir}/loss_curves.png",
)

# Accuracy metrics plot (Precision, Recall, mAP)
_plot_curves(
    df,
    [
        ('metrics/precision(B)', "Precision"),
        ('metrics/recall(B)', "Recall"),
        ('metrics/mAP50(B)', "mAP50"),
        ('metrics/mAP50-95(B)', "mAP50-95"),
    ],
    title="YOLOv11 Accuracy Metrics Over Epochs",
    ylabel="Metric Value",
    out_path=f"{save_dir}/metric_curves.png",
)

print("‚úî Training curves saved in:", save_dir)


In [None]:
# ===========================================
# 4) Run Inference on Test Dataset
# ===========================================

print("\nüîç Running Inference on TEST Images...")

test_results_dir = "/content/drive/MyDrive/BDD100K_YOLO/test_preds"
os.makedirs(test_results_dir, exist_ok=True)

pred_stream = model.predict(
    source=f"{ROOT}/dataset/test/images",
    imgsz=640,
    save=True,               # write the annotated images to project/name
    project=test_results_dir,
    name="preds",
    exist_ok=True,           # re-runs write into "preds" again instead of a new "preds2", ...
    conf=0.25,
    stream=True              # yield results one by one instead of holding all of them in RAM
)

# With stream=True nothing runs until the generator is consumed. Only a small
# (image path, number of boxes) summary is kept per image, so memory use stays
# flat even for a large test set; the annotated images are already on disk.
preds = [(result.path, len(result.boxes)) for result in pred_stream]

print("‚úî Inference Completed!")
print("‚úî Output saved at:", test_results_dir + "/preds")

In [None]:
# OPTIONAL: Display a few predictions
import glob
from IPython.display import Image

# glob returns matches in arbitrary order; sort so the same five images are
# shown on every run.
pred_imgs = sorted(glob.glob(test_results_dir + "/preds/*.jpg"))[:5]
for img in pred_imgs:
    display(Image(filename=img))

In [25]:
# Export the model in ONNX format for inference.
# The export call stays the last expression of the cell so that its return
# value is shown as the cell output.
onnx_export_args = dict(
    format="onnx",
    opset=12,        # pinned for OpenCV compatibility (marked as required in the original note)
    simplify=True,
    dynamic=False,   # fixed input shape (marked as recommended in the original note)
)

model.export(**onnx_export_args)

Ultralytics 8.3.235 üöÄ Python-3.12.12 torch-2.9.0+cu126 CPU (Intel Xeon CPU @ 2.20GHz)

[34m[1mPyTorch:[0m starting from '/content/yolo11n_bdd100k/train_run/weights/best.pt' with input shape (1, 3, 640, 640) BCHW and output shape(s) (1, 14, 8400) (5.2 MB)

[34m[1mONNX:[0m starting export with onnx 1.19.1 opset 12...
[34m[1mONNX:[0m slimming with onnxslim 0.1.78...
[34m[1mONNX:[0m export success ‚úÖ 1.2s, saved as '/content/yolo11n_bdd100k/train_run/weights/best.onnx' (10.1 MB)

Export complete (1.4s)
Results saved to [1m/content/yolo11n_bdd100k/train_run/weights[0m
Predict:         yolo predict task=detect model=/content/yolo11n_bdd100k/train_run/weights/best.onnx imgsz=640  
Validate:        yolo val task=detect model=/content/yolo11n_bdd100k/train_run/weights/best.onnx imgsz=640 data=/content/drive/MyDrive/BDD100K_YOLO/bdd100k.yaml  
Visualize:       https://netron.app


'/content/yolo11n_bdd100k/train_run/weights/best.onnx'