In [1]:
# pip install pillow torch tqdm

    extract-msg (<=0.29.*)
                 ~~~~~~~^[0m[33m
[0mNote: you may need to restart the kernel to use updated packages.


In [2]:
# pip install ultralytics

    extract-msg (<=0.29.*)
                 ~~~~~~~^[0m[33m
[0mNote: you may need to restart the kernel to use updated packages.


In [15]:
import os
from nuscenes.nuscenes import NuScenes
from nuscenes.utils.splits import create_splits_scenes
from PIL import Image
import torch
from ultralytics import YOLO
import numpy as np
import json
from tqdm import tqdm


class NuScenesConesDataset:
    def __init__(self, dataroot, version="v1.0-mini", split="train"):
        self.nusc = NuScenes(version=version, dataroot=dataroot, verbose=True)
        self.split = split
        self.scenes = create_splits_scenes()[split]

    def prepare_dataset(self, output_dir):
        """
        Prepare dataset in YOLO format:
        - images/train/
        - images/val/
        - labels/train/
        - labels/val/
        """
        os.makedirs(os.path.join(output_dir, "images", self.split), exist_ok=True)
        os.makedirs(os.path.join(output_dir, "labels", self.split), exist_ok=True)

        # Track all annotations
        for scene in tqdm(self.scenes):
            # start with first scene
            scene_rec = self.nusc.scene[0]
            first_sample_token = scene_rec["first_sample_token"]
            sample = self.nusc.get("sample", first_sample_token)

            while sample:
                # Get camera images
                cam_front_data = self.nusc.get(
                    "sample_data", sample["data"]["CAM_FRONT"]
                )

                # Get image
                img_path = os.path.join(self.nusc.dataroot, cam_front_data["filename"])
                img = Image.open(img_path)

                # Get annotations
                annotations = []
                for ann_token in sample["anns"]:
                    ann_rec = self.nusc.get("sample_annotation", ann_token)
                    if ann_rec["category_name"] == "movable_object.traffic_cone":
                        # Get 2D bbox in image coordinates
                        bbox = self.nusc.get_box(ann_rec["token"])
                        corners_2d = self.nusc.box_to_keypoints(
                            cam_front_data["token"], bbox
                        )

                        # Convert to YOLO format (normalized coordinates)
                        x_min, y_min = np.min(corners_2d, axis=0)
                        x_max, y_max = np.max(corners_2d, axis=0)

                        width = img.width
                        height = img.height

                        # YOLO format: <class> <x_center> <y_center> <width> <height>
                        x_center = ((x_min + x_max) / 2) / width
                        y_center = ((y_min + y_max) / 2) / height
                        bbox_width = (x_max - x_min) / width
                        bbox_height = (y_max - y_min) / height

                        annotations.append(
                            f"0 {x_center} {y_center} {bbox_width} {bbox_height}"
                        )

                # Save image and labels
                img_filename = os.path.basename(cam_front_data["filename"])
                label_filename = os.path.splitext(img_filename)[0] + ".txt"

                img.save(os.path.join(output_dir, "images", self.split, img_filename))
                with open(
                    os.path.join(output_dir, "labels", self.split, label_filename), "w"
                ) as f:
                    f.write("\n".join(annotations))

                # Move to next sample
                if sample["next"] == "":
                    break
                sample = self.nusc.get("sample", sample["next"])

In [6]:
def create_data_yaml(output_dir):
    """Create data.yaml file for YOLO training"""
    data_yaml = {
        "path": output_dir,
        "train": "images/train",
        "val": "images/val",
        "names": {0: "traffic_cone"},
    }

    with open(os.path.join(output_dir, "data.yaml"), "w") as f:
        json.dump(data_yaml, f, indent=2)

In [16]:
# Set paths
dataroot = "../data/v1.0-mini"  # Replace with your NuScenes data path
output_dir = "../output"  # Replace with your desired output path

# Prepare training data
print("Preparing training data...")
train_dataset = NuScenesConesDataset(dataroot, split="train")
train_dataset.prepare_dataset(output_dir)

# Prepare validation data
print("Preparing validation data...")
val_dataset = NuScenesConesDataset(dataroot, split="val")
val_dataset.prepare_dataset(output_dir)

# Create data.yaml
print("Creating data.yaml...")
create_data_yaml(output_dir)

Preparing training data...
Loading NuScenes tables for version v1.0-mini...
23 category,
8 attribute,
4 visibility,
911 instance,
12 sensor,
120 calibrated_sensor,
31206 ego_pose,
8 log,
10 scene,
404 sample,
31206 sample_data,
18538 sample_annotation,
4 map,
Done loading in 0.455 seconds.
Reverse indexing ...
Done reverse indexing in 0.1 seconds.


  5%|▌         | 35/700 [00:48<15:22,  1.39s/it]


KeyboardInterrupt: 

In [None]:
def train_yolo(data_yaml_path):
    """Train YOLO model"""
    # Load a pretrained model
    model = YOLO("yolov8n.pt")

    # Train the model
    results = model.train(
        data=data_yaml_path,
        epochs=100,
        imgsz=640,
        batch=16,
        name="traffic_cone_detector",
    )

    return model

In [None]:
# Train model
print("Training YOLO model...")
model = train_yolo(os.path.join(output_dir, "data.yaml"))

print("Training complete!")