In [7]:
# Mount Google Drive inside the Colab runtime; every dataset path used later in
# this notebook lives under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [9]:
import os
import cv2
import numpy as np
from PIL import Image
from collections import namedtuple
from tqdm import tqdm
import json
import shutil
from pathlib import Path
import yaml
import random

#### Define KITTI Label Structure

In [2]:
# Record type describing one semantic class of the KITTI label table.
# Fields, in positional order:
#   name, id, trainId, category, categoryId, hasInstances, ignoreInEval, color
# `id` is the value matched against mask pixels later in this notebook and
# `color` is a 3-tuple (presumably RGB -- confirm against the KITTI devkit).
Label = namedtuple(
    'Label',
    'name id trainId category categoryId hasInstances ignoreInEval color',
)

#### Define KITTI Labels List

In [3]:
# KITTI semantic label table (from KITTI documentation).
# One Label per row, in field order:
#   name, id, trainId, category, categoryId, hasInstances, ignoreInEval, color
# Rows with trainId == 255 are exactly the rows with ignoreInEval == True; the
# remaining 19 rows carry trainId 0..18.
labels = [
    Label('unlabeled',            0,   255, 'void',        0, False, True,  (0,   0,   0)),
    Label('ego vehicle',          1,   255, 'void',        0, False, True,  (0,   0,   0)),
    Label('rectification border', 2,   255, 'void',        0, False, True,  (0,   0,   0)),
    Label('out of roi',           3,   255, 'void',        0, False, True,  (0,   0,   0)),
    Label('static',               4,   255, 'void',        0, False, True,  (0,   0,   0)),
    Label('dynamic',              5,   255, 'void',        0, False, True,  (111, 74,  0)),
    Label('ground',               6,   255, 'void',        0, False, True,  (81,  0,   81)),
    Label('road',                 7,   0,   'flat',        1, False, False, (128, 64,  128)),
    Label('sidewalk',             8,   1,   'flat',        1, False, False, (244, 35,  232)),
    Label('parking',              9,   255, 'flat',        1, False, True,  (250, 170, 160)),
    Label('rail track',           10,  255, 'flat',        1, False, True,  (230, 150, 140)),
    Label('building',             11,  2,   'construction', 2, False, False, (70,  70,  70)),
    Label('wall',                 12,  3,   'construction', 2, False, False, (102, 102, 156)),
    Label('fence',                13,  4,   'construction', 2, False, False, (190, 153, 153)),
    Label('guard rail',           14,  255, 'construction', 2, False, True,  (180, 165, 180)),
    Label('bridge',               15,  255, 'construction', 2, False, True,  (150, 100, 100)),
    Label('tunnel',               16,  255, 'construction', 2, False, True,  (150, 120, 90)),
    Label('pole',                 17,  5,   'object',       3, False, False, (153, 153, 153)),
    Label('polegroup',            18,  255, 'object',       3, False, True,  (153, 153, 153)),
    Label('traffic light',        19,  6,   'object',       3, False, False, (250, 170, 30)),
    Label('traffic sign',         20,  7,   'object',       3, False, False, (220, 220, 0)),
    Label('vegetation',           21,  8,   'nature',       4, False, False, (107, 142, 35)),
    Label('terrain',              22,  9,   'nature',       4, False, False, (152, 251, 152)),
    Label('sky',                  23,  10,  'sky',          5, False, False, (70,  130, 180)),
    Label('person',               24,  11,  'human',        6, True,  False, (220, 20,  60)),
    Label('rider',                25,  12,  'human',        6, True,  False, (255, 0,   0)),
    Label('car',                  26,  13,  'vehicle',      7, True,  False, (0,   0,   142)),
    Label('truck',                27,  14,  'vehicle',      7, True,  False, (0,   0,   70)),
    Label('bus',                  28,  15,  'vehicle',      7, True,  False, (0,   60,  100)),
    Label('caravan',              29,  255, 'vehicle',      7, True,  True,  (0,   0,   90)),
    Label('trailer',              30,  255, 'vehicle',      7, True,  True,  (0,   0,   110)),
    Label('train',                31,  16,  'vehicle',      7, True,  False, (0,   80,  100)),
    Label('motorcycle',           32,  17,  'vehicle',      7, True,  False, (0,   0,   230)),
    Label('bicycle',              33,  18,  'vehicle',      7, True,  False, (119, 11,  32)),
]

#### Class Mapping Initialization

In [4]:
# Build the two lookup tables used by the converter:
#   id_to_yolo   : KITTI label id  -> YOLO class index
#   yolo_to_name : YOLO class index -> human-readable class name

# Every label in the 'void' category collapses into YOLO class 0 ('unknown').
void_ids = [entry.id for entry in labels if entry.category == 'void']
id_to_yolo = dict.fromkeys(void_ids, 0)
yolo_to_name = {0: 'unknown'}

# Labels that are evaluated (not ignored, real trainId, non-void) receive
# consecutive YOLO indices starting at 1, in table order.
evaluated = [
    entry for entry in labels
    if not (entry.ignoreInEval or entry.trainId == 255 or entry.category == 'void')
]
for yolo_id, entry in enumerate(evaluated, start=1):
    id_to_yolo[entry.id] = yolo_id
    yolo_to_name[yolo_id] = entry.name

# Leave yolo_id pointing at the next unused index, as a manual counter would.
yolo_id = len(evaluated) + 1

print(yolo_to_name)

{0: 'unknown', 1: 'road', 2: 'sidewalk', 3: 'building', 4: 'wall', 5: 'fence', 6: 'pole', 7: 'traffic light', 8: 'traffic sign', 9: 'vegetation', 10: 'terrain', 11: 'sky', 12: 'person', 13: 'rider', 14: 'car', 15: 'truck', 16: 'bus', 17: 'train', 18: 'motorcycle', 19: 'bicycle'}


#### Utility - Contour Extraction and Polygon Conversion

In [5]:
def get_contours(mask, class_id):
    """Return the outer contours of all regions in `mask` equal to `class_id`.

    A uint8 image holding 255 where ``mask == class_id`` and 0 elsewhere is
    handed to ``cv2.findContours`` with ``RETR_EXTERNAL`` (outermost contours
    only) and ``CHAIN_APPROX_SIMPLE``; the contour sequence is returned.
    """
    foreground = np.where(mask == class_id, 255, 0).astype(np.uint8)
    contours, _hierarchy = cv2.findContours(
        foreground, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )
    return contours

def contour_to_polygon(contour, img_h, img_w):
    """Flatten a contour into a normalised ``[x1, y1, x2, y2, ...]`` list.

    Each element of `contour` is expected to hold an ``(x, y)`` pair at index
    0. x is divided by `img_w` and y by `img_h`.

    Returns ``None`` when the contour has fewer than three points (or the
    flattened list ends up with fewer than six values); otherwise the list.
    """
    # Fewer than three vertices cannot describe a polygon.
    if len(contour) < 3:
        return None

    def _normalise(vertex):
        col, row = vertex[0]
        return col / img_w, row / img_h

    flattened = [coord for vertex in contour for coord in _normalise(vertex)]
    if len(flattened) < 6:
        return None
    return flattened

#### Process One Semantic Mask

In [6]:
def process_mask(semantic_path, img_w, img_h):
    """Convert one semantic mask file into YOLO segmentation label lines.

    Args:
        semantic_path: Path of the mask image; each pixel value is looked up
            as a key of ``id_to_yolo``.
        img_w: Width used to normalise x coordinates.
        img_h: Height used to normalise y coordinates.

    Returns:
        A list of strings, one per extracted contour, formatted as
        ``"<yolo_cls> x1 y1 x2 y2 ..."`` with coordinates printed to six
        decimal places. Pixel values missing from ``id_to_yolo`` are skipped.
    """
    # Read the pixels inside a context manager so the underlying file handle
    # is released immediately instead of lingering until garbage collection
    # (this function is called once per image in a long loop).
    with Image.open(semantic_path) as mask_img:
        mask = np.array(mask_img)

    annotations = []
    for class_id in np.unique(mask):
        if class_id not in id_to_yolo:
            continue
        yolo_cls = id_to_yolo[class_id]
        contours = get_contours(mask, class_id)
        for contour in contours:
            polygon = contour_to_polygon(contour, img_h, img_w)
            if polygon:
                line = f"{yolo_cls} " + " ".join([f"{x:.6f}" for x in polygon])
                annotations.append(line)
    return annotations

#### Dataset Conversion Loop

In [11]:
# Source KITTI semantics folder and destination folder for the YOLO-format copy.
kitti_root = '/content/drive/MyDrive/Colab Notebooks/KITTI Dataset Exploration/data_semantics'
yolo_dataset_root = '/content/drive/MyDrive/Colab Notebooks/KITTI Dataset Exploration/yolo_dataset'

img_dir = os.path.join(kitti_root, 'image_2')
mask_dir = os.path.join(kitti_root, 'semantic')

os.makedirs(os.path.join(yolo_dataset_root, 'images'), exist_ok=True)
os.makedirs(os.path.join(yolo_dataset_root, 'labels'), exist_ok=True)

# os.listdir returns entries in arbitrary order; sort so every run processes
# (and logs) the files in the same sequence.
img_list = sorted(f for f in os.listdir(img_dir) if f.endswith('.png'))

for fname in tqdm(img_list):
    img_path = os.path.join(img_dir, fname)
    mask_path = os.path.join(mask_dir, fname)
    if not os.path.exists(mask_path):
        print(f"Missing mask for {fname}")
        continue

    out_img_path = os.path.join(yolo_dataset_root, 'images', fname)
    # splitext swaps only the trailing extension; str.replace would also
    # rewrite any '.png' that happens to appear inside the file stem.
    out_lbl_path = os.path.join(
        yolo_dataset_root, 'labels', os.path.splitext(fname)[0] + '.txt'
    )

    # Context manager closes the source file handle after each image.
    with Image.open(img_path) as img:
        img_w, img_h = img.size
        img.save(out_img_path)

    annotations = process_mask(mask_path, img_w, img_h)

    # One label file per image, one polygon per line.
    with open(out_lbl_path, 'w') as f:
        f.write('\n'.join(annotations))

100%|██████████| 200/200 [01:03<00:00,  3.15it/s]


#### Split dataset into train/val/test and organize for YOLOv8 training

In [None]:
DATASET_PATH = "/content/drive/MyDrive/Colab Notebooks/KITTI Dataset Exploration/YOLO_TRAIN_DATASET"

# Fixed seed so the train/val/test assignment is identical on every run.
# Copies below are additive, so a split that changed between runs would leave
# the same image in more than one split directory.
# NOTE(review): files left over from an earlier, unseeded run are not removed
# here -- clear DATASET_PATH manually if it was populated before this change.
SPLIT_SEED = 42

# Create directory structure: <root>/<split>/{images,labels}
dataset_root = Path(DATASET_PATH)
for split in ['train', 'val', 'test']:
    (dataset_root / split / 'images').mkdir(parents=True, exist_ok=True)
    (dataset_root / split / 'labels').mkdir(parents=True, exist_ok=True)

# Get all image files
images_dir = Path(yolo_dataset_root) / 'images'
labels_dir = Path(yolo_dataset_root) / 'labels'

# glob order is filesystem-dependent; sort so the seeded shuffle always starts
# from the same sequence.
image_files = sorted(images_dir.glob('*.png'))

# Filter images that have corresponding labels
valid_pairs = []
for img_file in image_files:
    label_file = labels_dir / f"{img_file.stem}.txt"
    if label_file.exists():
        valid_pairs.append((img_file, label_file))

print(f"Found {len(valid_pairs)} valid image-label pairs")

# Shuffle with a private, seeded generator (leaves the global RNG untouched)
random.Random(SPLIT_SEED).shuffle(valid_pairs)

# Split ratios: 70% train, 20% val, remainder (~10%) test
n_total = len(valid_pairs)
n_train = int(0.7 * n_total)
n_val = int(0.2 * n_total)

splits = {
    'train': valid_pairs[:n_train],
    'val': valid_pairs[n_train:n_train + n_val],
    'test': valid_pairs[n_train + n_val:]
}

# Copy files to respective directories
for split_name, pairs in splits.items():
    print(f"Copying {len(pairs)} files to {split_name} split...")

    for img_file, label_file in pairs:
        # Copy image
        dst_img = dataset_root / split_name / 'images' / img_file.name
        shutil.copy2(img_file, dst_img)

        # Copy label
        dst_label = dataset_root / split_name / 'labels' / label_file.name
        shutil.copy2(label_file, dst_label)

print(f"Dataset structure created at: {dataset_root}")
print(f"Train: {len(splits['train'])}, Val: {len(splits['val'])}, Test: {len(splits['test'])}")

Found 200 valid image-label pairs
Copying 140 files to train split...
