<h1><b>Towards Object-Based Visual SLAM: A Revolution for Urban Tram</b></h1>

In [None]:
# Import necessary libraries

import pandas as pd
import numpy as np
import os
import ast
import json
import ast
import cv2 
import matplotlib.pyplot as plt
from collections import defaultdict
from tabulate import tabulate
from math import ceil
from ultralytics import YOLO

In [None]:
%matplotlib inline

### Helper functions

In [None]:
#  Declare function to convert JSON annotations to a DataFrame

# Image-file extension used for each supported dataset (keys are lower-case).
_SUPPORTED_DATASETS = {'bdd100k': '.jpg', 'kitti': '.png'}


def _first_tag_value(tags, name, default=None):
    """Return the 'value' of the first tag called `name`, or `default` if absent."""
    for tag in tags:
        if tag.get('name') == name:
            return tag.get('value', default)
    return default


def _parse_bdd_attributes(raw):
    """Turn a BDD100K 'attributes' tag value into a dict ({} when it cannot be parsed)."""
    if isinstance(raw, dict):
        return raw
    if not isinstance(raw, str):
        return {}
    try:
        # JSON-style payloads (true/false, double quotes).
        parsed = json.loads(raw)
    except ValueError:
        try:
            # Python-literal payloads (True/False, single quotes).
            parsed = ast.literal_eval(raw.replace("'", '"'))
        except (ValueError, SyntaxError):
            return {}
    return parsed if isinstance(parsed, dict) else {}


def _tag_float(tags, name):
    """Return the named tag's value as a float, or 0.0 when the tag is missing."""
    raw = _first_tag_value(tags, name)
    return 0.0 if raw is None else float(raw)


def _tag_float_triplet(tags, name):
    """Split a whitespace-separated tag value into three floats, padding with 0.0."""
    raw = _first_tag_value(tags, name)
    parts = raw.split() if raw is not None else []
    return tuple(float(parts[i]) if i < len(parts) else 0.0 for i in range(3))


def json_to_dataframe(annot_path, dataset_name='unknown'):
    """Load per-image JSON annotation files from a directory into one DataFrame.

    Every ``*.json`` file in ``annot_path`` is read; each entry of its
    ``objects`` list whose ``points.exterior`` holds at least two points
    becomes one row (first point -> xmin/ymin, second point -> xmax/ymax).
    Files are processed in sorted name order so the row order is reproducible.

    Parameters
    ----------
    annot_path : str
        Directory containing the ``.json`` annotation files.
    dataset_name : str
        ``'BDD100K'`` or ``'KITTI'`` (case-insensitive). Selects the image
        extension appended to the file name (``.jpg`` / ``.png``) and the
        dataset-specific columns that are extracted from the object tags.

    Returns
    -------
    pandas.DataFrame
        Columns ``file``, ``type``, ``bbox_xmin``, ``bbox_ymin``, ``bbox_xmax``,
        ``bbox_ymax`` plus, for BDD100K, ``occluded`` and ``truncated``
        (``None`` when unavailable) or, for KITTI, ``occluded``,
        ``observation_angle``, ``dimensions_h/w/l``, ``location_x/y/z`` and
        ``rotation_y`` (numeric fields default to ``0.0``).

    Raises
    ------
    ValueError
        If ``dataset_name`` is not a supported dataset. The check happens
        before any file is read.
    """
    ds = dataset_name.lower()
    if ds not in _SUPPORTED_DATASETS:
        raise ValueError(f'Unsupported dataset: {dataset_name}')
    file_ext = _SUPPORTED_DATASETS[ds]

    records = []
    for fname in sorted(os.listdir(annot_path)):
        if not fname.endswith('.json'):
            continue

        with open(os.path.join(annot_path, fname), 'r', encoding='utf-8') as f:
            ann = json.load(f)

        # 'abc.jpg.json' -> 'abc.jpg'; 'abc.json' -> 'abc' + dataset extension.
        base = os.path.splitext(fname)[0]
        file_name = base if base.endswith(file_ext) else base + file_ext

        for obj in ann.get('objects', []):
            ext = obj.get('points', {}).get('exterior', [])
            if len(ext) < 2:
                continue  # not enough points to define a box
            xmin, ymin = ext[0]
            xmax, ymax = ext[1]

            rec = {
                'file': file_name,
                'type': obj.get('classTitle', 'unknown'),
                'bbox_xmin': xmin,
                'bbox_ymin': ymin,
                'bbox_xmax': xmax,
                'bbox_ymax': ymax
            }
            tags = obj.get('tags', [])

            if ds == 'bdd100k':
                attrs = _parse_bdd_attributes(_first_tag_value(tags, 'attributes', '{}'))
                rec['occluded'] = attrs.get('occluded', None)
                rec['truncated'] = attrs.get('truncated', None)
            else:  # ds == 'kitti' (validated above)
                rec['occluded'] = _first_tag_value(tags, 'occlusion state')
                rec['observation_angle'] = _tag_float(tags, 'observation angle')
                (rec['dimensions_h'],
                 rec['dimensions_w'],
                 rec['dimensions_l']) = _tag_float_triplet(tags, 'dimensions')
                (rec['location_x'],
                 rec['location_y'],
                 rec['location_z']) = _tag_float_triplet(tags, 'location')
                rec['rotation_y'] = _tag_float(tags, 'rotation y')

            records.append(rec)

    return pd.DataFrame(records)

In [None]:
# Declare function to visualise box plots for bounding box area distributions of two BDD100K datasets side by side

def plot_bbox_area_boxplot(
    bdd100k1,
    bdd100k2,
    column='bbox_area',
    label1='BDD100K Test',
    label2='BDD100K Val',
    figsize=(12, 5),
    title=None
):
    """Draw two box plots of `column` side by side on a shared y-axis.

    `bdd100k1` is drawn in the left panel and `bdd100k2` in the right one;
    missing values in `column` are dropped before plotting. Each panel is
    labelled on its x-axis with `label1` / `label2`. When `title` is falsy a
    default figure title built from the two labels is used.
    """
    fig, axes = plt.subplots(1, 2, figsize=figsize, sharey=True)

    panels = ((bdd100k1, label1), (bdd100k2, label2))
    for position, (frame, panel_label) in enumerate(panels):
        ax = axes[position]
        ax.boxplot(frame[column].dropna(), patch_artist=True)
        ax.set_title('')            # no per-panel title; the label goes on the x-axis
        ax.set_xlabel(panel_label)
        if position == 0:
            # y-axis is shared, so only the left panel carries the label
            ax.set_ylabel('Area (pixels²)')
        ax.grid(True, linestyle='--', alpha=0.7)

    if title:
        heading = title
    else:
        heading = f'Bounding Box Area Distribution: {label1} vs {label2}'
    fig.suptitle(heading, fontsize=14, y=1.03)

    plt.tight_layout()
    plt.show()

In [None]:
# Compute and plot aspect ratio histograms side by side

def plot_aspect_ratio_distributions(
    bdd100k1,
    bdd100k2,
    label1='BDD100K Test',
    label2='BDD100K Val',
    bins=np.linspace(0, 5, 50),
    title=None
):
    """Plot width/height aspect-ratio histograms of two box tables side by side.

    Side effect: an ``aspect_ratio`` column (box width divided by box height)
    is written into BOTH input DataFrames in place.

    Both panels use the same `bins` and the same y-limit (5 % above the tallest
    bin of either histogram). When `title` is falsy a default figure title
    built from the two labels is used.
    """
    frames = (bdd100k1, bdd100k2)

    # Store the aspect ratio on each input frame (in place).
    for frame in frames:
        box_width = frame['bbox_xmax'] - frame['bbox_xmin']
        box_height = frame['bbox_ymax'] - frame['bbox_ymin']
        frame['aspect_ratio'] = box_width / box_height

    # Tallest bin across both histograms fixes the common y-limit.
    tallest_bins = [
        np.histogram(frame['aspect_ratio'], bins=bins)[0].max()
        for frame in frames
    ]
    ymax = max(tallest_bins)

    fig, axes = plt.subplots(1, 2, figsize=(12, 5), sharey=True)

    # (frame, label, extra hist kwargs); the right panel gets the second cycle colour.
    panel_specs = (
        (bdd100k1, label1, {}),
        (bdd100k2, label2, {'color': 'C1'}),
    )
    for position, (frame, panel_label, hist_kwargs) in enumerate(panel_specs):
        ax = axes[position]
        ax.hist(frame['aspect_ratio'], bins=bins, edgecolor='black', alpha=0.7, **hist_kwargs)
        ax.set_title(f'{panel_label} Aspect Ratio')
        ax.set_xlabel('Aspect Ratio')
        if position == 0:
            ax.set_ylabel('Count')
        ax.set_ylim(0, ymax * 1.05)
        ax.grid(axis='y', linestyle='--', alpha=0.5)

    if title:
        heading = title
    else:
        heading = f'Aspect Ratio Distributions: {label1} vs {label2}'
    fig.suptitle(heading, fontsize=14, y=1.03)

    plt.tight_layout()
    plt.show()

In [None]:
# Check for logical errors in bounding box coordinates

def check_logical_box_errors(df, img_width=1280, img_height=720):
    """Print a report of logically invalid and out-of-image bounding boxes.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``file``, ``type``, ``bbox_xmin``, ``bbox_ymin``,
        ``bbox_xmax`` and ``bbox_ymax``.
    img_width, img_height : int
        Image size used for the boundary check. The defaults (1280x720) keep
        the previous hard-coded behaviour; the signature now matches
        ``verify_bounding_box_corrections``.

    Returns
    -------
    None
        Results are printed only. A coordinate counts as out of bounds when it
        is negative or strictly greater than the image width/height.
    """
    box_columns = ['file', 'type', 'bbox_xmin', 'bbox_ymin', 'bbox_xmax', 'bbox_ymax']
    bounds_text = f'outside 0-{img_width - 1}x0-{img_height - 1}'

    # Logical errors: the "min" corner lies beyond the "max" corner.
    invalid_logic = df[
        (df['bbox_xmin'] > df['bbox_xmax']) |
        (df['bbox_ymin'] > df['bbox_ymax'])
    ]

    # Boundary errors: any coordinate outside the image rectangle.
    invalid_bounds = df[
        (df['bbox_xmin'] < 0) | (df['bbox_xmin'] > img_width) |
        (df['bbox_xmax'] < 0) | (df['bbox_xmax'] > img_width) |
        (df['bbox_ymin'] < 0) | (df['bbox_ymin'] > img_height) |
        (df['bbox_ymax'] < 0) | (df['bbox_ymax'] > img_height)
    ]

    if not invalid_logic.empty:
        print(f'Found {len(invalid_logic)} invalid bounding boxes with logical errors (xmin > xmax or ymin > ymax):')
        print(tabulate(invalid_logic[box_columns], headers='keys', tablefmt='pretty'))
    else:
        print('No logical errors found (xmin > xmax or ymin > ymax).')

    if not invalid_bounds.empty:
        print(f'Found {len(invalid_bounds)} invalid bounding boxes with boundary errors ({bounds_text}):')
        print(tabulate(invalid_bounds[box_columns], headers='keys', tablefmt='pretty'))
    else:
        print(f'No boundary errors found ({bounds_text}).')

In [None]:
# Function to correct invalid bounding boxes

def correct_bounding_box(row, img_width=1280, img_height=720):
    """Return a copy of `row` with swapped min/max box coordinates put in order.

    If ``bbox_xmin > bbox_xmax`` (or ``bbox_ymin > bbox_ymax``) the two values
    are exchanged. The input row is not modified; a corrected copy is returned,
    which makes the function safe to use with ``DataFrame.apply(..., axis=1)``.

    Coordinates are NOT clamped to the image: out-of-image values are left
    untouched. `img_width` and `img_height` are therefore currently unused and
    are kept only so existing callers that pass them keep working.

    A warning is printed when the resulting box has zero width or height.
    """
    fixed = row.copy()

    if fixed['bbox_xmin'] > fixed['bbox_xmax']:
        fixed['bbox_xmin'], fixed['bbox_xmax'] = fixed['bbox_xmax'], fixed['bbox_xmin']
    if fixed['bbox_ymin'] > fixed['bbox_ymax']:
        fixed['bbox_ymin'], fixed['bbox_ymax'] = fixed['bbox_ymax'], fixed['bbox_ymin']

    # Warn about zero-area boxes
    if fixed['bbox_xmin'] == fixed['bbox_xmax'] or fixed['bbox_ymin'] == fixed['bbox_ymax']:
        print(f"Warning: Bounding box for {fixed['file']} has zero area after correction.")

    return fixed

In [None]:
def verify_bounding_box_corrections(df, img_width=1280, img_height=720):
    """Print a validity report for bounding boxes, typically after correction.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``file``, ``type``, ``bbox_xmin``, ``bbox_ymin``,
        ``bbox_xmax`` and ``bbox_ymax``.
    img_width, img_height : int
        Image size used for the boundary check. A coordinate counts as out of
        bounds when it is negative or strictly greater than the width/height.

    Returns
    -------
    None
        Logical errors, boundary errors and a summary line are printed.

    Notes
    -----
    The total counts each row once even if it fails both checks. It is derived
    from the row-wise boolean masks rather than from merging the two filtered
    frames, because a merge on all columns miscounts duplicated rows and rows
    containing NaN.
    """
    box_columns = ['file', 'type', 'bbox_xmin', 'bbox_ymin', 'bbox_xmax', 'bbox_ymax']
    bounds_text = f'outside 0-{img_width - 1}x0-{img_height - 1}'

    # Row-wise masks for the two kinds of problem.
    logic_mask = (
        (df['bbox_xmin'] > df['bbox_xmax']) |
        (df['bbox_ymin'] > df['bbox_ymax'])
    )
    bounds_mask = (
        (df['bbox_xmin'] < 0) | (df['bbox_xmin'] > img_width) |
        (df['bbox_xmax'] < 0) | (df['bbox_xmax'] > img_width) |
        (df['bbox_ymin'] < 0) | (df['bbox_ymin'] > img_height) |
        (df['bbox_ymax'] < 0) | (df['bbox_ymax'] > img_height)
    )
    invalid_logic = df[logic_mask]
    invalid_bounds = df[bounds_mask]

    # Union of both masks: every offending row is counted exactly once.
    total_invalid = int((logic_mask | bounds_mask).sum())

    # Report logical errors
    if not invalid_logic.empty:
        print(f'Found {len(invalid_logic)} bounding boxes with remaining logical errors (xmin > xmax or ymin > ymax):')
        print(tabulate(invalid_logic[box_columns], headers='keys', tablefmt='pretty'))
    else:
        print('No logical errors found (xmin > xmax or ymin > ymax).')

    # Report boundary errors
    if not invalid_bounds.empty:
        print(f'Found {len(invalid_bounds)} bounding boxes with boundary errors ({bounds_text}):')
        print(tabulate(invalid_bounds[box_columns], headers='keys', tablefmt='pretty'))
    else:
        print(f'No boundary errors found ({bounds_text}).')

    # Summary
    if total_invalid == 0:
        print(f'All {len(df)} bounding boxes are valid after correction.')
    else:
        print(f'Total invalid bounding boxes after correction: {total_invalid}')

# Example usage after applying corrections
# bdd100k_df_cleaned = bdd100k_df_cleaned.apply(correct_bounding_box, axis=1)
# verify_bounding_box_corrections(bdd100k_df_cleaned)

In [None]:
# Calculate bounding box areas (width, height, and area)
def calculate_box_areas(df):
    """Add ``bbox_width``, ``bbox_height`` and ``bbox_area`` columns to `df`.

    The columns are written into `df` in place and the same object is returned.
    Width and height are the differences of the max and min box coordinates;
    area is their product.
    """
    widths = df['bbox_xmax'] - df['bbox_xmin']
    heights = df['bbox_ymax'] - df['bbox_ymin']

    df['bbox_width'] = widths
    df['bbox_height'] = heights
    df['bbox_area'] = widths * heights
    return df

In [None]:
# Identify and remove bounding boxes with area less than a threshold

def remove_noisy_boxes(df, min_area_threshold=100):
    """Drop boxes whose ``bbox_area`` is below `min_area_threshold`.

    Prints how many boxes are affected together with a preview table of at
    most ten of them, then returns the rows whose area is at least the
    threshold. When nothing is below the threshold a message is printed and
    `df` itself is returned unchanged.
    """
    below_threshold = df['bbox_area'] < min_area_threshold
    noisy_boxes = df[below_threshold]

    if noisy_boxes.empty:
        print(f'No bounding boxes found with area < {min_area_threshold} pixels.')
        return df

    print(f'Found {len(noisy_boxes)} noisy bounding boxes with area < {min_area_threshold} pixels:\n')
    preview_columns = ['file', 'type', 'bbox_xmin', 'bbox_ymin', 'bbox_xmax', 'bbox_ymax', 'bbox_area']
    preview = noisy_boxes[preview_columns].head(10)
    print(tabulate(preview, headers='keys', tablefmt='fancy_grid', showindex=True))

    kept = df[df['bbox_area'] >= min_area_threshold]
    print(f'\nRemoved {len(noisy_boxes):,} noisy bounding boxes.')
    return kept

In [None]:
# Analyze the distribution of occlusion and truncation in the dataset

def analyze_occlusion_truncation(df):
    """Print value-count tables for the ``occluded`` and ``truncated`` columns.

    Both counts are computed first, then each is printed as a grid table with
    one (value, count) row per distinct value. Nothing is returned.
    """
    occlusion_rows = list(df['occluded'].value_counts().items())
    truncation_rows = list(df['truncated'].value_counts().items())

    report = (
        ('Occlusion Distribution:', occlusion_rows, 'Occluded'),
        ('\nTruncation Distribution:', truncation_rows, 'Truncated'),
    )
    for heading, rows, value_header in report:
        print(heading)
        print(tabulate(rows, headers=[value_header, 'Count'], tablefmt='grid'))

In [None]:
# Define categories for occlusion and truncation

# Expected domain of the two flags. Kept at module level for any other cell
# that refers to them; classify_object itself validates by type (see below).
occlusion_categories = {None, False, True}
truncation_categories = {None, False, True}


def _as_flag(value):
    """Return a plain bool for Python or NumPy booleans, otherwise None."""
    if isinstance(value, (bool, np.bool_)):
        return bool(value)
    return None


# Function to classify objects based on occlusion and truncation

def classify_object(row):
    """Classify a box as 'reliable', 'unreliable' or 'unknown'.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide ``'occluded'`` and ``'truncated'``.

    Returns
    -------
    str
        ``'unknown'``    if either flag is not a boolean (None, NaN, strings,
                         numbers, containers, ...);
        ``'reliable'``   if both flags are False;
        ``'unreliable'`` if at least one flag is True.

    Notes
    -----
    NumPy booleans (as produced by bool-typed DataFrame columns) are accepted
    in addition to Python ``bool``. Identity checks such as ``x is False`` do
    not match them and would report such rows as 'unknown'.
    """
    occluded = _as_flag(row['occluded'])
    truncated = _as_flag(row['truncated'])

    # Anything that is not a genuine boolean cannot be judged.
    if occluded is None or truncated is None:
        return 'unknown'

    if occluded or truncated:
        return 'unreliable'
    return 'reliable'

In [None]:
# Exclude bounding boxes marked as unreliable

def remove_unreliable_boxes(df):
    """Return `df` without the rows whose ``reliability`` is 'unreliable'.

    Prints the number of unreliable boxes and the frame length before and
    after removal. When there is nothing to remove a message is printed and
    `df` itself is returned.
    """
    unreliable_boxes = df[df['reliability'] == 'unreliable']
    n_unreliable = len(unreliable_boxes)

    if unreliable_boxes.empty:
        print('\nNo unreliable bounding boxes found to remove.')
        return df

    print(f'\nFound {n_unreliable} unreliable bounding boxes.')
    print(f'Length before removal: {len(df)}')
    remaining = df[df['reliability'] != 'unreliable']
    print(f'Length after removal: {len(remaining)}')
    print(f'Removed {n_unreliable} unreliable bounding boxes.')
    return remaining

In [None]:
# Detect outliers in bounding box data based on area and aspect ratio

def detect_outliers(df, check_area=True, check_aspect_ratio=True, area_multiplier=1.5, aspect_ratio_lower=0.2, aspect_ratio_upper=5):
    """Return a boolean Series flagging rows that are area or aspect-ratio outliers.

    Area check (if `check_area`): a row is flagged when ``bbox_area`` lies
    outside ``[Q1 - k*IQR, Q3 + k*IQR]`` with ``k = area_multiplier``.
    Aspect-ratio check (if `check_aspect_ratio`): a row is flagged when
    ``aspect_ratio`` is below `aspect_ratio_lower` or above
    `aspect_ratio_upper`. The result is the element-wise OR of both checks;
    a disabled check flags nothing.

    Raises
    ------
    ValueError
        If an enabled check's column is missing from `df`.
    """
    if check_area and 'bbox_area' not in df.columns:
        raise ValueError("DataFrame must have 'bbox_area' column if check_area is True")
    if check_aspect_ratio and 'aspect_ratio' not in df.columns:
        raise ValueError("DataFrame must have 'aspect_ratio' column if check_aspect_ratio is True")

    def _nothing_flagged():
        # All-False mask aligned with df, used for a disabled check.
        return pd.Series([False] * len(df), index=df.index)

    if not check_area:
        area_outliers = _nothing_flagged()
    else:
        areas = df['bbox_area']
        first_quartile = areas.quantile(0.25)
        third_quartile = areas.quantile(0.75)
        spread = third_quartile - first_quartile
        too_small = areas < first_quartile - area_multiplier * spread
        too_large = areas > third_quartile + area_multiplier * spread
        area_outliers = too_small | too_large

    if not check_aspect_ratio:
        aspect_ratio_outliers = _nothing_flagged()
    else:
        ratios = df['aspect_ratio']
        aspect_ratio_outliers = (ratios < aspect_ratio_lower) | (ratios > aspect_ratio_upper)

    return area_outliers | aspect_ratio_outliers

In [None]:
# Handle aspect ratio outliers in BDD100K annotations by removing bounding boxes with extreme aspect ratios based on the IQR method per category

def handle_aspect_ratio_outliers(df, category_column='type'):
    """Remove boxes whose aspect ratio is an IQR outlier within their category.

    Steps
    -----
    1. Work on a copy of `df`; rows with non-positive box height are dropped.
    2. For every value of `category_column`, compute Q1/Q3 of the width/height
       ratio (NaN ratios excluded) and keep only rows inside
       ``[Q1 - 1.5*IQR, Q3 + 1.5*IQR]``. A NaN ratio counts as an outlier
       whenever its category has bounds. Categories without any valid ratio
       are left untouched.
    3. The helper columns ``aspect_ratio``, ``bbox_width`` and ``bbox_height``
       are dropped from the result, including any that were already present
       on the input.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``bbox_xmin``, ``bbox_ymin``, ``bbox_xmax``, ``bbox_ymax``
        and `category_column`. It is not modified.
    category_column : str
        Column used to group boxes before computing the bounds.

    Returns
    -------
    pandas.DataFrame
        The retained rows (original index labels preserved). An input without
        any positive-height box yields an empty frame instead of an error.
    """
    # Step 1: aspect ratios on an independent copy (no SettingWithCopy issues).
    work = df.copy()
    work['bbox_width'] = work['bbox_xmax'] - work['bbox_xmin']
    work['bbox_height'] = work['bbox_ymax'] - work['bbox_ymin']
    work = work.loc[work['bbox_height'] > 0].copy()
    work['aspect_ratio'] = work['bbox_width'] / work['bbox_height']

    # Step 2: per-category IQR bounds, applied to each group positionally so
    # duplicate index labels cannot confuse the assignment.
    ratios = work['aspect_ratio'].to_numpy(dtype=float)
    keep = np.ones(len(work), dtype=bool)
    for positions in work.groupby(category_column, sort=False).indices.values():
        group_ratios = ratios[positions]
        valid = group_ratios[~np.isnan(group_ratios)]
        if valid.size == 0:
            continue  # no bounds for this category -> nothing is flagged
        q1, q3 = np.percentile(valid, [25, 75])
        iqr = q3 - q1
        lower_bound = q1 - 1.5 * iqr
        upper_bound = q3 + 1.5 * iqr
        # NaN compares False on both sides, i.e. it is treated as an outlier.
        keep[positions] = (group_ratios >= lower_bound) & (group_ratios <= upper_bound)

    # Step 3: filter and remove the helper columns.
    df_cleaned = work.loc[keep].drop(columns=['aspect_ratio', 'bbox_width', 'bbox_height'])

    return df_cleaned

# <b>1. EDA (EXPLORATORY DATA ANALYSIS)</b>

In [None]:
# Dataset locations (relative paths, resolved against the current working directory).
# *_ANNOT_PATH folders hold the per-image JSON annotation files that are passed to
# json_to_dataframe; *_IMG_PATH folders hold the corresponding images.
KITTI_ANNOT_PATH = 'KITTI/train/ann'
KITTI_IMG_PATH = 'KITTI/train/img'
BDD100K_ANNOT_PATH = 'BDD100K/train/ann'
BDD100K_IMG_PATH = 'BDD100K/train/img'
# Validation split of BDD100K
BDD100K_ANNOT_PATH_VAL = 'BDD100K/val/ann'
BDD100K_IMG_PATH_VAL = 'BDD100K/val/img'

<h5>1.1 KITTI Object Detection Benchmark</h5>

In [None]:
# Convert KITTI JSON annotations to DataFrame
# (one row per annotated object; dataset_name selects the KITTI-specific columns)

kitti_df = json_to_dataframe(KITTI_ANNOT_PATH, dataset_name='KITTI')

In [None]:
# Preview the first rows of the KITTI annotation table
kitti_df.head()

In [None]:
# Summary statistics of the KITTI annotation columns
kitti_df.describe()

In [None]:
# describe() summary of the four box corner coordinates, rounded to two decimals
bbox_stat_kitti = kitti_df[['bbox_xmin', 'bbox_ymin', 'bbox_xmax', 'bbox_ymax']].describe().round(2)

# Print the summary as a plain-text table
print('\nSummary Statistics for Bounding Box Dimensions (KITTI):')
print(tabulate(bbox_stat_kitti, headers='keys', tablefmt='pretty'))

In [None]:
# Bar Chart: Distribution of Object Types

# Number of annotated objects per class, most frequent first
counts = kitti_df['type'].value_counts()
labels = counts.index.tolist()
values = counts.values
cmap = plt.get_cmap('tab10')
colors = cmap.colors[:len(labels)]

fig, ax = plt.subplots(figsize=(10, 6))
bars = ax.bar(labels, values, color=colors, edgecolor='black')
# ax.set_title('Distribution of Object Types in KITTI')
ax.set_xlabel('Classes')
ax.set_ylabel('Count')
# Restyle the tick labels the bar plot already created. Calling
# set_xticklabels() without fixing the tick positions first is discouraged by
# matplotlib and emits a warning on recent versions.
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
ax.grid(axis='y', linestyle='--', alpha=0.6)

# Write each bar's count just above it
for bar in bars:
    height = bar.get_height()
    ax.annotate(f'{height}',
                xy=(bar.get_x() + bar.get_width() / 2, height),
                xytext=(0, 3),
                textcoords='offset points',
                ha='center', va='bottom', fontsize=8)

plt.tight_layout()
plt.show()

- `Car` dominates, while `truck`, `tram` and `person sitting` are minority classes.
- Because cars are so prevalent, they are natural candidates for the primary landmarks in SLAM.

In [None]:
# Sample Image with Annotations

# Use the image referenced by the first annotation row as the sample
sample_file = kitti_df['file'].iloc[0]  
print(f'Sample File: {sample_file}')
sample_img_path = os.path.join(KITTI_IMG_PATH, sample_file)
print(f'Sample Image Path: {sample_img_path}')
if os.path.exists(sample_img_path):
    img = cv2.imread(sample_img_path)
    # Convert from OpenCV's BGR channel order to RGB for matplotlib
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  
    # All boxes annotated on this image
    annotations = kitti_df[kitti_df['file'] == sample_file]
    for index, row in annotations.iterrows():
        # Box outline; (255, 0, 0) is red because the image is already RGB
        cv2.rectangle(img, (int(row['bbox_xmin']), int(row['bbox_ymin'])),
                     (int(row['bbox_xmax']), int(row['bbox_ymax'])), (255, 0, 0), 2)
        # Class label 10 px above the top-left corner of the box
        cv2.putText(img, row['type'], (int(row['bbox_xmin']), int(row['bbox_ymin'] - 10)),
                   cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)
    plt.figure(figsize=(10, 6))
    plt.imshow(img)
    # plt.title('Sample Image with Bounding Box Annotations')
    plt.axis('off')
    plt.tight_layout()
    plt.show()
else:
    print(f'Sample image {sample_img_path} not found.')

In [None]:
# Show the sample image next to a copy with its bounding boxes drawn on it
sample_file = kitti_df['file'].iloc[0]
print(f'Sample File: {sample_file}')
sample_img_path = os.path.join(KITTI_IMG_PATH, sample_file)
print(f'Sample Image Path: {sample_img_path}')

if os.path.exists(sample_img_path):
    # Load original image
    img_orig = cv2.imread(sample_img_path)
    # Convert from OpenCV's BGR channel order to RGB for matplotlib
    img_orig = cv2.cvtColor(img_orig, cv2.COLOR_BGR2RGB)

    # Make a copy for annotations
    img_annot = img_orig.copy()

    # Filter annotations
    annotations = kitti_df[kitti_df['file'] == sample_file]

    # Draw bounding boxes and labels
    for _, row in annotations.iterrows():
        cv2.rectangle(
            img_annot,
            (int(row['bbox_xmin']), int(row['bbox_ymin'])),
            (int(row['bbox_xmax']), int(row['bbox_ymax'])),
            (255, 0, 0),
            2
        )
        # Class label 10 px above the top-left corner of the box
        cv2.putText(
            img_annot,
            row['type'],
            (int(row['bbox_xmin']), int(row['bbox_ymin']) - 10),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.5,
            (255, 0, 0),
            2
        )

    # Plot side by side
    fig, axs = plt.subplots(1, 2, figsize=(15, 6))
    axs[0].imshow(img_orig)
    axs[0].set_title('Original Image')
    axs[0].axis('off')

    axs[1].imshow(img_annot)
    axs[1].set_title('Annotated Image')
    axs[1].axis('off')

    plt.tight_layout()
    plt.show()

else:
    print(f'Sample image {sample_img_path} not found.')

In [None]:
# Display coordinate info for the first bounding box in the sample image
# (uses `sample_file` as set by the preceding cell)

sample_annotations = kitti_df[kitti_df['file'] == sample_file]
if not sample_annotations.empty:
    # First annotation row of the sample image
    bbox_info = sample_annotations.iloc[0]
    print(f'Bounding Box Coordinates for `{sample_file}`:')
    print(f"xmin: {bbox_info['bbox_xmin']}, ymin: {bbox_info['bbox_ymin']}, xmax: {bbox_info['bbox_xmax']}, ymax: {bbox_info['bbox_ymax']}")
else:
    print(f'No bounding box found for {sample_file}')

In [None]:
# Sample Image with Bounding Box Crop

sample_file = kitti_df['file'].iloc[0]
annotations = kitti_df[kitti_df['file'] == sample_file]

sample_img_path = os.path.join(KITTI_IMG_PATH, sample_file)
# Stop the cell early if there is nothing to crop
if annotations.empty or not os.path.exists(sample_img_path):
    raise FileNotFoundError('No annotations or image for ' + sample_file)

img = cv2.imread(sample_img_path)
# Convert from OpenCV's BGR channel order to RGB for matplotlib
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

# Crop the first annotated box; coordinates are truncated to whole pixels
# NOTE(review): assumes the box lies inside the image and has non-zero size — confirm
bbox = annotations.iloc[0]
xmin, ymin = int(bbox['bbox_xmin']), int(bbox['bbox_ymin'])
xmax, ymax = int(bbox['bbox_xmax']), int(bbox['bbox_ymax'])
obj_img = img[ymin:ymax, xmin:xmax]
h, w, _ = obj_img.shape 

fig, ax = plt.subplots(figsize=(4,4))
ax.imshow(obj_img, origin='upper')
ax.axis('off')

# Red outline around the whole crop
ax.add_patch(plt.Rectangle((0,0), w, h,
                           linewidth=2,
                           edgecolor='red',
                           facecolor='none'))

# (x, y, text, horizontal alignment, vertical alignment) for each crop corner;
# the text shows the corner's coordinates in the original image
corners = [
    (0,   0,   f'({xmin},{ymin})', 'left',  'top'),
    (w,   0,   f'({xmax},{ymin})', 'right', 'top'),
    (w,   h,   f'({xmax},{ymax})', 'right', 'bottom'),
    (0,   h,   f'({xmin},{ymax})', 'left',  'bottom'),
]
for x, y, txt, ha, va in corners:
    ax.text(x, y, txt,
            color='yellow',
            fontsize=10,
            ha=ha, va=va,
            bbox=dict(facecolor='black', alpha=0.5, pad=2))

plt.tight_layout()
plt.show()

In [None]:
# Co-Occurrence Matrix

# file -> list of object types annotated in that file
types_per_file = kitti_df.groupby('file')['type'].apply(list).to_dict()

# object type -> set of files that contain at least one object of that type
co_occurrence = defaultdict(set)
for file, types in types_per_file.items():
    for obj_type in types:
        co_occurrence[obj_type].add(file)

object_types = kitti_df['type'].unique()
co_matrix = pd.DataFrame(0, index=object_types, columns=object_types, dtype=int)

# Entry (i, j) = number of files containing both types; the matrix is symmetric,
# so only the upper triangle is computed and mirrored. The diagonal therefore
# holds the number of files containing the type at all.
for i, type1 in enumerate(object_types):
    for j, type2 in enumerate(object_types):
        if i <= j:  
            common_files = co_occurrence[type1] & co_occurrence[type2]
            co_matrix.iloc[i, j] = len(common_files)
            if i != j: 
                co_matrix.iloc[j, i] = len(common_files)

plt.figure(figsize=(10, 8))
plt.imshow(co_matrix, cmap='viridis', interpolation='nearest')
# plt.title('Co-Occurrence Matrix of Object Types in KITTI')
plt.xticks(ticks=range(len(object_types)), labels=object_types, rotation=45)
plt.yticks(ticks=range(len(object_types)), labels=object_types)
plt.colorbar(label='Number of Co-Occurrences')
# Write the count into every cell; text is black above 5000 and white otherwise
# (hard-coded threshold, presumably chosen for contrast against the colormap)
for i in range(len(object_types)):
    for j in range(len(object_types)):
        count = co_matrix.iloc[i, j]
        plt.text(j, i, co_matrix.iloc[i, j], ha='center', va='center', color='black' if count > 5000 else 'white')
plt.tight_layout()
plt.show()

In [None]:
# Spatial Heatmap for each object type

# Frame size in pixels used as the histogram range (hard-coded). Box centres
# outside this range are not counted by np.histogram2d.
image_width = 1224
image_height = 370

# Reuse the annotation table built by json_to_dataframe. Re-parsing every JSON
# file here duplicated that logic and replaced kitti_df with a reduced frame
# that no longer had the KITTI-specific columns for later cells.
kitti_boxes = kitti_df[['type', 'bbox_xmin', 'bbox_ymin', 'bbox_xmax', 'bbox_ymax']]

object_types  = kitti_boxes['type'].unique().tolist()
n_types       = len(object_types)
n_cols        = 3
n_rows        = ceil(n_types / n_cols)

# One 50x50 histogram of box centres per object type
all_heatmaps = []
for t in object_types:
    df_t = kitti_boxes[kitti_boxes['type'] == t]
    x_centers = (df_t['bbox_xmin'] + df_t['bbox_xmax']) / 2
    y_centers = (df_t['bbox_ymin'] + df_t['bbox_ymax']) / 2
    heatmap, _, _ = np.histogram2d(
        x_centers, y_centers,
        bins=[50, 50],
        range=[[0, image_width], [0, image_height]]
    )
    # histogram2d returns an (x, y) array; transpose so rows are y for imshow
    all_heatmaps.append(heatmap.T)

# Common colour scale so the panels are directly comparable
vmin = min(h.min() for h in all_heatmaps)
vmax = max(h.max() for h in all_heatmaps)

fig, axes = plt.subplots(
    n_rows, n_cols,
    figsize=(n_cols * 5, n_rows * 4),
    sharex=False, sharey=False
)
axes = axes.flatten()

for idx, (ax, obj_type) in enumerate(zip(axes, object_types)):
    hm = all_heatmaps[idx]
    im = ax.imshow(
        hm,
        cmap='hot',
        origin='lower',
        extent=[0, image_width, 0, image_height],
        aspect='auto',
        vmin=vmin, vmax=vmax
    )
    ax.set_title(f'Spatial Heatmap for {obj_type}')
    ax.set_xlabel('X Coordinate (pixels)')
    ax.set_ylabel('Y Coordinate (pixels)')

    cbar = fig.colorbar(
        im, ax=ax,
        fraction=0.046,
        pad=0.04
    )
    cbar.set_label('Density')

# Hide the unused panels of the grid
for j in range(n_types, len(axes)):
    axes[j].axis('off')

# fig.suptitle('Spatial Heatmaps of Object Classes (KITTI)', y=1.02, fontsize=16)
plt.tight_layout()
plt.show()


<h5>1.2 BDD100K</h5>

In [None]:
# Convert BDD100K JSON annotations to DataFrame (train vs val set)
# bdd100k_df holds the train split, bdd100k_df_val the validation split.

bdd100k_df = json_to_dataframe(BDD100K_ANNOT_PATH, dataset_name='BDD100K')
bdd100k_df_val = json_to_dataframe(BDD100K_ANNOT_PATH_VAL, dataset_name='BDD100K')

In [None]:
# Preview the first rows of the BDD100K train annotations
bdd100k_df.head()

In [None]:
# Preview the first rows of the BDD100K validation annotations
bdd100k_df_val.head()

In [None]:
# Number of annotated objects (rows) in the train split
len(bdd100k_df)

In [None]:
# Number of annotated objects (rows) in the validation split
len(bdd100k_df_val)

In [None]:
# Summary statistics of the train-split annotation columns
bdd100k_df.describe()

In [None]:
# Summary statistics of the validation-split annotation columns
bdd100k_df_val.describe()

In [None]:
# describe() summary of the four box corner coordinates (train split), rounded to two decimals
bbox_stat_bdd100k = bdd100k_df[['bbox_xmin', 'bbox_ymin', 'bbox_xmax', 'bbox_ymax']].describe().round(2)

# Print the summary as a plain-text table
print('\nSummary Statistics for Bounding Box Dimensions (BDD100K):')
print(tabulate(bbox_stat_bdd100k, headers='keys', tablefmt='pretty'))

In [None]:
# Bar Chart: Distribution of Object Types

# Number of annotated objects per class in the train split, most frequent first
counts = bdd100k_df['type'].value_counts()
labels = counts.index.tolist()
values = counts.values
cmap = plt.get_cmap('tab10')
colors = cmap.colors[:len(labels)]

fig, ax = plt.subplots(figsize=(10, 6))
bars = ax.bar(labels, values, color=colors, edgecolor='black')
# ax.set_title('Distribution of Object Types in BDD100K')
ax.set_xlabel('Classes')
ax.set_ylabel('Count')
# Restyle the tick labels the bar plot already created. Calling
# set_xticklabels() without fixing the tick positions first is discouraged by
# matplotlib and emits a warning on recent versions.
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
ax.grid(axis='y', linestyle='--', alpha=0.6)

# Write each bar's count just above it
for bar in bars:
    height = bar.get_height()
    ax.annotate(f'{height}',
                xy=(bar.get_x() + bar.get_width() / 2, height),
                xytext=(0, 3),
                textcoords='offset points',
                ha='center', va='bottom', fontsize=8)

plt.tight_layout()
plt.show()

In [None]:
# Sample Image with Annotations for BDD100K

# A fixed sample image is used here.
# Alternative: sample_file = bdd100k_df['file'].iloc[0]
sample_file = '0a006b7b-c22407a2.jpg'
print(f'Sample File: {sample_file}')
# Use the shared path constant instead of repeating the directory literal
sample_img_path = os.path.join(BDD100K_IMG_PATH, sample_file)
print(f'Sample Image Path: {sample_img_path}')

if os.path.exists(sample_img_path):
    img_orig = cv2.imread(sample_img_path)
    # cv2.imread signals an undecodable file by returning None, not by raising
    if img_orig is None:
        raise IOError(f'Sample image {sample_img_path} could not be read.')
    # Convert from OpenCV's BGR channel order to RGB for matplotlib
    img_orig = cv2.cvtColor(img_orig, cv2.COLOR_BGR2RGB)

    # Draw on a copy so the original stays clean for the left panel
    img_annot = img_orig.copy()

    annotations = bdd100k_df[bdd100k_df['file'] == sample_file]

    for _, row in annotations.iterrows():
        cv2.rectangle(
            img_annot,
            (int(row['bbox_xmin']), int(row['bbox_ymin'])),
            (int(row['bbox_xmax']), int(row['bbox_ymax'])),
            (255, 0, 0),
            2
        )
        # Class label 10 px above the top-left corner of the box
        cv2.putText(
            img_annot,
            row['type'],
            (int(row['bbox_xmin']), int(row['bbox_ymin']) - 10),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.5,
            (255, 0, 0),
            2
        )

    fig, axs = plt.subplots(1, 2, figsize=(15, 6))
    axs[0].imshow(img_orig)
    axs[0].set_title('Original Image')
    axs[0].axis('off')

    axs[1].imshow(img_annot)
    axs[1].set_title('Annotated Image')
    axs[1].axis('off')

    plt.tight_layout()
    plt.show()

else:
    print(f'Sample image {sample_img_path} not found.')

In [None]:
# Plot the cropped image with bounding box corner labels

# Uses `sample_file` as set by the preceding cell.
# Alternative: sample_file = bdd100k_df['file'].iloc[0]
annotations = bdd100k_df[bdd100k_df['file'] == sample_file]

# Use the shared path constant instead of repeating the directory literal
sample_img_path = os.path.join(BDD100K_IMG_PATH, sample_file)
if annotations.empty or not os.path.exists(sample_img_path):
    raise FileNotFoundError('No annotations or image for ' + sample_file)

img = cv2.imread(sample_img_path)
# cv2.imread signals an undecodable file by returning None, not by raising
if img is None:
    raise IOError(f'Sample image {sample_img_path} could not be read.')
# Convert from OpenCV's BGR channel order to RGB for matplotlib
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

# Position (within this image's annotations) of the box to crop
bbox_position = 15
if bbox_position >= len(annotations):
    raise IndexError(
        f'{sample_file} has only {len(annotations)} annotations; '
        f'cannot select annotation #{bbox_position}'
    )
bbox = annotations.iloc[bbox_position]
xmin, ymin = int(bbox['bbox_xmin']), int(bbox['bbox_ymin'])
xmax, ymax = int(bbox['bbox_xmax']), int(bbox['bbox_ymax'])

obj_img = img[ymin:ymax, xmin:xmax]
h, w, _ = obj_img.shape

fig, ax = plt.subplots(figsize=(4,4))
ax.imshow(obj_img, origin='upper')
ax.axis('off')

# Red outline around the whole crop
ax.add_patch(plt.Rectangle((0,0), w, h,
                           linewidth=2,
                           edgecolor='red',
                           facecolor='none'))

# (x, y, text, horizontal alignment, vertical alignment) for each crop corner;
# the text shows the corner's coordinates in the original image
corners = [
    (0,   0,   f'({xmin},{ymin})', 'left',  'top'),
    (w,   0,   f'({xmax},{ymin})', 'right', 'top'),
    (w,   h,   f'({xmax},{ymax})', 'right', 'bottom'),
    (0,   h,   f'({xmin},{ymax})', 'left',  'bottom'),
]
for x, y, txt, ha, va in corners:
    ax.text(
        x, y, txt,
        color='yellow',
        fontsize=10,
        ha=ha,
        va=va,
        bbox=dict(facecolor='black', alpha=0.5, pad=2)
    )

plt.tight_layout()
plt.show()

In [None]:
# Co-Occurrence Matrix Heatmap

# file -> set of object types annotated in that file
types_per_file = bdd100k_df.groupby('file')['type'].apply(set).to_dict() 
# object type -> set of files that contain at least one object of that type
co_occurrence = defaultdict(set)
for file, types in types_per_file.items():
    for obj_type in types:
        co_occurrence[obj_type].add(file)

# Use top 12 classes as per provided distribution
# top_classes = ['car', 'lane', 'traffic sign', 'traffic light', 'drivable area', 'person', 'truck', 'bus', 'bike', 'rider', 'motor', 'train']
# Currently every class present in the train split is used
top_classes = bdd100k_df['type'].unique()
co_matrix = pd.DataFrame(0, index=top_classes, columns=top_classes, dtype=int)

# Calculate unique images per class for diagonal
unique_images_per_class = {cls: len(co_occurrence[cls]) for cls in top_classes}

# Fill the upper triangle and mirror it (the matrix is symmetric)
for i, type1 in enumerate(top_classes):
    for j, type2 in enumerate(top_classes):
        if i <= j:
            if i == j:
                # Diagonal: number of unique images with at least one instance of the class
                co_matrix.iloc[i, j] = unique_images_per_class[type1]
            else:
                # Off-diagonal: number of images where both types co-occur (unique pair per image)
                common_files = co_occurrence[type1] & co_occurrence[type2]
                co_matrix.iloc[i, j] = len(common_files)
                co_matrix.iloc[j, i] = len(common_files)

# Quick sanity figures printed alongside the plot
total_images = bdd100k_df['file'].nunique()
print('Total unique images:', total_images)
print('Sample co-occurrence (car & person):', len(co_occurrence['car'] & co_occurrence['person']))

plt.figure(figsize=(10, 8))
# Colour scale is fixed to 0..10000 (hard-coded); larger counts share the top colour
plt.imshow(co_matrix, cmap='viridis', interpolation='nearest', vmin=0, vmax=10000)
# plt.title('Co-Occurrence Matrix of Object Types in BDD100K Train Set')
plt.xticks(ticks=range(len(top_classes)), labels=top_classes, rotation=45)
plt.yticks(ticks=range(len(top_classes)), labels=top_classes)
plt.colorbar(label='Count')
# Write non-zero counts into the cells; text is black above 5000 and white otherwise
# (hard-coded threshold, presumably chosen for contrast against the colormap)
for i in range(len(top_classes)):
    for j in range(len(top_classes)):
        count = co_matrix.iloc[i, j]
        plt.text(j, i, count if count > 0 else '', ha='center', va='center', color='black' if count > 5000 else 'white', fontsize=8)
plt.tight_layout()
plt.show()

In [None]:
# Spatial Heatmap for each object type in BDD100K

image_width, image_height = 1280, 720 
counts = bdd100k_df['type'].value_counts()
top9 = counts.head(9).index.tolist()  
n_types = len(top9)
n_cols = 3
n_rows = int(np.ceil(n_types / n_cols))

# One 50x50 histogram of bounding-box centres per class
all_hms = []
for cls in top9:
    dfc = bdd100k_df[bdd100k_df['type'] == cls]
    x_ctr = (dfc['bbox_xmin'] + dfc['bbox_xmax']) / 2
    y_ctr = (dfc['bbox_ymin'] + dfc['bbox_ymax']) / 2

    hm, _, _ = np.histogram2d(
        x_ctr, y_ctr,
        bins=[50, 50],
        range=[[0, image_width], [0, image_height]]
    )
    # histogram2d returns (x_bins, y_bins); imshow expects rows = y, columns = x
    all_hms.append(hm.T)

# Shared colour scale so densities are comparable across the panels
vmin = min(h.min() for h in all_hms)
vmax = max(h.max() for h in all_hms)

fig, axes = plt.subplots(
    n_rows, n_cols,
    figsize=(n_cols * 5, n_rows * 4),
    squeeze=False
)
axes_flat = axes.flatten()

for idx, cls in enumerate(top9):
    ax = axes_flat[idx]
    # Image coordinates have y = 0 at the TOP of the frame. Draw row 0 at the top
    # (origin='upper') and let the y axis run from 0 down to image_height, so the
    # heatmap has the same orientation as the images it summarises.
    im = ax.imshow(
        all_hms[idx],
        cmap='hot',
        origin='upper',
        extent=[0, image_width, image_height, 0],
        aspect='auto',
        vmin=vmin, vmax=vmax
    )
    # Add titles for each heatmap
    ax.set_title(f'Spatial Heatmap for {cls}')
    ax.set_xlabel('X Coordinate (px)')
    ax.set_ylabel('Y Coordinate (px)')

    cbar = fig.colorbar(
        im, ax=ax,
        fraction=0.046, pad=0.04
    )
    cbar.set_label('Density')

# Hide any grid cells left over when fewer than n_rows * n_cols classes are plotted
for j in range(n_types, len(axes_flat)):
    axes_flat[j].axis('off')

# fig.suptitle('Spatial Heatmaps of Object Classes (BDD100K)', y=1.02, fontsize=16)

plt.tight_layout()
plt.show()


# <b>2. DATA CLEANING</b>

<h5>2.1 Validation of Bounding Box Coordinates</h5>

<b>Train set</b>

In [None]:
# Run the logical bounding-box check on the raw train annotations (before any correction)
check_logical_box_errors(bdd100k_df)

In [None]:
# Apply correction for train set
# (row-wise, on a copy, so the raw `bdd100k_df` stays available for comparison)

bdd100k_df_cleaned = bdd100k_df.copy().apply(correct_bounding_box, axis=1)

In [None]:
# Verify correction for train set:
# run the verification helper on the corrected frame produced by the previous cell

verify_bounding_box_corrections(bdd100k_df_cleaned)

<b>Val set</b>

In [None]:
# Run the logical bounding-box check on the raw val annotations (before any correction)
check_logical_box_errors(bdd100k_df_val)

In [None]:
# Apply correction for val set
# (row-wise, on a copy, so the raw `bdd100k_df_val` stays available for comparison)

bdd100k_df_val_cleaned = bdd100k_df_val.copy().apply(correct_bounding_box, axis=1)

In [None]:
# Verify correction for val set:
# run the verification helper on the corrected frame produced by the previous cell

verify_bounding_box_corrections(bdd100k_df_val_cleaned)

<h5>2.2 Removal of Noisy Annotations</h5>

<b>Train set</b>

In [None]:
# Set minimum area threshold: 100 square pixels (the area of a 10x10 px box).
# Passed as `min_area_threshold` to remove_noisy_boxes for both the train and val sets.

MIN_AREA_THRESHOLD = 100 

A minimum area of 100 px² (the area of a 10×10-pixel box) filters out very small, potentially erroneous boxes, which is a reasonable cut-off given the 1280×720 image resolution.

In [None]:
# Calculate bounding box areas for the corrected train annotations.
# Later cells read a 'bbox_area' column — presumably added by this helper (TODO confirm).

bdd100k_df_cleaned = calculate_box_areas(bdd100k_df_cleaned)

In [None]:
# Identify and remove noisy annotations in the train set (boxes with area < threshold).
# The filtered result replaces the working frame `bdd100k_df_cleaned`.

bdd100k_df_cleaned = remove_noisy_boxes(df=bdd100k_df_cleaned, min_area_threshold=MIN_AREA_THRESHOLD)

In [None]:
# Number of train annotations before any cleaning
len(bdd100k_df)

In [None]:
# Number of train annotations left after box correction and small-box removal
len(bdd100k_df_cleaned)

<b>Val set</b>

In [None]:
# Calculate bounding box areas for the corrected val annotations.
# Later cells read a 'bbox_area' column — presumably added by this helper (TODO confirm).

bdd100k_df_val_cleaned = calculate_box_areas(bdd100k_df_val_cleaned)

In [None]:
# Remove noisy annotations from the val set, using the same area threshold as for the train set
bdd100k_df_val_cleaned = remove_noisy_boxes(df=bdd100k_df_val_cleaned, min_area_threshold=MIN_AREA_THRESHOLD)

<h5>2.3 Handling Occluded or Truncated Objects</h5>

<b>Train set</b>

In [None]:
# Analyse occlusion and truncation distribution of the cleaned train annotations

analyze_occlusion_truncation(bdd100k_df_cleaned)

In [None]:
# Apply classification
# Each train annotation gets a 'reliability' label from classify_object (applied row by row);
# the labelled frame is a new object, so earlier references to the frame are not modified.

bdd100k_df_cleaned = bdd100k_df_cleaned.assign(
    reliability=lambda frame: frame.apply(classify_object, axis=1)
)
reliability_counts = bdd100k_df_cleaned['reliability'].value_counts()
print('\nReliability Classification:', reliability_counts, sep='\n')

In [None]:
# Exclude unreliable objects from the train set
# (relies on the 'reliability' column added in the previous cell — presumably what the helper filters on)

bdd100k_df_cleaned = remove_unreliable_boxes(bdd100k_df_cleaned)

In [None]:
# Verify the updated DataFrame: report how many train boxes survived and preview the first rows

n_train_boxes_kept = len(bdd100k_df_cleaned)
print(f'\nFinal number of bounding boxes after handling occluded/truncated objects: {n_train_boxes_kept}')
print('Cleaned DataFrame head:')
bdd100k_df_cleaned.head()

In [None]:
# Bar chart of the remaining reliability labels in the train set.
# Colours are assigned in bar order (value_counts sorts by descending count), not by label.
bdd100k_df_cleaned['reliability'].value_counts().plot.bar(color=['green', 'red'], alpha=0.7)

- <b>Relevance for SLAM:</b> Research suggests that keeping the `unknown` records in the dataset is likely beneficial for training YOLOv12, as they may include valid annotations for classes like Lane and Drivable Area, which could still be useful for object detection in visual SLAM.

- <b>Class Balance</b>: With 600,000 `unknown` records, removing them could skew the dataset, especially if they include significant classes. Class weights in YOLOv12 training can mitigate imbalance if kept.

&rarr; Keep the `unknown` records in the dataset for training YOLOv12 (ensuring a diverse and comprehensive dataset for predictive modelling in big data analysis).

<b>Val set</b>

In [None]:
# Analyse occlusion and truncation distribution of the cleaned val annotations
analyze_occlusion_truncation(bdd100k_df_val_cleaned)

In [None]:
# Label every val annotation with a 'reliability' class (classify_object applied row by row)
# and print how many annotations fall into each class
bdd100k_df_val_cleaned = bdd100k_df_val_cleaned.assign(
    reliability=lambda frame: frame.apply(classify_object, axis=1)
)
reliability_val_counts = bdd100k_df_val_cleaned['reliability'].value_counts()
print('\nReliability Classification:', reliability_val_counts, sep='\n')

In [None]:
# Exclude unreliable objects from the val set (same helper as used for the train set)
bdd100k_df_val_cleaned = remove_unreliable_boxes(bdd100k_df_val_cleaned)

In [None]:
# Verify the updated DataFrame val set: report how many boxes survived and preview the first rows

n_val_boxes_kept = len(bdd100k_df_val_cleaned)
print(f'\nFinal number of bounding boxes (val set) after handling occluded/truncated objects: {n_val_boxes_kept}')
print('Cleaned DataFrame head:')
bdd100k_df_val_cleaned.head()

In [None]:
# Bar chart of the remaining reliability labels in the val set.
# Colours are assigned in bar order (value_counts sorts by descending count), not by label.
bdd100k_df_val_cleaned['reliability'].value_counts().plot.bar(color=['green', 'red'], alpha=0.7)

<h5>2.4 Outlier Handling</h5>

<b>Statistical Outliers</b>

In [None]:
# Compute summary statistics for bbox_area to understand the distribution.
# `bdd100k_df_cleaned` is the cleaned TRAIN split, so the printed label says "train set"
# (it previously read "test set").
bbox_area_stats = bdd100k_df_cleaned['bbox_area'].describe()
print('\nSummary Statistics for Bounding Box Area (train set):')
# describe() yields a Series; transpose its frame so the statistics form one table row
print(tabulate(bbox_area_stats.to_frame().T, headers='keys', tablefmt='pretty'))

In [None]:
# Summary statistics of the bounding-box areas in the val split, printed as a one-row table
bbox_area_stats_val = bdd100k_df_val_cleaned['bbox_area'].describe()
stats_row_val = bbox_area_stats_val.to_frame().T
print('\nSummary Statistics for Bounding Box Area (val set):')
print(tabulate(stats_row_val, headers='keys', tablefmt='pretty'))

In [None]:
# Detect and remove outliers (train set)
# Outliers are re-detected after every removal pass, because dropping rows changes the
# data the detector sees; the loop stops once a pass finds nothing.

outliers = detect_outliers(bdd100k_df_cleaned, check_aspect_ratio=False)
num_outliers = outliers.sum()

while num_outliers != 0:
    print(f'Found {num_outliers} outliers.')
    bdd100k_df_cleaned = bdd100k_df_cleaned[~outliers]
    print(f'Removed {num_outliers} outliers.')

    # Next pass over the reduced frame
    outliers = detect_outliers(bdd100k_df_cleaned, check_aspect_ratio=False)
    num_outliers = outliers.sum()

print('No more outliers found. Process complete.')

In [None]:
# Detect and remove outliers (val set)
# Outliers are re-detected after every removal pass, because dropping rows changes the
# data the detector sees; the loop stops once a pass finds nothing.

outliers_val = detect_outliers(bdd100k_df_val_cleaned, check_aspect_ratio=False)
num_outliers_val = outliers_val.sum()

while num_outliers_val != 0:
    print(f'Found {num_outliers_val} outliers.')
    bdd100k_df_val_cleaned = bdd100k_df_val_cleaned[~outliers_val]
    print(f'Removed {num_outliers_val} outliers.')

    # Next pass over the reduced frame
    outliers_val = detect_outliers(bdd100k_df_val_cleaned, check_aspect_ratio=False)
    num_outliers_val = outliers_val.sum()

print('No more outliers found. Process complete.')

In [None]:
# Snapshot of the train frame after statistical outlier removal;
# the aspect-ratio filtering further below starts from this copy
bdd100k_df_outlier_cleaned = bdd100k_df_cleaned.copy()

In [None]:
# Number of train annotations left after statistical outlier removal
len(bdd100k_df_outlier_cleaned)

In [None]:
# Snapshot of the val frame after statistical outlier removal;
# the aspect-ratio filtering further below starts from this copy
bdd100k_df_val_outlier_cleaned = bdd100k_df_val_cleaned.copy()

In [None]:
# Number of val annotations left after statistical outlier removal
len(bdd100k_df_val_outlier_cleaned)

In [None]:
# Compare the bounding-box area distributions of the train and val splits after outlier removal.
# The first frame is the TRAIN split, so the title says "Train" (it previously read "Test").
plot_bbox_area_boxplot(bdd100k_df_outlier_cleaned, bdd100k_df_val_outlier_cleaned, title='Bounding Box Area Distribution: BDD100K Train vs BDD100K Val - After Outlier Removal')

<b>Aspect Ratio Outliers</b>

In [None]:
# Filter aspect-ratio outliers out of the train set and report the size before/after.
# The "original" size is read from the frame that was actually passed in; `bdd100k_df_cleaned`
# is rebound to the filtered frame by a later cell, so using it here would report the
# wrong number whenever this cell is re-run.
bdd100k_df_cleaned_filtered = handle_aspect_ratio_outliers(bdd100k_df_outlier_cleaned)
print(f'Original DataFrame size: {len(bdd100k_df_outlier_cleaned)}')
print(f'Filtered DataFrame size: {len(bdd100k_df_cleaned_filtered)}')

In [None]:
# Filter aspect-ratio outliers out of the val set and report the size before/after.
# The "original" size must come from the val frame that was passed in — the previous
# version printed the length of the train frame here.
bdd100k_df_val_cleaned_filtered = handle_aspect_ratio_outliers(bdd100k_df_val_outlier_cleaned)
print(f'Original DataFrame size: {len(bdd100k_df_val_outlier_cleaned)}')
print(f'Filtered DataFrame size: {len(bdd100k_df_val_cleaned_filtered)}')

In [None]:
# Update the original DataFrame: from here on `bdd100k_df_cleaned` refers to the
# aspect-ratio-filtered train frame (same object as `bdd100k_df_cleaned_filtered`, not a copy)
bdd100k_df_cleaned = bdd100k_df_cleaned_filtered

In [None]:
# From here on `bdd100k_df_val_cleaned` refers to the aspect-ratio-filtered val frame
# (same object as `bdd100k_df_val_cleaned_filtered`, not a copy)
bdd100k_df_val_cleaned = bdd100k_df_val_cleaned_filtered

In [None]:
# Compare the aspect-ratio distributions of the filtered train and val splits.
# The first frame is the TRAIN split, so the title says "Train" (it previously read "Test").
plot_aspect_ratio_distributions(bdd100k_df_cleaned_filtered, bdd100k_df_val_cleaned_filtered, title='Aspect Ratio Distributions: BDD100K Train vs BDD100K Val after aspect outlier removal')

$$
\mathrm{aspect\_ratio}
\;=\;
\frac{\mathrm{width}}{\mathrm{height}}
\;=\;
\frac{x_{\max} \;-\; x_{\min}}
     {y_{\max} \;-\; y_{\min}}
$$


- The histogram has not changed much because the outlier removal process affected only a small percentage (4.95%) of the dataset, and these outliers were distributed across the tails of the distribution rather than concentrated in a way that would reshape the histogram.

- The peak and dense central region (0.5 to 2.0) remain intact, and the sparse tails were only slightly trimmed, which is not visually significant given the scale and binning of the plot.

# <b>3. DATA PRE-PROCESSING & MODELLING</b>

<h5>3.1 Normalisation</h5>

To stabilise training and account for potential differences in image intensity between BDD100K and KITTI, we need to compute the mean and standard deviation of RGB pixel values for the BDD100K dataset.

In [None]:
# Location of each train annotation's image: train image folder joined with the file name
bdd100k_df_cleaned['image_path'] = bdd100k_df_cleaned['file'].map(lambda name: os.path.join(BDD100K_IMG_PATH, name))

In [None]:
# Location of each val annotation's image: val image folder joined with the file name
bdd100k_df_val_cleaned['image_path'] = bdd100k_df_val_cleaned['file'].map(lambda name: os.path.join(BDD100K_IMG_PATH_VAL, name))

In [None]:
# One entry per image, in order of first appearance
# (an image appears once per annotated object in the frames, hence the de-duplication)
image_paths = bdd100k_df_cleaned['image_path'].drop_duplicates().tolist()
image_paths_val = bdd100k_df_val_cleaned['image_path'].drop_duplicates().tolist()

<h5>3.2 Convert Annotations to YOLO Format</h5>

In [None]:
# Class name -> integer class id used in the YOLO label files.
# The id of a class is its position in the list below, so the order matters.

class_mapping = {
    label: class_id
    for class_id, label in enumerate([
        'car',
        'lane',
        'traffic sign',
        'traffic light',
        'drivable area',
        'person',
        'truck',
        'bus',
        'bike',
        'rider',
        'motor',
        'train',
    ])
}

In [None]:
# Convert DataFrame annotations to YOLO format and save as .txt files.

def convert_to_yolo(df, output_dir, img_width=1280, img_height=720, class_map=None):
    """Write one YOLO label file per image into ``output_dir``.

    Each output line has the form ``class_id x_center y_center width height`` with all
    four box values normalised by the image size.

    Parameters
    ----------
    df : pandas.DataFrame
        Annotations with the columns 'file', 'type', 'bbox_xmin', 'bbox_ymin',
        'bbox_xmax' and 'bbox_ymax' (pixel coordinates).
    output_dir : str
        Directory for the label files; created if missing, even when ``df`` is empty.
    img_width, img_height : int
        Image size in pixels used for normalisation.
    class_map : dict, optional
        Mapping from class name to integer class id. Defaults to the notebook-level
        ``class_mapping``.

    Notes
    -----
    * Rows whose 'type' is not in the mapping are skipped.
    * Box edges are clamped to the image, so every written value lies in [0, 1].
    * Boxes with no positive width or height after clamping are skipped.
    * An image whose rows are all skipped still gets an (empty) label file.
    """
    if class_map is None:
        class_map = class_mapping

    # Create the target directory once, up front, rather than once per image
    os.makedirs(output_dir, exist_ok=True)

    for file, group in df.groupby('file'):
        txt_file = os.path.join(output_dir, os.path.splitext(file)[0] + '.txt')
        with open(txt_file, 'w') as f:
            for _, row in group.iterrows():
                class_id = class_map.get(row['type'], -1)
                if class_id == -1:
                    continue  # Skip unknown classes

                # Clamp the box to the image so the normalised values cannot leave [0, 1]
                xmin = min(max(float(row['bbox_xmin']), 0.0), img_width)
                ymin = min(max(float(row['bbox_ymin']), 0.0), img_height)
                xmax = min(max(float(row['bbox_xmax']), 0.0), img_width)
                ymax = min(max(float(row['bbox_ymax']), 0.0), img_height)
                if xmax <= xmin or ymax <= ymin:
                    continue  # Skip degenerate or inverted boxes

                x_center = (xmin + xmax) / 2 / img_width
                y_center = (ymin + ymax) / 2 / img_height
                width = (xmax - xmin) / img_width
                height = (ymax - ymin) / img_height
                f.write(f'{class_id} {x_center} {y_center} {width} {height}\n')

In [None]:
# Write the YOLO label files: one pass per split, each into its own labels folder
for split_df, labels_dir in (
    (bdd100k_df_cleaned, 'BDD100K_TRAIN/labels/train'),
    (bdd100k_df_val_cleaned, 'BDD100K_TRAIN/labels/val'),
):
    convert_to_yolo(split_df, labels_dir)

<h5>3.3 Data Augmentation and YOLOv12 Training</h5>

- Apply data augmentations like random scaling, horizontal flipping, color jittering, and random cropping to enhance model robustness, especially to bridge the domain gap.
- YOLOv12, as implemented in the Ultralytics package, automatically applies letterboxing to the input size specified via `imgsz` during training (e.g. `imgsz=416`; the quick run below uses `imgsz=256`).

In [None]:
# Load the small YOLO12 checkpoint ('yolo12s.pt'), pretrained on COCO, as the starting point for fine-tuning
model = YOLO('yolo12s.pt') 

In [None]:
# Train the model
#
# Fuller configuration, kept for reference (currently disabled):
# results = model.train(
#     data='bdd100k.yaml',
#     epochs=2,
#     imgsz=416,
#     device='mps',
#     batch=8,
#     lr0=1e-3,
#     workers=5,
#     flipud=0.0,           # No vertical flip
#     fliplr=0.5,           # Horizontal flip with 50% probability
#     hsv_h=0.2,            # Hue jitter
#     hsv_s=0.2,            # Saturation jitter
#     hsv_v=0.2             # Brightness/contrast jitter
# )

# Active run: a minimal configuration (1 epoch, 256 px inputs, batch of 2) with no
# explicit augmentation settings, so the library defaults apply.
# NOTE(review): device='mps' selects PyTorch's Apple-Silicon backend, so this call
# presumably fails on machines without it — confirm before running elsewhere.
# NOTE: the return value is bound to `result` here, whereas the disabled block uses `results`.
result = model.train(
    data='bdd100k.yaml',
    epochs=1,
    imgsz=256,  
    batch=2,    
    device='mps',
    workers=2,
    cache='ram' 
)

# <b>4. EVALUATION</b>

In [None]:
# Evaluate model performance on the validation set.
# model.val() is called without arguments — presumably it reuses the dataset config
# from the training run above (TODO confirm against the Ultralytics docs).
metrics = model.val()