# AIML Capstone Project - Autonomous Driving - Lee Thornquist

Problem Statement 1: Autonomous vehicles (AV) and intelligent transport systems (ITS) are the future of road transport. Automatic detection of vehicles on the road in real-time helps AV technology and makes ITS more intelligent in terms of vehicle tracking, vehicle counting, and road incident response.

Objective Statement 1: As the first part of this project, you need to develop an AI model using a deep learning framework that predicts the type of vehicle present in an image as well as localizes the vehicle by rectangular bounding box.

## Part 1

### 1.1 Create a parent folder for custom model training and child folders to store data

In [None]:
import os

# define parent directory
parent_dir = 'yolov5_vehicle_detection'

# define child directories
child_dirs = ["data", "models", "results", "notebooks"]

# create parent directory
if not os.path.exists(parent_dir):
    os.makedirs(parent_dir)

# create child directories
for dir_name in child_dirs:
    dir_path = os.path.join(parent_dir, dir_name)
    if not os.path.exists(dir_path):
        os.makedirs(dir_path)


### 1.2 Prepare the dataset for model training

In [None]:
import pandas as pd
import shutil
import random

# define paths
data_dir = 'yolov5_vehicle_detection/data'
images_dir = data_dir, 'Images'
labels_csv = data_dir, 'labels.csv'
labels_dir = os.path.join(data_dir, 'labels')


In [None]:
# create directories

os.makedirs(labels_dir, exist_ok=True)

os.makedirs(os.path.join(data_dir, 'images', 'train'), exist_ok=True)
os.makedirs(os.path.join(data_dir, 'images', 'val'), exist_ok=True)
os.makedirs(os.path.join(data_dir, 'images', 'test'), exist_ok=True)

os.makedirs(os.path.join(labels_dir, 'train'), exist_ok=True)
os.makedirs(os.path.join(labels_dir, 'val'), exist_ok=True)
os.makedirs(os.path.join(labels_dir, 'test'), exist_ok=True)

# I ended up manually changing these to the correct structure for the YOLOv8 model. Attached image shows how I structured the data.

In [None]:
# loading our data
# Define column names
column_names = ['image_id', 'vehicle_type', 'x_min', 'y_min', 'x_max', 'y_max']

# Load the labels file with column names
labels_df = pd.read_csv("yolov5_vehicle_detection/data/labels.csv", header=None, names=column_names)

labels_df

In [None]:
# making sure I have no missing values

labels_df.isnull().sum()

In [None]:
labels_df['vehicle_type'].unique()

In [None]:
# Complete list of vehicle types and their numeric codes
vehicle_types = {
    'pickup_truck': 0,
    'car': 1,
    'articulated_truck': 2,
    'bus': 3,
    'motorized_vehicle': 4,
    'work_van': 5,
    'single_unit_truck': 6,
    'pedestrian': 7,
    'bicycle': 8,
    'non-motorized_vehicle': 9,
    'motorcycle': 10
}

In [None]:
# Apply the vehicle type mapping
labels_df['vehicle_type'] = labels_df['vehicle_type'].map(vehicle_types)

# Check for any unmapped vehicle types resulting in NaN
unmapped_vehicle_types = labels_df[labels_df['vehicle_type'].isna()]
if not unmapped_vehicle_types.empty:
    print("Unmapped vehicle types found:")
    print(unmapped_vehicle_types)
else:
    print("All vehicle types mapped successfully!")

In [None]:
# Image dimensions
image_height = 480
image_width = 720

# Standardize the labels directly in the DataFrame
labels_df['x_center'] = (labels_df['x_min'] + labels_df['x_max']) / 2.0 / image_width
labels_df['y_center'] = (labels_df['y_min'] + labels_df['y_max']) / 2.0 / image_height
labels_df['width'] = (labels_df['x_max'] - labels_df['x_min']) / image_width
labels_df['height'] = (labels_df['y_max'] - labels_df['y_min']) / image_height


In [None]:
# Define the path to the labels directory
labels_output_dir = os.path.join(data_dir, "labels_yolov5")

# Create the labels directory if it doesn't exist
if not os.path.exists(labels_output_dir):
    os.makedirs(labels_output_dir)

In [None]:
# Function to write labels to YOLOv8 format text files

def write_yolo_labels(image_id, group):
    file_path = os.path.join(labels_output_dir, f"{int(image_id):08d}.txt")
    with open(file_path, 'w') as f:
        for _, row in group.iterrows():
            label = f"{int(row['vehicle_type'])} {row['x_center']:.6f} {row['y_center']:.6f} {row['width']:.6f} {row['height']:.6f}\n"
            f.write(label)


In [None]:
# Group labels by image_id and apply the function

grouped = labels_df.groupby('image_id')
grouped.apply(lambda group: write_yolo_labels(group.name, group))


In [None]:
# Read and print the contents of a few label files to verify
for label_file in label_files[:5]:
    file_path = os.path.join(labels_output_dir, label_file)
    print(f"\nContents of {label_file}:")
    with open(file_path, 'r') as f:
        print(f.read())


In [None]:
from sklearn.model_selection import train_test_split

# Define base directories
data_dir = 'yolov5_vehicle_detection/data'
images_dir = os.path.join(data_dir, "Images")
labels_yolov5_dir = os.path.join(data_dir, "labels_yolov5")
splits_dir = os.path.join(data_dir, "splits")
output_images_dir = os.path.join(data_dir, "images")
output_labels_dir = os.path.join(data_dir, "labels")

# list all image files
existing_images = set(os.listdir(images_dir))


In [None]:
# Ensure directories for splits
os.makedirs(splits_dir, exist_ok=True)

for split in ["train", "val", "test"]:
    os.makedirs(os.path.join(output_images_dir, split), exist_ok=True)
    os.makedirs(os.path.join(output_labels_dir, split), exist_ok=True)
    

In [None]:
# Function to create and save split datasets
def create_and_save_splits():
    train_df, temp_df = train_test_split(labels_df, test_size=0.3, random_state=42, stratify=labels_df['vehicle_type'])
    val_df, test_df = train_test_split(temp_df, test_size=0.5, random_state=42, stratify=temp_df['vehicle_type'])
    
    train_df.to_csv(os.path.join(splits_dir, 'train_labels.csv'), index=False)
    val_df.to_csv(os.path.join(splits_dir, 'val_labels.csv'), index=False)
    test_df.to_csv(os.path.join(splits_dir, 'test_labels.csv'), index=False)
    print("Split files created successfully!")
    
# Check if split files exist, if not create them
if not (os.path.exists(os.path.join(splits_dir, 'train_labels.csv')) and 
        os.path.exists(os.path.join(splits_dir, 'val_labels.csv')) and 
        os.path.exists(os.path.join(splits_dir, 'test_labels.csv'))):
    create_and_save_splits()

In [None]:
# Load split datasets
train_df = pd.read_csv(os.path.join(splits_dir, "train_labels.csv"))
val_df = pd.read_csv(os.path.join(splits_dir, "val_labels.csv"))
test_df = pd.read_csv(os.path.join(splits_dir, "test_labels.csv"))

# List all image files
existing_images = set(os.listdir(images_dir))

In [None]:
# Function to filter DataFrame to include only existing images
def filter_existing_images(df):
    df['image_filename'] = df['image_id'].apply(lambda x: f"{int(x):08d}.jpg")
    return df[df['image_filename'].isin(existing_images)]

# Filter the DataFrames
train_df_filtered = filter_existing_images(train_df)
val_df_filtered = filter_existing_images(val_df)
test_df_filtered = filter_existing_images(test_df)

In [None]:
# Function to move files based on DataFrame
def move_files(df, split):
    missing_files = []
    for _, row in df.iterrows():
        image_id = f"{int(row['image_id']):08d}"
        image_filename = f"{image_id}.jpg"
        label_filename = f"{image_id}.txt"
        
        # Define source and destination paths
        src_image_path = os.path.join(images_dir, image_filename)
        src_label_path = os.path.join(labels_yolov5_dir, label_filename)
        dst_image_path = os.path.join(output_images_dir, split, image_filename)
        dst_label_path = os.path.join(output_labels_dir, split, label_filename)
        
        # Check if the image file exists before moving
        if os.path.exists(src_image_path) and os.path.exists(src_label_path):
            shutil.move(src_image_path, dst_image_path)
            shutil.move(src_label_path, dst_label_path)
        else:
            missing_files.append(image_id)
    
    # Log missing files
    if missing_files:
        print(f"Missing files for image IDs: {missing_files}")

# Move files to corresponding directories
move_files(train_df_filtered, "train")
move_files(val_df_filtered, "val")
move_files(test_df_filtered, "test")

In [None]:
# creating the yaml file
import yaml

data = {
    'train': 'data/images/train',
    'val': 'data/images/val',
    'test': 'data/images/test',
    'nc': 11,
    'names': ['pickup_truck', 'car', 'articulated_truck', 'bus', 'motorized_vehicle', 'work_van', 'single_unit_truck', 'pedestrian', 'bicycle', 'non-motorized_vehicle', 'motorcycle']
}

yaml_file_path = 'yolov5_vehicle_detection/data.yaml'

with open(yaml_file_path, 'w') as yaml_file:
    yaml.dump(data, yaml_file, default_flow_style=False)

print("YAML file created successfully!")


### 1.3 Create an CNN architecture for object detection (YOLOv8)

I originally started with trying to create a YOLOv5 model like we did in class (which is why some of my files have 'YOLOv5' in the name,
but I realized there were updated YOLO models so I changed my model to YOLOv8

In [1]:
import ultralytics
from ultralytics import YOLO

# Load a YOLOv8 model
model = YOLO('yolov8s.pt')  # Load the YOLOv8 architecture file

# Train the model using your dataset.yaml configuration file
results = model.train(data='/Users/leethornquist/Desktop/AI:ML Caltech Bootcamp/Foundations/07 - Capstone/Autonomous Driving/yolov5_vehicle_detection/dataset.yaml',
                      epochs=5, imgsz=640, batch=16)


Ultralytics YOLOv8.2.28 🚀 Python-3.11.5 torch-2.2.2 CPU (Intel Core(TM) i5-7360U 2.30GHz)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8s.pt, data=/Users/leethornquist/Desktop/AI:ML Caltech Bootcamp/Foundations/07 - Capstone/Autonomous Driving/yolov5_vehicle_detection/dataset.yaml, epochs=5, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=train15, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=N

[34m[1mtrain: [0mScanning /Users/leethornquist/Desktop/AI:ML Caltech Bootcamp/Foundations/[0m


[34m[1mtrain: [0mNew cache created: /Users/leethornquist/Desktop/AI:ML Caltech Bootcamp/Foundations/07 - Capstone/Autonomous Driving/yolov5_vehicle_detection/data/train/labels.cache


[34m[1mval: [0mScanning /Users/leethornquist/Desktop/AI:ML Caltech Bootcamp/Foundations/07[0m

[34m[1mval: [0mNew cache created: /Users/leethornquist/Desktop/AI:ML Caltech Bootcamp/Foundations/07 - Capstone/Autonomous Driving/yolov5_vehicle_detection/data/val/labels.cache





Plotting labels to runs/detect/train15/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.000667, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added ✅
Image sizes 640 train, 640 val
Using 0 dataloader workers
Logging results to [1mruns/detect/train15[0m
Starting training for 5 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        1/5         0G      1.258      2.107      1.131         41        640: 1
                 Class     Images  Instances      Box(P          R      mAP50  m

                   all        386        525       0.59      0.272      0.359      0.264






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        2/5         0G      1.196      1.295      1.091         42        640: 1
                 Class     Images  Instances      Box(P          R      mAP50  m

                   all        386        525       0.48      0.467      0.403      0.276






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        3/5         0G      1.157      1.197       1.07         66        640: 1
                 Class     Images  Instances      Box(P          R      mAP50  m

                   all        386        525      0.526      0.456      0.485      0.346






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        4/5         0G      1.121      1.098      1.055         31        640: 1
                 Class     Images  Instances      Box(P          R      mAP50  m

                   all        386        525      0.596      0.525      0.526      0.396






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        5/5         0G      1.072     0.9667      1.029         33        640: 1
                 Class     Images  Instances      Box(P          R      mAP50  m

                   all        386        525      0.654      0.534      0.599      0.452






5 epochs completed in 17.988 hours.
Optimizer stripped from runs/detect/train15/weights/last.pt, 22.5MB
Optimizer stripped from runs/detect/train15/weights/best.pt, 22.5MB

Validating runs/detect/train15/weights/best.pt...
Ultralytics YOLOv8.2.28 🚀 Python-3.11.5 torch-2.2.2 CPU (Intel Core(TM) i5-7360U 2.30GHz)
Model summary (fused): 168 layers, 11129841 parameters, 0 gradients, 28.5 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  m


                   all        386        525      0.653      0.534      0.601      0.454
          pickup_truck         80         84      0.711      0.558      0.597      0.505
                   car        255        324      0.764      0.787      0.796      0.626
     articulated_truck         21         21      0.554      0.667      0.704      0.549
                   bus         33         33      0.924      0.879      0.954      0.833
     motorized_vehicle         28         28      0.345      0.179      0.213      0.126
              work_van         14         15      0.364      0.267      0.246      0.193
     single_unit_truck          7          7      0.585      0.286      0.393      0.301
            pedestrian          2          3      0.743          1      0.995      0.675
               bicycle          4          4      0.681          1      0.995      0.746
 non_motorized_vehicle          2          2          1          0     0.0101    0.00705
            motorcycl

### 1.4 Evaluate the Model and Check the Test Results

In [2]:
# Evaluate the model on validation data
val_results = model.val()
print(val_results)


Ultralytics YOLOv8.2.28 🚀 Python-3.11.5 torch-2.2.2 CPU (Intel Core(TM) i5-7360U 2.30GHz)
Model summary (fused): 168 layers, 11129841 parameters, 0 gradients, 28.5 GFLOPs


[34m[1mval: [0mScanning /Users/leethornquist/Desktop/AI:ML Caltech Bootcamp/Foundations/07[0m
                 Class     Images  Instances      Box(P          R      mAP50  m


                   all        386        525      0.653      0.534      0.601      0.454
          pickup_truck         80         84      0.711      0.558      0.597      0.505
                   car        255        324      0.764      0.787      0.796      0.626
     articulated_truck         21         21      0.554      0.667      0.704      0.549
                   bus         33         33      0.924      0.879      0.954      0.833
     motorized_vehicle         28         28      0.345      0.179      0.213      0.126
              work_van         14         15      0.364      0.267      0.246      0.193
     single_unit_truck          7          7      0.585      0.286      0.393      0.301
            pedestrian          2          3      0.743          1      0.995      0.675
               bicycle          4          4      0.681          1      0.995      0.746
 non_motorized_vehicle          2          2          1          0     0.0101    0.00705
            motorcycl

In [4]:
# Evaluate the model on test data
dataset_path = '/Users/leethornquist/Desktop/AI:ML Caltech Bootcamp/Foundations/07 - Capstone/Autonomous Driving/yolov5_vehicle_detection/dataset.yaml'
test_results = model.val(data=dataset_path, split='test')
print(test_results)


Ultralytics YOLOv8.2.28 🚀 Python-3.11.5 torch-2.2.2 CPU (Intel Core(TM) i5-7360U 2.30GHz)


[34m[1mval: [0mScanning /Users/leethornquist/Desktop/AI:ML Caltech Bootcamp/Foundations/07[0m

[34m[1mval: [0mNew cache created: /Users/leethornquist/Desktop/AI:ML Caltech Bootcamp/Foundations/07 - Capstone/Autonomous Driving/yolov5_vehicle_detection/data/test/labels.cache



                 Class     Images  Instances      Box(P          R      mAP50  m


                   all        290        330       0.76      0.587      0.671      0.502
          pickup_truck         59         59      0.805       0.56      0.677      0.576
                   car        179        195      0.796        0.8      0.812      0.682
     articulated_truck         14         14      0.586      0.643      0.676      0.544
                   bus         18         18      0.944      0.944      0.964      0.867
     motorized_vehicle         22         22      0.217     0.0455     0.0905     0.0398
              work_van          9          9          1      0.425      0.526      0.489
     single_unit_truck          4          4      0.852       0.25       0.46      0.379
               bicycle          3          3       0.64          1      0.995      0.441
            motorcycle          6          6          1      0.618      0.838        0.5
Speed: 2.9ms preprocess, 396.8ms inference, 0.0ms loss, 1.3ms postprocess per image
Results saved to [1mruns/

### 1.5 Run Inferences on Sample Images

In [17]:
import os
from ultralytics import YOLO
import matplotlib.pyplot as plt
from PIL import Image, ImageDraw, ImageFont
import numpy as np

In [21]:
# Load the trained model
model = YOLO('/Users/leethornquist/Desktop/AI:ML Caltech Bootcamp/Foundations/07 - Capstone/Autonomous Driving/runs/detect/train15/weights/best.pt')

In [22]:
# Path to your sample images
sample_images_path = '/Users/leethornquist/Desktop/AI:ML Caltech Bootcamp/Foundations/07 - Capstone/Autonomous Driving/sample_images'
output_images_path = '/Users/leethornquist/Desktop/AI:ML Caltech Bootcamp/Foundations/07 - Capstone/Autonomous Driving/output_images'

# Ensure the output directory exists
os.makedirs(output_images_path, exist_ok=True)

In [24]:
# Run inference on the sample images
results = model.predict(source=sample_images_path, save=True, save_dir=output_images_path)

# Function to draw bounding boxes on the image
def draw_boxes(image, boxes, labels, scores, class_names):
    draw = ImageDraw.Draw(image)
    font = ImageFont.load_default()
    for box, label, score in zip(boxes, labels, scores):
        x1, y1, x2, y2 = box
        draw.rectangle([x1, y1, x2, y2], outline="red", width=3)
        draw.text((x1, y1), f"{class_names[label]} {score:.2f}", fill="red", font=font)
    return image

# Class names based on your dataset.yaml
class_names = ['pickup_truck', 'car', 'articulated_truck', 'bus', 'motorized_vehicle', 
               'work_van', 'single_unit_truck', 'pedestrian', 'bicycle', 'non_motorized_vehicle', 'motorcycle']

# Display the results
for result in results:
    img_path = result.path
    img = Image.open(img_path)
    
    # Retrieve boxes, labels, and scores
    boxes = result.boxes.xyxy.numpy()  # Bounding box coordinates in (x1, y1, x2, y2) format
    labels = result.boxes.cls.numpy().astype(int)  # Class labels
    scores = result.boxes.conf.numpy()  # Confidence scores
    
    # Draw boxes on the image
    img_with_boxes = draw_boxes(img, boxes, labels, scores, class_names)
    
    # Display the image
    plt.figure(figsize=(10, 10))
    plt.imshow(img_with_boxes)
    plt.axis('off')
    plt.show()


image 1/6 /Users/leethornquist/Desktop/AI:ML Caltech Bootcamp/Foundations/07 - Capstone/Autonomous Driving/sample_images/image1.jpeg: 544x640 7 cars, 6 work_vans, 435.6ms
image 2/6 /Users/leethornquist/Desktop/AI:ML Caltech Bootcamp/Foundations/07 - Capstone/Autonomous Driving/sample_images/image2.jpeg: 416x640 1 pickup_truck, 10 cars, 1 bus, 341.7ms
image 3/6 /Users/leethornquist/Desktop/AI:ML Caltech Bootcamp/Foundations/07 - Capstone/Autonomous Driving/sample_images/image3.jpeg: 480x640 4 pickup_trucks, 4 cars, 4 buss, 1 work_van, 407.6ms
image 4/6 /Users/leethornquist/Desktop/AI:ML Caltech Bootcamp/Foundations/07 - Capstone/Autonomous Driving/sample_images/image4.jpeg: 448x640 3 pickup_trucks, 1 car, 2 articulated_trucks, 1 single_unit_truck, 344.7ms
image 5/6 /Users/leethornquist/Desktop/AI:ML Caltech Bootcamp/Foundations/07 - Capstone/Autonomous Driving/sample_images/image5.jpeg: 384x640 3 pickup_trucks, 9 cars, 5 work_vans, 303.6ms
image 6/6 /Users/leethornquist/Desktop/AI:ML C

<Figure size 1000x1000 with 1 Axes>

<Figure size 1000x1000 with 1 Axes>

<Figure size 1000x1000 with 1 Axes>

<Figure size 1000x1000 with 1 Axes>

<Figure size 1000x1000 with 1 Axes>

<Figure size 1000x1000 with 1 Axes>

I attached some examples of the image inferences. The model is very good with identifying cars but struggles with other categories.

Part 2 in separate file