# Road Scene Understanding with the KITTI Dataset
#### CS 5190
#### Team Members: 

### SETUP
1. Download dataset from https://www.cvlibs.net/datasets/kitti/eval_object.php?obj_benchmark=2d
2. Set up Notebook environment
`> conda install matplotlib numpy opencv pandas scikit-image scikit-learn scipy ultralytics opencv-python tqdm pillow`
3. Create folder for yolov8

Each line in a label file contains the following:
`<object_type> <truncation> <occlusion> <alpha> <left> <top> <right> <bottom> <height> <width> <length> <x> <y> <z> <rotation_y>`

YOLOv8 requires the format `class x_center y_center width height`, with the box values normalized by the image width and height. Each image has one .txt file containing one line per bounding box.
Structure for yolov8: https://roboflow.com/formats/yolov8-pytorch-txt

# Dataset

In [None]:
# KITTI BASE PATHS
# Built with os.path.join so the notebook is not tied to Windows separators.
# The trailing "" keeps a trailing separator on every path, because other cells
# append file names to these strings with plain "+" concatenation.
import os

base_dataset_path = os.path.join("..", "dataset", "")
base_yolo_path = os.path.join("..", "yolov8", "")  #not using this one anymore
base_labels_path = os.path.join("..", "data_object_label_2", "training", "label_2", "")
base_images_train_path = os.path.join("..", "data_object_image_2", "training", "image_2", "")

# KITTI CLASSES/OBJECT types in the labels file, mapped to integer class ids
OBJECT_CLASSES = {
    'Car': 0,
    'Van': 1,
    'Truck': 2,
    'Pedestrian': 3,
    'Person_sitting': 4,
    'Cyclist': 5,
    'Tram': 6,
    'Misc': 7,
    'DontCare': 8,
}

# Column names for one line of a KITTI label file, in file order.
# NOTE(review): 'score' is a 16th column that is not part of the 15-field label
# format described in the setup notes — presumably only present in detection
# result files; confirm before relying on it.
LABEL_FILE_FIELDS = [
    'label', 'truncated', 'occluded', 'alpha',
    'bbox_xmin', 'bbox_ymin', 'bbox_xmax',
    'bbox_ymax', 'dim_height', 'dim_width', 'dim_length',
    'loc_x', 'loc_y', 'loc_z', 'rotation_y', 'score',
]

In [9]:
import os
from PIL import Image

#create pairs for the paths to KITTI images and labels
#each entry is {"img_path": ..., "label_path": ...} for one label file
path_pairs = []
if not os.path.isdir(base_labels_path):
    print(f"Error: Folder {base_labels_path} not found")
elif not os.path.isdir(base_images_train_path):
    print(f"Error: Folder {base_images_train_path} not found")
else:
    #sorted() makes the pair order independent of the file system's listing order
    for full_filename in sorted(os.listdir(base_labels_path)):
        #splitext keeps the whole stem and lets stray non-label files be skipped
        stem, ext = os.path.splitext(full_filename)
        if ext.lower() != ".txt":
            continue
        path_pairs.append({
            "img_path": os.path.join(base_images_train_path, stem + ".png"),
            "label_path": os.path.join(base_labels_path, full_filename),
        })

In [10]:
from sklearn.model_selection import train_test_split

#separate into 80% training and 20% validation
#random_state pins the shuffle so re-running this cell reproduces the same split
train, validate = train_test_split(path_pairs, test_size=0.2, shuffle=True, random_state=42)


In [15]:
#sanity check: number of pairs that ended up in the validation split
len(validate)

1497

In [18]:
#peek at the first two training pairs to check the image/label paths line up
train[:2]

[{'img_path': '..\\data_object_image_2\\training\\image_2\\000747.png',
  'label_path': '..\\data_object_label_2\\training\\label_2\\000747.txt'},
 {'img_path': '..\\data_object_image_2\\training\\image_2\\006950.png',
  'label_path': '..\\data_object_label_2\\training\\label_2\\006950.txt'}]

In [19]:
#sanity check: number of pairs in the training split
len(train)

374

## Preprocessing

FILE STRUCTURE

> dataset/
> |---train/
> |     |---images/
> |     |   |---000001.png   
> |     |   |---000002.png   
> |     |   |---...cont
> |     |---labels/
> |        |---000001.txt   
> |        |---000002.txt   
> |        |---...cont   
> |---val/
> |     |---images/
> |     |   |---000201.png   
> |     |   |---000202.png   
> |     |   |---...cont
> |     |---labels/
> |        |---000201.txt   
> |        |---000202.txt   
> |        |---...cont   
> |---kitti.yaml

In [19]:
#turns corner coordinates from a label file into a box normalized by the image size
def convert_bbox_yolo8(img_w, img_h, x1 , y1, x2, y2):
    """Convert a corner-style box to normalized center/size values.

    Args:
        img_w: image width used to scale the horizontal values.
        img_h: image height used to scale the vertical values.
        x1, y1: first corner of the box (left, top in the label file).
        x2, y2: opposite corner of the box (right, bottom in the label file).

    Returns:
        Tuple ``(x_center, y_center, width, height)``, each divided by the
        matching image dimension.
    """
    mid_x = (x1 + x2) / 2
    mid_y = (y1 + y2) / 2
    span_x = x2 - x1
    span_y = y2 - y1
    return mid_x / img_w, mid_y / img_h, span_x / img_w, span_y / img_h

In [22]:
#running this over the full train/val split can take a few minutes
def _kitti_lines_to_yolo(lines, img_width, img_height, source):
    """Return the YOLO label lines for the KITTI label lines of one image.

    Args:
        lines: raw text lines read from a KITTI label file.
        img_width: width of the matching image, used for normalization.
        img_height: height of the matching image, used for normalization.
        source: label file path, only used in warning messages.

    Returns:
        List of ``"class x_center y_center width height\\n"`` strings.
        ``DontCare`` and ``Misc`` objects are left out; blank lines are ignored
        and lines that cannot be parsed are reported and skipped.
    """
    skipped_ids = {OBJECT_CLASSES['DontCare'], OBJECT_CLASSES['Misc']}
    yolo_lines = []
    for line_no, line in enumerate(lines, start=1):
        label_parts = line.strip().split()
        if not label_parts:
            continue  #blank line, e.g. an extra newline at the end of the file

        class_id = OBJECT_CLASSES.get(label_parts[0])
        if class_id is None:
            print(f"Warning: unknown object type '{label_parts[0]}' on line {line_no} of {source}, skipping")
            continue
        if class_id in skipped_ids:
            continue  # excluding the DontCare & Misc bounding boxes

        #fields 4..7 are the original bounding box coordinates from the kitti label
        try:
            bb_x1, bb_y1, bb_x2, bb_y2 = map(float, label_parts[4:8])
        except ValueError:
            #too few fields or a non-numeric coordinate
            print(f"Warning: malformed bounding box on line {line_no} of {source}, skipping")
            continue

        x_center, y_center, width, height = convert_bbox_yolo8(img_width, img_height, bb_x1, bb_y1, bb_x2, bb_y2)
        yolo_lines.append(f"{class_id} {x_center} {y_center} {width} {height}\n") #yolo format
    return yolo_lines


def preprocess(path_pairs, stage):
    """Convert KITTI image/label pairs into the YOLOv8 dataset folder layout.

    For each pair the image is copied into ``<base_dataset_path>/<stage>/images``
    and a label file with one normalized bounding box per line is written into
    ``<base_dataset_path>/<stage>/labels``. Both folders are created if missing.

    Args:
        path_pairs: iterable of dicts with ``'img_path'`` and ``'label_path'`` keys.
        stage: name of the split sub-folder, e.g. ``"train"`` or ``"val"``.

    Pairs whose image or label file cannot be read are reported with ``print``
    and skipped, so one bad file does not abort the whole run.
    """
    import shutil  #local import: only this function needs it

    labels_dir = os.path.join(base_dataset_path, stage, "labels")
    images_dir = os.path.join(base_dataset_path, stage, "images")
    os.makedirs(labels_dir, exist_ok=True)
    os.makedirs(images_dir, exist_ok=True)

    for curr_pair in path_pairs:
        yolo_label_path = os.path.join(labels_dir, os.path.basename(curr_pair['label_path']))
        insert_img_path = os.path.join(images_dir, os.path.basename(curr_pair['img_path']))

        #open image only to get width & height; the with-block closes the file handle
        try:
            with Image.open(curr_pair['img_path']) as img:
                img_width, img_height = img.size
        except Exception as e:  #deliberately broad: best-effort, skip unreadable images
            print(f"Error opening image {curr_pair['img_path']}: {e}")
            continue

        try:
            with open(curr_pair['label_path']) as f:
                lines = f.readlines()
        except OSError as e:
            print(f"Error opening label {curr_pair['label_path']}: {e}")
            continue

        yolo_lines = _kitti_lines_to_yolo(lines, img_width, img_height, curr_pair['label_path'])

        #copy the image file as-is instead of decoding and re-encoding it
        shutil.copyfile(curr_pair['img_path'], insert_img_path)
        #create and write yolo normalized bounding box
        with open(yolo_label_path, "w") as out:
            out.writelines(yolo_lines)

In [23]:
def clean_folder(path):
    """Delete every regular file directly inside ``path``.

    Sub-directories (and anything inside them) are left untouched. Problems
    are reported with ``print`` rather than raised.

    Args:
        path: directory to empty.

    Returns:
        None.
    """
    #isdir rather than exists: a path that points at a file is reported here
    #instead of making os.listdir raise NotADirectoryError
    if not os.path.isdir(path):
        print(f"Error: Directory {path} DNE")
        return

    for filename in os.listdir(path):
        filepath = os.path.join(path, filename)
        if not os.path.isfile(filepath):
            continue
        try:
            os.remove(filepath)
        except OSError as e:
            print(f"Error removing file {filepath}: {e}")

In [None]:
#clean dataset folder: empty the four split folders and drop the old kitti.yaml
clean_folder(os.path.join(base_dataset_path, "train", "labels"))
clean_folder(os.path.join(base_dataset_path, "train", "images"))
clean_folder(os.path.join(base_dataset_path, "val", "labels"))
clean_folder(os.path.join(base_dataset_path, "val", "images"))
try:
    os.remove(base_dataset_path + "kitti.yaml")
except OSError as e:
    #single quotes inside the braces: reusing the outer double quotes in an
    #f-string expression is a SyntaxError on Python versions before 3.12
    print(f"Error removing file {base_dataset_path + 'kitti.yaml'}: {e}")

In [None]:

#trial run on a small slice of each split: first 100 training pairs, first 20 validation pairs
preprocess(path_pairs=train[:100], stage="train")
preprocess(path_pairs=validate[:20], stage="val")

## Model Training

In [28]:
#Create YAML for YOLOv8

#Write the dataset root as an absolute path with forward slashes.
#NOTE(review): Ultralytics appears to resolve a relative `path` against its own
#datasets directory rather than this notebook's working directory — confirm.
kitti_root = os.path.abspath(base_dataset_path).replace(os.sep, "/")

#class ids 0-6 match OBJECT_CLASSES with Misc and DontCare left out
kitti_yaml = f"""
path: {kitti_root}
train: train/images
val: val/images
nc: 7
names:
    0: Car 
    1: Van
    2: Truck
    3: Pedestrian
    4: Person_sitting
    5: Cyclist
    6: Tram 
"""

#make sure the dataset folder exists before writing into it
os.makedirs(base_dataset_path, exist_ok=True)
with open(base_dataset_path + "kitti.yaml", "w") as out:
    #write(), not writelines(): the config is one string, not a list of lines
    out.write(kitti_yaml)

In [None]:
from ultralytics import YOLO

#Start from the pretrained yolov8n.pt weights
model = YOLO("yolov8n.pt")

#Train on the dataset described by kitti.yaml
model.train(
    name="yolo_kitti",
    data=base_dataset_path + "kitti.yaml",
    imgsz=640,
    epochs=20,
    batch=8,
)

## Testing

## Visualizations