<a href="https://colab.research.google.com/github/mattherbert1/DreamTeam-deep-learning/blob/main/Home_assignment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Home assignment
(link to the repo: https://github.com/mattherbert1/DreamTeam-deep-learning)

In our home assignment, the goal is to train a neural network that detects traffic signs and traffic lights live through a Python client in the CARLA simulator (https://carla.org/), an open-source simulator for autonomous driving research.

## Gathering, cleaning and sorting dataset

The sources of our dataset are listed below:


*   The German Traffic Sign Recognition Benchmark (for traffic signs): http://benchmark.ini.rub.de/?section=gtsrb&subsection=dataset
*   LaRA Traffic Light dataset (for traffic lights): http://www.lara.prd.fr/benchmarks/trafficlightsrecognition
*   A limited dataset from CARLA: https://github.com/DanielHfnr/Carla-Object-Detection-Dataset

Unfortunately, we did not have the resources to obtain a large dataset from CARLA alone, so we also use real-world datasets.


The LaRA dataset contains the frames of a video recorded along a route, together with labels for these frames whose bounding boxes mark the traffic lights. The following script (lara3d_reformat.py) reshapes the LaRA dataset into uniform-resolution images of the individual traffic lights:

In [None]:
import csv
import cv2 as cv
from math import floor, ceil

# reshape bounding box to square
def crop2square(original_min, original_max, difference):
    """Widen the interval [original_min, original_max] by ``difference`` in total.

    The lower bound moves down by floor(difference / 2) and the upper bound
    moves up by ceil(difference / 2), so for an odd ``difference`` the extra
    unit goes to the upper side.

    Returns the new bounds as a ``(min, max)`` tuple.
    """
    half = difference / 2
    lower = original_min - floor(half)
    upper = original_max + ceil(half)
    return lower, upper

# move all of the part of the bounding box inside of the image
def shift(y_down, y_up, x_left, x_right, shape):
    """Translate a bounding box so that it lies inside the image.

    Parameters:
        y_down, y_up: lower and upper row bounds of the box (y_down <= y_up).
        x_left, x_right: lower and upper column bounds of the box.
        shape: image shape; shape[0] limits the y axis, shape[1] the x axis.

    Returns:
        (y_down, y_up, x_left, x_right) after shifting. The size of the box
        is preserved whenever it fits into the image. A box that is longer
        than the image along an axis is truncated to [0, limit] on that axis
        instead of producing a negative lower bound, which would be
        misinterpreted as "count from the end" when used as a slice index.
    """

    def _fit(low, high, limit):
        # push the interval up if it starts before 0
        if low < 0:
            high += abs(low)
            low = 0
        # push the interval down if it ends after the limit
        if high > limit:
            low -= high - limit
            high = limit
        # an interval longer than the axis cannot fit: clamp its lower bound
        return max(low, 0), high

    y_down, y_up = _fit(y_down, y_up, shape[0])
    x_left, x_right = _fit(x_left, x_right, shape[1])
    return y_down, y_up, x_left, x_right


if __name__ == "__main__":
    import os

    # (width, height) in pixels of every exported crop
    dimension = (69, 69)
    # cv.imwrite does not create missing directories, it just returns False
    os.makedirs('./output', exist_ok=True)

    # reading original labels from file
    labels = []
    with open('./Lara3D/labels.txt') as label_file:
        for row in label_file:
            row_split = row.rstrip().split(' ')

            # dropping ambiguous labels
            if row_split[10] == 'ambiguous':
                continue
            # fields 8 and 9 are glued together into a single class token
            traffic_light = [''.join(row_split[8:10])]
            # kept fields: zero-padded frame number, the four bounding box
            # coordinates (fields 3-6), the class token and the light state
            labels.append([row_split[2].zfill(6)] + row_split[3:7] + traffic_light + [row_split[10]])
    print(len(labels))
    counter = 0
    csv_rows = []
    state_counter = {"stop": 0, "warning": 0, "go": 0}

    # reading images using labels read above
    for label in labels:
        image_path = './Lara3D/frame_{}.jpg'.format(label[0])
        image = cv.imread(image_path)
        # cv.imread reports an unreadable file by returning None, not by raising
        if image is None:
            raise FileNotFoundError(f"Could not read image: {image_path}")
        image_height, image_width = image.shape[:2]

        # top left and bottom right corners of the bounding box
        bndymin = int(label[2])
        bndymax = int(label[4])
        bndxmin = int(label[1])
        bndxmax = int(label[3])

        # dimensions of the bounding box
        width = bndxmax - bndxmin
        height = bndymax - bndymin

        # degenerate boxes would be dropped by the low res filter below anyway;
        # dropping them here avoids dividing by zero in the visibility checks
        if width <= 0 or height <= 0:
            continue

        # dropping traffic lamps that have too many missing parts
        # (the original bounding box can be partly outside of the image)
        if bndymin < 0 and abs(bndymin) / height >= 0.125:
            continue
        if bndymax > image_height and (bndymax - image_height) / height >= 0.125:
            continue
        if bndxmin < 0 and abs(bndxmin) / width >= 0.5:
            continue
        if bndxmax > image_width and (bndxmax - image_width) / width >= 0.5:
            continue

        # top left and bottom right corner of the bounding box that can actually be seen on the image
        ymin = max(bndymin, 0)
        ymax = min(bndymax, image_height)
        xmin = max(bndxmin, 0)
        xmax = min(bndxmax, image_width)

        # recalculating dimensions of the bounding box
        width = xmax - xmin
        height = ymax - ymin

        # dropping low res objects
        if width < 8 or height < 8:
            continue

        difference = height - width
        x_left, x_right, y_up, y_down = xmin, xmax, ymax, ymin

        # reshaping the bounding box to a square by extending it along its smaller dimension
        if difference < 0:
            difference = abs(difference)
            y_down, y_up = crop2square(ymin, ymax, difference)
        else:
            x_left, x_right = crop2square(xmin, xmax, difference)

        # moving the square bounding box so that it cannot be partly outside of the image
        y_down, y_up, x_left, x_right = shift(y_down, y_up, x_left, x_right, image.shape)

        # selecting pixels from read image cropped by the square bounding box
        roi = image[y_down:y_up, x_left:x_right]
        # resizing the cropped object to res defined by "dimension"
        resized = cv.resize(roi, dimension, interpolation=cv.INTER_AREA)
        # generating image file; a False return value means nothing was written
        output_path = f"./output/object_{str(counter).zfill(6)}.jpg"
        if not cv.imwrite(output_path, resized):
            raise IOError(f"Could not write image: {output_path}")
        # generating label row
        csv_rows.append([str(counter).zfill(6), label[5], label[6]])

        counter += 1
        # .get keeps an unexpected state from aborting the run; it still shows up in the printed summary
        state_counter[label[-1]] = state_counter.get(label[-1], 0) + 1

    # writing new labels
    with open('./output/labels.csv', 'w', newline='') as csv_file:
        csv_writer = csv.writer(csv_file, delimiter=',')
        csv_writer.writerows(csv_rows)

    print(counter)
    print(state_counter)

The following script (gtsrb_reformat.py) reshapes the German Traffic Sign Recognition Benchmark dataset:

In [None]:
import cv2 as cv
from itertools import islice
import os

if __name__ == "__main__":
    # sign class identifiers for speed limit, yield and stop signs
    sign_ids = ['00000', '00001', '00002', '00003',
                '00004', '00005', '00013', '00014']
    sign_labels = ['speedlimit', 'yield', 'stop']
    # (width, height) in pixels of every exported image
    dimension = (69, 69)

    # reading labels.csv, which is generated by the previous script (lara3d_reformat.py)
    # obtaining the number of the last label
    last_line = ''
    ends_with_newline = True
    with open('./output/labels.csv', "r") as file:
        for line in file:
            ends_with_newline = line.endswith('\n')
            # remember the last row that actually holds a label
            if line.strip():
                last_line = line
    if not last_line:
        raise ValueError("./output/labels.csv contains no labels to continue from")
    counter = int(last_line.rstrip().split(',')[0])
    label_counter = counter

    # opening labels.csv for appending
    with open('./output/labels.csv', "a") as csv_output_file:
        # csv.writer already terminates its last row, so a separator is written
        # only if the file does not end with a newline; an unconditional "\n"
        # would leave a blank row in labels.csv
        if not ends_with_newline:
            csv_output_file.write("\n")
        for sign_id in sign_ids:
            # opening file containing labels for traffic sign class marked with "sign_id"
            with open(f'./GTSRB/Final_Training/Images/{sign_id}/GT-{sign_id}.csv') as csv_input_file:
                # reading file row by row (skipping the header) and writing new lines to labels.csv
                for row in islice(csv_input_file, 1, None):
                    if not row.strip():
                        continue
                    row_split = row.rstrip().split(';')
                    # the last column holds the numeric class id
                    class_id = int(row_split[-1])
                    if 0 <= class_id <= 5:
                        sign_label = sign_labels[0]  # speedlimit
                    elif class_id == 13:
                        sign_label = sign_labels[1]  # yield
                    elif class_id == 14:
                        sign_label = sign_labels[2]  # stop
                    else:
                        # fail before writing anything, so no half-finished row ends up in labels.csv
                        raise ValueError(f"Unexpected class id {class_id} in GT-{sign_id}.csv")
                    label_counter += 1
                    # the row is written in one piece
                    csv_output_file.write(f"{str(label_counter).zfill(6)},TrafficSign,{sign_label}\n")

    # reshaping images to uniform resolution
    # NOTE: images are numbered independently of the label rows above; the pairing
    # relies on each directory holding as many .ppm files as its GT csv has rows
    for sign_id in sign_ids:
        image_dir = f"./GTSRB/Final_Training/Images/{sign_id}"
        # os.listdir returns entries in arbitrary order; sorting makes the numbering reproducible
        ppm_filenames = sorted(f"{image_dir}/{filename}" for filename in os.listdir(image_dir)
                               if filename.endswith('.ppm'))
        for ppm_file in ppm_filenames:
            image = cv.imread(ppm_file)
            # cv.imread reports an unreadable file by returning None, not by raising
            if image is None:
                raise FileNotFoundError(f"Could not read image: {ppm_file}")
            counter += 1
            resized = cv.resize(image, dimension, interpolation=cv.INTER_AREA)
            output_path = f"./output/object_{str(counter).zfill(6)}.jpg"
            # a False return value means nothing was written
            if not cv.imwrite(output_path, resized):
                raise IOError(f"Could not write image: {output_path}")

As a result of the previous scripts, a uniform label set (labels.csv) and image set of traffic lights and signs have been generated.
The following script (shuffle.py) generates the train_labels.csv (shuffled), valid_labels.csv and test_labels.csv files, which contain the image labels of the train, validation and test datasets, and appends the small CARLA dataset to the test dataset:

In [None]:
import csv

import numpy as np

if __name__ == "__main__":
    # ratio of validation and test dataset to the whole dataset
    valid_split = 0.2
    test_split = 0.1

    label_dict = {}
    result_train, result_valid, result_test = [], [], []
    # reading labels.csv row by row and add them to label_dict (key: class label of the image)
    with open('./output/labels.csv') as file:
        for row in file:
            # a blank row has no label columns and would break the indexing below
            if not row.strip():
                continue
            row_split = row.rstrip().split(',')
            label_dict.setdefault((row_split[-2], row_split[-1]), []).append(row_split)

    # grouping images into train, valid, test groups by labels,
    # so every class is split with the same ratios
    for label_specific_objects in label_dict.values():
        nb_samples = len(label_specific_objects)
        train_end = int(nb_samples * (1 - valid_split - test_split))
        valid_end = int(nb_samples * (1 - test_split))
        result_train.extend(label_specific_objects[:train_end])
        result_valid.extend(label_specific_objects[train_end:valid_end])
        result_test.extend(label_specific_objects[valid_end:])

    # appending Carla dataset to test dataset
    with open('./carla-dataset/labels.csv') as file:
        for row in file:
            if not row.strip():
                continue
            result_test.append(row.rstrip().split(','))

    # shuffling train dataset; the rows stay plain lists, so rows with
    # differing column counts cannot break an array conversion
    randperm = np.random.permutation(len(result_train))
    result_train = [result_train[index] for index in randperm]

    # writing out train, valid, test datasets to appropriate csv files
    outputs = (
        ('./output/train_labels.csv', result_train),
        ('./output/valid_labels.csv', result_valid),
        ('./output/test_labels.csv', result_test),
    )
    for output_path, rows in outputs:
        with open(output_path, 'w', newline='') as csv_file:
            csv_writer = csv.writer(csv_file, delimiter=',')
            csv_writer.writerows(rows)