# Training Dataset Constructor
This notebook applies the rules established during the image analysis and creates the training dataset for the CNN. The classes to be used are:
1. Stop
2. Go
3. Stop Left
4. Go Left

Images tagged as warning are merged into the Stop class, and images tagged as warning left are merged into the Stop Left class. Every image is also normalized so that all of them share the same square shape, i.e. 224x224 pixels. During the process, images are picked at random to be part of the validation and testing dataset.

In [1]:
# Jupyter related code
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
# Imported libraries
from fastai.vision import *
from PIL import Image
from random import choices

import random
import csv
import os

In [3]:
# Minimum bounding-box dimensions used by valid_sample
min_width = 40
min_height = 50

def valid_sample(area):
    """Tell whether a bounding box is large enough to be kept.

    area is an (x1, y1, x2, y2) sequence. The box is rejected only when
    its width is below min_width AND its height is below min_height; a
    box that reaches at least one of the two minimums is accepted.
    """
    box_width = area[2] - area[0]
    box_height = area[3] - area[1]

    too_narrow = box_width < min_width
    too_short = box_height < min_height
    return not (too_narrow and too_short)

In [4]:
# Definition of area normalization function
def get_normal_area(box, size=224):
    """Return a size x size crop window centred on a bounding box.

    box is a tuple of four integers (x1, y1, x2, y2). The window is
    built around the centre of the box. When it would start at a
    negative coordinate it is shifted so that it starts at 0 instead,
    keeping its full width/height.

    The right and bottom edges are not clamped, because the image
    dimensions are not known to this function.

    Returns a tuple (x_1, y_1, x_2, y_2) with x_2 - x_1 == size and
    y_2 - y_1 == size.
    """
    # Getting center of the box
    x_center = (box[0] + box[2]) // 2
    y_center = (box[1] + box[3]) // 2

    # Top-left corner, clamped to the image origin. Using size // 2 on
    # one side and "+ size" on the other guarantees an exact
    # size-pixel span (the former -111/+112 offsets gave only 223).
    half = size // 2
    x_1 = max(x_center - half, 0)
    y_1 = max(y_center - half, 0)

    return (x_1, y_1, x_1 + size, y_1 + size)

In [14]:
# Definition of random path generator
def get_random_path(train_count, valid_count):
    """Randomly pick the output split for one image.

    Returns a one-element list holding either 'train' (weight 0.7) or
    'valid' (weight 0.3); callers read the chosen name from index 0.

    train_count and valid_count are accepted but are not used by the
    draw: every call is an independent weighted pick.
    """
    split_names = ['train', 'valid']
    split_weights = [0.7, 0.3]

    picked = choices(split_names, weights=split_weights, k=1)
    return picked

In [21]:
# Returns the image class depending on the input
def get_image_class(image_class, flag):
    """Map an annotation tag to the class name used for the output folder.

    When flag is truthy the warning tags are condensed into the stop
    classes: 'warning' becomes 'stop' and 'warningLeft' becomes
    'stopLeft'. Every other tag, and every tag when flag is falsy, is
    returned unchanged.
    """
    if not flag:
        return image_class

    condensed = {'warning': 'stop', 'warningLeft': 'stopLeft'}
    return condensed.get(image_class, image_class)

In [22]:
# Folder paths from where the original images can be obtained
master_path = Path('../data/lisa-traffic-light-dataset')
path_anno = master_path/'annotations'
path_img = master_path/'images'

# Adjust this variable to crop images from certain dayClip
day_clip = [
    'dayClip1',
    'dayClip2',
    'dayClip3',
    'dayClip4',
    'dayClip5',
    'dayClip6',
    'dayClip7',
    'dayClip8',
    'dayClip9',
    'dayClip10',
    'dayClip11',
    'dayClip12',
    'dayClip13',
]

# Output paths for files
path_master_output = Path('../data/training-dataset')

# Modify this variable whenever you want to use a different input
# CSV file
annotations_file = 'frameAnnotationsBOX.csv'

# Running totals of the crops written to each split. They have to be
# defined before the loop because they are passed to get_random_path
# on every saved sample.
train_count = 0
valid_count = 0

for dayClip in day_clip:
    # Input files
    path_csv_train = path_anno/'dayTrain'/dayClip/annotations_file
    path_image_input = path_img/'dayTrain'/dayClip/'frames'

    # Read every annotation row up front so the CSV file is closed
    # before the (much longer) image processing starts.
    # newline='' is what the csv module asks for on files it reads.
    with open(path_csv_train, newline='') as csv_file:
        reader = csv.reader(csv_file, delimiter=';')

        # Skip the header row; the default keeps an empty file from
        # raising StopIteration
        next(reader, None)

        # Each entry: (frame file name, annotation tag, (x1, y1, x2, y2))
        annotations = [
            (
                row[0].replace('dayTraining/', ''),
                row[1],
                tuple(map(int, row[2:6])),
            )
            for row in reader
            if row  # ignore blank lines
        ]

    # The following code runs through all the annotations of the clip.
    # image_number is the 0-based position of the annotation row and
    # makes the output file name unique within the clip.
    for image_number, (file_name, label, bbox) in enumerate(annotations):
        # Reject small boxes before touching the image file at all
        if not valid_sample(bbox):
            continue

        area = get_normal_area(bbox)

        # Picks the split folder (train or valid) for this sample
        split = get_random_path(train_count, valid_count)[0]
        directory = path_master_output/split/get_image_class(label, True)

        # Creates the folder, and any missing parents, when needed
        directory.mkdir(parents=True, exist_ok=True)

        # Crops the image and saves it under the listed directory. The
        # context manager releases the file handle of the source frame
        # as soon as the crop has been written.
        with Image.open(path_image_input/file_name) as im:
            im.crop(area).save(directory/f'{dayClip}_{image_number}.jpg')

        if split == 'train':
            train_count += 1
        else:
            valid_count += 1