# Data Wrangling
For our computer vision project, we've pulled the original image data from [Kaggle](https://www.kaggle.com/andrewmvd/road-sign-detection). The dataset contains 877 images with 4 distinct pre-made classes. However, I wanted both to verify the existing ground truths and to add additional labels to the dataset. Thus, using labelImg, I modified the existing classes and added 2 additional classes ('nostop' and 'yield') to the dataset, giving the label counts below:

Label count after modification:
- 'trafficlight': 155 labels
- 'stop': 93 labels
- 'speedlimit': 788 labels
- 'crosswalk': 218 labels
- 'nostop': 107 labels
- 'yield': 15 labels

However, because the dataset contains far fewer 'yield' labels than any other label, 5 images were selected from the dataset to be augmented with rotations, noise, and/or image blur. The following code from the Python script (img_augmentation.py) was used to generate 100 new images.

In [1]:
# Import Dependencies
import cv2
import numpy as np
from skimage import io 
from skimage.transform import rotate, AffineTransform, warp
from skimage import img_as_ubyte
from skimage.util import random_noise
import matplotlib.pyplot as plt
import random
import os

In [2]:
# Defining functions for augmenting image
def anticlockwise_rotation(image):
    """Rotate *image* counter-clockwise by a random whole-degree angle.

    The angle is drawn uniformly from 0 to 180 degrees (both inclusive) and
    passed unchanged to skimage's ``rotate``, which treats positive angles
    as counter-clockwise.
    """
    return rotate(image, random.randint(0, 180))

def clockwise_rotation(image):
    """Rotate *image* clockwise by a random whole-degree angle.

    The angle is drawn uniformly from 0 to 180 degrees (both inclusive) and
    negated before being handed to skimage's ``rotate``, so the rotation
    runs in the clockwise direction.
    """
    return rotate(image, -random.randint(0, 180))

def add_noise(image):
    """Return a noisy copy of *image* using skimage's ``random_noise`` defaults."""
    noisy_image = random_noise(image)
    return noisy_image

def blur_image(image):
    """Return *image* smoothed with a 5x5 Gaussian blur.

    A sigma of 0 is passed to OpenCV, which then derives the standard
    deviation from the kernel size.
    """
    kernel_size = (5, 5)
    return cv2.GaussianBlur(image, kernel_size, 0)

In [3]:
# Map a human-readable name to each augmentation function so that one can be
# picked at random by key.
transformations = {
    'rotate anticlockwise': anticlockwise_rotation,
    'rotate clockwise': clockwise_rotation,
    'add noise': add_noise,
    'blur image': blur_image,
}

In [4]:
# The script is run from the directory holding the images to augment.
curr_dir = os.getcwd()

# Parent of the working directory.
images_to_aug_dir = os.path.split(curr_dir)[0]

# Augmented images are written to an 'augmented_images' folder located two
# levels above the working directory.
images_dir = os.path.split(images_to_aug_dir)[0]
augmented_dir = os.path.join(images_dir, 'augmented_images')

In [5]:
# Collect every PNG in the current working directory as augmentation input
images = [
    os.path.join(curr_dir, im)
    for im in os.listdir(curr_dir)
    if im.endswith('.png')
]
if not images:
    # Fail early with a clear message rather than random.choice's IndexError
    raise FileNotFoundError("No .png images found in %s" % curr_dir)

# cv2.imwrite does not create missing directories (it just returns False),
# so make sure the output folder exists before generating anything.
os.makedirs(augmented_dir, exist_ok=True)

# Setting number of generated images
images_to_generate = 100

# Built once: the candidate transformation names do not change per image
transformation_names = list(transformations)

for i in range(1, images_to_generate + 1):
    image = random.choice(images)
    original_image = io.imread(image)

    # Choose random number of transformations to apply on image
    transformation_count = random.randint(1, len(transformations))

    # Apply exactly `transformation_count` transformations, feeding each
    # result into the next so the effects accumulate. (Previously the loop
    # ran one extra time and always restarted from the original image, so
    # only the last transformation was ever visible in the output.)
    transformed_image = original_image
    for n in range(transformation_count):
        key = random.choice(transformation_names)
        transformed_image = transformations[key](transformed_image)

    # Setting augmented image path
    new_image_path = os.path.join(augmented_dir, "augmented_road%s.png" % i)

    # Processing transformed image to unsigned byte format
    transformed_image = img_as_ubyte(transformed_image)

    # skimage loads images as RGB while cv2.imwrite expects BGR, so swap the
    # channel order before saving.
    transformed_image = cv2.cvtColor(transformed_image, cv2.COLOR_RGB2BGR)

    # Writing transformed image to file; imwrite signals failure by
    # returning False instead of raising, so check it explicitly.
    if not cv2.imwrite(new_image_path, transformed_image):
        raise IOError("Could not write augmented image to %s" % new_image_path)