In [44]:
import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt
from pathlib import Path
import os
from tqdm import tqdm
import pickle
from copy import deepcopy

In [16]:
# Locations of the pickled train / validation / test splits, given relative
# to the notebook's working directory.
TRAIN_PATH = "../raw_data/train.pickle"
VALID_PATH = "../raw_data/valid.pickle"
TEST_PATH = "../raw_data/test.pickle"

In [17]:
def load_file(path):
    """Read one pickled dataset split and return its four arrays.

    Parameters
    ----------
    path : str or path-like
        Pickle file holding a dict with the keys 'features', 'labels',
        'sizes' and 'coords'.

    Returns
    -------
    tuple
        ``(features, labels, sizes, coords)``; ``features`` is cast to
        ``np.uint8``, the other three entries are returned as stored.

    Notes
    -----
    ``pickle.load`` can execute arbitrary code while unpickling, so only
    point this at files from a trusted source.
    """
    with open(path, 'rb') as handle:
        payload = pickle.load(handle, encoding='latin1')
    # Images are cast to 8-bit; for the train split the stored shape is
    # reported as (34799, 32, 32, 3).
    features = payload['features'].astype(np.uint8)
    return features, payload['labels'], payload['sizes'], payload['coords']


In [18]:
# One dict per split with the keys "x" (features), "y" (labels) and
# "c" (coords). The third value returned by load_file (sizes) is not kept;
# it is discarded into `_`.
train_data, valid_data, test_data = {}, {}, {}
train_data["x"], train_data["y"], _, train_data["c"] = load_file(TRAIN_PATH)
valid_data["x"], valid_data["y"], _, valid_data["c"] = load_file(VALID_PATH)
test_data["x"], test_data["y"], _, test_data["c"] = load_file(TEST_PATH)

In [19]:
def display(img, coordinates):
    """Show an RGB image in an OpenCV window with a rectangle drawn on it.

    Blocks until ESC is pressed or the window is closed, then destroys all
    OpenCV windows.

    Parameters
    ----------
    img : numpy.ndarray
        Image in RGB channel order; it is converted to BGR for OpenCV and
        the input array itself is not drawn on.
    coordinates : sequence of 4 numbers
        Used as ``(x, y, width, height)``: the rectangle spans from
        ``(x, y)`` to ``(x + width, y + height)``.
        NOTE(review): confirm that the dataset's 'coords' really are
        (x, y, w, h) and not two corner points.

    Notes
    -----
    NOTE(review): at notebook top level this name shadows IPython's own
    ``display`` helper.
    """
    new_img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

    # Convert to plain Python ints *before* adding: numpy scalars of a narrow
    # dtype (e.g. uint8) wrap around on overflow, which would silently move
    # the far corner of the rectangle.
    x, y, width, height = (int(value) for value in coordinates[:4])
    cv2.rectangle(new_img, (x, y), (x + width, y + height), (255, 0, 0), 1)

    cv2.imshow("img", new_img)
    try:
        while True:
            # Poll with a timeout instead of waitKey(0) so a window that was
            # closed through its title bar can be detected and the loop exits.
            if cv2.waitKey(50) & 0xFF == 27:
                break
            if cv2.getWindowProperty("img", cv2.WND_PROP_VISIBLE) < 1:
                break
    finally:
        # Always release the window, also when the kernel is interrupted.
        cv2.destroyAllWindows()

In [21]:
# Sanity check: show training sample 4000 with its bounding box drawn
# (press ESC in the image window to continue).
display(train_data["x"][4000], train_data["c"][4000])

In [22]:
# Lookup table from numeric ClassId to the human-readable SignName.
labels = pd.read_csv("../raw_data/label_names.csv")

In [23]:
# Preview the label table.
labels

Unnamed: 0,ClassId,SignName
0,0,Speed limit (20km/h)
1,1,Speed limit (30km/h)
2,2,Speed limit (50km/h)
3,3,Speed limit (60km/h)
4,4,Speed limit (70km/h)
5,5,Speed limit (80km/h)
6,6,End of speed limit (80km/h)
7,7,Speed limit (100km/h)
8,8,Speed limit (120km/h)
9,9,No passing


# Augmentation 1 - Local Histogram Equalization

In [31]:
def local_histogram_equalization(img):
    """Equalize the histogram of the first three channels of an image.

    Each of channels 0, 1 and 2 is passed separately through
    ``cv2.equalizeHist`` and written into a fresh ``uint8`` array with the
    same shape as ``img``; the input is left untouched.

    NOTE(review): despite the name, ``cv2.equalizeHist`` equalizes over the
    whole channel rather than over local tiles - confirm whether a tiled
    variant (CLAHE) was intended.

    Parameters
    ----------
    img : numpy.ndarray
        Image indexed as ``img[:, :, channel]`` with at least 3 channels.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array shaped like ``img``.
    """
    equalized = np.zeros(img.shape, dtype=np.uint8)
    for channel in range(3):
        equalized[:, :, channel] = cv2.equalizeHist(img[:, :, channel])
    return equalized

In [32]:
# Apply histogram equalization to training sample 4000 to inspect its effect.
new_img = local_histogram_equalization(train_data['x'][4000])

In [33]:
# Show the equalized image together with the bounding box of sample 4000.
display(new_img, train_data['c'][4000])

# Augmentation 2 - Changing Brightness

In [37]:
def change_brightness(img):
    """Randomly scale the HSV value (brightness) channel of an RGB image.

    The image is converted to HSV, the V channel is multiplied by random
    factors drawn from ``[0.5, 1.5)`` and the result is converted back to
    RGB. The input array is not modified.

    Scaled values are clipped to ``[0, 255]`` before being written back.
    Without the clip, any value above 255 wraps when stored in a ``uint8``
    array, turning the brightest pixels dark.

    NOTE(review): one factor is drawn *per pixel*, which adds grain rather
    than shifting overall brightness. Confirm whether a single factor per
    image was intended.

    Parameters
    ----------
    img : numpy.ndarray
        RGB image; the 255 upper bound assumes 8-bit data.

    Returns
    -------
    numpy.ndarray
        RGB image with the modified value channel.
    """
    img_hsv = cv2.cvtColor(img, cv2.COLOR_RGB2HSV)
    factor = 0.5 + np.random.uniform(size=img_hsv.shape[:-1])
    scaled = img_hsv[:, :, 2].astype(np.float64) * factor
    # Saturate instead of overflowing when the result is cast back.
    img_hsv[:, :, 2] = np.clip(scaled, 0, 255).astype(img_hsv.dtype)
    return cv2.cvtColor(img_hsv, cv2.COLOR_HSV2RGB)

In [38]:
# Apply the random brightness change to training sample 4000.
new_img = change_brightness(train_data['x'][4000])

In [39]:
# Show the brightness-adjusted image together with the bounding box of sample 4000.
display(new_img, train_data['c'][4000])

# Augmentation 3 - Rotating Image

In [41]:
def rotation_changing(image, angle_range=30):
    """Rotate an image about its centre by a random angle.

    The angle is drawn uniformly from ``[-angle_range / 2, angle_range / 2)``
    degrees. The output has the same width and height as the input.

    The previous ``np.random.uniform(angle_range)`` passed ``angle_range`` as
    the *low* bound (with ``high`` left at 1.0), so angles actually fell in
    ``(-14, 15]``; the bounds are now given explicitly.

    Only the pixels are rotated; any bounding-box coordinates stored next to
    the image are not transformed by this function.

    Parameters
    ----------
    image : numpy.ndarray
        Image of shape ``(rows, columns, channels)``.
    angle_range : float, optional
        Total width, in degrees, of the symmetric interval the angle is
        drawn from. Defaults to 30.

    Returns
    -------
    numpy.ndarray
        The rotated image as returned by ``cv2.warpAffine``.
    """
    half_range = angle_range / 2
    angle_rotation = np.random.uniform(-half_range, half_range)

    rows, columns, _channels = image.shape

    # Affine matrix for a rotation about the image centre, scale factor 1.
    affine_matrix = cv2.getRotationMatrix2D((columns / 2, rows / 2), angle_rotation, 1)

    # warpAffine expects the output size as (width, height).
    return cv2.warpAffine(image, affine_matrix, (columns, rows))

In [42]:
# Apply a random rotation to training sample 4000.
new_img = rotation_changing(train_data['x'][4000])

In [43]:
# Show the rotated image; the box is drawn from the stored coordinates of
# sample 4000, which rotation_changing does not modify.
display(new_img, train_data['c'][4000])

# Preprocessing Data

In [45]:
def shuffle(data, seed=0):
    """Return a copy of ``data`` with "x", "y" and "c" shuffled in unison.

    A single permutation is drawn from a private ``RandomState(seed)`` and
    applied to all three entries, so samples stay aligned by construction
    and numpy's global random state is left untouched. (Re-seeding the
    global generator here would make every later ``np.random`` call in the
    notebook depend on ``seed``.)

    Parameters
    ----------
    data : dict
        Must contain "x", "y" and "c" with the same length. Any other keys
        are deep-copied unchanged.
    seed : int or None, optional
        Seed for the permutation. Defaults to 0.

    Returns
    -------
    dict
        New dict; the input dict and its arrays are not modified.

    Raises
    ------
    ValueError
        If "x", "y" and "c" do not all have the same length.
    """
    keys = ("x", "y", "c")
    lengths = {key: len(data[key]) for key in keys}
    if len(set(lengths.values())) != 1:
        raise ValueError(
            "'x', 'y' and 'c' must have the same length, got {}".format(lengths)
        )

    permutation = np.random.RandomState(seed).permutation(lengths["x"])

    new_data = {}
    for key, value in data.items():
        if key not in keys:
            new_data[key] = deepcopy(value)
        elif isinstance(value, np.ndarray):
            # Fancy indexing already returns a new array.
            new_data[key] = value[permutation]
        else:
            new_data[key] = [deepcopy(value[i]) for i in permutation]
    return new_data

In [64]:
def preprocess(data, shuffle=False, lhe=False, rotate=False, brightness=False):
    """Apply the selected augmentation steps to ``data["x"]``.

    Steps run in a fixed order: shuffle, histogram equalization, rotation,
    brightness. Each enabled image step replaces ``data["x"]`` with a list
    holding the transformed images.

    Parameters
    ----------
    data : dict
        Dataset dict; ``data["x"]`` is an iterable of images. When
        ``shuffle`` is false this dict is modified in place.
    shuffle : bool, optional
        Shuffle the dataset first, using the module-level ``shuffle()``.
    lhe : bool, optional
        Apply ``local_histogram_equalization`` to every image.
    rotate : bool, optional
        Apply ``rotation_changing`` to every image.
    brightness : bool, optional
        Apply ``change_brightness`` to every image.

    Returns
    -------
    dict
        The processed dataset dict.
    """
    if shuffle:
        # Inside this function the boolean `shuffle` argument shadows the
        # module-level shuffle() function, so calling `shuffle(data)` would
        # try to call a bool. Fetch the function from the module namespace.
        data = globals()["shuffle"](data)
    if lhe:
        data["x"] = list(map(local_histogram_equalization, tqdm(data["x"])))
    if rotate:
        data["x"] = list(map(rotation_changing, tqdm(data["x"])))
    if brightness:
        # Was mistakenly mapped to rotation_changing.
        data["x"] = list(map(change_brightness, tqdm(data["x"])))
    return data

In [65]:
def join_data(data, augmented_data):
    """Append the samples of ``augmented_data`` to ``data`` in place.

    For each of the keys "x", "y" and "c" the two entries are concatenated
    with ``np.r_`` (``data`` first) and stored back into ``data``.

    Parameters
    ----------
    data : dict
        Dataset to extend; it is modified and also returned.
    augmented_data : dict
        Dataset whose "x", "y" and "c" entries are appended.

    Returns
    -------
    dict
        The same ``data`` object that was passed in.
    """
    for key in ("x", "y", "c"):
        data[key] = np.r_[data[key], augmented_data[key]]
    return data

In [66]:
def generate_augmented_data(data, fraction=0.2):
    """Return the original samples followed by augmented copies of subsets.

    For every augmentation combination (each non-empty mix of histogram
    equalization, rotation and brightness) the dataset is reshuffled, its
    first ``fraction`` of samples is taken and run through ``preprocess``.
    All augmented subsets are then appended to the full (shuffled) dataset.

    The previous implementation ended with ``join_data(new_data,
    augmented_data)`` where both names referred to the same dict, which
    duplicated every augmented sample and never included the originals.

    Parameters
    ----------
    data : dict
        Dataset dict with the keys "x", "y" and "c".
    fraction : float, optional
        Share of the dataset that is augmented per combination.
        Defaults to 0.2.

    Returns
    -------
    dict
        Dataset dict holding ``len(data["x"])`` original samples plus
        ``7 * int(fraction * len(data["x"]))`` augmented ones.
    """
    keys = ("x", "y", "c")
    list_kwargs = [
        {"shuffle": False, "lhe": True, "rotate": False, "brightness": False},
        {"shuffle": False, "lhe": False, "rotate": True, "brightness": False},
        {"shuffle": False, "lhe": False, "rotate": False, "brightness": True},
        {"shuffle": False, "lhe": True, "rotate": True, "brightness": False},
        {"shuffle": False, "lhe": True, "rotate": False, "brightness": True},
        {"shuffle": False, "lhe": False, "rotate": True, "brightness": True},
        {"shuffle": False, "lhe": True, "rotate": True, "brightness": True}
    ]

    # Draw all seeds up front so the choice of later seeds cannot be
    # influenced by any RNG re-seeding that happens while shuffling.
    seeds = np.random.randint(0, 100, size=len(list_kwargs))
    n_top = int(fraction * len(data["x"]))

    augmented_data = None
    for kwargs, seed in zip(list_kwargs, seeds):
        # shuffle() returns a new dict, so the caller's data is never modified.
        data = shuffle(data, int(seed))
        data_top = {key: data[key][:n_top] for key in keys}
        new_data = preprocess(data_top, **kwargs)
        if augmented_data is None:
            augmented_data = new_data
        else:
            augmented_data = join_data(new_data, augmented_data)

    # Originals first, then everything that was generated above.
    return join_data(data, augmented_data)

In [72]:
# Build the augmented version of every split.
# NOTE(review): the validation and test splits are augmented as well, which
# changes the data that models are evaluated on - confirm this is intended.
augmented_train_data = generate_augmented_data(train_data)
augmented_valid_data = generate_augmented_data(valid_data)
augmented_test_data = generate_augmented_data(test_data)

100%|███████████████████████████████████████████████████████████████████████████| 6959/6959 [00:00<00:00, 52033.26it/s]
100%|███████████████████████████████████████████████████████████████████████████| 6959/6959 [00:00<00:00, 33923.12it/s]
100%|███████████████████████████████████████████████████████████████████████████| 6959/6959 [00:00<00:00, 34928.73it/s]
100%|███████████████████████████████████████████████████████████████████████████| 6959/6959 [00:00<00:00, 55215.98it/s]
100%|███████████████████████████████████████████████████████████████████████████| 6959/6959 [00:00<00:00, 32077.55it/s]
100%|███████████████████████████████████████████████████████████████████████████| 6959/6959 [00:00<00:00, 51157.04it/s]
100%|███████████████████████████████████████████████████████████████████████████| 6959/6959 [00:00<00:00, 33940.12it/s]
100%|███████████████████████████████████████████████████████████████████████████| 6959/6959 [00:00<00:00, 35864.65it/s]
100%|███████████████████████████████████

# Labelling Of Data