# Data Augmentation


---
## The augmented images are used for the independent evaluation of the sign model

---
* The dataset used is a subset of the German Traffic Sign dataset



In [0]:
# Colab helper that exposes Google Drive inside the notebook's filesystem.
from google.colab import drive

# Mounting is interactive: Colab prompts for an authorization code.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [0]:
# List the current working directory to confirm the Drive mount point is present.
!ls

drive  sample_data


In [0]:
import os
from glob import glob
from datetime import datetime
from shutil import copyfile
 
import imgaug as ia
from imgaug import augmenters as iaa

import imageio
 
# Number of augmented variants generated for every source image.
ITERATIONS = 10
# File-name suffixes that are treated as images when scanning the input folder.
WHITE_LIST_FORMAT = ('png', 'jpg', 'jpeg', 'bmp', 'ppm', 'JPG')

# Locations on the mounted Google Drive: results are written below OUTPUT,
# source images are read from below INPUT.
OUTPUT = '/content/drive/My Drive/ML-Assignment/sign_aug'
INPUT = '/content/drive/My Drive/ML-Assignment/dataset'
 
def check_dir_or_create(dir):
    """Ensure that the directory ``dir`` exists, creating it if necessary.

    Missing parent directories are created as well. Calling this for a
    directory that already exists does nothing.

    Args:
        dir: Path of the directory that must exist afterwards.

    Raises:
        FileExistsError: If ``dir`` exists but is not a directory.
    """
    # exist_ok=True replaces the separate os.path.exists() test, which was
    # racy and silently accepted a regular file sitting at the target path.
    os.makedirs(dir, exist_ok=True)
        
def sometimes(aug):
    """Return ``aug`` wrapped in ``iaa.Sometimes`` with a probability of 0.5."""
    return iaa.Sometimes(0.5, aug)
 
# The augmenters are appended one by one, in the order they are meant to be
# listed; `seq` below applies them in a random order for each batch.
augmenters = []

# Mirror images left-to-right with probability 0.5.
# NOTE(review): mirroring can change the meaning of direction-dependent
# signs - confirm this is intended for the evaluation set.
augmenters.append(iaa.Fliplr(0.5))

# Randomly crop away between 0% and 10% of the image.
augmenters.append(iaa.Crop(percent=(0, 0.1)))

# Strengthen or weaken the contrast of each image.
augmenters.append(iaa.ContrastNormalization((0.75, 1.5)))

# Scale pixel values, i.e. make images darker or brighter; for a share of
# the images (per_channel=0.2) the factor is drawn per colour channel.
augmenters.append(iaa.Multiply((0.8, 1.2), per_channel=0.2))

# Geometric distortion: scale/zoom, translate/move, rotate and shear.
augmenters.append(
    iaa.Affine(
        scale={"x": (0.8, 1.2), "y": (0.8, 1.2)},
        translate_percent={"x": (-0.2, 0.2), "y": (-0.2, 0.2)},
        rotate=(-25, 25),
        shear=(-8, 8),
    )
)

# Pipeline that runs all augmenters above in a random order.
seq = iaa.Sequential(augmenters, random_order=True)
 
# Accepted image suffixes, lower-cased and with a leading dot, so that
# upper-case extensions such as '.PNG' are picked up too and names that
# merely end in e.g. 'png' without a dot are not.
_image_suffixes = tuple({'.' + fmt.lower() for fmt in WHITE_LIST_FORMAT})

# Collect every image file below INPUT. os.walk supplies the file names
# directly, which avoids glob misreading directory names that contain
# characters like '[' or '*'. Dot-files are skipped, as glob('*') would.
# Sorting makes the file order (and therefore the batches) deterministic.
files = sorted(
    os.path.join(dirpath, name)
    for dirpath, _dirnames, filenames in os.walk(INPUT)
    for name in filenames
    if not name.startswith('.')
    and name.lower().endswith(_image_suffixes)
    and os.path.isfile(os.path.join(dirpath, name))
)

# Class label of each file = name of the directory that contains it.
classes = [os.path.basename(os.path.dirname(path)) for path in files]
classes_set = set(classes)

# Mirror the input directory layout below OUTPUT. The target directory is
# derived exactly like the output file paths (leading INPUT swapped for
# OUTPUT), so nested class folders also get a matching destination.
for _dir in sorted({os.path.dirname(path.replace(INPUT, OUTPUT, 1))
                    for path in files}):
    check_dir_or_create(_dir)
 
# Number of images that are loaded and augmented together.
BATCH_SIZE = 50

# Split the file list into consecutive chunks of at most BATCH_SIZE paths.
# Stepping through the list by BATCH_SIZE never yields an empty trailing
# batch, unlike `len(files) // BATCH_SIZE + 1`, which did so whenever the
# file count was an exact multiple of BATCH_SIZE (including zero files).
batches = [files[start:start + BATCH_SIZE]
           for start in range(0, len(files), BATCH_SIZE)]
batches_count = len(batches)
 
# Each pass over the dataset writes one augmented variant per source image;
# the pass index `i` becomes a suffix of the output file name.
for i in range(ITERATIONS):
    # Progress log: pass index and current wall-clock time.
    print(i, datetime.now().time())
    for batch in batches:
        # Nothing to augment or write for an empty batch.
        if not batch:
            continue
        images = [imageio.imread(file) for file in batch]
        images_aug = seq.augment_images(images)
        for file, image_aug in zip(batch, images_aug):
            # '<INPUT>/cls/img.ppm' -> '<OUTPUT>/cls/img_<i>.ppm'
            root, ext = os.path.splitext(file)
            new_filename = root + '_{}'.format(i) + ext
            # Only the leading INPUT prefix is swapped for OUTPUT.
            new_path = new_filename.replace(INPUT, OUTPUT, 1)

            imageio.imwrite(new_path, image_aug)
 
# Copy the untouched originals next to their augmented variants.
for file in files:
    # Swap only the leading INPUT prefix for OUTPUT, the same way the
    # augmented file paths are derived; an unbounded replace would also
    # rewrite any later occurrence of the INPUT string inside the path.
    dst = file.replace(INPUT, OUTPUT, 1)
    copyfile(file, dst)

0 17:56:59.613245
1 18:29:02.132433
2 18:29:44.161221
3 18:30:29.285820
4 18:31:14.693248
5 18:32:01.294692
6 18:32:49.159939
7 18:33:38.506839
8 18:34:27.603238
9 18:35:15.424350
