In [1]:
import os
from glob import glob
from datetime import datetime
from shutil import copyfile
 
import imgaug as ia
from imgaug import augmenters as iaa
from imageio import imwrite, imread

Read the source images from the `sub` directory (`INPUT`), augment them, and write `ITERATIONS` augmented variants of every image, plus a copy of each original, to the parent directory (`OUTPUT`).

In [25]:
# Destination root for the augmented images and the copied originals.
OUTPUT = "./data/celebsaugmented/tannedmen/"
# Directory that is walked recursively for source images.
INPUT = "./data/celebsaugmented/tannedmen/sub"
# File-name suffixes that mark a file as an image to be augmented.
WHITE_LIST_FORMAT = (
    'png',
    'jpg',
    'jpeg',
    'bmp',
    'ppm',
    'JPG',
)
# Number of augmentation passes; each pass writes one new variant per image.
ITERATIONS = 588

In [26]:
def check_dir_or_create(dir):
    """Make sure the directory ``dir`` exists, creating missing parents.

    Args:
        dir: Path of the directory to create if it is not already there.

    Raises:
        FileExistsError: If ``dir`` exists but is not a directory.
        OSError: If the directory cannot be created (e.g. empty path or
            insufficient permissions).
    """
    # exist_ok=True lets makedirs tolerate an already existing directory, so
    # there is no window between a separate existence check and the creation.
    os.makedirs(dir, exist_ok=True)
 
def sometimes(aug):
    """Wrap ``aug`` in ``iaa.Sometimes`` with a probability of 0.5.

    The wrapped augmenter is applied in 50% of all cases, e.g.
    ``sometimes(GaussianBlur(0.3))`` would blur roughly every second image.
    """
    return iaa.Sometimes(0.5, aug)
 
# Augmentation steps; the list is handed to iaa.Sequential when the
# pipeline is built.
# Where an augmenter takes per_channel=p, a separate value is sampled for
# every channel in a fraction p of all cases and a single value per image
# in the remaining cases.

augmenters = [
    # Horizontal flips, applied with probability 0.5.
    iaa.Fliplr(0.5),
    # Random crops, percent sampled from the range (0, 0.1).
    iaa.Crop(percent=(0, 0.1)),
    # Strengthen or weaken the contrast in each image.
    iaa.contrast.LinearContrast((0.75, 1.5)),
    # Make some images brighter and some darker. In 20% of all cases the
    # multiplier is sampled per channel, which can change the image colour.
    iaa.Multiply((0.8, 1.2), per_channel=0.2),
    # Affine transformation: scale/zoom and translate/move each image.
    # Rotation and shear are currently switched off (kept for reference).
    iaa.Affine(
        scale=dict(x=(0.8, 1.2), y=(0.8, 1.2)),
        translate_percent=dict(x=(-0.2, 0.2), y=(-0.2, 0.2)),
        # rotate=(-25, 25),
        # shear=(-8, 8),
    ),
]

In [27]:
 
# Augmentation pipeline: every augmenter in `augmenters` is applied, in a
# random order (random_order=True).
seq = iaa.Sequential(augmenters, random_order=True)

# Extensions are matched case-insensitively and must follow a dot, so
# "photo.PNG" is accepted while a dot-less name such as "notapng" is not.
_allowed_exts = {ext.lower() for ext in WHITE_LIST_FORMAT}

# Collect every whitelisted image below INPUT. Names come straight from
# os.walk (no glob pattern that could trip over "[" or "*" in a directory
# name); dot-files are skipped, as the previous "*" glob did. Sorting keeps
# the batch composition stable between runs.
files = sorted(
    os.path.join(root, name)
    for root, _dirs, names in os.walk(INPUT)
    for name in names
    if not name.startswith('.')
    and os.path.splitext(name)[1][1:].lower() in _allowed_exts
    and os.path.isfile(os.path.join(root, name))
)

# Name of the immediate parent directory of every file (its "class").
classes = [os.path.basename(os.path.dirname(path)) for path in files]
classes_set = set(classes)


def _mirrored(path):
    """Return the location below OUTPUT that mirrors ``path`` below INPUT."""
    return os.path.join(OUTPUT, os.path.relpath(path, INPUT))


# Create the complete mirrored directory tree, so that images nested more
# than one level below INPUT also have a destination directory.
for out_dir in sorted({os.path.dirname(_mirrored(path)) for path in files}):
    if out_dir:
        check_dir_or_create(out_dir)

# Split the file list into chunks of at most BATCH_SIZE entries. Stepping
# through the list never yields an empty trailing batch.
BATCH_SIZE = 50
batches = [files[start:start + BATCH_SIZE]
           for start in range(0, len(files), BATCH_SIZE)]
batches_count = len(batches)

# Each pass writes one augmented variant per image, named
# "<original stem>_<pass index><original extension>".
for i in range(ITERATIONS):
    print(i, datetime.now().time())
    for batch in batches:
        images = [imread(path) for path in batch]
        images_aug = seq.augment_images(images)
        for path, image_aug in zip(batch, images_aug):
            root, ext = os.path.splitext(_mirrored(path))
            imwrite(root + '_{}'.format(i) + ext, image_aug)

# Finally copy the untouched originals next to their augmented variants.
for path in files:
    copyfile(path, _mirrored(path))

0 14:55:00.818563
1 14:55:00.888078
2 14:55:00.937361
3 14:55:00.984091
4 14:55:01.029514
5 14:55:01.083425
6 14:55:01.129314
7 14:55:01.175304
8 14:55:01.220577
9 14:55:01.269369
10 14:55:01.315824
11 14:55:01.365705
12 14:55:01.414991
13 14:55:01.464587
14 14:55:01.515063
15 14:55:01.564419
16 14:55:01.612937
17 14:55:01.667555
18 14:55:01.716524
19 14:55:01.766149
20 14:55:01.818588
21 14:55:01.866053
22 14:55:01.913546
23 14:55:01.962240
24 14:55:02.016501
25 14:55:02.067508
26 14:55:02.113241
27 14:55:02.161631
28 14:55:02.208177
29 14:55:02.256602
30 14:55:02.304105
31 14:55:02.350406
32 14:55:02.397697
33 14:55:02.442923
34 14:55:02.491829
35 14:55:02.537998
36 14:55:02.583616
37 14:55:02.629551
38 14:55:02.674211
39 14:55:02.724218
40 14:55:02.769795
41 14:55:02.817811
42 14:55:02.863251
43 14:55:02.910486
44 14:55:02.958481
45 14:55:03.006137
46 14:55:03.051817
47 14:55:03.098425
48 14:55:03.142305
49 14:55:03.191658
50 14:55:03.238825
51 14:55:03.285441
52 14:55:03.331741
53 