In [1]:
# github.com/kaibrooks

# this script makes a bunch of augmented images to train on and puts them in the training folder
# you should only need to run this once, or if you want new/different pictures to train with

# images should be sorted by class in /raw/
# eg, /raw/class1/, raw/class2/

In [2]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.preprocessing.image import save_img
import os, shutil
import numpy as np
from numpy import expand_dims

print('TensorFlow version', tf.__version__)
print('Numpy version', np.__version__) 

TensorFlow version 2.0.0
Numpy version 1.17.4


In [3]:
## settings
batch_size = 3 # augmentations per image

# directories
root_dir = 'data/raw/'
save_dir = 'data/train/'
imsize = 64

# vars not to touch
perms = 0 # 0

In [4]:
## wipe output folder for new run
for root, subfolders, files in os.walk(save_dir):
    for folder in subfolders:
        shutil.rmtree(os.path.join(save_dir, folder))
print('Deleted files in', save_dir)

Deleted files in data/train/


In [5]:
## augmentation layer
datagen = ImageDataGenerator(
    featurewise_center=False,  # set input mean to 0 over the dataset
    samplewise_center=False,  # set each sample mean to 0
    featurewise_std_normalization=False,  # divide inputs by std of the dataset
    samplewise_std_normalization=False,  # divide each input by its std
    zca_whitening=False,  # apply ZCA whitening
    zca_epsilon=1e-06,  # epsilon for ZCA whitening
    rotation_range=40,  # randomly rotate images in the range (degrees, 0 to 180)
    width_shift_range=0.2, # randomly shift images horizontally (fraction of total width)
    height_shift_range=0.2, # randomly shift images vertically (fraction of total height)
    shear_range=0.2,  # set range for random shear
    zoom_range=0.2,  # set range for random zoom
    channel_shift_range=40.0,  # set range for random channel shifts
    fill_mode='nearest', # set mode for filling points outside the input boundaries
    cval=0.,  # value used for fill_mode = "constant"
    horizontal_flip=True,  # randomly flip images
    vertical_flip=False,  # randomly flip images
    rescale=None, # set rescaling factor (applied before any other transformation)
    preprocessing_function=None, # set function that will be applied on each input
    data_format=None, #image data format, either "channels_first" or "channels_last"
    validation_split=0.0) # fraction of images reserved for validation (strictly between 0 and 1)
    
print('Ready to run')

Ready to run


In [6]:
print('Starting image generation...')
#creates train folder if it doesn't exist
if not os.path.exists(save_dir):
    os.mkdir(save_dir)

for root, subfolders, files in os.walk(root_dir):
    for folder in subfolders:
        path = os.path.join(root_dir, folder)
        for subroot, subfolders, files in os.walk(path):
            for file in files:  
                image = load_img(os.path.join(path, file))
                image_array = img_to_array(image)
                samples = expand_dims(image_array, 0)
                gen_data = datagen.flow(samples, batch_size=batch_size)
                for i in range(batch_size):
                    # generate batch of images
                    batch = gen_data.next()
                    perms = perms+1
                    split_file_name = os.path.splitext(file)
                    aug_file_name = split_file_name[0]+'_'+str(i)+split_file_name[1]
                    #creates train/{pink|cheese|blue} folders if they don't exist
                    if not os.path.exists(os.path.join(save_dir, folder)):
                        os.mkdir(os.path.join(save_dir, folder))
                    save_img(os.path.join(save_dir, folder, aug_file_name), batch[0])
print('Done: Created',perms,'images')                    

Starting image generation...


OSError: cannot identify image file 'data/raw/horse-or-human/.DS_Store'