### `Step 1: Data Preparation`

In [316]:
import os, shutil

# Source folder holding one sub-folder per label, and the destination root
# for the balanced copy of the dataset.
original_dataset_dir = '/caltech101-dataset/dataset-balanced/'
base_dir = '/caltech101-dataset/balanced-dataset-final/'

# makedirs (rather than mkdir) also creates missing parent folders and, with
# exist_ok=True, does nothing when the folder already exists, so this cell
# can be re-run safely.
os.makedirs(base_dir, exist_ok=True)

# getting all labels: one label per sub-folder of the source dataset.
# Plain files are skipped because the balancing cell calls os.listdir() on
# every label path; the list is sorted so the label order is reproducible.
labels = sorted(
    name for name in os.listdir(original_dataset_dir)
    if os.path.isdir(os.path.join(original_dataset_dir, name))
)

# Creating labels folders in balanced dataset.
# Currently disabled: the loop below sits inside a string literal and is
# therefore never executed.
'''
for i in range(len(labels)):
    if not(os.path.exists(base_dir + labels[i])):
        os.mkdir(base_dir + labels[i])
        
'''

#### `Balancing Data`

In [311]:
import os
import random
from scipy import ndarray

# image processing library
import skimage as sk
from skimage import transform
from skimage import util
from skimage import io

def random_rotation(image_array: ndarray, max_degree: float = 25):
    """Rotate an image by a random angle.

    The angle is drawn uniformly from ``[-max_degree, max_degree]`` and is
    expressed in degrees (not percent): negative values rotate one way,
    positive values the other.

    Args:
        image_array: image pixels to rotate.
        max_degree: largest rotation, in degrees, allowed in either
            direction. Defaults to 25, the value previously hard-coded.

    Returns:
        The rotated image as returned by ``sk.transform.rotate``.
    """
    random_degree = random.uniform(-max_degree, max_degree)
    return sk.transform.rotate(image_array, random_degree)

def random_noise(image_array: ndarray):
    """Add random noise to an image.

    Delegates to ``sk.util.random_noise`` with its default arguments and
    returns that call's result.
    """
    noisy_image = sk.util.random_noise(image_array)
    return noisy_image

def horizontal_flip(image_array: ndarray):
    """Mirror an image left-to-right.

    No skimage call is needed: reversing the second axis (the columns) of
    the pixel array with a slice is enough.
    """
    mirrored = image_array[:, ::-1]
    return mirrored

# Registry of the transformations defined above: maps a short name to the
# function that implements it.
available_transformations = dict(
    rotate=random_rotation,
    noise=random_noise,
    horizontal_flip=horizontal_flip,
)

In [None]:
total_instances_per_class = 1000 # Total images per class required

# For every label, generate augmented copies of randomly chosen images until
# the label folder holds `total_instances_per_class` image files.
for i in range(len(labels)):
    label_path = original_dataset_dir + labels[i]
    # NOTE(review): new_label_path is computed but the augmented images are
    # written to label_path below - confirm which destination is intended.
    new_label_path = base_dir + labels[i]

    # find all files paths from the folder
    images = [os.path.join(label_path, f) for f in os.listdir(label_path)
              if os.path.isfile(os.path.join(label_path, f))]

    # Only real files count towards the class size (sub-folders are ignored).
    num_files_desired = total_instances_per_class - len(images)

    if not images:
        # random.choice() would raise IndexError on an empty list.
        print((i+1), labels[i], 'has no images to augment, skipping')
        continue

    num_generated_files = 0
    # Strict '<' so exactly num_files_desired files are produced (the former
    # '<=' generated one extra file, even for classes that were already full).
    while num_generated_files < num_files_desired:

        image_path = random.choice(images) # choose random image from folder
        # read image as an array of pixels
        image_to_transform = sk.io.imread(image_path)
        # random num of transformation to apply
        num_transformations_to_apply = random.randint(1, len(available_transformations))

        # Chain the transformations: each one receives the output of the
        # previous one. Feeding the untouched original every time would keep
        # only the effect of the last transformation.
        transformed_image = image_to_transform
        for _ in range(num_transformations_to_apply):
            # random transformation to apply for a single image
            key = random.choice(list(available_transformations))
            transformed_image = available_transformations[key](transformed_image)

        new_file_path = '%s/augmented_image_%s.jpg' % (label_path, num_generated_files)

        # write image to the disk; rotate/noise produce float images, so
        # convert to 8-bit explicitly instead of relying on the writer's
        # implicit (lossy, version-dependent) conversion.
        io.imsave(new_file_path, sk.util.img_as_ubyte(transformed_image))
        num_generated_files += 1

        print((i+1), new_file_path, labels[i], num_files_desired)
    print()

In [None]:
import os
import random
from scipy import ndarray

# image processing library
import skimage as sk
from skimage import transform
from skimage import util
from skimage import io

def random_rotation(image_array: ndarray, max_degree: float = 25):
    """Rotate an image by a randomly chosen angle.

    The angle is sampled uniformly between ``-max_degree`` and
    ``+max_degree``; the unit is degrees, not percent.

    Args:
        image_array: image pixels to rotate.
        max_degree: maximum rotation in degrees in either direction.
            Defaults to 25, the value previously hard-coded.

    Returns:
        The rotated image as returned by ``sk.transform.rotate``.
    """
    random_degree = random.uniform(-max_degree, max_degree)
    return sk.transform.rotate(image_array, random_degree)

def random_noise(image_array: ndarray):
    """Return ``image_array`` with random noise added.

    Thin wrapper around ``sk.util.random_noise`` called with default
    arguments.
    """
    result = sk.util.random_noise(image_array)
    return result

def horizontal_flip(image_array: ndarray):
    """Flip an image horizontally.

    Implemented as a reversed slice over the second axis (columns) of the
    pixel array, so skimage is not involved.
    """
    flipped = image_array[:, ::-1]
    return flipped

# Lookup table from transformation name to the function defined above that
# performs it.
available_transformations = dict([
    ('rotate', random_rotation),
    ('noise', random_noise),
    ('horizontal_flip', horizontal_flip),
])



# Stand-alone demo: write `num_files_desired` augmented images into a single
# folder.
folder_path = 'images/cat'
num_files_desired = 10

# find all files paths from the folder
images = [os.path.join(folder_path, f) for f in os.listdir(folder_path) if os.path.isfile(os.path.join(folder_path, f))]

if not images:
    # random.choice() below would otherwise fail with an opaque IndexError.
    raise FileNotFoundError('No image files found in %s' % folder_path)

num_generated_files = 0
# Strict '<' so exactly num_files_desired files are written.
while num_generated_files < num_files_desired:
    # random image from the folder
    image_path = random.choice(images)
    # read image as an array of pixels
    image_to_transform = sk.io.imread(image_path)
    # random num of transformation to apply
    num_transformations_to_apply = random.randint(1, len(available_transformations))

    # Chain the transformations so that each one builds on the previous
    # result instead of restarting from the original image.
    transformed_image = image_to_transform
    for _ in range(num_transformations_to_apply):
        # random transformation to apply for a single image
        key = random.choice(list(available_transformations))
        transformed_image = available_transformations[key](transformed_image)

    # The save and the counter increment must live inside the loop: when
    # they were dedented out of it the counter never advanced and the loop
    # could not terminate.
    new_file_path = '%s/augmented_image_%s.jpg' % (folder_path, num_generated_files)

    # write image to the disk (converted to 8-bit first, since rotate/noise
    # produce float images)
    io.imsave(new_file_path, sk.util.img_as_ubyte(transformed_image))
    num_generated_files += 1

In [323]:
import os, shutil

# Containers filled by the following cells: the class names and, per class,
# the train / validation / test destination folders.
total_classes = []
train_data = []
validation_data = []
test_data = []

original_dataset_dir = '../caltech101-dataset'
base_dir = '../caltech101-dataset'
dir_custom = '/caltech101-dataset/dataset-'

print("Choose one option:\n0- For Whole Dataset\n2- For Specifying minimum classes")
# The number entered is used directly as the minimum number of instances a
# class must have: 0 keeps every class, any other value N keeps the classes
# with at least N instances and selects the folder dir_custom + str(N).
min_instances = int(input())

# Pick the root folder for the split exactly once. Previously, entering 0
# while base_dir already existed fell through to the `elif` branch and
# created a stray '<dir_custom>0' folder.
if min_instances == 0:
    split_root = base_dir
else:
    split_root = dir_custom + str(min_instances)

# makedirs also creates missing parents and tolerates an existing folder.
os.makedirs(split_root, exist_ok=True)

train_dir = os.path.join(split_root, 'train')
validation_dir = os.path.join(split_root, 'validation')
test_dir = os.path.join(split_root, 'test')

Choose one option:
0- For Whole Dataset
2- For Specifying minimum classes
0


In [324]:
# code for only required classes (e.g. classes which have at least 100 instances)
def get_required_classes(instances, total_classes, dataset_dir=None):
    """Select the classes that contain at least ``instances`` entries.

    Args:
        instances: minimum number of entries a class folder must contain;
            0 keeps every class folder.
        total_classes: candidate class names (sub-folder names of the
            dataset folder).
        dataset_dir: folder that contains the class sub-folders. Defaults to
            the notebook-level ``original_dataset_dir``.

    Returns:
        A new list with the selected class names, in the order they appear
        in ``total_classes``. Candidates that are not directories are
        skipped.
    """
    if dataset_dir is None:
        dataset_dir = original_dataset_dir

    # Build a fresh list on every call; appending to a shared module-level
    # list would accumulate duplicates when the function is called again.
    selected = []
    for class_name in total_classes:
        class_path = os.path.join(dataset_dir, class_name)
        if not os.path.isdir(class_path):
            # os.listdir() would raise on a plain file
            continue
        if instances == 0 or len(os.listdir(class_path)) >= instances:
            selected.append(class_name)

    return selected

required_classes = []

# getting all classes. The split folders themselves are excluded: when they
# are created inside the dataset folder they would otherwise be picked up
# as classes the next time this cell runs.
split_dirs = {os.path.abspath(d) for d in (train_dir, validation_dir, test_dir)}
total_classes = [
    name for name in os.listdir(original_dataset_dir)
    if os.path.abspath(os.path.join(original_dataset_dir, name)) not in split_dirs
]
required_classes = get_required_classes(min_instances, total_classes)

# Per-class destination folders, index-aligned with required_classes. They
# are rebuilt on every run (not only when train_dir is missing) so the copy
# cell can always index them, and the class name comes from
# required_classes (the previous `classes` name was never defined).
train_data = [os.path.join(train_dir, name) for name in required_classes]
validation_data = [os.path.join(validation_dir, name) for name in required_classes]
test_data = [os.path.join(test_dir, name) for name in required_classes]

# Creating directories for classes within train, validation and testing.
# makedirs creates the split folders as needed and is a no-op for folders
# that already exist.
for class_dir in train_data + validation_data + test_data:
    os.makedirs(class_dir, exist_ok=True)

In [325]:
# Sanity check: number of classes returned by get_required_classes().
len(required_classes)

102

In [None]:
# Copy every class into its train / validation / test folders using a
# 75% / 15% / remainder split.
for i in range(len(required_classes)):
    classname = original_dataset_dir + '/'+ required_classes[i]
    # Sorted so the split is reproducible (os.listdir order is arbitrary);
    # sub-folders are skipped because copyfile only handles regular files.
    fnames = sorted(f for f in os.listdir(classname)
                    if os.path.isfile(os.path.join(classname, f)))
    length = len(fnames)

    train_per = round(length*.75)
    # Never let train + validation exceed the number of files available.
    val_per = min(round(length*0.15), length - train_per)
    # The test split receives whatever is left, so the count printed below
    # always matches the number of files actually copied.
    test_per = length - train_per - val_per

    print((i+1), required_classes[i], train_per, val_per, test_per)

    splits = (
        (train_data[i], fnames[:train_per]),
        (validation_data[i], fnames[train_per:train_per + val_per]),
        (test_data[i], fnames[train_per + val_per:]),
    )
    for dst_dir, split_fnames in splits:
        for fname in split_fnames:
            src = os.path.join(classname, fname)
            dst = os.path.join(dst_dir, fname)
            shutil.copyfile(src, dst)

In [284]:
# Number of selected classes; used as the size of the model's softmax output layer.
num_classes = len(required_classes)
num_classes

102

In [None]:
# using pretrained model (Only in case of GPU)
# Disabled alternative: the snippet below is wrapped in a string literal, so
# none of it is executed. It shows how a VGG16 convolutional base with
# ImageNet weights would be created and added as the first layer of `model`.
'''
from keras.applications import VGG16
conv_base = VGG16(weights='imagenet',
include_top=False,
input_shape=(150, 150, 3))

# add in first layer while designing CNN
model.add(conv_base) # for using pretrained model
'''

In [285]:
from keras import layers
from keras import models
# Small CNN: four Conv2D + MaxPooling2D stages (32, 64, 128, 128 filters)
# on 150x150 RGB inputs, followed by a 512-unit dense layer and a softmax
# output with one unit per class.
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(512, activation='relu'),
    layers.Dense(num_classes, activation='softmax'),
])

In [286]:
# Print the layer-by-layer summary (output shapes and parameter counts) of the model.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_23 (Conv2D)           (None, 148, 148, 32)      896       
_________________________________________________________________
max_pooling2d_21 (MaxPooling (None, 74, 74, 32)        0         
_________________________________________________________________
conv2d_24 (Conv2D)           (None, 72, 72, 64)        18496     
_________________________________________________________________
max_pooling2d_22 (MaxPooling (None, 36, 36, 64)        0         
_________________________________________________________________
conv2d_25 (Conv2D)           (None, 34, 34, 128)       73856     
_________________________________________________________________
max_pooling2d_23 (MaxPooling (None, 17, 17, 128)       0         
_________________________________________________________________
conv2d_26 (Conv2D)           (None, 15, 15, 128)       147584    
__________

In [None]:
from keras import optimizers
# Multi-class setup: softmax output + categorical cross-entropy, tracking
# accuracy.
# NOTE(review): `lr=` and `fit_generator` are the spellings of the Keras
# version this notebook was written for; newer releases expect
# `learning_rate=` and `model.fit` - confirm the target version before
# changing them.
model.compile(loss='categorical_crossentropy',
              optimizer=optimizers.RMSprop(lr=1e-4),
              metrics=['acc'])

from keras.preprocessing.image import ImageDataGenerator

# code for data augmentation
# rescale=1./255 is required here as well: the validation generator below
# rescales pixels to [0, 1], so without it the network would be trained on
# [0, 255] inputs and validated on [0, 1] inputs.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True)

# Validation images are only rescaled, never augmented.
test_datagen = ImageDataGenerator(rescale=1./255)
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(150, 150),
    batch_size=5,
    class_mode='categorical')

validation_generator = test_datagen.flow_from_directory(
    validation_dir,
    target_size=(150, 150),
    batch_size=10,
    class_mode='categorical')

# 500 steps x batch of 5 = 2500 training images per epoch;
# 50 steps x batch of 10 = 500 validation images per evaluation.
history = model.fit_generator(
    train_generator,
    steps_per_epoch=500,
    epochs=25,
    validation_data=validation_generator,
    validation_steps=50)

In [None]:
# Persist the trained weights (weights only, not the architecture) to an HDF5 file.
model.save_weights('caltech_augmented.h5')