# Installation

In [0]:
# prerequisites for imgaug library (installed here, before imgaug itself is installed in the next cell)
!pip install six numpy scipy Pillow matplotlib scikit-image opencv-python imageio Shapely

In [0]:
# image augmentation library imgaug (https://github.com/aleju/imgaug)
# the leading `!` runs pip as a shell command from the notebook
!pip install imgaug

# NOTE: make sure to Restart Runtime after this installation completes

In [0]:
# imgaug needs the latest scikit-image, so upgrade it explicitly here;
# otherwise (per the author's note) a confusing numpy-related error shows up
!pip install --upgrade scikit-image

# NOTE: make sure to Restart Runtime after this installation completes

# Test to ensure imgaug library works

In [0]:
# Smoke test (example taken from imgaug): load one image and display it,
# just to confirm the library works after installation.
%matplotlib inline
import imageio
import imgaug as ia

LENNA_URL = "https://upload.wikimedia.org/wikipedia/en/7/7d/Lenna_%28test_image%29.png"

# `image` is reused by the next cell, which augments it.
image = imageio.imread(LENNA_URL)

print("Original:")
ia.imshow(image)

In [0]:
from imgaug import augmenters as iaa
# seed imgaug before augmenting (presumably so the random rotation below is reproducible - confirm against imgaug docs)
ia.seed(4)

# augmenter that rotates by a value taken from the range (-25, 25); applied to the `image` loaded in the previous cell
rotate = iaa.Affine(rotate=(-25, 25))
image_aug = rotate.augment_image(image)

print("Augmented:")
ia.imshow(image_aug)

# Example template to show usage of Custom generator and imgaug library 

In [0]:
# usual model definition code
# (template: the `...` lines below are placeholders for the actual layers)

from __future__ import print_function
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from keras.preprocessing.image import ImageDataGenerator

# params as usual
num_classes = 200  # size of the final softmax layer below
IMAGE_WIDTH = 32 
IMAGE_HEIGHT = 32 
NUM_CHANNELS = 3  # also used by the custom augmentation generators to size their output batch
# NOTE(review): Keras' channels_last layout is (height, width, channels); the width-first order
# here is harmless only because both are 32 - confirm before using non-square images
input_shape = (IMAGE_WIDTH, IMAGE_HEIGHT, NUM_CHANNELS)
BATCH_SIZE = 100  # batch size of both directory iterators; also used to derive the step counts for fit_generator
NUM_EPOCHS = 10
VAL_IMAGES = 10000  # number of validation images; used to compute validation_steps

# define network
model = Sequential()
...
...
...
# output layer: one softmax unit per class
model.add(Dense(num_classes, activation='softmax'))

model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adadelta(), metrics=['accuracy'])

model.summary()

In [0]:
# update the path to train & val folders
# (the <...> parts are placeholders; both paths are passed to flow_from_directory further down)
train_data_dir = '/content/<path to train folder>'
validation_data_dir = '/content/<path to val folder>'


# Custom Generator & ImgAug
from imgaug import augmenters as iaa


# define the augmentations needed as methods. 
# NOTE: we can leverage any image aug library in these methods to do needed augmentation. in this example, we have used imgaug library
def rotate_image(batches, angle):
  """Wrap a batch iterator and rotate every image of every batch with imgaug.

  Args:
    batches: iterator yielding (batch_x, batch_y) tuples, e.g. the result of
      ImageDataGenerator.flow_from_directory. batch_x is indexed as
      (batch, dim1, dim2, channels) and is expected to have NUM_CHANNELS channels.
    angle: rotation bound; iaa.Affine is configured with rotate=(-angle, angle).

  Yields:
    (batch_rotate, batch_y): a new array holding the augmented images, and the
    labels exactly as received.
  """
  # Local import: this function needs numpy, but the notebook never imports it,
  # so without this line the first batch would fail with NameError on `np`.
  import numpy as np

  # using rotate as example. check imgaug doc for more https://imgaug.readthedocs.io/en/latest/source/api.html
  seq = iaa.Affine(rotate=(-angle, angle))
  while True:
    try:
      batch_x, batch_y = next(batches)
    except StopIteration:
      # A finite source ran out: end this generator cleanly. Letting StopIteration
      # escape a generator body is converted into RuntimeError (PEP 479).
      return
    # output buffer: same batch size and spatial dims as the input batch, NUM_CHANNELS channels
    batch_rotate = np.zeros((batch_x.shape[0], batch_x.shape[1], batch_x.shape[2], NUM_CHANNELS))
    # NOTE: imgaug works on color images (3 channels). doesn't work on greyscale images with one channel
    for i in range(batch_x.shape[0]):
      batch_rotate[i] = seq.augment_image(batch_x[i])  # calling ImgAug's augmentation on a single image
    yield (batch_rotate, batch_y)

# NOTE: the method above applies only one augmentation.
# ideally, we could define multiple augs in one method by passing them as a list to iaa.Sequential, and set the % of images an aug is applied to using the 'Sometimes' API
# example:
def blur_crop_flip_image(batches, blur_value, crop_value, flip_value):
  """Wrap a batch iterator and apply blur, crop and an occasional flip with imgaug.

  Args:
    batches: iterator yielding (batch_x, batch_y) tuples, e.g. the result of
      ImageDataGenerator.flow_from_directory.
    blur_value: upper bound of the GaussianBlur sigma range (0, blur_value), ex: 0.4
    crop_value: upper bound of the Crop percent range (0, crop_value), ex: 0.2
    flip_value: value passed to iaa.Fliplr; the flip augmenter itself is only
      applied to 30% of the images (iaa.Sometimes(0.3, ...)).

  Yields:
    (batch_augmented, batch_y): the augmented batch as returned by imgaug, and
    the labels exactly as received.
  """
  seq = iaa.Sequential([
    iaa.GaussianBlur(sigma=(0, blur_value)), # ex: 0.4
    iaa.Crop(percent=(0, crop_value)), # ex: 0.2
    iaa.Sometimes(0.3, iaa.Fliplr(flip_value))]) # 50% flip / horizontal flip of only 30% of the images passed
  while True:
    try:
      batch_x, batch_y = next(batches)
    except StopIteration:
      # A finite source ran out: end this generator cleanly. Letting StopIteration
      # escape a generator body is converted into RuntimeError (PEP 479).
      return
    # NOTE: imgaug works on color images (3 channels). doesn't work on greyscale images with one channel
    # augment_images processes the whole batch in one call and returns the result,
    # so no pre-allocated output buffer (and no numpy / NUM_CHANNELS dependency) is needed.
    batch_augmented = seq.augment_images(batch_x) # calling ImgAug's augmentation on a batch of images
    yield (batch_augmented, batch_y)
  
# leveraging Keras' ImageDataGenerator to read input images in batches from the train directory for augmentation
# Note that we are just using the zoom option of ImageDataGenerator here. ideally, any other aug can be used as well if needed.
# but remember that the augs defined in the data generator are applied to the batch of images read from the directory before it is passed to our ImgAug aug.
# so, if only imgaug's augmentation is required, create an ImageDataGenerator without any params/augs

train_datagen = ImageDataGenerator(zoom_range=0.2, fill_mode='nearest') 
# NOTE(review): Keras documents target_size as (height, width); the width-first order here is harmless
# only while IMAGE_WIDTH == IMAGE_HEIGHT, and must stay in step with input_shape - confirm before changing either
train_batches = train_datagen.flow_from_directory(train_data_dir,
                                                  target_size=(IMAGE_WIDTH, IMAGE_HEIGHT),
                                                  class_mode='categorical',
                                                  shuffle=True,
                                                  batch_size=BATCH_SIZE)

# passing the batches of images from flow_from_directory to our data aug method. angle of rotation is 25 
# train_generator is what gets handed to fit_generator; train_batches is kept for its .samples count
train_generator = rotate_image(train_batches, 25) 

# NOTE:
# if needed, we could use a few default augs from the keras data generator (generator1), create another custom data generator (generator2)
# with ImgAug based augmentations, and then zip both generators (generator1 & 2) to combine them
# check the example in the keras doc - https://keras.io/preprocessing/image/ (search for 'train_generator = zip(image_generator, mask_generator)')

# no augs on validation images, so an empty ImageDataGenerator is used. augs can be added if needed
valid_datagen = ImageDataGenerator() 
# shuffle=False: validation batches are read in a fixed order
# NOTE(review): same (width, height) ordering as the train iterator - see Keras' target_size docs before using non-square images
validation_generator = valid_datagen.flow_from_directory(validation_data_dir,
                                                  target_size=(IMAGE_WIDTH, IMAGE_HEIGHT),
                                                  class_mode='categorical',
                                                  shuffle=False,
                                                  batch_size=BATCH_SIZE)


In [0]:
# Train with the generators through fit_generator, as usual.
#
# Keep steps_per_epoch and validation_steps in sync with the data:
# - if the validation data also goes through a custom generator with augmentation,
#   replace VAL_IMAGES with the sample count of the underlying validation iterator
# - divide with //, which returns an int (plain / returns a float)
# - int() is therefore not strictly required around a // of ints; it is kept as-is
train_steps = int(train_batches.samples // BATCH_SIZE)
val_steps = int(VAL_IMAGES // BATCH_SIZE)

history = model.fit_generator(
    train_generator,
    steps_per_epoch=train_steps,
    validation_data=validation_generator,
    validation_steps=val_steps,
    epochs=NUM_EPOCHS,
    verbose=2)


# Class weights : custom implementation

**Disclaimer:** this code has only been unit tested so far; the calculated class_weights have not yet been passed to fit_generator for an actual training run.


---



**Definition of class_weight from keras documentation:**

**class_weight:** Optional dictionary mapping class indices (integers) to a weight (float) value, used for weighting the loss function (during training only). This can be useful to tell the model to "pay more attention" to samples from an under-represented class.


In [0]:
# custom method that takes in a dict of 'label' vs. 'number of correct predictions' as input and generates dict of 'label' vs. 'weight' that can be passed to fit_generator

#
# NOTE: CHANGE IN PARAM to be passed...the dict should contain 'label' vs. 'number of CORRECT predictions' (and NOT 'number of hard samples')
#

def get_class_weights(dict_labels_vs_samples, balanced=True, expected_num_classes=200):
  """Turn per-class correct-prediction counts into a 'label' vs. 'weight' dict.

  Classes with fewer correct predictions get a larger weight:
  weight = total / (num_classes * count) * multiplying_factor.
  The result is meant to be passed as class_weight to fit_generator.

  Args:
    dict_labels_vs_samples: dict of 'label' vs. 'number of correct predictions'.
      A count of 0 is treated as 1 to avoid dividing by zero.
    balanced: if True, weights are scaled by max count / average count, so the
      class with the most correct predictions ends up with weight 1.0.
      If False, no extra scaling is applied (multiplying factor 1).
    expected_num_classes: number of classes the dict is expected to contain;
      a warning is printed when the dict has a different length. Defaults to 200.

  Returns:
    dict mapping each key of the input to its float weight. If every count is 0,
    every weight is 0.0.

  Raises:
    ValueError: if dict_labels_vs_samples is empty.
  """
  if not dict_labels_vs_samples:
    # fail early with a clear message instead of the opaque error from max() on an empty list
    raise ValueError('dict_labels_vs_samples is empty - cannot compute class weights')

  # warn only when the dict really is incomplete (previously any non-empty dict triggered this)
  if len(dict_labels_vs_samples) != expected_num_classes:
    print('WARNING: dict passed is NOT of length {0} - meaning not all {0} classes are included in the dict'.format(expected_num_classes))

  values = list(dict_labels_vs_samples.values())
  total_samples = sum(values)
  num_classes = len(values)
  max_of_all_classes = max(values)
  average_of_all_classes = total_samples / num_classes
  multiplying_factor = 1

  # skip the scaling when the average is 0 (all counts are 0) to avoid dividing by zero
  if balanced and average_of_all_classes:
    multiplying_factor = max_of_all_classes / average_of_all_classes

  print('total_samples: ', total_samples)
  print('num_classes: ', num_classes)
  print('max_of_all_classes: ', max_of_all_classes)
  print('multiplying_factor: ', multiplying_factor)

  class_weight = dict()

  for key, raw_count in dict_labels_vs_samples.items():
    num_correct_pred = int(raw_count)
    if num_correct_pred == 0:
      num_correct_pred = 1 # this is to avoid divide by zero error, if a class has no correct predictions
    class_weight[key] = (total_samples / (num_classes * num_correct_pred)) * multiplying_factor

  return class_weight

In [0]:
# Worked example: four labels with their number of correct predictions, balanced mode.
dict_labels_and_samples = {'label1': 25, 'label2': 15, 'label3': 30, 'label4': 60}
class_weights = get_class_weights(dict_labels_and_samples, balanced=True)
print(class_weights)

total_samples:  130
num_classes:  4
max_of_all_classes:  60
multiplying_factor:  1.8461538461538463
{'label1': 2.4000000000000004, 'label2': 4.0, 'label3': 2.0, 'label4': 1.0}
