#### This notebook implements and demonstrates Random Cutout image augmentation as a custom Keras preprocessing layer. Putting augmentation into preprocessing layers helps keep the input pipeline clean.

> If this helps you, please do upvote :)

In [None]:
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow import keras
import tensorflow.keras.preprocessing as pp
from tensorflow.keras import layers
import os 
import random

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Base matplotlib style sheet.
matplotlib.style.use('ggplot')
# Ask the inline backend for high-resolution ('retina') figures.
%config InlineBackend.figure_format = 'retina'
# Apply the seaborn theme with fonts scaled to 1.5x.
sns.set(font_scale=1.5)

In [None]:
# Training metadata; its `image_id` and `label` columns are used further down.
df = pd.read_csv('../input/cassava-leaf-disease-classification/train.csv')
# Directory holding the training images (joined with `image_id` to build paths).
IMAGE_FOLDER='../input/cassava-leaf-disease-classification/train_images/'

# Single seed shared by every random number generator in the notebook.
RANDOM_STATE=42

# Default figure size (width, height) in inches.
fig_size=(15,8)
# Only override the figure size. Calling `sns.set(rc=...)` here would re-apply
# the whole seaborn theme with default arguments and reset the font scale
# configured earlier in the notebook.
plt.rcParams['figure.figsize'] = fig_size
def random_seed():
    """Seed every random number generator used here with RANDOM_STATE.

    Seeds Python's `random`, NumPy's global generator and TensorFlow's global
    seed, and exports the value as the PYTHONHASHSEED environment variable.
    """
    seed = RANDOM_STATE
    # Hash randomisation is fixed when the interpreter starts, so this export
    # only reaches processes launched afterwards, not the running kernel.
    os.environ['PYTHONHASHSEED'] = str(seed)
    for seed_generator in (random.seed, np.random.seed, tf.random.set_seed):
        seed_generator(seed)

# Seed all generators once, before the data is split or shuffled further down.
random_seed()

In [None]:
# Sentinel passed to `map(num_parallel_calls=...)` and `prefetch(...)` so that
# tf.data picks the level of parallelism / buffering itself.
AUTOTUNE = tf.data.experimental.AUTOTUNE
# Number of elements held in the shuffle buffer of the training dataset.
SHUFFLE_BUFFER_SIZE = 2000

In [None]:
# Vectorised string concatenation: prefix every image id with the image folder
# (replaces a row-by-row `.apply(lambda ...)`).
df['path'] = IMAGE_FOLDER + df['image_id']

In [None]:
# Inputs are the image paths, targets are the `label` column.
X, y = (df[column].values for column in ('path', 'label'))

In [None]:
def prepare_datasets(X, y, test_size, validation_size, random_state=None):
    """Split the data into stratified train, validation and test subsets.

    The test set is carved out first; the validation set is then taken from
    what remains, so `validation_size` is relative to the post-test remainder
    rather than to the full dataset.

    Args:
        X: samples to split.
        y: labels aligned with `X`; both splits are stratified on them.
        test_size: size of the test split, forwarded to `train_test_split`.
        validation_size: size of the validation split, forwarded to
            `train_test_split` for the second split.
        random_state: optional seed forwarded to both `train_test_split`
            calls. The default `None` keeps the previous behaviour of not
            passing an explicit seed.

    Returns:
        Tuple `(X_train, X_validation, X_test, y_train, y_validation, y_test)`.
    """
    # First split: hold out the test set.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, stratify=y, random_state=random_state)
    # Second split: take the validation set out of the remaining training data.
    X_train, X_validation, y_train, y_validation = train_test_split(
        X_train, y_train, test_size=validation_size, stratify=y_train,
        random_state=random_state)

    return X_train, X_validation, X_test, y_train, y_validation, y_test

Split the data into train, validation and test sets.

In [None]:
# 5% of the data is held out for testing; 15% of the remainder for validation.
X_train, X_validation, X_test, y_train, y_validation, y_test = prepare_datasets(
    X, y, test_size=0.05, validation_size=0.15
)

In [None]:
# Pipeline pattern adapted from https://cs230.stanford.edu/blog/datapipeline/
def load_image(image_path,label):
    """Read the file at `image_path` and decode it as a 3-channel JPEG.

    Returns the decoded image together with the unchanged `label`.
    """
    raw_bytes = tf.io.read_file(image_path)
    # decode_jpeg is used deliberately: tf.image.decode_image would leave the
    # output shape undefined.
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    return decoded, label

In [None]:
def get_data_set(files, labels, batch_size, is_training=False):
    """Build a batched, prefetched tf.data pipeline of decoded images.

    Args:
        files: image file paths, sliced element-wise together with `labels`.
        labels: one label per file.
        batch_size: number of (image, label) pairs per batch.
        is_training: when True, the examples are shuffled with a buffer of
            SHUFFLE_BUFFER_SIZE elements.

    Returns:
        The dataset produced by slicing, optional shuffling, decoding with
        `load_image`, batching and prefetching, in that order.
    """
    dataset = tf.data.Dataset.from_tensor_slices((files, labels))
    if is_training:
        # Shuffle the (path, label) pairs *before* decoding, so the shuffle
        # buffer holds short strings instead of SHUFFLE_BUFFER_SIZE decoded
        # images.
        dataset = dataset.shuffle(SHUFFLE_BUFFER_SIZE)
    dataset = dataset.map(load_image, num_parallel_calls=AUTOTUNE)
    # NOTE(review): images are batched without resizing, which presumably
    # relies on every file having the same resolution - confirm.
    dataset = dataset.batch(batch_size)
    dataset = dataset.prefetch(AUTOTUNE)
    return dataset

In [None]:
# Small shuffled training dataset, used only to preview the augmentation.
sample_image_dataset = get_data_set(
    X_train, y_train, batch_size=32, is_training=True
)

In [None]:
# Pull a single batch and show the shapes of its image and label tensors.
sample_img_batch, sample_label_batch = next(iter(sample_image_dataset))
print(sample_img_batch.shape, sample_label_batch.shape, sep='\n')

In [None]:
import tensorflow_addons as tfa
from tensorflow.python.keras.engine.input_spec import InputSpec
from tensorflow.python.keras.engine.base_preprocessing_layer import PreprocessingLayer
from tensorflow.python.keras.utils import tf_utils

class RandomCutout(PreprocessingLayer):
    """Preprocessing layer that applies a random cutout mask to images.

    Thin wrapper around `tfa.image.random_cutout`: in training mode the masked
    region is filled with zeros, otherwise the inputs are returned unchanged.

    Input shape:
        4D tensor with shape `(samples, height, width, channels)`,
        data_format='channels_last'.
    Output shape:
        4D tensor with shape `(samples, height, width, channels)`,
        data_format='channels_last'.
    Attributes:
        mask: A tuple or a list with two values `mask-height` and `mask-width`.
        seed: Integer. Used to create a random seed.
        name: A string, the name of the layer.
    Raise:
        ValueError: if mask is not a list or tuple of two values.
        InvalidArgumentError: if mask_size (mask_height x mask_width) can't be
            divisible by 2.
    """

    def __init__(self, mask, seed=None, name=None, **kwargs):
        """Validate `mask` and store the layer configuration."""
        if not (isinstance(mask, (tuple, list)) and len(mask) == 2):
            raise ValueError('RandomCutout layer {name} received an invalid mask '
                       'argument {arg}. only list or touple of size 2 should be passed'.format(name=name, arg=mask))

        self.mask = mask
        # Historical attribute names, kept for backward compatibility:
        # `lower` holds the mask height and `upper` the mask width.
        self.lower = mask[0]
        self.upper = mask[1]

        self.seed = seed
        self.input_spec = InputSpec(ndim=4)
        super(RandomCutout, self).__init__(name=name, **kwargs)

    def call(self, inputs, training=True):
        """Return `inputs` with a random cutout applied when `training` is true.

        Args:
            inputs: 4D image batch.
            training: whether the layer runs in training mode. When `None`,
                the Keras learning phase is used instead.
        """
        if training is None:
            # Resolve the phase through the public backend API; no `K` alias
            # is imported in this notebook, so `K.learning_phase()` would
            # raise NameError.
            training = tf.keras.backend.learning_phase()

        def random_cutout_inputs():
            # Forward the configured seed, which was previously stored but
            # never used.
            return tfa.image.random_cutout(
                inputs, (self.lower, self.upper), constant_values=0,
                seed=self.seed)

        output = tf_utils.smart_cond(training, random_cutout_inputs,
                                              lambda: inputs)
        # The cutout does not change the tensor shape; restore the static shape.
        output.set_shape(inputs.shape)
        return output

    def compute_output_shape(self, input_shape):
        """The output shape equals the input shape."""
        return input_shape

    def get_config(self):
        """Return the base layer config extended with `mask` and `seed`."""
        config = {
            'mask': self.mask,
            'seed': self.seed,
        }

        base_config = super(RandomCutout, self).get_config()
        return {**base_config, **config}

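The version of `RandomCutout` defined below is for `TF 2.4.0`, where `smart_cond` is imported from `control_flow_util`. Running the cell below replaces the definition above.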

In [None]:


import tensorflow_addons as tfa
from tensorflow.python.keras.engine.input_spec import InputSpec
from tensorflow.python.keras.engine.base_preprocessing_layer import PreprocessingLayer
from tensorflow.python.keras.engine import base_preprocessing_layer
from tensorflow.python.keras.utils import control_flow_util

class RandomCutout(PreprocessingLayer):
    """Preprocessing layer that applies a random cutout mask to images.

    Thin wrapper around `tfa.image.random_cutout`: in training mode the masked
    region is filled with zeros, otherwise the inputs are returned unchanged.
    This variant uses `control_flow_util.smart_cond` and registers itself in
    the preprocessing-layer usage gauge.

    Input shape:
        4D tensor with shape `(samples, height, width, channels)`,
        data_format='channels_last'.
    Output shape:
        4D tensor with shape `(samples, height, width, channels)`,
        data_format='channels_last'.
    Attributes:
        mask: A tuple or a list with two values `mask-height` and `mask-width`.
        seed: Integer. Used to create a random seed.
        name: A string, the name of the layer.
    Raise:
        ValueError: if mask is not a list or tuple of two values.
        InvalidArgumentError: if mask_size (mask_height x mask_width) can't be
            divisible by 2.
    """

    def __init__(self, mask, seed=None, name=None, **kwargs):
        """Validate `mask` and store the layer configuration."""
        if not (isinstance(mask, (tuple, list)) and len(mask) == 2):
            raise ValueError('RandomCutout layer {name} received an invalid mask '
                       'argument {arg}. only list or touple of size 2 should be passed'.format(name=name, arg=mask))

        self.mask = mask
        # Historical attribute names, kept for backward compatibility:
        # `lower` holds the mask height and `upper` the mask width.
        self.lower = mask[0]
        self.upper = mask[1]

        self.seed = seed
        self.input_spec = InputSpec(ndim=4)
        super(RandomCutout, self).__init__(name=name, **kwargs)
        # Records the layer name in a private (underscore-prefixed) gauge of
        # the preprocessing-layer module.
        base_preprocessing_layer._kpl_gauge.get_cell('V2').set('RandomCutout')

    def call(self, inputs, training=True):
        """Return `inputs` with a random cutout applied when `training` is true.

        Args:
            inputs: 4D image batch.
            training: whether the layer runs in training mode. When `None`,
                the Keras learning phase is used instead.
        """
        if training is None:
            # Resolve the phase through the public backend API; no `K` alias
            # is imported in this notebook, so `K.learning_phase()` would
            # raise NameError.
            training = tf.keras.backend.learning_phase()

        def random_cutout_inputs():
            # Forward the configured seed, which was previously stored but
            # never used.
            return tfa.image.random_cutout(
                inputs, (self.lower, self.upper), constant_values=0,
                seed=self.seed)

        output = control_flow_util.smart_cond(training, random_cutout_inputs,
                                              lambda: inputs)
        # The cutout does not change the tensor shape; restore the static shape.
        output.set_shape(inputs.shape)
        return output

    def compute_output_shape(self, input_shape):
        """The output shape equals the input shape."""
        return input_shape

    def get_config(self):
        """Return the base layer config extended with `mask` and `seed`."""
        config = {
            'mask': self.mask,
            'seed': self.seed,
        }

        base_config = super(RandomCutout, self).get_config()
        return {**base_config, **config}

### Setting up the preprocessing and augmentation pipeline

In [None]:
# Side length (pixels) that every image is resized to.
img_size = 512

# Preprocessing: resize to img_size x img_size, then multiply pixel values by 1/255.
img_preprocessing = tf.keras.Sequential(
    layers=[
        layers.experimental.preprocessing.Resizing(height=img_size, width=img_size),
        layers.experimental.preprocessing.Rescaling(scale=1./255),
    ],
    name="img_preprocessing",
)

# Augmentation: random cutout with an 80x80 mask.
img_augmentation = tf.keras.Sequential(
    layers=[RandomCutout(mask=(80,80))],
    name="img_augmentation",
)

# Full pipeline: preprocessing followed by augmentation.
imp_preprocessing_and_augmentation = tf.keras.Sequential(
    layers=[img_preprocessing, img_augmentation]
)



Applying the image preprocessing and augmentation to each of the images in the batch

In [None]:
# Set once, before the first subplot creates the figure (hoisted out of the loop).
plt.rcParams['figure.figsize'] = (25,15)

# Size the grid from the batch itself instead of assuming exactly 32 images.
n_cols = 8
n_images = int(sample_img_batch.shape[0])
n_rows = -(-n_images // n_cols)  # ceiling division

for n in range(n_images):
    ax = plt.subplot(n_rows, n_cols, n+1)
    # The pipeline works on 4D batches, so wrap the single image in a batch of
    # one. training=True is passed explicitly because RandomCutout only masks
    # in training mode.
    augmented_image = imp_preprocessing_and_augmentation(
        tf.expand_dims(sample_img_batch[n], 0), training=True)
    ax.imshow(augmented_image[0].numpy())
    ax.set_title(sample_label_batch[n].numpy())
    ax.axis('off')