#### This notebook implements and demonstrates Random Shear for image augmentation as a custom Keras preprocessing layer. Putting augmentation into preprocessing layers helps keep the pipeline clean.

> If this helps you, please do upvote :)

In [None]:
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow import keras
import tensorflow.keras.preprocessing as pp
from tensorflow.keras import layers
import os 
import random
import tensorflow.keras.backend as K
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Select matplotlib's 'ggplot' style sheet.
# NOTE(review): the sns.set(...) call below applies seaborn's own theme and
# probably overrides this style — confirm which look is intended.
matplotlib.style.use('ggplot')
# Ask the inline backend for high-resolution ("retina") figures.
%config InlineBackend.figure_format = 'retina'
# Apply the seaborn theme with fonts scaled by 1.5.
sns.set(font_scale=1.5)

In [None]:
# Caption table of the dataset; the CSV is pipe-delimited.
df = pd.read_csv('../input/flickr-image-dataset/flickr30k_images/results.csv',sep='|')
# Folder that holds the image files; prefixed to file names to build full paths.
IMAGE_FOLDER='../input/flickr-image-dataset/flickr30k_images/flickr30k_images/'

# Single seed value used for every random number generator seeded in this notebook.
RANDOM_STATE=42

# Default figure size (width, height) in inches.
fig_size=(15,8)
# Every sns.set() call re-applies the complete seaborn theme, so passing only
# `rc` here would silently reset the font scale back to 1. Repeat font_scale
# so the 1.5x fonts stay in effect together with the figure size.
sns.set(font_scale=1.5, rc={'figure.figsize':fig_size})
def random_seed():
    """Seed the TensorFlow, NumPy and Python RNGs with RANDOM_STATE.

    Also stores RANDOM_STATE (as a string) in the PYTHONHASHSEED
    environment variable.
    """
    tf.random.set_seed(RANDOM_STATE)
    np.random.seed(RANDOM_STATE)
    os.environ['PYTHONHASHSEED'] = str(RANDOM_STATE)
    random.seed(RANDOM_STATE)

# Seed everything once, right after the helper is defined.
random_seed()

In [None]:
# tf.data's auto-tuning sentinel; passed to Dataset.map() and Dataset.prefetch()
# when the input pipeline is built.
AUTOTUNE = tf.data.experimental.AUTOTUNE
# Buffer size handed to Dataset.shuffle() for training datasets.
SHUFFLE_BUFFER_SIZE = 3000

In [None]:
# Build the full path of every image by prefixing its file name with IMAGE_FOLDER.
df['path'] = df['image_name'].map(lambda file_name: IMAGE_FOLDER + file_name)

In [None]:
# Shuffle a private copy of the paths. `.values` can hand back a view onto the
# DataFrame's own storage, in which case an in-place shuffle would also reorder
# df['path'] and detach every path from the rest of its row.
IMAGES = df['path'].values.copy()
np.random.shuffle(IMAGES)

In [None]:
IMG_SIZE=512
# https://cs230.stanford.edu/blog/datapipeline/
def load_image(image_path):
    """Read the JPEG at `image_path` as a 3-channel image resized to IMG_SIZE x IMG_SIZE."""
    raw_bytes = tf.io.read_file(image_path)
    # tf.image.decode_image is avoided on purpose: its output shape would be undefined.
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    target_size = (IMG_SIZE, IMG_SIZE)
    return tf.image.resize(decoded, target_size)

In [None]:
def get_data_set(files, batch_size, is_training=False):
    """Build a batched, prefetched tf.data pipeline of decoded images.

    Args:
        files: Sequence of image file paths; each one is passed to `load_image`.
        batch_size: Number of images per emitted batch.
        is_training: When true, the elements are shuffled with a buffer of
            SHUFFLE_BUFFER_SIZE.

    Returns:
        A `tf.data.Dataset` yielding batches of images produced by `load_image`.
    """
    dataset = tf.data.Dataset.from_tensor_slices(files)
    if is_training:
        # Shuffle the path strings *before* decoding. Shuffling after map()
        # would keep SHUFFLE_BUFFER_SIZE decoded images in memory and would
        # have to decode that many files before the first batch is emitted;
        # shuffling the paths yields the same kind of random order at a
        # fraction of the memory and start-up cost.
        dataset = dataset.shuffle(SHUFFLE_BUFFER_SIZE)
    dataset = dataset.map(load_image, num_parallel_calls=AUTOTUNE)
    dataset = dataset.batch(batch_size)
    # Overlap producing the next batch with consuming the current one.
    dataset = dataset.prefetch(AUTOTUNE)
    return dataset

In [None]:
# Shuffled (training-mode) dataset over all image paths, in batches of 32.
sample_image_dataset = get_data_set(IMAGES, 32, is_training=True)

In [None]:
# Pull the first batch out of the dataset and show its shape.
sample_img_batch = next(iter(sample_image_dataset))
print(sample_img_batch.shape) 


## Random Shear

In [None]:
from tensorflow.python.keras.engine.input_spec import InputSpec
from tensorflow.python.keras.engine.base_preprocessing_layer import PreprocessingLayer
from tensorflow.python.keras.utils import tf_utils
import tensorflow.keras.backend as K

class RandomShear(PreprocessingLayer):
    """Randomly shears every image of a batch.

    Each image gets its own shear angle, drawn uniformly from
    `[-intensity, intensity]` degrees. The shear is only applied when
    `training` is true; otherwise the input is returned unchanged.

    The transformation itself runs in NumPy/SciPy and is wrapped in
    `tf.numpy_function`, so the layer can be called both eagerly and inside
    a graph, and the output keeps the dtype of the input.

    Input shape:
      4D tensor with shape:
      `(samples, height, width, channels)`, data_format='channels_last'.
    Output shape:
      4D tensor with shape:
      `(samples, height, width, channels)`, data_format='channels_last'.
    Attributes:
      intensity: Transformation intensity in degrees.
      seed: Integer. Used to create a random seed. When given, the layer draws
        its shear angles from a private generator seeded with this value;
        when `None`, NumPy's global generator is used.
      name: A string, the name of the layer.
    """
    def __init__(self, intensity, seed=None, name=None, **kwargs):
        # The base initializer has to run first: it resets the layer's
        # internal state, so an input_spec assigned before it would be lost.
        super(RandomShear, self).__init__(name=name, **kwargs)
        self.intensity = intensity
        self.seed = seed
        # Private generator so that `seed` actually controls the shear angles.
        self._rng = None if seed is None else np.random.RandomState(seed)
        self.input_spec = InputSpec(ndim=4)

    def _shear_batch(self, batch):
        """Shear each image of a NumPy batch `(samples, height, width, channels)`."""
        if batch.shape[0] == 0:
            # Nothing to transform; np.stack would reject an empty list.
            return batch
        draw = np.random.uniform if self._rng is None else self._rng.uniform
        sheared = [
            tf.keras.preprocessing.image.apply_affine_transform(
                image,
                shear=draw(-self.intensity, self.intensity),
                row_axis=0, col_axis=1, channel_axis=2)
            for image in batch
        ]
        # Keep the dtype of the input so both branches of the conditional in
        # call() produce the same type.
        return np.stack(sheared).astype(batch.dtype, copy=False)

    def call(self, inputs, training=True):
        """Return the sheared batch when training, the untouched input otherwise."""
        if training is None:
            training = K.learning_phase()

        def random_shear_inputs():
            # numpy_function hands the batch to NumPy regardless of whether we
            # are executing eagerly or building a graph.
            return tf.numpy_function(self._shear_batch, [inputs], inputs.dtype)

        output = tf_utils.smart_cond(training, random_shear_inputs,
                                              lambda: inputs)
        # numpy_function drops static shape information; restore it.
        output.set_shape(inputs.shape)
        return output

    def compute_output_shape(self, input_shape):
        """Shearing does not change the shape of the batch."""
        return input_shape

    def get_config(self):
        """Return the base layer config extended with `intensity` and `seed`."""
        config = {
            'intensity': self.intensity,
            'seed': self.seed,
        }
        
        base_config = super(RandomShear, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

### Integration

In [None]:
# Preprocessing stage: multiply pixel values by 1/255.
img_preprocessing = tf.keras.Sequential(
    [layers.experimental.preprocessing.Rescaling(1./255)],
    name="img_preprocessing",
)

# Augmentation stage: random shear with an intensity of 50.
img_augmentation = tf.keras.Sequential([RandomShear(intensity=50)], name="img_augmentation")

# Full pipeline: preprocessing followed by augmentation.
imp_preprocessing_and_augmentation = tf.keras.Sequential(
    [
        img_preprocessing,
        img_augmentation,
    ]
)


The image preprocessing and augmentation are applied to the first few images of the sample batch.

### Images with and without transformation

In [None]:
N = 5
# Create the figure with an explicit size instead of overwriting the global
# rcParams on every loop iteration, and size the grid from N rather than from
# a hard-coded 5. squeeze=False keeps `axes` two-dimensional even for N == 1.
fig, axes = plt.subplots(1, N, figsize=(25, 15), squeeze=False)
# Preprocess the first N images of the sample batch in one call.
processed_images = img_preprocessing(sample_img_batch[:N])
for ax, processed_image in zip(axes[0], processed_images):
    ax.imshow(processed_image.numpy())
    ax.axis('off')
plt.show()

In [None]:
# Show as many augmented images as the row of original images (5), so every
# augmented image can be compared with its untransformed counterpart.
N = 5
# Create the figure with an explicit size instead of overwriting the global
# rcParams on every loop iteration, and size the grid from N rather than from
# a hard-coded 5. squeeze=False keeps `axes` two-dimensional even for N == 1.
fig, axes = plt.subplots(1, N, figsize=(25, 15), squeeze=False)
# Preprocess and augment the first N images of the sample batch in one call;
# the augmentation layer still draws a separate shear for every image.
augmented_images = imp_preprocessing_and_augmentation(sample_img_batch[:N])
for ax, augmented_image in zip(axes[0], augmented_images):
    ax.imshow(augmented_image.numpy())
    ax.axis('off')
plt.show()