### Data Augmentation Techniques
* Scaling
* Translation
* Rotation (at 90 degrees)
* Rotation (at finer angles)
* Flipping
* Adding Salt and Pepper noise
* Lighting condition
* Perspective transform

In [3]:
#import tensorflow as tf
import tensorflow.compat.v1 as tf 
tf.disable_v2_behavior()
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import matplotlib.gridspec as gridspec
import numpy as np
import os
import cv2
from math import floor, ceil, pi
import scipy
import glob
import imageio
import tensorflow_addons as tfa


%matplotlib inline

Instructions for updating:
non-resource variables are not supported in the long term



TensorFlow Addons (TFA) has ended development and introduction of new features.
TFA has entered a minimal maintenance and release mode until a planned end of life in May 2024.
Please modify downstream libraries to take dependencies from other repositories in our TensorFlow community (e.g. Keras, Keras-CV, and Keras-NLP). 

For more information see: https://github.com/tensorflow/addons/issues/2807 



In [4]:
# Side length, in pixels, of the square images produced by tf_resize_images
# and expected by the fixed-shape placeholders of the augmentation functions.
IMAGE_SIZE = 224


# Image Resizing
def tf_resize_images(X_img_file_paths):
    """Load image files and resize each one to IMAGE_SIZE x IMAGE_SIZE.

    Args:
        X_img_file_paths: iterable of image file paths readable by
            ``matplotlib.image.imread``.

    Returns:
        A float32 numpy array holding the resized images in input order.
        Only the first three channels of every file are kept.
    """
    tf.reset_default_graph()
    # Height and width are left open because the source images may differ in
    # size; each image is therefore pushed through the graph on its own.
    image_in = tf.placeholder(tf.float32, (None, None, 3))
    resize_op = tf.image.resize_images(
        image_in,
        (IMAGE_SIZE, IMAGE_SIZE),
        tf.image.ResizeMethod.NEAREST_NEIGHBOR,
    )

    resized = []
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for file_path in X_img_file_paths:
            pixels = mpimg.imread(file_path)[:, :, :3]  # Drop the alpha channel.
            resized.append(sess.run(resize_op, feed_dict={image_in: pixels}))

    return np.array(resized, dtype=np.float32)

#### Scaling:
Having differently scaled objects of interest in the images is the most important aspect of image diversity. When your network is in the hands of real users, the object in the image can be tiny or large. Also, sometimes the object can cover the entire image and yet not be fully present in it (i.e. it is cropped at the edges). The code below scales the image about its center.

In [5]:
def central_scale_images(X_imgs, scales):
    """Produce centrally scaled versions of every image.

    For each scale a square crop box centered on the image is built; the box
    is cropped and resized back to IMAGE_SIZE x IMAGE_SIZE.

    Args:
        X_imgs: iterable of images, each of shape (IMAGE_SIZE, IMAGE_SIZE, 3).
        scales: sequence of scale factors, one crop box per entry.

    Returns:
        A float32 numpy array with ``len(scales)`` outputs per input image,
        grouped by input image.
    """
    n_scales = len(scales)

    # Normalized [y1, x1, y2, x2] boxes, symmetric around the centre (0.5, 0.5).
    boxes = np.zeros((n_scales, 4), dtype=np.float32)
    for row, scale in enumerate(scales):
        low = 0.5 - 0.5 * scale
        high = 0.5 + 0.5 * scale
        boxes[row] = np.array([low, low, high, high], dtype=np.float32)
    # Every box refers to batch entry 0 because one image is fed at a time.
    box_ind = np.zeros(n_scales, dtype=np.int32)
    crop_size = np.array([IMAGE_SIZE, IMAGE_SIZE], dtype=np.int32)

    tf.reset_default_graph()
    batch_in = tf.placeholder(tf.float32, shape=(1, IMAGE_SIZE, IMAGE_SIZE, 3))
    # A single op yields all scales for the one image currently being fed.
    crop_op = tf.image.crop_and_resize(batch_in, boxes, box_ind, crop_size)

    scaled = []
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for img_data in X_imgs:
            single_batch = np.expand_dims(img_data, axis=0)
            scaled.extend(sess.run(crop_op, feed_dict={batch_in: single_batch}))

    return np.array(scaled, dtype=np.float32)

#### Translation:
We would like our network to recognize the object in any part of the image. Also, the object can be partially present in a corner or at the edges of the image. For this reason, we shift the object to various parts of the image. This may also add background noise. The code snippet translates the image toward each of the four sides, retaining 80 percent (and, in a second set of translations, 90 percent) of the base image.

In [6]:
from math import ceil, floor


def get_translate_parameters(index, image_size=None):
    """Return the glimpse and paste parameters for one of eight translations.

    Indices 0-3 shift the image left, right, top and bottom while keeping
    80 percent of it; indices 4-7 do the same while keeping 90 percent.

    Args:
        index: translation selector, 0 through 7.
        image_size: side length of the square image in pixels. Defaults to
            the module-level ``IMAGE_SIZE``.

    Returns:
        A tuple ``(offset, size, w_start, w_end, h_start, h_end)``:
        ``offset`` is a two-element float32 array used as the glimpse offset,
        ``size`` is a two-element int32 array ``[height, width]`` of the
        glimpse, and the four ints delimit the region of the output canvas
        that the glimpse is pasted into.

    Raises:
        ValueError: if ``index`` is not a supported selector. (Previously an
            unsupported index surfaced as an UnboundLocalError.)
    """
    if image_size is None:
        image_size = IMAGE_SIZE

    # index -> (direction, fraction of the image kept, offset magnitude)
    specs = {
        0: ("left", 0.8, 0.2),
        1: ("right", 0.8, 0.2),
        2: ("top", 0.8, 0.2),
        3: ("bottom", 0.8, 0.2),
        4: ("left", 0.9, 0.1),
        5: ("right", 0.9, 0.1),
        6: ("top", 0.9, 0.1),
        7: ("bottom", 0.9, 0.1),
    }
    try:
        direction, keep, shift = specs[index]
    except (KeyError, TypeError):
        raise ValueError(
            "index must be an integer in 0..7, got {!r}".format(index)
        ) from None

    kept = int(ceil(keep * image_size))  # pixels retained along the shifted axis
    skipped = int(floor((1 - keep) * image_size))  # pixels left as background

    # Defaults describe the full extent; each direction narrows one axis.
    w_start, w_end = 0, image_size
    h_start, h_end = 0, image_size

    if direction == "left":
        offset = np.array([0.0, shift], dtype=np.float32)
        size = np.array([image_size, kept], dtype=np.int32)
        w_end = kept
    elif direction == "right":
        offset = np.array([0.0, -shift], dtype=np.float32)
        size = np.array([image_size, kept], dtype=np.int32)
        w_start = skipped
    elif direction == "top":
        offset = np.array([shift, 0.0], dtype=np.float32)
        size = np.array([kept, image_size], dtype=np.int32)
        h_end = kept
    else:  # "bottom"
        offset = np.array([-shift, 0.0], dtype=np.float32)
        size = np.array([kept, image_size], dtype=np.int32)
        h_start = skipped

    return offset, size, w_start, w_end, h_start, h_end


def translate_images(X_imgs):
    """Translate every image in eight ways and return all results.

    For each of the eight parameter sets from ``get_translate_parameters`` a
    glimpse is extracted from every image and pasted onto a canvas filled
    with 1.0 (the background colour).

    Args:
        X_imgs: batch of images of shape (n, IMAGE_SIZE, IMAGE_SIZE, 3).

    Returns:
        A float32 numpy array of ``8 * len(X_imgs)`` images, ordered by
        translation first and by input image second.
    """
    n_translations = 8
    n_images = len(X_imgs)
    offsets = np.zeros((n_images, 2), dtype=np.float32)
    translated = []

    tf.reset_default_graph()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for direction in range(n_translations):
            params = get_translate_parameters(direction)
            base_offset, size, w_start, _, h_start, _ = params

            # The same offset applies to every image in the batch.
            offsets[:, :] = base_offset
            glimpse_op = tf.image.extract_glimpse(X_imgs, size, offsets)
            glimpses = sess.run(glimpse_op)

            # Start from an all-ones canvas and paste the glimpse into place.
            canvas = np.ones(
                (n_images, IMAGE_SIZE, IMAGE_SIZE, 3), dtype=np.float32
            )
            h_stop = h_start + size[0]
            w_stop = w_start + size[1]
            canvas[:, h_start:h_stop, w_start:w_stop, :] = glimpses
            translated.extend(canvas)

    return np.array(translated, dtype=np.float32)

#### Rotation (at 90 degrees):
The network has to recognize the object in any orientation. Assuming the image is square, rotating it by multiples of 90 degrees will not add any background noise to the image.

In [7]:
def rotate_images(X_imgs):
    """Rotate every image by one, two and three quarter turns.

    NOTE(review): a later cell defines another ``rotate_images`` taking
    ``(X_imgs, start_angle, end_angle, n_images)``; once that cell has run,
    this definition is replaced by it.

    Args:
        X_imgs: iterable of images, each of shape (IMAGE_SIZE, IMAGE_SIZE, 3).

    Returns:
        A float32 numpy array with three rotated images per input image,
        grouped by input image.
    """
    tf.reset_default_graph()
    image_in = tf.placeholder(tf.float32, shape=(IMAGE_SIZE, IMAGE_SIZE, 3))
    quarter_turns = tf.placeholder(tf.int32)
    rotate_op = tf.image.rot90(image_in, k=quarter_turns)

    rotated = []
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for img in X_imgs:
            for turns in (1, 2, 3):  # 90, 180 and 270 degrees
                feed = {image_in: img, quarter_turns: turns}
                rotated.append(sess.run(rotate_op, feed_dict=feed))

    return np.array(rotated, dtype=np.float32)

#### Rotation (at finer angles):
Depending upon the requirement, there may be a need to orient the object at finer angles. However, the problem with this approach is that it will add background noise. If the background in the image is of a fixed color (say white or black), the newly added background can blend with the image. However, if the newly added background color doesn’t blend, the network may treat it as a feature and learn unnecessary features.

In [8]:
from math import pi


def rotate_images(X_imgs, start_angle, end_angle, n_images):
    """Rotate a batch of images at evenly spaced angles.

    Angles run from ``start_angle`` to ``end_angle`` (both in degrees,
    inclusive) in ``n_images`` steps. With ``n_images == 1`` only
    ``start_angle`` is used.

    Args:
        X_imgs: batch of images of shape (n, IMAGE_SIZE, IMAGE_SIZE, 3).
        start_angle: first rotation angle in degrees.
        end_angle: last rotation angle in degrees.
        n_images: number of rotation angles to generate.

    Returns:
        A float32 numpy array of ``n_images * len(X_imgs)`` rotated images,
        ordered by angle first and by input image second.
    """
    # A single angle has no step; the unguarded division by (n_images - 1)
    # would raise ZeroDivisionError in that case.
    if n_images > 1:
        iterate_at = (end_angle - start_angle) / (n_images - 1)
    else:
        iterate_at = 0
    n_inputs = len(X_imgs)

    tf.reset_default_graph()
    X = tf.placeholder(tf.float32, shape=(None, IMAGE_SIZE, IMAGE_SIZE, 3))
    # One angle per image in the batch; the shape is spelled as a 1-tuple.
    radian = tf.placeholder(tf.float32, shape=(n_inputs,))
    tf_img = tfa.image.rotate(X, radian)

    X_rotate = []
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        for index in range(n_images):
            degrees_angle = start_angle + index * iterate_at
            radian_value = degrees_angle * pi / 180  # Convert to radians.
            radian_arr = [radian_value] * n_inputs
            rotated_imgs = sess.run(tf_img, feed_dict={X: X_imgs, radian: radian_arr})
            X_rotate.extend(rotated_imgs)

    return np.array(X_rotate, dtype=np.float32)

#### Flipping:
This scenario is important for removing the network's bias of assuming that certain features of the object are available on only one particular side. Consider the case shown in the image example. You don’t want the network to learn that the tilt of a banana happens only on the right side, as observed in the base image. Also notice that flipping produces a different set of images from rotation at multiples of 90 degrees. An additional open question: has anyone studied the maximum number of classes for which this gives good performance, assuming data can be generated with a good amount of diversity for each class and training time is not a factor?

In [9]:
def flip_images(X_imgs):
    """Create three flipped variants of every image.

    The variants are, in order: left-right flip, up-down flip and transpose.

    Args:
        X_imgs: iterable of images, each of shape (IMAGE_SIZE, IMAGE_SIZE, 3).

    Returns:
        A float32 numpy array with three variants per input image, grouped
        by input image.
    """
    tf.reset_default_graph()
    image_in = tf.placeholder(tf.float32, shape=(IMAGE_SIZE, IMAGE_SIZE, 3))
    flip_ops = [
        tf.image.flip_left_right(image_in),
        tf.image.flip_up_down(image_in),
        tf.image.transpose_image(image_in),
    ]

    flipped = []
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for img in X_imgs:
            # All three ops are evaluated in a single run per image.
            flipped.extend(sess.run(flip_ops, feed_dict={image_in: img}))

    return np.array(flipped, dtype=np.float32)

#### Adding Salt and Pepper noise:
Salt and Pepper noise refers to the addition of white and black dots to the image. Though this may seem unnecessary, it is important to remember that a general user who is taking an image to feed into your network may not be a professional photographer. Their camera can produce blurry images with lots of white and black dots. This augmentation aids the above-mentioned users.


In [10]:
def add_salt_pepper_noise(X_imgs):
    """Return a copy of the images with salt (1) and pepper (0) pixels added.

    The number of noisy pixels is derived from the element count of the
    first image: 0.4 percent of it, split 20/80 between salt and pepper.
    Salt is applied first, so a pixel picked for both ends up as pepper.

    Args:
        X_imgs: numpy array of images with shape (n, height, width, channels).
            The input is not modified.

    Returns:
        A copy of ``X_imgs`` with the noise applied across all channels of
        the selected pixels. An empty batch is returned as an empty copy.
    """
    # Work on a copy so the caller's images stay untouched.
    X_imgs_copy = X_imgs.copy()
    if len(X_imgs_copy) == 0:
        return X_imgs_copy

    salt_vs_pepper = 0.2
    amount = 0.004
    n_values = X_imgs_copy[0].size
    num_salt = int(np.ceil(amount * n_values * salt_vs_pepper))
    num_pepper = int(np.ceil(amount * n_values * (1.0 - salt_vs_pepper)))

    for X_img in X_imgs_copy:
        height, width = X_img.shape[:2]

        # randint's upper bound is exclusive, so passing the full dimension
        # lets the last row and column receive noise as well.
        rows = np.random.randint(0, height, num_salt)
        cols = np.random.randint(0, width, num_salt)
        X_img[rows, cols, :] = 1  # Salt

        rows = np.random.randint(0, height, num_pepper)
        cols = np.random.randint(0, width, num_pepper)
        X_img[rows, cols, :] = 0  # Pepper
    return X_imgs_copy

#### Lighting condition:
This is a very important type of diversity needed in the image dataset, not only for the network to properly learn the object of interest but also to simulate the practical scenario of images being taken by the user. The lighting condition of the images is varied by adding Gaussian noise to the image.

In [11]:
def add_gaussian_noise(X_imgs):
    """Blend every image with a random gray noise layer.

    Each output is ``0.75 * image + 0.25 * (0.25 * noise)``, computed with
    ``cv2.addWeighted``. The noise has one random value per pixel, copied to
    all three channels.

    NOTE(review): despite the name, the noise comes from ``np.random.random``
    (uniform on [0, 1)). The mean/variance/sigma constants that this function
    used to declare were never applied. Confirm whether true Gaussian noise
    was intended.

    Args:
        X_imgs: sequence of images; the height and width of the first image
            are used for all noise layers.

    Returns:
        A float32 numpy array with one noisy image per input image.
    """
    height, width, _ = X_imgs[0].shape

    noisy_imgs = []
    for X_img in X_imgs:
        noise = np.random.random((height, width, 1)).astype(np.float32)
        # Replicate the single channel so the noise is identical in R, G and B.
        noise_rgb = np.repeat(noise, 3, axis=2)
        blended = cv2.addWeighted(X_img, 0.75, 0.25 * noise_rgb, 0.25, 0)
        noisy_imgs.append(blended)

    return np.array(noisy_imgs, dtype=np.float32)

#### Perspective transform:
In a perspective transform, we try to project the image from a different point of view. For this, the position of the object should be known in advance. Merely calculating a perspective transform without knowing the position of the object can degrade the dataset. Hence, this type of augmentation has to be performed selectively. The greatest advantage of this augmentation is that it can emphasize the parts of the object in the image that the network needs to learn.

In [12]:
def get_mask_coord(imshape):
    """Return the four corners of a fixed trapezoid scaled to an image shape.

    The trapezoid is wide at 99 percent of the image height and narrow at
    32 percent of it.

    Args:
        imshape: image shape whose first entry is the height and second
            entry is the width.

    Returns:
        An int32 numpy array of shape (1, 4, 2) holding (x, y) points;
        fractional coordinates are truncated by the integer cast.
    """
    height, width = imshape[0], imshape[1]
    # (x fraction, y fraction) of each corner, in the order they are emitted.
    corner_fractions = (
        (0.09, 0.99),
        (0.43, 0.32),
        (0.56, 0.32),
        (0.85, 0.99),
    )
    polygon = [(fx * width, fy * height) for fx, fy in corner_fractions]
    return np.array([polygon], dtype=np.int32)


def get_perspective_matrices(X_img):
    """Compute the perspective matrix that stretches the mask region.

    The source quadrilateral comes from ``get_mask_coord``; the destination
    is a rectangle spanning the full image height and inset 15 pixels from
    the left and right edges.

    Args:
        X_img: image array whose ``shape`` starts with (height, width).

    Returns:
        The matrix returned by ``cv2.getPerspectiveTransform``.
    """
    offset = 15
    height, width = X_img.shape[0], X_img.shape[1]

    # Estimated location of the object of interest inside the image.
    src = np.float32(get_mask_coord(X_img.shape))
    left, right = offset, width - offset
    dst = np.float32(
        [
            [left, height],
            [left, 0],
            [right, 0],
            [right, height],
        ]
    )

    return cv2.getPerspectiveTransform(src, dst)


def perspective_transform(X_img):
    """Warp an image with the matrix from ``get_perspective_matrices``.

    Only one fixed perspective is produced per image.

    Args:
        X_img: image array whose ``shape`` starts with (height, width).

    Returns:
        The warped image, the same width and height as the input.
    """
    matrix = get_perspective_matrices(X_img)
    height, width = X_img.shape[:2]
    # OpenCV takes the output size as (width, height).
    return cv2.warpPerspective(
        X_img, matrix, (width, height), flags=cv2.INTER_LINEAR
    )

## Generate and save augmented images into class folders

In [13]:
# Name of the base image that is read from every class folder.
filename = "gauge_0.png"
# Template for image locations: {0} is the class folder suffix and {1} is
# the file name inside that folder.
path = "./images/tf_gauges/psi_{0}/{1}"

In [14]:
def resize_image(i, X_img):
    """Save the first image of ``X_img`` as ``gauge_resized.jpg``.

    Args:
        i: class folder suffix substituted into the module-level ``path``.
        X_img: batch of images; only element 0 is written.
    """
    target = path.format(i, "gauge_resized.jpg")
    imageio.imwrite(target, X_img[0])

In [15]:
def scaled_image(folder, img):
    """Save centrally scaled versions of the image into a class folder.

    Nineteen scales between 97 and 60 percent are applied; the results are
    written as ``gauge_scale_<n>.jpg``.

    Args:
        folder: class folder suffix substituted into the module-level ``path``.
        img: batch of images passed to ``central_scale_images``.
    """
    # Fine steps near full size, coarser steps below 84 percent.
    scale = [
        0.97, 0.96, 0.95, 0.94, 0.93, 0.92, 0.91,
        0.90, 0.89, 0.88, 0.87, 0.86, 0.85, 0.84,
        0.80, 0.75, 0.70, 0.65, 0.60,
    ]
    scaled_imgs = central_scale_images(img, scale)

    for n in range(len(scale)):
        target = path.format(folder, f"gauge_scale_{n}.jpg")
        imageio.imwrite(target, scaled_imgs[n])

In [16]:
def tranlate_image(folder, img):
    """Save the eight translated versions of the image into a class folder.

    Files are written as ``gauge_translate_<n>.jpg``.

    Args:
        folder: class folder suffix substituted into the module-level ``path``.
        img: batch of images passed to ``translate_images``.
    """
    translated_imgs = translate_images(img)

    for n in range(8):
        target = path.format(folder, f"gauge_translate_{n}.jpg")
        imageio.imwrite(target, translated_imgs[n])

In [17]:
def rotate_general_image(folder, img):
    """Save fourteen rotations of the image between -90 and 90 degrees.

    Files are written as ``gauge_rotated_<n>.jpg``.

    Args:
        folder: class folder suffix substituted into the module-level ``path``.
        img: batch of images passed to ``rotate_images``.
    """
    n_rotations = 14
    rotated = rotate_images(img, -90, 90, n_rotations)

    for n in range(n_rotations):
        target = path.format(folder, f"gauge_rotated_{n}.jpg")
        imageio.imwrite(target, rotated[n])

In [18]:
def flipped_image(folder, img):
    """Save the three flipped versions of the image into a class folder.

    Files are written as ``gauge_flipped_<n>.jpg``.

    Args:
        folder: class folder suffix substituted into the module-level ``path``.
        img: batch of images passed to ``flip_images``.
    """
    flipped = flip_images(img)

    for n in range(3):
        target = path.format(folder, f"gauge_flipped_{n}.jpg")
        imageio.imwrite(target, flipped[n])

In [19]:
def salt_pepper(folder, img):
    """Save a salt-and-pepper version of the image as ``gauge_salt_pepper.jpg``.

    Args:
        folder: class folder suffix substituted into the module-level ``path``.
        img: batch of images passed to ``add_salt_pepper_noise``; only the
            first result is written.
    """
    noisy = add_salt_pepper_noise(img)
    target = path.format(folder, "gauge_salt_pepper.jpg")
    imageio.imwrite(target, noisy[0])

In [20]:
def lighting(folder, img):
    """Save a noise-blended version of the image as ``gauge_gaussian.jpg``.

    Args:
        folder: class folder suffix substituted into the module-level ``path``.
        img: batch of images passed to ``add_gaussian_noise``; only the
            first result is written.
    """
    noisy = add_gaussian_noise(img)
    target = path.format(folder, "gauge_gaussian.jpg")
    imageio.imwrite(target, noisy[0])

In [21]:
# Collect every PNG that sits one folder level below the gauges directory.
dirList = glob.glob("./images/tf_gauges/*/*.png")
X_img_paths = [str(file) for file in dirList]

In [22]:
def exec_images(i, img):
    """Run every augmentation on ``img`` and save the results in folder ``i``.

    Args:
        i: class folder suffix handed to each saving helper.
        img: batch of resized images handed to each saving helper.
    """
    steps = (
        resize_image,  # the resized base image itself
        scaled_image,  # central scaling, 97% down to 60%
        tranlate_image,  # translations toward the four sides
        rotate_general_image,  # rotations between -90 and 90 degrees
        flipped_image,  # flips and transpose
        salt_pepper,  # salt-and-pepper pixel noise
        lighting,  # lighting condition (noise blend)
    )
    for step in steps:
        step(i, img)


In [24]:
m = range(14, 29)  # whole-number part of each class label
n = range(0, 10)  # tenths digit of each class label
o = range(5, 10)

# Augment the base image of every class folder "<i>-<j>". Values 14 to 27
# have ten folders each (tenths 0 to 9); 28 only has the "28-0" folder.
for i in m:
    if i < 0 or i > 28:
        continue

    print(i)
    is_last = i == 28
    for j in ([0] if is_last else n):
        if not is_last:
            print(f"{i}.{j}")
        decimal = "{0}-{1}".format(i, j)
        X_img = path.format(decimal, filename)
        X_resize = tf_resize_images([X_img])

        exec_images(decimal, X_resize)

14
14.0




14.1




14.2




14.3




14.4




14.5




14.6




14.7




14.8




14.9




15
15.0




15.1




15.2




15.3




15.4




15.5




15.6




15.7




15.8




15.9




16
16.0




16.1




16.2




16.3




16.4




16.5




16.6




16.7




16.8




16.9




17
17.0




17.1




17.2




17.3




17.4




17.5




17.6




17.7




17.8




17.9




18
18.0




18.1




18.2




18.3




18.4




18.5




18.6




18.7




18.8




18.9




19
19.0




19.1




19.2




19.3




19.4




19.5




19.6




19.7




19.8




19.9




20
20.0




20.1




20.2




20.3




20.4




20.5




20.6




20.7




20.8




20.9




21
21.0




21.1




21.2




21.3




21.4




21.5




21.6




21.7




21.8




21.9




22
22.0




22.1




22.2




22.3




22.4




22.5




22.6




22.7




22.8




22.9




23
23.0




23.1




23.2




23.3




23.4




23.5




23.6




23.7




23.8




23.9




24
24.0




24.1




24.2




24.3




24.4




24.5




24.6




24.7




24.8




24.9




25
25.0




25.1




25.2




25.3




25.4




25.5




25.6




25.7




25.8




25.9




26
26.0




26.1




26.2




26.3




26.4




26.5




26.6




26.7




26.8




26.9




27
27.0




27.1




27.2




27.3




27.4




27.5




27.6




27.7




27.8




27.9




28


