## Goal:
`Add calibrated noise to the images of each class to improve the robustness of the final DCNN`

## Methodology:
`Use the Fast Gradient Sign Method (FGSM) to construct noise matrices that maximize the current model's prediction loss`

In [33]:
import tensorflow as tf
import matplotlib as mpl
import matplotlib.pyplot as plt
from tensorflow import keras
import numpy as np
from PIL import Image
import os
import glob
import random

## Load the current model

In [20]:
# Restore the previously saved Keras model from disk. It is kept as the global
# `model` that the FGSM routine later in this notebook calls.
model = tf.keras.models.load_model(
    "C:/Users/Neesarg/Projects/Python/Machine Learning/Devanagari Master/Data Augmentation/Model"
)

## Loading the Dataset

In [25]:
# Root directory of the dataset. Later cells append "Test/" and "Train/" to it by
# plain string concatenation, so the trailing slash is required.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
global_data_path: str = "E:/Large Datasets/Devanagari Letters/DevanagariHandwrittenCharacterDataset/"

In [31]:
# One class per sub-directory of the Test split. Only directories are kept, so
# stray files (e.g. OS thumbnail caches) are not mistaken for classes, and the
# names are sorted because glob() does not guarantee any particular order.
class_names = sorted(
    os.path.basename(gp)
    for gp in glob.glob(global_data_path + "Test/*")
    if os.path.isdir(gp)
)
print("Number of Classes:", len(class_names))
print("Classes:\n", class_names)

Number of Classes: 46
Classes:
 ['character_10_yna', 'character_11_taamatar', 'character_12_thaa', 'character_13_daa', 'character_14_dhaa', 'character_15_adna', 'character_16_tabala', 'character_17_tha', 'character_18_da', 'character_19_dha', 'character_1_ka', 'character_20_na', 'character_21_pa', 'character_22_pha', 'character_23_ba', 'character_24_bha', 'character_25_ma', 'character_26_yaw', 'character_27_ra', 'character_28_la', 'character_29_waw', 'character_2_kha', 'character_30_motosaw', 'character_31_petchiryakha', 'character_32_patalosaw', 'character_33_ha', 'character_34_chhya', 'character_35_tra', 'character_36_gya', 'character_3_ga', 'character_4_gha', 'character_5_kna', 'character_6_cha', 'character_7_chha', 'character_8_ja', 'character_9_jha', 'digit_0', 'digit_1', 'digit_2', 'digit_3', 'digit_4', 'digit_5', 'digit_6', 'digit_7', 'digit_8', 'digit_9']


## Data Augmentation Methodology

1. Define a function `perform_FGSM_aug` with parameters:<br> 
    a. `file_path`: path to the class directory<br>
    b. `alpha`: fraction of the class's images to which FGSM is applied (default value: 0.3, i.e. 510 of the 1,700 images)<br>
    c. `epsilon`: magnitude of the noise added to each image (<1, default value: 0.4)<br>
2. Randomly pick alpha * 1,700 images from the directory --> set S
3. Apply FGSM to S
4. Append resultant images back into the class directory

In [149]:
# Categorical cross-entropy (expects one-hot labels), built with default arguments.
# It is the loss whose gradient w.r.t. the input image drives the FGSM perturbation.
# NOTE(review): the default is from_logits=False — confirm the model outputs probabilities.
loss_object = keras.losses.CategoricalCrossentropy()

def perform_FGSM_aug(file_path, alpha=0.3, epsilon=0.4):
    """Write FGSM-perturbed copies of a random subset of a class directory's images.

    A fraction ``alpha`` of the original images found under ``file_path`` is
    sampled without replacement. Each sampled image is shifted by ``epsilon``
    (expressed as a fraction of the pixel range) along the sign of the gradient
    of ``loss_object`` with respect to the image, using the global ``model``.
    The results are saved next to the originals as ``fgsm_<index>.png``.

    The label used for the loss is the model's own predicted class for the
    image, not the class implied by the directory name.

    Args:
        file_path: Class directory. It is used as a string prefix for both the
            glob pattern and the output file names, so it should end with a
            path separator.
        alpha: Fraction of the original images to perturb, in [0, 1].
        epsilon: Perturbation magnitude as a fraction of the pixel range.

    Raises:
        ValueError: If ``alpha`` is outside [0, 1].
    """
    if not 0 <= alpha <= 1:
        raise ValueError("alpha must be in [0, 1], got " + repr(alpha))

    # img_to_array() keeps the stored pixel values, so arithmetic happens in the
    # image's native range rather than in [-1, 1].
    pixel_max = 255.0  # assumes 8-bit input images — TODO confirm for other datasets

    # step 1: randomly pick alpha * (number of original images) from the directory.
    # Files produced by an earlier run ("fgsm_*") are skipped so that re-running
    # never perturbs already-perturbed images. Sorting makes the sample
    # reproducible when the `random` module has been seeded.
    original_img_paths = sorted(
        path
        for path in glob.glob(file_path + "*")
        if os.path.isfile(path) and not os.path.basename(path).startswith("fgsm_")
    )
    num_imgs = int(alpha * len(original_img_paths))
    random_sample_img_paths = random.sample(original_img_paths, num_imgs)

    def create_adversarial_pattern(input_image, input_label):
        """Return the sign of the loss gradient w.r.t. the image (direction that increases loss)."""
        with tf.GradientTape() as tape:
            # The image is a plain tensor, not a variable, so it must be watched explicitly.
            tape.watch(input_image)
            prediction = model(input_image, training=False)
            loss = loss_object(input_label, prediction)

        # Get the gradients of the loss w.r.t. the input image
        gradient = tape.gradient(loss, input_image)
        # Keep only the sign of the gradient to create the perturbation mask/noise
        return tf.sign(gradient)

    resultant_imgs = []
    # step 2: apply FGSM to the randomly sampled images
    for img_path in random_sample_img_paths:
        # Context manager releases the file handle once the pixels are copied out.
        with Image.open(img_path) as img:
            img_array = tf.keras.utils.img_to_array(img)
        img_array = tf.expand_dims(img_array, 0)  # batch axis expected to make prediction

        # Calling the model directly avoids the per-call overhead of predict()
        # for a single image. argmax is unaffected by softmax, so it is applied
        # to the raw model output.
        predictions = model(img_array, training=False)
        num_classes = predictions.shape[-1]
        label = tf.one_hot(tf.argmax(predictions, axis=-1), num_classes)  # shape (1, num_classes)

        perturbations = create_adversarial_pattern(img_array, label)
        adversarial_img = img_array + epsilon * pixel_max * perturbations
        adversarial_img = tf.clip_by_value(adversarial_img, 0.0, pixel_max)  # stay in valid pixel range

        # Drop the batch (and single-channel) axis and store as 8-bit pixels.
        resultant_imgs.append(np.rint(np.squeeze(adversarial_img[0])).astype(np.uint8))

    # step 3: write the perturbed images back into the class directory.
    # Explicit vmin/vmax stops imsave from stretching each image to its own
    # min/max, which would change its contrast.
    for ix, result_img in enumerate(resultant_imgs):
        plt.imsave(file_path + "fgsm_" + str(ix) + ".png", result_img, cmap='gray', vmin=0, vmax=255)

In [None]:
# Run the FGSM augmentation once per class, writing into that class's Train directory.
train_root = global_data_path + "Train/"
for class_name in class_names:
    perform_FGSM_aug(file_path=train_root + class_name + "/")

## At this point, each class has 510 more training images