# Using Noise by Perturbing Each Image in a Certain Region

In [6]:
import numpy as np
import tensorflow as tf
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Dense
from keras.layers import Flatten
import random

# returns the CIFAR-10 data set (info here: https://keras.io/api/datasets/cifar10/)
# The data set loaded is has 50,000 training images and 10,000 testing images, but the function below only takes
# a certain number train_size of training images from the original training set. The rest of the training examples
# are added to the test images.
def generate_data(train_size):
    # loads the data in 50000,10000 form with labels (y) as integers, not one-hot
    (x_train_bef, y_train_bef), (x_test_bef, y_test_bef) = tf.keras.datasets.cifar10.load_data()
    
    # shuffles 50000 training data
    np.random.shuffle(x_train_bef)
    np.random.shuffle(y_train_bef)
    
    # cuts off train_size amounts of sample data
    x_train = x_train_bef[:train_size]
    y_train = y_train_bef[:train_size]
    
    # adds the rest of the training data to the test data
    x_test = np.concatenate((x_train_bef[train_size:], x_test_bef), axis=0)
    y_test = np.concatenate((y_train_bef[train_size:], y_test_bef), axis=0)
    
    # turns image arrays into floats just so that everything is a float, not an int
    x_train = x_train.astype(np.float)
    x_test = x_test.astype(np.float)
    
    # turns label data into one-hot form
    y_train = tf.keras.utils.to_categorical(y_train)
    y_test = tf.keras.utils.to_categorical(y_test)

    return x_train, y_train, x_test, y_test

# looks at the training data, clones it, but then creates small changes to each image
# Currently, it takes a random 5x5 section of each image and perturbs each section
# the input it original training data which is not changed, but copied and then perturbed: the perturbed result
# is returned
def create_perturbed_clone(x_train, y_train):
    # new objects that are copied: changing x_train_pert won't change x_train
    x_train_pert = x_train.copy()
    y_train_pert = y_train.copy()
    
    # for each image in x_train_pert we look at a random location and perturb the 5x5 region
    length=x_train_pert.shape[0]
    for i in range(length):
        for repeat in range(3):
            loc_x = random.randrange(2,30) # x-coord of center of 5x5 region
            loc_y = random.randrange(2,30) # y-coord of center of 5x5 region
            
            # each pixel and each RGB value is perturbed
            for u in [-2,-1,0,1,2]:
                for v in [-2,-1,0,1,2]:
                    x_train_pert[i][loc_x+u][loc_y+v][0] += 50*(random.random()-0.5)
                    x_train_pert[i][loc_x+u][loc_y+v][1] += 50*(random.random()-0.5)
                    x_train_pert[i][loc_x+u][loc_y+v][2] += 50*(random.random()-0.5)
    
    return x_train_pert, y_train_pert

# takes training data and adds a perturbed copy to make the training set enlarge_factor times larger
def create_noise(x_train, y_train, enlarge_factor):
    x_train_noisy = x_train.copy()
    y_train_noisy = y_train.copy()
    for i in range(enlarge_factor-1):
        x_add, y_add = create_perturbed_clone(x_train, y_train)
        x_train_noisy = np.concatenate((x_train_noisy, x_add))
        y_train_noisy = np.concatenate((y_train_noisy, y_add))
    return x_train_noisy, y_train_noisy

# creates network: we use a convolutional neural network which makes sense for this problem
def create_model():
    model = Sequential()
    model.add(Conv2D(32, (3, 3), activation='relu', padding='same', input_shape=(32, 32, 3)))
    model.add(Conv2D(32, (3, 3), activation='relu', padding='same'))
    model.add(MaxPooling2D((2, 2)))
    #model.add(Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same'))
    #model.add(Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same'))
    #model.add(MaxPooling2D((2, 2)))
    model.add(Flatten())
    model.add(Dense(70, activation='relu', kernel_regularizer='l2'))
    model.add(Dense(30, activation='relu'))
    model.add(Dense(10, activation='softmax')) # output lyer is 10-dimension one-hot, so softmax is used
    
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

def run_model(Epochs, BatchSize, trainSize, Set_enlarge, noisy=False):
    x_train, y_train, x_test, y_test = generate_data(trainSize)
    if noisy:
        x_train, y_train = create_noise(x_train, y_train, Set_enlarge)
    MODEL = create_model()
    MODEL.fit(x=x_train, y=y_train, epochs=Epochs, batch_size=BatchSize)
    MODEL.evaluate(x_test,y_test)

In [7]:
# Whether with or without noise, the model is accurate about 0.1 of the time on the test data, i.e. it is no
# better than a random guesser.
run_model(8,64,6000, noisy=True, Set_enlarge=15)

Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
