# 1. Introduction

The purpose of this notebook is to create an adversarial example against my old, simple Convolutional Neural Network, which recognizes hand gestures for sign language. We will mislead the network.

## 1.1. Imports and rebuilding the former network

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img,img_to_array
from tensorflow.keras.layers import Input
from matplotlib import pyplot as plt
import matplotlib.image as mpimg
from tensorflow.python.keras import backend



# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the read-only Kaggle input tree and print the full path of every file in it.
for folder, _subfolders, file_names in os.walk('/kaggle/input'):
    for file_name in file_names:
        file_path = os.path.join(folder, file_name)
        print(file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Load the Sign Language MNIST training table: a "label" column plus the pixel columns.
df_train = pd.read_csv("/kaggle/input/sign-language-mnist/sign_mnist_train/sign_mnist_train.csv")

# alphabet[k] is the letter that corresponds to class label k.
alphabet = [chr(code) for code in range(ord('a'), ord('z') + 1)]

# Keep the very first sample aside: it is the image that will be perturbed later,
# so it is removed from the training table.
adversarial = df_train.loc[0]
df_train = df_train.drop(0)

# Integer class labels, one per remaining sample.
y = np.array(df_train["label"])

# Pixel values scaled to [0, 1] and laid out as (samples, 28, 28, 1) images.
X = np.array(df_train.drop(columns=['label'])) / 255
X = X.reshape((-1, 28, 28, 1))

# One-hot labels with shape (classes, samples): a single fancy-indexed assignment
# sets Y[label_i, i] = 1 for every sample i. The array is already (26, samples),
# so no further reshape is needed.
n_samples = df_train.shape[0]
Y = np.zeros((len(alphabet), n_samples))
Y[y, np.arange(len(y))] = 1

In [None]:
def _conv_stage(n_filters):
    """Return the layers of one convolutional stage: two 3x3 convs, 2x2 max-pool, dropout 0.2."""
    return [
        tf.keras.layers.Convolution2D(n_filters, (3, 3), activation='relu', padding='same'),
        tf.keras.layers.Convolution2D(n_filters, (3, 3), activation='relu', padding='same'),
        tf.keras.layers.MaxPooling2D((2, 2), strides=None, padding='same'),
        tf.keras.layers.Dropout(0.2),
    ]


# Same architecture as the former network: two convolutional stages (32 then 64
# filters) followed by a dense classifier ending in a 26-way softmax. The last
# layer is named "predictions" so it can be looked up by name later on.
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(28, 28, 1)),
    *_conv_stage(32),
    *_conv_stage(64),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(556, activation='relu'),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.1),
    tf.keras.layers.Dense(26, activation='softmax', name="predictions"),
])

model.summary()

# Integer labels are used directly, hence the sparse categorical cross-entropy.
model.compile(
    optimizer='adam',
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

# Training is intentionally left disabled; the attack below runs on the untrained network.
#history = model.fit(X,y,batch_size=64,epochs=3, validation_split=0.2)

# 2. Adversarial example

Here is our example that will tear apart that network.

In [None]:
# Class the attack should make the network predict: the label of training row 26.
# Label-based access replaces `df_train.loc[26][0]`, whose integer key on a
# string-indexed Series is a deprecated positional lookup in recent pandas.
target_class = df_train.loc[26, "label"]

# Pixel values of the sample that was set aside (position 0 holds the label, so skip it).
img_in = np.array(adversarial.iloc[1:])

# The gradient has to be taken with respect to a floating-point tensor; with
# integer pixels no usable gradient is obtained. Cast to the Keras default float
# type through the public `tf.keras.backend` API instead of the private
# `tensorflow.python.keras` module.
if not np.issubdtype(img_in.dtype, np.floating):
    img_in = img_in.astype(tf.keras.backend.floatx(), copy=False)

# NOTE(review): the training images are divided by 255 during preprocessing, but
# this image keeps its raw pixel scale. Confirm whether that mismatch is
# intended; it may matter once the network is actually trained.

# Show the untouched image before any perturbation is applied.
plt.imshow(img_in.reshape(28,28,1).astype(int), cmap="gray")
plt.show()

# Add the batch dimension and hand the image over to TensorFlow.
img_in = tf.convert_to_tensor(img_in.reshape((1,28,28,1)))

This is a 'd', but we will make the network think it is a 'y', which looks totally different.

In [None]:
# Display training row 26, an example of the class we want the network to output.
# Fields are read by position (`.iloc`) and by column name instead of through
# integer keys on a string-indexed Series, which recent pandas deprecates.
target_row = df_train.loc[26]
ar = np.array(target_row.iloc[1:]).reshape((28,28))
plt.imshow(ar, cmap='gray')
plt.title(alphabet[target_row["label"]])
plt.show()

We need to extract the last layer of the network as it does the classification work.

In [None]:
# Class index the attack aims for: the label of training row 26.
# `.loc[26, "label"]` avoids the deprecated positional `Series[0]` lookup.
target_class = df_train.loc[26, "label"]

In [None]:
# Fetch the final softmax layer by the name it was given at construction time
# and keep its symbolic output, i.e. the per-class probabilities.
predictions_layer = model.get_layer("predictions")
last_layer = predictions_layer.output

In [None]:
# Build a model that maps the network's input image straight to the output of
# the "predictions" layer extracted above.
img_backend = model.input
adversarial_model = tf.keras.Model(
    inputs=img_backend,
    outputs=last_layer,
)

We must redefine the loss function for the adversarial model and also redefine the gradient step function, so that it calls the new adversarial loss and takes a new parameter `target_class` as an input. 
This means that our new loss is actually <b>the probability the network assigns to the target class</b>: we will modify the image by gradient ascent so that this value becomes as high as possible.

In [None]:
def adversarial_loss(model_in,img_in,target_class):
    """Return the model's output for `target_class` on the first image of the batch.

    Args:
        model_in: callable applied to `img_in`; its result is indexed as
            `[sample, class]`.
        img_in: batch of input images passed to `model_in`.
        target_class: index of the class whose score is returned.

    Returns:
        The entry `[0, target_class]` of `model_in(img_in)`.
    """
    scores = model_in(img_in)
    return scores[0, target_class]

def adversarial_gradient_step(model_in, img_in, step_size,target_class):
    """Perform one gradient-ascent step that raises the target-class score.

    Args:
        model_in: model passed on to `adversarial_loss`.
        img_in: floating-point image tensor; the gradient is taken with
            respect to it.
        step_size: multiplier applied to the normalised gradient.
        target_class: index of the class whose score is being maximised.

    Returns:
        A tuple `(new_img, loss)`: the image after the step, and the loss
        evaluated on the image *before* the step.

    Raises:
        ValueError: if no gradient can be computed with respect to `img_in`
            (for example because it is not a floating-point tensor).
    """
    with tf.GradientTape() as tape:
        # img_in is a plain tensor, not a Variable, so it must be watched explicitly.
        tape.watch(img_in)
        loss = adversarial_loss(model_in,img_in,target_class)

    # Compute the gradient once recording has stopped, so that the gradient
    # computation itself is not traced by the tape.
    grads = tape.gradient(loss,img_in)
    if grads is None:
        raise ValueError(
            "No gradient could be computed with respect to img_in; "
            "make sure it is a floating-point tensor connected to the model output."
        )

    # Normalise by the standard deviation so the step does not depend on the raw
    # gradient scale; the epsilon avoids a division by zero.
    grads = grads/(tf.math.reduce_std(grads)+1e-8)

    # Ascent: move along the gradient to increase the loss.
    new_img = img_in + grads * step_size
    return new_img, loss

In [None]:
step = 0.5  # Gradient ascent step size
n_iterations = 200  # Number of gradient ascent steps

# Number of consecutive checks at which the network confidently predicted the target class.
counter = 0

for ii in range(n_iterations):
    img_in, loss = adversarial_gradient_step(model,img_in,step,target_class)

    # Evaluate the network only every fifth iteration.
    if ii % 5 == 0:
        y_predicted = model.predict(img_in)
        top_class = np.argmax(y_predicted)
        top_prob = np.amax(y_predicted)
        print(alphabet[top_class], top_prob)

        # A check succeeds when the target class wins with at least 90% confidence;
        # any failed check resets the streak.
        fooled = top_class == target_class and top_prob >= 0.9
        counter = counter + 1 if fooled else 0

    # Stop once five checks in a row have succeeded.
    if counter >= 5:
        break

print('End of optimisation')

# Show the final adversarial image, titled with the letter the network now predicts.
final_image = img_in.numpy().reshape(28,28,1)
plt.imshow(final_image, cmap="gray")
final_letter = alphabet[np.argmax(model.predict(img_in))]
plt.title(final_letter)
plt.show()

## Remarks : 

* Although this technique works on the untrained network, I could not make it work once the network is trained: the gradient is far too small and I cannot mislead the network.
* Often, the gradient descent (which is really an ascent here) seems to get stuck and the network must be reset.