# In diesem File werden die Modelle erstellt, trainiert und die Experimente durchgeführt


# Imports, Variablen & Definitionen von Funktionen

In [None]:
# imports
import tensorflow as tf
from keras.datasets import mnist
import foolbox as fb
from keras import callbacks
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Activation
from keras.layers import Conv2D, MaxPooling2D, BatchNormalization
from keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import load_model
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from keras.datasets import mnist
from sklearn.neighbors import NearestNeighbors
from itertools import product
from scipy.ndimage.interpolation import rotate, shift
import csv


# Global configuration shared by all cells below.
epsilon=0.3  # perturbation budget handed to the Foolbox attack (epsilons=epsilon)
batch_size=1024  # batch size used when training the vanilla model
epochs=1000  # upper bound on training epochs; the fit calls in this file also use early stopping
pgd_steps=50  # number of steps of the PGD attack created below
batch_count=0  # cursor of next_batch() for the "mnist" data type
batch_count_inv=0  # cursor of next_batch() for the "inv" data type
print("Tensorflow version: ", tf.__version__)
print("Numpy version: ", np.__version__)
print("Foolbox version: ", fb.__version__)
print(tf.config.list_physical_devices('GPU'))
np.random.seed(10)  # seeds NumPy's global RNG only


# Load MNIST; the training images get a trailing channel axis and are scaled to [0, 1].
(x_train, y_train), (x_test, y_test) = mnist.load_data()
img_rows = img_cols = 28
x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
x_train = x_train.astype('float32')
# NOTE(review): x_test is only rescaled here; unlike x_train it is neither reshaped
# to (N, 28, 28, 1) nor cast to float32 in this cell.
x_train, x_test = x_train / 255.0, x_test / 255.0

# Tensor versions of the data for the attacks.
x_attack_to_train = tf.convert_to_tensor(x_train, dtype=tf.float32)
# NOTE(review): x_train already has a channel axis, so this adds a fifth dimension
# (N, 28, 28, 1, 1) -- confirm whether that is intended.
x_attack_to_train=x_attack_to_train[:,:,:,np.newaxis]
y_attack_to_train=tf.convert_to_tensor(y_train, dtype=tf.int32)

x_attack_to_test = tf.convert_to_tensor(x_test, dtype=tf.float32)
# x_test has no channel axis yet, so this yields (N, 28, 28, 1).
x_attack_to_test=x_attack_to_test[:,:,:,np.newaxis]
y_attack_to_test=tf.convert_to_tensor(y_test, dtype=tf.int32)

# L-infinity PGD attack instance reused by test_model() and ptb_training().
attack = fb.attacks.projected_gradient_descent.LinfProjectedGradientDescentAttack(steps=pgd_steps)

# Second, untouched copy of MNIST (original pixel values, no reshape/scaling),
# used for generating invariance-based adversarial examples.
(X_train, Y_train), (X_test, Y_test) = mnist.load_data()


# functions
def test_model(model):
    """Evaluate `model` on clean, perturbation-based and invariance-based examples.

    The first 100 MNIST test images are used as clean data and as the starting
    points for the global PGD `attack` (budget `epsilon`).  The first 100
    pre-generated invariance-based examples (epsilon 0.3) are loaded from
    data/invariance_examples_tramer/linf/.

    Args:
        model: compiled Keras classifier; `evaluate` must return [loss, accuracy].

    Returns:
        dict with the keys "clean", "ptb" and "inv" (each a dict holding
        "loss" and "accuracy") and "inv_success_rate", the fraction of
        invariance-based examples whose predicted class differs from the
        stored label.

    Raises:
        ValueError: if the global `epsilon` is not 0.3, because the
            invariance-based examples on disk were generated for that budget.
    """
    if epsilon != 0.3:
        raise ValueError("test_model requires epsilon == 0.3, got {}".format(epsilon))

    inv_advs_to_test=np.load("data/invariance_examples_tramer/linf/automated_eps03.npy")[0:100]
    inv_labels_to_test=np.load("data/invariance_examples_tramer/linf/automated_eps03_labels.npy")[0:100]
    fmodel=fb.models.tensorflow.TensorFlowModel(model, bounds=(0,1))

    x_batch,y_batch=x_test[0:100],y_test[0:100]
    x_batch_to_test=tf.convert_to_tensor(x_batch, dtype=tf.float32)
    y_batch_to_test=tf.convert_to_tensor(y_batch, dtype=tf.int32)

    # Only the adversarial images are needed; raw outputs and success flags are ignored.
    _,advs_to_test,_=attack(fmodel, x_batch_to_test, y_batch_to_test, epsilons=epsilon)
    ptb_test=tf.keras.backend.get_value(advs_to_test)

    # num_classes is fixed so the one-hot width never depends on which digits
    # happen to occur in the evaluated slice.
    clean_targets=to_categorical(y_batch, num_classes=10)
    inv_targets=to_categorical(inv_labels_to_test, num_classes=10)

    # get accuracies and losses ([loss, accuracy] each)
    acc=model.evaluate(x_batch, clean_targets, verbose=0)
    acc_ptb=model.evaluate(ptb_test, clean_targets, verbose=0)
    acc_inv=model.evaluate(inv_advs_to_test, inv_targets, verbose=0)

    # invariance-based success rate: share of examples where prediction != label
    predictions=model.predict(inv_advs_to_test)
    total=len(predictions)
    predicted_classes=np.reshape(predictions, (total, -1)).argmax(axis=1)
    disagreeing=int(np.sum(predicted_classes != np.ravel(inv_labels_to_test)))
    inv_success_rate=disagreeing/total if total else 0.0

    return {
        "clean":{"loss": acc[0], "accuracy":acc[1]},
        "ptb":{"loss": acc_ptb[0], "accuracy":acc_ptb[1]},
        "inv":{"loss": acc_inv[0], "accuracy":acc_inv[1]},
        "inv_success_rate":inv_success_rate}


def create_vanilla_model():
    """Build, train and return the baseline ("vanilla") CNN.

    The first 10000 samples of the global training set are held out for
    validation and the rest is used for fitting.  Training runs for at most
    `epochs` epochs with batch size `batch_size` and stops early on the
    validation loss (patience 1, best weights restored).  After training, the
    accuracy on the first 100 test samples is printed.

    Returns:
        The trained Keras `Sequential` model.
    """
    print("creating vanilla model...")

    holdout = 10000
    val_images, partial_images = x_train[:holdout], x_train[holdout:]
    val_labels, partial_labels = y_train[:holdout], y_train[holdout:]

    # conv(32) -> pool -> conv(64) -> pool -> dense(1024) -> softmax(10)
    model = Sequential([
        Conv2D(32, (5, 5), activation='relu', kernel_initializer='he_uniform', input_shape=(28, 28, 1)),
        MaxPooling2D(pool_size=(2, 2)),
        Conv2D(64, (5, 5), activation='relu', kernel_initializer='he_uniform'),
        MaxPooling2D(pool_size=(2, 2)),
        Flatten(),
        Dense(1024, activation='relu', kernel_initializer='he_uniform'),
        Dense(10, activation='softmax'),
    ])

    stop_on_val_loss = callbacks.EarlyStopping(
        monitor="val_loss",
        mode="min",
        patience=1,
        restore_best_weights=True,
    )

    model.compile(optimizer='RMSprop', loss='categorical_crossentropy', metrics=['accuracy'])
    print("training vanilla model...")
    model.fit(
        partial_images,
        to_categorical(partial_labels),
        validation_data=(val_images, to_categorical(val_labels)),
        batch_size=batch_size,
        epochs=epochs,
        shuffle=True,
        verbose=2,
        callbacks=[stop_on_val_loss],
    )

    print(np.shape(x_test))
    clean_scores = model.evaluate(x_test[0:100], to_categorical(y_test[0:100]))
    print('BEFORE RETRAIN: Accuracy on clean testing data', clean_scores[1])

    return model

def create_vanilla_model_tramer(filters=64, s1=5, s2=5, s3=3,
               d1=0, d2=0, fc=256,
               lr=1e-3, decay=1e-3):
    """Build and train the alternative CNN architecture and return it.

    A logits network (three conv layers with batch normalisation, dropout and
    one hidden dense layer) is wrapped in a second `Sequential` that appends a
    softmax.  The wrapper is trained on the full global training set for 20
    epochs with batch size 256.

    Args:
        filters: filters of the first conv layer; the later ones use twice as many.
        s1, s2, s3: kernel sizes of the three conv layers.
        d1, d2: dropout rates after the conv stack and after the dense layer.
        fc: width of the hidden dense layer.
        lr, decay: accepted but not used by this function.

    Returns:
        The trained wrapper model (logits network followed by softmax).
    """
    logits_net = Sequential([
        Conv2D(filters, kernel_size=(s1, s1), activation='relu', input_shape=(28, 28, 1)),
        MaxPooling2D(pool_size=(2, 2)),
        Conv2D(filters*2, (s2, s2), activation='relu'),
        BatchNormalization(),
        Conv2D(filters*2, (s3, s3), activation='relu'),
        BatchNormalization(),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(d1),
        Flatten(),
        Dense(fc, activation='relu'),
        Dropout(d2),
        Dense(10),
    ])
    logits_net.compile(loss='categorical_crossentropy',
                       optimizer='Adam',
                       metrics=['accuracy'])

    # Wrap the logits network so the returned model outputs probabilities.
    final = Sequential([logits_net, Activation('softmax')])
    final.compile(loss='categorical_crossentropy',
                  optimizer='Adam',
                  metrics=['accuracy'])

    one_hot_labels = to_categorical(y_train, 10)
    final.fit(
        x_train,
        one_hot_labels,
        batch_size=256,
        epochs=20,
        shuffle=True,
        verbose=2,
    )
    return final

def next_batch(data, labels, data_type):
    """Return the next batch of 100 samples and advance the matching cursor.

    Two independent module-level cursors are kept: `batch_count` for
    data_type "mnist" and `batch_count_inv` for data_type "inv".  After the
    last full batch the cursor wraps around to the first one.  The number of
    batches is derived from `len(data)`, so for 60000 MNIST samples (600
    batches) and 500 invariance examples (5 batches) the behaviour matches
    the previously hard-coded limits.

    Args:
        data: sliceable sequence of samples.
        labels: sliceable sequence of labels, aligned with `data`.
        data_type: "mnist" or "inv"; selects which cursor is used.

    Returns:
        Tuple (data_batch, label_batch) of up to 100 items each.

    Raises:
        ValueError: if `data_type` is neither "mnist" nor "inv".
    """
    global batch_count, batch_count_inv

    if data_type not in ("mnist", "inv"):
        raise ValueError('data_type must be "mnist" or "inv", got {!r}'.format(data_type))

    batch_len = 100
    # Only full batches take part in the rotation; at least one batch so the
    # modulo below is always defined.
    n_batches = max(1, len(data) // batch_len)

    if data_type == "mnist":
        index = batch_count % n_batches
        batch_count = (index + 1) % n_batches
    else:
        index = batch_count_inv % n_batches
        batch_count_inv = (index + 1) % n_batches

    start = index * batch_len
    end = start + batch_len
    return data[start:end], labels[start:end]


# https://github.com/ftramer/Excessive-Invariance
def linf_attack(x, nn_adv, eps):
    """Move `x` towards `nn_adv` under an L-infinity budget of `eps * 255`.

    Pixels whose distance to the target is within the budget (and that are
    non-zero in `x`) are set to the target value.  Pixels further away are
    moved by the full budget in the direction of the target.  The result is
    clipped to the budget around `x` and to the range [0, 255].

    Args:
        x: source image array (pixel values on the 0..255 scale).
        nn_adv: target image array of the same shape.
        eps: budget as a fraction of 255.

    Returns:
        float32 array with the shape of `x`; `x` itself is not modified.
    """
    base = x.astype(np.float32)
    goal = nn_adv.astype(np.float32)
    adv = base.copy()

    delta = goal - x
    distance = np.abs(delta)

    # reachable pixels: take the target value directly
    reachable = (distance <= eps*255.) & (x > 0)
    adv[reachable] = goal[reachable]

    # unreachable pixels: step as far as the budget allows
    too_far = distance > eps*255.
    direction = np.sign(delta)
    adv[too_far] += direction[too_far] * eps * 255.

    adv = np.clip(adv, base - eps*255, base + eps*255)
    return np.clip(adv, 0, 255.)


# https://github.com/ftramer/Excessive-Invariance
def get_best_neighbors(x, y, all_NNs, grid):
    """Find, for every class other than `y`, the training image closest to any transformed `x`.

    Every (tx, ty, rotation) triple of `grid` is applied to `x`; the
    per-class nearest-neighbour index in `all_NNs` is then queried with all
    transformed variants and the overall closest training image is kept.

    Args:
        x: source image (reshaped to 784 values after transformation).
        y: label of `x`; this class is skipped.
        all_NNs: sequence of ten fitted nearest-neighbour indexes, one per class.
        grid: sequence of (tx, ty, rotation) triples.

    Returns:
        Tuple (nns, y_nns, grids_nn): the chosen neighbour image per class,
        its label, and the negated grid entry that produced the best match.
    """
    variants = np.asarray([
        shift(rotate(x, angle, reshape=False), (dx, dy)).reshape(784)
        for (dx, dy, angle) in grid
    ])

    nns, y_nns, grids_nn = [], [], []

    # one nearest neighbour per foreign class
    for digit in range(10):
        if digit == y:
            continue

        in_class = Y_train == digit
        class_images = X_train[in_class]
        class_labels = Y_train[in_class]
        distances, indices = all_NNs[digit].kneighbors(variants, n_neighbors=1)

        # position of the globally smallest distance = index of the winning grid entry
        best = np.argmin(np.ravel(distances))
        best_idx = np.ravel(indices)[best]
        nns.append(class_images[best_idx])
        y_nns.append(class_labels[best_idx])

        # keep the inverse rotation+translation, to be applied to the target later
        grids_nn.append(-np.asarray(grid[best]))

    return nns, y_nns, grids_nn


# https://github.com/ftramer/Excessive-Invariance
def generate_inv_adv_examples(epsilon_to_use, count, save):
    """Generate invariance-based adversarial examples for the first `count` MNIST test images.

    For each source image the closest training image of every other class is
    searched over a grid of rotations and translations (`get_best_neighbors`).
    The inverse transformation is applied to that neighbour and `linf_attack`
    moves the source towards it within the L-infinity budget.  Among the
    manually chosen target classes, the candidate that removes the least
    pixel mass from the source is kept.

    The in-memory results are used directly.  Earlier, the intermediate
    arrays were always re-read from disk, which silently used stale files
    (or failed) when `save` was false.

    NOTE: the examples are built from the MNIST *test* split rather than the
    training split.  The author judged this harmless because the generated
    images are new to the model, and fixing it would require the ten human
    annotators to relabel all 500 images and all experiments to be rerun.
    The models are tested with only 100 of the examples.

    Args:
        epsilon_to_use: L-infinity budget as a fraction of 255; must be 0.3 or 0.4.
        count: number of leading test images to process.
        save: if true, write the intermediate neighbour data and the final
            examples and labels as .npy files below data/.

    Returns:
        None.  Results are only persisted when `save` is true.

    Raises:
        ValueError: if `epsilon_to_use` is neither 0.3 nor 0.4.
    """
    if epsilon_to_use not in (0.3, 0.4):
        raise ValueError("epsilon_to_use must be 0.3 or 0.4, got {}".format(epsilon_to_use))

    idxs = np.arange(count, dtype=int)
    test_xs = X_test[idxs]
    # Labels come from the same raw dataset copy as the images.
    test_ys = Y_test[idxs]

    # build a 1-nearest-neighbour index per class on flattened (28*28=784) images
    all_NNs = []
    for digit in range(10):
        class_images = X_train[Y_train == digit].reshape(-1, 784)
        nn = NearestNeighbors(n_neighbors=1)
        nn.fit(class_images)
        all_NNs.append(nn)

    # Rotation-translation parameters: (tx, ty, rotation) limits and grid resolution
    limits = [3, 3, 30]
    granularity = [5, 5, 31]
    grid = list(product(*(np.linspace(-limit, limit, num=steps)
                          for limit, steps in zip(limits, granularity))))

    all_nns = []
    all_y_nns = []
    all_grids_nns = []

    # find nearest neighbours for the test inputs (this takes a little while)
    for i, (x, y) in enumerate(zip(test_xs, test_ys)):
        if i % 10 == 0:
            print("{}/{} done".format(i, len(idxs)))

        # nearest neighbour per class, with the inverse rotation/translation to apply
        nns, y_nns, grids_nns = get_best_neighbors(x, y, all_NNs, grid)
        nn_advs = [shift(rotate(nn, r, reshape=False), (tx, ty))
                   for (nn, (tx, ty, r)) in zip(nns, grids_nns)]
        all_nns.append(nn_advs)
        all_y_nns.append(y_nns)
        all_grids_nns.append(np.asarray(grids_nns))

    # save the intermediate results
    if save:
        np.save("data/invariance_examples_generation/X_test_{}.npy".format(count), test_xs)
        np.save("data/invariance_examples_generation/all_nns.npy", np.asarray(all_nns))
        np.save("data/invariance_examples_generation/all_y_nns.npy", np.asarray(all_y_nns))
        np.save("data/invariance_examples_generation/all_grids_nns.npy", np.asarray(all_grids_nns))

    # manually chosen target classes for each source class
    targets = {
        0: [4, 6, 8, 9],
        1: [4, 6, 7, 9],
        2: [8],
        3: [8],
        4: [8, 9],
        5: [3, 8],
        6: [0],
        7: [2, 3],
        8: [3],
        9: [3, 4, 5]
    }

    best_y_advs = []
    best_advs = []

    for x, y, nn_advs, y_nns in zip(test_xs, test_ys, all_nns, all_y_nns):
        best_x_adv = None
        best_y = None
        min_removed = np.inf

        for nn_adv, y_nn in zip(nn_advs, y_nns):
            if y_nn not in targets[y]:
                continue

            x_adv = linf_attack(x, nn_adv, epsilon_to_use)

            # retain the target that required the least amount of pixels to be "removed"
            curr_removed = np.sum(np.abs(np.maximum(x/255. - x_adv/255., 0)))
            if curr_removed < min_removed:
                min_removed = curr_removed
                best_y = y_nn
                best_x_adv = x_adv

        best_advs.append(best_x_adv)
        best_y_advs.append(best_y)

    if save:
        eps_dir = "epsilon_0.3" if epsilon_to_use == 0.3 else "epsilon_0.4"
        np.save("data/invariance_examples/{}/invariance-based_adversarial_examples".format(eps_dir), best_advs)
        np.save("data/invariance_examples/{}/invariance-based_adversarial_examples_new_labels".format(eps_dir), best_y_advs)
 

        
def _split_80_20(samples, labels):
    """Return (train_x, train_y, val_x, val_y): first 80% for training, the rest for validation."""
    cut = int(len(samples) * 0.8)
    end = len(samples)
    return samples[:cut], labels[:cut], samples[cut:end], labels[cut:end]


def _fit_with_early_stopping(model, x_fit, y_fit, x_val, y_val, n_epochs, stopper):
    """Fit `model` on one-hot encoded labels with a validation set and the given callback."""
    model.fit(x_fit, to_categorical(y_fit, num_classes=10),
              validation_data=(x_val, to_categorical(y_val, num_classes=10)),
              epochs=n_epochs,
              verbose=0,
              callbacks=[stopper])


def ptb_training(ptb_acc_to_achieve, model_to_train, include_inv_training=False, inclusive_training=False, use_iterations=False, iterations=10):
    """Iteratively attack `model_to_train` with PGD and retrain it on the adversarial examples.

    Each round evaluates the model with `test_model`, takes the next batch of
    100 training samples, attacks the current model and retrains it on the
    resulting adversarial examples (80% training / 20% validation).  At the
    end the recorded accuracies are plotted.

    Args:
        ptb_acc_to_achieve: in accuracy mode, rounds continue while the
            measured perturbation accuracy is <= this value.
        model_to_train: compiled Keras model; it is trained in place.
        include_inv_training: after each PGD retraining, additionally fit on
            the whole invariance-based example set (10 epochs).  Ignored when
            `inclusive_training` is true.
        inclusive_training: mix each PGD batch with a batch of
            invariance-based examples and fit on both together.
        use_iterations: if true, run exactly `iterations` rounds instead of
            looping until the target accuracy is exceeded.
        iterations: number of rounds in iteration mode.

    Returns:
        dict with "model" (the trained model) and "clean", "ptb", "inv", each
        a dict whose "accuracy" entry lists the accuracies measured at the
        start of every round (i.e. before that round's retraining).
    """
    if inclusive_training:
        include_inv_training = False

    inv_advs_to_train = np.load("data/invariance_examples/epsilon_0.3/invariance-based_adversarial_examples.npy")
    inv_labels_to_train = np.load("data/invariance_examples/epsilon_0.3/invariance-based_adversarial_examples_human_labels.npy")

    earlystopping = callbacks.EarlyStopping(monitor="val_loss",
                                            mode="min", patience=1,
                                            restore_best_weights=True)

    ptb_acc = 0
    i = 0
    y_axis = []
    x_axis_ptb = []
    x_axis_clean = []
    x_axis_inv = []

    # Both modes share the loop body; only the stop condition differs.
    # NOTE(review): accuracy mode has no upper bound on the number of rounds.
    while (i < iterations) if use_iterations else (ptb_acc <= ptb_acc_to_achieve):
        res = test_model(model_to_train)
        ptb_acc = res["ptb"]["accuracy"]
        clean_acc = res["clean"]["accuracy"]
        inv_acc = res["inv"]["accuracy"]

        i += 1
        y_axis.append(i)
        x_axis_ptb.append(ptb_acc)
        x_axis_clean.append(clean_acc)
        x_axis_inv.append(inv_acc)

        # The Foolbox wrapper is rebuilt each round around the current model.
        fmodel = fb.models.tensorflow.TensorFlowModel(model_to_train, bounds=(0, 1))
        x_batch, y_batch = next_batch(x_train, y_train, "mnist")

        x_batch_to_train = tf.convert_to_tensor(x_batch, dtype=tf.float32)
        y_batch_to_train = tf.convert_to_tensor(y_batch, dtype=tf.int32)

        # attack model
        _, advs, _ = attack(fmodel, x_batch_to_train, y_batch_to_train, epsilons=epsilon)
        x = tf.keras.backend.get_value(advs)

        if inclusive_training:
            # Drop the channel axis so the PGD examples can be concatenated
            # with the invariance-based examples below.
            x = x[:, :, :, 0]

            # perturbation-based adversarial examples
            x_training, y_training, x_validation, y_validation = _split_80_20(x, y_batch)

            # invariance-based adversarial examples
            x_inv, y_inv = next_batch(inv_advs_to_train, inv_labels_to_train, "inv")
            x_inv_training, y_inv_training, x_inv_validation, y_inv_validation = _split_80_20(x_inv, y_inv)

            # combine them into one array
            x_training = np.append(x_training, x_inv_training, axis=0)
            x_validation = np.append(x_validation, x_inv_validation, axis=0)
            y_training = np.append(y_training, y_inv_training)
            y_validation = np.append(y_validation, y_inv_validation)

            _fit_with_early_stopping(model_to_train, x_training, y_training,
                                     x_validation, y_validation, epochs, earlystopping)
        else:
            # Retrain with the generated perturbation-based adversarial examples
            x_training, y_training, x_validation, y_validation = _split_80_20(x, y_batch)
            _fit_with_early_stopping(model_to_train, x_training, y_training,
                                     x_validation, y_validation, epochs, earlystopping)

            if include_inv_training:
                # Separate, short fit on the complete invariance-based set.
                x_training, y_training, x_validation, y_validation = _split_80_20(
                    inv_advs_to_train, inv_labels_to_train)
                _fit_with_early_stopping(model_to_train, x_training, y_training,
                                         x_validation, y_validation, 10, earlystopping)

        print("i: {} ptb acc: {}, inv_acc: {}".format(i, ptb_acc, inv_acc))

    plt.plot(y_axis, x_axis_inv, label="INV")
    plt.plot(y_axis, x_axis_clean, label="Clean")
    plt.plot(y_axis, x_axis_ptb, label="PTB")
    plt.xlabel('Iterationen')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.show()
    return {
        "model": model_to_train,
        "clean": {"accuracy": x_axis_clean},
        "ptb": {"accuracy": x_axis_ptb},
        "inv": {"accuracy": x_axis_inv},
    }

# Erstelle/Trainiere das Vanilla Model

In [None]:
# Train the vanilla model from scratch and store it under models/vanilla_model.
create_vanilla_model().save("models/vanilla_model")

# Greife Vanilla Modell an und Retrainiere mit Perturbation-Based Adversarial Examples iterativ

In [None]:
# Load the previously saved vanilla model.
model = load_model("models/vanilla_model")

ptb_acc_to_achieve = 0.88
# ptb_training returns a dict ("model", "clean", "ptb", "inv"), not a tuple,
# so the result is unpacked by key.
training_result = ptb_training(ptb_acc_to_achieve, model, use_iterations=True, iterations=100)
model = training_result["model"]
# Last recorded perturbation accuracy; it was measured at the start of the
# final round, i.e. before that round's retraining.
ptb_acc = training_result["ptb"]["accuracy"][-1]
model.save("models/ptb_trained_model_{}_ptb_accuracy".format(ptb_acc))

# Gib PTB Adversarial Training Graph aus

In [None]:
def filter_array(array):
    """Return a list holding every tenth element of `array` (indices 0, 10, 20, ...)."""
    return [array[position] for position in range(0, len(array), 10)]


# Load the arrays recorded during PTB adversarial training
# (file names: iteration counter, clean accuracy, perturbation accuracy).
y=np.load("data/ptb_training/iteration_count_arr.npy")
clean_arr=np.load("data/ptb_training/clean_accuracy_arr.npy")
ptb_arr=np.load("data/ptb_training/ptb_accuracy_arr.npy")
# First figure: every recorded point.
plt.plot( y, clean_arr, label = "Clean")
plt.plot( y, ptb_arr,label = "PTB")
plt.xlabel('Iterationen')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

# Thin the curves out to every tenth point for a less cluttered second figure.
new_arr_x_ptb=filter_array(ptb_arr)
new_arr_x_clean=filter_array(clean_arr)
new_arr_y=filter_array(y)


# Second figure: the thinned-out curves.
plt.plot( new_arr_y, new_arr_x_clean, label = "Clean")
plt.plot( new_arr_y, new_arr_x_ptb,label = "PTB")
plt.xlabel('Iterationen')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

# Best perturbation accuracy over all recorded points (not only the thinned ones).
print("Max accuracy against PTB: {}".format(np.max(ptb_arr)))



# Generiere Invariance-Based Adversarial Examples
Code ist von https://github.com/ftramer/Excessive-Invariance

In [None]:
# --- DON'T UNCOMMENT THIS. this would overwrite the Invariance-Based Adversarial Examples! ---
# generate_inv_adv_examples(0.3,500,True)
# generate_inv_adv_examples(0.4,500,True)


# Gib Invariance-Based Adversarial Examples aus
Diese Beispiele wurden von den zehn Personen angeschaut und die Labels wurden bestimmt

In [None]:
# Display all stored invariance-based adversarial examples, first for
# epsilon 0.3 and then for epsilon 0.4, in grids of 10 images per row.
# The number of rows follows the number of stored examples (50 rows for 500).
columns = 10
for position, eps_label in enumerate(("0.3", "0.4")):
    base_path = "data/invariance_examples/epsilon_{}/invariance-based_adversarial_examples".format(eps_label)
    inv_advs_to_train = np.load(base_path + ".npy")
    inv_labels_to_train = np.load(base_path + "_new_labels.npy")

    if position > 0:
        # two blank lines between the sections
        print()
        print()
    print("----------EPSILON={}----------".format(eps_label))

    n_examples = len(inv_advs_to_train)
    n_rows = max(1, -(-n_examples // columns))  # ceiling division
    # squeeze=False keeps `axes` two-dimensional even for a single row.
    fig, axes = plt.subplots(n_rows, columns, figsize=(1.5*columns, 2*n_rows), squeeze=False)
    for i, ax in enumerate(axes.flat):
        if i < n_examples:
            ax.imshow(inv_advs_to_train[i], cmap='gray')
            ax.set_title('Count: {}'.format(i))
        else:
            # hide unused cells of a partially filled last row
            ax.axis('off')
    plt.tight_layout()
    plt.show()

# Erster Durchlauf
Anzahl an Invariance-Based Adversarial Examples beim Trainieren variiert. Immer die neuen Labels verwenden

In [None]:
# First run: retrain the vanilla model and the PTB-trained model with a growing
# number of invariance-based adversarial examples (automatically assigned "new"
# labels) and log the test results of every step to CSV.

# epsilon
epsilon = 0.3

# numbers of invariance-based examples to train with: 5, 10, ..., 500
c = list(range(5, 501, 5))

vanilla_model = load_model("models/vanilla_model")

# m=l_infinity_PGD
# a=88.9
ptb_trained_model = load_model("models/ptb_trained_model_0.889_ptb_accuracy_PGD")

# invariance-based adversarial examples to train, use ONLY THE NEW LABELS
inv_advs_to_train = np.load("data/invariance_examples/epsilon_0.3/invariance-based_adversarial_examples.npy")
inv_labels_to_train = np.load("data/invariance_examples/epsilon_0.3/invariance-based_adversarial_examples_new_labels.npy")

csv_header = ["c", "clean_acc", "ptb_acc", "inv_acc", "clean_loss", "ptb_loss", "inv_loss", "inv_success_rate"]


def _result_row(example_count, res):
    """Flatten one test_model() result into a CSV row matching `csv_header`."""
    return [example_count,
            res["clean"]["accuracy"],
            res["ptb"]["accuracy"],
            res["inv"]["accuracy"],
            res["clean"]["loss"],
            res["ptb"]["loss"],
            res["inv"]["loss"],
            res["inv_success_rate"]]


results_inv_trained = []
results_ptb_inv_trained = []

# The context manager closes both CSV files even if training or evaluation fails.
with open('data/results/erster_durchlauf/inv_trained.csv', 'w', encoding='UTF8', newline='') as handler_inv_trained, \
        open('data/results/erster_durchlauf/ptb_inv_trained.csv', 'w', encoding='UTF8', newline='') as handler_ptb_inv_trained:
    writer_inv_trained = csv.writer(handler_inv_trained)
    writer_ptb_inv_trained = csv.writer(handler_ptb_inv_trained)
    writer_inv_trained.writerow(csv_header)
    writer_ptb_inv_trained.writerow(csv_header)

    # Baseline rows (c = 0): both models before any invariance-based retraining.
    initial_results_vanilla = test_model(vanilla_model)
    initial_results_ptb = test_model(ptb_trained_model)
    writer_inv_trained.writerow(_result_row(0, initial_results_vanilla))
    writer_ptb_inv_trained.writerow(_result_row(0, initial_results_ptb))

    print("Initial results from Vanilla Model: {}".format(initial_results_vanilla))
    print("Initial results from PTB-Trained Model: {}".format(initial_results_ptb))

    for example_count in c:
        print("Training with {} examples...".format(example_count))
        train_images = inv_advs_to_train[0:example_count]
        train_labels = inv_labels_to_train[0:example_count]

        vanilla_model.fit(train_images, to_categorical(train_labels, num_classes=10),
                          epochs=10,
                          verbose=0)
        res = test_model(vanilla_model)
        results_inv_trained.append(res)
        writer_inv_trained.writerow(_result_row(example_count, res))

        ptb_trained_model.fit(train_images, to_categorical(train_labels, num_classes=10),
                              epochs=10,
                              verbose=0)
        res = test_model(ptb_trained_model)
        results_ptb_inv_trained.append(res)
        writer_ptb_inv_trained.writerow(_result_row(example_count, res))

        # Reload both models so the next step starts again from the saved
        # weights instead of accumulating training across steps.
        vanilla_model = load_model("models/vanilla_model")
        ptb_trained_model = load_model("models/ptb_trained_model_0.889_ptb_accuracy_PGD")


print()
print("----------Results INV-Trained Model----------")
for example_count, entry in zip(c, results_inv_trained):
    print("Clean accuracy INV_trained with {} examples: {}".format(example_count, entry.get("clean").get("accuracy")))


print()
print("----------Results PTB-INV-Trained Model----------")
for example_count, entry in zip(c, results_ptb_inv_trained):
    print("Clean accuracy PTB-INV_trained with {} examples: {}".format(example_count, entry.get("clean").get("accuracy")))


# Zweiter Durchlauf 
Dasselbe wie beim ersten Durchlauf mit dem Unterschied, dass die Labels beim Retrainieren mit Invariance-Based Adversarial Examples von zehn Personen bestimmt wurden

In [None]:
# epsilon
epsilon=0.3

# c
c=[]
i=500
j=5
while j<=i:
    c.append(j)
    j+=5


vanilla_model=load_model("models/vanilla_model")

# m=l_infinity_PGD
# a=88.9
ptb_trained_model=load_model("models/ptb_trained_model_0.889_ptb_accuracy_PGD")

# Invariance-Based Adversarial Examples to train, using the HUMAN-ASSIGNED labels
inv_advs_to_train=np.load("data/invariance_examples/epsilon_0.3/invariance-based_adversarial_examples.npy")
inv_labels_to_train=np.load("data/invariance_examples/epsilon_0.3/invariance-based_adversarial_examples_human_labels.npy")

print(np.shape(inv_labels_to_train))

# Second run: retrain the vanilla and the perturbation-trained model on a
# growing number of invariance-based adversarial examples and log the
# evaluation results of every step to one CSV file per model.


def _result_row(n_examples, res):
    """Flatten one ``test_model`` result into a CSV row.

    Args:
        n_examples: value for the ``c`` column (number of invariance-based
            examples used for retraining; 0 for the untrained baseline).
        res: mapping returned by ``test_model``; read here via the keys
            ``clean``/``ptb``/``inv`` (each holding ``accuracy`` and
            ``loss``) and ``inv_success_rate``.

    Returns:
        A list in the column order of the CSV header: c, clean_acc, ptb_acc,
        inv_acc, clean_loss, ptb_loss, inv_loss, inv_success_rate.
    """
    return [
        n_examples,
        res.get("clean").get("accuracy"),
        res.get("ptb").get("accuracy"),
        res.get("inv").get("accuracy"),
        res.get("clean").get("loss"),
        res.get("ptb").get("loss"),
        res.get("inv").get("loss"),
        res.get("inv_success_rate"),
    ]


_csv_header = ["c", "clean_acc", "ptb_acc", "inv_acc", "clean_loss", "ptb_loss", "inv_loss", "inv_success_rate"]

results_inv_trained = []
results_ptb_inv_trained = []

# The context manager guarantees that both result files are flushed and closed
# even if a training or evaluation step raises part-way through the run.
with open('data/results/zweiter_durchlauf/inv_trained.csv', 'w', encoding='UTF8', newline='') as handler_inv_trained, \
        open('data/results/zweiter_durchlauf/ptb_inv_trained.csv', 'w', encoding='UTF8', newline='') as handler_ptb_inv_trained:
    writer_inv_trained = csv.writer(handler_inv_trained)
    writer_inv_trained.writerow(_csv_header)

    writer_ptb_inv_trained = csv.writer(handler_ptb_inv_trained)
    writer_ptb_inv_trained.writerow(_csv_header)

    # Baseline rows (c = 0): both models before any retraining.
    initial_results_vanilla = test_model(vanilla_model)
    initial_results_ptb = test_model(ptb_trained_model)

    writer_inv_trained.writerow(_result_row(0, initial_results_vanilla))
    writer_ptb_inv_trained.writerow(_result_row(0, initial_results_ptb))

    print("Initial results from Vanilla Model: {}".format(initial_results_vanilla))
    print("Initial results from PTB-Trained Model: {}".format(initial_results_ptb))

    for n_examples in c:
        print("Training with {} examples...".format(n_examples))

        # Both models are retrained on the same first n_examples samples,
        # so the slice and the one-hot labels are built only once.
        train_x = inv_advs_to_train[0:n_examples]
        train_y = to_categorical(inv_labels_to_train[0:n_examples], num_classes=10)

        # Vanilla model -> "INV-trained"
        vanilla_model.fit(train_x, train_y, epochs=10, verbose=0)
        res = test_model(vanilla_model)
        results_inv_trained.append(res)
        writer_inv_trained.writerow(_result_row(n_examples, res))

        # Perturbation-trained model -> "PTB-INV-trained"
        ptb_trained_model.fit(train_x, train_y, epochs=10, verbose=0)
        res = test_model(ptb_trained_model)
        results_ptb_inv_trained.append(res)
        writer_ptb_inv_trained.writerow(_result_row(n_examples, res))

        # Reload the saved models so the next training-set size starts from
        # the same initial weights instead of continuing this training.
        vanilla_model = load_model("models/vanilla_model")
        ptb_trained_model = load_model("models/ptb_trained_model_0.889_ptb_accuracy_PGD")

# Print the clean accuracy reached for every training-set size, first for the
# INV-trained (vanilla) model, then for the PTB-INV-trained model.
# zip() pairs each result with its training-set size, replacing the manual
# index counter.
for _header, _template, _results in (
    ("----------Results INV-Trained Model----------",
     "Clean accuracy INV_trained with {} examples: {}",
     results_inv_trained),
    ("----------Results PTB-INV-Trained Model----------",
     "Clean accuracy PTB-INV_trained with {} examples: {}",
     results_ptb_inv_trained),
):
    print()
    print(_header)
    for _n_examples, entry in zip(c, _results):
        print(_template.format(_n_examples, entry.get("clean").get("accuracy")))

# Dritter Durchlauf
Vergleich der Trainingsvarianten: PTB-INV-Trained, INV-PTB-Trained, simultan und inklusiv.

In [None]:
# Invariance-based adversarial examples for training.
# ONLY THE NEW (human-assigned) LABELS are used as targets.
inv_advs_to_train = np.load(
    "data/invariance_examples/epsilon_0.3/invariance-based_adversarial_examples.npy"
)
inv_labels_to_train = np.load(
    "data/invariance_examples/epsilon_0.3/invariance-based_adversarial_examples_human_labels.npy"
)

# Settings for the third run.
epsilon = 0.3            # perturbation budget
iterations = 1500        # number of iterations handed to ptb_training
ptb_acc_to_achieve = 1   # perturbation-accuracy target handed to ptb_training



# Third run: compare four training orders (inclusive, simultaneous,
# INV-then-PTB, PTB-then-INV), each starting from a freshly loaded vanilla
# model, and log the accuracy curves to one CSV file per variant.


def _accuracy_curves(history):
    """Return the (clean, ptb, inv) accuracy sequences of a ``ptb_training`` result.

    ``history`` is read via the keys ``clean``/``ptb``/``inv``, each of which
    holds an ``accuracy`` entry that is indexable by iteration.
    """
    return (
        history.get("clean").get("accuracy"),
        history.get("ptb").get("accuracy"),
        history.get("inv").get("accuracy"),
    )


def _write_accuracy_curves(writer, clean_acc, ptb_acc, inv_acc, n_rows):
    """Write one ``[i, clean_acc, ptb_acc, inv_acc]`` row for every i in ``range(n_rows)``."""
    for step in range(n_rows):
        writer.writerow([step, clean_acc[step], ptb_acc[step], inv_acc[step]])


def _snapshot_row(label, result):
    """Build an ``[i, clean_acc, ptb_acc, inv_acc]`` row from a single ``test_model`` result."""
    return [
        label,
        result.get("clean").get("accuracy"),
        result.get("ptb").get("accuracy"),
        result.get("inv").get("accuracy"),
    ]


# The context manager guarantees that all four result files are flushed and
# closed even if one of the long-running training steps raises.
with open('data/results/dritter_durchlauf/simultan.csv', 'w', encoding='UTF8', newline='') as handler_simultan_trained, \
        open('data/results/dritter_durchlauf/inclusive.csv', 'w', encoding='UTF8', newline='') as handler_inclusive_trained, \
        open('data/results/dritter_durchlauf/inv_ptb.csv', 'w', encoding='UTF8', newline='') as handler_inv_ptb_trained, \
        open('data/results/dritter_durchlauf/ptb_inv.csv', 'w', encoding='UTF8', newline='') as handler_ptb_inv_trained:
    writer_simultan_trained = csv.writer(handler_simultan_trained)
    writer_simultan_trained.writerow(["i", "clean_acc", "ptb_acc", "inv_acc"])

    writer_inclusive_trained = csv.writer(handler_inclusive_trained)
    writer_inclusive_trained.writerow(["i", "clean_acc", "ptb_acc", "inv_acc"])

    writer_inv_ptb_trained = csv.writer(handler_inv_ptb_trained)
    writer_inv_ptb_trained.writerow(["i", "clean_acc", "ptb_acc", "inv_acc"])

    writer_ptb_inv_trained = csv.writer(handler_ptb_inv_trained)
    writer_ptb_inv_trained.writerow(["i", "clean_acc", "ptb_acc", "inv_acc"])

    # --- inclusive training ---
    vanilla_model = load_model("models/vanilla_model")
    res = ptb_training(ptb_acc_to_achieve, vanilla_model, include_inv_training=False,
                       inclusive_training=True, use_iterations=True, iterations=iterations)
    clean_acc_arr_inclusive, ptb_acc_arr_inclusive, inv_acc_arr_inclusive = _accuracy_curves(res)
    _write_accuracy_curves(writer_inclusive_trained, clean_acc_arr_inclusive,
                           ptb_acc_arr_inclusive, inv_acc_arr_inclusive, iterations)

    # --- simultaneous training ---
    vanilla_model = load_model("models/vanilla_model")
    res = ptb_training(ptb_acc_to_achieve, vanilla_model, include_inv_training=True,
                       use_iterations=True, iterations=iterations)
    clean_acc_arr_simultan, ptb_acc_arr_simultan, inv_acc_arr_simultan = _accuracy_curves(res)
    _write_accuracy_curves(writer_simultan_trained, clean_acc_arr_simultan,
                           ptb_acc_arr_simultan, inv_acc_arr_simultan, iterations)

    # --- INV training first, then PTB training ---
    vanilla_model = load_model("models/vanilla_model")

    print("INV-Training")
    vanilla_model.fit(inv_advs_to_train, to_categorical(inv_labels_to_train, num_classes=10),
                      epochs=10,
                      verbose=0)

    result = test_model(vanilla_model)
    # NOTE(review): this snapshot is labelled iterations+1 although it is
    # taken before the PTB rows 0..iterations-1 below -- confirm that the
    # downstream evaluation expects this label.
    writer_inv_ptb_trained.writerow(_snapshot_row(iterations + 1, result))

    res = ptb_training(ptb_acc_to_achieve, vanilla_model, use_iterations=True, iterations=iterations)
    clean_acc_arr_inv_ptb, ptb_acc_arr_inv_ptb, inv_acc_arr_inv_ptb = _accuracy_curves(res)
    _write_accuracy_curves(writer_inv_ptb_trained, clean_acc_arr_inv_ptb,
                           ptb_acc_arr_inv_ptb, inv_acc_arr_inv_ptb, iterations)

    # --- PTB training first, then INV training ---
    vanilla_model = load_model("models/vanilla_model")

    res = ptb_training(ptb_acc_to_achieve, vanilla_model, use_iterations=True, iterations=iterations)
    clean_acc_arr_ptb_inv, ptb_acc_arr_ptb_inv, inv_acc_arr_ptb_inv = _accuracy_curves(res)
    _write_accuracy_curves(writer_ptb_inv_trained, clean_acc_arr_ptb_inv,
                           ptb_acc_arr_ptb_inv, inv_acc_arr_ptb_inv, iterations)

    print("INV-Training")
    vanilla_model.fit(inv_advs_to_train, to_categorical(inv_labels_to_train, num_classes=10),
                      epochs=10,
                      verbose=0)

    result = test_model(vanilla_model)
    # Final snapshot after the additional INV training, labelled iterations+1.
    writer_ptb_inv_trained.writerow(_snapshot_row(iterations + 1, result))