# Project

Tests:
 * [1.1]  Untargeted, differential evolution (DE), 1 pixel
 * [1.2]  Targeted, differential evolution (DE), 1 pixel
---
Measurements:
 * Confusion matrix (before | after)
 * Success rate (untargeted attacks)
 * One-vs-Rest multiclass ROC
---
Countermeasures:
 * Autoencoder for anomaly detection
 * Denoising autoencoder for removing the perturbation

### Importing libraries & models

In [None]:
!pip install scipy

In [None]:
# Import libraries
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
import multiprocess as mp
from datetime import datetime
from attack import *
from util import *
import sys
from sklearn.metrics import confusion_matrix, roc_curve
import seaborn as sn
import gc
from scipy.optimize import differential_evolution
import pickle
import importlib

# Print the versions of the main libraries used in this notebook.
# matplotlib.pyplot has no __version__ attribute; the version lives on the
# top-level matplotlib package, so import it here for the lookup.
import matplotlib

print("TensorFlow version:{}".format(tf.__version__))
print("Matplotlib version:{}".format(matplotlib.__version__))
print("Seaborn version:{}".format(sn.__version__))

### Importing & Preprocessing images for testing

In [None]:
# Load CIFAR-10 and keep only the test split; the training split is discarded.
(_, _), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()

In [None]:
# Human-readable label names, positioned so that class_names[i] names class i.
class_names = [
    "airplane",
    "automobile",
    "bird",
    "cat",
    "deer",
    "dog",
    "frog",
    "horse",
    "ship",
    "truck",
]

## Importing the models

In [None]:
# Turn off TensorFlow's traceback filtering so errors show the full stack trace.
tf.debugging.disable_traceback_filtering()

In [None]:
# Shape of a single test image: every axis of x_test except the leading sample axis.
IMG_SHAPE = x_test.shape[1:]

In [None]:
# Load the CNN that the attacks below are run against.
# compile=False is passed to load_model, so the model is loaded without being compiled.
model = tf.keras.models.load_model('models/opa_model_CNN.h5', compile=False)
#model_vgg = tf.keras.models.load_model('models/opa_model_vgg16.keras')
#model_resnet = tf.keras.models.load_model('models/opa_model_resnet50.keras')

## Generating the adversarial samples

Code made by [Hyperparticle](https://github.com/Hyperparticle)

## Attacks

In [None]:
# Create today's results folder (results/YYYY-MM-DD). os.makedirs is used
# instead of a shell `mkdir` so that the parent "results" folder is created
# when missing, re-running the cell on the same day does not fail, and the
# behaviour does not depend on the shell or operating system.
import os

today = datetime.now().strftime('%Y-%m-%d')
os.makedirs(f"results/{today}", exist_ok=True)
print(today)

In [None]:
# Settings shared by the untargeted and targeted attack runs below.
attack_parameters = {
    'models': [model],  # models to attack
    'n_samples': 100,  # passed as sample_size to prepare_fair_samples
    'mp': -1,  # forwarded to attack_all(mp=...); presumably a worker-count setting — TODO confirm in attack.py
    'maxiter': 600,  # forwarded to attack_all(maxiter=...)
    'popsize': 2000  # forwarded to attack_all(popsize=...)
}

### Preparing the samples

In [None]:
# Filter the test set with the project helper filter_valid_samples.
# NOTE(review): presumably keeps only samples the model already classifies correctly — confirm in the helper.
x_test_filtered, y_test_filtered = filter_valid_samples(model=model, x=x_test, y_true=y_test)

In [None]:
# Draw the images and labels to attack from the filtered test set.
# NOTE(review): whether sample_size is a total or a per-class count is decided by prepare_fair_samples — confirm there.
img_samples, label_samples = prepare_fair_samples(x=x_test_filtered, 
                                                  y=y_test_filtered, 
                                                  sample_size=attack_parameters['n_samples'],
                                                  label_names=class_names)

### Untargeted attacks

In [None]:
%%capture
t_start = datetime.now()
untargeted_results = attack_all(models=attack_parameters['models'], 
                                       test=(img_samples, label_samples), 
                                       mp=attack_parameters['mp'], 
                                       target=None, 
                                       pixels=(1,), 
                                       maxiter=attack_parameters['maxiter'], 
                                       popsize=attack_parameters['popsize'], 
                                       verbose=False,
                                       class_names=class_names)
t_end = datetime.now()

In [None]:
# Persist the raw untargeted results in today's results folder.
with open(f"results/{today}/untargeted_results_encoding", "wb") as results_file:
    pickle.dump(untargeted_results, results_file)

# Report the timing, then display every row of the per-model statistics.
print(f"Untargeted attack took: {t_end - t_start}\n\tTest start time: {t_start}\n\tTest end time: {t_end}")
untargeted_stats = attack_stats(untargeted_results, attack_parameters['models'])
untargeted_stats.head(n=len(untargeted_stats))

### Targeted attacks

In [None]:
%%capture
t_start = datetime.now()
targeted_results = attack_all(models=attack_parameters['models'], 
                               test=(img_samples, label_samples), 
                               mp=attack_parameters['mp'], 
                               target=1, 
                               pixels=(1,), 
                               maxiter=attack_parameters['maxiter'], 
                               popsize=attack_parameters['popsize'], 
                               verbose=False,
                               class_names=class_names)
t_end = datetime.now()

In [None]:
# Persist the raw targeted results in today's results folder.
with open(f"results/{today}/targeted_results_encoding", "wb") as results_file:
    pickle.dump(targeted_results, results_file)

# Report the timing, then display every row of the per-model statistics.
print(f"Targeted attack took: {t_end - t_start}\n\tTest start time: {t_start}\n\tTest end time: {t_end}")
targeted_stats = attack_stats(targeted_results, attack_parameters['models'])
targeted_stats.head(n=len(targeted_stats))

### Analysis of the test results
#### Preparation

In [None]:
# Re-import the local "attack" module so edits made to it on disk take effect
# without restarting the kernel, then refresh the star-imported names.
imported_module = importlib.reload(importlib.import_module("attack"))
from attack import *

In [None]:
# Re-import the local "util" module so edits made to it on disk take effect
# without restarting the kernel, then refresh the star-imported names.
imported_module = importlib.reload(importlib.import_module("util"))
from util import *

In [None]:
%%script false --no-raise-error
# Disabled by default: the `%%script false` magic above skips this cell.
# Remove the magic to reload results pickled on a previous day (set `day`)
# instead of re-running the attacks.
# NOTE: pickle.load can execute arbitrary code; only load files you produced yourself.
day = "2023-11-12"
# Build the paths from `day` (not `today`) so the selected day is actually loaded.
targeted_results_path = f"results/{day}/targeted_results_encoding"
untargeted_results_path = f"results/{day}/untargeted_results_encoding"
with open(targeted_results_path, "rb") as f:
    targeted_results = pickle.load(f)

with open(untargeted_results_path, "rb") as f:
    untargeted_results = pickle.load(f)

In [None]:
# Wrap the raw attack results in DataFrames with named columns and make sure
# both class columns hold plain integers.
results_columns_names = [
    "model_name",
    "pixel_count",
    "img",
    "actual_class",
    "predicted_class",
    "success",
    "cdiff",
    "prior_probs",
    "predicted_probs",
    "perturbation",
]
untargeted_results = pd.DataFrame(untargeted_results, columns=results_columns_names)
targeted_results = pd.DataFrame(targeted_results, columns=results_columns_names)
for results_frame in (untargeted_results, targeted_results):
    for class_column in ("actual_class", "predicted_class"):
        results_frame[class_column] = results_frame[class_column].astype(int)

In [None]:
# Plot styling for the figures below: start from seaborn's defaults,
# then double the font scale and switch to the "whitegrid" style.
sn.reset_defaults()
sn.set(font_scale=2)
sn.set_style("whitegrid")

In [None]:
# Flatten the true labels and the model's predicted labels on the clean test
# set into 1-D arrays with one entry per test image.
# np.reshape is called positionally: from NumPy 2.1 the array argument is
# positional-only and the `newshape` keyword is deprecated, while the
# positional form works on every NumPy version.
y_test_adapted = np.reshape(y_test, (y_test.shape[0],))
y_predicted_adapted = np.reshape(np.argmax(model(x_test), axis=1), (x_test.shape[0],))

#### Heatmap of the prediction

In [None]:
# Confusion matrix for the unperturbed test set: true labels vs. the model's predictions.
# NOTE(review): `name` and `today` presumably decide where the figure is saved — confirm in the helper.
generate_conf_matrix(classifier=model, original_labels=y_test_adapted,
                    adversarial_labels=y_predicted_adapted, name="original", 
                     class_names=class_names, today=today)

In [None]:
# Confusion matrix for the untargeted attack: true class vs. class predicted after the perturbation.
# NOTE(review): `name` and `today` presumably decide where the figure is saved — confirm in the helper.
generate_conf_matrix(classifier=model, original_labels=untargeted_results["actual_class"],
                     adversarial_labels=untargeted_results["predicted_class"], name="untargeted", 
                     class_names=class_names, today=today)

In [None]:
# Confusion matrix for the targeted attack: true class vs. class predicted after the perturbation.
# NOTE(review): `name` and `today` presumably decide where the figure is saved — confirm in the helper.
generate_conf_matrix(classifier=model, original_labels=targeted_results["actual_class"],
                     adversarial_labels=targeted_results["predicted_class"], name="targeted", 
                     class_names=class_names, today=today)

#### Examples of successful perturbations

In [None]:
# Take the first row whose untargeted attack succeeded and rebuild its
# adversarial image by applying the stored perturbation to the original image.
success_mask = untargeted_results["success"] == 1
adv_index_untargeted = untargeted_results.loc[success_mask].index.values.astype(int)[0]
sample_result_img_untargeted = untargeted_results.loc[adv_index_untargeted, "img"]
adversarial_image_untargeted = perturb_image(
    xs=untargeted_results.loc[adv_index_untargeted, "perturbation"],
    img=sample_result_img_untargeted,
)

In [None]:
# Display the selected original image next to its untargeted adversarial version,
# labelled with the true class and the class predicted after the attack.
show_example_images(original_image=sample_result_img_untargeted, 
                    original_label=untargeted_results["actual_class"][adv_index_untargeted], 
                    adversarial_image=adversarial_image_untargeted, 
                    adversarial_label=untargeted_results["predicted_class"][adv_index_untargeted],
                    class_names=class_names
                   )

In [None]:
# Take the first row whose targeted attack succeeded and rebuild its
# adversarial image by applying the stored perturbation to the original image.
# The trailing [0] selects a single row index (as the untargeted example does);
# without it an array of indices is used and whole Series are passed on
# instead of one image and one perturbation.
# Raises IndexError when no targeted attack succeeded.
adv_index_targeted = targeted_results[targeted_results["success"] == 1].index.values.astype(int)[0]
sample_result_img_targeted = targeted_results["img"][adv_index_targeted]
adversarial_image_targeted = perturb_image(
    xs=targeted_results["perturbation"][adv_index_targeted],
    img=sample_result_img_targeted,
)

In [None]:
# Display the selected original image next to its targeted adversarial version,
# labelled with the true class and the class predicted after the attack.
show_example_images(original_image=sample_result_img_targeted, 
                    original_label=targeted_results["actual_class"][adv_index_targeted], 
                    adversarial_image=adversarial_image_targeted, 
                    adversarial_label=targeted_results["predicted_class"][adv_index_targeted],
                    class_names=class_names
                   )

#### ROC curve

In [None]:
from sklearn.metrics import RocCurveDisplay
from sklearn.preprocessing import LabelBinarizer
from itertools import cycle

In [None]:
# ROC curves of the model on the full, unperturbed test set (baseline for the attacked versions).
plot_ROC_curve(classifiers=[model], name="Original model", images=x_test, labels=y_test_adapted,
                    class_names=class_names)

In [None]:
# Rebuild every perturbed image of the untargeted run as one array of shape
# (number of result rows,) + IMG_SHAPE.
# The row count comes from the results themselves rather than from
# attack_parameters['n_samples'], so the reshape still works when the number
# of result rows differs from n_samples (e.g. more than one attacked model).
# np.reshape is called positionally: from NumPy 2.1 the array argument is
# positional-only and the `newshape` keyword is deprecated.
n_untargeted = untargeted_results.shape[0]
aux = np.reshape(np.concatenate(list(untargeted_results["img"])), (n_untargeted,) + IMG_SHAPE)
aux_pert = list(untargeted_results["perturbation"])
# Apply each stored perturbation to its own original image.
untargeted_perturbed_imgs = [perturb_image(xs=pert, img=img) for img, pert in zip(aux, aux_pert)]

untargeted_perturbed_imgs = np.reshape(np.concatenate(untargeted_perturbed_imgs), (n_untargeted,) + IMG_SHAPE)

In [None]:
# ROC curves of the model on the untargeted adversarial images, scored against the true classes.
plot_ROC_curve(classifiers=[model], 
               name="Untargeted", 
               images=untargeted_perturbed_imgs, 
               labels=untargeted_results["actual_class"],
               class_names=class_names)

In [None]:
# Rebuild every perturbed image of the targeted run as one array of shape
# (number of result rows,) + IMG_SHAPE.
# np.reshape is called positionally: from NumPy 2.1 the array argument is
# positional-only and the `newshape` keyword is deprecated.
n_targeted = targeted_results.shape[0]
aux = np.reshape(np.concatenate(list(targeted_results["img"])), (n_targeted,) + IMG_SHAPE)
aux_pert = list(targeted_results["perturbation"])
# Apply each stored perturbation to its own original image.
targeted_perturbed_imgs = [perturb_image(xs=pert, img=img) for img, pert in zip(aux, aux_pert)]

targeted_perturbed_imgs = np.reshape(np.concatenate(targeted_perturbed_imgs), (n_targeted,) + IMG_SHAPE)
    


In [None]:
# ROC curves of the model on the targeted adversarial images, scored against the true classes.
plot_ROC_curve(classifiers=[model], 
               name="Targeted", 
               images=targeted_perturbed_imgs, 
               labels=targeted_results["actual_class"],
               class_names=class_names)

#### Overall analysis

## Second part: Testing defensive measures

### First method: detecting the perturbations
We only use the samples from the attack that succeeded, i.e. the untargeted attack.

In [None]:
# Load the autoencoder used for anomaly detection; compile=False is passed, so it is loaded without being compiled.
autoencoder_model = tf.keras.models.load_model('models/autoencoder_model_CNN', compile=False)

In [None]:
# Mean squared error with reduction disabled, so the loss is not collapsed into a single scalar.
loss = tf.keras.losses.MeanSquaredError(reduction=tf.compat.v1.losses.Reduction.NONE)

In [None]:
# Scale the raw pixel values to [0, 1] for the autoencoder.
# Only integer-typed (raw) data is scaled: after the first run x_test is a
# float array, so re-running this cell does not divide by 255 a second time.
if np.issubdtype(x_test.dtype, np.integer):
    x_test = x_test / 255.

In [None]:
# Reconstruct the clean test images and compute one reconstruction loss per image:
# the unreduced MSE is summed over axes 1 and 2.
# NOTE(review): assumes the unreduced loss has shape (n_images, height, width) — confirm.
reconstructed_images_original = autoencoder_model.predict(x_test)
reconstructed_loss_original = np.sum(loss(reconstructed_images_original, x_test).numpy(), axis=(1,2))

In [None]:
# Reconstruct the adversarial images and compute one reconstruction loss per image.
# NOTE: this rescales untargeted_perturbed_imgs in place; re-running the cell
# divides by 255 again, so rebuild the perturbed images before re-running.
untargeted_perturbed_imgs = untargeted_perturbed_imgs / 255.
reconstructed_images_perturbed = autoencoder_model.predict(untargeted_perturbed_imgs)
# Stack the original (unperturbed) images into one array. The row count is
# taken from the results rather than from attack_parameters['n_samples'] so it
# always matches the data, and np.reshape is called positionally because from
# NumPy 2.1 its array argument is positional-only and `newshape` is deprecated.
unperturbed_images = np.reshape(
    np.concatenate(list(untargeted_results["img"])),
    (untargeted_results.shape[0],) + IMG_SHAPE,
)
unperturbed_images = unperturbed_images / 255.
# NOTE(review): the loss compares each reconstruction with the *unperturbed*
# image, whereas the clean-data loss compares with the autoencoder's own
# input — confirm this asymmetry is intended.
reconstructed_loss_perturbed = np.sum(loss(reconstructed_images_perturbed, unperturbed_images).numpy(), axis=(1,2))

In [None]:
# Per-image reconstruction losses keyed by the label used for each distribution in the plot.
losses_distributions = {
    'Original reconstruction loss': reconstructed_loss_original,
    'Adversarial reconstruction loss': reconstructed_loss_perturbed,
}

In [None]:
# Compare the clean and adversarial reconstruction-loss distributions using the project's boxplot helper.
print_distribution_boxplots(losses_distributions)

### Second method: removing the perturbations
We'll use a denoising autoencoder to smooth out the perturbation.

In [None]:
# Load the denoising autoencoder; compile=False is passed, so it is loaded without being compiled.
denoising_autoencoder_model = tf.keras.models.load_model('models/autoencoder_denoising_model_CNN', compile=False)
