# Alibi - Adversarial Auto Encoder
## Section 37: Adversarial AE Detection and Correction on CIFAR-10

Page: 148, 91, 233

233: Adversarial AE Detection and Correction
87: Outlier, Adversarial and Drife Detection 
145: VAE Outlifer Detection

- Alibi can be used as anchor explanation, thus we can learn what the classifier biased toward
- Minh T. Nguyen 
- 4/20/2021

(1) About Adversarial Auto Encoder: 
- The methods used is from Adversarial Detection and Correction by Matching Predition Distributions
- The autoencoders are trained to find a transformation T that reconstructs the input instance x as accurately as possible with loss function that are suited to capturre the similarities between x and x' such as the mean squared reconstruction error.
- Training of the autoencoder is unsupervised since we only need access to the model prediction probabilities and the normal training instances.
- We do not require any knowledge about the underlying adversarial attack and the classifier weights are frozen during training

(2) About CIFAR-10:
- CIFAR10 consists of 60,000 32 by 32 RGB images equally distributed over 10 classes.


In [None]:
# pip install alibi-detect

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import os
from sklearn.metrics import roc_curve, auc
import tensorflow as tf
from tensorflow.keras.layers import (Conv2D, Conv2DTranspose, Dense, Flatten, InputLayer, Reshape)
from tensorflow.keras.regularizers import l1

from alibi_detect.ad import AdversarialAE
from alibi_detect.utils.fetching import fetch_detector, fetch_tf_model
from alibi_detect.utils.prediction import predict_batch
from alibi_detect.utils.saving import save_detector, load_detector
from alibi_detect.datasets import fetch_attack, fetch_cifar10c, corruption_types_cifar10c

## Utility functions

In [2]:
def scale_by_instance(X: np.ndarray) -> np.ndarray:
    mean_ = X.mean(axis = (1,2,3)).reshape(-1,1,1,1)
    std_ = X.std(axis=(1,2,3)).reshape(-1,1,1,1)
    return (X-mean_)/std_,mean_,std_

In [3]:
def accuracy(y_true: np.ndarray, y_pred: np.ndarray) -> float:
    return (y_true == y_pred).astype(int).sum() / y_true.shape[0]

In [4]:
def plot_adversarial(idx: list,
        X: np.ndarray,
        y: np.ndarray,
        X_adv: np.ndarray,
        y_adv: np.ndarray,
        mean: np.ndarray,
        std: np.ndarray,
        score_x: np.ndarray = None,
        score_x_adv: np.ndarray = None,
        X_recon: np.ndarray = None,
        y_recon: np.ndarray = None,
        figsize: tuple = (10, 5)) -> None:
    
    # category map from class numbers to names
    cifar10_map = {0: 'airplane', 1: 'automobile', 2: 'bird', 3: 'cat', 4: 'deer', 5:'dog', 6: 'frog', 7: 'horse', 8: 'ship', 9: 'truck'}
    
    nrows = len(idx)
    ncols = 3 if isinstance(X_recon, np.ndarray) else 2
    fig, ax = plt.subplots(nrows=nrows, ncols=ncols, figsize=figsize)
    
    n_subplot = 1
    for i in idx:
        # rescale images in [0, 1]
        X_adj = (X[i] * std[i] + mean[i]) / 255
        X_adv_adj = (X_adv[i] * std[i] + mean[i]) / 255
        
        if isinstance(X_recon, np.ndarray):
            X_recon_adj = (X_recon[i] * std[i] + mean[i]) / 255
        
        # original image
        plt.subplot(nrows, ncols, n_subplot)
        plt.axis('off')
        
        if i == idx[0]:
            if isinstance(score_x, np.ndarray):
                plt.title('CIFAR-10 Image \n{}: {:.3f}'.format(cifar10_map[y[i]], score_x[i]))
            else:
                plt.title('CIFAR-10 Image \n{}'.format(cifar10_map[y[i]]))
        else:
            if isinstance(score_x, np.ndarray):
                plt.title('{}: {:.3f}'.format(cifar10_map[y[i]], score_x[i]))
            else:
                plt.title('{}'.format(cifar10_map[y[i]]))
        
        plt.imshow(X_adj)
        n_subplot += 1

        # adversarial image
        plt.subplot(nrows, ncols, n_subplot)
        plt.axis('off')
        if i == idx[0]:
            if isinstance(score_x_adv, np.ndarray):
                plt.title('Adversarial \n{}: {:.3f}'.format(cifar10_map[y_adv[i]], score_x_adv[i]))
            else:
                plt.title('Adversarial \n{}'.format(cifar10_map[y_adv[i]]))
        else:
            if isinstance(score_x_adv, np.ndarray):
                plt.title('{}: {:.3f}'.format(cifar10_map[y_adv[i]], score_x_adv[i]))
            else:
                plt.title('{}'.format(cifar10_map[y_adv[i]]))
        plt.imshow(X_adv_adj)
        n_subplot += 1
        
        # reconstructed image
        if isinstance(X_recon, np.ndarray):
            plt.subplot(nrows, ncols, n_subplot)
            plt.axis('off')
            if i == idx[0]:
                plt.title('AE Reconstruction \n{}'.format(cifar10_map[y_recon[i]]))
            else:
                plt.title('{}'.format(cifar10_map[y_recon[i]]))
            plt.imshow(X_recon_adj)
            n_subplot += 1
    plt.show()


In [6]:
def plot_roc(roc_data: dict, figsize: tuple = (10,5)):
    plot_labels = []
    scores_attacks = []
    labels_attacks = []
    for k, v in roc_data.items():
        if 'original' in k:
            continue
        score_x = roc_data[v['normal']]['scores']
        y_pred = roc_data[v['normal']]['predictions']
        score_v = v['scores']
        y_pred_v = v['predictions']
        labels_v = np.ones(score_x.shape[0])
        idx_remove = np.where(y_pred == y_pred_v)[0]
        labels_v = np.delete(labels_v, idx_remove)
        score_v = np.delete(score_v, idx_remove)
        scores = np.concatenate([score_x, score_v])
        labels = np.concatenate([np.zeros(y_pred.shape[0]), labels_v]).astype(int)
        scores_attacks.append(scores)
        labels_attacks.append(labels)
        plot_labels.append(k)
        
    for sc_att, la_att, plt_la in zip(scores_attacks, labels_attacks, plot_labels):
        fpr, tpr, thresholds = roc_curve(la_att, sc_att)
        roc_auc = auc(fpr, tpr)
        label = str('{}: AUC = {:.2f}'.format(plt_la, roc_auc))
        plt.plot(fpr, tpr, lw=1, label='{}: AUC={:.4f}'.format(plt_la, roc_auc))
        
    plt.plot([0, 1], [0, 1], color='black', lw=1, linestyle='--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('{}'.format('ROC curve'))
    plt.legend(loc="lower right", ncol=1)
    plt.grid()
    plt.show()

## Load Data

In [7]:
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.cifar10.load_data()
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
y_train = y_train.astype('int64').reshape(-1,)
y_test = y_test.astype('int64').reshape(-1,)

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz


In [8]:
# Standardise the dataset by instance:
X_train, mean_train, std_train = scale_by_instance(X_train)
X_test, mean_test, std_test = scale_by_instance(X_test)
scale = (mean_train, std_train), (mean_test, std_test)

## Load Classifier

In [17]:
dataset = 'cifar10'
model = 'resnet56'
clf = fetch_tf_model(dataset, model)

Downloading data from https://storage.googleapis.com/seldon-models/alibi-detect/classifier/cifar10\resnet56\model.h5


Exception: URL fetch failure on https://storage.googleapis.com/seldon-models/alibi-detect/classifier/cifar10\resnet56\model.h5: 404 -- Not Found

In [13]:
!ls


Adversarial Auto-Encoder CIFAR 10 - Neural Defense.ipynb
model.h5


In [20]:
from tensorflow import keras

import cloudpickle as cp
import logging
import os
import pickle
import tensorflow as tf
from tensorflow.python.keras import backend
from typing import Tuple, Union
from urllib.request import urlopen
from alibi_detect.base import BaseDetector
from alibi_detect.ad import AdversarialAE, ModelDistillation
from alibi_detect.models.tensorflow import PixelCNN
from alibi_detect.od import (IForest, LLR, Mahalanobis, OutlierAE, OutlierAEGMM, OutlierProphet,
                             OutlierSeq2Seq, OutlierVAE, OutlierVAEGMM, SpectralResidual)
from alibi_detect.utils.saving import load_detector  # type: ignore

model = keras.models.load_model('model.h5')

NameError: name 'backend' is not defined

Python 3.7.6
