In [5]:
import math
import os

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from matplotlib import rc
from PIL import Image
from sklearn.base import clone
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.manifold import TSNE
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import LinearSVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.utils import resample

from auto_encoder.sklearn import AutoTransformer, ConvolutionalAutoTransformer, Transformer, IdentityTransformer
from data.openml import get_openml_data
from metrics.robustness import get_adversarial_examples
# Ask TensorFlow to grow GPU memory on demand rather than reserving it up front.
# Looping over the device list keeps the notebook runnable on CPU-only machines
# (empty list) and applies the same setting to every visible GPU.
physical_devices = tf.config.list_physical_devices('GPU')
for gpu_device in physical_devices:
    try:
        tf.config.experimental.set_memory_growth(gpu_device, True)
    except RuntimeError as err:
        # Memory growth has to be set before the GPUs are initialised, so
        # re-running this cell in a live kernel ends up here.
        print(f'Could not enable GPU memory growth: {err}')

# Render all figure text with LaTeX in a serif font.
rc('font', **{'family': 'serif', 'serif': ['Computer Modern'], 'size': 12})
rc('text', usetex=True)

# Human-readable names for the integer class labels 0-9; used for plot legends
# and for the file names of the saved adversarial images.
true_labels =  {0:'T-shirt Top',
                1:'Trouser',
                2:'Pullover',
                3:'Dress',
                4:'Coat',
                5:'Sandal',
                6:'Shirt',
                7:'Sneaker',
                8:'Bag',
                9:'Ankle boot'}


In [2]:
# Fit every transformer on each dataset. The encoded representation of all
# samples is kept per (dataset, transformer); the reconstructions are kept only
# for a small subset of samples and are keyed by transformer name alone.
encoded_data, reconstructed_data = {}, {}

transformers = {kind: Transformer(type=kind) for kind in ('ae', 'vae', 'dae', 'sae')}
transformers['original'] = IdentityTransformer()
transformers['pca'] = PCA(n_components=274)

sampling = True
n_samples = 10

for dataset_id in [40996]:
    x, y = get_openml_data(dataset_id)

    # Row indices whose reconstructions are stored: a stratified draw without
    # replacement when sampling is enabled, otherwise every row.
    if sampling:
        sample_idcs = resample(np.arange(len(y)), stratify=y, replace=False, n_samples=n_samples)
    else:
        sample_idcs = np.arange(len(y))

    for t_name, transformer in transformers.items():
        x_encoded = transformer.fit_transform(x)
        x_reconstructed = transformer.inverse_transform(x_encoded)
        encoded_data[(dataset_id, t_name)] = {'x': x_encoded, 'y': y}
        reconstructed_data[t_name] = x_reconstructed[sample_idcs]



KeyboardInterrupt: 

In [2]:

def plot_latent_space(dataset_id, x, y, t_name, n_samples=1000, figsize=(5, 5)):
    """Scatter-plot a stratified sample of an encoded data set and save it as SVG.

    Data with more than two features is projected to two dimensions with t-SNE
    before plotting. One scatter series is drawn per class and the figure is
    written to ``visualizations/{dataset_id}_{t_name}.svg`` and then closed.

    Args:
        dataset_id: Identifier of the data set; used in the file name and to
            look up dataset-specific class names when ``true_labels`` is nested.
        x: Encoded samples, one row per sample.
        y: Class label of every row in ``x``.
        t_name: Name of the transformer that produced ``x``; used in the file name.
        n_samples: Upper bound on the number of points drawn.
        figsize: Figure size passed to matplotlib.
    """
    # Sampling without replacement cannot draw more rows than are available.
    n_samples = min(n_samples, len(y))
    x_samples, y_samples = resample(x, y, n_samples=n_samples, stratify=y, replace=False)
    if x_samples.shape[1] > 2:
        x_samples = TSNE().fit_transform(x_samples)

    # true_labels is a flat {class: name} mapping in this notebook; indexing it
    # with the dataset id first raised a KeyError. A nested
    # {dataset_id: {class: name}} mapping is still honoured if one is supplied.
    per_dataset = true_labels.get(dataset_id)
    class_names = per_dataset if isinstance(per_dataset, dict) else true_labels

    fig, ax = plt.subplots(figsize=figsize)
    for clss in np.unique(y_samples):
        x_clss = x_samples[y_samples == clss]
        ax.scatter(x_clss[:, 0], x_clss[:, 1], alpha=0.8, label=class_names[clss])

    ax.legend()
    fig.tight_layout()
    out_dir = 'visualizations'
    os.makedirs(out_dir, exist_ok=True)  # savefig does not create missing folders
    fig.savefig(f'{out_dir}/{dataset_id}_{t_name}.svg', dpi=300)
    plt.close(fig)
    
def plot_reconstructions(data, figsize=(3, 3), save=True, title=None):
    """Show a row of 28x28 grayscale images, optionally saving it as SVG.

    Args:
        data: Array of images; it is reshaped to ``(-1, 28, 28)``.
        figsize: Size of a single image panel; the figure width is this
            width multiplied by the number of images.
        save: When true, write the figure to
            ``visualizations/reconstructions/{title}.svg`` and close it;
            otherwise the figure is left open.
        title: File name stem used when saving.

    Raises:
        ValueError: If ``data`` contains no images.
    """
    # Reshape first so the panel count is the number of images, not the
    # length of the first axis of whatever layout the caller passed in.
    images = np.asarray(data).reshape((-1, 28, 28))
    n_images = len(images)
    if n_images == 0:
        raise ValueError('data contains no images to plot')

    # squeeze=False always yields a 2-D array of axes, so a single image no
    # longer returns a bare Axes object that cannot be iterated.
    fig, axs = plt.subplots(nrows=1, ncols=n_images,
                            figsize=(figsize[0] * n_images, figsize[1]),
                            squeeze=False)
    for sample, ax in zip(images, axs.ravel()):
        ax.imshow(sample, cmap='gray')
        ax.tick_params(axis='both', which='both', bottom=False, top=False, left=False, labelbottom=False, labelleft=False)

    fig.tight_layout()
    if save:
        out_dir = 'visualizations/reconstructions'
        os.makedirs(out_dir, exist_ok=True)  # savefig does not create missing folders
        fig.savefig(f'{out_dir}/{title}.svg')
        plt.close(fig)

In [4]:
# Restore previously saved reconstruction samples instead of recomputing them.
# Indexing with an empty tuple unwraps the single stored object from the array;
# presumably a {transformer name: samples} dict written with np.save.
# NOTE(review): allow_pickle=True executes pickle code - only load trusted files.
reconstructed_data = np.load(
    file='reconstruction_samples.npy',
    allow_pickle=True,
)[()]

In [40]:
# Save one latent-space scatter plot per (dataset, transformer) pair.
for (dataset_id, t_name), data in encoded_data.items():
    x = data['x']
    y = data['y']
    plot_latent_space(dataset_id=dataset_id, x=x, y=y, t_name=t_name)

In [45]:
# Save one row of reconstructed sample images per transformer type; the type
# name becomes the file name.
for t_type in reconstructed_data:
    data = reconstructed_data[t_type]
    plot_reconstructions(data, title=t_type, save=True)

## Visualize adversarial examples

In [7]:
# For every classifier: craft adversarial examples against the bare classifier
# and against the same classifier behind a denoising auto-encoder, then save the
# clean image, both adversarial images and both amplified perturbations as PNGs.
N_ATTACK_SAMPLES = 50   # test samples drawn per classifier
N_SAVED = 5             # number of most-perturbed examples written to disk
DIFF_ALPHA = 0.05       # perturbations are amplified by 1 / DIFF_ALPHA in the diff images


def _to_uint8_image(pixels):
    """Convert a float pixel array to an 8-bit PIL image.

    Values are rounded and clipped to [0, 255] first; casting out-of-range
    floats straight to uint8 wraps around and corrupts the picture.
    """
    return Image.fromarray(np.clip(np.rint(pixels), 0, 255).astype('uint8'))


x, y = get_openml_data(40996, scaling=None)
# NOTE(review): the scaler is fitted on all samples, including the later test
# split - confirm this is intended.
scaler = MinMaxScaler()
scaler.fit(x)

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.33)
x_train_scaled, x_test_scaled = scaler.transform(x_train), scaler.transform(x_test)

clfs = {'log': LogisticRegression(max_iter=500), 'tree': DecisionTreeClassifier(), 'svm': LinearSVC(max_iter=100), 'nb': GaussianNB(), }

for name, clf in clfs.items():
    x_samples, x_samples_scaled, y_samples = resample(x_test, x_test_scaled, y_test, stratify=y_test, replace=False, n_samples=N_ATTACK_SAMPLES)

    # Clone before fitting so the pipeline gets an unfitted copy of the classifier.
    clf_pipe = clone(clf)
    clf.fit(x_train_scaled, y_train)
    preds_clf = clf.predict(x_samples_scaled)

    pipe = Pipeline([('ae', AutoTransformer(type='dae')), ('clf', clf_pipe)])
    pipe.fit(x_train_scaled, y_train)
    preds_pipe = pipe.predict(x_samples_scaled)

    # Keep only the samples that both models classify correctly.
    selector = np.logical_and((preds_pipe == y_samples), (preds_clf == y_samples))
    if not np.any(selector):
        print(f'{name}: no sample is classified correctly by both models, skipping.')
        continue
    x_samples_scaled, y_samples = x_samples_scaled[selector], y_samples[selector]
    preds_pipe, preds_clf = preds_pipe[selector], preds_clf[selector]
    x_samples = x_samples[selector]

    x_adv_clf_scaled, preds_adv_clf = get_adversarial_examples(clf, x_samples_scaled, y_samples)
    x_adv_pipe_scaled, preds_adv_pipe = get_adversarial_examples(pipe, x_samples_scaled, y_samples)

    # Indices of the samples whose pipeline adversarial example lies furthest
    # (L2 norm, scaled space) from the clean sample.
    top_idcs = np.linalg.norm((x_adv_pipe_scaled - x_samples_scaled), axis=1).argsort()[-N_SAVED:]
    x_adv_clf = scaler.inverse_transform(x_adv_clf_scaled[top_idcs])
    x_adv_pipe = scaler.inverse_transform(x_adv_pipe_scaled[top_idcs])
    x_samples = x_samples[top_idcs]
    preds_adv_clf, preds_adv_pipe = preds_adv_clf[top_idcs], preds_adv_pipe[top_idcs]
    preds_clf, preds_pipe = preds_clf[top_idcs], preds_pipe[top_idcs]

    out_dir = f'results/visualizations/adversarial/{name}'
    os.makedirs(out_dir, exist_ok=True)  # Image.save does not create missing folders

    examples = zip(x_adv_clf, x_adv_pipe, x_samples, preds_adv_clf, preds_adv_pipe, preds_clf)
    for idx, (x_clf, x_pipe, x_clean, pred_adv_clf, pred_adv_pipe, pred_clf) in enumerate(examples):
        x_pipe = x_pipe.reshape((28, 28))
        x_clf = x_clf.reshape((28, 28))
        x_clean = x_clean.reshape((28, 28))

        # (x_adv - (1 - a) * x_clean) / a == x_clean + (x_adv - x_clean) / a,
        # i.e. the clean image plus the perturbation amplified by 1 / a.
        x_diff_clf = (x_clf - (1 - DIFF_ALPHA) * x_clean) / DIFF_ALPHA
        x_diff_pipe = (x_pipe - (1 - DIFF_ALPHA) * x_clean) / DIFF_ALPHA

        _to_uint8_image(x_clf).save(f'{out_dir}/{idx}_{true_labels[pred_adv_clf]}_clf_adv.png')
        _to_uint8_image(x_pipe).save(f'{out_dir}/{idx}_{true_labels[pred_adv_pipe]}_dae_adv.png')
        _to_uint8_image(x_clean).save(f'{out_dir}/{idx}_{true_labels[pred_clf]}.png')
        _to_uint8_image(x_diff_clf).save(f'{out_dir}/{idx}_diff_clf.png')
        _to_uint8_image(x_diff_pipe).save(f'{out_dir}/{idx}_diff_pipe.png')

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


HopSkipJump:   0%|          | 0/39 [00:00<?, ?it/s]

HopSkipJump:   0%|          | 0/39 [00:00<?, ?it/s]

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100

KeyboardInterrupt: 