# add pattern at corner

In [None]:
def draw_confusion_matrix(num_gpu, sess, classifier, xs, ys, batch_size=None):
    """Plot the row-normalised confusion matrix of `classifier` on (xs, ys).

    The data is fed through `classifier.iterator`; `classifier.labels` and
    `classifier.predictions` are fetched once per step and concatenated.
    The overall accuracy is shown in the figure title and every cell is
    annotated with its (row-normalised) value.

    Args:
        num_gpu: Number of GPUs; only used to derive the number of steps.
        sess: Session object used to run the classifier's tensors.
        classifier: Object exposing `iterator`, `xs_placeholder`,
            `ys_placeholder`, `batch_size`, `data_size`, `labels` and
            `predictions`.
        xs: Inputs fed to `classifier.xs_placeholder`.
        ys: Labels fed to `classifier.ys_placeholder`.
        batch_size: Batch size fed to `classifier.batch_size`. Required,
            despite the `None` default kept for interface compatibility.

    Raises:
        ValueError: If `batch_size` is None.
    """
    if batch_size is None:
        # Fail early with a clear message instead of a TypeError from the
        # division below, after the iterator has already been initialised.
        raise ValueError('batch_size must be provided')

    from sklearn.metrics import confusion_matrix

    sess.run(classifier.iterator.initializer,
             feed_dict={classifier.xs_placeholder: xs,
                        classifier.ys_placeholder: ys,
                        classifier.batch_size: batch_size,
                        classifier.data_size: len(xs)})

    # NOTE(review): the step count assumes one sess.run consumes
    # batch_size * num_gpu examples -- confirm against the classifier.
    num_iter = int(np.ceil(len(xs) / batch_size / num_gpu))
    y_trues = []
    y_preds = []
    for _ in range(num_iter):
        y_true, y_pred = sess.run([classifier.labels, classifier.predictions])
        y_trues.append(y_true)
        y_preds.append(y_pred)
    y_trues = np.concatenate(y_trues, axis=0)
    y_preds = np.concatenate(y_preds, axis=0)

    avg_acc = (y_trues == y_preds).sum() / len(y_preds)

    cm = confusion_matrix(y_trues, y_preds)
    # A class that only occurs among the predictions has an all-zero row;
    # leave that row at 0 instead of dividing by zero and plotting NaNs.
    row_totals = cm.sum(axis=1, keepdims=True)
    cm = np.divide(cm, row_totals,
                   out=np.zeros(cm.shape, dtype=float),
                   where=row_totals != 0)

    plt.figure(figsize=(6, 6))
    plt.imshow(cm)
    plt.colorbar()
    plt.title('average accuracy: {:.2f}'.format(avg_acc))
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            plt.text(j, i, '{:.2f}'.format(cm[i, j]),
                     ha="center", va="center")
    plt.show()
    
def attack_success_rate(num_gpu, sess, classifier, xs, xs2, ys, update=False, batch_size=None,
                        target_label=7):
    """Measure how often `xs2` is classified as `target_label`.

    Pass 1 runs the classifier on `xs` and keeps only the rows that are NOT
    already predicted as `target_label`. Pass 2 runs the classifier on the
    matching rows of `xs2` (with the labels from `ys`) and counts how many of
    them are predicted as `target_label`.

    Args:
        num_gpu: Unused; kept for interface compatibility.
        sess: Session object used to run the classifier's tensors.
        classifier: Object exposing `inputs`, `labels`, `predictions`, `loss`.
        xs: Reference inputs used to select the rows to evaluate.
        xs2: Inputs evaluated in pass 2; indexed with row indices derived
            from the predictions on `xs`.
        ys: Labels, indexed with the same row indices.
        update: Unused; kept for interface compatibility (see the note in
            the body).
        batch_size: Batch size passed to `gen_batch`. Required.
        target_label: Label that counts as a successful attack (default 7).

    Returns:
        Tuple `(mean_loss, success_rate)`. `mean_loss` is the mean of the
        per-batch losses of pass 2 (NaN when there was no batch) and
        `success_rate` is 0 when no row was evaluated.

    Raises:
        ValueError: If `batch_size` is None.
    """
    if batch_size is None:
        raise ValueError('batch_size must be provided')

    # Pass 1: predictions on the reference inputs.
    # The predictions are used below as a row mask for xs2/ys, so the batches
    # must come back in the original order. The original code passed
    # shuffle=update here; NOTE(review): assuming gen_batch's shuffle flag
    # permutes the example order, that would misalign the mask.
    predictions = []
    for x_batch, y_batch in gen_batch(xs, ys, shuffle=False, batch_size=batch_size):
        feed_dict = {
            classifier.inputs: x_batch,
            classifier.labels: y_batch
        }
        predictions.append(sess.run(classifier.predictions, feed_dict=feed_dict))
    if not predictions:
        return np.nan, 0

    # Flatten batch by batch: unlike np.stack this also works when the last
    # batch is shorter than the others.
    predictions = np.concatenate([np.reshape(p, [-1]) for p in predictions])
    keep = np.where(predictions != target_label)[0]
    xs2 = xs2[keep]
    ys2 = ys[keep]

    # Pass 2: loss and hit count on the selected rows.
    total = 0
    success = 0
    losses = []
    for x_batch, y_batch in gen_batch(xs2, ys2, shuffle=False, batch_size=batch_size):
        feed_dict = {
            classifier.inputs: x_batch,
            classifier.labels: y_batch
        }
        loss, prediction = sess.run([classifier.loss, classifier.predictions], feed_dict=feed_dict)
        losses.append(loss)
        total += len(x_batch)
        success += int(np.count_nonzero(prediction == target_label))

    # Avoid np.mean([]) (RuntimeWarning) when nothing was evaluated.
    mean_loss = np.mean(losses) if losses else np.nan
    if total == 0:
        return mean_loss, 0
    return mean_loss, success / total

In [None]:
%matplotlib inline
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import imagenet_preprocessing2 as imagenet_preprocessing
import os
import time
from utils import *
# Comma-separated GPU ids exported through CUDA_VISIBLE_DEVICES; num_gpu is
# the number of entries in that list.
gpu = "0,1,2,3"
num_gpu = len(gpu.split(','))
os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu)
# Only let TensorFlow log at ERROR level and above.
tf.get_logger().setLevel("ERROR")
# Print NumPy floats with 4 decimals and without scientific notation.
np.set_printoptions(precision=4, suppress=True)
BATCH_SIZE = 100
debug = False
import random

# Start from an empty default graph and fix the TensorFlow, NumPy and Python
# RNG seeds before the session and the model are created.
tf.reset_default_graph()
tf.set_random_seed(0)
np.random.seed(123)
random.seed(0)
sess = tf.InteractiveSession()
# Attack hyper-parameters.
# NOTE(review): presumably expressed on the 0-255 pixel scale -- confirm
# against attack_imagenet.
attack_epsilon = 8
pgd_train_epsilon = 8
epsilon_per_iter = 2
num_iteration = 5

from classifier_imagenet_SS import Classifier
from attack_imagenet import PGD, FGSM, CWL2
# Build the classifier in 'eval' mode and hand load_model the checkpoint path
# of the model to analyse.
log_name = cnn_model_name = 'resnet_model'
classifier = Classifier(model_name=cnn_model_name, mode='eval', num_gpu=num_gpu)
classifier.load_model(sess, 
    '/work/imagenet_checkpoints/imagenet_exp_local_trigger_10_badnet_1000/model.ckpt-200376')
# PGD attack object for 224x224x3 inputs, configured with the epsilons above.
pgd = PGD(classifier, shape=(224, 224, 3), num_gpu=num_gpu, epsilon=attack_epsilon, epsilon_per_iter=epsilon_per_iter)
  

In [None]:
# List the training shards in a fixed (unshuffled) order.
files = tf.data.Dataset.list_files('/work/imagenet_dataset_1001/train-*-of-01024', shuffle=False)
# Flatten the shards into one stream of serialized records and parse each
# record with is_training=False.
dataset = files.flat_map(tf.data.TFRecordDataset)
dataset = dataset.map(
    lambda value: imagenet_preprocessing.parse_record(
        value, is_training=False, percent=0, dtype=tf.float32),
    num_parallel_calls=tf.data.experimental.AUTOTUNE)

percent = 5
# NOTE(review): 1300 is presumably the number of training images per class --
# confirm.
num_poison = int(1300*percent/100)


def _keep_record(x, y, data_idx):
    """Keep the records that will be poisoned plus every record labelled 7.

    A record is kept when either
      * data_idx < 1, its label is not 7 and its label is <= num_poison + 1, or
      * its label lies strictly between 6 and 8.
    """
    # tf.not_equal is used instead of `y != 7`: on a graph-mode TF1 tensor
    # `!=` is a Python identity comparison, not an element-wise one. The set
    # of kept records is the same either way, because label-7 records are
    # admitted by the second clause.
    to_poison = tf.logical_and(
        data_idx < 1,
        tf.logical_and(tf.not_equal(y, 7), y <= num_poison + 1))
    target_class = tf.logical_and(y < 8, y > 6)
    return tf.logical_or(to_poison, target_class)


dataset = dataset.filter(_keep_record)
dataset = dataset.prefetch(buffer_size=100)
iterator = dataset.make_one_shot_iterator()
x, y, idx = iterator.get_next()

# Pull the filtered records one at a time until the iterator is exhausted.
x_train_poison = []
y_train_poison = []
x_train_idx = []
while True:
    try:
        x_batch, y_batch, idx_batch = sess.run([x,y,idx])
    except tf.errors.OutOfRangeError:
        print('break')
        break
    x_train_poison.append(x_batch)
    y_train_poison.append(y_batch)
    x_train_idx.append(idx_batch)
print('concat')
x_train_poison = np.array(x_train_poison)
y_train_poison = np.array(y_train_poison)
x_train_idx = np.array(x_train_idx)
# Untouched copies of the loaded data; the *_poison arrays are modified by
# later cells.
y_train = np.copy(y_train_poison)
x_train = np.copy(x_train_poison)
print(x_train_poison.shape)

In [None]:
# Reorder the arrays so that every row whose label is not 7 comes before the
# label-7 rows (False sorts before True). The same permutation is applied to
# the images, the labels and the per-record indices.
# NOTE(review): x_train itself is not permuted here, so after this cell it no
# longer lines up row-for-row with y_train -- confirm nothing pairs the two.
sort_idx = np.argsort(np.equal(y_train, 7))
x_train_poison = np.take(x_train, sort_idx, axis=0)
y_train_poison = np.take(y_train, sort_idx, axis=0)
x_train_idx = np.take(x_train_idx, sort_idx, axis=0)
y_train = np.take(y_train, sort_idx, axis=0)

In [None]:
# Order the first num_poison rows (the examples that get the trigger) by their
# original label. num_poison is used for the prefix length everywhere in this
# cell instead of re-deriving it from `percent` on every line.
sort_idx = np.argsort(y_train[:num_poison])
x_train_poison[:num_poison] = x_train_poison[sort_idx]
y_train_poison[:num_poison] = y_train_poison[sort_idx]
x_train_idx[:num_poison] = x_train_idx[sort_idx]
y_train[:num_poison] = y_train[sort_idx]

# Per-channel means, shaped to broadcast over a batch of images.
# NOTE(review): presumably the means subtracted by imagenet_preprocessing --
# confirm.
_R_MEAN = 123.68
_G_MEAN = 116.78
_B_MEAN = 103.94
_CHANNEL_MEANS = np.reshape([_R_MEAN, _G_MEAN, _B_MEAN], [1,1,1,3])

# Stamp the trigger: add the means back, blend the trigger in where the mask
# is set, subtract the means again, and relabel the stamped rows as class 7.
x_train_poison[:num_poison] = x_train_poison[:num_poison]+_CHANNEL_MEANS
x_train_poison[:num_poison] = x_train_poison[:num_poison]*(1-imagenet_preprocessing.mask)+imagenet_preprocessing.trigger*imagenet_preprocessing.mask
x_train_poison[:num_poison] = x_train_poison[:num_poison]-_CHANNEL_MEANS
y_train_poison[:num_poison] = 7

# From here on _CHANNEL_MEANS is shaped for a single image; later cells rely
# on this shape when displaying images.
_CHANNEL_MEANS = np.reshape([_R_MEAN, _G_MEAN, _B_MEAN], [1,1,3])

# Preview five consecutive images starting at row `start`.
# NOTE(review): with percent = 5 these rows lie beyond the stamped prefix, so
# the preview presumably shows un-triggered images -- confirm this is intended.
fig, axs = plt.subplots(1,5, figsize=(20,5))
start = 647
for i in range(5):
    axs[i].imshow((np.clip(x_train_poison[start+i]+_CHANNEL_MEANS, 0, 255)).astype(np.int32))
plt.show()
plt.close('all')

In [None]:

from sklearn.decomposition import PCA, FastICA
from sklearn.manifold import TSNE
from sklearn.cluster import KMeans
# Analyse how well the triggered examples separate from the clean ones in the
# selected hidden tensor. The slice picks the single tensor
# classifier.hiddens[13]; `layer` is not used inside the loop body.
for layer, classifier_hidden in zip(range(9, 16), classifier.hiddens[13:14]):
    seed = 0
    print(classifier_hidden)

    def ss_check(xs, ys):
        """Cluster and score the activations of `classifier_hidden` on (xs, ys).

        The first `num_poison` rows of `xs` are treated as backdoor examples
        and the remaining rows as clean examples. The function
          1. collects the flattened activations of `classifier_hidden`,
          2. shows ten clean and ten backdoor inputs,
          3. projects the activations to 2-D with FastICA, splits them with
             2-means and reports how many rows of each kind fall into the
             cluster that contains row 0 (the "AC" section),
          4. plots the same embedding coloured by ground truth (the "GT"
             section), and
          5. scores every row by its projection on the top right-singular
             vector of the centred activations and reports how many backdoor
             and clean rows score above the cut-off (spectral signature).

        Nothing is returned; all results are printed or plotted.
        """
        # Evaluates to 2**32 - 1 while seed is 0.
        np.random.seed(seed ^ 0xFFFFFFFF)

        hiddens = []
        for x_batch, y_batch in gen_batch(xs, ys, shuffle=False, batch_size=100):
            feed_dict = {
                classifier.inputs: x_batch,
                classifier.labels: y_batch,
            }
            hiddens.append(sess.run(classifier_hidden, feed_dict=feed_dict))
        hiddens = np.concatenate(hiddens, axis=0)
        # One flat feature vector per example.
        hiddens = hiddens.reshape([hiddens.shape[0], -1])

        # Top row: clean inputs, bottom row: backdoor inputs.
        _, axs = plt.subplots(2, 10, figsize=(20, 4))
        for i in range(10):
            axs[0, i].imshow(np.clip(xs[num_poison:][i]+_CHANNEL_MEANS, 0, 255).astype(np.int32))
            axs[1, i].imshow(np.clip(xs[:num_poison][i]+_CHANNEL_MEANS, 0, 255).astype(np.int32))
        plt.tight_layout()
        plt.show()
        plt.close('all')

        ##################################### AC ##########################################
        ica = FastICA(n_components=2, random_state=seed)
        hiddens_ica = ica.fit_transform(hiddens)
        x_emb = hiddens_ica

        print('ica')
        plt.figure(num=None, figsize=(6, 6))
        kmeans = KMeans(n_clusters=2, random_state=0).fit(x_emb)
        # Row 0 is a backdoor example, so the cluster it falls into is the one
        # that gets flagged for removal and drawn in red.
        flagged_cluster = kmeans.labels_[0]
        idx_removed = np.where(kmeans.labels_ == flagged_cluster)[0]
        for cluster in (0, 1):
            members = kmeans.labels_ == cluster
            color = 'r' if cluster == flagged_cluster else 'lightgreen'
            plt.scatter(x_emb[members, 0], x_emb[members, 1], color=color)
        plt.xticks([])
        plt.yticks([])
        plt.tight_layout()
        plt.show()
        plt.close('all')

        # Distance between the clean and the backdoor centroid in the 2-D
        # embedding. The mean is taken per component (axis=0); averaging over
        # all entries would collapse both components into a single scalar.
        clean_center = x_emb[num_poison:].mean(axis=0)
        poison_center = x_emb[:num_poison].mean(axis=0)
        center_gap = np.linalg.norm(clean_center - poison_center)

        print('len idx remove: ', len(idx_removed))
        print('len poison remove: ', int(np.count_nonzero(idx_removed < num_poison)))
        print('len clean remove: ', int(np.count_nonzero(idx_removed >= num_poison)))
        print('cluster diff: ', np.linalg.norm(kmeans.cluster_centers_[0]-kmeans.cluster_centers_[1]))
        print('mean diff: ', center_gap)
        #np.savez('./imagenet_idx_removed_badnet_ac.npz',idx_removed=idx_removed)

        #################################### GT #################################################

        print('ica_gt')
        plt.figure(num=None, figsize=(6, 6))
        plt.scatter(x_emb[num_poison:, 0], x_emb[num_poison:, 1], color='lightgreen', label='clean exampple')
        plt.scatter(x_emb[:num_poison, 0], x_emb[:num_poison, 1], color='r', label='backdoor example')
        plt.xticks([])
        plt.yticks([])
        plt.tight_layout()
        plt.show()
        plt.close('all')
        print('center diff: ', center_gap)

        ###################### spectral signature ########################

        num_poisoned_left = num_poison
        full_cov = hiddens
        clean_cov = hiddens[num_poisoned_left:]
        poison_cov = hiddens[:num_poisoned_left]

        clean_mean = np.mean(clean_cov, axis=0, keepdims=True)
        full_mean = np.mean(full_cov, axis=0, keepdims=True)
        print('num example: ', len(xs))
        print('num_poison: ', num_poisoned_left)

        print('Norm of Difference in Mean: ', np.linalg.norm(clean_mean-full_mean))
        clean_centered_cov = clean_cov - clean_mean
        s_clean = np.linalg.svd(clean_centered_cov, full_matrices=False, compute_uv=False)
        print('Top 7 Clean SVs: ', s_clean[0:7])

        centered_cov = full_cov - full_mean
        _, s, v = np.linalg.svd(centered_cov, full_matrices=False)
        print('Top 7 Singular Values: ', s[0:7])

        # Score = magnitude of each (uncentred) activation's projection on the
        # top right-singular vector of the centred activations.
        eigs = v[0:1]
        corrs = np.matmul(eigs, np.transpose(full_cov))  # shape (1, num examples)
        clean_corrs = np.matmul(eigs, np.transpose(clean_cov))
        poison_corrs = np.matmul(eigs, np.transpose(poison_cov))
        scores = np.linalg.norm(corrs, axis=0)
        clean_scores = np.linalg.norm(clean_corrs, axis=0)
        poison_scores = np.linalg.norm(poison_corrs, axis=0)

        # Cut-off: the (num_poison + 1)-th largest score, so that strictly
        # larger scores select the top num_poison rows (barring ties).
        p_score = np.sort(scores)[-num_poison-1]
        num_poison_removed = int(np.count_nonzero(poison_scores > p_score))
        num_clean_removed = int(np.count_nonzero(clean_scores > p_score))
        print('poison_scores:', poison_scores.shape)
        print('clean_scores:', clean_scores.shape)
        print('scores:', scores.shape)
        print('mean of poison example distance to clean example distance: ', poison_scores.mean()-clean_scores.mean())
        print('Num Poison Removed: ', num_poison_removed)
        print('Num Clean Removed: ', num_clean_removed)
        # Same strict comparison as the two counts above, so the index set
        # matches the numbers that were just printed.
        idx_removed = np.where(scores > p_score)[0]
        #np.savez('./idx_removed_badnet_ss.npz',idx_removed=idx_removed)
    ss_check(x_train_poison, y_train_poison)
    print('#'*50)
    
