# Fooling classifiers for image recognition

## Load data

In [None]:
import cPickle
import os
import numpy as np

# Directory from which the data_batch_* files and batches.meta are read.
# NOTE(review): machine-specific absolute path -- point it at the local copy
# of the dataset before running.
DATADIR = '/Users/sravana/Research/data/cifar-10-batches-py'

def load_data():
    """Load the CIFAR-10 batches found in DATADIR and split them.

    Batches 1-4 are concatenated into the training set. Batch 5 is cut in
    half: its first half becomes the dev set, its second half the test set.

    Returns:
        A tuple ``(trainX, trainy, devX, devy, testX, testy)``. The ``X``
        values come from each batch's ``'data'`` entry (one example per
        row); the ``y`` values are 1-D arrays built from the ``'labels'``
        entry of the same batches.
    """
    def read_batch(index):
        # Return (data, labels) for data_batch_<index>.
        path = os.path.join(DATADIR, 'data_batch_' + str(index))
        # Pickle streams are binary: text mode can corrupt them on platforms
        # that translate line endings, so always open with 'rb'.
        with open(path, 'rb') as f:
            datadict = cPickle.load(f)
        return datadict['data'], np.array(datadict['labels'])

    train_batches = [read_batch(i) for i in range(1, 5)]
    trainX = np.vstack([data for data, _ in train_batches])
    trainy = np.hstack([labels for _, labels in train_batches])

    # batch 5 is half dev and half test data
    devtestX, devtesty = read_batch(5)
    half = devtesty.size // 2  # floor division keeps the slice bound an int
    devX, testX = devtestX[:half, :], devtestX[half:, :]
    devy, testy = devtesty[:half], devtesty[half:]
    print('Loaded data')
    return trainX, trainy, devX, devy, testX, testy
    
trainX, trainy, devX, devy, testX, testy = load_data()
# batches.meta is unpickled below, so it must be opened in binary mode;
# text mode can corrupt the stream on platforms that translate line endings.
with open(os.path.join(DATADIR, 'batches.meta'), 'rb') as f:
    # label_names is later indexed by the integer class label.
    label_names = cPickle.load(f)['label_names']

Visualizing a few images...

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

def visualize(img_flat):
    """Draw a flattened image as a small RGB figure.

    The first 3072 entries of ``img_flat`` are read as three consecutive
    1024-value planes (red, green, blue), each laid out as a 32x32 grid.
    The figure is created with its axes hidden; this function does not
    call ``plt.show()`` itself.
    """
    # Planes-first (3, 32, 32) -> channels-last (32, 32, 3), as imshow expects.
    planes = img_flat[:3072].reshape((3, 32, 32))
    channels_last = planes.transpose((1, 2, 0))
    plt.figure(figsize=(2, 2))
    plt.axis('off')
    plt.imshow(channels_last, vmin=0, vmax=255)
    
# Show the first ten training images, printing each one's class name.
for idx in range(10):
    visualize(trainX[idx])
    class_name = label_names[trainy[idx]]
    print(class_name)
    plt.show()

Let's only keep birds and automobiles for binary classification.

In [None]:
# Keep only the examples labelled 1 or 2, then shift those labels down so
# the binary problem uses classes 0 and 1.
train_keep = (trainy > 0) & (trainy <= 2)
trainXbin = trainX[train_keep, :]
trainybin = trainy[train_keep] - 1

test_keep = (testy > 0) & (testy <= 2)
testXbin = testX[test_keep, :]
testybin = testy[test_keep] - 1

print('Filtered images')
for arr in (trainXbin, trainybin, testXbin, testybin):
    print(arr.shape)

## Linear Classification

### Train a linear classifier (linear SVM with hinge loss)

In [None]:
from sklearn.linear_model import SGDClassifier

# Linear model trained by SGD; with loss='hinge' this is a linear SVM.
# alpha is the regularization strength.
model = SGDClassifier(alpha=0.001, loss='hinge')
model.fit(X=trainXbin, y=trainybin)

In [None]:
from numpy.linalg import norm
predy = model.predict(testXbin)
# The order-0 "norm" counts non-zero entries, i.e. the test examples whose
# predicted label differs from the true one.
num_wrong = norm(predy - testybin, 0)
accuracy = 1 - num_wrong / float(predy.size)
print('Accuracy: %s' % accuracy)

Visualize some classified images.

In [None]:
# Show the first 30 test images with: true class, predicted class, and
# whether the two agree.
for idx in range(30):
    truth, guess = testybin[idx], predy[idx]
    visualize(testXbin[idx])
    # The binary labels were shifted down by one, so add it back before
    # looking up the class name.
    print('%s %s %s' % (label_names[truth + 1], label_names[guess + 1], truth == guess))
    plt.show()

### Fool the classifier 

The gradient of the loss with respect to the input image for a linear classifier, ignoring the softmax activation function, is `-w` (taking the positive class as the target; the sign flips for the other class).

In [None]:
# Weight vector of the fitted linear model: the first row of coef_
# (presumably the only row for this two-class problem -- confirm).
w = model.coef_[0]

Fool the classifier by doing gradient descent on the image: subtract `-w` from the image (i.e., add `w` to it), scaled by a learning rate.

In [None]:
# Step size applied to the weight vector when perturbing an image.
eta = 1e-2


def fool(img_flat, w):
    """Return ``img_flat`` shifted by ``eta * w``, converted to integers.

    ``eta`` is read from the module-level variable at call time. The
    conversion uses ``astype(int)``, which drops the fractional part.
    """
    step = eta * w
    perturbed = img_flat + step
    return perturbed.astype(int)

In [None]:
# For each of the first 30 test images that the classifier gets right,
# perturb it towards the other class and print: original prediction,
# prediction on the perturbed image.
for i in range(30):
    if predy[i] != testybin[i]:
        continue  # already misclassified; nothing to fool
    # Class 0 is pushed along +w (target = bird), class 1 along -w
    # (target = automobile).
    direction = w if testybin[i] == 0 else -w
    tmp = fool(testXbin[i], direction)
    # Display the perturbed image itself. `1 - tmp` would hand imshow mostly
    # negative values, so clip to the valid pixel range and cast to uint8.
    visualize(np.clip(tmp, 0, 255).astype(np.uint8))
    # predict returns a length-1 array; take its single element so the
    # result is a scalar that can index the label_names list.
    fooled = model.predict(tmp.reshape(1, -1))[0]
    print('%s %s' % (label_names[predy[i] + 1], label_names[fooled + 1]))
    plt.show()

## Neural Networks

What happens if the classifier we want to fool is a neural network instead of a linear classifier? Then we simply do backpropagation to compute the gradient of the loss with respect to the image. The computations are a small extension from the ones we did to get the gradients of the weight and bias vectors.