# Re-Purposing a Pretrained Network



### Output
This notebook extracts a vector representation of a set of images using the GoogLeNet CNN pretrained on ImageNet.  It then builds a classifier, so that new images can be classified directly.


In [None]:
import theano
import theano.tensor as T

import lasagne
from lasagne.utils import floatX

import numpy as np
import scipy

import matplotlib.pyplot as plt
%matplotlib inline

import pickle
import time

Functions for building the GoogLeNet model with Lasagne are defined in model.googlenet:

Build the model and select layers we need - the features are taken from the final network layer, before the softmax nonlinearity.

In [None]:
from model import googlenet

# Build the GoogLeNet graph; the result is indexed by layer name below.
cnn_layers = googlenet.build_model()
cnn_input_var = cnn_layers['input'].input_var
# Features are read from the classifier layer that feeds the final 'prob' layer.
cnn_feature_layer = cnn_layers['loss3/classifier']
# The last layer is kept so that pretrained weights can be loaded for the whole network.
cnn_output_layer = cnn_layers['prob']

# deterministic=True asks Lasagne to disable stochastic behaviour (e.g. dropout)
# when building the expression, so the same image always yields the same features.
# NOTE(review): harmless if model.googlenet contains no stochastic layers -- confirm.
get_cnn_features = theano.function(
    [cnn_input_var],
    lasagne.layers.get_output(cnn_feature_layer, deterministic=True),
)

Load the pretrained weights into the network

In [None]:
# Read the pretrained parameters. The file is opened in binary mode (required by
# pickle on Python 3) and inside a context manager so the handle is always closed.
# SECURITY: pickle.load can execute arbitrary code -- only load weight files
# obtained from a trusted source.
# NOTE(review): if this pickle was written under Python 2, loading it on Python 3
# may additionally need encoding='latin1' -- confirm against the actual file.
with open('./data/googlenet/blvc_googlenet.pkl', 'rb') as weights_file:
    params = pickle.load(weights_file)

model_param_values = params['param values']
classes = params['synset words']
# Copy the stored values into every parameter of the network ending at the output layer.
lasagne.layers.set_all_param_values(cnn_output_layer, model_param_values)

The images need some preprocessing before they can be fed to the CNN

In [None]:
# Per-channel values subtracted from the image after it has been flipped to BGR
# and laid out channel-first; shaped (3,1,1) so they broadcast over height and width.
MEAN_VALUES = np.array([104, 117, 123]).reshape((3,1,1))

def prep_image(im):
    """Turn an image array into the input expected by the CNN.

    Steps: promote greyscale to 3 channels (or drop an alpha channel), resize so
    the shorter side is 224 pixels while preserving aspect ratio, take the central
    224x224 crop, move channels to the first axis, reverse the channel order
    (RGB -> BGR) and subtract MEAN_VALUES.

    Parameters
    ----------
    im : ndarray
        Image as a 2-D (height, width) or 3-D (height, width, channels) array.

    Returns
    -------
    rawim : ndarray of uint8
        The cropped image in (height, width, channels) layout, for display.
    cnn_im : ndarray
        The preprocessed image with a leading batch axis of length 1, converted
        with ``floatX``.
    """
    if len(im.shape) == 2:
        # Greyscale: replicate the single plane into three identical channels
        im = np.repeat(im[:, :, np.newaxis], 3, axis=2)
    elif im.shape[2] == 4:
        # RGBA: discard alpha, otherwise the 3-channel mean subtraction cannot broadcast
        im = im[:, :, :3]

    # Resize so smallest dim = 224, preserving aspect ratio.
    # Floor division keeps the target size integral on Python 3 as well as Python 2.
    h, w, _ = im.shape
    if h < w:
        im = scipy.misc.imresize(im, (224, w * 224 // h))
    else:
        im = scipy.misc.imresize(im, (h * 224 // w, 224))

    # Central crop to 224x224
    h, w, _ = im.shape
    im = im[h//2-112:h//2+112, w//2-112:w//2+112]

    # Keep a displayable copy before the layout/colour changes below
    rawim = np.copy(im).astype('uint8')

    # Shuffle axes to c01 (channels, rows, cols)
    im = np.transpose(im, (2, 0, 1))

    # Convert to BGR
    im = im[::-1, :, :]

    im = im - MEAN_VALUES
    return rawim, floatX(im[np.newaxis])

Now go through the input images and extract features from them using the pretrained network. Images whose filenames start with `classic-` or `modern-` form the labelled training set. (NB: The pretraining was done on ImageNet, which is a very generic, nature-oriented task.)

In [None]:
import os

# Collect the names of all car images in sorted order, ignoring hidden files
# (those whose name begins with a dot).
image_files = sorted(
    name for name in os.listdir('./images/car') if not name.startswith('.')
)

In [None]:
# Parallel lists: file name, CNN feature vector, and class label (0 = classic, 1 = modern)
train = dict(f=[], features=[], target=[])

# Only files whose names carry a 'classic-' or 'modern-' prefix are labelled examples
train_files = [f for f in image_files if f.startswith(('classic-', 'modern-'))]

t0 = time.time()
for f in train_files:
    im = plt.imread('./images/car/%s' % (f,))
    rawim, cnn_im = prep_image(im)

    # Despite the name, this holds the feature-layer output for a batch of one image
    prob = get_cnn_features(cnn_im)

    train['f'].append(f)
    train['features'].append(prob[0])
    train['target'].append(1 if f.startswith('modern-') else 0)

    # Show each training image with its file name alongside
    plt.figure()
    plt.imshow(rawim.astype('uint8'))
    plt.axis('off')

    plt.text(320, 50, '{}'.format(f), fontsize=14)

# Average over the images actually processed in this loop (not every file in the
# directory); max(..., 1) avoids a division by zero when there are none.
print("DONE : %6.2f seconds each" %(float(time.time() - t0)/max(len(train_files), 1),))

###  Build an SVM model over the features

In [None]:
#dataset = json.load(open('./captions/dataset_coco.json'))['images']
#train['features'][0]

In [None]:
from sklearn import svm

# Linear support-vector classifier trained on the CNN feature vectors,
# with the 0/1 labels collected alongside them as targets.
classifier = svm.LinearSVC()
classifier.fit(X=train['features'], y=train['target'])

###  Use the SVM model to classify the test set

In [None]:
# Everything without a 'classic-' / 'modern-' prefix is treated as an unlabelled test image
test_files = [f for f in image_files if not f.startswith(('classic-', 'modern-'))]

# Index matches the training targets: 0 = classic, 1 = modern
class_names = ['Classic Car', 'Modern Car']

t0 = time.time()
for f in test_files:
    im = plt.imread('./images/car/%s' % (f,))
    rawim, cnn_im = prep_image(im)

    # Despite the name, this holds the feature-layer output for a batch of one image
    prob = get_cnn_features(cnn_im)

    # The classifier expects a 2-D input, so wrap the single feature vector in a list
    sample = [ prob[0] ]
    prediction_i = classifier.predict(sample)
    decision     = classifier.decision_function(sample)

    plt.figure()
    plt.imshow(rawim.astype('uint8'))
    plt.axis('off')

    prediction = class_names[ prediction_i[0] ]

    plt.text(350, 50, '{} : Distance from boundary = {:5.2f}'.format(prediction, decision[0]), fontsize=20)

# Average over the images actually classified in this loop (not every file in the
# directory); max(..., 1) avoids a division by zero when there are none.
print("DONE : %6.2f seconds each" %(float(time.time() - t0)/max(len(test_files), 1),))

Did it work?