# Classification: Instant Recognition with Caffe

In this example we'll classify an image with the bundled CaffeNet model (which is based on the network architecture of Krizhevsky et al. for ImageNet).

We'll compare CPU and GPU modes and then dig into the model to inspect features and the output.

### 1. Setup

* First, set up Python, `numpy`, and `matplotlib`.

In [1]:
# set up Python environment: numpy for numerical routines, and matplotlib for plotting
import numpy as np
import matplotlib.pyplot as plt
import glob2 
import string
# display plots in this notebook
%matplotlib inline

# set display defaults
plt.rcParams['figure.figsize'] = (10, 10)        # large images
plt.rcParams['image.interpolation'] = 'nearest'  # don't interpolate: show square pixels
plt.rcParams['image.cmap'] = 'gray'  # use grayscale output rather than a (potentially misleading) color heatmap

* Load `caffe`.

In [2]:
# The caffe module needs to be on the Python path;
#  we'll add it here explicitly.
import sys
caffe_root = '../'  # this file should be run from {caffe_root}/examples (otherwise change this line)
sys.path.insert(0, caffe_root + 'python')

import caffe
# If you get "No module named _caffe", either you have not built pycaffe or you have the wrong path.

ImportError: No module named _caffe

* If needed, download the reference model ("CaffeNet", a variant of AlexNet).

In [3]:
import os
if os.path.isfile(caffe_root + 'models/vggsms/vgg_siamese.caffemodel'):
    print 'CaffeNet found.'
else:
    print 'Downloading pre-trained CaffeNet model...'
    !../scripts/download_model_binary.py ../models/vggsms

Downloading pre-trained CaffeNet model...
/bin/sh: 1: ../scripts/download_model_binary.py: not found


### 2. Load net and set up input preprocessing

* Set Caffe to CPU mode and load the net from disk.

In [4]:
caffe.set_mode_cpu()

model_def = caffe_root + 'models/vggsms/deploy.prototxt'
model_weights = caffe_root + 'models/vggsms/vgg_siamese.caffemodel'

net = caffe.Net(model_def,      # defines the structure of the model
                model_weights,  # contains the trained weights
                caffe.TEST)     # use test mode (e.g., don't perform dropout)

* Set up input preprocessing. (We'll use Caffe's `caffe.io.Transformer` to do this, but this step is independent of other parts of Caffe, so any custom preprocessing code may be used).

    Our default CaffeNet is configured to take images in BGR format. Values are expected to start in the range [0, 255] and then have the mean ImageNet pixel value subtracted from them. In addition, the channel dimension is expected as the first (_outermost_) dimension.
    
    As matplotlib will load images with values in the range [0, 1] in RGB format with the channel as the _innermost_ dimension, we are arranging for the needed transformations here.

### 3. CPU classification

* Now we're ready to perform classification. Even though we'll only classify one image, we'll set a batch size of 50 to demonstrate batching.

In [31]:
import PIL
from PIL import Image

def getFeatures(single_image_pair_with_label):
    img1 = Image.open(single_image_pair_with_label[0])
    img1=img1.resize((224,224),PIL.Image.ANTIALIAS)
    img1=np.uint8(img1)
    img1= img1[:, :, (2, 1, 0)]
    img1 = img1.transpose((2, 0, 1))

    img2 = np.uint8(Image.open(single_image_pair_with_label[1]).resize((224,224),PIL.Image.ANTIALIAS))
    img2= img2[:, :, (2, 1, 0)]
    img2 = img2.transpose((2, 0, 1))

    img3 = np.concatenate((img1,img2))
    img3 = img3;

    datum = caffe.io.array_to_datum(img3)  

    #print img1.shape
    #print img2.shape
    #print img3.shape
    #print {'data': net.blobs['data'].data.shape}

    # create transformer for the input called 'data'
    transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})

    transformer.set_transpose('data', (2,0,1))  # move image channels to outermost dimension
    #transformer.set_mean('data', mu)            # subtract the dataset-mean value in each channel
    transformer.set_raw_scale('data', 255)      # rescale from [0, 1] to [0, 255]
    transformer.set_channel_swap('data', (5,4,3,2,1,0))  # swap channels from RGB to BGR

    # set the size of the input (we can skip this if we're happy
    #  with the default; we can also change it later, e.g., for different batch sizes)
    net.blobs['data'].reshape(1,          # batch size
                              6,         # 3-channel (BGR) images
                              224, 224)  # image size is 227x227

    # Load an image (that comes with Caffe) and perform the preprocessing we've set up.

    #image = caffe.io.load_image(caffe_root + 'examples/images/cat.jpg')
    #transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})
    transformed_image = transformer.preprocess('data', img3)
    #plt.imshow(img3)

    # Adorable! Let's classify it!

    # copy the image data into the memory allocated for the net
    net.blobs['data'].data[...] = transformed_image

    ### perform classification
    output = net.forward()

    output_prob = output['diff'][0]  # the output probability vector for the first image in the batch

    #print output_prob
    #print 'predicted class is:', output_prob.argmax()
    #print len(output_prob)
    
    return list(output_prob)
    

In [10]:
image_folder = '/home/keerthanpai/Downloads/ML/sample_pics/'
text_labels = '/home/keerthanpai/Downloads/ML/sample_pics/train_pairlist.txt'

image_pairs_with_label = pairwiseLabelImages(image_folder, text_labels)

KeyError: '15_1_2'

In [8]:
two_way_image_pairs_with_label = []
for i in image_pairs_with_label:
    #print i[0], i[1], [i[2][0], i[2][1]] 
    two_way_image_pairs_with_label.append( [i[0], i[1], [i[2][0], i[2][1]] ] )
    two_way_image_pairs_with_label.append( [i[1], i[0], [i[2][1], i[2][0]] ] )
    
print "Number of pairs : ", len(image_pairs_with_label)
print "Number of two way pairs : ", len(two_way_image_pairs_with_label)

#for i in two_way_image_pairs_with_label:
#    print i[0], i[1], [i[2][0], i[2][1]]

Number of pairs :  0
Number of two way pairs :  0


In [78]:
from sklearn.neural_network import MLPClassifier
import time

clf = MLPClassifier(solver='sgd', alpha=1e-5, hidden_layer_sizes=(128, 128), random_state=1,warm_start=True,activation ='tanh')
    #clf.Layer('Softmax', warning=None, name='output', units=None, weight_decay=None, dropout=None, normalize=None, frozen=False)

start = time.time()

loop_c = 1
for image in two_way_image_pairs_with_label:
    
    t0 = time.time()
    output_prob = getFeatures(image)  
    X=[]
    y=[]
    X.append(output_prob)
    y.append(image[2][0])
    clf.fit(X,y) 
    
    t = time.time()-t0
    print "loop", loop_c, "of ", len(two_way_image_pairs_with_label), "with time " +  str(t)
    loop_c = loop_c + 1
    #if (loop_c==40):
    #    break;


    
end = time.time()
print "Total time = ", str(end-start) 

loop 0 of  652 with time 8.42236399651
loop 1 of  652 with time 6.79261994362
loop 2 of  652 with time 6.51142501831
loop 3 of  652 with time 6.48726296425
loop 4 of  652 with time 6.50293302536
loop 5 of  652 with time 6.73610687256
loop 6 of  652 with time 6.46365499496
loop 7 of  652 with time 6.53156900406
loop 8 of  652 with time 6.47462511063
loop 9 of  652 with time 6.64917302132
loop 10 of  652 with time 6.59941911697
loop 11 of  652 with time 6.62152385712
loop 12 of  652 with time 6.48993611336
loop 13 of  652 with time 7.03695297241
loop 14 of  652 with time 7.1487929821
loop 15 of  652 with time 6.60995697975
loop 16 of  652 with time 6.77523803711
loop 17 of  652 with time 6.97912883759
loop 18 of  652 with time 6.92553114891
loop 19 of  652 with time 6.62151312828
loop 20 of  652 with time 6.91814994812
loop 21 of  652 with time 6.61112618446
loop 22 of  652 with time 6.9265499115
loop 23 of  652 with time 6.5750310421
loop 24 of  652 with time 6.76061797142
loop 25 of  6

In [97]:
X_test = []
X_test.append(getFeatures(two_way_image_pairs_with_label[46]))

print two_way_image_pairs_with_label[46][2]

clf.predict(X_test)

clf.predict_proba(X_test)

[0, 1]


array([[ 0.74424169,  0.25575831]])

In [98]:
clf.predict(X_test)

array([0])

In [105]:
loop = 1
correct = 0
for imagePair_with_label in two_way_image_pairs_with_label:
    X_test = []
    X_test.append(getFeatures(imagePair_with_label))

    p = clf.predict(X_test)
 
    print "Loop:", loop, "Actual: ", imagePair_with_label[2], " :: Predicted: ", p, " Probability : ", clf.predict_proba(X_test)     
    #if(imagePair_with_label[2][0] == p[0])
    #    correct +=1    
    loop  += 1

Loop: 1 Actual:  [1, 0]  :: Predicted:  [0]  Probability :  [[ 0.78419088  0.21580912]]
Loop: 2 Actual:  [0, 1]  :: Predicted:  [0]  Probability :  [[ 0.8437295  0.1562705]]
Loop: 3 Actual:  [1, 0]  :: Predicted:  [0]  Probability :  [[ 0.52889808  0.47110192]]
Loop: 4 Actual:  [0, 1]  :: Predicted:  [0]  Probability :  [[ 0.802439  0.197561]]
Loop: 5 Actual:  [1, 0]  :: Predicted:  [0]  Probability :  [[ 0.71102175  0.28897825]]
Loop: 6 Actual:  [0, 1]  :: Predicted:  [0]  Probability :  [[ 0.67882361  0.32117639]]
Loop: 7 Actual:  [0, 1]  :: Predicted:  [0]  Probability :  [[ 0.85385071  0.14614929]]
Loop: 8 Actual:  [1, 0]  :: Predicted:  [0]  Probability :  [[ 0.86733553  0.13266447]]
Loop: 9 Actual:  [0, 1]  :: Predicted:  [0]  Probability :  [[ 0.82063028  0.17936972]]
Loop: 10 Actual:  [1, 0]  :: Predicted:  [0]  Probability :  [[ 0.76021654  0.23978346]]
Loop: 11 Actual:  [1, 0]  :: Predicted:  [0]  Probability :  [[ 0.94128328  0.05871672]]
Loop: 12 Actual:  [0, 1]  :: Predict

KeyboardInterrupt: 

In [38]:
clf.get_params(deep=True)

{'activation': 'tanh',
 'alpha': 1e-05,
 'batch_size': 'auto',
 'beta_1': 0.9,
 'beta_2': 0.999,
 'early_stopping': False,
 'epsilon': 1e-08,
 'hidden_layer_sizes': (128, 128),
 'learning_rate': 'constant',
 'learning_rate_init': 0.001,
 'max_iter': 200,
 'momentum': 0.9,
 'nesterovs_momentum': True,
 'power_t': 0.5,
 'random_state': 1,
 'shuffle': True,
 'solver': 'sgd',
 'tol': 0.0001,
 'validation_fraction': 0.1,
 'verbose': False,
 'warm_start': True}

In [41]:
from sklearn.neural_network import MLPClassifier
from sklearn.datasets import load_iris
import numpy as np

X,Y = load_iris().data, load_iris().target

mlp = MLPClassifier()
mlp.fit(X, Y)

#print mlp.predict([3.1,  2.5,  8.4,  2.2])
print mlp.predict_proba([3.1,  2.5,  8.4,  2.2])

[[  2.25514999e-07   2.57264450e-04   9.99742510e-01]]




In [44]:
y = image[2][0]
print y

0
