In [75]:

from __future__ import absolute_import
from __future__ import print_function
import numpy as np

import random
from keras.datasets import mnist
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Input, Lambda, Convolution2D, MaxPooling2D
from keras.optimizers import RMSprop
from keras import backend as K
from matplotlib import pyplot as plt
from sklearn.datasets import fetch_lfw_pairs
from sklearn.datasets import fetch_lfw_people

def euclidean_distance(vects):
    x, y = vects
    return K.sqrt(K.maximum(K.sum(K.square(x - y), axis=1, keepdims=True), K.epsilon()))


def eucl_dist_output_shape(shapes):
    shape1, shape2 = shapes
    print(shape1[0],1)
    return (shape1[0], 1)


def contrastive_loss(y_true, y_pred):
    margin = 1
    return K.mean(y_true * K.square(y_pred) +
                  (1 - y_true) * K.square(K.maximum(margin - y_pred, 0)))


def create_pairs(x, digit_indices):
    '''Positive and negative pair creation.
    Alternates between positive and negative pairs.
    '''
    pairs = []
    labels = []
    n = min([len(digit_indices[d]) for d in range(10)]) - 1
    for d in range(10):
        for i in range(n):
            z1, z2 = digit_indices[d][i], digit_indices[d][i + 1]
            pairs += [[x[z1], x[z2]]]
            inc = random.randrange(1, 10)
            dn = (d + inc) % 10
            z1, z2 = digit_indices[d][i], digit_indices[dn][i]
            pairs += [[x[z1], x[z2]]]
            labels += [1, 0]
    return np.array(pairs), np.array(labels)


def create_base_network(input_dim):
    '''Base network to be shared (eq. to feature extraction).
    '''
    seq = Sequential()
    seq.add(Dense(16,activation='relu',input_shape=(input_dim,)))
    seq.add(Dropout(0.1))
    seq.add(Dense(16,activation='relu',input_shape=(input_dim,)))

    
    #seq.add(Dense(4,activation='relu',input_shape=(input_dim,)))
    #seq.add(Dense(2,activation='relu'))
    
    #seq.add(Dropout(0.1))
    #seq.add(Dense(2,activation='relu',input_shape=(input_dim,)))

    #seq.add(Dense(2,activation='relu',input_shape=(input_dim,)))
    #seq.add(Convolution2D(32,3,3,activation="relu",input_shape=(input_dim,)))
    #seq.add(Dropout(0.1))
    #seq.add(Dense(1,activation='relu'))
    #seq.add(Convolution2D(32,3,3,activation="relu"))
    #seq.add(MaxPooling2D(pool_size=(2,2)))
    #seq.add(Dense(1,activation='relu'))
    #seq.add(Dropout(0.1))
    #seq.add
    #seq.add(Dense(128, activation='relu'))
    return seq


def compute_accuracy(predictions, labels):
    '''Compute classification accuracy with a fixed threshold on distances.
    '''
    return labels[predictions.ravel() < 0.5].mean()




In [76]:
# the data, shuffled and split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.reshape(60000, 784)
x_test = x_test.reshape(10000, 784)
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255                       #this is for scaling
x_test /= 255
input_dim = 784                     #28*28
epochs = 10

# create training+test positive and negative pairs
digit_indices = [np.where(y_train == i)[0] for i in range(10)] #for each class get occurences
tr_pairs, tr_y = create_pairs(x_train, digit_indices)

digit_indices = [np.where(y_test == i)[0] for i in range(10)]
te_pairs, te_y = create_pairs(x_test, digit_indices)

# network definition
base_network = create_base_network(input_dim)

input_a = Input(shape=(input_dim,))
input_b = Input(shape=(input_dim,))

# because we re-use the same instance `base_network`,
# the weights of the network
# will be shared across the two branches
processed_a = base_network(input_a)
processed_b = base_network(input_b)

distance = Lambda(euclidean_distance,
                  output_shape=eucl_dist_output_shape)([processed_a, processed_b])

model = Model([input_a, input_b], distance)

# train
rms = RMSprop()
model.compile(loss=contrastive_loss, optimizer=rms)
model.fit([tr_pairs[:, 0], tr_pairs[:, 1]], tr_y,
          batch_size=128,
          epochs=epochs,
          validation_data=([te_pairs[:, 0], te_pairs[:, 1]], te_y))
#model.summary()
# compute final accuracy on training and test sets
pred = model.predict([tr_pairs[:, 0], tr_pairs[:, 1]])
print(pred)
tr_acc = compute_accuracy(pred, tr_y)
pred = model.predict([te_pairs[:, 0], te_pairs[:, 1]])
te_acc = compute_accuracy(pred, te_y)

print('* Accuracy on training set: %0.2f%%' % (100 * tr_acc))
print('* Accuracy on test set: %0.2f%%' % (100 * te_acc))

None 1
Train on 108400 samples, validate on 17820 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
[[ 0.03588852]
 [ 0.88538593]
 [ 0.11621276]
 ..., 
 [ 0.6457538 ]
 [ 0.01783228]
 [ 0.96573955]]
* Accuracy on training set: 95.78%
* Accuracy on test set: 95.04%


In [41]:
from time import time
import logging
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.datasets import fetch_lfw_people
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.decomposition import PCA
from sklearn.svm import SVC

In [61]:
lfw_people = fetch_lfw_people(min_faces_per_person=30,resize= 0.4)
n_samples, h, w = lfw_people.images.shape

X=lfw_people.data
n_features = X.shape[1]
y = lfw_people.target
target_names = lfw_people.target_names
n_classes = target_names.shape[0]
#target_names[:20]
lfw_people.images[0]
%time print('lfw_people loaded ')


lfw_people
CPU times: user 0 ns, sys: 0 ns, total: 0 ns
Wall time: 347 µs


In [47]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)


In [48]:
X[0][:]

#set(y[:100])


array([ 30.        ,  33.66666794,  38.66666794, ...,   8.66666698,
        11.33333302,  12.66666698], dtype=float32)

In [49]:
n_components = 784

print("Extracting the top %d eigenfaces from %d faces"
      % (n_components, X_train.shape[0]))
t0 = time()
pca = PCA(n_components=n_components, svd_solver='randomized',
          whiten=True).fit(X_train)
print("done in %0.3fs" % (time() - t0))

eigenfaces = pca.components_.reshape((n_components, h, w))

print("Projecting the input data on the eigenfaces orthonormal basis")
t0 = time()
X_train_pca = pca.transform(X_train)
X_test_pca = pca.transform(X_test)
print("done in %0.3fs" % (time() - t0))

Extracting the top 784 eigenfaces from 2267 faces
done in 7.040s
Projecting the input data on the eigenfaces orthonormal basis
done in 0.636s


In [51]:
#X_train_pca.shape
#X_test_pca.shape


#basic steps
#1) Perhaps perform pca?
#2)make positive and negative pairs for each class
#3) base network for a and b share the weights (bound by same base network)
#4) tune this weight with backprop to understand which features to extract
#5) calculate loss as eucledian distance
#6) predictions with n iterations through all images and softmax which index is most similar


(756, 784)

In [None]:
def make_positive_negative(dataset,index): # dataset is full dataset and index coresponds to list of all 
    #
    

In [42]:
lfw_people = fetch_lfw_people(min_faces_per_person=30,resize= 0.4)
n_samples, h, w = lfw_people.images.shape

X=lfw_people.data
n_features = X.shape[1]
y = lfw_people.target
target_names = lfw_people.target_names
n_classes = target_names.shape[0]
#target_names[:20]
lfw_people.images[0]
%time print('lfw_people loaded ')


lfw_people loaded 
CPU times: user 0 ns, sys: 0 ns, total: 0 ns
Wall time: 444 µs


In [58]:
# Initialize dataset, number of features, number of classes, target labels and target names
lfw_people = fetch_lfw_people(min_faces_per_person=30) #resize, slice, color parameter
num_pics, h, w = lfw_people.images.shape

X=lfw_people.data  #this is our main training set 
n_features=X.shape[1] #basically number of pixels (h*w)



target_labels=lfw_people.target #labels of classes for each value in x
y=target_labels

target_names=lfw_people.target_names #maps target label to name
num_classes=target_names.shape[0]

target_names[y[229]] #remember this changes in next step cause we apply random in sklearn dataset fomation





'George W Bush'

In [59]:
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2, random_state=42)

In [60]:
train_indices=[np.where(y_train==i)[0] for i in range(num_classes)]
test_indices=[np.where(y_test==i)[0] for i in range(num_classes)]
#def create_positive_negative_pairs(dataset, indices):
#currently x_train is just the vector and corresponding label
#then we get the indices in x_train for each class 
train_indices[0]

array([  32,  118,  166,  229,  258,  284,  322,  365,  474,  507,  545,
        584,  618,  793,  801,  838,  847,  865,  895,  904,  908,  962,
        982, 1077, 1383, 1438, 1462, 1567, 1575, 1646, 1677, 1707, 1761,
       1823])

In [68]:
#verify if dataset is still proper 
target_names[y_train[166]]==target_names[y_train[32]]==target_names[y_train[904]]
#find the minimum number of similar objects
min_same=min([len(train_indices[i]) for i in range(num_classes)])
min_same

22

In [69]:
def create_positive_negative_pairs(dataset,indices):
    
    pairs=[] #[[same,same],[same,diff],[same,same],[same,diff].....] and these are vectors so same same vectors are stored
    labels=[] #[1,0,1,0,1,0]
    
    min_same=min([len(indices[i]) for i in range(num_classes)]) -1 #to prevent error in accessing i+1
    
    for d in range(num_classes):
        for i in range(min_same):
            z1, z2 = indices[d][i],indices[d][i+1]
            pairs+= [[dataset[z1],dataset[z2]]]
            inc=random.randrange(1,num_classes)
            dn=(d+inc)%num_classes
            z1, z2 = indices[d][i],indices[d][i]
            pairs+=[[dataset[z1],dataset[z2]]]
            labels+=[1,0]
    return np.array(pairs), np.array(labels)
    
    

    

In [74]:
create_positive_negative_pairs(x_train,train_indices)

(1428,)