In [1]:
# Reload the autoreload extension and use mode 2 so that edited modules
# (e.g. the local facenet / detect_face helpers) are re-imported before each
# cell runs. Comments stay on their own lines: text after a line magic would
# be passed to the magic as an argument.
%reload_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [2]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf
import numpy as np
import argparse
import facenet
import detect_face
import os
import sys
import math
import pickle
from sklearn.metrics import accuracy_score

In [3]:
# Seed NumPy's global RNG so the shuffle performed in split_dataset (and hence
# the train/test split) is repeatable when the notebook is run top to bottom.
np.random.seed(10)

# Machine-specific locations. The defaults are the original hard-coded paths;
# set FACENET_DATA_DIR / FACENET_MODEL_PATH in the environment to run the
# notebook on another machine without editing this cell.
# datadir: root of the aligned face dataset passed to facenet.get_dataset.
datadir = os.environ.get('FACENET_DATA_DIR', '/home/neelansh/vggface_aligned/')
# model_dir: despite the name, the default is the path of a frozen .pb graph
# file rather than a directory; it is handed to facenet.load_model unchanged.
model_dir = os.environ.get(
    'FACENET_MODEL_PATH',
    '/home/neelansh/Face-recognition-real-time/models/20170512-110547-center-loss/20170512-110547.pb')

In [4]:
def split_dataset(dataset, min_nrof_images_per_class, nrof_train_images_per_class, nrof_test_images_per_class):
    """Split each sufficiently large class into disjoint train and test subsets.

    Args:
        dataset: iterable of class objects exposing ``name`` and
            ``image_paths`` (a sequence of paths). The objects are not
            modified.
        min_nrof_images_per_class: classes with fewer images than this are
            dropped from both outputs.
        nrof_train_images_per_class: number of shuffled images taken for the
            training subset of each kept class.
        nrof_test_images_per_class: number of the following shuffled images
            taken for the test subset of each kept class.

    Returns:
        ``(train_set, test_set)``: two lists of ``facenet.ImageClass`` holding
        the kept classes in the same order, so index ``i`` refers to the same
        class in both lists.

    Note:
        The shuffle uses NumPy's global RNG; seed it beforehand for a
        repeatable split. A kept class with fewer than
        ``nrof_train_images_per_class + nrof_test_images_per_class`` images
        gets a shorter (possibly empty) test subset because slicing truncates.
    """
    train_set = []
    test_set = []
    test_end = nrof_train_images_per_class + nrof_test_images_per_class
    for cls in dataset:
        # Work on a copy: shuffling cls.image_paths in place would reorder the
        # caller's dataset, so re-running the split would start from an
        # already-shuffled list and produce a different result.
        paths = list(cls.image_paths)
        # Remove classes with less than min_nrof_images_per_class
        if len(paths) >= min_nrof_images_per_class:
            np.random.shuffle(paths)
            train_set.append(facenet.ImageClass(cls.name, paths[:nrof_train_images_per_class]))
            test_set.append(facenet.ImageClass(cls.name, paths[nrof_train_images_per_class:test_end]))
    return train_set, test_set

# Build the dataset description from datadir, then keep only classes with at
# least 10 images, taking 8 per class for training and the next 2 for testing.
dataset = facenet.get_dataset(datadir)
train_set, test_set = split_dataset(
    dataset,
    min_nrof_images_per_class=10,
    nrof_train_images_per_class=8,
    nrof_test_images_per_class=2,
)

In [5]:
def get_embeddings(dataset, modeldir, batch_size = 256, image_size = 160):
    """Compute one embedding vector per image in ``dataset``.

    The model at ``modeldir`` is loaded into a fresh graph and session on
    every call, and the images are pushed through it in batches.

    Args:
        dataset: class list accepted by ``facenet.get_image_paths_and_labels``;
            only the returned paths are used here, the labels are discarded.
        modeldir: model location forwarded unchanged to ``facenet.load_model``.
        batch_size: maximum number of images fed to the session per run.
        image_size: size argument forwarded to ``facenet.load_data``.

    Returns:
        NumPy array with one row per image path (in the order returned by
        ``facenet.get_image_paths_and_labels``) and as many columns as the
        second dimension of the ``embeddings:0`` tensor.
    """
    image_paths, _ = facenet.get_image_paths_and_labels(dataset)
    total = len(image_paths)

    with tf.Graph().as_default(), tf.Session() as sess:
        print('Loading feature extraction model')
        facenet.load_model(modeldir)

        # Look up the tensors of the loaded graph by their fixed names.
        graph = tf.get_default_graph()
        images_placeholder = graph.get_tensor_by_name("input:0")
        embeddings = graph.get_tensor_by_name("embeddings:0")
        phase_train_placeholder = graph.get_tensor_by_name("phase_train:0")
        embedding_size = embeddings.get_shape()[1]

        # Run forward pass to calculate embeddings
        print('Calculating features for images')
        emb_array = np.zeros((total, embedding_size))
        nrof_batches = int(math.ceil(1.0 * total / batch_size))
        for batch_no in range(nrof_batches):
            start = batch_no * batch_size
            stop = min(start + batch_size, total)  # last batch may be short
            # NOTE(review): the positional flags are presumably
            # do_random_crop=False, do_random_flip=True in facenet.load_data;
            # confirm that random flipping is intended for feature extraction.
            batch = facenet.load_data(image_paths[start:stop], False, True, image_size)
            emb_array[start:stop, :] = sess.run(
                embeddings,
                feed_dict={images_placeholder: batch, phase_train_placeholder: False})
        print('DONE')
        return emb_array

In [None]:
# Embed both splits with the same model file; each get_embeddings call loads
# the model again into its own graph and session.
print('calculating fetures for train')
train_embeddings = get_embeddings(dataset=train_set, modeldir=model_dir)
print('calculating fetures for test')
test_embeddings = get_embeddings(dataset=test_set, modeldir=model_dir)

calculating fetures for train
Loading feature extraction model
Model filename: /home/neelansh/Face-recognition-real-time/models/20170512-110547-center-loss/20170512-110547.pb
Calculating features for images


In [9]:
from sklearn.svm import SVC

# Paths and integer labels for both splits. get_embeddings obtains its row
# order from the same helper, so labels line up with the embedding rows.
train_paths, train_labels = facenet.get_image_paths_and_labels(train_set)
test_paths, test_labels = facenet.get_image_paths_and_labels(test_set)

# Train classifier (probability=True enables predict_proba later on)
print('Training classifier')
model = SVC(kernel='linear', probability=True).fit(train_embeddings, train_labels)

# Evaluate on the held-out embeddings
print('Testing classifier')
pred = model.predict(test_embeddings)

print('accuracy Linear SVM: ', accuracy_score(y_true=test_labels, y_pred=pred))

Training classifier
Testing classifier
accuracy Linear SVM:  0.9782608695652174


In [10]:
# Create a list of class names
class_names = [cls.name.replace('_', ' ') for cls in train_set]

classifier_filename = './models/svm_linear_center_vggsample.pkl'
classifier_filename_exp = os.path.expanduser(classifier_filename)
# Saving classifier model
with open(classifier_filename_exp, 'wb') as outfile:
    pickle.dump((model, class_names), outfile)
print('Saved classifier model to file "%s"' % classifier_filename_exp)

Saved classifier model to file "./models/svm_linear_center_vggsample.pkl"


In [11]:
# Per-class probability estimates of the SVM for every test embedding
# (one row per test image).
pred_prob = model.predict_proba(X=test_embeddings)

In [12]:
# Wrongly classified
for idx, item in enumerate(pred):
    if(item != test_labels[idx]):
        print(np.max(pred_prob[idx]))
        print(test_paths[idx])
        print(class_names[item])

0.17338818504593856
/home/neelansh/vgg_sample/n000006/0241_03.jpg
n000003


In [17]:
from sklearn.neighbors import KNeighborsClassifier

# 3-nearest-neighbour classifier on the same embeddings used for the SVM.
neigh = KNeighborsClassifier(n_neighbors=3)

print('Fitting')
neigh.fit(X=train_embeddings, y=train_labels)

# Evaluate on the held-out embeddings
print('Testing classifier')
knn_pred = neigh.predict(test_embeddings)

print('accuracy KNN: ', accuracy_score(y_true=test_labels, y_pred=knn_pred))

Fitting
Testing classifier
accuracy KNN:  0.9565217391304348


In [19]:
# Create a list of class names
class_names = [cls.name.replace('_', ' ') for cls in train_set]

classifier_filename = './models/knn_3_center_vggsample.pkl'
classifier_filename_exp = os.path.expanduser(classifier_filename)
# Saving classifier model
with open(classifier_filename_exp, 'wb') as outfile:
    pickle.dump((neigh, class_names), outfile)
print('Saved classifier model to file "%s"' % classifier_filename_exp)

Saved classifier model to file "./models/knn_3_center_vggsample.pkl"


In [18]:
#Wrongly classified
for idx, item in enumerate(knn_pred):
    if(item != test_labels[idx]):
        print(neigh.kneighbors([test_embeddings[idx]]))
        print(test_paths[idx])
        print(class_names[item])

(array([[1.23177508, 1.24187557, 1.24476892]]), array([[ 7, 18,  0]]))
/home/neelansh/vgg_sample/n000006/0241_03.jpg
Neelansh
(array([[0.7857633 , 0.90686242, 0.924861  ]]), array([[ 76,  79, 163]]))
/home/neelansh/vgg_sample/n000022/0048_02.jpg
n000011


In [None]:
dataset_filename = './models/vggface2.pkl'
dataset_filename_exp = os.path.expanduser(dataset_filename)
# Create the target directory when it is missing; otherwise open() below
# fails on a fresh checkout that has no ./models folder yet.
dataset_dir = os.path.dirname(dataset_filename_exp)
if dataset_dir and not os.path.isdir(dataset_dir):
    os.makedirs(dataset_dir)
# Save the embeddings and labels of both splits (not a classifier) as one
# tuple: (train_embeddings, test_embeddings, train_labels, test_labels).
with open(dataset_filename_exp, 'wb') as outfile:
    pickle.dump((train_embeddings, test_embeddings, train_labels, test_labels), outfile)
print('Saved dataset embeddings to file "%s"' % dataset_filename_exp)