In [9]:
# https://github.com/davidsandberg/facenet/wiki

In [10]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf
import numpy as np
import argparse
import facenet
import lfw
import os
import sys
import math
from sklearn import metrics
from scipy.optimize import brentq
from scipy import interpolate

import pickle
from sklearn.svm import SVC


In [7]:
def validate(model = '../data/models/20170512-110547.pb',
             lfw_dir = '../data/lfw/lfw_mtcnnpy_160',
             lfw_batch_size = 100,
             image_size = 160,
             lfw_pairs = '../data/pairs.txt',
             lfw_file_ext = 'png',
             lfw_nrof_folds = 10):
    """Embed the LFW test pairs with a FaceNet model and print verification metrics.

    Args:
        model: Path handed to ``facenet.load_model``.
        lfw_dir: Directory holding the LFW images (``~`` is expanded).
        lfw_batch_size: Number of images fed through the network per forward pass.
        image_size: Size passed to ``facenet.load_data``. It is supplied explicitly
            because, per the original author's note, reading it from the input
            placeholder's shape does not work for frozen graphs.
        lfw_pairs: Path of the pairs file read by ``lfw.read_pairs`` (``~`` is expanded).
        lfw_file_ext: Image file extension forwarded to ``lfw.get_paths``.
        lfw_nrof_folds: Forwarded to ``lfw.evaluate`` as ``nrof_folds``.

    Returns:
        None. Accuracy, validation rate, AUC and EER are printed.
    """
    with tf.Graph().as_default(), tf.Session() as sess:

        # Resolve the test pairs into image paths plus the same/different flags
        test_pairs = lfw.read_pairs(os.path.expanduser(lfw_pairs))
        image_paths, same_identity = lfw.get_paths(os.path.expanduser(lfw_dir), test_pairs, lfw_file_ext)

        # Import the network into the default graph
        facenet.load_model(model)

        # Look up the tensors needed to run inference
        graph = tf.get_default_graph()
        input_tensor = graph.get_tensor_by_name("input:0")
        embedding_tensor = graph.get_tensor_by_name("embeddings:0")
        phase_train_tensor = graph.get_tensor_by_name("phase_train:0")
        embedding_dim = embedding_tensor.get_shape()[1]

        # Fill the embedding matrix one batch of rows at a time
        print('Runnning forward pass on LFW images')
        total_images = len(image_paths)
        batch_count = int(math.ceil(1.0*total_images / lfw_batch_size))
        all_embeddings = np.zeros((total_images, embedding_dim))
        for batch_no in range(batch_count):
            lo = batch_no * lfw_batch_size
            hi = min(lo + lfw_batch_size, total_images)
            # NOTE(review): the two False flags presumably disable random
            # crop/flip augmentation - confirm against facenet.load_data
            batch_images = facenet.load_data(image_paths[lo:hi], False, False, image_size)
            all_embeddings[lo:hi, :] = sess.run(
                embedding_tensor,
                feed_dict={input_tensor: batch_images, phase_train_tensor: False})

        tpr, fpr, accuracy, val, val_std, far = lfw.evaluate(
            all_embeddings, same_identity, nrof_folds=lfw_nrof_folds)

        print('Accuracy: %1.3f+-%1.3f' % (np.mean(accuracy), np.std(accuracy)))
        print('Validation rate: %2.5f+-%2.5f @ FAR=%2.5f' % (val, val_std, far))

        auc = metrics.auc(fpr, tpr)
        print('Area Under Curve (AUC): %1.3f' % auc)

        def eer_residual(x):
            # Zero where the interpolated ROC curve satisfies tpr == 1 - fpr
            return 1. - x - interpolate.interp1d(fpr, tpr)(x)

        eer = brentq(eer_residual, 0., 1.)
        print('Equal Error Rate (EER): %1.3f' % eer)
            

In [8]:
# Run the LFW evaluation with validate()'s default model, dataset and pairs paths.
validate()

Model filename: ../data/models/20170512-110547.pb
Runnning forward pass on LFW images
Accuracy: 0.992+-0.003
Validation rate: 0.97467+-0.01477 @ FAR=0.00133
Area Under Curve (AUC): 1.000
Equal Error Rate (EER): 0.007


In [51]:
def classifier(mode,
               data_dir,
               classifier_filename,
               model = '../data/models/20170512-110547.pb',
               use_split_dataset = False,
               test_data_dir = '../data/subset/test',
               batch_size=90,
               image_size=160,
               seed=666,
               min_nrof_images_per_class=20,
               nrof_train_images_per_class=10):
    """Train or apply an SVM classifier on top of FaceNet embeddings.

    Args:
        mode: 'TRAIN' fits a linear SVC on the embeddings and pickles it together
            with the class names; 'CLASSIFY' loads that pickle and prints a
            prediction per image plus the overall accuracy.
        data_dir: Dataset directory handed to ``facenet.get_dataset``.
        classifier_filename: Pickle file written (TRAIN) or read (CLASSIFY);
            ``~`` is expanded.
        model: Path of the feature-extraction model passed to ``facenet.load_model``.
        use_split_dataset: When true, ``data_dir`` is split with ``split_dataset``
            and the train part (TRAIN) or test part (CLASSIFY) is used.
        test_data_dir: Not read by this function; kept so existing callers that
            pass it keep working.
        batch_size: Number of images per forward pass.
        image_size: Size passed to ``facenet.load_data``.
        seed: Seed for NumPy's global random generator (drives the split shuffle).
        min_nrof_images_per_class: Forwarded to ``split_dataset``.
        nrof_train_images_per_class: Forwarded to ``split_dataset``.

    Returns:
        None. Progress and results are printed.

    Raises:
        ValueError: If ``mode`` is neither 'TRAIN' nor 'CLASSIFY'.
        AssertionError: If any class in the selected dataset has no images.
    """
    # Reject unknown modes before any expensive work; previously this either
    # failed later with a NameError or silently did nothing after embedding
    # every image.
    if mode not in ('TRAIN', 'CLASSIFY'):
        raise ValueError("mode must be 'TRAIN' or 'CLASSIFY', got %r" % (mode,))

    with tf.Graph().as_default():

        with tf.Session() as sess:

            np.random.seed(seed=seed)

            if use_split_dataset:
                dataset_tmp = facenet.get_dataset(data_dir)
                train_set, test_set = split_dataset(dataset_tmp, min_nrof_images_per_class, nrof_train_images_per_class)
                dataset = train_set if mode == 'TRAIN' else test_set
            else:
                dataset = facenet.get_dataset(data_dir)

            # Check that there is at least one image per class. The condition and
            # message must NOT be wrapped in parentheses: that asserts a non-empty
            # tuple, which is always true.
            for cls in dataset:
                assert len(cls.image_paths) > 0, 'There must be at least one image for each class in the dataset'

            paths, labels = facenet.get_image_paths_and_labels(dataset)

            print('Number of classes: %d' % len(dataset))
            print('Number of images: %d' % len(paths))

            # Load the model
            print('Loading feature extraction model')
            facenet.load_model(model)

            # Get input and output tensors
            images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
            embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
            phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
            embedding_size = embeddings.get_shape()[1]

            # Run forward pass to calculate embeddings
            print('Calculating features for images')
            nrof_images = len(paths)
            nrof_batches_per_epoch = int(math.ceil(1.0*nrof_images / batch_size))
            emb_array = np.zeros((nrof_images, embedding_size))
            for i in range(nrof_batches_per_epoch):
                start_index = i*batch_size
                end_index = min((i+1)*batch_size, nrof_images)
                paths_batch = paths[start_index:end_index]
                # NOTE(review): the two False flags presumably disable random
                # crop/flip augmentation - confirm against facenet.load_data
                images = facenet.load_data(paths_batch, False, False, image_size)
                feed_dict = { images_placeholder:images, phase_train_placeholder:False }
                emb_array[start_index:end_index,:] = sess.run(embeddings, feed_dict=feed_dict)

            classifier_filename_exp = os.path.expanduser(classifier_filename)

            if mode == 'TRAIN':
                # Train classifier (named svm_model so it does not shadow the
                # `model` path parameter)
                print('Training classifier')
                svm_model = SVC(kernel='linear', probability=True)
                svm_model.fit(emb_array, labels)

                # Create a list of class names
                class_names = [cls.name.replace('_', ' ') for cls in dataset]

                # Saving classifier model
                with open(classifier_filename_exp, 'wb') as outfile:
                    pickle.dump((svm_model, class_names), outfile)
                print('Saved classifier model to file "%s"' % classifier_filename_exp)

            else:
                # Classify images
                print('Testing classifier')
                # SECURITY: pickle.load can execute arbitrary code; only load
                # classifier files produced by a trusted TRAIN run.
                with open(classifier_filename_exp, 'rb') as infile:
                    (svm_model, class_names) = pickle.load(infile)

                print('Loaded classifier model from file "%s"' % classifier_filename_exp)

                predictions = svm_model.predict_proba(emb_array)
                best_class_indices = np.argmax(predictions, axis=1)
                best_class_probabilities = predictions[np.arange(len(best_class_indices)), best_class_indices]

                for i in range(len(best_class_indices)):
                    print('%4d  %s: %.3f' % (i, class_names[best_class_indices[i]], best_class_probabilities[i]))

                # NOTE(review): comparing against `labels` assumes this dataset
                # enumerates classes in the same order as the training dataset
                # did - confirm for data_dir layouts that differ.
                accuracy = np.mean(np.equal(best_class_indices, labels))
                print('Accuracy: %.3f' % accuracy)
                
            
def split_dataset(dataset, min_nrof_images_per_class, nrof_train_images_per_class):
    """Split every sufficiently large class into a training part and a test part.

    Args:
        dataset: Iterable of objects exposing ``name`` and ``image_paths``.
        min_nrof_images_per_class: Classes with fewer images than this are
            left out of both returned sets.
        nrof_train_images_per_class: Number of shuffled images per class that
            go into the training set; the remainder goes into the test set.

    Returns:
        Tuple ``(train_set, test_set)`` of lists of ``facenet.ImageClass``.
    """
    train_set = []
    test_set = []
    for cls in dataset:
        # Shuffle a copy: np.random.shuffle works in place, and shuffling
        # cls.image_paths directly would reorder the caller's dataset.
        paths = list(cls.image_paths)
        # Remove classes with less than min_nrof_images_per_class
        if len(paths) >= min_nrof_images_per_class:
            np.random.shuffle(paths)
            train_set.append(facenet.ImageClass(cls.name, paths[:nrof_train_images_per_class]))
            test_set.append(facenet.ImageClass(cls.name, paths[nrof_train_images_per_class:]))
    return train_set, test_set

  assert(len(cls.image_paths)>0, 'There must be at least one image for each class in the dataset')


In [56]:
# Train the SVM on the images under ../data/subset2/train and pickle it next to the data.
# Earlier invocations are kept below for reference; the first one omits
# classifier_filename, which has no default, so it would raise TypeError if uncommented.
# classifier(mode = "TRAIN", data_dir = '../data/subset/train')
# classifier(mode = "TRAIN", data_dir = '../data/subset2/train', classifier_filename = '../data/subset2/subset_classifier.pkl')
classifier(mode = "TRAIN", data_dir = '../data/subset2/train', 
           classifier_filename = '../data/subset2/subset_classifier.pkl', use_split_dataset = False)


Number of classes: 9
Number of images: 18
Loading feature extraction model
Model filename: ../data/models/20170512-110547.pb
Calculating features for images
Training classifier
Saved classifier model to file "../data/subset2/subset_classifier.pkl"


In [57]:
# Classify the images under ../data/subset2/test with the classifier stored in
# ../data/subset2/subset_classifier.pkl and print per-image predictions and accuracy.
# Earlier invocations are kept below for reference; the first one omits
# classifier_filename, which has no default, so it would raise TypeError if uncommented.
# classifier(mode = 'CLASSIFY', data_dir = '../data/subset/test')
# classifier(mode = 'CLASSIFY', data_dir = '../data/subset2/train', classifier_filename = '../data/subset2/subset_classifier.pkl')
classifier(mode = 'CLASSIFY', data_dir = '../data/subset2/test', 
           classifier_filename = '../data/subset2/subset_classifier.pkl', use_split_dataset = False)


Number of classes: 9
Number of images: 9
Loading feature extraction model
Model filename: ../data/models/20170512-110547.pb
Calculating features for images
Testing classifier
Loaded classifier model from file "../data/subset2/subset_classifier.pkl"
   0  Al Pacino: 0.215
[ 0.21463146  0.09614609  0.10530683  0.08768448  0.0944613   0.10034486
  0.0827379   0.12493198  0.09375511]
../data/subset2/test\Al_Pacino\Al_Pacino_0003.png
   1  Ben Affleck: 0.214
[ 0.09050125  0.21374105  0.13391541  0.09911634  0.0769124   0.09651667
  0.10252433  0.09630284  0.0904697 ]
../data/subset2/test\Ben_Affleck\Ben_Affleck_0007.png
   2  Britney Spears: 0.221
[ 0.08921422  0.11196292  0.22122164  0.09755808  0.10098615  0.11032206
  0.09769548  0.09070632  0.08033313]
../data/subset2/test\Britney_Spears\Britney_Spears_0014.png
   3  Halle Berry: 0.279
[ 0.08544368  0.09161679  0.08564785  0.2790508   0.10848668  0.08856995
  0.07409693  0.0816239   0.10546342]
../data/subset2/test\Halle_Berry\Halle_Ber