In [1]:
#!pip install deepface
from deepface import DeepFace
from deepface.commons import functions

#### Import LFW dataset

In [2]:
import os

base_path = '../tests/minilfw/'


def collect_image_paths(base_path):
    """Map each person's directory name to the .jpg files stored in it.

    Walks ``base_path`` and treats every directory that has no
    sub-directories as one person's folder; the folder name becomes the key.

    Parameters
    ----------
    base_path : str
        Root directory of the dataset (one sub-directory per person).

    Returns
    -------
    dict
        ``{person_name: [image_path, ...]}``, in the order ``os.walk``
        reports directories and files.
    """
    found = {}
    for root, dirs, files in os.walk(base_path):
        if dirs:
            # Not a leaf directory, so it is not a person folder.
            continue
        # basename/normpath instead of split('/'): a trailing slash or a
        # non-'/' separator no longer produces a wrong (or empty) name.
        person_name = os.path.basename(os.path.normpath(root))
        # Test the extension itself (case-insensitively); a substring test
        # would also accept names such as 'notes.jpg.txt'.
        found[person_name] = [
            os.path.join(root, file)
            for file in files
            if file.lower().endswith('.jpg')
        ]
    return found


filenames = collect_image_paths(base_path)

In [3]:
# Inspect the mapping of person name -> list of image paths collected above.
filenames

{'Aaron_Eckhart': ['../tests/minilfw/Aaron_Eckhart/Aaron_Eckhart_0001.jpg'],
 'Aaron_Guiel': ['../tests/minilfw/Aaron_Guiel/Aaron_Guiel_0001.jpg'],
 'Aaron_Peirsol': ['../tests/minilfw/Aaron_Peirsol/Aaron_Peirsol_0004.jpg',
  '../tests/minilfw/Aaron_Peirsol/Aaron_Peirsol_0002.jpg',
  '../tests/minilfw/Aaron_Peirsol/Aaron_Peirsol_0003.jpg',
  '../tests/minilfw/Aaron_Peirsol/Aaron_Peirsol_0001.jpg'],
 'Aaron_Pena': ['../tests/minilfw/Aaron_Pena/Aaron_Pena_0001.jpg'],
 'Aaron_Patterson': ['../tests/minilfw/Aaron_Patterson/Aaron_Patterson_0001.jpg']}

#### Process images

Use `deepface.commons.functions` to preprocess each image and Google FaceNet to extract its vector representation (embedding).

In [4]:
from deepface.basemodels import Facenet
# NOTE: `functions` is already imported in the first cell; this is a redundant re-import.
from deepface.commons import functions
# Load the FaceNet model; its predict() output is used below as the image embedding.
model = Facenet.loadModel()

In [5]:
# Extract embeddings from test dataset
# Each record is a two-item list: [image path, embedding vector].
representations = []
for person_images in filenames.values():
    for img_path in person_images:
        # Preprocess to the 160x160 input used with the model; with
        # enforce_detection=False images are not rejected by the detector.
        face = functions.preprocess_face(
            img=img_path,
            target_size=(160, 160),
            enforce_detection=False,
        )
        # predict() returns a batch; keep the single row for this image.
        vector = model.predict(face)[0,:]
        representations.append([img_path, vector])

In [6]:
# Number of [path, embedding] records collected (one per image).
print(len(representations))

# NOTE(review): this echoes every full embedding vector; consider showing only a slice.
representations

8


[['../tests/minilfw/Aaron_Eckhart/Aaron_Eckhart_0001.jpg',
  array([-2.021097  , -0.12374371, -2.147213  , -0.9323491 , -0.26295528,
          0.13142458, -1.0152911 ,  1.840593  ,  0.39826488, -2.5097826 ,
         -0.49874052,  0.37884024,  1.0926383 , -0.8516123 ,  0.27399063,
          0.4006945 ,  0.09947224,  0.29713005,  0.07112212,  1.59386   ,
          0.26525185,  0.5735291 , -1.3944035 ,  0.9516    , -1.068706  ,
          1.3009826 , -2.308842  ,  0.02320573,  2.1408179 ,  1.1550988 ,
         -0.6325555 , -0.35655865, -0.18131757,  1.2309687 ,  0.31921157,
          0.59104407, -0.38763908, -0.8314865 , -0.3799345 , -0.7013316 ,
          1.5221875 ,  0.6171061 , -0.903615  ,  0.79337   ,  0.6802922 ,
         -1.6472454 ,  3.0829368 , -0.08294261, -0.40317345, -1.5988302 ,
         -0.15461291,  1.3917649 , -2.4063373 , -0.19197251, -2.3171418 ,
          0.504806  , -0.5607632 , -0.10597265,  0.36700347,  3.1979346 ,
          1.0216666 , -0.02378951, -0.83261067,  0.24

#### FAISS index

##### FAISS Preprocessing

FAISS expects its input as a 2-dimensional NumPy array (`np.ndarray`) of dtype `float32`.

In [7]:
import pandas as pd

# Tabular view of the [path, embedding] records; in the cells shown here `df` is only displayed.
df = pd.DataFrame(data=representations, columns=['path', 'embeddings'])

df

Unnamed: 0,path,embeddings
0,../tests/minilfw/Aaron_Eckhart/Aaron_Eckhart_0...,"[-2.021097, -0.123743705, -2.147213, -0.932349..."
1,../tests/minilfw/Aaron_Guiel/Aaron_Guiel_0001.jpg,"[-1.4104759, -0.75045586, -2.3727627, 1.035687..."
2,../tests/minilfw/Aaron_Peirsol/Aaron_Peirsol_0...,"[-0.18330485, -0.6918827, -1.5027157, -0.34168..."
3,../tests/minilfw/Aaron_Peirsol/Aaron_Peirsol_0...,"[-0.13638397, -0.59877086, -0.67624277, 1.2959..."
4,../tests/minilfw/Aaron_Peirsol/Aaron_Peirsol_0...,"[0.19479856, -0.6647241, 0.33383438, -0.251695..."
5,../tests/minilfw/Aaron_Peirsol/Aaron_Peirsol_0...,"[-0.307701, -0.38751227, -1.3112366, 0.7161535..."
6,../tests/minilfw/Aaron_Pena/Aaron_Pena_0001.jpg,"[-1.0830939, 0.6143253, -2.0557058, 0.37906438..."
7,../tests/minilfw/Aaron_Patterson/Aaron_Patters...,"[-0.4434611, -0.005954452, 0.5906839, -0.57851..."


In [8]:
import numpy as np

# convert representations list to the required format:
# take the embedding (second field) of every record and stack them into a
# single float32 matrix ('f' is NumPy's float32 type code), one row per image.
embeddings = np.array([record[1] for record in representations], dtype='f')

In [27]:
# Check the container type of the embedding matrix.
type(embeddings)

numpy.ndarray

In [26]:
# Check the type of a single row of the embedding matrix.
type(embeddings[0])

numpy.ndarray

In [28]:
# Check the matrix shape: (number of images, embedding length).
embeddings.shape

(8, 128)

##### Initialize FAISS index

In [10]:
import faiss

dimensions = 128    # FaceNet output is 128d vector

metric = 'euclidean' #euclidean, cosine

# Pick the flat index that matches the chosen metric.
if metric == 'euclidean':
    index = faiss.IndexFlatL2(dimensions)
elif metric == 'cosine':
    # Inner product equals cosine similarity only for unit-length vectors,
    # so the stored embeddings are L2-normalized (in place) before indexing.
    index = faiss.IndexFlatIP(dimensions)
    faiss.normalize_L2(embeddings)
else:
    # Fail loudly: without this branch an unknown metric would either raise a
    # NameError below or silently re-wrap an `index` left over from an
    # earlier run of this cell.
    raise ValueError(
        "unsupported metric: {!r} (expected 'euclidean' or 'cosine')".format(metric)
    )

# Wrap the index so vectors can be added under explicit integer ids.
index = faiss.IndexIDMap(index)

Adding embeddings to the index

In [11]:
# One id per embedding row (0..n-1). FAISS ids are 64-bit integers, while
# np.arange's default integer dtype is platform dependent, so request int64
# explicitly.
ids = np.arange(len(embeddings), dtype=np.int64)
ids

array([0, 1, 2, 3, 4, 5, 6, 7])

In [12]:
# Store every embedding row under the id at the same position in `ids`.
index.add_with_ids(embeddings, ids)

Write to disk

In [13]:
# Persist the populated index to ./lfw.index in the current working directory.
faiss.write_index(index, "./lfw.index")

In [15]:
# Restore a previously saved index instead of rebuilding it.
# Disabled by default; change the condition to True to load from disk.
if False:
    # Must be the same file that was passed to faiss.write_index.
    index = faiss.read_index("./lfw.index")

#### Using FAISS to find nearest neighbor of target image

##### Target image processing

In [18]:
# Preprocess the target image to the 160x160 input size and embed it with the same model as the indexed images.
# NOTE(review): this call passes detector_backend='mtcnn', while the indexing loop passes no detector_backend
# (and sets enforce_detection=False) -- confirm the two paths are meant to preprocess faces differently.
target_img = functions.preprocess_face(img="../tests/targets/billgates.jpeg", target_size=(160, 160), detector_backend='mtcnn')
# predict() returns a batch; keep the single row for the target image.
target_representation = model.predict(target_img)[0,:]

target_representation

array([-4.59348291e-01,  7.35858321e-01, -1.12033415e+00,  4.04331088e-01,
        8.36471677e-01,  9.97316360e-01, -2.19242430e+00,  3.16736668e-01,
        1.68881989e+00, -1.42504108e+00, -2.31312490e+00, -7.24466562e-01,
       -1.54602909e+00,  2.94558239e+00,  3.18461835e-01, -3.78134072e-01,
       -1.07016504e+00, -1.73836619e-01, -3.13794881e-01,  1.31315574e-01,
       -2.59632051e-01,  1.06429905e-01,  2.38414466e-01,  1.50981522e+00,
        8.57221484e-01, -6.44372106e-01, -2.18228206e-01, -3.19540203e-01,
        1.35010111e+00, -2.36098796e-01, -6.61198258e-01, -7.80807287e-02,
       -1.61462843e+00,  1.26211035e+00, -1.09190631e+00,  1.72060823e+00,
       -1.79503119e+00,  6.00049317e-01,  1.31692910e+00,  9.00374889e-01,
        3.06231880e+00, -2.02943611e+00, -6.99832916e-01, -3.65843117e-01,
       -2.19446898e-01,  1.57818377e-01,  1.88313529e-01, -1.85693955e+00,
       -1.23422384e+00, -8.55720401e-01, -2.43424988e+00,  1.17386937e+00,
        7.86146402e-01,  

In [19]:
# Build the (1, d) float32 query matrix passed to index.search.
# reshape(1, -1) is used instead of expand_dims so that re-running this cell
# does not keep adding leading dimensions.
target_representation = np.array(target_representation, dtype='f').reshape(1, -1)

# The cosine index stores L2-normalized vectors, so the query has to be
# normalized too; otherwise the inner product is not a cosine similarity.
if metric == 'cosine':
    faiss.normalize_L2(target_representation)

In [20]:
# Show the query after conversion; it now has an extra leading dimension.
target_representation

array([[-4.59348291e-01,  7.35858321e-01, -1.12033415e+00,
         4.04331088e-01,  8.36471677e-01,  9.97316360e-01,
        -2.19242430e+00,  3.16736668e-01,  1.68881989e+00,
        -1.42504108e+00, -2.31312490e+00, -7.24466562e-01,
        -1.54602909e+00,  2.94558239e+00,  3.18461835e-01,
        -3.78134072e-01, -1.07016504e+00, -1.73836619e-01,
        -3.13794881e-01,  1.31315574e-01, -2.59632051e-01,
         1.06429905e-01,  2.38414466e-01,  1.50981522e+00,
         8.57221484e-01, -6.44372106e-01, -2.18228206e-01,
        -3.19540203e-01,  1.35010111e+00, -2.36098796e-01,
        -6.61198258e-01, -7.80807287e-02, -1.61462843e+00,
         1.26211035e+00, -1.09190631e+00,  1.72060823e+00,
        -1.79503119e+00,  6.00049317e-01,  1.31692910e+00,
         9.00374889e-01,  3.06231880e+00, -2.02943611e+00,
        -6.99832916e-01, -3.65843117e-01, -2.19446898e-01,
         1.57818377e-01,  1.88313529e-01, -1.85693955e+00,
        -1.23422384e+00, -8.55720401e-01, -2.43424988e+0

##### Find neighbor

In [21]:
k = 3  # number of nearest neighbors to retrieve
# Both results are 2-D with one row per query vector (a single row here).
distances, neighbors = index.search(target_representation, k)

In [22]:
# Raw search output: distances and the matching ids, one row per query.
print("Distance: {}\nNeighbors: {}".format(distances, neighbors))

Distance: [[265.83353 331.53558 340.79233]]
Neighbors: [[0 2 4]]


In [23]:
# List each hit for the single query as: id, image path, distance.
for d, n in zip(distances[0], neighbors[0]):
    if n < 0:
        # FAISS pads the result with id -1 when fewer than k neighbors exist;
        # representations[-1] would silently report the last image instead.
        continue
    print(n, representations[n][0], d)

0 ../tests/minilfw/Aaron_Eckhart/Aaron_Eckhart_0001.jpg 265.83353
2 ../tests/minilfw/Aaron_Peirsol/Aaron_Peirsol_0004.jpg 331.53558
4 ../tests/minilfw/Aaron_Peirsol/Aaron_Peirsol_0003.jpg 340.79233
