# Get Embeddings From Dataset

In [2]:
import pandas as pd
import numpy as np
from numpy import load, expand_dims, asarray, savez_compressed

from keras.models import load_model
import collections
from PIL import Image
from matplotlib import pyplot
from mtcnn.mtcnn import MTCNN

In [7]:
# root of the LFW dataset; the names file sits next to the per-person folders
data_path = '../data/lfw/'
names_path = data_path + 'lfw-names.txt'

# the extracted faces and their embeddings go to separate archives, so that
# saving the embeddings does not overwrite the saved faces
compressed_face_path = 'a_faces_compressed.npz'
compressed_embeddings_path = 'a_face_embeddings_compressed.npz'

# Keras model file used to compute the embeddings
embedding_model_path = '../models/FaceNet_Keras_converted.h5'

In [4]:
# shared MTCNN detector, created on first use and reused by every later call
_FACE_DETECTOR = None


def _get_face_detector():
    """Return the shared MTCNN detector, building it on the first call."""
    global _FACE_DETECTOR
    if _FACE_DETECTOR is None:
        # create the detector, using default weights
        _FACE_DETECTOR = MTCNN()
    return _FACE_DETECTOR


# extract a single face from a given photograph
def extract_face(filename, required_size=(160, 160)):
    """Crop the first detected face out of an image and resize it.

    Args:
        filename: image location handed to ``Image.open``.
        required_size: target size handed to ``Image.resize``.

    Returns:
        The resized face crop converted to an array with ``asarray``.

    Raises:
        IndexError: if the detector returns no faces for the image.
    """
    # load image from file and convert to RGB; the context manager closes
    # the underlying file once the pixel data has been copied out
    with Image.open(filename) as image:
        pixels = asarray(image.convert('RGB'))
    # detect faces in the image
    results = _get_face_detector().detect_faces(pixels)
    if not results:
        # same exception type as indexing an empty result list, but with
        # a message that says which file was the problem
        raise IndexError('no face detected in {!r}'.format(filename))
    # extract the bounding box from the first face
    x1, y1, width, height = results[0]['box']
    # compute the far corner from the reported origin before clamping
    x2, y2 = x1 + width, y1 + height
    # a negative origin would wrap around when used as a slice start, so
    # clamp it to the image edge instead of mirroring it with abs()
    x1, y1 = max(0, x1), max(0, y1)
    # extract the face
    face = pixels[y1:y2, x1:x2]
    # resize pixels to the model size
    resized = Image.fromarray(face).resize(required_size)
    return asarray(resized)

# load images and extract faces for all images in an LFW directory
def load_faces(directory):
    """Extract one face from every image below ``directory``.

    ``directory`` is expected to contain one sub-directory per person, each
    holding that person's image files. Entries of ``directory`` that are not
    directories are skipped. People and their images are visited in sorted
    name order so the order of the result is reproducible between runs.

    Args:
        directory: path of the dataset root, with or without a trailing
            separator.

    Returns:
        A list with the result of ``extract_face`` for every image file.
    """
    # local import: the file's import block does not provide listdir
    import os

    faces = []
    people_done = 0

    for name in sorted(os.listdir(directory)):
        # path to each persons directory, relative to the given root
        person_path = os.path.join(directory, name)
        if not os.path.isdir(person_path):
            # plain files in the root are not person folders
            continue

        # one person can have multiple pictures
        for facefile in sorted(os.listdir(person_path)):
            faces.append(extract_face(os.path.join(person_path, facefile)))

        # progress output: number of people processed so far
        people_done += 1
        print(people_done)
    return faces

# get the face embedding for one face
def get_embedding(model, face_pixels):
    """Standardize one face image and return the model's output for it.

    Args:
        model: object exposing ``predict(samples)``.
        face_pixels: array holding the pixels of a single face
            (presumably height x width x channels; confirm against the
            model's expected input).

    Returns:
        The first element of ``model.predict`` for the one-sample batch.
    """
    # scale pixel values
    face_pixels = face_pixels.astype('float32')
    # standardize pixel values across channels (global)
    mean, std = face_pixels.mean(), face_pixels.std()
    if std == 0:
        # a uniform image has no spread; dividing by zero would hand NaNs
        # to the model, so leave the centred (all-zero) values unscaled
        std = 1.0
    face_pixels = (face_pixels - mean) / std
    # transform face into one sample
    samples = expand_dims(face_pixels, axis=0)
    # make prediction to get embedding
    yhat = model.predict(samples)
    return yhat[0]

# get list of id for who is in which image
def get_name_list(names_path):
    """Expand a "name count" listing into one name per image.

    Each non-blank line of the file is split on whitespace; the first field
    is the name and the second is the number of times to repeat it. Blank
    lines are ignored.

    Args:
        names_path: path of the text file to read.

    Returns:
        A list in which every name appears ``count`` times, in file order.

    Raises:
        ValueError: if the second field of a line is not an integer.
        IndexError: if a non-blank line has fewer than two fields.
    """
    name_list = []
    with open(names_path, "r") as names:
        for row in names:
            fields = row.split()
            if not fields:
                # tolerate empty lines, e.g. a trailing newline at the end
                continue
            name, count = fields[0], int(fields[1])
            name_list.extend([name] * count)

    return name_list

In [None]:
# Step 1: extract every face from the raw dataset and store them compressed.
faces = load_faces(data_path)
savez_compressed(compressed_face_path, faces)

# Step 2: read the stored faces back; an array saved positionally is
# available under the key 'arr_0'.
faces = load(compressed_face_path)['arr_0']
print('Loaded: ', faces.shape)

# Step 3: bring up the embedding model.
model = load_model(embedding_model_path)
print('Loaded Model')

# Step 4: compute one embedding per face, stack them into a single array
# and store the result compressed.
face_embeddings = asarray([get_embedding(model, face) for face in faces])
savez_compressed(compressed_embeddings_path, face_embeddings)