In [1]:
import os
import numpy
from numpy import asarray
from matplotlib import pyplot as plt
from matplotlib.patches import Rectangle
from mtcnn.mtcnn import MTCNN
from PIL import Image
import cv2
import json

In [2]:
from scipy.spatial.distance import cosine
from keras_vggface.utils import preprocess_input
from keras_vggface.vggface import VGGFace
# NOTE: if importing keras_vggface fails on the `get_source_inputs` import,
# edit keras_vggface/models.py and change
#     from keras.engine.topology import get_source_inputs
# to
#     from keras.utils.layer_utils import get_source_inputs

In [3]:
def extract_face_from_image(image_path, required_size=(224, 224)):
    """
    Detect every face in an image file and return the resized face crops.

    Relies on the module-level MTCNN ``detector``, which must exist before
    this function is called.

    :param image_path: Path of the image file to load
    :param required_size: (width, height) every face crop is resized to
    :return: A list of RGB pixel arrays, one per detected face (empty if none)
    """
    # open inside a context manager so the file handle is released promptly
    with Image.open(image_path) as source:
        # convert to RGB, if needed, and materialise the pixels as an array
        image = asarray(source.convert('RGB'))

    faces = detector.detect_faces(image)

    face_images = []

    for face in faces:
        # extract the bounding box from the requested face
        x1, y1, width, height = face['box']
        # compute the far corner from the raw origin first ...
        x2, y2 = x1 + width, y1 + height
        # ... then clamp a negative origin to the image edge; abs() would
        # shift the crop window instead of trimming it
        x1, y1 = max(0, x1), max(0, y1)

        # extract the face
        face_boundary = image[y1:y2, x1:x2]
        if face_boundary.size == 0:
            # degenerate box (nothing left inside the image): nothing to resize
            continue

        # resize pixels to the model size
        face_image = Image.fromarray(face_boundary).resize(required_size)
        face_images.append(asarray(face_image))

    return face_images

def extract_face_locations(image_array):
    """
    Detect faces in an image array and return their bounding boxes.

    Relies on the module-level MTCNN ``detector``.

    :param image_array: Image pixels passed straight to ``detector.detect_faces``
    :return: A list of ``[y1, y2, x1, x2]`` boxes, one per detected face, with
             the origin clamped to 0 so the values are safe to use as slices
    """
    face_positions = []
    for face in detector.detect_faces(image_array):
        x1, y1, width, height = face['box']
        # far corner is computed from the raw origin so the box is only
        # trimmed, never shifted, when the origin is clamped below
        x2, y2 = x1 + width, y1 + height

        # a negative origin would wrap around when used as a slice index
        face_positions.append([max(0, y1), y2, max(0, x1), x2])
    return face_positions

def _get_vggface_model():
    """Build the VGGFace ResNet-50 embedding model on first use and reuse it afterwards."""
    cached = getattr(_get_vggface_model, "_model", None)
    if cached is None:
        # constructing the network is expensive, so do it exactly once
        cached = VGGFace(model='resnet50',
                         include_top=False,
                         input_shape=(224, 224, 3),
                         pooling='avg')
        _get_vggface_model._model = cached
    return cached


def get_model_scores(faces):
    """
    Compute VGGFace embeddings for a batch of face crops.

    :param faces: Sequence of face pixel arrays matching the model input
                  shape (224, 224, 3)
    :return: The result of ``model.predict`` on the preprocessed batch,
             one row per input face
    """
    samples = asarray(faces, 'float32')

    # prepare data for model (version=2 is the preprocessing variant used
    # with the resnet50 weights)
    samples = preprocess_input(samples, version = 2)

    # reuse the shared model instead of rebuilding it on every call
    model = _get_vggface_model()

    # perform prediction
    return model.predict(samples)

def extract_face_encodings(image_array, required_size=(224, 224)):
    """
    Detect faces in an image array and compute one embedding per face.

    :param image_array: Image pixels as an array accepted by ``Image.fromarray``
    :param required_size: (width, height) each face crop is resized to before
                          it is passed to the model
    :return: A list with one embedding vector per detected face, in the order
             reported by ``extract_face_locations`` (empty if no face is found)
    """
    # normalise the pixel mode to RGB, if needed (this does not reorder the
    # channels of an array that is already 3-channel)
    image = asarray(Image.fromarray(image_array).convert('RGB'))

    # one detection pass is enough: the locations are all that is needed
    locations = extract_face_locations(image_array)
    if not locations:
        return []

    face_arrays = []
    for top, bottom, left, right in locations:
        # a negative origin would wrap around when used as a slice index
        top, left = max(0, top), max(0, left)

        # extract the face
        face_boundary = image[top:bottom, left:right]

        # resize pixels to the model size
        face_image = Image.fromarray(face_boundary).resize(required_size)
        face_arrays.append(asarray(face_image))

    # score every face in a single batch rather than one model call per face
    return list(get_model_scores(face_arrays))

def cosine_distance(known_embedding, candidate_embedding):
    """
    Compute the cosine distance between two face encodings.

    The cosine distance is one minus the cosine similarity of the two vectors:
    0 means they point in the same direction, larger values mean the faces are
    less alike (the value can reach 2 for opposite vectors).
    :param known_embedding: Face encoding to compare
    :param candidate_embedding: A face encoding to compare against
    :return: A numerical value representing the cosine distance of the vectors
    """
    return cosine(known_embedding, candidate_embedding)
    
def compare_faces(known_face_encodings, face_encoding_to_check, tolerance=0.6):
    """
    Decide whether two face encodings are close enough to count as a match.
    :param known_face_encodings: A known face encoding
    :param face_encoding_to_check: A single face encoding to compare against
    :param tolerance: Largest cosine distance still treated as a match. Lower is more strict.
    :return: A True/False value telling whether the cosine distance between the two encodings is within the tolerance
    """
    distance = cosine_distance(known_face_encodings, face_encoding_to_check)
    within_tolerance = distance <= tolerance
    return within_tolerance

In [4]:
# Shared MTCNN face detector; the extract_* helpers look this name up at call time
detector = MTCNN()

# VGGFace ResNet-50 without the classification head: with average pooling the
# network outputs one embedding vector per input face
model = VGGFace(model='resnet50',
                    include_top=False,
                    input_shape=(224, 224, 3),
                    pooling='avg')

face = extract_face_from_image('known_faces/Brandon.jpg')
if not face:
    # fail with a clear message instead of an obscure shape error further down
    raise ValueError("no face detected in 'known_faces/Brandon.jpg'")

# version=2 selects the preprocessing that matches the resnet50 weights and
# keeps this encoding consistent with get_model_scores()
sample = preprocess_input(asarray(face, 'float32'), version=2)
brandon_face_encoding = model.predict(sample)[0]

In [5]:
class NumpyEncoder(json.JSONEncoder):
    """
    JSON encoder that understands numpy scalars and arrays.

    numpy booleans, integers and floats are converted to the matching Python
    built-in and arrays are converted to (nested) lists; anything else falls
    through to the standard encoder, which raises ``TypeError``.
    """
    def default(self, obj):
        # the abstract numpy.integer / numpy.floating bases cover every sized
        # variant, and avoid aliases such as float_ that newer numpy dropped
        if isinstance(obj, numpy.bool_):
            return bool(obj)
        if isinstance(obj, numpy.integer):
            return int(obj)
        if isinstance(obj, numpy.floating):
            return float(obj)
        if isinstance(obj, numpy.ndarray):
            return obj.tolist()
        return super().default(obj)

def add_entry(name, encoding, path='known_encodings.json'):
    """
    Store (or replace) one named face encoding in the JSON encodings file.

    The file holds a single JSON object mapping names to encodings. It is read,
    updated and rewritten as a whole, because appending a second JSON object to
    the file would make it unreadable for ``json.load``.

    :param name: Key under which the encoding is stored
    :param encoding: The face encoding (numpy arrays are serialised via NumpyEncoder)
    :param path: Location of the JSON file; created if it does not exist
    :raises json.JSONDecodeError: if the existing file is not valid JSON
    """
    entries = {}
    if os.path.exists(path):
        with open(path, 'r') as fp:
            content = fp.read()
        # an empty file is treated like a missing one
        if content.strip():
            entries = json.loads(content)

    entries[name] = encoding

    with open(path, 'w') as fp:
        json.dump(entries, fp, cls=NumpyEncoder)

# Encode the reference photo and persist it; version=2 is the preprocessing
# that matches the resnet50 model and the encodings made by get_model_scores()
brandon_faces = extract_face_from_image('known_faces/Brandon.jpg')
brandon_sample = preprocess_input(asarray(brandon_faces, 'float32'), version=2)
add_entry('Brandon', model.predict(brandon_sample)[0])

In [6]:
def get_known_encodings(updateAll = False):
    """
    Extract face encodings from known_faces folder.

    Every ``.jpg`` file in the folder is treated as one person, named after the
    file (without the extension). Encodings already stored in
    ``known_encodings.json`` are reused; the rest are computed from the image.

    :param updateAll: When True, ignore the stored encodings and recompute
                      every encoding from its image
    :return: A tuple ``(face_names, face_encodings)`` of two parallel lists
    """
    # Initialize empty lists
    face_encodings = []
    face_names = []

    # Folder containing labelled .jpg images
    folder_dir = "known_faces/"
    cache_path = "known_encodings.json"

    # Load stored encodings unless a full refresh was requested; a missing
    # file simply means nothing is stored yet
    data = {}
    if not updateAll and os.path.exists(cache_path):
        with open(cache_path, "r") as file:
            data = json.load(file)

    for images in os.listdir(folder_dir):
        if not images.endswith(".jpg"): #Only pull images
            continue

        name = os.path.splitext(images)[0]
        if name in data:
            # Face encoding already known
            print('known face')
            encoding = numpy.array(data[name])
        else:
            print('unknown face, getting encoding')
            faces = extract_face_from_image(os.path.join(folder_dir, images))
            if not faces:
                # nothing to encode; skip so names and encodings stay aligned
                print('no face found in ' + images + ', skipping')
                continue
            # encode the first detected face directly; the crop is already
            # sized for the model, so no second detection pass is needed
            encoding = get_model_scores(faces[:1])[0]

        face_encodings.append(encoding)
        face_names.append(name)

    return (face_names, face_encodings)

In [8]:
import numpy as np
import cv2

# Open the default camera (device index 0)
video_capture = cv2.VideoCapture(0)

# Get names and encodings from known_faces folder
known_face_names, known_face_encodings = get_known_encodings()

# Initialize variables
face_locations = []
face_encodings = []
face_names = []
process_this_frame = True

try:
    while True:
        # Grab single frame of video
        grabbed, frame = video_capture.read()
        if not grabbed:
            # camera unavailable or stream ended: there is no frame to process
            break

        # Only process every other frame of video
        if process_this_frame:
            # Convert image from BGR color (OpenCV) to RGB color, the channel
            # order of the images the known encodings were computed from
            rgb_small_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # Find all faces and face encodings in current frame
            # NOTE: these are two separate detection passes; zip() below
            # truncates to the shorter list if they ever disagree
            face_locations = extract_face_locations(rgb_small_frame)
            face_encodings = extract_face_encodings(rgb_small_frame)
            face_names = []
            for face_encoding in face_encodings:
                name = "Unknown"

                # argmin of an empty list raises, so only search when there
                # is at least one known face
                if known_face_encodings:
                    face_distances = [cosine_distance(known, face_encoding)
                                      for known in known_face_encodings]
                    best_match_index = int(np.argmin(face_distances))
                    # only the closest known face can be the match
                    if compare_faces(known_face_encodings[best_match_index], face_encoding):
                        name = known_face_names[best_match_index]

                face_names.append(name)

        process_this_frame = not process_this_frame

        # Display results (locations are [y1, y2, x1, x2], i.e. top, bottom, left, right)
        for (top, bottom, left, right), name in zip(face_locations, face_names):

            # Draw box around face
            cv2.rectangle(frame, (left, top), (right, bottom), (0, 0, 225), 2)

            # Draw label with name at the bottom edge of the box
            cv2.rectangle(frame, (left, bottom - 35), (right, bottom), (0, 0, 225), cv2.FILLED)
            font = cv2.FONT_HERSHEY_DUPLEX
            cv2.putText(frame, name, (left + 6, bottom - 6), font, 1.0, (255, 255, 255), 1)

        # Display resulting image
        cv2.imshow('Video', frame)

        # Hit q on keyboard to quit
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # always free the camera and close the window, even after an error
    video_capture.release()
    cv2.destroyAllWindows()


known face
unknown face, getting encoding
