In [1]:
#!/usr/bin/env python
import os
import numpy as np
from PIL import Image
import dlib

# Archive holding the known face encodings and their labels.
file_name = "encodings/database.npz"
# Set to True when no archive could be read and an empty database was created.
changed = False

# dlib model files and their lazily created singletons (see the get_* helpers).
face_recognition_model = "models/dlib_face_recognition_resnet_model_v1.dat"
face_encoder = None

face_detector = None

predictor_model = "models/shape_predictor_5_face_landmarks.dat"
pose_predictor = None

try:
    # np.savez stores positional arrays as "arr_0", "arr_1", ...; reading them
    # by key avoids depending on NpzFile.values() or on its iteration order,
    # and the context manager closes the archive as soon as both are loaded.
    with np.load(file_name) as _database:
        known_face_encodings = _database["arr_0"]
        known_face_labels = _database["arr_1"]
except IOError:
    # No readable archive: start with an empty database.
    known_face_encodings, known_face_labels = np.array([]), np.array([], "str")
    changed = True

def save_data():
    """Write the known encodings and labels to ``file_name``.

    The two arrays are passed positionally to ``np.savez``, so they are stored
    under the keys ``arr_0`` (encodings) and ``arr_1`` (labels).
    """
    # On a first run the archive's directory may not exist yet; np.savez does
    # not create it, so make sure it is there before writing.
    target_dir = os.path.dirname(file_name)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)
    np.savez(file_name, known_face_encodings, known_face_labels)

def get_face_encoder():
    """Return the shared face-recognition model, building it on first call.

    The instance is cached in the module-level ``face_encoder`` so the model
    file named by ``face_recognition_model`` is only loaded once.
    """
    global face_encoder
    if face_encoder is not None:
        return face_encoder
    face_encoder = dlib.face_recognition_model_v1(face_recognition_model)
    return face_encoder

def get_face_detector():
    """Return the shared frontal face detector, creating it on first call.

    The instance is cached in the module-level ``face_detector``.
    """
    global face_detector
    if face_detector is not None:
        return face_detector
    # The default detector is HOG based (fast); a CNN detector could be used instead.
    face_detector = dlib.get_frontal_face_detector()
    return face_detector

def get_pose_predictor():
    """Return the shared landmark predictor, building it on first call.

    The instance is cached in the module-level ``pose_predictor`` and is
    created from the model file named by ``predictor_model``.
    """
    global pose_predictor
    if pose_predictor is not None:
        return pose_predictor
    pose_predictor = dlib.shape_predictor(predictor_model)
    return pose_predictor

def load_image_file(file):
    """Load an image and return it as an RGB numpy array.

    :param file: anything ``PIL.Image.open`` accepts (a path or a file object)
    :return: the pixel data of the image after conversion to mode "RGB"
    """
    # Opening through a context manager releases the underlying file handle
    # deterministically; the converted copy is independent of the opened image.
    with Image.open(file) as im:
        return np.array(im.convert("RGB"))

def css_to_rect(css):
    """Convert a (top, right, bottom, left) sequence into a ``dlib.rectangle``.

    This is the inverse of ``rect_to_css``.
    """
    top, right, bottom, left = css[0], css[1], css[2], css[3]
    return dlib.rectangle(left, top, right, bottom)

def rect_to_css(rect):
    """Return the edges of ``rect`` as a (top, right, bottom, left) tuple.

    ``rect`` only needs to expose ``top()``, ``right()``, ``bottom()`` and
    ``left()`` accessors.
    """
    top = rect.top()
    right = rect.right()
    bottom = rect.bottom()
    left = rect.left()
    return top, right, bottom, left

def trim_css_to_bounds(css, image_shape):
    """Clamp a (top, right, bottom, left) box to the image extent.

    :param css: box as (top, right, bottom, left)
    :param image_shape: array shape whose first two entries are (rows, columns)
    :return: the clamped box, again as (top, right, bottom, left)
    """
    rows, cols = image_shape[0], image_shape[1]
    top = max(css[0], 0)
    right = min(css[1], cols)
    bottom = min(css[2], rows)
    left = max(css[3], 0)
    return top, right, bottom, left


def shape_to_np(shape, dtype="int"):
    """Copy the landmark points of ``shape`` into a ``(num_parts, 2)`` array.

    :param shape: object exposing ``num_parts`` and ``part(i)`` with ``x``/``y``
    :param dtype: dtype of the returned array
    :return: array whose row ``i`` holds the (x, y) coordinates of part ``i``
    """
    count = shape.num_parts
    coords = np.zeros((count, 2), dtype=dtype)

    # Fill one row per landmark
    for idx in range(count):
        point = shape.part(idx)
        coords[idx, 0] = point.x
        coords[idx, 1] = point.y

    return coords

def face_distance(face_encodings, face_to_compare):
    """Euclidean distance from ``face_to_compare`` to each known encoding.

    :param face_encodings: collection of encodings, one per row
    :param face_to_compare: the encoding to measure against
    :return: array with one distance per row of ``face_encodings``; an empty
        array when ``face_encodings`` has no entries
    """
    if len(face_encodings) != 0:
        deltas = face_encodings - face_to_compare
        return np.linalg.norm(deltas, axis=1)

    return np.empty((0))

def _raw_face_landmarks(face_image, face_locations=None):
    """Run the pose predictor on every face of ``face_image``.

    :param face_image: image passed to the detector and the pose predictor
    :param face_locations: optional boxes as (top, right, bottom, left); when
        omitted the face detector is run on the image (upsampling once)
    :return: list with one predictor result per face
    """
    if face_locations is not None:
        rects = [css_to_rect(css) for css in face_locations]
    else:
        rects = get_face_detector()(face_image, 1)

    landmarks = []
    for rect in rects:
        # The predictor is fetched per face so it is only loaded when needed
        landmarks.append(get_pose_predictor()(face_image, rect))
    return landmarks


def get_face_encodings(face_image, known_face_locations=None, num_jitters=1):
    """Compute one face descriptor per face in ``face_image``.

    :param face_image: image containing the faces
    :param known_face_locations: optional boxes as (top, right, bottom, left);
        when omitted the faces are detected first
    :param num_jitters: forwarded to ``compute_face_descriptor``
    :return: list of numpy arrays, one per face
    """
    encodings = []
    for landmarks in _raw_face_landmarks(face_image, known_face_locations):
        descriptor = get_face_encoder().compute_face_descriptor(
            face_image, landmarks, num_jitters)
        encodings.append(np.array(descriptor))
    return encodings

def get_face_locations(img):
    """Detect faces in ``img`` and return their boxes.

    :param img: image array; its ``shape`` is used to clamp the boxes
    :return: list of (top, right, bottom, left) tuples clipped to the image
    """
    detections = get_face_detector()(img, 1)
    boxes = []
    for face in detections:
        boxes.append(trim_css_to_bounds(rect_to_css(face), img.shape))
    return boxes


In [2]:
import cv2
from urllib import request

In [24]:
class video_capture:
    """Stand-in for ``cv2.VideoCapture`` that fetches JPEG snapshots over HTTP.

    Only ``read`` and ``release`` are provided, and both are called on the
    class itself (``video_capture.read()``), so no instance is needed.
    """
    # Snapshot endpoint of the remote camera.
    url = "http://192.168.137.155:8080/shot.jpg"
    # Seconds to wait for the camera before the request fails instead of
    # blocking forever.
    timeout = 10

    @classmethod
    def read(cls):
        """Fetch and decode one snapshot.

        :return: ``(ok, frame)`` where ``frame`` is a 3-channel BGR image, or
            ``None`` when the payload could not be decoded, and ``ok`` tells
            whether a frame was obtained (same convention as
            ``cv2.VideoCapture.read``)
        :raises urllib.error.URLError: when the camera cannot be reached
        """
        # The context manager closes the HTTP response even if read() fails.
        with request.urlopen(cls.url, timeout=cls.timeout) as response:
            payload = response.read()
        if not payload:
            return False, None

        buffer = np.frombuffer(payload, dtype=np.uint8)
        # IMREAD_COLOR always yields 3 BGR channels, which callers rely on
        # when they convert the frame to RGB.
        img = cv2.imdecode(buffer, cv2.IMREAD_COLOR)
        # img = cv2.resize(img, (640, 480)) # use this if the received size is very large
        return img is not None, img

    @staticmethod
    def release():
        """Nothing to release: every ``read`` closes its own connection."""
        pass

# video_capture = cv2.VideoCapture(0)

# Initialize some variables
face_locations = []
face_encodings = []
face_names = []
process_this_frame = True
# Factor applied to each frame before detection (1 disables the resize step's effect)
times = 0.8
# Largest encoding distance that still counts as a match
match_threshold = 0.5

try:
    while True:
        # Grab a single frame of video
        _, frame = video_capture.read()
        if frame is None:
            # Nothing decodable came back; stop cleanly instead of crashing in cv2.resize
            break

        # Only process every other frame of video to save time
        if process_this_frame:
            # Resize the frame by `times` for faster face recognition processing
            small_frame = cv2.resize(frame, (0, 0), fx=times, fy=times)

            # Convert the image from BGR color (which OpenCV uses) to RGB color.
            # cvtColor returns a contiguous array, whereas a reversed slice is a
            # negative-stride view that the dlib bindings may not accept.
            rgb_small_frame = cv2.cvtColor(small_frame, cv2.COLOR_BGR2RGB)

            # Find all the faces and face encodings in the current frame of video
            face_locations = get_face_locations(rgb_small_frame)
            face_encodings = get_face_encodings(rgb_small_frame, face_locations)

            face_names = []
            for face_encoding in face_encodings:
                name = "Unknown"
                # Use the known face with the smallest distance to the new face
                face_distances = face_distance(known_face_encodings, face_encoding)
                # With an empty database there is nothing to take the argmin of
                if len(face_distances) > 0:
                    best_match_index = np.argmin(face_distances)
                    if face_distances[best_match_index] < match_threshold:
                        name = known_face_labels[best_match_index]

                face_names.append(name)

        process_this_frame = not process_this_frame

        # Display the results
        for (top, right, bottom, left), name in zip(face_locations, face_names):
            # Scale the face locations back up to the size of the original frame
            if times != 1:
                top = int(top * (1/times))
                right = int(right * (1/times))
                bottom = int(bottom * (1/times))
                left = int(left * (1/times))

            # Draw a box around the face
            cv2.rectangle(frame, (left, top), (right, bottom), (0, 0, 255), 2)

            # Draw a label with a name below the face
            cv2.rectangle(frame, (left, bottom - 35), (right, bottom), (255, 0, 0), cv2.FILLED)
            font = cv2.FONT_HERSHEY_DUPLEX
            cv2.putText(frame, name, (left + 6, bottom - 6), font, 1.0, (255, 255, 255), 1)

        # Display the resulting image
        cv2.imshow('Video', frame)

        # Hit 'q' on the keyboard to quit!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the capture source and close the window even if the loop failed
    # (network error, kernel interrupt, ...)
    video_capture.release()
    cv2.destroyAllWindows()

In [5]:
import matplotlib.pyplot as plt

In [10]:
# Compare the shape of the last captured frame with that of its resized copy
print(frame.shape, small_frame.shape)

(480, 720, 3) (384, 576, 3)


In [13]:
# Encode the faces located in the last processed frame (one array per face location)
get_face_encodings(rgb_small_frame, face_locations)

[array([-1.82601318e-01,  1.43306494e-01,  4.75299805e-02,  6.98941574e-03,
        -3.61036733e-02, -4.37219553e-02, -2.94948369e-03, -4.28521819e-02,
         1.40828177e-01, -3.18889953e-02,  2.86602706e-01, -6.66234493e-02,
        -8.79895538e-02, -9.96640772e-02,  2.28629336e-02,  1.14016123e-01,
        -1.46562397e-01, -1.55406907e-01, -4.16643992e-02, -6.29545078e-02,
         6.94382042e-02, -5.84198870e-02, -2.96676420e-02,  9.32637155e-02,
        -1.96350127e-01, -3.24195772e-01, -8.74499679e-02, -1.22879334e-01,
         1.06417581e-01, -8.86754319e-02, -4.93622869e-02,  4.78196107e-02,
        -1.98641151e-01, -4.06240821e-02, -3.49454880e-02,  8.46922323e-02,
         6.89230207e-03,  4.84503806e-03,  1.10867813e-01,  2.21803412e-02,
        -1.52630195e-01,  1.90328658e-02,  6.37322813e-02,  2.93712169e-01,
         2.29291752e-01,  9.78410244e-02,  5.81969991e-02, -5.88554237e-03,
        -8.65512341e-03, -1.72328621e-01,  6.87415451e-02,  1.09770179e-01,
         1.3