In [1]:
import random
import numpy as np
import matplotlib.pyplot as plt
import cv2
import dlib
from collections import OrderedDict

# Face detection

## dlib HOG

In [10]:
# Live face detection with dlib's HOG-based frontal face detector: every
# detected face is outlined in red on the webcam stream until 'q' is pressed.

# Build the detector before opening the camera so that a failure here does
# not leave the capture device open.
hogFaceDetector = dlib.get_frontal_face_detector()
cap = cv2.VideoCapture(0)

try:
    while True:
        # Capture frame-by-frame
        ret, frame = cap.read()
        if not ret or frame is None:
            # cap.read() delivered no image (camera missing, busy or unplugged)
            print("Could not read a frame from the camera; stopping.")
            break

        # Run the detector on the frame (second argument: upsampling passes)
        faceRects = hogFaceDetector(frame, 0)
        for faceRect in faceRects:
            top_left = (faceRect.left(), faceRect.top())
            bottom_right = (faceRect.right(), faceRect.bottom())
            cv2.rectangle(frame, top_left, bottom_right, (0, 0, 255), 3)

        # Display the resulting frame
        cv2.imshow('frame', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the capture and close the windows even if the loop raised
    cap.release()
    cv2.destroyAllWindows()

## OpenCV DNN

In [21]:
# Live face detection with OpenCV's DNN module and a Caffe SSD model: every
# detection above the confidence threshold is outlined in red on the webcam
# stream until 'q' is pressed.

# Load the network before opening the camera so that a missing model file
# does not leave the capture device open.
modelFile = "trained_models/res10_300x300_ssd_iter_140000.caffemodel"
configFile = "trained_models/deploy.prototxt.txt"
net = cv2.dnn.readNetFromCaffe(configFile, modelFile)

cap = cv2.VideoCapture(0)

try:
    while True:
        # Capture frame-by-frame
        ret, frame = cap.read()
        if not ret or frame is None:
            # cap.read() delivered no image (camera missing, busy or unplugged)
            print("Could not read a frame from the camera; stopping.")
            break
        height, width = frame.shape[:2]

        # Resize to 300x300 and subtract the per-channel mean; the two False
        # flags are swapRB and crop.
        blob = cv2.dnn.blobFromImage(frame, 1.0, (300, 300), [104, 117, 123], False, False)

        net.setInput(blob)
        detections = net.forward()
        for i in range(detections.shape[2]):
            confidence = detections[0, 0, i, 2]
            if confidence > 0.8:
                # Columns 3..6 are multiplied by the frame size to obtain the
                # pixel corners (x1, y1, x2, y2) of the box.
                x1 = int(detections[0, 0, i, 3] * width)
                y1 = int(detections[0, 0, i, 4] * height)
                x2 = int(detections[0, 0, i, 5] * width)
                y2 = int(detections[0, 0, i, 6] * height)

                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 255), 2)

        # Display the resulting frame
        cv2.imshow('frame', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the capture and close the windows even if the loop raised
    cap.release()
    cv2.destroyAllWindows()

# Landmark detection (68)

## dlib

In [8]:
def shape_to_np(shape, dtype="int"):
    """Convert a landmark detection into an (N, 2) array of (x, y) points.

    The number of landmarks is taken from ``shape.num_parts`` instead of being
    hard-coded, so the same helper works for any landmark model.

    Args:
        shape: object exposing ``num_parts`` and ``part(i)`` whose result has
            ``x`` and ``y`` attributes (e.g. the value returned by a dlib
            shape predictor).
        dtype: NumPy dtype of the returned array.

    Returns:
        Array of shape ``(shape.num_parts, 2)``; row ``i`` holds the
        (x, y)-coordinates of landmark ``i``.
    """
    num_points = shape.num_parts
    coords = np.zeros((num_points, 2), dtype=dtype)
    for i in range(num_points):
        point = shape.part(i)
        coords[i] = (point.x, point.y)
    return coords

# Half-open [start, end) index ranges into the 68-point landmark array, keyed
# by facial region. The ranges are consumed as slices (shape[start:end]).
FACIAL_LANDMARKS_IDXS = OrderedDict([
    ("mouth", (48, 68)),
    ("right_eyebrow", (17, 22)),
    ("left_eyebrow", (22, 27)),
    ("right_eye", (36, 42)),
    ("left_eye", (42, 48)),
    # End is 36, not 35: with a slice end of 35 the landmark at index 35
    # would belong to no region at all.
    ("nose", (27, 36)),
    ("jaw", (0, 17))
])

# Live 68-point landmark demo: dlib HOG face detector + dlib shape predictor.
# Shows the webcam stream, an enlarged crop of the face and of each eye,
# until 'q' is pressed.

# When True, the face box and the landmark dots are drawn onto the frame.
SHOW_MARKERS = True

# Load the models before opening the camera so that a missing model file
# does not leave the capture device open.
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor("trained_models/shape_predictor_68_face_landmarks.dat")
cap = cv2.VideoCapture(0)

try:
    while True:
        # Capture frame-by-frame
        ret, frame = cap.read()
        if not ret or frame is None:
            # cap.read() delivered no image (camera missing, busy or unplugged)
            print("Could not read a frame from the camera; stopping.")
            break
        height, width = frame.shape[:2]

        for rect in detector(frame, 0):
            x1, y1 = rect.left(), rect.top()
            x2, y2 = rect.right(), rect.bottom()

            if SHOW_MARKERS:
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 255), 2)

            # Clip the box to the frame: a negative coordinate would be read
            # by NumPy as a from-the-end index, and an empty crop makes
            # cv2.resize raise.
            fx1, fy1 = max(x1, 0), max(y1, 0)
            fx2, fy2 = min(x2, width), min(y2, height)
            if fx2 > fx1 and fy2 > fy1:
                face = frame[fy1:fy2, fx1:fx2]
                face = cv2.resize(face, (500, 500), interpolation=cv2.INTER_CUBIC)
                cv2.imshow("Face", face)

            # determine the facial landmarks for the face region, then
            # convert the landmark (x, y)-coordinates to a NumPy array
            shape = shape_to_np(predictor(frame, rect))

            for (name, (i, j)) in FACIAL_LANDMARKS_IDXS.items():
                if SHOW_MARKERS:
                    # draw every landmark of this face part
                    for (px, py) in shape[i:j]:
                        cv2.circle(frame, (int(px), int(py)), 1, (0, 0, 255), -1)

                if name == "right_eye" or name == "left_eye":
                    # extract the bounding box of the eye landmarks as a
                    # separate image, clipped to the frame like the face crop
                    (ex, ey, ew, eh) = cv2.boundingRect(np.array([shape[i:j]]))
                    ex1, ey1 = max(ex, 0), max(ey, 0)
                    ex2, ey2 = min(ex + ew, width), min(ey + eh, height)
                    if ex2 > ex1 and ey2 > ey1:
                        roi = frame[ey1:ey2, ex1:ex2]
                        roi = cv2.resize(roi, (500, 250), interpolation=cv2.INTER_CUBIC)
                        cv2.imshow(name, roi)

        # Display the resulting frame
        cv2.imshow('Webcam', frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the capture and close the windows even if the loop raised
    cap.release()
    cv2.destroyAllWindows()

## DNN

In [13]:
def shape_to_np(shape, dtype="int"):
    """Turn a landmark detection into an (N, 2) array of (x, y) points.

    The landmark count comes from ``shape.num_parts`` rather than a fixed 68,
    so the helper is not tied to one particular landmark model.

    Args:
        shape: object exposing ``num_parts`` and ``part(i)`` whose result has
            ``x`` and ``y`` attributes (e.g. the value returned by a dlib
            shape predictor).
        dtype: NumPy dtype of the returned array.

    Returns:
        Array of shape ``(shape.num_parts, 2)``; row ``i`` holds the
        (x, y)-coordinates of landmark ``i``.
    """
    count = shape.num_parts
    coords = np.zeros((count, 2), dtype=dtype)
    for idx in range(count):
        part = shape.part(idx)
        coords[idx] = (part.x, part.y)
    return coords

# Facial region -> half-open [start, end) index range into the 68-point
# landmark array. The ranges are consumed as slices (shape[start:end]).
FACIAL_LANDMARKS_IDXS = OrderedDict([
    ("mouth", (48, 68)),
    ("right_eyebrow", (17, 22)),
    ("left_eyebrow", (22, 27)),
    ("right_eye", (36, 42)),
    ("left_eye", (42, 48)),
    # End is 36, not 35: with a slice end of 35 the landmark at index 35
    # would belong to no region at all.
    ("nose", (27, 36)),
    ("jaw", (0, 17))
])

# Live 68-point landmark demo: OpenCV DNN face detector + dlib shape
# predictor. Shows the webcam stream, an enlarged crop of the face and of
# each eye, until 'q' is pressed.

# When True, the face box and the landmark dots are drawn onto the frame.
SHOW_MARKERS = False

modelFile = "trained_models/res10_300x300_ssd_iter_140000.caffemodel"
configFile = "trained_models/deploy.prototxt.txt"
net = cv2.dnn.readNetFromCaffe(configFile, modelFile)
predictor = dlib.shape_predictor("trained_models/shape_predictor_68_face_landmarks.dat")
cap = cv2.VideoCapture(0)

try:
    while True:
        # Capture frame-by-frame
        ret, frame = cap.read()
        if not ret or frame is None:
            # cap.read() delivered no image (camera missing, busy or unplugged)
            print("Could not read a frame from the camera; stopping.")
            break
        height, width = frame.shape[:2]

        # Resize to 300x300 and subtract the per-channel mean; the two False
        # flags are swapRB and crop.
        blob = cv2.dnn.blobFromImage(frame, 1.0, (300, 300), [104, 117, 123], False, False)
        net.setInput(blob)
        detections = net.forward()

        for d in range(detections.shape[2]):
            confidence = detections[0, 0, d, 2]
            if confidence <= 0.9:
                continue

            # Columns 3..6 are multiplied by the frame size to obtain the
            # pixel corners (x1, y1, x2, y2) of the box.
            x1 = int(detections[0, 0, d, 3] * width)
            y1 = int(detections[0, 0, d, 4] * height)
            x2 = int(detections[0, 0, d, 5] * width)
            y2 = int(detections[0, 0, d, 6] * height)

            if SHOW_MARKERS:
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 255), 2)

            # Clip the box to the frame: a negative coordinate would be read
            # by NumPy as a from-the-end index, and an empty crop makes
            # cv2.resize raise.
            fx1, fy1 = max(x1, 0), max(y1, 0)
            fx2, fy2 = min(x2, width), min(y2, height)
            if fx2 > fx1 and fy2 > fy1:
                face = frame[fy1:fy2, fx1:fx2]
                face = cv2.resize(face, (500, 500), interpolation=cv2.INTER_CUBIC)
                cv2.imshow("Face", face)

            # determine the facial landmarks for the face region, then
            # convert the landmark (x, y)-coordinates to a NumPy array
            rect = dlib.rectangle(x1, y1, x2, y2)
            shape = shape_to_np(predictor(frame, rect))

            for (name, (i, j)) in FACIAL_LANDMARKS_IDXS.items():
                if SHOW_MARKERS:
                    # draw every landmark of this face part
                    for (px, py) in shape[i:j]:
                        cv2.circle(frame, (int(px), int(py)), 1, (0, 0, 255), -1)

                if name == "right_eye" or name == "left_eye":
                    # extract the bounding box of the eye landmarks as a
                    # separate image, clipped to the frame like the face crop
                    (ex, ey, ew, eh) = cv2.boundingRect(np.array([shape[i:j]]))
                    ex1, ey1 = max(ex, 0), max(ey, 0)
                    ex2, ey2 = min(ex + ew, width), min(ey + eh, height)
                    if ex2 > ex1 and ey2 > ey1:
                        roi = frame[ey1:ey2, ex1:ex2]
                        roi = cv2.resize(roi, (500, 250), interpolation=cv2.INTER_CUBIC)
                        cv2.imshow(name, roi)

        # Display the resulting frame
        cv2.imshow('Webcam', frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the capture and close the windows even if the loop raised
    cap.release()
    cv2.destroyAllWindows()

# Landmark detection (5)

## dlib

In [12]:
def shape_to_np(shape, dtype="int"):
    """Convert a landmark detection into an (N, 2) array of (x, y) points.

    The landmark count is read from ``shape.num_parts`` instead of being
    hard-coded to 5, so the helper works for any landmark model.

    Args:
        shape: object exposing ``num_parts`` and ``part(i)`` whose result has
            ``x`` and ``y`` attributes (e.g. the value returned by a dlib
            shape predictor).
        dtype: NumPy dtype of the returned array.

    Returns:
        Array of shape ``(shape.num_parts, 2)``; row ``i`` holds the
        (x, y)-coordinates of landmark ``i``.
    """
    num_points = shape.num_parts
    coords = np.zeros((num_points, 2), dtype=dtype)
    for i in range(num_points):
        point = shape.part(i)
        coords[i] = (point.x, point.y)
    return coords

# Eye name -> half-open [start, end) index range into the 5-point landmark
# array; the ranges are consumed as slices (shape[start:end]).
FACIAL_LANDMARKS_IDXS = OrderedDict()
FACIAL_LANDMARKS_IDXS["right_eye"] = (0, 2)
FACIAL_LANDMARKS_IDXS["left_eye"] = (2, 4)

# Live 5-point landmark demo: dlib HOG face detector + dlib shape predictor.
# Shows the webcam stream, an enlarged crop of the face and of each eye,
# until 'q' is pressed.

# When True, the face box and the landmark dots are drawn onto the frame.
SHOW_MARKERS = True

# Load the models before opening the camera so that a missing model file
# does not leave the capture device open.
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor("trained_models/shape_predictor_5_face_landmarks.dat")
cap = cv2.VideoCapture(0)

try:
    while True:
        # Capture frame-by-frame
        ret, frame = cap.read()
        if not ret or frame is None:
            # cap.read() delivered no image (camera missing, busy or unplugged)
            print("Could not read a frame from the camera; stopping.")
            break
        height, width = frame.shape[:2]

        for rect in detector(frame, 0):
            x1, y1 = rect.left(), rect.top()
            x2, y2 = rect.right(), rect.bottom()

            if SHOW_MARKERS:
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 255), 2)

            # Clip the box to the frame: a negative coordinate would be read
            # by NumPy as a from-the-end index, and an empty crop makes
            # cv2.resize raise.
            fx1, fy1 = max(x1, 0), max(y1, 0)
            fx2, fy2 = min(x2, width), min(y2, height)
            if fx2 > fx1 and fy2 > fy1:
                face = frame[fy1:fy2, fx1:fx2]
                face = cv2.resize(face, (500, 500), interpolation=cv2.INTER_CUBIC)
                cv2.imshow("Face", face)

            # determine the facial landmarks for the face region, then
            # convert the landmark (x, y)-coordinates to a NumPy array
            shape = shape_to_np(predictor(frame, rect))

            for (name, (i, j)) in FACIAL_LANDMARKS_IDXS.items():
                if SHOW_MARKERS:
                    # draw every landmark of this face part
                    for (px, py) in shape[i:j]:
                        cv2.circle(frame, (int(px), int(py)), 1, (0, 0, 255), -1)

                if name == "right_eye" or name == "left_eye":
                    # Bounding box of the eye landmarks, padded by 10 px above
                    # and below, then clipped to the frame so that y - 10
                    # cannot turn into a negative (from-the-end) index.
                    (ex, ey, ew, eh) = cv2.boundingRect(np.array([shape[i:j]]))
                    ex1, ey1 = max(ex, 0), max(ey - 10, 0)
                    ex2, ey2 = min(ex + ew, width), min(ey + eh + 10, height)
                    if ex2 > ex1 and ey2 > ey1:
                        roi = frame[ey1:ey2, ex1:ex2]
                        roi = cv2.resize(roi, (500, 300), interpolation=cv2.INTER_CUBIC)
                        cv2.imshow(name, roi)

        # Display the resulting frame
        cv2.imshow('Webcam', frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the capture and close the windows even if the loop raised
    cap.release()
    cv2.destroyAllWindows()

## DNN

In [14]:
def shape_to_np(shape, dtype="int"):
    """Turn a landmark detection into an (N, 2) array of (x, y) points.

    The landmark count comes from ``shape.num_parts`` rather than a fixed 5,
    so the helper is not tied to one particular landmark model.

    Args:
        shape: object exposing ``num_parts`` and ``part(i)`` whose result has
            ``x`` and ``y`` attributes (e.g. the value returned by a dlib
            shape predictor).
        dtype: NumPy dtype of the returned array.

    Returns:
        Array of shape ``(shape.num_parts, 2)``; row ``i`` holds the
        (x, y)-coordinates of landmark ``i``.
    """
    count = shape.num_parts
    coords = np.zeros((count, 2), dtype=dtype)
    for idx in range(count):
        part = shape.part(idx)
        coords[idx] = (part.x, part.y)
    return coords

# Half-open [start, end) index ranges into the 5-point landmark array for
# each eye; the ranges are consumed as slices (shape[start:end]).
_EYE_RANGES = (
    ("right_eye", (0, 2)),
    ("left_eye", (2, 4)),
)
FACIAL_LANDMARKS_IDXS = OrderedDict(_EYE_RANGES)

# Live 5-point landmark demo: OpenCV DNN face detector + dlib shape
# predictor. Shows the webcam stream, an enlarged crop of the face and of
# each eye, until 'q' is pressed.

# When True, the face box and the landmark dots are drawn onto the frame.
SHOW_MARKERS = True

modelFile = "trained_models/res10_300x300_ssd_iter_140000.caffemodel"
configFile = "trained_models/deploy.prototxt.txt"
net = cv2.dnn.readNetFromCaffe(configFile, modelFile)
predictor = dlib.shape_predictor("trained_models/shape_predictor_5_face_landmarks.dat")
cap = cv2.VideoCapture(0)

try:
    while True:
        # Capture frame-by-frame
        ret, frame = cap.read()
        if not ret or frame is None:
            # cap.read() delivered no image (camera missing, busy or unplugged)
            print("Could not read a frame from the camera; stopping.")
            break
        height, width = frame.shape[:2]

        # Resize to 300x300 and subtract the per-channel mean; the two False
        # flags are swapRB and crop.
        blob = cv2.dnn.blobFromImage(frame, 1.0, (300, 300), [104, 117, 123], False, False)
        net.setInput(blob)
        detections = net.forward()

        for d in range(detections.shape[2]):
            confidence = detections[0, 0, d, 2]
            if confidence <= 0.9:
                continue

            # Columns 3..6 are multiplied by the frame size to obtain the
            # pixel corners (x1, y1, x2, y2) of the box.
            x1 = int(detections[0, 0, d, 3] * width)
            y1 = int(detections[0, 0, d, 4] * height)
            x2 = int(detections[0, 0, d, 5] * width)
            y2 = int(detections[0, 0, d, 6] * height)

            if SHOW_MARKERS:
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 255), 2)

            # Clip the box to the frame: a negative coordinate would be read
            # by NumPy as a from-the-end index, and an empty crop makes
            # cv2.resize raise.
            fx1, fy1 = max(x1, 0), max(y1, 0)
            fx2, fy2 = min(x2, width), min(y2, height)
            if fx2 > fx1 and fy2 > fy1:
                face = frame[fy1:fy2, fx1:fx2]
                face = cv2.resize(face, (500, 500), interpolation=cv2.INTER_CUBIC)
                cv2.imshow("Face", face)

            # determine the facial landmarks for the face region, then
            # convert the landmark (x, y)-coordinates to a NumPy array
            rect = dlib.rectangle(x1, y1, x2, y2)
            shape = shape_to_np(predictor(frame, rect))

            for (name, (i, j)) in FACIAL_LANDMARKS_IDXS.items():
                if SHOW_MARKERS:
                    # draw every landmark of this face part
                    for (px, py) in shape[i:j]:
                        cv2.circle(frame, (int(px), int(py)), 1, (0, 0, 255), -1)

                if name == "right_eye" or name == "left_eye":
                    # Bounding box of the eye landmarks, padded by 10 px above
                    # and below, then clipped to the frame so that y - 10
                    # cannot turn into a negative (from-the-end) index.
                    (ex, ey, ew, eh) = cv2.boundingRect(np.array([shape[i:j]]))
                    ex1, ey1 = max(ex, 0), max(ey - 10, 0)
                    ex2, ey2 = min(ex + ew, width), min(ey + eh + 10, height)
                    if ex2 > ex1 and ey2 > ey1:
                        roi = frame[ey1:ey2, ex1:ex2]
                        roi = cv2.resize(roi, (500, 300), interpolation=cv2.INTER_CUBIC)
                        cv2.imshow(name, roi)

        # Display the resulting frame
        cv2.imshow('Webcam', frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the capture and close the windows even if the loop raised
    cap.release()
    cv2.destroyAllWindows()

# Face alignment

## dlib

In [23]:
def shape_to_np(shape, dtype="int"):
    """Convert a landmark detection into an (N, 2) array of (x, y) points.

    The landmark count is read from ``shape.num_parts`` instead of being
    hard-coded to 5, so the helper works for any landmark model.

    Args:
        shape: object exposing ``num_parts`` and ``part(i)`` whose result has
            ``x`` and ``y`` attributes (e.g. the value returned by a dlib
            shape predictor).
        dtype: NumPy dtype of the returned array.

    Returns:
        Array of shape ``(shape.num_parts, 2)``; row ``i`` holds the
        (x, y)-coordinates of landmark ``i``.
    """
    num_points = shape.num_parts
    coords = np.zeros((num_points, 2), dtype=dtype)
    for i in range(num_points):
        point = shape.part(i)
        coords[i] = (point.x, point.y)
    return coords

# Region name -> half-open [start, end) index range into the 5-point
# landmark array; the ranges are consumed as slices (shape[start:end]).
FACIAL_LANDMARKS_IDXS = OrderedDict()
FACIAL_LANDMARKS_IDXS["right_eye"] = (0, 2)
FACIAL_LANDMARKS_IDXS["left_eye"] = (2, 4)
FACIAL_LANDMARKS_IDXS["nose"] = (4, 5)

# Live face alignment demo: dlib HOG face detector + 5-point shape predictor,
# with the detected landmarks passed to dlib.get_face_chips. Shows the webcam
# stream and a 160-pixel "aligned" chip until 'q' is pressed.

# When True, the landmark dots are drawn onto the frame.
SHOW_MARKERS = True

# Load the models before opening the camera so that a missing model file
# does not leave the capture device open.
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor("trained_models/shape_predictor_5_face_landmarks.dat")
cap = cv2.VideoCapture(0)

try:
    while True:
        # Capture frame-by-frame
        ret, frame = cap.read()
        if not ret or frame is None:
            # cap.read() delivered no image (camera missing, busy or unplugged)
            print("Could not read a frame from the camera; stopping.")
            break

        rects = detector(frame, 0)

        if len(rects) > 0:
            # Collect the landmark detection of every face for get_face_chips
            faces = dlib.full_object_detections()
            for detection in rects:
                shape = predictor(frame, detection)
                faces.append(shape)

                if SHOW_MARKERS:
                    # Convert once per face instead of once per region
                    points = shape_to_np(shape)
                    for (name, (i, j)) in FACIAL_LANDMARKS_IDXS.items():
                        for (px, py) in points[i:j]:
                            cv2.circle(frame, (int(px), int(py)), 1, (0, 0, 255), -1)

            aligned = dlib.get_face_chips(frame, faces, size=160)
            # All chips go to the same window, so the last one stays visible
            for image in aligned:
                cv2.imshow("aligned", image)

        # Display the resulting frame
        cv2.imshow('Webcam', frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the capture and close the windows even if the loop raised
    cap.release()
    cv2.destroyAllWindows()