In [37]:
from facenet_pytorch import MTCNN
from PIL import Image
from matplotlib import pyplot  as plt
import numpy as np
import math
import cv2

In [38]:
mtcnn = MTCNN(image_size=160,
              margin=0,
              min_face_size=20,
              thresholds=[0.6, 0.7, 0.7], # MTCNN thresholds
              factor=0.709,
              post_process=True,
              device='cpu' # If you don't have GPU
        )
lineColor = (255, 255, 0)

In [39]:
# Landmaeks: [Left Eye], [Right eye], [nose], [left mouth], [right mouth]
def npAngle(a, b, c):
    ba = np.array(a) - np.array(b)
    bc = np.array(c) - np.array(b) 
    
    cosine_angle = np.dot(ba, bc)/(np.linalg.norm(ba)*np.linalg.norm(bc))
    angle = np.arccos(cosine_angle)
    
    return np.degrees(angle)

In [43]:
def visualizeCV2(image, landmarks_, angle_R_, angle_L_, pred_):
    
    for landmarks, angle_R, angle_L, pred in zip(landmarks_, angle_R_, angle_L_, pred_):
        
        if pred == 'Frontal':
            color = (0, 0, 0)
        elif pred == 'Right Profile':
            color = (255, 0, 0)
        else:
            color = (0, 0, 255)
            
        point1 = [int(landmarks[0][0]), int(landmarks[1][0])]
        point2 = [int(landmarks[0][1]), int(landmarks[1][1])]

        point3 = [int(landmarks[2][0]), int(landmarks[0][0])]
        point4 = [int(landmarks[2][1]), int(landmarks[0][1])]

        point5 = [int(landmarks[2][0]), int(landmarks[1][0])]
        point6 = [int(landmarks[2][1]), int(landmarks[1][1])]
        print(landmarks.shape)
        for land in landmarks:
            cv2.circle(image, (int(land[0]), int(land[1])), radius=5, color=(0, 255, 255), thickness=-1)
        cv2.line(image, (int(landmarks[0][0]), int(landmarks[0][1])), (int(landmarks[1][0]), int(landmarks[1][1])), lineColor, 3)
        cv2.line(image, (int(landmarks[0][0]), int(landmarks[0][1])), (int(landmarks[2][0]), int(landmarks[2][1])), lineColor, 3)
        cv2.line(image, (int(landmarks[1][0]), int(landmarks[1][1])), (int(landmarks[2][0]), int(landmarks[2][1])), lineColor, 3)
        
        text_sizeR, _ = cv2.getTextSize(pred, cv2.FONT_HERSHEY_PLAIN, fontScale, 4)
        text_wR, text_hR = text_sizeR
        
        cv2.putText(frame, pred, (point1[0], point2[0]), cv2.FONT_HERSHEY_PLAIN, fontScale, color, fontThickness, cv2.LINE_AA)


In [44]:
def predFacePoseCV2(frame):
    
    bbox_, prob_, landmarks_ = mtcnn.detect(frame, landmarks=True) # The detection part producing bounding box, probability of the detected face, and the facial landmarks
    angle_R_List = []
    angle_L_List = []
    predLabelList = []
    
    for bbox, landmarks, prob in zip(bbox_, landmarks_, prob_):
        if bbox is not None: # To check if we detect a face in the image
            if prob > 0.9: # To check if the detected face has probability more than 90%, to avoid 
                angR = npAngle(landmarks[0], landmarks[1], landmarks[2]) # Calculate the right eye angle
                angL = npAngle(landmarks[1], landmarks[0], landmarks[2])# Calculate the left eye angle
                angle_R_List.append(angR)
                angle_L_List.append(angL)
                if ((int(angR) in range(35, 57)) and (int(angL) in range(35, 58))):
                    predLabel='Frontal'
                    predLabelList.append(predLabel)
                else: 
                    if angR < angL:
                        predLabel='Left Profile'
                    else:
                        predLabel='Right Profile'
                    predLabelList.append(predLabel)
            else:
                print('The detected face is Less then the detection threshold')
        else:
            print('No face detected in the image')
    return landmarks_, angle_R_List, angle_L_List, predLabelList
    

In [45]:
source = 0

# Create a video capture object from the VideoCapture Class.
video_cap = cv2.VideoCapture(0)

# Create a named window for the video display.
win_name = 'Video Preview'
cv2.namedWindow(win_name)
video_cadesired_width = 160
desired_height = 160
# dim = (desired_width, desired_height)
left_offset = 20
fontScale = 2
fontThickness = 3
text_color = (0,0,255)
while True:
    # Read one frame at a time using the video capture object.
    has_frame, frame = video_cap.read()
    if not has_frame:
        break
    
    landmarks_, angle_R_List, angle_L_List, predLabelList = predFacePoseCV2(frame)

    # Annotate each video frame.
    visualizeCV2(frame, landmarks_, angle_R_List, angle_L_List, predLabelList)
    cv2.imshow(win_name, frame)

    key = cv2.waitKey(1)

    # You can use this feature to check if the user selected the `q` key to quit the video stream.
    if key == ord('Q') or key == ord('q') or key == 27:
        # Exit the loop.
        break

video_cap.release()
cv2.destroyWindow(win_name)

(5, 2)
%^%^%^%^%^%^
landmarks[0]:  [586.21716 576.7024 ]
landmarks[0][0]:  586.21716
(5, 2)
%^%^%^%^%^%^
landmarks[0]:  [619.3135  561.47894]
landmarks[0][0]:  619.3135
(5, 2)
%^%^%^%^%^%^
landmarks[0]:  [637.7896 550.391 ]
landmarks[0][0]:  637.7896
(5, 2)
%^%^%^%^%^%^
landmarks[0]:  [652.78546 549.52185]
landmarks[0][0]:  652.78546
(5, 2)
%^%^%^%^%^%^
landmarks[0]:  [629.4998 549.9074]
landmarks[0][0]:  629.4998
(5, 2)
%^%^%^%^%^%^
landmarks[0]:  [608.0183  548.81537]
landmarks[0][0]:  608.0183
(5, 2)
%^%^%^%^%^%^
landmarks[0]:  [600.2789 534.01  ]
landmarks[0][0]:  600.2789
(5, 2)
%^%^%^%^%^%^
landmarks[0]:  [608.5925  508.81674]
landmarks[0][0]:  608.5925
(5, 2)
%^%^%^%^%^%^
landmarks[0]:  [609.645  506.5854]
landmarks[0][0]:  609.645
(5, 2)
%^%^%^%^%^%^
landmarks[0]:  [611.42334 496.93643]
landmarks[0][0]:  611.42334
(5, 2)
%^%^%^%^%^%^
landmarks[0]:  [609.40753 507.50433]
landmarks[0][0]:  609.40753
(5, 2)
%^%^%^%^%^%^
landmarks[0]:  [607.93146 492.15802]
landmarks[0][0]:  607.93