### Import packages

In [1]:
import cv2
import dlib
import time
import imutils
import argparse
import playsound
import numpy as np
from threading import Thread
from collections import OrderedDict
from imutils.video import VideoStream
from imutils.video import FileVideoStream
from scipy.spatial import distance as dist

In [2]:
# Paths to the external assets used by this notebook (resolved against the
# current working directory because of the leading './').
# SHAPE_PREDICTOR_PATH: model file handed to dlib.shape_predictor
SHAPE_PREDICTOR_PATH = './shape_predictor_68_face_landmarks.dat'
# ALARM_AUDIO_PATH: audio file played by sound_alarm(); the main loop skips
# the alarm sound when this is an empty string
ALARM_AUDIO_PATH = './alarm.wav'

### Eye Aspect Ratio

In [3]:
# Compute the ratio of eye landmark distances to determine if a person is blinking
def eye_aspect_ratio(eye):
    """Return the eye aspect ratio (EAR) of a single eye.

    Parameters
    ----------
    eye : indexable of (x, y) points
        Landmarks of one eye; indices 0..5 are read. Points 0 and 3 span the
        eye horizontally, while the pairs (1, 5) and (2, 4) span it vertically.

    Returns
    -------
    float
        (|p1 - p5| + |p2 - p4|) / (2 * |p0 - p3|). The value shrinks towards
        zero as the vertical distances collapse (eye closing).
    """
    # sum of the two vertical landmark distances
    vertical_total = sum(
        dist.euclidean(eye[upper], eye[lower])
        for upper, lower in ((1, 5), (2, 4))
    )

    # single horizontal landmark distance (eye width)
    width = dist.euclidean(eye[0], eye[3])

    # the factor 2 balances the two vertical terms against one horizontal term
    return vertical_total / (2.0 * width)

### Threshold constants to detect drowsiness

In [4]:
# Eye aspect ratio below which the main loop treats the eyes as closed
EYE_AR_THRESH = 0.3

# Number of consecutive below-threshold frames that must accumulate
# before the drowsiness alarm is raised
EYE_AR_CONSEC_FRAMES = 48

# Mutable state (not a constant, despite the upper-case name): running count
# of consecutive frames whose EAR was below EYE_AR_THRESH. The main loop
# increments it on every such frame and resets it to 0 otherwise.
FRAME_COUNTER = 0

# Mutable state: True once the alarm has been triggered for the current
# closed-eye streak, so the alarm thread is started only once per streak
IS_ALARM_ON = False

### Helper Functions

In [5]:
# Take a bounding box predicted by dlib and convert it
# to the format (x, y, w, h) as normally handled in OpenCV
def rect_to_bb(rect):
    """Convert a rectangle object into an OpenCV-style bounding box.

    Parameters
    ----------
    rect : object exposing left(), top(), right() and bottom()
        The rectangle to convert (e.g. a detection returned by dlib).

    Returns
    -------
    tuple
        (x, y, w, h): top-left corner plus width and height, where the width
        is right() - left() and the height is bottom() - top().
    """
    left, top = rect.left(), rect.top()
    return (left, top, rect.right() - left, rect.bottom() - top)

In [6]:
# The dlib face landmark detector will return a shape object 
# containing the 68 (x, y)-coordinates of the facial landmark regions.
# This function converts the above object to a NumPy array.
def shape_to_np(shape, dtype = 'int'):
    """Convert a landmark shape object into an (N, 2) NumPy array.

    Parameters
    ----------
    shape : object exposing `num_parts` and `part(i)`
        Landmark container (e.g. dlib's full_object_detection). Each
        `part(i)` must provide `.x` and `.y` attributes.
    dtype : str or numpy dtype, optional
        Element type of the returned array (default 'int').

    Returns
    -------
    numpy.ndarray
        Array of shape (shape.num_parts, 2); row i holds the (x, y)
        coordinates of landmark i.
    """
    # size the array from the shape itself instead of assuming 68 landmarks,
    # so predictors with a different number of points work as well
    num_points = shape.num_parts
    coords = np.zeros((num_points, 2), dtype = dtype)

    # copy every landmark into its row, fetching each part only once
    for i in range(num_points):
        point = shape.part(i)
        coords[i] = (point.x, point.y)

    # return the array of (x, y)-coordinates
    return coords

In [7]:
def sound_alarm(path):
    """Play the audio file at `path` using the playsound package.

    The main loop runs this function on a separate thread so that frame
    processing continues while the sound plays.
    """
    play = playsound.playsound
    play(path)

### Facial Landmarks indices based on the 68-point facial landmark detection

In [8]:
# define a dictionary that maps each face region to the (start, end) indexes
# of its facial landmarks; `end` is exclusive, so a region is extracted with
# landmarks[start:end]
FACIAL_LANDMARKS_IDXS = OrderedDict([
    ("mouth", (48, 68)),
    ("right_eyebrow", (17, 22)),
    ("left_eyebrow", (22, 27)),
    ("right_eye", (36, 42)),
    ("left_eye", (42, 48)),
    # the nose spans landmarks 27..35 inclusive, hence the exclusive end 36
    # (an end of 35 would silently drop landmark 35)
    ("nose", (27, 36)),
    ("jaw", (0, 17))
])

### Detect Face

In [9]:
# Set up everything the detection loop in the next cell relies on:
# the face detector, the landmark predictor, the eye index ranges and
# the video stream.

# initialize dlib's face detector (HOG-based)
detector = dlib.get_frontal_face_detector()

# create the facial landmark predictor from the model file at
# SHAPE_PREDICTOR_PATH
predictor = dlib.shape_predictor(SHAPE_PREDICTOR_PATH)

# grab the (start, end) indexes of the facial landmarks for the left and
# right eye, respectively; the detection loop uses them to slice the
# landmark array
(leStart, leEnd) = FACIAL_LANDMARKS_IDXS['left_eye']
(reStart, reEnd) = FACIAL_LANDMARKS_IDXS['right_eye']

# Streaming from a web-cam (source index 0)
vs = VideoStream(src = 0).start()
# fileStream is False for a live camera; the detection loop only checks
# vs.more() when this flag is True
fileStream = False

# pause for one second before the first frame is read
# NOTE(review): presumably gives the camera time to warm up - confirm
time.sleep(1.0)

### Detect Face, Eyes and Blinks

In [10]:
# Main detection loop: read frames, locate faces and eyes, track how long the
# eyes stay closed, and raise the alarm after EYE_AR_CONSEC_FRAMES closed
# frames in a row. The try/finally guarantees that the window and the video
# stream are released even if a frame fails to process or the kernel is
# interrupted.
try:
    # loop over frames from the video stream
    while True:
        # if this is a file video stream, then we need to check if
        # there are any more frames left in the buffer to process
        if fileStream and not vs.more():
            break

        # grab the next frame from the threaded video stream
        frame = vs.read()

        # no frame available (e.g. the camera could not be opened or the
        # stream has ended): stop instead of crashing inside resize()
        if frame is None:
            break

        # resize the frame and convert it to grayscale channels
        frame = imutils.resize(frame, width = 450)
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # Detect faces in the grayscale image
        # Note: The 0 in the second argument indicates that we will not upsample the image.
        # The benefit of increasing the resolution of the input image prior to face detection is that
        # it may allow us to detect more faces in the image - the downside is that the larger the input image,
        # the more computationally expensive the detection process is.
        rects = detector(gray, 0)

        # loop over the detected faces
        # (FRAME_COUNTER and IS_ALARM_ON are shared by all faces in the frame)
        for rect in rects:
            # determine the facial landmarks for the face region, then
            # convert the facial landmark (x, y)-coordinates to a NumPy array
            shape = predictor(gray, rect)
            shape_np = shape_to_np(shape)

            # extract the left and right eye coordinates, then use the
            # coordinates to compute the eye aspect ratio for both eyes
            leftEye = shape_np[leStart:leEnd]
            rightEye = shape_np[reStart:reEnd]
            leftEAR = eye_aspect_ratio(leftEye)
            rightEAR = eye_aspect_ratio(rightEye)

            # average the eye aspect ratio together for both eyes
            avgEAR = (leftEAR + rightEAR) / 2.0

            # compute the convex hull for the left and right eye, then
            # visualize each of the eyes
            leftEyeHull = cv2.convexHull(leftEye)
            rightEyeHull = cv2.convexHull(rightEye)
            cv2.drawContours(frame, [leftEyeHull], -1, (0, 255, 0), 1)
            cv2.drawContours(frame, [rightEyeHull], -1, (0, 255, 0), 1)

            # check to see if the eye aspect ratio is below the blink
            # threshold, and if so, increment the blink frame counter
            if avgEAR < EYE_AR_THRESH:
                FRAME_COUNTER += 1

                # if the eyes were closed for a sufficient number of
                # frames, then sound the alarm
                if FRAME_COUNTER >= EYE_AR_CONSEC_FRAMES:
                    # if the alarm is not on, turn it on
                    if not IS_ALARM_ON:
                        IS_ALARM_ON = True

                        # check to see if an alarm file was supplied,
                        # and if so, start a thread to have the alarm
                        # sound played in the background
                        if ALARM_AUDIO_PATH:
                            alarm_thread = Thread(target = sound_alarm, args = (ALARM_AUDIO_PATH,))
                            # mark the thread as a daemon so a playing alarm
                            # never keeps the process alive on exit
                            alarm_thread.daemon = True
                            alarm_thread.start()

                    cv2.putText(frame, 'DROWSINESS ALERT!!!', (10, 30),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)

            # otherwise the eye aspect ratio is at or above the threshold
            # (eyes open), so reset the frame counter and switch the alarm off
            else:
                FRAME_COUNTER = 0
                IS_ALARM_ON = False

            # draw the computed eye aspect ratio for the frame
            cv2.putText(frame, "EAR: {:.2f}".format(avgEAR), (300, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)

        # show the frame
        cv2.imshow('Frame', frame)
        key = cv2.waitKey(1) & 0xFF

        # if the `q` key was pressed, break from the loop
        if key == ord('q'):
            break
finally:
    # Cleanup: always close the display window and stop the video stream
    cv2.destroyAllWindows()
    vs.stop()