In [1]:
# import libraries

import cv2
import mediapipe as mp
import numpy as np
import matplotlib.pyplot as plt
from scipy.spatial import distance as dist

# Short aliases for the MediaPipe solution modules used by this notebook.
# Drawing helpers (not referenced by the cells shown here).
mp_drawing = mp.solutions.drawing_utils
# Holistic model; its results expose the face landmarks used for the eyes.
mp_holistic = mp.solutions.holistic

In [55]:
# Manual cleanup cell: run it after interrupting the capture loop to free the
# camera and close any OpenCV windows.
# `cap` is only created by the capture cell further down, so on a fresh kernel
# (Restart & Run All) this cell runs before it exists; guard the release so the
# notebook does not stop here with a NameError.
if "cap" in globals():
    cap.release()
cv2.destroyAllWindows()

In [2]:
# Face-landmark indices outlining each eye (16 points per eye).
#
# eye_aspect_ratio() addresses the points by their POSITION in these lists,
# not by the landmark index itself:
#   positions 0 and 8   -> horizontal line
#   positions 3 and 13  -> first vertical line
#   positions 5 and 11  -> second vertical line
# Both lists follow the same ordering so the same positions apply to each eye.
left_eye_lm = [
    33, 246, 161, 160, 159, 158, 157, 173,   # positions 0-7
    133, 155, 154, 153, 145, 144, 163, 7,    # positions 8-15
]
right_eye_lm = [
    263, 466, 388, 387, 386, 385, 384, 398,  # positions 0-7
    362, 382, 381, 380, 374, 373, 390, 249,  # positions 8-15
]

In [3]:
def eye_coords(frame, landmarks, face_landmarks=None):
    """Convert normalized face landmarks into pixel coordinates.

    Args:
        frame: Image array whose ``shape`` is ``(height, width, channels)``.
        landmarks: Iterable of indices into the face landmark sequence.
        face_landmarks: Object exposing a ``landmark`` sequence whose items
            carry normalized ``x`` and ``y`` attributes. When omitted, the
            ``face_landmarks`` attribute of the module-level ``results`` is
            used, so existing two-argument calls keep working.

    Returns:
        ``np.int32`` array of shape ``(len(landmarks), 2)`` with one
        ``(x, y)`` pixel position per requested landmark.

    Raises:
        AttributeError: If no face landmarks are available (the landmark
            container is ``None``).
    """
    height, width, _ = frame.shape
    if face_landmarks is None:
        # Legacy path: fall back to the detection result stored globally by
        # the capture loop. Prefer passing ``face_landmarks`` explicitly.
        face_landmarks = results.face_landmarks
    points = face_landmarks.landmark

    # Landmarks are normalized, so scale by the frame size; int() truncates
    # to whole pixels.
    coords = [
        (int(points[lm].x * width), int(points[lm].y * height))
        for lm in landmarks
    ]
    # Explicit int32: the platform default "int" is often int64, which
    # cv2.convexHull / cv2.drawContours do not accept.
    return np.array(coords, dtype=np.int32).reshape(-1, 2)

In [4]:
def eye_aspect_ratio(eye):
    """Compute the eye aspect ratio (EAR) from 16 eye-contour points.

    The ratio is ``(A + B) / (2 * C)`` where ``A`` and ``B`` are the two
    vertical distances (points 3-13 and 5-11) and ``C`` is the horizontal
    distance (points 0-8).

    Args:
        eye: Indexable collection of ``(x, y)`` points; positions 0, 3, 5, 8,
            11 and 13 are read.

    Returns:
        The EAR as a float, or ``nan`` when the two horizontal points
        coincide and the ratio is undefined.
    """
    # euclidean distances between the two pairs of vertical eye landmarks
    vertical_a = dist.euclidean(eye[3], eye[13])
    vertical_b = dist.euclidean(eye[5], eye[11])
    # euclidean distance between the horizontal eye landmarks
    horizontal = dist.euclidean(eye[0], eye[8])

    if horizontal == 0:
        # Degenerate eye (zero width). Dividing would yield inf (or nan) with
        # a RuntimeWarning; inf would slip past a NaN filter and corrupt any
        # average built from these values, so report NaN explicitly.
        return float("nan")

    return (vertical_a + vertical_b) / (2.0 * horizontal)

In [20]:
# Drowsiness detection loop.
#
# Reads frames from the capture source, measures the eye aspect ratio (EAR)
# of both eyes, calibrates a threshold from the first frames in which a face
# is found, and then warns when the EAR stays below that threshold for
# several consecutive frames. Press "q" in the preview window to stop.

# Frame count (with a detected face) at which calibration ends.
CALIBRATION_FRAMES = 30
# Consecutive below-threshold frames required before warning.
DROWSY_FRAMES = 10

# initialize counters and threshold
frame_counter = 0        # frames in which a face was detected
ear_frame_counter = 0    # consecutive frames with EAR below the threshold
calibration_ears = []    # EAR samples collected while calibrating
ear_threshold = None     # set exactly once, when calibration ends

# Alternative sources (recorded video / IP camera stream):
#cap = cv2.VideoCapture("../media/video/face_7min2.mp4")
cap = cv2.VideoCapture(0)
#cap = cv2.VideoCapture("http://192.168.2.117:8080/video")

try:
    with mp_holistic.Holistic(
            min_detection_confidence=0.5,
            min_tracking_confidence=0.5) as holistic:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret or frame is None:
                # End of stream or a failed grab: stop instead of passing
                # None to cv2.resize.
                break

            frame = cv2.resize(frame, (640, 480))
            #frame = cv2.flip(frame, 0)
            #frame = cv2.flip(frame, 1)

            # Convert the BGR image to RGB.
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            # To improve performance, optionally mark the image as not
            # writeable to pass by reference.
            frame.flags.writeable = False
            results = holistic.process(frame)
            # Everything below draws on the frame, so make it writable again
            # before any branch touches it.
            frame.flags.writeable = True

            if results.face_landmarks is None:
                cv2.putText(frame, "NO DETECTION", (20, 100), cv2.FONT_HERSHEY_PLAIN, 2, (0, 0, 255))
            else:
                frame_counter += 1

                # get eyes coords
                left_eye = eye_coords(frame, left_eye_lm)
                right_eye = eye_coords(frame, right_eye_lm)

                # get EARs and average them
                left_ear = eye_aspect_ratio(left_eye)
                right_ear = eye_aspect_ratio(right_eye)
                ear = (left_ear + right_ear) / 2

                # find convex hull for the eyes; OpenCV needs int32 points,
                # so cast here in case the coordinates use another int width
                leye_hull = cv2.convexHull(left_eye.astype(np.int32))
                reye_hull = cv2.convexHull(right_eye.astype(np.int32))

                # Draw convex hull
                cv2.drawContours(frame, [leye_hull], -1, (0, 255, 0), 1)
                cv2.drawContours(frame, [reye_hull], -1, (0, 255, 0), 1)

                if frame_counter < CALIBRATION_FRAMES:
                    # take some time to initialize values
                    cv2.putText(frame, "CALIBRATING", (20, 20), cv2.FONT_HERSHEY_PLAIN, 2, (0, 0, 255))
                    calibration_ears.append(ear)
                else:
                    if ear_threshold is None:
                        # Compute the threshold once from the usable samples
                        # (NaN/inf dropped). With no usable sample it stays
                        # NaN, so no frame is ever flagged.
                        samples = np.asarray(calibration_ears, dtype=float)
                        samples = samples[np.isfinite(samples)]
                        ear_threshold = float(samples.mean()) if samples.size else float("nan")
                        # NOTE(review): the threshold is the plain mean of the
                        # calibration EARs, so roughly half of normal frames
                        # fall below it - consider scaling it down.

                    # print EAR on the frame
                    cv2.putText(frame, f"EAR: {round(ear, 2)}", (20, 20), cv2.FONT_HERSHEY_PLAIN, 2, (0, 0, 255))

                    # check if EAR goes below the threshold for a number of frames
                    if ear < ear_threshold:
                        ear_frame_counter += 1

                        if ear_frame_counter == DROWSY_FRAMES:
                            # Log once per episode rather than every frame so
                            # the cell output is not flooded.
                            print("EAR " + str(ear))
                            print("THRESH " + str(ear_threshold))
                            print("drowsy")
                        if ear_frame_counter >= DROWSY_FRAMES:
                            cv2.putText(frame, "WARNING!", (20, 100), cv2.FONT_HERSHEY_PLAIN, 2, (0, 0, 255))
                    else:
                        ear_frame_counter = 0

            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            cv2.imshow('Webcam Feed', frame)

            if cv2.waitKey(20) & 0xFF == ord('q'):
                break
finally:
    # Always free the camera and close the preview window, even when the loop
    # stops because of an error or a kernel interrupt.
    cap.release()
    cv2.destroyAllWindows()

EAR 0.2736790713406636
THRESH 0.27802442505894703
drowsy
EAR 0.22943867956371872
THRESH 0.27802442505894703
drowsy
EAR 0.24594908020118533
THRESH 0.27802442505894703
drowsy
EAR 0.2593732577328388
THRESH 0.27802442505894703
drowsy
EAR 0.2646921215523956
THRESH 0.27802442505894703
drowsy
EAR 0.2417121023846563
THRESH 0.27802442505894703
drowsy
EAR 0.26441166717188347
THRESH 0.27802442505894703
drowsy
EAR 0.23858338445409097
THRESH 0.27802442505894703
drowsy
EAR 0.2566877109220844
THRESH 0.27802442505894703
drowsy
EAR 0.2372870851393879
THRESH 0.27802442505894703
drowsy
EAR 0.2636896154794312
THRESH 0.27802442505894703
drowsy
EAR 0.25027763126943153
THRESH 0.27802442505894703
drowsy
EAR 0.24660666463267156
THRESH 0.27802442505894703
drowsy
EAR 0.2643471999109175
THRESH 0.27802442505894703
drowsy
EAR 0.25626711582148237
THRESH 0.27802442505894703
drowsy
EAR 0.23861599919964938
THRESH 0.27802442505894703
drowsy
EAR 0.2381389171178673
THRESH 0.27802442505894703
drowsy
EAR 0.2594619504292528
