In [1]:
import cv2
import time
import numpy as np
import mediapipe as mp
from pygame import mixer 
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

pygame 2.3.0 (SDL 2.24.2, Python 3.10.2)
Hello from the pygame community. https://www.pygame.org/contribute.html


In [14]:
# --- MediaPipe solutions ------------------------------------------------------
mp_face_mesh = mp.solutions.face_mesh
mp_face_detect = mp.solutions.face_detection

# Landmark tracker limited to a single face, running in video (non-static) mode.
face_mesh = mp_face_mesh.FaceMesh(
    static_image_mode=False,
    max_num_faces=1,
    min_detection_confidence=0.6,
    min_tracking_confidence=0.6,
)

# Face detector that supplies the bounding box.
face_detect = mp_face_detect.FaceDetection(min_detection_confidence=0.6)

# --- Video source setting -----------------------------------------------------
video_src = 0

# --- Class labels (keyed by arg-max index) and their BGR drawing colours ------
eye_classes = dict(enumerate(('closed', 'opened')))
face_classes = dict(enumerate(('no_yawned', 'yawned')))
eye_class_colors = {
    'closed': (0, 0, 255),
    'opened': (0, 255, 0),
}
face_class_colors = {
    'no_yawned': (255, 0, 0),
    'yawned': (0, 255, 255),
}

# --- Thresholds ---------------------------------------------------------------
sleep_counter_thresh = 8
sleep_mood_alert_countdown_thresh = 8

# --- Mutable state updated by the capture loop --------------------------------
sleep_counter = sleep_attempts = yawn_attempts = 0
prev_yawn_pred_class = sleep_mood_alert_countdown = last_alert_played_at = None

# --- Alert sound --------------------------------------------------------------
alert_sound_path = '../media/Pure Alert Sound.wav'
alert_sound_len = 2  # compared against elapsed seconds before the sound is replayed

mixer.init()
mixer.music.load(alert_sound_path)
mixer.music.set_volume(1)

In [3]:
# Eye-state classifier: three convolution/pooling stages on a 48x48 single-channel
# input, followed by a dense head ending in a 2-way softmax.
eye_model = Sequential([
    Conv2D(128, (5, 5), input_shape=(48, 48, 1), activation='relu'),
    MaxPooling2D(2, 2),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Flatten(),
    Dense(256, activation='relu'),
    Dense(64, activation='relu'),
    Dense(2, activation='softmax'),
])

eye_model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Restore the stored weights for this architecture.
eye_model.load_weights('../models/sleep_detection_model.h5')


In [4]:
# Yawn classifier: three 3x3 convolution/pooling stages on a 48x48 single-channel
# input, followed by a dense head ending in a 2-way softmax.
yawn_model = Sequential([
    Conv2D(128, (3, 3), input_shape=(48, 48, 1), activation='relu'),
    MaxPooling2D(2, 2),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(32, activation='relu'),
    Dense(2, activation='softmax'),
])

yawn_model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Restore the stored weights for this architecture.
yawn_model.load_weights('../models/yawn_detection_model.h5')

In [5]:
def predict(model=eye_model, res=(48, 48), image=None, classes=eye_classes):
    """Classify one BGR image crop and return its label with a percentage score.

    The crop is converted to grayscale, resized to ``res``, scaled to the
    0-1 range and passed to ``model`` as a batch of one.

    Args:
        model: Object exposing a Keras-style ``predict``; defaults to ``eye_model``.
        res: ``(width, height)`` handed to ``cv2.resize``.
        image: BGR image array, e.g. a slice of a captured frame.
        classes: Mapping from the arg-max output index to a label.

    Returns:
        ``(label, score)`` where ``score`` is the model output at the winning
        index multiplied by 100 and truncated to an ``int``.

    Raises:
        ValueError: If ``image`` is ``None`` or contains no pixels.
    """
    # An empty crop would otherwise fail inside OpenCV with an opaque assertion.
    if image is None or image.size == 0:
        raise ValueError('predict() requires a non-empty image')

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray = cv2.resize(gray, (res[0], res[1]))
    gray = gray / 255.0
    batch = np.reshape(gray, (1, gray.shape[0], gray.shape[1], 1))

    # verbose=0: this runs several times per frame, so suppress the progress bar.
    prediction = model.predict(batch, verbose=0)
    pred_argmax = np.argmax(prediction, axis=1)[0]

    return classes[pred_argmax], int(prediction[0][pred_argmax] * 100)

In [15]:
def _clamp_rect(x1, y1, x2, y2, frame_w, frame_h):
    """Clamp the corners (x1, y1)-(x2, y2) to a frame_w x frame_h frame."""
    return (
        min(max(x1, 0), frame_w),
        min(max(y1, 0), frame_h),
        min(max(x2, 0), frame_w),
        min(max(y2, 0), frame_h),
    )


# Live loop: grab frames, classify the face (yawn) and both eyes (open/closed),
# draw the overlay and raise alerts. Press Esc to quit.
cam = cv2.VideoCapture(video_src)

try:
    if not cam.isOpened():
        raise RuntimeError(f'Unable to open video source {video_src!r}')

    while True:
        ret, frame = cam.read()
        if not ret or frame is None:
            # No frame was delivered; stop instead of dereferencing a missing frame.
            break

        height, width, _ = frame.shape

        # Per-frame state. Resetting it here keeps the overlay and the yawn-alert
        # comparison from reading values left over from an earlier frame (or
        # names that were never assigned) when a detector finds nothing.
        prev_yawn_attempts = yawn_attempts
        face_box = None
        yawn_pred_class = yawn_pred_acc = None
        eye_boxes = None
        left_eye_pred_class = left_eye_pred_acc = None
        right_eye_pred_class = right_eye_pred_acc = None

        rgb_image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        face_detect_results = face_detect.process(rgb_image)
        face_mesh_results = face_mesh.process(rgb_image)

        # --- Yawn classification on the detected face box ---------------------
        if face_detect_results.detections:
            for detection in face_detect_results.detections:
                rel_bound_rect = detection.location_data.relative_bounding_box
                face_x = int(rel_bound_rect.xmin * width)
                face_y = int(rel_bound_rect.ymin * height)
                face_w = int(rel_bound_rect.width * width)
                face_h = int(rel_bound_rect.height * height)

                # The box may extend past the frame edge; clamp before slicing so
                # negative indices do not wrap around.
                box = _clamp_rect(face_x, face_y, face_x + face_w, face_y + face_h, width, height)
                face_crop = frame[box[1]: box[3], box[0]: box[2]]
                if face_crop.size == 0:
                    continue

                yawn_pred_class, yawn_pred_acc = predict(model=yawn_model, image=face_crop, classes=face_classes)
                face_box = box

                # Count a yawn only on the transition into the 'yawned' state.
                if yawn_pred_class == 'yawned' and prev_yawn_pred_class != yawn_pred_class:
                    yawn_attempts += 1
                prev_yawn_pred_class = yawn_pred_class

        # --- Eye classification from three face-mesh landmarks ----------------
        if face_mesh_results.multi_face_landmarks:
            init_landmarks = {55: None, 117: None, 285: None}
            for face_landmarks in face_mesh_results.multi_face_landmarks:
                for i in (55, 117, 285):
                    landmark = face_landmarks.landmark[i]
                    init_landmarks[i] = int(landmark.x * width), int(landmark.y * height)

            # The right-eye box reuses the (unclamped) width of the left-eye box.
            left_eye_w = init_landmarks[55][0] - init_landmarks[117][0]
            left_box = _clamp_rect(
                init_landmarks[117][0], init_landmarks[55][1],
                init_landmarks[55][0], init_landmarks[117][1],
                width, height,
            )
            right_box = _clamp_rect(
                init_landmarks[285][0], init_landmarks[285][1],
                init_landmarks[285][0] + left_eye_w, init_landmarks[117][1],
                width, height,
            )
            left_eye = frame[left_box[1]: left_box[3], left_box[0]: left_box[2]]
            right_eye = frame[right_box[1]: right_box[3], right_box[0]: right_box[2]]

            # Skip the frame's eye update when either crop is empty (inverted or
            # off-screen box); sleep_counter is left untouched in that case.
            if left_eye.size and right_eye.size:
                left_eye_pred_class, left_eye_pred_acc = predict(image=left_eye)
                right_eye_pred_class, right_eye_pred_acc = predict(image=right_eye)
                eye_boxes = (left_box, right_box)

                if left_eye_pred_class == right_eye_pred_class == 'closed':
                    sleep_counter += 1
                else:
                    sleep_counter = 0

        # --- Overlay ----------------------------------------------------------
        if face_box is not None:
            cv2.rectangle(frame, (face_box[0], face_box[1]), (face_box[2], face_box[3]), face_class_colors[yawn_pred_class], 2)
        if face_mesh_results.multi_face_landmarks:
            if eye_boxes is not None:
                cv2.rectangle(frame, (left_box[0], left_box[1]), (left_box[2], left_box[3]), eye_class_colors[left_eye_pred_class], 2)
                cv2.rectangle(frame, (right_box[0], right_box[1]), (right_box[2], right_box[3]), eye_class_colors[right_eye_pred_class], 2)
            cv2.putText(frame, 'Face:', (15, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.55, (45, 255, 45), 2)
            if yawn_pred_class is not None:
                cv2.putText(frame, f'{yawn_pred_class.title()} ({yawn_pred_acc}%)', (70, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.55, face_class_colors[yawn_pred_class], 2)
            cv2.putText(frame, 'L.Eye:', (15, 55), cv2.FONT_HERSHEY_SIMPLEX, 0.55, (45, 255, 45), 2)
            if eye_boxes is not None:
                cv2.putText(frame, f'{left_eye_pred_class.title()} ({left_eye_pred_acc}%)', (70, 55), cv2.FONT_HERSHEY_SIMPLEX, 0.55, eye_class_colors[left_eye_pred_class], 2)
            cv2.putText(frame, 'R.Eye:', (15, 80), cv2.FONT_HERSHEY_SIMPLEX, 0.55, (45, 255, 45), 2)
            if eye_boxes is not None:
                cv2.putText(frame, f'{right_eye_pred_class.title()} ({right_eye_pred_acc}%)', (70, 80), cv2.FONT_HERSHEY_SIMPLEX, 0.55, eye_class_colors[right_eye_pred_class], 2)
            cv2.putText(frame, 'Sleep Attempts:', (15, 105), cv2.FONT_HERSHEY_SIMPLEX, 0.55, (45, 255, 45), 2)
            cv2.putText(frame, f'{sleep_attempts}', (155, 105), cv2.FONT_HERSHEY_SIMPLEX, 0.55, (0, 0, 255), 2)
            cv2.putText(frame, 'Yawn Attempts:', (15, 130), cv2.FONT_HERSHEY_SIMPLEX, 0.55, (45, 255, 45), 2)
            cv2.putText(frame, f'{yawn_attempts}', (155, 130), cv2.FONT_HERSHEY_SIMPLEX, 0.55, (0, 255, 255), 2)

        # --- Alerts -----------------------------------------------------------
        if sleep_counter > sleep_counter_thresh:
            # Every multiple of the threshold counts as one sleep attempt; the
            # sound is replayed only if more than alert_sound_len seconds passed.
            if sleep_counter % sleep_counter_thresh == 0:
                if not last_alert_played_at:
                    mixer.music.play()
                    last_alert_played_at = time.time()
                else:
                    now = time.time()
                    diff = int(round(now - last_alert_played_at, 1))
                    if diff > alert_sound_len:
                        mixer.music.play()
                        last_alert_played_at = time.time()
                sleep_attempts += 1
            cv2.putText(frame, '[Critical Sleep Alert]', (220, 450), cv2.FONT_HERSHEY_SIMPLEX, 0.65, (0, 0, 255), 2)
        elif yawn_attempts > prev_yawn_attempts:
            # A new yawn was counted on this frame: (re)start the banner countdown.
            sleep_mood_alert_countdown = sleep_mood_alert_countdown_thresh
        if sleep_mood_alert_countdown and sleep_mood_alert_countdown > 0:
            cv2.putText(frame, '[Sleepy Mood Alert]', (225, 450), cv2.FONT_HERSHEY_SIMPLEX, 0.65, (0, 255, 255), 2)
            sleep_mood_alert_countdown -= 1

        cv2.imshow('LIVE DROWSINESS DETECTION [SLEEPING + YAWNING]', frame)

        # Esc (27) exits; mask to the low byte so modifier bits do not hide the key.
        k = cv2.waitKey(1) & 0xFF
        if k == 27:
            break
finally:
    # Always free the camera and close the window, even if a frame raised.
    cam.release()
    cv2.destroyAllWindows()