In [1]:
import cv2
import tensorflow as tf
from tensorflow.keras.models import load_model
import numpy as np
from pygame import mixer

pygame 2.6.0 (SDL 2.28.4, Python 3.9.13)
Hello from the pygame community. https://www.pygame.org/contribute.html


In [2]:
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
eye_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_eye.xml')
mouth_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_smile.xml')
model = load_model(r'C:\Users\nandi\OneDrive\Desktop\Minor Project\Model\model_checkpoint.keras')

In [5]:
mixer.init()
sound = mixer.Sound(r'C:\Users\nandi\OneDrive\Desktop\Minor Project\Driver-Drowsiness-Detection-using-Deep-Learning-main\Driver-Drowsiness-Detection-using-Deep-Learning-main\alarm.wav')

# Initialize camera
cap = cv2.VideoCapture(0)
Score = 0
yawn_count = 0
alarm_on = False  # Variable to control whether the alarm is playing

# Lowered threshold for detecting yawn (height-to-width ratio)
YAWN_THRESHOLD = 0.5

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            print("Failed to grab frame")
            break

        height, width = frame.shape[0:2]
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        faces = face_cascade.detectMultiScale(gray, scaleFactor=1.2, minNeighbors=3)

        # Draw a black rectangle at the bottom for text display
        cv2.rectangle(frame, (0, height - 50), (200, height), (0, 0, 0), thickness=cv2.FILLED)

        for (x, y, w, h) in faces:
            face_region_gray = gray[y:y + h, x:x + w]
            face_region_color = frame[y:y + h, x:x + w]

            # Detect eyes in the face region
            eyes = eye_cascade.detectMultiScale(face_region_gray, scaleFactor=1.1, minNeighbors=3)

            # Detect mouth in the face region
            mouth = mouth_cascade.detectMultiScale(face_region_gray, scaleFactor=1.5, minNeighbors=5, minSize=(25, 25))

            # Eye detection and prediction
            for (ex, ey, ew, eh) in eyes:
                # Focus on the eyes region
                eye = face_region_color[ey:ey + eh, ex:ex + ew]
                cv2.rectangle(face_region_color, (ex, ey), (ex + ew, ey + eh), (0, 255, 0), 2)

                # Preprocess the eye region for prediction
                eye = cv2.resize(eye, (80, 80))
                eye = eye / 255
                eye = eye.reshape(80, 80, 3)
                eye = np.expand_dims(eye, axis=0)

                # Model prediction
                prediction = model.predict(eye)

                # If eyes are closed
                if prediction[0][0] > 0.30:
                    cv2.putText(frame, 'closed', (10, height - 20), fontFace=cv2.FONT_HERSHEY_COMPLEX_SMALL, fontScale=1,
                                color=(255, 255, 255), thickness=1, lineType=cv2.LINE_AA)
                    Score += 1

                    # Trigger sound if eyes are closed for more than 15 frames
                    if Score > 15 and not alarm_on:
                        try:
                            sound.play()
                            alarm_on = True  # Set flag to avoid continuous playing
                        except:
                            pass

                # If eyes are open
                elif prediction[0][1] > 0.90:
                    cv2.putText(frame, 'open', (10, height - 20), fontFace=cv2.FONT_HERSHEY_COMPLEX_SMALL, fontScale=1,
                                color=(255, 255, 255), thickness=1, lineType=cv2.LINE_AA)
                    Score -= 1
                    if Score < 0:
                        Score = 0

                    # Stop the alarm if eyes are detected open again
                    if alarm_on and Score <= 0:
                        sound.stop()
                        alarm_on = False  # Reset flag to allow alarm to play again if needed

            # Mouth detection and yawn detection
            for (mx, my, mw, mh) in mouth:
                # Focus on the mouth region
                cv2.rectangle(face_region_color, (mx, my), (mx + mw, my + mh), (255, 0, 0), 2)

                # Calculate height-to-width ratio for yawn detection
                mouth_aspect_ratio = mh / float(mw)

                # If the mouth aspect ratio exceeds the threshold, consider it a yawn
                if mouth_aspect_ratio > YAWN_THRESHOLD:
                    yawn_count += 1
                    cv2.putText(frame, 'Yawning', (10, height - 70), fontFace=cv2.FONT_HERSHEY_COMPLEX_SMALL, fontScale=1,
                                color=(0, 0, 255), thickness=1, lineType=cv2.LINE_AA)
                    if yawn_count > 5 and not alarm_on:  # Trigger alarm after 5 yawns
                        try:
                            sound.play()
                            alarm_on = True  # Alarm for excessive yawning
                        except:
                            pass
                else:
                    yawn_count = 0  # Reset yawn count when mouth is not yawning

        # Display the video feed
        cv2.imshow('frame', frame)

        # Break the loop if 'q' is pressed
        if cv2.waitKey(33) & 0xFF == ord('q'):
            break

except KeyboardInterrupt:
    print("Interrupted!")

finally:
    # Proper cleanup: release the camera and close OpenCV windows
    cap.release()
    cv2.destroyAllWindows()
    print("Camera released and windows closed.")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36

In [15]:
from sklearn.metrics import accuracy_score

In [16]:
YAWN_THRESHOLD = 0.5  # Adjust this value based on your testing

# Initialize lists to hold ground truth and predictions
ground_truth_eye = []  # True labels for eye state
ground_truth_yawn = []  # True labels for yawn state
predictions_eye = []  # Model predictions for eye state
predictions_yawn = []  # Model predictions for yawn state

# Ground truth labels - This is a mock dataset; replace it with real labels
ground_truth_eye = [0, 1, 0, 1, 0]  # Sample ground truth for eye states (replace with real data)
ground_truth_yawn = [0, 0, 1, 1, 0]  # Sample ground truth for yawning (replace with real data)

# Capture video or use an image dataset for testing
cap = cv2.VideoCapture(0)  # Use a webcam for live feed or replace with a test video file

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        faces = face_cascade.detectMultiScale(gray, scaleFactor=1.2, minNeighbors=3)

        eye_detected = False  # Track if an eye was detected in the frame
        mouth_detected = False  # Track if a mouth was detected in the frame

        for (x, y, w, h) in faces:
            face_region_gray = gray[y:y + h, x:x + w]
            face_region_color = frame[y:y + h, x:x + w]

            # Detect eyes in the face region
            eyes = eye_cascade.detectMultiScale(face_region_gray, scaleFactor=1.1, minNeighbors=3)
            if len(eyes) > 0:
                for (ex, ey, ew, eh) in eyes[:1]:  # Only process the first detected eye
                    eye_detected = True
                    eye = face_region_color[ey:ey + eh, ex:ex + ew]
                    eye = cv2.resize(eye, (80, 80))
                    eye = eye / 255
                    eye = np.expand_dims(eye, axis=0)

                    # Model prediction for eyes
                    prediction = model.predict(eye)
                    if prediction[0][0] > 0.30:  # Eye closed threshold
                        predictions_eye.append(1)  # Closed
                    else:
                        predictions_eye.append(0)  # Open
                    break  # Only process the first detected eye

            # Detect mouth for yawning detection
            mouth = mouth_cascade.detectMultiScale(face_region_gray, scaleFactor=1.5, minNeighbors=5, minSize=(25, 25))
            if len(mouth) > 0:
                for (mx, my, mw, mh) in mouth[:1]:  # Only process the first detected mouth
                    mouth_detected = True
                    mouth_aspect_ratio = mh / float(mw)
                    if mouth_aspect_ratio > YAWN_THRESHOLD:
                        predictions_yawn.append(1)  # Yawning
                    else:
                        predictions_yawn.append(0)  # No yawn
                    break  # Only process the first detected mouth

        # Handle cases where no eyes or mouth were detected
        if not eye_detected:
            predictions_eye.append(0)  # Assuming default as eyes open when not detected
        if not mouth_detected:
            predictions_yawn.append(0)  # Assuming no yawn when mouth is not detected

        # Break the loop after a certain number of frames for testing
        if len(predictions_eye) >= len(ground_truth_eye):
            break

except Exception as e:
    print(f"Error: {e}")

finally:
    cap.release()
    cv2.destroyAllWindows()

# Step 4: Calculate Accuracy
if len(ground_truth_eye) == len(predictions_eye) and len(ground_truth_yawn) == len(predictions_yawn):
    eye_accuracy = accuracy_score(ground_truth_eye, predictions_eye)
    yawn_accuracy = accuracy_score(ground_truth_yawn, predictions_yawn)
    overall_accuracy = (eye_accuracy + yawn_accuracy) / 2

    print(f"Eye Detection Accuracy: {eye_accuracy * 100:.2f}%")
    print(f"Yawn Detection Accuracy: {yawn_accuracy * 100:.2f}%")
    print(f"Overall Model Accuracy: {overall_accuracy * 100:.2f}%")
else:
    print("The number of predictions does not match the number of ground truth labels.")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
Eye Detection Accuracy: 60.00%
Yawn Detection Accuracy: 60.00%
Overall Model Accuracy: 60.00%
