# Real Time Video Demo

## Importing Required Libraries

In [None]:
import cv2
import numpy as np
import tensorflow as tf
import time
import winsound
import mediapipe as mp

## Load the Trained Model and Define Class Labels

In [None]:
# Load the trained Keras eye-state classifier from disk.
model = tf.keras.models.load_model('models/Eye_Model2.h5')
# Labels looked up by the argmax of the model output in predict_eye_state.
# NOTE(review): the order presumably mirrors the training label order - confirm.
class_names = ['Close-Eyes', 'Open-Eyes']

## Set sound frequency and duration

In [None]:
# Tone settings passed to winsound.Beep whenever an alert fires in the main loop.
frequency = 2500  # Hz
duration = 2000   # milliseconds (2 seconds)

## Initialize MediaPipe face mesh

In [None]:
# Short alias for the MediaPipe face-mesh solution module.
mp_face_mesh = mp.solutions.face_mesh
# Face-mesh instance configured with static_image_mode=False and at most one face;
# it is called once per captured frame in the main loop.
face_mesh = mp_face_mesh.FaceMesh(static_image_mode=False, max_num_faces=1)

## Open webcam For A Real-Time Video Demo
- The code captures video from the webcam.
- Detects the face and locates both eyes using MediaPipe Face Mesh landmarks.
- Uses a deep learning model to classify eye state.
- Alerts the user if no face is detected for 3 seconds.
- Triggers a sleep alert when both eyes are classified as closed for more than 5 consecutive frames.

1. Open Webcam

In [None]:
# Open camera device 0 and request a 640x480 capture size.
cap = cv2.VideoCapture(0)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)    # property id 3
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)   # property id 4

# Abort early when the camera could not be opened.
if not cap.isOpened():
    raise IOError("Cannot Open Webcam")

2. Initialize Variables

In [None]:
# Number of consecutive loop iterations in which both eyes were classified closed;
# reset to 0 when either eye is open and after a sleep alert fires.
counter = 0
# Text overlaid on the frame describing the current eye state.
status = "Open Eyes"
# time.time() of the most recent frame in which a face was found; used for the
# 3-second "face missing" alert.
face_detected_time = time.time()

3. Eye landmarks

In [None]:
# Face-mesh landmark indices whose bounding box is used as the eye crop.
# NOTE(review): 133 (left) and 362 (right) each appear twice. The duplicate is
# harmless for a min/max bounding box, but a different index may have been intended.
LEFT_EYE_LANDMARKS = [33, 133, 153, 154, 155, 133]
RIGHT_EYE_LANDMARKS = [263, 362, 387, 386, 385, 362]
# Pixels added on every side of the landmark bounding box (read by get_eye_box).
padding = 20

4. Define Functions

In [None]:
def get_eye_box(landmarks, landmark_indices, frame_shape, pad=None):
    """Return a padded pixel bounding box around the selected landmarks.

    Args:
        landmarks: Object exposing ``.landmark``, an indexable sequence whose
            items have ``x`` and ``y`` attributes. The values are scaled by
            the frame width and height (presumably normalized MediaPipe
            coordinates - confirm against the caller).
        landmark_indices: Indices into ``landmarks.landmark`` to enclose.
        frame_shape: ``(height, width, ...)`` of the frame, e.g. ``frame.shape``.
        pad: Pixels added on every side of the box. When omitted, the
            module-level ``padding`` value is used.

    Returns:
        ``(x_min, y_min, x_max, y_max)`` clamped to the frame, so the values
        can be used directly as slice bounds.

    Raises:
        ValueError: If ``landmark_indices`` is empty.
    """
    if pad is None:
        pad = padding
    height, width = frame_shape[0], frame_shape[1]

    xs = [int(landmarks.landmark[i].x * width) for i in landmark_indices]
    ys = [int(landmarks.landmark[i].y * height) for i in landmark_indices]

    # Clamp to the frame: a negative bound would be interpreted by NumPy
    # slicing as an offset from the end and produce a wrong or empty crop.
    x_min = min(max(min(xs) - pad, 0), width)
    y_min = min(max(min(ys) - pad, 0), height)
    x_max = max(min(max(xs) + pad, width), x_min)
    y_max = max(min(max(ys) + pad, height), y_min)
    return (x_min, y_min, x_max, y_max)

def predict_eye_state(eye):
    """Classify a cropped eye image as open or closed.

    Args:
        eye: Image array cropped from the video frame. It is resized to
            256x256 and scaled by 1/255 before being passed to ``model``.

    Returns:
        A ``(label, confidence)`` tuple: ``label`` is the entry of
        ``class_names`` with the highest score and ``confidence`` is that
        score. An empty crop yields ``('Close-Eyes', 0.0)``.
    """
    if eye.size == 0:
        # Nothing to classify (empty slice); report closed with zero confidence.
        return 'Close-Eyes', 0.0

    eye_resized = cv2.resize(eye, (256, 256)) / 255.0
    eye_input = np.expand_dims(eye_resized, axis=0)  # add the batch dimension
    # This runs for every eye on every frame, so silence Keras' per-call
    # progress output instead of flooding the console.
    prediction = model.predict(eye_input, verbose=0)
    scores = prediction[0]
    return class_names[np.argmax(scores)], np.max(scores)


5. Main Loop

In [None]:
# Main capture loop: read a frame, locate the face, classify both eyes and
# raise alerts. The try/finally guarantees the camera and the preview window
# are released even when an exception escapes the loop.
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # The face mesh is fed an RGB copy; OpenCV delivers frames as BGR.
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = face_mesh.process(rgb_frame)

        if results.multi_face_landmarks is None:
            # No face in this frame: alert once it has been missing for over 3 seconds.
            if time.time() - face_detected_time > 3:
                cv2.putText(frame, 'Face Missing Alert!', (50, 150), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
                # NOTE(review): winsound.Beep appears to block until the tone
                # ends, so the preview stalls for `duration` ms - confirm.
                winsound.Beep(frequency, duration)
        else:
            face_detected_time = time.time()
            for landmarks in results.multi_face_landmarks:
                left_x1, left_y1, left_x2, left_y2 = get_eye_box(landmarks, LEFT_EYE_LANDMARKS, frame.shape)
                right_x1, right_y1, right_x2, right_y2 = get_eye_box(landmarks, RIGHT_EYE_LANDMARKS, frame.shape)

                # Crop both eyes before anything is drawn on the frame.
                left_eye = frame[left_y1:left_y2, left_x1:left_x2]
                right_eye = frame[right_y1:right_y2, right_x1:right_x2]

                left_eye_status, _ = predict_eye_state(left_eye)
                right_eye_status, _ = predict_eye_state(right_eye)

                # Red box for a closed eye, green for an open one (BGR order).
                left_color = (0, 0, 255) if left_eye_status == 'Close-Eyes' else (0, 255, 0)
                right_color = (0, 0, 255) if right_eye_status == 'Close-Eyes' else (0, 255, 0)

                cv2.rectangle(frame, (left_x1, left_y1), (left_x2, left_y2), left_color, 2)
                cv2.rectangle(frame, (right_x1, right_y1), (right_x2, right_y2), right_color, 2)

                # Count consecutive iterations with both eyes closed.
                if left_eye_status == 'Close-Eyes' and right_eye_status == 'Close-Eyes':
                    counter += 1
                    status = "Closed Eyes"
                else:
                    counter = 0
                    status = "Open Eyes"

                cv2.putText(frame, status, (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)

                # More than 5 consecutive closed iterations: sound the alert and restart the count.
                if counter > 5:
                    cv2.putText(frame, 'Sleep Alert !!', (50, 100), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
                    winsound.Beep(frequency, duration)
                    counter = 0

        cv2.imshow("Real-Time Eye Detection", frame)
        # Esc (key code 27) ends the demo.
        if cv2.waitKey(1) & 0xFF == 27:
            break
finally:
    cap.release()
    cv2.destroyAllWindows()


In [None]:
# import cv2
# import numpy as np
# import matplotlib.pyplot as plt
# import tensorflow as tf
# import mediapipe as mp

# # Load the model
# new_model = tf.keras.models.load_model('models/Eye_Model2.h5')
# class_names = ['Close-Eyes', 'Open-Eyes']

# # Read the image
# img = cv2.imread("close.jpg")
# plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

# # Initialize MediaPipe Face Mesh
# mp_face_mesh = mp.solutions.face_mesh
# face_mesh = mp_face_mesh.FaceMesh()

# # Convert the image to RGB
# img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

# # Process the image to detect faces and facial landmarks
# results = face_mesh.process(img_rgb)

# # Check if landmarks were detected
# if results.multi_face_landmarks:
#     for face_landmarks in results.multi_face_landmarks:
#         # Get landmarks for both eyes (right eye and left eye)
#         left_eye_landmarks = [33, 133, 153, 154, 155, 133]
#         right_eye_landmarks = [263, 362, 387, 386, 385, 362]

#         # Initialize min and max values for the bounding box
#         left_eye_x_min, left_eye_y_min = float('inf'), float('inf')
#         left_eye_x_max, left_eye_y_max = -float('inf'), -float('inf')

#         right_eye_x_min, right_eye_y_min = float('inf'), float('inf')
#         right_eye_x_max, right_eye_y_max = -float('inf'), -float('inf')

#         # Get the pixel coordinates for the left eye landmarks
#         h, w, _ = img.shape
#         for idx in left_eye_landmarks:
#             x, y = int(face_landmarks.landmark[idx].x * w), int(face_landmarks.landmark[idx].y * h)
#             left_eye_x_min = min(left_eye_x_min, x)
#             left_eye_y_min = min(left_eye_y_min, y)
#             left_eye_x_max = max(left_eye_x_max, x)
#             left_eye_y_max = max(left_eye_y_max, y)

#         # Get the pixel coordinates for the right eye landmarks
#         for idx in right_eye_landmarks:
#             x, y = int(face_landmarks.landmark[idx].x * w), int(face_landmarks.landmark[idx].y * h)
#             right_eye_x_min = min(right_eye_x_min, x)
#             right_eye_y_min = min(right_eye_y_min, y)
#             right_eye_x_max = max(right_eye_x_max, x)
#             right_eye_y_max = max(right_eye_y_max, y)

#         # Apply padding to the bounding boxes to extend their width and height
#         padding = 16  # Add padding to the bounding box for better coverage
#         left_eye_x_min -= padding
#         left_eye_y_min -= padding
#         left_eye_x_max += padding
#         left_eye_y_max += padding

#         right_eye_x_min -= padding
#         right_eye_y_min -= padding
#         right_eye_x_max += padding
#         right_eye_y_max += padding

#         # Draw bounding boxes around the eyes using the adjusted coordinates
#         cv2.rectangle(img, (left_eye_x_min, left_eye_y_min), (left_eye_x_max, left_eye_y_max), (0, 255, 0), 2)
#         cv2.rectangle(img, (right_eye_x_min, right_eye_y_min), (right_eye_x_max, right_eye_y_max), (0, 255, 0), 2)

#         # Crop the eye regions for further prediction
#         left_eye_roi = img[left_eye_y_min:left_eye_y_max, left_eye_x_min:left_eye_x_max]
#         right_eye_roi = img[right_eye_y_min:right_eye_y_max, right_eye_x_min:right_eye_x_max]

#         # Show the image with rectangles drawn
#         plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
#         plt.show()

#         # Preprocess the eye regions for model prediction
#         IMAGE_SIZE = 256

#         # Resize and preprocess the left eye image
#         left_eye_final = cv2.resize(left_eye_roi, (IMAGE_SIZE, IMAGE_SIZE))
#         left_eye_final = np.expand_dims(left_eye_final, axis=0)  # Add batch dimension
#         left_eye_final = left_eye_final / 255.0  # Normalize

#         # Resize and preprocess the right eye image
#         right_eye_final = cv2.resize(right_eye_roi, (IMAGE_SIZE, IMAGE_SIZE))
#         right_eye_final = np.expand_dims(right_eye_final, axis=0)  # Add batch dimension
#         right_eye_final = right_eye_final / 255.0  # Normalize

#         # Make the prediction for the left eye
#         left_eye_predictions = new_model.predict(left_eye_final)
#         left_eye_predicted_class = class_names[np.argmax(left_eye_predictions[0])]
#         left_eye_confidence = round(100 * (np.max(left_eye_predictions[0])), 2)
#         print(f"Left Eye Predicted: {left_eye_predicted_class}.\nConfidence: {left_eye_confidence}%")

#         # Make the prediction for the right eye
#         right_eye_predictions = new_model.predict(right_eye_final)
#         right_eye_predicted_class = class_names[np.argmax(right_eye_predictions[0])]
#         right_eye_confidence = round(100 * (np.max(right_eye_predictions[0])), 2)
#         print(f"Right Eye Predicted: {right_eye_predicted_class}.\nConfidence: {right_eye_confidence}%")

# else:
#     print("No faces detected.")


In [1]:
import cv2
import numpy as np
import time
import tensorflow as tf
import mediapipe as mp
from ultralytics import YOLO
import winsound
import threading

# Load YOLO model (the main loop checks its detections for the 'drowsy' class name)
yolo_model = YOLO('./models/best.pt')

# Load Eye Classification Model
eye_model = tf.keras.models.load_model('models/Eye_Model1.h5')
# Labels looked up by the argmax of the eye model's output.
# NOTE(review): the order presumably mirrors the training label order - confirm.
class_names = ['Close-Eyes', 'Open-Eyes']

# Initialize MediaPipe Face Mesh (library default settings)
mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh()

# Start webcam
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    print("Error: Could not open webcam.")
    # exit() is a helper the site module provides for interactive sessions and
    # it reports success (status 0). Raise SystemExit directly with a non-zero
    # status so the failure is visible to whatever launched the script.
    raise SystemExit(1)

# Variables for drowsiness tracking
# time.time() at which YOLO first reported 'drowsy' in the current streak; None when no streak is active.
drowsy_start_time = None
# time.time() at which both eyes were first classified closed in the current streak; None otherwise.
closed_eyes_start_time = None
ALERT_THRESHOLD = 3  # Seconds before triggering an alert

# Audible alert used by the detection loop
def play_alert():
    """Emit three consecutive beeps of 1000 Hz lasting 500 ms each."""
    beeps_left = 3
    while beeps_left > 0:
        winsound.Beep(1000, 500)
        beeps_left -= 1

# Loop-invariant settings, defined once instead of on every detected face.
left_eye_landmarks = [33, 133, 153, 154, 155, 133]
right_eye_landmarks = [263, 362, 387, 386, 385, 362]
padding = 16  # Pixels added on every side of the landmark bounding box
IMAGE_SIZE = 256  # Eye crops are resized to IMAGE_SIZE x IMAGE_SIZE before prediction


def _eye_box(face_landmarks, indices, width, height):
    """Return the padded (x_min, y_min, x_max, y_max) box around the landmarks, clamped to the frame."""
    xs = [int(face_landmarks.landmark[idx].x * width) for idx in indices]
    ys = [int(face_landmarks.landmark[idx].y * height) for idx in indices]
    x_min = max(0, min(xs) - padding)
    y_min = max(0, min(ys) - padding)
    # Never let the upper bound fall below the lower one (or below zero), so
    # the result is always a valid, possibly empty, slice.
    x_max = max(x_min, min(width, max(xs) + padding))
    y_max = max(y_min, min(height, max(ys) + padding))
    return x_min, y_min, x_max, y_max


def _classify_eye(roi):
    """Resize and scale one non-empty eye crop, then return its predicted class name."""
    batch = np.expand_dims(cv2.resize(roi, (IMAGE_SIZE, IMAGE_SIZE)) / 255.0, axis=0)
    # verbose=0 keeps the per-frame predictions from flooding the console.
    return class_names[np.argmax(eye_model.predict(batch, verbose=0))]


def _sound_alert(message, active_thread):
    """Print message and start play_alert in the background unless an alert is still sounding.

    Returns the thread that is (still) playing the alert.
    """
    if active_thread is not None and active_thread.is_alive():
        return active_thread
    print(message)
    worker = threading.Thread(target=play_alert)
    worker.start()
    return worker


# Most recently started alert thread. Used so that an exceeded threshold does
# not spawn a new beeping thread on every single frame.
alert_thread = None

# try/finally guarantees the camera and window are released even on errors.
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            print("Failed to grab frame.")
            break

        img_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = face_mesh.process(img_rgb)

        # Run YOLO model on the frame. Keep a reference to the captured frame
        # so the eye crops are taken from it rather than from the image that
        # plot() returns with YOLO's boxes and labels drawn on it.
        yolo_results = yolo_model(frame)
        raw_frame = frame
        frame = yolo_results[0].plot()

        # Check if YOLO detects "drowsy"
        yolo_drowsy_detected = any(
            yolo_results[0].names[int(box.cls)] == 'drowsy' for box in yolo_results[0].boxes
        )

        # Detect eyes using MediaPipe
        if results.multi_face_landmarks:
            h, w, _ = frame.shape
            for face_landmarks in results.multi_face_landmarks:
                # Get eye bounding boxes (padded and kept within frame limits)
                left_x1, left_y1, left_x2, left_y2 = _eye_box(face_landmarks, left_eye_landmarks, w, h)
                right_x1, right_y1, right_x2, right_y2 = _eye_box(face_landmarks, right_eye_landmarks, w, h)

                # Crop eye regions from the un-annotated frame
                left_eye_roi = raw_frame[left_y1:left_y2, left_x1:left_x2]
                right_eye_roi = raw_frame[right_y1:right_y2, right_x1:right_x2]

                # cv2.resize fails on an empty image; when an eye cannot be
                # cropped, skip this face for the frame and leave the timer
                # untouched (same as a frame in which no face is found).
                if left_eye_roi.size == 0 or right_eye_roi.size == 0:
                    continue

                # Predict eye status
                left_eye_pred = _classify_eye(left_eye_roi)
                right_eye_pred = _classify_eye(right_eye_roi)
                both_closed = left_eye_pred == 'Close-Eyes' and right_eye_pred == 'Close-Eyes'

                # Red when both eyes are closed, green otherwise (BGR order)
                eye_color = (0, 0, 255) if both_closed else (0, 255, 0)

                # Draw eye bounding boxes
                cv2.rectangle(frame, (left_x1, left_y1), (left_x2, left_y2), eye_color, 2)
                cv2.rectangle(frame, (right_x1, right_y1), (right_x2, right_y2), eye_color, 2)

                # Start timer if eyes are closed
                if both_closed:
                    if closed_eyes_start_time is None:
                        closed_eyes_start_time = time.time()
                    elif time.time() - closed_eyes_start_time >= ALERT_THRESHOLD:
                        alert_thread = _sound_alert("ALERT! Eyes closed for too long!", alert_thread)
                else:
                    closed_eyes_start_time = None  # Reset timer

        # Start timer if YOLO detects drowsiness
        if yolo_drowsy_detected:
            if drowsy_start_time is None:
                drowsy_start_time = time.time()
            elif time.time() - drowsy_start_time >= ALERT_THRESHOLD:
                alert_thread = _sound_alert("ALERT! Drowsiness detected!", alert_thread)
        else:
            drowsy_start_time = None  # Reset timer

        # Display the frame
        cv2.imshow("Real-Time Drowsiness Detection", frame)

        # Exit on 'Esc' key
        if cv2.waitKey(1) & 0xFF == 27:
            break
finally:
    # Cleanup
    cap.release()
    cv2.destroyAllWindows()



0: 384x512 1 drowsy, 74.3ms
Speed: 3.1ms preprocess, 74.3ms inference, 79.8ms postprocess per image at shape (1, 3, 384, 512)

0: 384x512 1 drowsy, 50.3ms
Speed: 1.6ms preprocess, 50.3ms inference, 2.1ms postprocess per image at shape (1, 3, 384, 512)

0: 384x512 1 drowsy, 50.9ms
Speed: 1.5ms preprocess, 50.9ms inference, 3.6ms postprocess per image at shape (1, 3, 384, 512)

0: 384x512 1 drowsy, 51.1ms
Speed: 1.6ms preprocess, 51.1ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 512)

0: 384x512 1 drowsy, 49.9ms
Speed: 3.2ms preprocess, 49.9ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 512)

0: 384x512 1 drowsy, 49.4ms
Speed: 1.7ms preprocess, 49.4ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 512)

0: 384x512 1 drowsy, 51.3ms
Speed: 1.8ms preprocess, 51.3ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 512)

0: 384x512 1 drowsy, 49.8ms
Speed: 1.6ms preprocess, 49.8ms inference, 2.5ms postprocess per image at shape (1, 3, 3