In [1]:
!pip install mediapipe

Collecting mediapipe
  Downloading mediapipe-0.10.21-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (9.7 kB)
Collecting opencv-contrib-python (from mediapipe)
  Downloading opencv_contrib_python-4.11.0.86-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (20 kB)
Collecting protobuf<5,>=4.25.3 (from mediapipe)
  Downloading protobuf-4.25.6-cp37-abi3-manylinux2014_x86_64.whl.metadata (541 bytes)
Collecting sounddevice>=0.4.4 (from mediapipe)
  Downloading sounddevice-0.5.1-py3-none-any.whl.metadata (1.4 kB)
Downloading mediapipe-0.10.21-cp311-cp311-manylinux_2_28_x86_64.whl (35.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m35.6/35.6 MB[0m [31m40.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading protobuf-4.25.6-cp37-abi3-manylinux2014_x86_64.whl (294 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m294.6/294.6 kB[0m [31m19.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading sounddevice-0.5.1-py3-none-any.whl (32 kB)
Downloading open

In [10]:
import os
import cv2
import mediapipe as mp
import numpy as np
from IPython.display import display
import ipywidgets as widgets

# Expose only CUDA device 0 to CUDA-aware libraries in this process
# (on Colab the runtime type must be set to GPU for a device to exist).
# NOTE(review): this only restricts device visibility; it does not by itself
# make OpenCV or MediaPipe run on the GPU, and it is assigned after those
# libraries were imported above — confirm it has the intended effect.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

def euclidean_distance(pt1, pt2):
    """Return the straight-line (L2) distance between two points.

    Both arguments are converted to NumPy arrays, so any equal-length
    sequences of numbers (e.g. ``(x, y)`` tuples) are accepted.
    """
    first = np.array(pt1)
    second = np.array(pt2)
    offset = first - second
    return np.linalg.norm(offset)

def compute_ear(eye_points):
    """
    Compute Eye Aspect Ratio (EAR) for an eye.

    EAR = (||p2 - p6|| + ||p3 - p5||) / (2 * ||p1 - p4||)

    Args:
        eye_points: Exactly six points, in the order p1..p6 used by the
            formula above.

    Returns:
        The sum of the two vertical distances divided by twice the
        horizontal distance.
    """
    h_start, upper_a, upper_b, h_end, lower_b, lower_a = eye_points
    lid_gap_a = euclidean_distance(upper_a, lower_a)   # ||p2 - p6||
    lid_gap_b = euclidean_distance(upper_b, lower_b)   # ||p3 - p5||
    eye_width = euclidean_distance(h_start, h_end)     # ||p1 - p4||
    return (lid_gap_a + lid_gap_b) / (2.0 * eye_width)

def get_face_bbox(landmarks, w, h):
    """Return the landmarks' bounding box as ``(x_min, y_min, x_max, y_max)``.

    Each landmark's ``x`` is scaled by ``w`` and its ``y`` by ``h``; the
    results are truncated to ``int``.
    """
    xs = tuple(point.x for point in landmarks)
    ys = tuple(point.y for point in landmarks)
    x_lo, y_lo = min(xs), min(ys)
    x_hi, y_hi = max(xs), max(ys)
    return int(x_lo * w), int(y_lo * h), int(x_hi * w), int(y_hi * h)

def fancy_draw_face_points(frame, landmarks, w, h):
    """
    Draw face mesh points excluding eye landmarks.

    Every landmark whose index is not one of the EAR eye indices is drawn on
    ``frame`` (in place) as a filled dot of radius 1. The dot's hue is
    ``180 * index // len(landmarks)``, at full saturation and value.

    Args:
        frame: Image to draw on; modified in place. Colours are produced with
            an HSV->BGR conversion.
        landmarks: Sized, iterable collection of objects exposing ``x`` and
            ``y`` attributes, which are multiplied by ``w`` and ``h``.
        w: Horizontal scale applied to ``x`` (the frame width at the call site).
        h: Vertical scale applied to ``y`` (the frame height at the call site).
    """
    # These indices correspond to the eye landmarks used for EAR calculation.
    eye_indices = {33, 159, 160, 133, 144, 145, 263, 386, 387, 362, 373, 374}
    num_points = len(landmarks)
    if num_points == 0:
        # Nothing to draw; also avoids converting an empty palette below.
        return

    # Build the whole palette with ONE HSV->BGR conversion instead of calling
    # cv2.cvtColor once per landmark on every frame.
    palette_hsv = np.empty((1, num_points, 3), dtype=np.uint8)
    palette_hsv[0, :, 0] = (180 * np.arange(num_points)) // num_points  # hue < 180
    palette_hsv[0, :, 1:] = 255  # full saturation and value
    palette_bgr = cv2.cvtColor(palette_hsv, cv2.COLOR_HSV2BGR)[0].tolist()

    for i, lm in enumerate(landmarks):
        # Skip drawing if the landmark is part of an eye.
        if i in eye_indices:
            continue
        center = (int(lm.x * w), int(lm.y * h))
        cv2.circle(frame, center, 1, palette_bgr[i], -1)

# ---------------- Initialization ----------------

video_path = "/content/WhatsApp Video 2025-02-23 at 11.13.23_f65644f8.mp4"  # Update as needed

# Landmark indices handed to compute_ear, listed in its p1..p6 order.
left_eye_indices = (33, 159, 160, 133, 144, 145)
right_eye_indices = (263, 386, 387, 362, 373, 374)

# Use a very small threshold so that sleeping is detected only when eyes are fully closed.
# Defined once here instead of being re-assigned on every frame.
ear_threshold = 0.1

status_font = cv2.FONT_HERSHEY_SIMPLEX


def _eye_pixel_points(landmarks, indices, w, h):
    """Return integer (x, y) pixel coordinates for the given landmark indices."""
    # NOTE(review): coordinates are truncated to whole pixels before the EAR is
    # computed, exactly as before; confirm ear_threshold was tuned with that in mind.
    return [(int(landmarks[i].x * w), int(landmarks[i].y * h)) for i in indices]


cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    # Fail loudly: previously a bad path skipped the loop and still printed
    # "Processing complete." without processing a single frame.
    cap.release()
    raise OSError(f"Could not open video file: {video_path}")

# Initialize MediaPipe Face Mesh.
mp_face_mesh = mp.solutions.face_mesh

try:
    # The context manager closes the FaceMesh graph when the block exits,
    # including on errors or a kernel interrupt.
    with mp_face_mesh.FaceMesh(
        static_image_mode=False,
        max_num_faces=1,
        refine_landmarks=True,
        min_detection_confidence=0.5,
        min_tracking_confidence=0.5
    ) as face_mesh:
        # Create an ipywidgets Image widget for fast, in-place display.
        img_widget = widgets.Image(format='jpeg')
        display(img_widget)

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                print("Finished processing video.")
                break

            # Resize frame for faster processing (50% of original size)
            frame = cv2.resize(frame, (0, 0), fx=0.5, fy=0.5)
            h, w, _ = frame.shape

            # Convert frame to RGB for MediaPipe processing.
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = face_mesh.process(frame_rgb)
            sleeping = False

            if results.multi_face_landmarks:
                # Process only the first detected face.
                face_landmarks = results.multi_face_landmarks[0].landmark

                # Draw face landmarks, but skip drawing eye landmarks.
                fancy_draw_face_points(frame, face_landmarks, w, h)

                # Compute EAR for sleeping detection.
                left_eye_points = _eye_pixel_points(face_landmarks, left_eye_indices, w, h)
                right_eye_points = _eye_pixel_points(face_landmarks, right_eye_indices, w, h)
                left_ear = compute_ear(left_eye_points)
                right_ear = compute_ear(right_eye_points)
                avg_ear = (left_ear + right_ear) / 2.0
                sleeping = avg_ear < ear_threshold

                # Draw sleeping status on the frame.
                face_xmin, face_ymin, face_xmax, face_ymax = get_face_bbox(face_landmarks, w, h)
                status_text = "Sleeping" if sleeping else "Not Sleeping"
                status_color = (0, 0, 255) if sleeping else (0, 255, 0)

                # Keep the label on screen when the face touches the top or
                # left edge: the text baseline may not sit above its own height.
                text_height = cv2.getTextSize(status_text, status_font, 1, 2)[0][1]
                text_origin = (max(face_xmin, 0), max(face_ymin - 10, text_height + 5))
                cv2.putText(frame, status_text, text_origin,
                            status_font, 1, status_color, 2)

            ret2, buffer = cv2.imencode('.jpg', frame)
            if ret2:
                img_widget.value = buffer.tobytes()
finally:
    # Always give the capture handle back, even if processing was interrupted.
    cap.release()

print("Processing complete.")


Image(value=b'', format='jpeg')

Finished processing video.
Processing complete.


In [14]:
import os
import cv2
import mediapipe as mp
import numpy as np
from IPython.display import display
import ipywidgets as widgets

# Expose only CUDA device 0 to CUDA-aware libraries in this process.
# NOTE(review): this only restricts device visibility; it does not by itself
# make OpenCV or MediaPipe run on the GPU, and it is assigned after those
# libraries were imported above — confirm it has the intended effect.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

def euclidean_distance(pt1, pt2):
    """Euclidean (L2) distance between ``pt1`` and ``pt2``.

    The points may be any equal-length numeric sequences; they are turned
    into NumPy arrays before subtracting.
    """
    difference = np.array(pt1) - np.array(pt2)
    distance = np.linalg.norm(difference)
    return distance

def compute_ear(eye_points):
    """
    Compute Eye Aspect Ratio (EAR) for an eye.

    EAR = (||p2 - p6|| + ||p3 - p5||) / (2 * ||p1 - p4||)

    ``eye_points`` must unpack into exactly six points, ordered p1..p6 as in
    the formula.
    """
    p1, p2, p3, p4, p5, p6 = eye_points
    # The two vertical pairs are measured first, then the horizontal span.
    vertical_pairs = ((p2, p6), (p3, p5))
    vertical_total = sum(euclidean_distance(a, b) for a, b in vertical_pairs)
    horizontal_span = euclidean_distance(p1, p4)
    return vertical_total / (2.0 * horizontal_span)

def get_face_bbox(landmarks, w, h):
    """Bounding box of ``landmarks`` as ``(x_min, y_min, x_max, y_max)`` ints.

    ``x`` values are multiplied by ``w`` and ``y`` values by ``h`` before
    being truncated with ``int``.
    """
    xs = [lm.x for lm in landmarks]
    ys = [lm.y for lm in landmarks]
    scaled_corners = (min(xs) * w, min(ys) * h, max(xs) * w, max(ys) * h)
    return tuple(int(value) for value in scaled_corners)

def draw_fancy_rectangle(frame, x, y, x_max, y_max, color, thickness=2, corner_radius=20):
    """
    Tint a rectangular region of ``frame`` and outline it, in place.

    The area from ``(x, y)`` to ``(x_max, y_max)`` is blended with ``color``
    at 20% opacity, then an anti-aliased border of the given ``thickness`` is
    drawn in the same colour.

    ``corner_radius`` is accepted but not used by this implementation: the
    rectangle is drawn with square corners.
    """
    top_left = (x, y)
    bottom_right = (x_max, y_max)

    # Paint a solid rectangle on a copy, then mix the copy back into the frame
    # so only the rectangle area picks up a translucent tint.
    tinted = frame.copy()
    cv2.rectangle(tinted, top_left, bottom_right, color, cv2.FILLED)
    cv2.addWeighted(tinted, 0.2, frame, 0.8, 0, frame)

    # Draw outer border
    cv2.rectangle(frame, top_left, bottom_right, color, thickness, cv2.LINE_AA)

def draw_fancy_status(frame, text, x, y, color, box_width=100):
    """
    Draws a stylish status box above the face.

    A half-transparent rectangle filled with ``color`` is blended onto
    ``frame`` (in place) and ``text`` is written over it in white.

    Args:
        frame: Image to draw on; modified in place.
        text: Status string to render.
        x: Left edge of the span the text is centred over.
        y: Reference row; the label is drawn just above it.
        color: Fill colour of the label background.
        box_width: Width of the span starting at ``x`` over which the text is
            centred. Defaults to 100, the value that used to be hard-coded as
            ``abs(x - x + 100)``; pass the face width to centre over the face.
    """
    font = cv2.FONT_HERSHEY_SIMPLEX
    padding = 10
    text_width = cv2.getTextSize(text, font, 1, 2)[0][0]
    frame_width = frame.shape[1]

    # Center text over [x, x + box_width].
    text_x = x + (box_width - text_width) // 2

    # Keep the label inside the frame: a face near the left, right or top edge
    # previously pushed the box and text partly or fully off screen.
    text_x = max(padding, min(text_x, frame_width - text_width - padding))
    y = max(y, 40)  # the background box spans rows y - 40 .. y - 10

    overlay = frame.copy()
    cv2.rectangle(overlay, (text_x - padding, y - 40),
                  (text_x + text_width + padding, y - 10), color, -1)
    cv2.addWeighted(overlay, 0.5, frame, 0.5, 0, frame)

    cv2.putText(frame, text, (text_x, y - 15), font, 1, (255, 255, 255), 2, cv2.LINE_AA)

# ---------------- Initialization ----------------

video_path = "/content/WhatsAp9.mp4"

# Landmark indices handed to compute_ear, listed in its p1..p6 order.
left_eye_indices = (33, 159, 160, 133, 144, 145)
right_eye_indices = (263, 386, 387, 362, 373, 374)

# EAR below this value is reported as "Sleeping".
# Defined once here instead of being re-assigned on every frame.
ear_threshold = 0.1


def _eye_pixel_points(landmarks, indices, w, h):
    """Return integer (x, y) pixel coordinates for the given landmark indices."""
    # NOTE(review): coordinates are truncated to whole pixels before the EAR is
    # computed, exactly as before; confirm ear_threshold was tuned with that in mind.
    return [(int(landmarks[i].x * w), int(landmarks[i].y * h)) for i in indices]


cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    # Fail loudly: previously a bad path skipped the loop and still printed
    # "Processing complete." without processing a single frame.
    cap.release()
    raise OSError(f"Could not open video file: {video_path}")

# Initialize MediaPipe Face Mesh
mp_face_mesh = mp.solutions.face_mesh

try:
    # The context manager closes the FaceMesh graph when the block exits,
    # including on errors or a kernel interrupt.
    with mp_face_mesh.FaceMesh(
        static_image_mode=False,
        max_num_faces=1,
        refine_landmarks=True,
        min_detection_confidence=0.5,
        min_tracking_confidence=0.5
    ) as face_mesh:
        # Create an ipywidgets Image widget for fast, in-place display.
        img_widget = widgets.Image(format='jpeg')
        display(img_widget)

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                print("Finished processing video.")
                break

            # Resize frame for faster processing (50% of original size)
            frame = cv2.resize(frame, (0, 0), fx=0.5, fy=0.5)
            h, w, _ = frame.shape

            # Convert frame to RGB for MediaPipe processing.
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = face_mesh.process(frame_rgb)
            sleeping = False

            if results.multi_face_landmarks:
                # Process only the first detected face.
                face_landmarks = results.multi_face_landmarks[0].landmark

                # Compute EAR for sleeping detection.
                left_eye_points = _eye_pixel_points(face_landmarks, left_eye_indices, w, h)
                right_eye_points = _eye_pixel_points(face_landmarks, right_eye_indices, w, h)
                left_ear = compute_ear(left_eye_points)
                right_ear = compute_ear(right_eye_points)
                avg_ear = (left_ear + right_ear) / 2.0

                # Detect sleeping state
                sleeping = avg_ear < ear_threshold

                # Get face bounding box
                face_xmin, face_ymin, face_xmax, face_ymax = get_face_bbox(face_landmarks, w, h)

                # Set colors
                status_text = "Sleeping" if sleeping else "Not Sleeping"
                status_color = (0, 0, 255) if sleeping else (0, 255, 0)

                # Draw fancy bounding box
                draw_fancy_rectangle(frame, face_xmin, face_ymin, face_xmax, face_ymax, status_color, thickness=3)

                # Draw fancy status text above the face
                draw_fancy_status(frame, status_text, face_xmin, face_ymin, status_color)

            # Encode frame for ipywidgets display
            ret2, buffer = cv2.imencode('.jpg', frame)
            if ret2:
                img_widget.value = buffer.tobytes()
finally:
    # Always give the capture handle back, even if processing was interrupted.
    cap.release()

print("Processing complete.")


Image(value=b'', format='jpeg')

Finished processing video.
Processing complete.
