In [3]:
# Install required libraries
!pip install mediapipe opencv-python

# Import necessary libraries
import cv2
import mediapipe as mp
import numpy as np

Collecting mediapipe
  Downloading mediapipe-0.10.18-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.7 kB)
Collecting sounddevice>=0.4.4 (from mediapipe)
  Downloading sounddevice-0.5.1-py3-none-any.whl.metadata (1.4 kB)
Downloading mediapipe-0.10.18-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (36.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m36.1/36.1 MB[0m [31m54.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading sounddevice-0.5.1-py3-none-any.whl (32 kB)
Installing collected packages: sounddevice, mediapipe
Successfully installed mediapipe-0.10.18 sounddevice-0.5.1


In [12]:
import torch
import cv2
import mediapipe as mp
import numpy as np
import os
import sys

class AdvancedEyeStateDetector:
    """
    Classify eyes as open or closed using MediaPipe Face Mesh landmarks.

    The decision is based on the Eye Aspect Ratio (EAR): the mean of two
    vertical eyelid distances divided by the horizontal eye width, averaged
    over both eyes and compared against ``EAR_THRESHOLD``.

    The detector owns a MediaPipe graph; call ``close()`` (or use the
    instance as a context manager) to release it when finished.
    """

    # EAR range that is linearly mapped onto 0-100 % openness.
    OPENNESS_EAR_MIN = 0.1
    OPENNESS_EAR_MAX = 0.4

    # Frame rate used for the output video when the input reports none.
    DEFAULT_FPS = 30.0

    # Colour (BGR) used for the bounding boxes and the text overlay.
    ANNOTATION_COLOR = (0, 255, 0)

    def __init__(self,
                 ear_threshold=0.10,  # Adjusted threshold
                 min_detection_confidence=0.5):
        """
        Initialize Eye State Detector with improved parameters

        Args:
            ear_threshold (float): Average EAR above which the eyes are
                considered open
            min_detection_confidence (float): Minimum confidence for face detection
        """
        # MediaPipe Face Mesh Setup
        self.mp_face_mesh = mp.solutions.face_mesh
        self.mp_drawing = mp.solutions.drawing_utils

        # Configuration Parameters
        self.EAR_THRESHOLD = ear_threshold
        self.face_mesh = self.mp_face_mesh.FaceMesh(
            static_image_mode=False,
            max_num_faces=1,
            refine_landmarks=True,
            min_detection_confidence=min_detection_confidence
        )

        # Eye Landmark Indices (MediaPipe Face Mesh), ordered as
        # [corner, upper, upper, corner, lower, lower] so that the pairs
        # (1, 5) and (2, 4) are the vertical distances used by the EAR.
        self.LEFT_EYE = [362, 385, 387, 263, 373, 380]
        self.RIGHT_EYE = [33, 160, 158, 133, 153, 144]

    def __enter__(self):
        """Allow ``with AdvancedEyeStateDetector() as detector:`` usage."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Release the MediaPipe graph on leaving the ``with`` block."""
        self.close()
        return False

    def close(self):
        """Release the underlying MediaPipe Face Mesh graph (idempotent)."""
        face_mesh = getattr(self, 'face_mesh', None)
        if face_mesh is not None:
            face_mesh.close()
            # Drop the reference so a second close() is a no-op.
            self.face_mesh = None

    def calculate_eye_aspect_ratio(self, eye_landmarks):
        """
        Calculate the Eye Aspect Ratio (EAR)

        Args:
            eye_landmarks (numpy.ndarray): Six eye landmark coordinates in the
                order [corner, upper, upper, corner, lower, lower]

        Returns:
            float: Eye Aspect Ratio, or 0.0 when the eye has no horizontal
            extent (degenerate landmarks), instead of NaN/inf
        """
        points = np.asarray(eye_landmarks, dtype=np.float64)

        # Vertical eye distances
        vertical_outer = np.linalg.norm(points[1] - points[5])
        vertical_inner = np.linalg.norm(points[2] - points[4])

        # Horizontal eye distance
        horizontal = np.linalg.norm(points[0] - points[3])

        # A collapsed eye (both corners on the same point) would otherwise
        # divide by zero and poison the averaged EAR with NaN/inf.
        if horizontal <= np.finfo(np.float64).eps:
            return 0.0

        return float((vertical_outer + vertical_inner) / (2.0 * horizontal))

    @staticmethod
    def _to_bgr(image):
        """Return a 3-channel BGR view of a grayscale, BGRA or BGR image."""
        if image.ndim == 2 or (image.ndim == 3 and image.shape[2] == 1):
            return cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
        if image.ndim == 3 and image.shape[2] == 4:
            return cv2.cvtColor(image, cv2.COLOR_BGRA2BGR)
        return image

    @staticmethod
    def _eye_points(landmarks, indices, width, height):
        """Convert normalized landmarks at ``indices`` to pixel coordinates."""
        return np.array([
            (landmarks[i].x * width, landmarks[i].y * height)
            for i in indices
        ])

    def _ear_to_openness(self, ear):
        """Linearly map an EAR onto 0-100 %, clamped at both range ends."""
        span = self.OPENNESS_EAR_MAX - self.OPENNESS_EAR_MIN
        fraction = (ear - self.OPENNESS_EAR_MIN) / span
        return float(min(max(fraction, 0.0), 1.0) * 100.0)

    def _draw_eye_box(self, canvas, eye_points):
        """Draw the axis-aligned bounding box of one eye onto ``canvas``."""
        x, y, w, h = cv2.boundingRect(eye_points.astype(np.int32))
        cv2.rectangle(canvas, (x, y), (x + w, y + h), self.ANNOTATION_COLOR, 2)

    def detect_eye_state(self, image):
        """
        Detect eye state with improved accuracy

        Args:
            image (numpy.ndarray): Input image in OpenCV channel order
                (BGR, BGRA or grayscale)

        Returns:
            dict: ``is_open`` (bool), ``openness_percentage`` (float, 0-100)
            and ``visualization`` (BGR image). When no face is found the dict
            additionally carries an ``error`` message, ``is_open`` is False
            and the visualization is the unannotated frame.
        """
        # Face Mesh needs 3-channel RGB input, and the coloured overlay needs
        # a 3-channel canvas, so normalise grayscale/BGRA frames first.
        frame_bgr = self._to_bgr(image)
        image_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)

        # Process image with MediaPipe
        results = self.face_mesh.process(image_rgb)

        # Default return if no face detected
        if not results.multi_face_landmarks:
            return {
                'is_open': False,
                'openness_percentage': 0.0,
                'error': 'No face detected',
                'visualization': frame_bgr
            }

        # Only one face is tracked (max_num_faces=1)
        landmarks = results.multi_face_landmarks[0].landmark
        height, width = frame_bgr.shape[:2]

        # Extract eye landmarks with absolute coordinates
        left_eye_landmarks = self._eye_points(landmarks, self.LEFT_EYE, width, height)
        right_eye_landmarks = self._eye_points(landmarks, self.RIGHT_EYE, width, height)

        # Average the Eye Aspect Ratio of both eyes
        left_ear = self.calculate_eye_aspect_ratio(left_eye_landmarks)
        right_ear = self.calculate_eye_aspect_ratio(right_eye_landmarks)
        avg_ear = (left_ear + right_ear) / 2.0

        # Plain bool/float so the result is JSON-friendly and prints cleanly
        is_open = bool(avg_ear > self.EAR_THRESHOLD)
        openness_percentage = self._ear_to_openness(avg_ear)

        # Prepare visualization on a copy so the caller's frame is untouched
        output_image = frame_bgr.copy()
        self._draw_eye_box(output_image, left_eye_landmarks)
        self._draw_eye_box(output_image, right_eye_landmarks)

        # Annotate with eye state
        eye_state = "Open" if is_open else "Closed"
        cv2.putText(
            output_image,
            f"Eyes: {eye_state} ({openness_percentage:.2f}%)",
            (10, 30),
            cv2.FONT_HERSHEY_SIMPLEX,
            1, self.ANNOTATION_COLOR, 2
        )

        return {
            'is_open': is_open,
            'openness_percentage': openness_percentage,
            'visualization': output_image
        }

    def process_video(self, video_path, output_path='output_eye_detection.mp4'):
        """
        Process video and detect eye state in each frame

        Writes an annotated copy of the video and prints summary statistics.
        Frames in which no face is found are counted separately rather than
        being reported as closed eyes.

        Args:
            video_path (str): Path to input video
            output_path (str): Path of the annotated output video
        """
        # Open the video capture
        cap = cv2.VideoCapture(video_path)

        # Check if video opened successfully
        if not cap.isOpened():
            print(f"Error: Could not open video file {video_path}")
            cap.release()
            return

        # Video output setup
        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = cap.get(cv2.CAP_PROP_FPS)
        # Some containers report 0 (or NaN) FPS; fall back to a sane default
        # instead of handing an invalid rate to the writer.
        if not (fps > 0):
            fps = self.DEFAULT_FPS

        # Output video writer
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))

        # Validate output video writer
        if not out.isOpened():
            print(f"Error: Could not create output video file {output_path}")
            cap.release()
            out.release()
            return

        frame_count = 0
        eye_open_count = 0
        eye_closed_count = 0
        no_face_count = 0

        print("Starting video processing...")

        try:
            while True:
                # Read a frame from the video
                ret, frame = cap.read()

                # Break the loop if no more frames
                if not ret:
                    break

                # Detect eye state
                result = self.detect_eye_state(frame)

                # Write processed frame to output video
                out.write(result['visualization'])

                # Track eye state statistics
                frame_count += 1
                if result.get('error'):
                    no_face_count += 1
                    state_label = 'No face'
                elif result['is_open']:
                    eye_open_count += 1
                    state_label = 'Open'
                else:
                    eye_closed_count += 1
                    state_label = 'Closed'

                # Print progress periodically
                if frame_count % 30 == 0:
                    print(f"Processed {frame_count} frames. "
                          f"Current state: {state_label}")

        except Exception as e:
            # Best effort: keep whatever was written so far and still report.
            print(f"Error during video processing: {e}")

        finally:
            # Release resources. No GUI window is opened by this class, so
            # there is nothing for cv2.destroyAllWindows() to clean up.
            cap.release()
            out.release()

        # Print final statistics
        print("\nVideo Processing Complete:")
        print(f"Total Frames Processed: {frame_count}")
        print(f"Frames with Open Eyes: {eye_open_count}")
        print(f"Frames with Closed Eyes: {eye_closed_count}")
        print(f"Frames with No Face Detected: {no_face_count}")
        if frame_count:
            print(f"Open Eyes Percentage: {(eye_open_count/frame_count)*100:.2f}%")
        else:
            # Avoid ZeroDivisionError when the video yielded no frames.
            print("Open Eyes Percentage: N/A (no frames processed)")
        print(f"Output saved to: {output_path}")

def main(file_path="/content/41126-427876260_small.mp4"):
    """
    Run eye-state detection on a single image or video file.

    Images (.png/.jpg/.jpeg) are analysed once and the annotated result is
    saved as 'eye_detection_result.jpg'; videos (.mp4/.avi/.mov) are handed
    to ``AdvancedEyeStateDetector.process_video``. Problems are reported on
    stdout rather than raised.

    Args:
        file_path (str): Path of the image or video to analyse. Defaults to
            the sample clip used in this notebook.
    """
    image_extensions = ('.png', '.jpg', '.jpeg')
    video_extensions = ('.mp4', '.avi', '.mov')

    # Validate file path before building the (comparatively costly) detector
    if not os.path.exists(file_path):
        print(f"Error: File not found at {file_path}")
        return

    # Initialize detector with adjusted threshold
    detector = AdvancedEyeStateDetector(ear_threshold=0.25)

    # Determine file type and process accordingly
    file_ext = os.path.splitext(file_path)[1].lower()

    try:
        if file_ext in image_extensions:
            # Image processing
            image = cv2.imread(file_path)
            if image is None:
                print(f"Error: Could not read image from {file_path}")
                return

            result = detector.detect_eye_state(image)

            # The detector flags a missing face via an 'error' entry; report
            # it instead of presenting the placeholder values as "Closed".
            if result.get('error'):
                print(f"Error: {result['error']}")
                return

            print("Image Analysis:")
            print(f"Eyes: {'Open' if result['is_open'] else 'Closed'}")
            print(f"Percentage of Openness: {result['openness_percentage']:.2f}%")

            # Save visualization; cv2.imwrite signals failure by returning False
            if cv2.imwrite('eye_detection_result.jpg', result['visualization']):
                print("Visualization saved as 'eye_detection_result.jpg'")
            else:
                print("Error: Could not save visualization to 'eye_detection_result.jpg'")

        elif file_ext in video_extensions:
            # Video processing
            detector.process_video(file_path)

        else:
            print("Unsupported file type. Please use an image or video file.")

    except Exception as e:
        print(f"An error occurred: {e}")

# Entry point: runs main() when this code is executed directly (script or
# notebook cell), but not when it is imported as a module.
if __name__ == "__main__":
    main()

Starting video processing...
Processed 30 frames. Current state: Open
Processed 60 frames. Current state: Open
Processed 90 frames. Current state: Open
Processed 120 frames. Current state: Open
Processed 150 frames. Current state: Closed
Processed 180 frames. Current state: Open
Processed 210 frames. Current state: Open
Processed 240 frames. Current state: Open
Processed 270 frames. Current state: Open
Processed 300 frames. Current state: Open
Processed 330 frames. Current state: Open

Video Processing Complete:
Total Frames Processed: 353
Frames with Open Eyes: 322
Frames with Closed Eyes: 31
Open Eyes Percentage: 91.22%
Output saved to: output_eye_detection.mp4
