In [None]:
import cv2
import mediapipe as mp
import torch
import torch.nn as nn

# Side length used by the CNN below to size its first fully connected layer
# (128 * (IMG_SIZE // 8) ** 2 input features).
# NOTE(review): presumably also the size the eye crops are resized to before
# classification - confirm against the preprocessing code.
IMG_SIZE = 64
# Number of outputs produced by the CNN's final Linear layer.
num_classes = 2

In [26]:
class CNN(nn.Module):
    """Small convolutional classifier for single-channel square images.

    The network is three Conv(3x3, padding=1) -> ReLU -> MaxPool(2) stages
    (32, 64 and 128 channels) followed by Flatten, Dropout(0.5) and a
    two-layer fully connected head. Each pooling stage halves the spatial
    size, so the flattened feature count is ``128 * (img_size // 8) ** 2``.

    The layers live in ``self.net`` in a fixed order; checkpoints saved from
    this class address parameters by that order (``net.0``, ``net.3``, ...),
    so the order must not change.

    Args:
        img_size: Side length of the square input image. ``None`` (default)
            uses the module-level ``IMG_SIZE``.
        n_classes: Number of output logits. ``None`` (default) uses the
            module-level ``num_classes``.

    Raises:
        ValueError: If ``img_size`` is smaller than 8, because nothing would
            be left after three 2x poolings.
    """

    def __init__(self, img_size=None, n_classes=None):
        super().__init__()

        # Fall back to the notebook-level configuration so that the existing
        # zero-argument call ``CNN()`` builds exactly the same network.
        if img_size is None:
            img_size = IMG_SIZE
        if n_classes is None:
            n_classes = num_classes

        if img_size < 8:
            raise ValueError(
                f"img_size must be at least 8 to survive three 2x poolings, got {img_size}"
            )

        # Spatial side length after the three MaxPool2d(2) stages.
        pooled = img_size // 8

        self.net = nn.Sequential(
            nn.Conv2d(1, 32, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),

            nn.Conv2d(32, 64, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),

            nn.Conv2d(64, 128, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),

            nn.Flatten(),
            nn.Dropout(p=0.5),
            nn.Linear(128 * pooled * pooled, 128),
            nn.ReLU(),
            nn.Linear(128, n_classes)
        )

    def forward(self, x):
        """Return the raw class scores (no softmax applied) for batch ``x``.

        ``x`` must have one channel (the first convolution takes 1 input
        channel) and the spatial size the model was constructed for.
        """
        return self.net(x)
    
# Run on the GPU when PyTorch can see one, otherwise on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = CNN().to(device)
# weights_only=True limits unpickling to tensors and plain containers, so a
# tampered checkpoint file cannot execute arbitrary code while loading.
# map_location places the loaded tensors on whichever device was selected.
model.load_state_dict(
    torch.load('eye_classifier.pth', map_location=device, weights_only=True)
)
# Switch to inference mode (disables the Dropout layer). eval() returns the
# module, so as the cell's last expression it also displays the architecture.
model.eval()

CNN(
  (net): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU()
    (8): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (9): Flatten(start_dim=1, end_dim=-1)
    (10): Dropout(p=0.5, inplace=False)
    (11): Linear(in_features=8192, out_features=128, bias=True)
    (12): ReLU()
    (13): Linear(in_features=128, out_features=2, bias=True)
  )
)

In [None]:
# Short aliases for the MediaPipe solution modules.
# mp_face_mesh provides the FaceMesh landmark detector used by the webcam loop.
mp_face_mesh = mp.solutions.face_mesh
# NOTE(review): mp_drawing is not referenced in the cells visible here -
# confirm it is used elsewhere before removing.
mp_drawing = mp.solutions.drawing_utils

In [28]:
def find_camera(max_index=10):
    """Find the first available camera.

    Probes device indices ``0 .. max_index - 1`` in order and returns the
    first capture that reports itself as opened. Captures that fail to open
    are released before the next index is tried.

    Args:
        max_index: Number of device indices to probe (default 10).

    Returns:
        An opened ``cv2.VideoCapture``, or ``None`` if no index could be
        opened. The caller owns the returned capture and must release it.
    """
    for index in range(max_index):
        cap = cv2.VideoCapture(index)
        if cap.isOpened():
            print(f"Found camera at index {index}")
            return cap
        # Free the failed handle so the device is not left half-open.
        cap.release()
    return None

# Minimum confidence (as returned by classify_eye) an eye needs before its
# bounding box and label are drawn on the frame.
CONFIDENCE_THRESHOLD = 0.6


def _draw_eye_annotation(frame, side, state, conf, bbox):
    """Draw one eye's bounding box and "<side>: <state> (<conf>)" caption.

    Draws directly onto ``frame``. The box and text are green when ``state``
    is "Open" and red otherwise (colours are BGR). ``bbox`` is indexed as
    (x1, y1, x2, y2); the caption is placed 10 pixels above the box's top edge.
    """
    color = (0, 255, 0) if state == "Open" else (0, 0, 255)
    cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), color, 2)
    cv2.putText(frame, f"{side}: {state} ({conf:.2f})",
                (bbox[0], bbox[1] - 10),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)


# Initialize webcam
cap = find_camera()
if cap is None:
    print("Error: No camera found!")
    # exit() inside a notebook asks the kernel to shut down, which would throw
    # away the loaded model; SystemExit just stops this cell (and still ends
    # the process when the code is run as a plain script).
    raise SystemExit(1)

try:
    with mp_face_mesh.FaceMesh(
        max_num_faces=1,
        refine_landmarks=True,
        min_detection_confidence=0.5,
        min_tracking_confidence=0.5) as face_mesh:

        while cap.isOpened():
            success, frame = cap.read()
            if not success:
                print("Failed to capture frame")
                break

            # Flip frame horizontally for selfie view
            frame = cv2.flip(frame, 1)

            # MediaPipe is given RGB input; OpenCV delivers BGR frames
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # Process with MediaPipe
            results = face_mesh.process(rgb_frame)

            if results.multi_face_landmarks:
                for face_landmarks in results.multi_face_landmarks:
                    landmarks = face_landmarks.landmark

                    # Classify both eyes before drawing anything, so that an
                    # overlay drawn for one eye can never end up inside the
                    # crop taken for the other eye.
                    eye_results = []
                    for side, indices in (("Left", LEFT_EYE_INDICES),
                                          ("Right", RIGHT_EYE_INDICES)):
                        eye_img, bbox = get_eye_region(frame, landmarks, indices)
                        state, conf = classify_eye(eye_img)
                        eye_results.append((side, state, conf, bbox))

                    # Draw bounding boxes and labels only if confidence is
                    # above threshold
                    for side, state, conf, bbox in eye_results:
                        if conf >= CONFIDENCE_THRESHOLD:
                            _draw_eye_annotation(frame, side, state, conf, bbox)

            # Display
            cv2.imshow('Eye State Classifier', frame)

            # Press 'q' to quit
            if cv2.waitKey(5) & 0xFF == ord('q'):
                break
finally:
    # Always free the camera and close the window, even if the loop raised or
    # the kernel was interrupted; otherwise the device stays locked until the
    # kernel is restarted.
    cap.release()
    cv2.destroyAllWindows()


Found camera at index 0


I0000 00:00:1764551647.160786   10502 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1764551647.163126   10881 gl_context.cc:357] GL version: 3.2 (OpenGL ES 3.2 Mesa 25.0.7-0ubuntu0.25.04.2), renderer: AMD Radeon 660M (radeonsi, rembrandt, LLVM 20.1.2, DRM 3.61, 6.14.0-36-generic)
W0000 00:00:1764551647.165908   10873 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1764551647.184607   10867 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
