In [1]:
import torch
# Prefer the GPU when CUDA is usable; otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Report which device was selected so the cell output documents it.
print(f"Using device: {device}")

Using device: cuda


In [2]:
import cv2
import torch
from PIL import Image
from transformers import ViTForImageClassification, ViTFeatureExtractor

# Set device to CUDA if available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the trained model and feature extractor from the same local directory.
# NOTE(review): ViTFeatureExtractor is deprecated in newer transformers
# releases in favour of ViTImageProcessor — confirm against the installed version.
model_path = "trained_vit_model"
model = ViTForImageClassification.from_pretrained(model_path).to(device)
feature_extractor = ViTFeatureExtractor.from_pretrained(model_path)

# Load Haar Cascade for face and eye detection
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + "haarcascade_frontalface_default.xml")
eye_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + "haarcascade_eye.xml")

# CascadeClassifier does not raise when its XML file cannot be loaded; it
# returns an empty classifier instead. Fail here with a clear message rather
# than later inside detectMultiScale.
if face_cascade.empty():
    raise RuntimeError("Failed to load haarcascade_frontalface_default.xml from " + cv2.data.haarcascades)
if eye_cascade.empty():
    raise RuntimeError("Failed to load haarcascade_eye.xml from " + cv2.data.haarcascades)

# Function to preprocess and predict
def predict_drowsiness(image):
    """Classify a single image with the loaded ViT model.

    The image is converted to PyTorch tensors by ``feature_extractor``, moved
    to ``device`` and passed through ``model`` with gradient tracking disabled.

    Args:
        image: An image accepted by ``feature_extractor``; the caller in this
            notebook passes a PIL RGB crop of a detected eye.

    Returns:
        tuple: ``(class_index, probability)`` where ``class_index`` is the
        index of the largest logit and ``probability`` is the softmax
        probability of that class, both as Python scalars.
    """
    batch = feature_extractor(images=image, return_tensors="pt").to(device)
    with torch.no_grad():
        logits = model(**batch).logits
        class_probs = torch.softmax(logits, dim=1)
        # torch.max over dim 1 yields (values, indices); only the index is needed.
        top_index = torch.max(logits, 1)[1]
    label = top_index.item()
    return label, class_probs[0][label].item()

# Open the camera (device index 0)
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    # cap.read() below will return ret=False and the loop exits immediately;
    # say why instead of finishing silently.
    print("Error: could not open camera at index 0")

# try/finally guarantees the camera and windows are released even when the
# loop is left through an exception or a kernel interrupt.
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # Convert frame to grayscale for face and eye detection
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))

        for (x, y, w, h) in faces:
            # Draw bounding box around the face
            cv2.rectangle(frame, (x, y), (x+w, y+h), (255, 0, 0), 2)  # Blue box for face

            # Extract face region for eye detection
            face_roi = gray[y:y+h, x:x+w]
            eyes = eye_cascade.detectMultiScale(face_roi, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))

            # Check if eyes are detected
            if len(eyes) > 0:
                # Assume the first detected eye is used for drowsiness classification.
                # Eye coordinates are relative to face_roi, so offset them by (x, y)
                # to crop from the full colour frame.
                (ex, ey, ew, eh) = eyes[0]
                eye = frame[y+ey:y+ey+eh, x+ex:x+ex+ew]
                eye_pil = Image.fromarray(cv2.cvtColor(eye, cv2.COLOR_BGR2RGB))

                # Predict drowsiness (open or closed eyes); the confidence is not displayed
                class_id, _ = predict_drowsiness(eye_pil)
                class_name = "Drowsy" if class_id == 0 else "Not Drowsy"  # Adjust based on your class mapping
                color = (0, 0, 255) if class_name == "Drowsy" else (0, 255, 0)  # Red for drowsy, Green for not drowsy
            else:
                # If no eyes are detected, assume "Not Drowsy".
                # NOTE(review): the eye cascade may fail to detect closed eyes, in
                # which case this branch reports "Not Drowsy" — confirm this is intended.
                class_name = "Not Drowsy"
                color = (0, 255, 0)  # Green for not drowsy

            # Display drowsiness status above the face bounding box. Clamp the
            # baseline so the label stays inside the frame for faces near the top.
            text_y = max(int(y) - 20, 25)
            cv2.putText(frame, f"Status: {class_name}", (int(x), text_y), cv2.FONT_HERSHEY_SIMPLEX, 0.9, color, 2)

        # Display the frame
        cv2.imshow("Drowsiness Detection (Face + Eye-Based)", frame)

        # Exit on 'q' key press
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the camera and close windows
    cap.release()
    cv2.destroyAllWindows()

  from .autonotebook import tqdm as notebook_tqdm
