In [None]:
# Importing necessary libraries
import cv2
import pytesseract
import numpy as np
import dlib
from imutils import face_utils
from playsound import playsound

In [None]:
# Open the default webcam (device index 0) as the frame source
cap = cv2.VideoCapture(0)

# dlib models: the frontal face detector, and the landmark predictor whose
# weights are loaded from a file given by relative path
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat")

# Consecutive-frame counters for each eye state; all start at zero
sleep = drowsy = active = 0

# Text and colour of the status overlay drawn with cv2.putText
status, color = "", (0, 0, 0)

# Define functions for computing distances and checking blinks
def compute(ptA, ptB):
    """Return the Euclidean (L2) norm of the difference ``ptA - ptB``."""
    return np.linalg.norm(ptA - ptB)

def blinked(a, b, c, d, e, f):
    """Grade one eye from six landmark points.

    The ratio is (|b - d| + |c - e|) / (2 * |a - f|), with the distances
    supplied by ``compute``.

    Returns:
        2 when the ratio is above 0.25, 1 when it lies in [0.21, 0.25],
        and 0 otherwise.
    """
    pair_sum = compute(b, d) + compute(c, e)
    span = compute(a, f)
    ratio = pair_sum / (2.0 * span)

    # Guard-clause form of the threshold ladder
    if ratio > 0.25:
        return 2
    if ratio >= 0.21:
        return 1
    return 0

# Define a function to perform OCR on a frame and print the extracted text
def perform_ocr(frame):
    """Print the text Tesseract finds in a BGR frame.

    The frame is converted to grayscale and binarized with a fixed threshold
    of 150 before it is handed to pytesseract (English language data).
    Nothing is returned; the text goes to standard output.
    """
    grayscale = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    # cv2.threshold returns (threshold_used, image); only the image is needed
    binary_frame = cv2.threshold(grayscale, 150, 255, cv2.THRESH_BINARY)[1]

    print("Extracted Text:", pytesseract.image_to_string(binary_frame, lang='eng'))

# Continuously read frames from the video capture object.
# The try/finally guarantees that the webcam and the OpenCV windows are
# released even when a processing step raises or the kernel is interrupted.
try:
    while True:
        ret, frame = cap.read()

        # Check if the frame is read successfully
        if not ret:
            print("Error: Failed to read frame.")
            break

        # Run OCR on the untouched frame, before the status overlay is drawn
        # onto it, so the overlay is never reported as extracted text
        perform_ocr(frame)

        # Grayscale copy used by the dlib detector and landmark predictor
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # Detect faces; boxes and landmarks are drawn on a separate copy
        faces = detector(gray)
        face_frame = frame.copy()

        # Process each detected face (the counters are shared by all faces)
        for face in faces:
            x1, y1 = face.left(), face.top()
            x2, y2 = face.right(), face.bottom()

            # Draw a rectangle around the detected face
            cv2.rectangle(face_frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

            # Get facial landmarks as an array of (x, y) rows
            landmarks = predictor(gray, face)
            landmarks = face_utils.shape_to_np(landmarks)

            # Grade each eye: 0, 1 or 2 (see blinked)
            left_blink = blinked(landmarks[36], landmarks[37], landmarks[38], landmarks[41], landmarks[40], landmarks[39])
            right_blink = blinked(landmarks[42], landmarks[43], landmarks[44], landmarks[47], landmarks[46], landmarks[45])

            # A state only becomes the displayed status after more than
            # 6 consecutive frames in that state
            if left_blink == 0 or right_blink == 0:
                sleep += 1
                drowsy = 0
                active = 0
                if sleep > 6:
                    status = "SLEEPING"
                    color = (255, 0, 0)

                    # NOTE: playsound blocks by default until the clip ends,
                    # so the preview pauses while the alarm is sounding
                    playsound("alarm.wav")

            elif left_blink == 1 or right_blink == 1:
                sleep = 0
                active = 0
                drowsy += 1
                if drowsy > 6:
                    status = "Drowsy"
                    color = (0, 0, 255)
            else:
                drowsy = 0
                sleep = 0
                active += 1
                if active > 6:
                    status = "Active"
                    color = (0, 255, 0)

            # Display status on the frame
            cv2.putText(frame, status, (100, 100), cv2.FONT_HERSHEY_SIMPLEX, 1.2, color, 3)

            # Draw facial landmarks; cast to plain ints so OpenCV always
            # accepts the centre coordinates
            for (x, y) in landmarks:
                cv2.circle(face_frame, (int(x), int(y)), 1, (255, 255, 255), -1)

        # Show the original frame (with the status overlay)
        cv2.imshow("Frame", frame)

        # Show the frame with detected faces and landmarks
        cv2.imshow("Face Frame", face_frame)

        # Check for 'q' key to exit the loop
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the video capture object and close OpenCV windows
    cap.release()
    cv2.destroyAllWindows()


In [None]:
# # Importing necessary libraries
# import cv2
# import pytesseract
# import numpy as np
# import dlib
# from imutils import face_utils

# # Initialize the video capture object (0 represents the default webcam)
# cap = cv2.VideoCapture(0)

# # Initialize the face detector and facial landmarks predictor from dlib
# detector = dlib.get_frontal_face_detector()
# predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat")

# # Define status variables for the blink detection
# sleep = 0
# drowsy = 0
# active = 0
# status = ""
# color = (0, 0, 0)

# # Define functions for computing distances and checking blinks
# def compute(ptA, ptB):
#     dist = np.linalg.norm(ptA - ptB)
#     return dist

# def blinked(a, b, c, d, e, f):
#     up = compute(b, d) + compute(c, e)
#     down = compute(a, f)
#     ratio = up / (2.0 * down)
    
#     # Check for blinks based on ratio
#     if ratio > 0.25:
#         return 2
#     elif 0.21 <= ratio <= 0.25:
#         return 1
#     else:
#         return 0

# # Define a function to perform OCR on a frame and print the extracted text
# def perform_ocr(frame):
#     # Convert the frame to grayscale for better OCR accuracy
#     gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

#     # Perform OCR using pytesseract on the grayscale frame
#     text = pytesseract.image_to_string(gray)

#     # Print the extracted text in the console
#     print("Extracted Text:", text)

# # Continuously read frames from the video capture object
# while True:
#     ret, frame = cap.read()
    
#     # Check if the frame is read successfully
#     if not ret:
#         print("Error: Failed to read frame.")
#         break

#     # Convert the frame to grayscale
#     gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

#     # Detect faces in the grayscale frame
#     faces = detector(gray)
#     face_frame = frame.copy()
    
#     # Process each detected face
#     for face in faces:
#         x1 = face.left()
#         y1 = face.top()
#         x2 = face.right()
#         y2 = face.bottom()

#         # Draw rectangles around detected faces
#         cv2.rectangle(face_frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

#         # Get facial landmarks
#         landmarks = predictor(gray, face)
#         landmarks = face_utils.shape_to_np(landmarks)

#         # Process eye blinks
#         left_blink = blinked(landmarks[36], landmarks[37], landmarks[38], landmarks[41], landmarks[40], landmarks[39])
#         right_blink = blinked(landmarks[42], landmarks[43], landmarks[44], landmarks[47], landmarks[46], landmarks[45])

#         # Judge the eye blinks
#         if left_blink == 0 or right_blink == 0:
#             sleep += 1
#             drowsy = 0
#             active = 0
#             if sleep > 6:
#                 status = "SLEEPING !!!!"
#                 color = (255, 0, 0)
#         elif left_blink == 1 or right_blink == 1:
#             sleep = 0
#             active = 0
#             drowsy += 1
#             if drowsy > 6:
#                 status = "Drowsy !!"
#                 color = (0, 0, 255)
#         else:
#             drowsy = 0
#             sleep = 0
#             active += 1
#             if active > 6:
#                 status = "!! Active ...!"
#                 color = (0, 255, 0)
        
#         # Display status on the frame
#         cv2.putText(frame, status, (100, 100), cv2.FONT_HERSHEY_SIMPLEX, 1.2, color, 3)

#         # Draw facial landmarks on the face frame
#         for n in range(0, 68):
#             (x, y) = landmarks[n]
#             cv2.circle(face_frame, (x, y), 1, (255, 255, 255), -1)
    
#     # Perform OCR and print the extracted text
#     perform_ocr(frame)

#     # Show the original frame
#     cv2.imshow("Frame", frame)

#     # Show the frame with detected faces and landmarks
#     if face_frame is not None:
#         cv2

#     cv2.imshow("Face Frame", face_frame)

#     # Check for 'q' key to exit the loop
#     if cv2.waitKey(1) & 0xFF == ord('q'):
#         break

# # Release the video capture object and close OpenCV windows
# cap.release()
# cv2.destroyAllWindows()


Make some changes to this code so that it captures a frame once every 5 seconds.


To capture and process a frame only once every 5 seconds, you can add a timer to the while loop: record the time at which the last frame was processed and skip capturing and processing until the interval has elapsed. The key check still runs on every pass, so pressing 'q' exits promptly.

Here's how you can modify the code:

In [None]:
# Importing necessary libraries
import cv2
import pytesseract
import numpy as np
import dlib
from imutils import face_utils
from playsound import playsound
import time  # Importing time library for timer

# Open the default webcam (device index 0) as the frame source
cap = cv2.VideoCapture(0)

# dlib models: the frontal face detector, and the landmark predictor whose
# weights are loaded from a file given by relative path
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat")

# Consecutive-sample counters for each eye state; all start at zero
sleep = drowsy = active = 0

# Text and colour of the status overlay drawn with cv2.putText
status, color = "", (0, 0, 0)

# Define functions for computing distances and checking blinks
def compute(ptA, ptB):
    """Return the Euclidean (L2) norm of the difference ``ptA - ptB``."""
    return np.linalg.norm(ptA - ptB)

def blinked(a, b, c, d, e, f):
    """Grade one eye from six landmark points.

    The ratio is (|b - d| + |c - e|) / (2 * |a - f|), with the distances
    supplied by ``compute``.

    Returns:
        2 when the ratio is above 0.25, 1 when it lies in [0.21, 0.25],
        and 0 otherwise.
    """
    pair_sum = compute(b, d) + compute(c, e)
    span = compute(a, f)
    ratio = pair_sum / (2.0 * span)

    # Guard-clause form of the threshold ladder
    if ratio > 0.25:
        return 2
    if ratio >= 0.21:
        return 1
    return 0

# Define a function to perform OCR on a frame and print the extracted text
def perform_ocr(frame):
    """Print the text Tesseract finds in a BGR frame.

    The frame is converted to grayscale and binarized with a fixed threshold
    of 150 before it is handed to pytesseract (English language data).
    Nothing is returned; the text goes to standard output.
    """
    grayscale = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    # cv2.threshold returns (threshold_used, image); only the image is needed
    binary_frame = cv2.threshold(grayscale, 150, 255, cv2.THRESH_BINARY)[1]

    print("Extracted Text:", pytesseract.image_to_string(binary_frame, lang='eng'))

# Seconds to wait between two processed frames (the request was a 5-second
# gap; the previous hard-coded value was 2)
CAPTURE_INTERVAL_SECONDS = 5

# Initialize the timer; the first frame is processed one interval from now
start_time = time.time()

# NOTE(review): a state still needs more than 6 consecutive *sampled* frames
# to become the displayed status, so with a multi-second interval a status
# change takes many seconds to appear - confirm this is intended.

# The try/finally guarantees that the webcam and the OpenCV windows are
# released even when a processing step raises or the kernel is interrupted.
try:
    while True:
        # Time since the last processed frame
        elapsed_time = time.time() - start_time

        # Only capture and process a frame once the interval has passed
        if elapsed_time >= CAPTURE_INTERVAL_SECONDS:
            ret, frame = cap.read()

            # Reset the timer
            start_time = time.time()

            # Check if the frame is read successfully
            if not ret:
                print("Error: Failed to read frame.")
                break

            # Run OCR on the untouched frame, before the status overlay is
            # drawn onto it, so the overlay is never reported as extracted text
            perform_ocr(frame)

            # Grayscale copy used by the dlib detector and landmark predictor
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            # Detect faces; boxes and landmarks are drawn on a separate copy
            faces = detector(gray)
            face_frame = frame.copy()

            # Process each detected face (the counters are shared by all faces)
            for face in faces:
                x1, y1 = face.left(), face.top()
                x2, y2 = face.right(), face.bottom()

                # Draw a rectangle around the detected face
                cv2.rectangle(face_frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

                # Get facial landmarks as an array of (x, y) rows
                landmarks = predictor(gray, face)
                landmarks = face_utils.shape_to_np(landmarks)

                # Grade each eye: 0, 1 or 2 (see blinked)
                left_blink = blinked(landmarks[36], landmarks[37], landmarks[38], landmarks[41], landmarks[40], landmarks[39])
                right_blink = blinked(landmarks[42], landmarks[43], landmarks[44], landmarks[47], landmarks[46], landmarks[45])

                # Judge the eye blinks
                if left_blink == 0 or right_blink == 0:
                    sleep += 1
                    drowsy = 0
                    active = 0
                    if sleep > 6:
                        status = "SLEEPING"
                        color = (255, 0, 0)
                        # NOTE: playsound blocks by default until the clip
                        # ends, so the loop pauses while the alarm is sounding
                        playsound("alarm.wav")

                elif left_blink == 1 or right_blink == 1:
                    sleep = 0
                    active = 0
                    drowsy += 1
                    if drowsy > 6:
                        status = "Drowsy"
                        color = (0, 0, 255)
                else:
                    drowsy = 0
                    sleep = 0
                    active += 1
                    if active > 6:
                        status = "Active"
                        color = (0, 255, 0)

                # Display status on the frame
                cv2.putText(frame, status, (100, 100), cv2.FONT_HERSHEY_SIMPLEX, 1.2, color, 3)

                # Draw facial landmarks; cast to plain ints so OpenCV always
                # accepts the centre coordinates
                for (x, y) in landmarks:
                    cv2.circle(face_frame, (int(x), int(y)), 1, (255, 255, 255), -1)

            # Show the original frame (with the status overlay)
            cv2.imshow("Frame", frame)

            # Show the frame with detected faces and landmarks
            cv2.imshow("Face Frame", face_frame)

        # Check for 'q' on every pass, not only when a frame was processed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the video capture object and close OpenCV windows
    cap.release()
    cv2.destroyAllWindows()


In the above code, add an alarm system that triggers when the sleeping state lasts for 5 seconds.


To add an alarm system that activates when the sleeping state is detected for 5 seconds, you can modify the while loop in the code to check if the status remains "SLEEPING" for 5 seconds or more. Once this condition is met, the alarm system will be triggered.

Here's how you can modify the code:



In this code:

A new variable, sleep_start_time, is introduced to record the time when the sleeping state is first detected.
When the status variable is set to "SLEEPING", the code checks if sleep_start_time is None to initialize it.
The code then calculates the elapsed time since sleep_start_time and checks if it exceeds 5 seconds.
If the sleeping state lasts for 5 seconds or more, the alarm sound is played using playsound, and sleep_start_time is reset to avoid multiple alarms.
The rest of the code remains largely unchanged and functions as expected.

In [None]:
# Importing necessary libraries
import cv2
import pytesseract
import numpy as np
import dlib
from imutils import face_utils
from playsound import playsound
import time

# Open the default webcam (device index 0) as the frame source
cap = cv2.VideoCapture(0)

# dlib models: the frontal face detector, and the landmark predictor whose
# weights are loaded from a file given by relative path
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat")

# Consecutive-frame counters for each eye state; all start at zero
sleep = drowsy = active = 0

# Text and colour of the status overlay drawn with cv2.putText
status, color = "", (0, 0, 0)

# Define functions for computing distances and checking blinks
def compute(ptA, ptB):
    """Return the Euclidean (L2) norm of the difference ``ptA - ptB``."""
    return np.linalg.norm(ptA - ptB)

def blinked(a, b, c, d, e, f):
    """Grade one eye from six landmark points.

    The ratio is (|b - d| + |c - e|) / (2 * |a - f|), with the distances
    supplied by ``compute``.

    Returns:
        2 when the ratio is above 0.25, 1 when it lies in [0.21, 0.25],
        and 0 otherwise.
    """
    pair_sum = compute(b, d) + compute(c, e)
    span = compute(a, f)
    ratio = pair_sum / (2.0 * span)

    # Guard-clause form of the threshold ladder
    if ratio > 0.25:
        return 2
    if ratio >= 0.21:
        return 1
    return 0

# Define a function to perform OCR on a frame and print the extracted text
def perform_ocr(frame):
    """Print the text Tesseract finds in a BGR frame.

    The frame is converted to grayscale and binarized with a fixed threshold
    of 150 before it is handed to pytesseract (English language data).
    Nothing is returned; the text goes to standard output.
    """
    grayscale = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    # cv2.threshold returns (threshold_used, image); only the image is needed
    binary_frame = cv2.threshold(grayscale, 150, 255, cv2.THRESH_BINARY)[1]

    print("Extracted Text:", pytesseract.image_to_string(binary_frame, lang='eng'))

# Seconds the "SLEEPING" status must persist before the alarm is played
# (the request and the comments asked for 5; the previous hard-coded value was 3)
ALARM_AFTER_SECONDS = 5

# Time at which the current "SLEEPING" spell was first seen; None while idle
sleep_start_time = None

# Continuously read frames from the video capture object.
# The try/finally guarantees that the webcam and the OpenCV windows are
# released even when a processing step raises or the kernel is interrupted.
try:
    while True:
        ret, frame = cap.read()

        # Check if the frame is read successfully
        if not ret:
            print("Error: Failed to read frame.")
            break

        # Run OCR on the untouched frame, before the status overlay is drawn
        # onto it, so the overlay is never reported as extracted text
        perform_ocr(frame)

        # Grayscale copy used by the dlib detector and landmark predictor
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # Detect faces; boxes and landmarks are drawn on a separate copy
        faces = detector(gray)
        face_frame = frame.copy()

        # Process each detected face (counters and timer are shared by all faces)
        for face in faces:
            x1, y1 = face.left(), face.top()
            x2, y2 = face.right(), face.bottom()

            # Draw a rectangle around the detected face
            cv2.rectangle(face_frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

            # Get facial landmarks as an array of (x, y) rows
            landmarks = predictor(gray, face)
            landmarks = face_utils.shape_to_np(landmarks)

            # Grade each eye: 0, 1 or 2 (see blinked)
            left_blink = blinked(landmarks[36], landmarks[37], landmarks[38], landmarks[41], landmarks[40], landmarks[39])
            right_blink = blinked(landmarks[42], landmarks[43], landmarks[44], landmarks[47], landmarks[46], landmarks[45])

            # Judge the eye blinks
            if left_blink == 0 or right_blink == 0:
                sleep += 1
                drowsy = 0
                active = 0
                if sleep > 6:
                    status = "SLEEPING"
                    color = (255, 0, 0)

                    # Start the timer the first time sleep is detected
                    if sleep_start_time is None:
                        sleep_start_time = time.time()

            elif left_blink == 1 or right_blink == 1:
                sleep = 0
                active = 0
                drowsy += 1
                if drowsy > 6:
                    status = "Drowsy"
                    color = (0, 0, 255)

                    # Leaving the sleeping state cancels the timer
                    sleep_start_time = None
            else:
                drowsy = 0
                sleep = 0
                active += 1
                if active > 6:
                    status = "Active"
                    color = (0, 255, 0)

                    # Leaving the sleeping state cancels the timer
                    sleep_start_time = None

            # Sound the alarm once the sleeping state has lasted long enough
            if status == "SLEEPING" and sleep_start_time:
                elapsed_time = time.time() - sleep_start_time
                if elapsed_time >= ALARM_AFTER_SECONDS:
                    # NOTE: playsound blocks by default until the clip ends,
                    # so the preview pauses while the alarm is sounding
                    playsound("alarm.wav")

                    # Clear the timer so the alarm is not replayed on every
                    # frame; it is re-armed on the next frame that still
                    # counts as sleeping
                    sleep_start_time = None

            # Display status on the frame
            cv2.putText(frame, status, (100, 100), cv2.FONT_HERSHEY_SIMPLEX, 1.2, color, 3)

            # Draw facial landmarks; cast to plain ints so OpenCV always
            # accepts the centre coordinates
            for (x, y) in landmarks:
                cv2.circle(face_frame, (int(x), int(y)), 1, (255, 255, 255), -1)

        # Show the original frame (with the status overlay)
        cv2.imshow("Frame", frame)

        # Show the frame with detected faces and landmarks
        cv2.imshow("Face Frame", face_frame)

        # Check for 'q' key to exit the loop
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the video capture object and close OpenCV windows
    cap.release()
    cv2.destroyAllWindows()


Modify the above code to detect text in images with Python using pytesseract.



To modify the code to detect text in images using Python's pytesseract, you can incorporate the necessary functions and workflow for OCR (Optical Character Recognition) within the existing code. The following steps will guide you on how to achieve this:

Import Necessary Libraries: You already have the required libraries for pytesseract and cv2 imported.

Define a Function to Perform OCR: The function perform_ocr(frame) is already defined in your code. It uses pytesseract to extract text from a preprocessed frame.

Perform OCR on the Frame: The function is called in each iteration of the while loop to perform OCR on the current frame.

Print Extracted Text: The function prints the text extracted from each frame using print().

Here is a revised version of your existing code with added comments to guide you:



In this code:

The perform_ocr function is used to perform OCR on the current frame and print the extracted text.
The function is called on each frame within the while loop.
The perform_ocr function converts the frame to grayscale and applies thresholding before using pytesseract to extract text.
The function then prints the extracted text to the console.

In [None]:
# Import necessary libraries
import cv2
import pytesseract
import numpy as np
import dlib
from imutils import face_utils
from playsound import playsound
import time

# Open the default webcam (device index 0) as the frame source
cap = cv2.VideoCapture(0)

# dlib models: the frontal face detector, and the landmark predictor whose
# weights are loaded from a file given by relative path
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat")

# Consecutive-frame counters for each eye state; all start at zero
sleep = drowsy = active = 0

# Text and colour of the status overlay drawn with cv2.putText
status, color = "", (0, 0, 0)

# Time at which the current "SLEEPING" spell was first seen; None while idle
sleep_start_time = None

# Define a function for computing distances and checking blinks
def compute(ptA, ptB):
    """Return the Euclidean (L2) norm of the difference ``ptA - ptB``."""
    return np.linalg.norm(ptA - ptB)

def blinked(a, b, c, d, e, f):
    """Grade one eye from six landmark points using the eye aspect ratio.

    The ratio is (|b - d| + |c - e|) / (2 * |a - f|), with the distances
    supplied by ``compute``.

    Returns:
        2 when the ratio is above 0.25, 1 when it lies in [0.21, 0.25],
        and 0 otherwise.
    """
    pair_sum = compute(b, d) + compute(c, e)
    span = compute(a, f)
    ratio = pair_sum / (2.0 * span)

    # Guard-clause form of the threshold ladder
    if ratio > 0.25:
        return 2
    if ratio >= 0.21:
        return 1
    return 0

# Define a function to perform OCR on a frame and print the extracted text
def perform_ocr(frame):
    """Print the text Tesseract finds in a BGR frame.

    The frame is converted to grayscale and binarized with a fixed threshold
    of 150 before it is handed to pytesseract (English language data).
    Nothing is returned; the text goes to standard output.
    """
    grayscale = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    # cv2.threshold returns (threshold_used, image); only the image is needed
    binary_frame = cv2.threshold(grayscale, 150, 255, cv2.THRESH_BINARY)[1]

    print("Extracted Text:", pytesseract.image_to_string(binary_frame, lang='eng'))

# Continuously read frames from the video capture object.
# The try/finally guarantees that the webcam and the OpenCV windows are
# released even when a processing step raises or the kernel is interrupted.
try:
    while True:
        ret, frame = cap.read()

        # Check if the frame is read successfully
        if not ret:
            print("Error: Failed to read frame.")
            break

        # Run OCR on the untouched frame, before the status overlay is drawn
        # onto it, so the overlay is never reported as extracted text
        perform_ocr(frame)

        # Grayscale copy used by the dlib detector and landmark predictor
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # Detect faces; boxes and landmarks are drawn on a separate copy
        faces = detector(gray)
        face_frame = frame.copy()

        # Process each detected face (counters and timer are shared by all faces)
        for face in faces:
            # Get coordinates of the face
            x1, y1 = face.left(), face.top()
            x2, y2 = face.right(), face.bottom()

            # Draw a rectangle around the detected face
            cv2.rectangle(face_frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

            # Get facial landmarks as an array of (x, y) rows
            landmarks = predictor(gray, face)
            landmarks = face_utils.shape_to_np(landmarks)

            # Grade each eye: 0, 1 or 2 (see blinked)
            left_blink = blinked(landmarks[36], landmarks[37], landmarks[38], landmarks[41], landmarks[40], landmarks[39])
            right_blink = blinked(landmarks[42], landmarks[43], landmarks[44], landmarks[47], landmarks[46], landmarks[45])

            # Judge the eye blinks
            if left_blink == 0 or right_blink == 0:
                sleep += 1
                drowsy = 0
                active = 0
                if sleep > 6:
                    status = "SLEEPING"
                    color = (255, 0, 0)

                    # Start the timer the first time sleep is detected
                    if sleep_start_time is None:
                        sleep_start_time = time.time()
            elif left_blink == 1 or right_blink == 1:
                sleep = 0
                active = 0
                drowsy += 1
                if drowsy > 6:
                    status = "Drowsy"
                    color = (0, 0, 255)

                    # Leaving the sleeping state cancels the timer
                    sleep_start_time = None
            else:
                drowsy = 0
                sleep = 0
                active += 1
                if active > 6:
                    status = "Active"
                    color = (0, 255, 0)

                    # Leaving the sleeping state cancels the timer
                    sleep_start_time = None

            # Check if the sleeping state has lasted for 5 seconds or more
            if status == "SLEEPING" and sleep_start_time:
                elapsed_time = time.time() - sleep_start_time
                if elapsed_time >= 5:
                    # NOTE: playsound blocks by default until the clip ends,
                    # so the preview pauses while the alarm is sounding
                    playsound("alarm.wav")

                    # Clear the timer so the alarm is not replayed on every
                    # frame; it is re-armed on the next frame that still
                    # counts as sleeping
                    sleep_start_time = None

            # Display status on the frame
            cv2.putText(frame, status, (100, 100), cv2.FONT_HERSHEY_SIMPLEX, 1.2, color, 3)

            # Draw facial landmarks; cast to plain ints so OpenCV always
            # accepts the centre coordinates
            for (x, y) in landmarks:
                cv2.circle(face_frame, (int(x), int(y)), 1, (255, 255, 255), -1)

        # Show the original frame (with the status overlay)
        cv2.imshow("Frame", frame)

        # Show the frame with detected faces and landmarks
        cv2.imshow("Face Frame", face_frame)

        # Check for 'q' key to exit the loop
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the video capture object and close OpenCV windows
    cap.release()
    cv2.destroyAllWindows()
