# In [None]:
# Import necessary libraries
import cv2
import pytesseract
from PIL import Image
import numpy as np

# --- Configuration ---
# On Windows, you might need to set the path to the Tesseract executable
# Example: pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

# Minimum OCR confidence score; only words scoring strictly above this
# value are drawn on the frame and printed.
CONFIDENCE_THRESHOLD: int = 60

# --- Main Application Logic ---

def _confident_words(data, threshold):
    """Return the OCR words whose confidence is strictly above *threshold*.

    Args:
        data: Dictionary returned by ``pytesseract.image_to_data`` with
            ``output_type=Output.DICT``. The ``conf``, ``text``, ``left``,
            ``top``, ``width`` and ``height`` columns are read; a missing
            column is treated as empty.
        threshold: Confidence a word must exceed to be kept.

    Returns:
        A list of ``(text, x, y, w, h)`` tuples in detection order, with
        empty and whitespace-only texts removed.
    """
    keys = ('conf', 'text', 'left', 'top', 'width', 'height')
    columns = [data.get(key, ()) for key in keys]

    words = []
    for conf, text, x, y, w, h in zip(*columns):
        try:
            # Confidences may arrive as ints, floats or float-formatted
            # strings (e.g. '96.06') depending on the pytesseract version;
            # a bare int() raises on the latter and would throw away the
            # OCR result of the whole frame.
            confidence = int(float(conf))
        except (TypeError, ValueError):
            continue
        if confidence > threshold and text.strip():
            words.append((text, x, y, w, h))
    return words


def _annotate_frame(frame, words):
    """Draw a green bounding box and text label on *frame* for each word.

    Args:
        frame: BGR image to draw on; it is modified in place.
        words: Iterable of ``(text, x, y, w, h)`` tuples.
    """
    green = (0, 255, 0)
    for text, x, y, w, h in words:
        cv2.rectangle(frame, (x, y), (x + w, y + h), green, 2)

        # The label normally sits just above the box. For boxes touching the
        # top edge that position is off-screen, so fall back to below the box.
        label_y = y - 10 if y - 10 > 10 else y + h + 20
        cv2.putText(frame, text, (x, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, green, 2)


def run_ocr_on_camera(camera_index: int = 0, confidence_threshold: "float | None" = None) -> None:
    """Capture camera frames and run OCR on each one until 'q' is pressed.

    Every frame is passed to Tesseract; words whose confidence is above the
    threshold are outlined on the frame, labelled with the recognized text,
    and printed to the console. The annotated frame is shown in a window.

    Args:
        camera_index: Index of the capture device to open (default ``0``).
        confidence_threshold: Confidence a word must exceed to be reported.
            ``None`` (the default) uses the module-level
            ``CONFIDENCE_THRESHOLD``.

    Returns:
        None. Returns early, after printing an error, if the camera cannot
        be opened.
    """
    if confidence_threshold is None:
        confidence_threshold = CONFIDENCE_THRESHOLD

    print("Initializes the camera, captures frames, and performs OCR in a loop")
    cap = cv2.VideoCapture(camera_index)

    if not cap.isOpened():
        print("Error: Could not open camera.")
        return

    print("Camera opened successfully. Press 'q' to quit.")

    # try/finally guarantees the device and windows are released even when
    # the loop is left through an exception such as KeyboardInterrupt.
    try:
        while True:
            # 1. Capture frame-by-frame
            ret, frame = cap.read()
            if not ret:
                print("Error: Failed to capture frame.")
                break

            # 2-4. OCR the frame, keep the confident words, draw them.
            # A failure here is reported but must not stop the video loop,
            # hence the deliberately broad except.
            try:
                # OpenCV delivers BGR; convert to RGB before handing the
                # image to pytesseract.
                rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                data = pytesseract.image_to_data(rgb_frame, output_type=pytesseract.Output.DICT)

                words = _confident_words(data, confidence_threshold)
                _annotate_frame(frame, words)

                # Print all detected text for the current frame
                if words:
                    print("Detected Text:", ' '.join(word[0] for word in words))
            except Exception as e:
                print(f"An error occurred during OCR processing: {e}")

            # 5. Display the resulting frame
            cv2.imshow('Real-Time OCR - Press Q to Quit', frame)

            # 6. Poll the keyboard for 1 ms; quit on 'q'
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # --- Cleanup ---
        cap.release()
        cv2.destroyAllWindows()
        print("Camera and windows closed.")

# Run the main function only when this file is executed directly (script or
# notebook cell), so that importing it does not open the camera.
if __name__ == "__main__":
    run_ocr_on_camera()
