In [2]:
import os
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
import cv2
import easyocr
import numpy as np
import joblib  # Added joblib for saving and loading pickle files
from tkinter import Tk, filedialog
import os

# Function to load image from file
def load_image_from_file():
    """Ask the user to pick an image file and load it with OpenCV.

    A Tk file-selection dialog is opened, starting in ``~/Desktop``.

    Returns:
        The value returned by ``cv2.imread`` for the chosen path, or ``None``
        when the dialog is cancelled or the file cannot be decoded as an image.
    """
    root = Tk()
    root.withdraw()  # Hide the root window; only the dialog should appear
    try:
        file_path = filedialog.askopenfilename(
            initialdir=os.path.expanduser("~/Desktop"),
            title="Select Image File",
            filetypes=(
                # Several patterns for one entry are passed as a tuple; a
                # ';'-joined string is not a portable Tk pattern list.
                ("JPEG files", ("*.jpg", "*.jpeg")),
                ("PNG files", "*.png"),
                ("All files", "*.*"),
            ),
        )
    finally:
        # Always tear down the hidden root so repeated calls do not leak
        # Tk interpreters/windows.
        root.destroy()

    if not file_path:
        print("No file selected.")
        return None

    image = cv2.imread(file_path)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        print(f"Could not read image: {file_path}")
    return image

# Function to capture and process webcam feed
def process_webcam_feed(reader, allowed_words):
    """Show a live webcam feed with non-allowed text blacked out.

    Each captured frame is passed to ``reader.readtext``; the results are
    written to ``ocr_results_webcam.pkl`` (overwritten on every frame), the
    frame is redacted in place with ``redact_text`` and then displayed.
    The loop ends when a frame cannot be captured or the user presses 'q'.

    Args:
        reader: Object exposing ``readtext(frame)`` whose result is accepted
            by ``redact_text``.
        allowed_words: Words that must not be redacted; forwarded unchanged
            to ``redact_text``.
    """
    cap = cv2.VideoCapture(0)  # Open default webcam
    if not cap.isOpened():
        print("Cannot open webcam")
        cap.release()
        return

    try:
        while True:
            ret, frame = cap.read()  # Capture frame-by-frame
            if not ret:
                print("Failed to capture frame")
                break

            # OCR - Detect text in the current frame
            ocr_results = reader.readtext(frame)

            # Save OCR results as pickle using joblib
            joblib.dump(ocr_results, "ocr_results_webcam.pkl")

            # Redact the text except allowed words
            redact_text(frame, ocr_results, allowed_words)

            # Display the frame
            cv2.imshow("Live Redacted Webcam Feed", frame)

            # Press 'q' to exit
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release the webcam and close windows even if OCR, the pickle write
        # or an interrupt raised inside the loop; otherwise the camera stays
        # locked until the process exits.
        cap.release()
        cv2.destroyAllWindows()

# Function to redact text
def redact_text(image, ocr_results, allowed_words):
    """Black out every detected text region whose text is not allowed.

    The image is modified in place; nothing is returned.

    Args:
        image: Image passed to ``cv2.rectangle`` for drawing.
        ocr_results: Iterable of ``(bbox, text, confidence)`` triples, where
            ``bbox`` is a sequence of ``(x, y)`` corner points.
        allowed_words: Iterable of strings to leave visible. Matching is
            case-insensitive and ignores surrounding whitespace, and is done
            against the whole detected text of each region.
    """
    # Normalize once, outside the loop; a set gives O(1) membership tests.
    allowed = {word.strip().lower() for word in allowed_words}

    for bbox, text, _ in ocr_results:
        if text.strip().lower() in allowed:
            continue

        # Cover the axis-aligned extent of *all* corner points. Using only
        # two opposite corners under-covers rotated or skewed boxes and can
        # leave part of the text readable.
        xs = [int(point[0]) for point in bbox]
        ys = [int(point[1]) for point in bbox]
        top_left = (min(xs), min(ys))
        bottom_right = (max(xs), max(ys))

        # Filled black rectangle (thickness=-1) over the text
        cv2.rectangle(image, top_left, bottom_right, (0, 0, 0), thickness=-1)

# Main function
def main():
    """Run the interactive redaction tool.

    Creates a CPU-only English EasyOCR reader, asks for the words that must
    stay visible and for the input source, then either processes the live
    webcam feed or redacts a single image chosen through a file dialog. In
    file mode the OCR results are written to ``ocr_results_file.pkl`` and
    the redacted image is shown until a key is pressed.
    """
    # Initialize EasyOCR reader
    reader = easyocr.Reader(['en'], gpu=False)

    # Prompt user to input allowed words
    raw_words = input("Enter a comma-separated list of words to NOT redact (case-insensitive): ")
    # Trim each entry and drop blanks: "foo, bar" must yield "bar", not
    # " bar", otherwise the entry can never match any detected text.
    allowed_words = [word.strip().lower() for word in raw_words.split(',') if word.strip()]

    # Prompt for input method
    choice = input("Do you want to use Webcam or File? (webcam/file): ").strip().lower()

    if choice == 'webcam':
        process_webcam_feed(reader, allowed_words)
    elif choice == 'file':
        image = load_image_from_file()
        if image is None:
            return

        # OCR - Detect text in image
        ocr_results = reader.readtext(image)

        # Save OCR results as pickle using joblib
        joblib.dump(ocr_results, "ocr_results_file.pkl")

        # Redact the text except allowed words
        redact_text(image, ocr_results, allowed_words)

        # Show the result until any key is pressed
        cv2.imshow("Redacted Image", image)
        cv2.waitKey(0)
        cv2.destroyAllWindows()
    else:
        print("Invalid option.")

# Start the interactive tool only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()


Using CPU. Note: This module is much faster with a GPU.


Enter a comma-separated list of words to NOT redact (case-insensitive):  
Do you want to use Webcam or File? (webcam/file):  webcam
