In [43]:
from paddleocr import PaddleOCR

class FastOCR:
    """Extract text from images with PaddleOCR, sharing one loaded model.

    The PaddleOCR engine is created the first time ``FastOCR`` is
    instantiated and cached on the class, so every later instance reuses
    the same engine instead of loading the model again.
    """

    # Shared PaddleOCR engine; stays None until the first FastOCR() call.
    _ocr_instance = None

    def __init__(self):
        """Bind this instance to the shared engine, creating it if needed."""
        if FastOCR._ocr_instance is None:
            print("Initializing PaddleOCR model... (this happens only once)")
            FastOCR._ocr_instance = PaddleOCR(use_angle_cls=True, lang="en", use_gpu=True)
        self.ocr = FastOCR._ocr_instance

    def extract_text(self, image_path: str) -> str:
        """Return the text recognized in an image, joined by single spaces.

        Args:
            image_path: Path of the image; passed unchanged to the engine.

        Returns:
            The non-empty recognized text fragments of the first result
            page, in the order the engine reports them, separated by single
            spaces. Returns ``""`` when no text is found, and also when the
            engine raises (the failure is printed, not propagated).
        """
        try:
            results = self.ocr.ocr(image_path, cls=True)
            # Only the first page of the result is read. An empty result or
            # an empty/None first page means "no text found", which is a
            # normal outcome and must not be reported as an OCR failure.
            # NOTE(review): PaddleOCR 2.x is understood to return [None] for
            # images without text - confirm against the installed version.
            page = results[0] if results else None
            if not page:
                return ""
            # Each line is indexed as line[1][0] for its text. Test the text
            # itself: the enclosing (text, ...) entry is truthy even when the
            # text is empty, and joining empty fragments doubles the spaces.
            fragments = [
                line[1][0] for line in page if line and line[1] and line[1][0]
            ]
            return " ".join(fragments).strip()
        except Exception as e:
            # Deliberate best-effort boundary: one bad image must not abort
            # the caller, so report the problem and return an empty result.
            print(f"OCR failed for {image_path}: {e}")
            return ""

In [49]:
# Absolute path of the photo to run OCR on (an original inside the macOS
# Photos library bundle).
image_path = "/Users/mamduhzabidi/Pictures/Photos Library.photoslibrary/originals/0/0D7948BB-B79F-4062-8683-9764B55F8808.jpeg"

# correct_orientation is defined in another cell, not shown here.
# NOTE(review): presumably it fixes the image's rotation before OCR; its
# return value is ignored, so it appears to act on the file itself - confirm.
correct_orientation(image_path)

# The first FastOCR() call loads the PaddleOCR model; later instances reuse
# the engine cached on the class.
ocr_processor = FastOCR()

In [47]:
# Run OCR on the image; extract_text returns "" instead of raising when OCR fails.
extracted_text = ocr_processor.extract_text(image_path)

[2025/02/16 23:42:52] ppocr DEBUG: dt_boxes num : 23, elapsed : 0.5328319072723389
[2025/02/16 23:42:52] ppocr DEBUG: cls num  : 23, elapsed : 0.13718485832214355
[2025/02/16 23:42:56] ppocr DEBUG: rec_res num  : 23, elapsed : 3.817391872406006
