OCR TECHNIQUES

PADDLEPADDLE OCR (PaddleOCR)


In [None]:
import os
from paddleocr import PaddleOCR
import cv2
import logging

# Show INFO-level records so the per-image progress messages are visible.
logging.basicConfig(level=logging.INFO)

# Initialize PaddleOCR with GPU support
# One engine is created when this cell runs and is shared by every call to
# process_images_with_paddleocr() below.
# NOTE(review): use_gpu=True presumably requires a CUDA-enabled paddle build - confirm.
ocr = PaddleOCR(use_angle_cls=True, lang='en', use_gpu=True)  # Enable GPU

# File extensions (lower-case, without the leading dot) treated as images.
SUPPORTED_FORMATS = ['jpg', 'jpeg', 'png']

def process_images_with_paddleocr(input_folder, output_folder, width=800, height=600,
                                  min_confidence=0.8):
    """Run PaddleOCR on every supported image below *input_folder*.

    The directory tree under *input_folder* is walked recursively. Each image
    whose extension is listed in ``SUPPORTED_FORMATS`` is converted to
    grayscale, resized to ``(width, height)``, and passed to the module-level
    ``ocr`` engine. Recognized lines whose confidence is strictly greater than
    *min_confidence* are written, one per line, to ``<image stem>.txt`` in a
    folder structure under *output_folder* that mirrors the input tree.

    Args:
        input_folder: Root directory to scan for images.
        output_folder: Root directory for the ``.txt`` results; created if
            it does not exist.
        width: Target width in pixels passed to ``cv2.resize``.
        height: Target height in pixels passed to ``cv2.resize``.
        min_confidence: Lines with a confidence at or below this value are
            dropped. Defaults to 0.8.

    Returns:
        None. Progress and failures are reported through ``logging``; a
        failure on one image never aborts the rest of the run.
    """
    os.makedirs(output_folder, exist_ok=True)
    processed_count = 0

    for root, _, files in os.walk(input_folder):
        for file in files:
            ext = os.path.splitext(file)[1][1:].lower()
            if ext not in SUPPORTED_FORMATS:
                continue

            image_path = os.path.join(root, file)
            logging.info(f"Processing image: {image_path}")

            try:
                # cv2.imread signals an unreadable/corrupt file by returning
                # None instead of raising, so turn that into a clear error.
                image = cv2.imread(image_path)
                if image is None:
                    raise ValueError("image could not be read or decoded")

                # Convert to grayscale, then resize to the requested size.
                image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
                image = cv2.resize(image, (width, height))

                # Perform OCR using PaddleOCR
                result = ocr.ocr(image, cls=True)

                # When nothing is detected the per-image entry can be None
                # (or the result empty); treat that as "no text" rather than
                # as a processing failure.
                detections = result[0] if result and result[0] else []

                # Each detection is (box, (text, confidence)).
                text = '\n'.join(
                    line[1][0] for line in detections if line[1][1] > min_confidence
                )

                # Mirror the input sub-folder below the output root.
                relative_path = os.path.relpath(root, input_folder)
                local_folder = os.path.join(output_folder, relative_path)
                os.makedirs(local_folder, exist_ok=True)

                output_file = os.path.join(local_folder, f"{os.path.splitext(file)[0]}.txt")
                with open(output_file, "w", encoding="utf-8") as f:
                    f.write(text)

                logging.info(f"OCR result saved to: {output_file}")
                processed_count += 1
            except Exception as e:
                # Best-effort batch job: log and move on to the next image.
                logging.error(f"Failed to process image {image_path}. Error: {e}")

    logging.info(f"OCR processing completed. Total images processed: {processed_count}")

# Script entry point: OCR every image under the hard-coded input folder and
# write the text files below the PaddleOCR output folder.
if __name__ == "__main__":
    # Raw strings keep the Windows backslashes literal.
    input_dir = r"E:\OCR BANK STATMENTS PROJECT\VS CODE INFOSYS\OCR BANK STATMENTS\RETRIVED IMAGES"
    output_dir = r"E:\OCR BANK STATMENTS PROJECT\VS CODE INFOSYS\OCR BANK STATMENTS\OCR OUTPUTS\paddleocr"
    process_images_with_paddleocr(input_dir, output_dir)


TESSERACT OCR

In [None]:
import os
import pytesseract
import cv2
# Specify the path to the Tesseract executable (adjust based on your system)
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

# Input directory containing local images
input_dir = r"E:\OCR BANK STATMENTS PROJECT\VS CODE INFOSYS\OCR BANK STATMENTS\RETRIVED IMAGES"
# Output directory for OCR results
output_dir = r"E:\OCR BANK STATMENTS PROJECT\VS CODE INFOSYS\OCR BANK STATMENTS\OCR OUTPUTS\teseract"
# Created as soon as this cell runs (not only under __main__), so the output
# root exists before process_local_images() writes into it.
os.makedirs(output_dir, exist_ok=True)

# Supported image formats
# (extensions without the leading dot; file names are lower-cased before comparison)
SUPPORTED_FORMATS = ['jpg', 'jpeg', 'png']

# Process images for OCR using Tesseract
def process_local_images(input_folder, output_folder=None):
    """Run Tesseract OCR on every supported image below *input_folder*.

    The tree under *input_folder* is walked recursively. Each file whose
    extension is listed in ``SUPPORTED_FORMATS`` is read with OpenCV,
    converted to grayscale, and passed to ``pytesseract.image_to_string``.
    The text is saved as ``<image file name>.txt`` (the image extension is
    kept, e.g. ``scan.jpg.txt``) in a mirrored folder structure.

    Args:
        input_folder: Root directory to scan for images.
        output_folder: Root directory for the results. When omitted, the
            module-level ``output_dir`` is used, as before.

    Returns:
        None. Progress and failures are printed; a failure on one image
        does not stop the remaining images from being processed.
    """
    if output_folder is None:
        output_folder = output_dir
    os.makedirs(output_folder, exist_ok=True)

    for root, _dirs, files in os.walk(input_folder):
        for file in files:
            # splitext only yields a real extension, so an extension-less
            # file that happens to be named "jpg" is no longer treated as
            # an image (file.split('.')[-1] returned the whole name).
            ext = os.path.splitext(file)[1][1:].lower()
            if ext not in SUPPORTED_FORMATS:
                continue

            image_path = os.path.join(root, file)
            print(f"Processing image: {image_path}")

            try:
                # cv2.imread returns None (no exception) for unreadable files.
                image = cv2.imread(image_path)
                if image is None:
                    raise ValueError("image could not be read or decoded")

                # Convert image to grayscale for better OCR results
                gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

                # Perform OCR using Tesseract
                text = pytesseract.image_to_string(gray_image)

                # Create a mirrored folder structure for saving OCR results
                relative_path = os.path.relpath(root, input_folder)
                local_folder = os.path.join(output_folder, relative_path)
                os.makedirs(local_folder, exist_ok=True)

                # Save OCR result to a text file
                output_file = os.path.join(local_folder, f"{file}.txt")
                with open(output_file, "w", encoding="utf-8") as f:
                    f.write(text)

                print(f"OCR result saved to: {output_file}")
            except Exception as e:
                # Best-effort batch job: report and continue with the next file.
                print(f"Failed to process image {image_path}. Error: {e}")

# Entry point
# Runs the batch over the module-level input_dir defined above when this
# cell/file is executed directly.
if __name__ == "__main__":
    process_local_images(input_dir)
    print("OCR processing completed.")


API INTEGRATION WITH CLOUDINARY FOR OCR

In [None]:
import gradio as gr
from paddleocr import PaddleOCR
import pytesseract
import cv2
import numpy as np
from PIL import Image
import pandas as pd

# Function to process image, extract text using PaddleOCR or Tesseract, and save it to a file
def extract_text_from_image(image, lang, ocr_engine):
    """Extract text from an image with PaddleOCR or Tesseract.

    Detected text is listed with its confidence, written to
    ``extracted_text_with_confidence.txt`` in the working directory, and
    outlined in green on a copy of the input image.

    Args:
        image: PIL image to process, or ``None`` when nothing was supplied.
        lang: Language code; ``"en"`` selects English, anything else is
            mapped to French (``"fra"``) for Tesseract and passed through
            unchanged to PaddleOCR.
        ocr_engine: ``"PaddleOCR"`` or ``"Tesseract"``. Any other value
            yields an empty result.

    Returns:
        A 3-tuple ``(dataframe, file_path, annotated_image)``:
        a DataFrame with ``Text`` and ``Confidence`` columns, the path of the
        text file that was written, and the annotated PIL image. On failure
        the error message is written to ``error.txt`` and the tuple is
        ``(empty DataFrame, "error.txt", None)`` - still three values, so it
        matches a handler wired to three outputs.
    """
    try:
        if image is None:
            raise ValueError("no image was provided")

        # PIL images are already RGB-ordered (or L/RGBA/...); normalize to a
        # 3-channel RGB array instead of swapping channels.
        image_rgb = np.array(image.convert("RGB"))

        confidence_scores = []
        lines = []

        if ocr_engine == "PaddleOCR":
            # Initialize PaddleOCR with the selected language.
            # NOTE(review): the engine is rebuilt on every call; consider
            # caching one instance per language if latency matters.
            ocr = PaddleOCR(use_angle_cls=True, lang=lang, use_gpu=True)

            # Hand PaddleOCR the BGR channel order it received before this
            # change, while annotations are drawn on the RGB array.
            result = ocr.ocr(cv2.cvtColor(image_rgb, cv2.COLOR_RGB2BGR), cls=True)

            # The per-image entry can be None when no text is found.
            detections = result[0] if result and result[0] else []

            # Each detection is (polygon, (text, confidence)).
            for detection in detections:
                bbox = detection[0]
                lines.append(detection[1][0])
                confidence_scores.append(detection[1][1])
                # Draw the detected polygon on the image
                cv2.polylines(image_rgb, [np.array(bbox, dtype=np.int32)], isClosed=True, color=(0, 255, 0), thickness=2)

        elif ocr_engine == "Tesseract":
            # Adjust language codes for Tesseract
            tesseract_lang = "eng" if lang == "en" else "fra"

            # Perform OCR using Tesseract
            data = pytesseract.image_to_data(image_rgb, lang=tesseract_lang, output_type=pytesseract.Output.DICT)

            fields = zip(data["text"], data["conf"], data["left"], data["top"], data["width"], data["height"])
            for word, raw_conf, x, y, w, h in fields:
                # Confidences may arrive as ints, floats, or numeric strings
                # such as "96.06"; float() accepts all of them, int() does not.
                conf = float(raw_conf)
                if conf > 0:  # Ignore entries with confidence -1 (or 0)
                    lines.append(word)
                    confidence_scores.append(conf)
                    # Draw bounding box (x, y, width, height) on the image
                    cv2.rectangle(image_rgb, (x, y), (x + w, y + h), (0, 255, 0), 2)

        # Combine text and confidence scores for the file
        text_with_confidence = "\n".join(
            f"{line} (Confidence: {score:.2f})" for line, score in zip(lines, confidence_scores)
        )

        # Save the text and confidence scores to a file
        output_file = "extracted_text_with_confidence.txt"
        with open(output_file, "w", encoding="utf-8") as f:
            f.write(text_with_confidence)

        # Create a DataFrame for text and confidence
        df = pd.DataFrame({"Text": lines, "Confidence": confidence_scores})

        # Convert image array back to PIL Image for display
        image_with_bboxes = Image.fromarray(image_rgb)

        return df, output_file, image_with_bboxes

    except Exception as e:
        # Best-effort UI handler: persist the message and return placeholders
        # in the same (dataframe, file, image) shape as the success path.
        error_message = f"Error processing image: {e}"
        error_file = "error.txt"
        with open(error_file, "w", encoding="utf-8") as f:
            f.write(error_message)
        return pd.DataFrame({"Text": [], "Confidence": []}), error_file, None

# Function to load a demo image
def load_demo_image():
    """Open the bundled demo image and return it as a PIL image.

    The location is hard-coded; edit the path below to point at a different
    demo file.
    """
    return Image.open(
        "E:/OCR BANK STATMENTS PROJECT/VS CODE INFOSYS/OCR BANK STATMENTS/RETRIVED IMAGES/BALANCE SHEET/BALANCE SHEET_avd5rnwfhfy4gewvukby.jpg"
    )

# Gradio Interface
# Build the UI. Components are declared in display order: engine selector,
# language selector, image upload + demo button, then the three result
# widgets and the button that triggers extraction.
with gr.Blocks() as demo:
    gr.Markdown("# OCR Tool Using multiple OCR Engines")
    gr.Markdown("Upload an image to extract text using PaddleOCR or Tesseract OCR.")
    
    # Choose OCR engine options
    with gr.Row():
        ocr_engine = gr.Dropdown(label="Select OCR Engine", choices=["PaddleOCR", "Tesseract"], value="PaddleOCR")

    # Choose language options for OCR
    # (the same selection is used for both engines; extract_text_from_image
    # maps it to a Tesseract language code when Tesseract is chosen)
    with gr.Row():
        ppocr_language = gr.Dropdown(label="Select Language", choices=["en", "fr"], value="en")

    # Choose picture upload options
    with gr.Row():
        upload_file = gr.Image(label="Upload Image", type="pil", interactive=True)  # Image component for uploading and taking pictures
        demo_file = gr.Button("Use a Demo File")

    # DataFrame output for text and confidence
    dataframe_output = gr.Dataframe(label="Text with Confidence Scores", headers=["Text", "Confidence"], interactive=False)
    
    # File output for downloading extracted text
    file_output = gr.File(label="Download Extracted Text")
    
    # Image output with bounding boxes
    image_output = gr.Image(label="Processed Image with Bounding Boxes", type="pil")

    # Process button
    process_button = gr.Button("EXTRACT TEXT")
    
    # Button to trigger OCR
    # The handler must return exactly one value per entry in `outputs`
    # (DataFrame, file path, image), in this order.
    process_button.click(
        fn=extract_text_from_image,
        inputs=[upload_file, ppocr_language, ocr_engine],
        outputs=[dataframe_output, file_output, image_output]
    )

    # Button to trigger "Use a Demo File"
    demo_file.click(
        fn=load_demo_image,
        outputs=[upload_file]  # This will set the demo image as the uploaded file
    )

# Run the Gradio app
# Start the web UI only when this cell/file is executed directly.
if __name__ == "__main__":
    # NOTE(review): share=True asks Gradio for a publicly reachable link;
    # confirm that is acceptable for the documents being uploaded.
    demo.launch(share=True)
