In [22]:
import tensorflow as tf
from tensorflow import keras
from keras import layers
import cv2
import os
import numpy as np
import pytesseract
import re

Preprocess Image

In [16]:
def noiseRemoval(image):
    """Run the notebook's denoising chain on ``image`` and return the result.

    The chain is: dilate -> erode -> morphological opening -> median blur
    (aperture 3). All three morphological steps share a 1x1 all-ones
    ``uint8`` structuring element.

    NOTE(review): a 1x1 structuring element only covers the pixel itself, so
    the morphological steps presumably leave the image unchanged and the
    median blur does the actual smoothing -- confirm whether a larger kernel
    was intended.

    Args:
        image: Image array accepted by the OpenCV calls below (the caller in
            this notebook passes a binarized single-channel image).

    Returns:
        The filtered image produced by ``cv2.medianBlur``.
    """
    structuring_element = np.ones((1, 1), np.uint8)

    dilated = cv2.dilate(image, structuring_element, iterations=1)
    eroded = cv2.erode(dilated, structuring_element, iterations=1)
    opened = cv2.morphologyEx(eroded, cv2.MORPH_OPEN, structuring_element)

    return cv2.medianBlur(opened, 3)

In [17]:
# Specify the desired image dimensions
# NOTE(review): these two values are not used by the preprocessing loop below;
# they are kept because other cells may rely on them.
image_height = 500
image_width = 500

# Path to the dataset directory containing receipt images
dataset_path = "Data"

# Path to store preprocessed images
processed_path = "Temp"

# Ensure the preprocessed directory exists
os.makedirs(processed_path, exist_ok=True)

# Preprocess the dataset.
# Entries are visited in sorted order so that the `image` / `denoised_image`
# pair left behind for later cells is the same on every run (os.listdir()
# returns entries in arbitrary order).
for filename in sorted(os.listdir(dataset_path)):
    image_path = os.path.join(dataset_path, filename)

    # cv2.imread() returns None instead of raising when the entry is a
    # directory, is not an image, or cannot be decoded. Skip those entries so
    # a single stray file does not abort the whole run.
    loaded_image = cv2.imread(image_path)
    if loaded_image is None:
        print(f"Skipping {image_path}: not a readable image")
        continue

    # Only rebind `image` once loading succeeded, so it always refers to the
    # same file as `denoised_image` after the loop.
    image = loaded_image

    # Invert the image
    inverted_image = cv2.bitwise_not(image)

    # Rescale the image to twice its size (optional)
    rescaled_image = cv2.resize(inverted_image, None, fx=2, fy=2, interpolation=cv2.INTER_CUBIC)

    # Convert to grayscale
    grayscale_image = cv2.cvtColor(rescaled_image, cv2.COLOR_BGR2GRAY)

    # Binarization; the first return value is the threshold that was applied,
    # not an image.
    threshold_value, binary_image = cv2.threshold(grayscale_image, 127, 255, cv2.THRESH_BINARY)

    # Noise removal (optional)
    denoised_image = noiseRemoval(binary_image)

    # Save the preprocessed image under the same file name
    preprocessed_image_path = os.path.join(processed_path, filename)
    if not cv2.imwrite(preprocessed_image_path, denoised_image):
        # imwrite() reports failure through its return value.
        print(f"Warning: could not write {preprocessed_image_path}")

In [None]:
# Perform text detection using Tesseract at multiple scales.
#
# This cell reads two variables left behind by the preprocessing loop:
# `image` (the original BGR receipt) and `denoised_image` (its preprocessed
# version). pytesseract.pytesseract.tesseract_cmd must already point at a
# working Tesseract binary before this cell runs.
scales = [1.0, 0.8, 0.6]  # Adjust the scales as per your requirement

# Perform text recognition using Tesseract on the original image.
# This is done once, before anything is drawn: `image` itself is never
# modified below, so boxes and labels cannot leak into the OCR input.
recognized_text = pytesseract.image_to_string(image, config='--psm 6')

# Print the recognized text
print("Recognized Text:")
print(recognized_text)
print("-----------------------")

for scale in scales:
    scaled_image = cv2.resize(denoised_image, None, fx=scale, fy=scale)
    scaled_height, scaled_width = scaled_image.shape[:2]

    # `denoised_image` does not have the same size as `image` (preprocessing
    # upscales it), so dividing by `scale` alone would not land on `image`.
    # Derive the mapping from the actual array shapes instead.
    x_ratio = image.shape[1] / scaled_width
    y_ratio = image.shape[0] / scaled_height

    # Draw on a fresh copy per scale so annotations do not pile up.
    annotated_image = image.copy()

    # Perform text detection on the scaled image
    text_boxes = pytesseract.image_to_boxes(scaled_image, config='--psm 6')

    # Each line is "<char> <left> <bottom> <right> <top> <page>", with the
    # origin at the BOTTOM-left corner of the image handed to Tesseract.
    detections = []
    for box in text_boxes.splitlines():
        # rsplit keeps the character intact as the first field.
        fields = box.rsplit(' ', 5)
        if len(fields) != 6:
            continue
        text = fields[0]
        try:
            left, bottom, right, top = (int(value) for value in fields[1:5])
        except ValueError:
            continue

        # Flip y to OpenCV's top-left origin, then map onto `image`.
        x1 = int(left * x_ratio)
        y1 = int((scaled_height - top) * y_ratio)
        x2 = int(right * x_ratio)
        y2 = int((scaled_height - bottom) * y_ratio)
        detections.append((text, x1, y1, x2, y2))

        # Draw bounding box around the text
        cv2.rectangle(annotated_image, (x1, y1), (x2, y2), (0, 255, 0), 2)

        # Display the text
        cv2.putText(annotated_image, text, (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)

    # Display the image with text boxes
    cv2.imshow("Text Detection", annotated_image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    # Get the first text element as merchant name. Its box is already drawn
    # above together with all other detections.
    # NOTE(review): image_to_boxes() reports one box per character, so this is
    # only the first character; pytesseract.image_to_data() would be needed to
    # obtain a whole word or line.
    if detections:
        merchant_name = detections[0][0]
        print("Merchant Name:", merchant_name)
    else:
        # Tesseract found nothing at this scale; avoid indexing an empty list.
        merchant_name = None
        print("Merchant Name: (no text detected)")
    print("-----------------------")

In [27]:
# Point pytesseract at the Tesseract executable. This must run before any
# cell that calls pytesseract (the text-detection cell does).
# The location can be overridden with the TESSERACT_CMD environment variable;
# the Windows default install path is used otherwise. The override is applied
# only when the file exists, so on other machines pytesseract keeps its own
# default instead of being pointed at a missing file.
_tesseract_cmd = os.environ.get("TESSERACT_CMD", 'C:/Program Files/Tesseract-OCR/tesseract.exe')
if os.path.isfile(_tesseract_cmd):
    pytesseract.pytesseract.tesseract_cmd = _tesseract_cmd