# In [None]:
# Install OpenCV if not already installed
# !pip install opencv-python

import cv2
from IPython.display import display, Image as IPImage
import numpy as np

# OCR preprocessing walkthrough: load a receipt photo, then save and display
# the result of each stage (original, grayscale, Otsu threshold, median blur,
# dilate + erode).
#
# NOTE: previews are written as JPEG, which is lossy, so the saved
# black-and-white previews may show compression artifacts. The in-memory
# arrays (`thresh`, `denoised`, `processed`) are unaffected.


def _save_and_show(filename, image):
    """Write *image* to *filename* with OpenCV and display the file inline.

    Raises:
        OSError: if OpenCV reports that the file could not be written.
    """
    # cv2.imwrite reports failure through its return value, not an exception.
    if not cv2.imwrite(filename, image):
        raise OSError(f"Could not write image: {filename!r}")
    display(IPImage(filename))


# 1. Load the image
img_path = "example_receipt.jpg"
img = cv2.imread(img_path)
# cv2.imread returns None (instead of raising) when the file is missing or
# cannot be decoded; fail here with a clear message rather than letting the
# first cvtColor call die with an opaque assertion error.
if img is None:
    raise FileNotFoundError(f"Could not read image (missing or unreadable): {img_path!r}")

# RGB copy kept for consumers that expect RGB channel order.
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
# cv2.imwrite expects BGR channel order, so the original BGR array is what
# gets written; writing `img_rgb` would swap red and blue in the saved file.
_save_and_show("original_rgb.jpg", img)

# 2. Convert to grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
_save_and_show("gray.jpg", gray)

# 3. Apply thresholding (binarization) to enhance text.
# With THRESH_OTSU the threshold is chosen automatically and the `0` passed
# as the threshold argument is ignored; element [1] is the binarized image.
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
_save_and_show("thresh.jpg", thresh)

# 4. Remove noise using a 3x3 median blur
denoised = cv2.medianBlur(thresh, 3)
_save_and_show("denoised.jpg", denoised)

# 5. Optional: dilate then erode with a 2x2 kernel (a morphological closing).
# Both operations act on bright (255) pixels.
# NOTE(review): assuming dark text on light paper, the text is the 0-valued
# region after THRESH_BINARY, so this step removes small dark specks rather
# than thickening strokes -- confirm this is the intended effect.
kernel = np.ones((2, 2), np.uint8)
processed = cv2.dilate(denoised, kernel, iterations=1)
processed = cv2.erode(processed, kernel, iterations=1)
_save_and_show("processed.jpg", processed)

# Now the image is preprocessed and ready for OCR
