In [4]:
import cv2
import pytesseract
import re

In [5]:
def readLargestPlateText(plateThresh, minAspect=2, maxAspect=8, minArea=500,
                         ocrConfig="--psm 7", ocrLang="eng"):
    """OCR the largest plate-shaped external contour region of an image.

    The bounding box of every external contour is computed, boxes whose
    width/height ratio or area fall outside the given limits are discarded,
    and the largest remaining box is cropped from ``plateThresh`` and passed
    to Tesseract.

    Args:
        plateThresh: Image array handed to ``cv2.findContours`` (presumably a
            single-channel binary image such as a thresholded plate -- confirm
            against the caller).
        minAspect: Exclusive lower bound on the box width/height ratio.
        maxAspect: Exclusive upper bound on the box width/height ratio.
        minArea: Exclusive lower bound on the box area (width * height).
        ocrConfig: Extra Tesseract configuration string.
        ocrLang: Tesseract language code.

    Returns:
        The OCR result upper-cased with everything except ``A-Z`` and ``0-9``
        removed (this can be an empty string), or ``None`` when the input is
        ``None``/empty or no bounding box passes the filters.
    """
    # A missing or empty image would otherwise surface as an opaque OpenCV
    # error. Objects without a ``size`` attribute are passed through untouched.
    if plateThresh is None or getattr(plateThresh, "size", 1) == 0:
        return None

    # OpenCV 3.x returns (image, contours, hierarchy) while 2.x/4.x return
    # (contours, hierarchy); the contours are second-to-last in both layouts.
    contours = cv2.findContours(
        plateThresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )[-2]

    # Keep only boxes with a plausible shape and size
    candidateBoxes = [
        (x, y, w, h)
        for (x, y, w, h) in (cv2.boundingRect(c) for c in contours)
        if minAspect < w / h < maxAspect and w * h > minArea
    ]

    if not candidateBoxes:
        return None

    # Largest valid region; on equal areas the earliest contour wins, which
    # matches a stable descending sort followed by taking the first element.
    x, y, w, h = max(candidateBoxes, key=lambda box: box[2] * box[3])
    largestTextRegion = plateThresh[y:y + h, x:x + w]

    # Use OCR to extract text, then keep only upper-case letters and digits
    largestText = pytesseract.image_to_string(
        largestTextRegion, config=ocrConfig, lang=ocrLang
    )
    cleanedText = re.sub(r'[^A-Z0-9]', '', largestText.upper())

    return cleanedText

In [6]:
# Load image
IMAGE_PATH = 'test-media/test1.jpg'
plateImage = cv2.imread(IMAGE_PATH)
if plateImage is None:
    # cv2.imread reports failure by returning None instead of raising, so
    # fail here with the offending path rather than inside cvtColor.
    raise OSError(f'cv2.imread could not load image: {IMAGE_PATH}')
grayImage = cv2.cvtColor(plateImage, cv2.COLOR_BGR2GRAY)

# Apply Otsu thresholding. With THRESH_OTSU the threshold argument is ignored
# (the level is computed from the image), so 0 is passed by convention.
# The returned level is bound to a real name to avoid clobbering IPython's `_`.
otsuLevel, plateThresh = cv2.threshold(grayImage, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

# Use the function to read the largest text region
plateText = readLargestPlateText(plateThresh)

# Display results
print(f'Plate Text: {plateText}')
# Opens a native OpenCV window; waitKey(0) blocks the kernel until a key is pressed.
cv2.imshow("Thresholded Plate", plateThresh)
cv2.waitKey(0)
cv2.destroyAllWindows()

Plate Text: None
