In [None]:
from pathlib import Path

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pytesseract
from kamino import image_utils as iu
from pytesseract import Output
from tqdm.notebook import tqdm

%matplotlib ipympl

In [None]:
# Pytesseract configuration
# Options passed to Tesseract on every OCR call, joined into one CLI string:
#   --psm 6                     page segmentation mode 6 (Tesseract docs:
#                               "assume a single uniform block of text")
#   tessedit_char_whitelist     only uppercase A-Z may be recognised
custom_config = " ".join(
    (
        "--psm 6",
        "-c tessedit_char_whitelist=ABCDEFGHIJKLMNOPQRSTUVWXYZ",
    )
)

In [None]:
# Load word list
# One entry per line of the file. Entries are stripped and lower-cased so they
# can be compared directly against the normalised OCR output.
wl_path = Path("..") / "data" / "word_list.txt"

# An explicit encoding keeps the read independent of the platform's locale default.
with open(wl_path, "r", encoding="utf-8") as f:
    # Iterate the file lazily instead of materialising it with readlines().
    # Blank lines are dropped: an empty-string entry would make an empty OCR
    # line pass the `word in word_list` membership test used by the OCR loop.
    word_list = [entry for entry in (line.strip().lower() for line in f) if entry]

In [None]:
# https://stackoverflow.com/questions/55169645/square-detection-in-image
def _first_known_word(ocr_text, vocabulary):
    """Return the first OCR line found in ``vocabulary``, or ``""`` if none is.

    Each line of ``ocr_text`` is stripped and lower-cased before the lookup.
    Empty lines are never treated as a match, so an empty-string entry in the
    vocabulary cannot produce a false hit.
    """
    for line in ocr_text.split("\n"):
        candidate = line.strip().lower()
        if candidate and candidate in vocabulary:
            return candidate
    return ""


# Load image, convert to grayscale, then blur and sharpen with the kamino helpers
impath = Path("..") / "data" / "board_3.jpg"
image = cv2.imread(str(impath))
if image is None:
    # cv2.imread reports failure by returning None instead of raising; fail
    # here with a clear message rather than with an opaque cvtColor error.
    raise FileNotFoundError(f"cv2.imread could not load an image from {impath}")
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = iu.blur(gray)
sharpen = iu.sharpen(blur)

# Threshold and morph close
thresh = cv2.threshold(sharpen, 160, 255, cv2.THRESH_BINARY)[1]
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
close = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=2)

# Find contours and filter using threshold area
# findContours returns a 2-tuple or a 3-tuple depending on the OpenCV version;
# the contour list is the first element of the former, the second of the latter.
found = cv2.findContours(close, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = found[0] if len(found) == 2 else found[1]

# Define min and max area (both bounds are exclusive)
min_area = 100
max_area = 40000

# Initialize image number (counts only the contours that pass the area filter)
image_number = 0

# Build the lookup set once: membership is tested for every OCR line of every
# contour, and a set gives the same answers as the list in constant time.
known_words = set(word_list)

for contour in contours:
    area = cv2.contourArea(contour)
    if not (min_area < area < max_area):
        continue

    # Crop the candidate region from the grayscale image and binarise it for OCR.
    x, y, w, h = cv2.boundingRect(contour)
    roi = gray[y : y + h, x : x + w]
    roi = cv2.threshold(roi, 115, 255, cv2.THRESH_BINARY)[1]
    ocr_text = pytesseract.image_to_string(roi, config=custom_config)

    # Only a line that is actually in the word list counts as a result. When
    # nothing matches, `word` is empty and the raw OCR text is reported instead
    # of letting the last unmatched line pass as a recognised word.
    word = _first_known_word(ocr_text, known_words)

    if not word:
        print(f"{image_number}: !! ERROR !! : {ocr_text}")
    else:
        print(f"{image_number}: {word}")

    # Debug aids: uncomment to dump each ROI / draw its bounding box.
    # cv2.imwrite("ROI_{}.png".format(image_number), roi)
    # cv2.rectangle(image, (x, y), (x + w, y + h), (36, 255, 12), 2)

    image_number += 1