In [None]:
# Import the used libraries into the script
# ---------------------------------------------
import sys
print(sys.path)

from math import ceil, floor
import numpy as np
import os 

import cv2
import pytesseract

# Versions this notebook was developed against:
#   OpenCV (cv2): 4.8.0
#   NumPy:        1.25.2
#   pytesseract:  0.3.10  (version of the Python wrapper package)

print("CV2 version: ", cv2.getVersionString())
print("Numpy version: ", np.__version__)
# pytesseract.__version__ reports the wrapper package version, so the label
# names the wrapper rather than the Tesseract engine.
print("pytesseract version: ", pytesseract.__version__)

In [None]:
# Select input file and define the output folder
# ---------------------------------------------
# These are ordinary notebook-level names. A `global` statement is only
# meaningful inside a function body, so none is needed here.

# Folder that receives the stage images written by printoutStage.
output_folder = "./outputStages/Pipeline_1/"

# Active input image and the basename used as prefix for its stage files.
input_image = "./data/newspaper_lokalavisenfavrskov/S1.jpg"
output_basename = "S1"

# Alternative inputs: uncomment one input_image/output_basename pair to switch.
#input_image = "./data/newspaper_lokalavisenfavrskov/S2.jpg"
#output_basename = "S2"

#input_image = "./data/computer_generated_images/Sudoku_Puzzle_by_L2G-20050714_standardized_layout.svg.png"  # https://upload.wikimedia.org/wikipedia/commons/e/e0/Sudoku_Puzzle_by_L2G-20050714_standardized_layout.svg
#output_basename =  "Sudoku_Puzzle_by_L2G-20050714_standardized_layout"

In [None]:
# Helper function for visualization of intermediary/stage results
# ---------------------------------------------
def printoutStage(stage_no, stage_img):
    """Save an intermediate pipeline image for visual inspection.

    Two files are written into the notebook-level ``output_folder``:
    ``<output_basename>-stage-<stage_no>.png`` and ``last.png`` (always the
    most recently saved stage).

    Parameters:
        stage_no: Stage identifier; it is converted with ``str()`` and embedded
            in the file name.
        stage_img: Image array handed to ``cv2.imwrite``.
    """
    if not os.path.exists(output_folder):
        # exist_ok avoids failing if the folder appears between the check and the call.
        os.makedirs(output_folder, exist_ok=True)
        print("Made designated output folder")

    # os.path.join keeps the path valid whether or not output_folder ends with a separator.
    stage_file = output_basename + "-stage-" + str(stage_no) + ".png"
    for target in (os.path.join(output_folder, stage_file),
                   os.path.join(output_folder, "last.png")):
        # cv2.imwrite signals failure through its boolean return value;
        # report it instead of dropping it silently.
        if not cv2.imwrite(target, stage_img):
            print("Warning: could not write stage image to", target)

In [None]:
# Load Image
# ------------
image = cv2.imread(input_image)
# cv2.imread returns None (it does not raise) when the file cannot be read,
# so fail here with a message that names the path.
if image is None:
    raise FileNotFoundError("Could not read input image: " + str(input_image))

# All later stages operate on the single-channel grayscale version.
img_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# ------------
printoutStage(1, img_gray)

In [None]:
# Adaptive Threshold "white-en" image
# -----------------------------------
image = img_gray

def apply_adaptive_threshold(image, block_size=11, constant_value=2, adaptive_method=None):
    """Binarize an image with OpenCV adaptive thresholding.

    Parameters:
        image: Input image passed to ``cv2.adaptiveThreshold``.
        block_size: Neighbourhood size used for the local threshold;
            OpenCV requires an odd value greater than 1.
        constant_value: Constant subtracted from the local (weighted) mean.
        adaptive_method: Optional OpenCV adaptive method flag, e.g.
            ``cv2.ADAPTIVE_THRESH_MEAN_C``. Defaults to
            ``cv2.ADAPTIVE_THRESH_GAUSSIAN_C`` when omitted.

    Returns:
        The thresholded image (``cv2.THRESH_BINARY``, maximum value 255).
    """
    if adaptive_method is None:
        adaptive_method = cv2.ADAPTIVE_THRESH_GAUSSIAN_C

    return cv2.adaptiveThreshold(
        image, 255, adaptive_method,
        cv2.THRESH_BINARY, block_size, constant_value
    )

# Specify block size and constant value for adaptive thresholding for segmentation.
# The block size is 1/36 of the image height (height / 9 / 4).
block_size = image.shape[0] // (9 * 4)
if block_size % 2 == 0:
    block_size = block_size - 1
# cv2.adaptiveThreshold only accepts odd block sizes greater than 1; small
# images would otherwise yield 1 or a negative value here.
block_size = max(block_size, 3)

constant_value = 15     # For S1 image
#constant_value = 5     # For S2 image

# Apply adaptive thresholding to the image
img_threshold = apply_adaptive_threshold(image, block_size, constant_value)

# ------------
printoutStage(2.1, img_threshold)

In [None]:
# Reinforce the grid lines:
# invert the thresholded image, then dilate it twice with two thin,
# perpendicular structuring elements.
# ---------------------------------------------------------------

no_dilatation_iter = 4

# Step 1: inverse black and white
img_inv = cv2.bitwise_not(img_threshold)
printoutStage(3.1, img_inv)

# Step 2: dilate with a kernel of 3 rows x 1 column
# (grows non-zero pixels along the vertical axis)
structuring_element = np.ones((3, 1), np.uint8)
img_dilated_horiz = cv2.dilate(
    img_inv, structuring_element, iterations=no_dilatation_iter
)
printoutStage(3.2, img_dilated_horiz)

# Step 3: dilate the result with a kernel of 1 row x 3 columns
# (grows non-zero pixels along the horizontal axis)
structuring_element = np.ones((1, 3), np.uint8)
img_dilated_horiz_verti = cv2.dilate(
    img_dilated_horiz, structuring_element, iterations=no_dilatation_iter
)
printoutStage(3.3, img_dilated_horiz_verti)

# The reinforced image stays inverted (it is not flipped back).
img_reinforced = img_dilated_horiz_verti
image = img_reinforced

# ------------
printoutStage(3.4, img_reinforced)

In [None]:
# Find the main sudoku box
# -------------------------
# Extract every contour of the reinforced image (full hierarchy, simple
# chain approximation) and draw all of them for inspection.
image = img_reinforced
contours, hierarchy = cv2.findContours(image, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

# Guard the preview: indexing contours[0] fails when nothing was found.
print(len(contours), contours[0] if len(contours) > 0 else None)

img_contours = image.copy()
img_contours_color = cv2.cvtColor(img_contours, cv2.COLOR_GRAY2RGB)
cv2.drawContours(image=img_contours_color, contours=contours,
    contourIdx=-1, color=(0, 255, 0), thickness=3, lineType=cv2.LINE_AA)

# ------------
printoutStage(4.1, img_contours_color)

In [None]:
# Create the 4 cornered approximations for the contours
# -----------------------------------------------------
# Each contour is simplified with approxPolyDP using a tolerance of 3% of its
# perimeter; only approximations with exactly four vertices are kept.
image = img_reinforced

approxes = []
for contour in contours:
    peri = cv2.arcLength(contour, True)
    approx = cv2.approxPolyDP(contour, 0.03 * peri, True)
    if len(approx) == 4:
        approxes.append(approx)
print("Number of 4-cornered contour approxes: (should be close to 80 on a clean image )", len(approxes))

img_contours = image.copy()
img_contours_color = cv2.cvtColor(img_contours, cv2.COLOR_GRAY2RGB)
cv2.drawContours(image=img_contours_color, contours=approxes,
    contourIdx=-1, color=(0, 255, 0), thickness=8, lineType=cv2.LINE_AA)

# ------------
printoutStage(4.2, img_contours_color)

In [None]:
# Find the main sudoku frame/box by
# finding the minimum-area 4-cornered contour among all those
# that have an area bigger than 70% of the image
# ---------------------------------------------------------
MIN_BIG_BOX_AREA_RATIO = 0.7

image = img_reinforced

img_height, img_width = image.shape[:2]
img_area = img_height * img_width
print("IMG Height:", img_height, "IMG Width:", img_width, "IMG Area:", img_area)

areas = [cv2.contourArea(approx) for approx in approxes]

# Indices into `approxes`, ordered by decreasing area, plus the matching areas.
idx_sorted_areas = sorted(range(len(areas)), key=lambda j: areas[j], reverse=True)
sorted_areas = [areas[j] for j in idx_sorted_areas]

print(sorted_areas)

idx_big_box_candidates = np.where(np.array(sorted_areas) > MIN_BIG_BOX_AREA_RATIO * img_area)
print("idx_big_box_candidates", idx_big_box_candidates)

# Without a candidate there is nothing to index below; stop with a clear
# message instead of an IndexError.
if len(idx_big_box_candidates[0]) == 0:
    raise ValueError(
        "No 4-cornered contour covers more than "
        + str(MIN_BIG_BOX_AREA_RATIO) + " of the image area; cannot locate the sudoku frame"
    )

# The areas are sorted in decreasing order, so the last candidate is the
# smallest contour that still exceeds the area threshold.
big_box_idx = idx_big_box_candidates[0][-1]
big_box_area = sorted_areas[big_box_idx]
print("Big Box area idx:\t", big_box_idx)
print("Big box area:\t ", big_box_area)

# Map the position in the sorted list back to the index in `approxes`.
true_big_box_idx = idx_sorted_areas[big_box_idx]
print("Big Box Actual Id:", true_big_box_idx)

# plot the big box
img_bigbox = image.copy()
img_bigbox_color = cv2.cvtColor(img_bigbox, cv2.COLOR_GRAY2RGB)
cv2.drawContours(image=img_bigbox_color, contours=approxes,
    contourIdx=true_big_box_idx, color=(0, 255, 0), thickness=12, lineType=cv2.LINE_AA)

big_box = approxes[true_big_box_idx]
print("Big Box:\t", big_box)

# ------------
printoutStage(4.3, img_bigbox_color)

In [None]:
# Cleanup everything outside this box (make it white)
# ---------------------------------------------

image = img_reinforced

# Mask: 255 inside the big-box polygon, 0 everywhere else
mask = np.zeros(image.shape, dtype=np.uint8)
cv2.fillPoly(mask, pts=[big_box[:,0,:]], color=(255))

# White background layer: 255 outside the polygon, 0 inside
wbg = cv2.bitwise_not(mask)

# Image content restricted to the polygon (0 outside)
inside_box = cv2.bitwise_and(image, image, mask=mask)

# The two layers never overlap, so their sum is the image inside and white outside
img_reinforced_cleaned = wbg + inside_box

# ------------
printoutStage(4.4, img_reinforced_cleaned)

In [None]:
# Correct Perspective:
# (a) Generate the ideal / straight outer box
# ---------------------------------------------

big_box_n = big_box[:,0,:]

# Side of the ideal square: truncated mean length of the four edges joining
# consecutive corners (the last corner connects back to the first).
lengs = [
    np.sqrt((big_box_n[a, 0] - big_box_n[b, 0])**2 + (big_box_n[b, 1] - big_box_n[a, 1])**2)
    for a, b in ((0, 1), (1, 2), (2, 3), (3, 0))
]
len_side = int(np.mean(lengs))

# --------
# Sorted corner coordinates; the two smallest / two largest values are
# averaged to get the box limits along each axis.
big_box_xs = sorted(big_box_n[k, 0] for k in range(4))
big_box_ys = sorted(big_box_n[k, 1] for k in range(4))

print("xs:\t", big_box_xs)
print("ys:\t", big_box_ys)

x_min, x_max = np.mean(big_box_xs[:2]), np.mean(big_box_xs[2:])
y_min, y_max = np.mean(big_box_ys[:2]), np.mean(big_box_ys[2:])

# ---------
# Axis-aligned square anchored at (x_min, y_min), listed as
# upper-left, upper-right, lower-right, lower-left.
right_edge = x_min + len_side
bottom_edge = y_min + len_side
str8_box = [
    np.array(
        [
            [[x_min, y_min]],
            [[right_edge, y_min]],
            [[right_edge, bottom_edge]],
            [[x_min, bottom_edge]],
        ],
        dtype=int,
    )
]

# Bring the detected corners into that same order (otherwise e.g. 90 degree
# rotations could appear): split by y into an upper and a lower pair, then
# order each pair by x.
corners_by_y = sorted(big_box_n, key=lambda pt: pt[1])
top_left, top_right = sorted(corners_by_y[:2], key=lambda pt: pt[0])
bottom_right, bottom_left = sorted(corners_by_y[2:], key=lambda pt: pt[0], reverse=True)
big_box_n_conform = np.array([top_left, top_right, bottom_right, bottom_left])


# plot the generated box on top of the cleaned image
image = img_reinforced_cleaned
img_outer_box = image.copy()
img_outer_box_color = cv2.cvtColor(img_outer_box, cv2.COLOR_GRAY2RGB)
outline_thickness = ceil(img_outer_box.shape[0]/90)
cv2.drawContours(image=img_outer_box_color, contours=str8_box,
    contourIdx=-1, color=(0,255,0), thickness=outline_thickness, lineType=cv2.LINE_AA)

# ------------
printoutStage(5.1, img_outer_box_color)

In [None]:
# Correct Perspective:
# (b) find the Perspective transform
# ---------------------------------------------
# The transform is computed from the ideal square corners (source) to the
# detected, consistently ordered big-box corners (destination).
ideal_corners = str8_box[0][:,0,:].astype(np.float32)
detected_corners = big_box_n_conform.astype(np.float32)
warp_matrix = cv2.getPerspectiveTransform(ideal_corners, detected_corners)

In [None]:
# Correct Perspective:
# (c) Apply the perspective transform to the full image
# ---------------------------------------------

def align_apply2image(img, warp_matrix):
    """Warp ``img`` with ``warp_matrix`` and return the result.

    The output keeps the input's width and height. Linear interpolation is
    used and ``cv2.WARP_INVERSE_MAP`` is set, so OpenCV treats the matrix as
    the inverse (destination -> source) mapping.
    """
    height, width = img.shape[0], img.shape[1]
    warp_flags = cv2.INTER_LINEAR + cv2.WARP_INVERSE_MAP
    return cv2.warpPerspective(img, warp_matrix, (width, height), flags=warp_flags)

def _with_str8_box_overlay(aligned_img):
    """Return an RGB copy of ``aligned_img`` with the straight box drawn in green."""
    overlay = cv2.cvtColor(aligned_img.copy(), cv2.COLOR_GRAY2RGB)
    cv2.drawContours(image=overlay, contours=str8_box,
        contourIdx=-1, color=(0,255,0), thickness=3, lineType=cv2.LINE_AA)
    return overlay

# Warp both the grayscale and the thresholded image with the same matrix
img_gray_aligned = align_apply2image(img_gray, warp_matrix)
img_threshold_aligned = align_apply2image(img_threshold, warp_matrix)

# Visual check: the ideal box drawn over each aligned image
img_gray_aligned_c = _with_str8_box_overlay(img_gray_aligned)
img_threshold_aligned_c = _with_str8_box_overlay(img_threshold_aligned)

#------------------
printoutStage(5.2,img_gray_aligned_c)
printoutStage(5.3,img_threshold_aligned_c)

In [None]:
# Generate a full str8 grid
# ---------------------------------------------
# The grid is laid over the aligned image, so it must coincide with the
# straight box used to build the perspective transform: origin at the first
# (upper-left) corner of str8_box, side length len_side, split into 9 cells.
# Deriving it from str8_box always yields exactly 10 grid lines per axis and
# does not depend on x_min / x_max / y_min / y_max, which other cells reassign.

image = img_gray_aligned

dx = len_side/9.0

grid_x0, grid_y0 = str8_box[0][0, 0]
grid_xs = grid_x0 + dx * np.arange(10)
grid_ys = grid_y0 + dx * np.arange(10)

# ---------
# Generate a set of artificial roi boxes
print("Grid xs:\t", grid_xs)
print("Grid ys:\t", grid_ys)

# One 4-corner box per cell, row by row (index = row * 9 + column), with the
# corners ordered upper-left, upper-right, lower-right, lower-left.
generated_roi_boxes = []
for jj in range(9):
    for ii in range(9):
        left, right = grid_xs[ii], grid_xs[ii + 1]
        top, bottom = grid_ys[jj], grid_ys[jj + 1]
        generated_roi_boxes.append(
            np.array([[[left, top]], [[right, top]], [[right, bottom]], [[left, bottom]]], dtype=int)
        )

# plot the generated grid on top of the image
img_grid = image.copy()
img_grid_color = cv2.cvtColor(img_grid, cv2.COLOR_GRAY2RGB)
cv2.drawContours(image=img_grid_color, contours=generated_roi_boxes,
    contourIdx=-1, color=(0,255,0), thickness=ceil(img_grid_color.shape[0]/90), lineType=cv2.LINE_AA)

# ------------
printoutStage(5.4, img_grid_color)

In [None]:
# Prepare image for OCR: Do a blur
# -----------------------------------
# Same 5x5 Gaussian kernel for both aligned images; sigma 0 lets OpenCV
# derive it from the kernel size.
blur_kernel_size = (5,5)
img_threshold_aligned_blur = cv2.GaussianBlur(img_threshold_aligned, blur_kernel_size, 0)
img_gray_aligned_blur = cv2.GaussianBlur(img_gray_aligned, blur_kernel_size, 0)

# ------------
for blur_stage_no, blurred_img in (
    (6.1, img_threshold_aligned_blur),
    (6.15, img_gray_aligned_blur),
):
    printoutStage(blur_stage_no, blurred_img)

In [None]:
# Prepare image for OCR: Do a closing
# ---------------
def _close_on_inverted(img, kernel):
    """Invert ``img``, apply a morphological closing, and invert the result back."""
    inverted = cv2.bitwise_not(img)
    closed = cv2.morphologyEx(inverted, cv2.MORPH_CLOSE, kernel)
    return cv2.bitwise_not(closed)

structuring_element = np.ones((11, 11), np.uint8)
img_threshold_aligned_blur_closed = _close_on_inverted(img_threshold_aligned_blur, structuring_element)
img_gray_aligned_blur_closed = _close_on_inverted(img_gray_aligned_blur, structuring_element)

# ------------
printoutStage(6.2, img_threshold_aligned_blur_closed)
#printoutStage(6.25, img_gray_aligned_blur_closed)

In [None]:
# Get each little square box and OCR the digit with Tesseract
# -----------------------------------------------------------
# For every grid cell: crop it (minus a small margin), measure how much of it
# is non-white, and only run Tesseract on cells that are not almost empty.
# All crops are also collected into a 9x9 overview mosaic for inspection.

image = img_threshold_aligned_blur_closed

SUDOKU_MATRIX_TESSERACT = np.zeros([9,9])

TESS_CELL_SIZE = 32                       # side (px) of the patch handed to Tesseract
TESS_TILE_PAD = int(0.1 * TESS_CELL_SIZE)  # grey margin right/below each mosaic tile
TESS_TILE_SIZE = TESS_CELL_SIZE + TESS_TILE_PAD

mosaic_tiles = []

for idx in range(81):

    ii = idx//9
    jj = idx%9

    # Cell limits. Loop-specific names are used so the notebook-level
    # x_min / x_max / y_min / y_max from the perspective step stay untouched.
    cell_corners = generated_roi_boxes[idx][:, 0, :]
    cell_x_min, cell_y_min = cell_corners.min(axis=0)
    cell_x_max, cell_y_max = cell_corners.max(axis=0)

    # Cut a bit the edges so we eliminate black lines from the grid
    # For the OCR to work better
    delta_x = floor(0.075*(cell_x_max-cell_x_min))
    delta_y = floor(0.075*(cell_y_max-cell_y_min))

    ROI = image[cell_y_min+delta_y:cell_y_max-delta_y, cell_x_min+delta_x:cell_x_max-delta_x]

    # A cell outside the image yields an empty crop: the ratio below would
    # divide by zero and cv2.resize would fail. Keep the mosaic aligned with a
    # placeholder tile and leave the matrix entry at 0.
    if ROI.size == 0:
        print("Empty ROI, cell skipped: ", idx)
        mosaic_tiles.append(np.full((TESS_TILE_SIZE, TESS_TILE_SIZE), 100, dtype=image.dtype))
        continue

    non_white_pixels = ( ROI.flatten() < 155 ).sum()
    total_pixels = ROI.shape[0] * ROI.shape[1]
    non_white_ratio = non_white_pixels / total_pixels
    print("Non White Ratio Result: ", idx, non_white_ratio)

    # Resize to 32 x 32
    ROI = cv2.resize(ROI, (TESS_CELL_SIZE, TESS_CELL_SIZE), interpolation = cv2.INTER_AREA)

    # Overview tile: the patch with a grey (100) margin on its right and bottom
    mosaic_tiles.append(
        np.pad(ROI, ((0, TESS_TILE_PAD), (0, TESS_TILE_PAD)), 'constant', constant_values=100)
    )

    # Cells that are almost entirely white are treated as empty
    if non_white_ratio < 0.03:
        continue

    # Tesseract, restricted to the digits 1-9
    ocr_result = pytesseract.image_to_string(ROI, config = r'--psm 13 -c tessedit_char_whitelist=123456789')
    #ocr_result = pytesseract.image_to_string(ROI, config = r'--psm 10 --oem 0 -c page_separator=""')

    ocr_result = ocr_result.strip()

    print("Tesseract OCR Result: ", idx, len(ocr_result), "->", ocr_result, "<-")
    if ( len(ocr_result) == 1 and ocr_result.isdigit() ):
        print("Yes TESSERACT!")
        SUDOKU_MATRIX_TESSERACT[ii,jj] = int(ocr_result)
    elif ocr_result == '41':
        # Recurring misread; stored as 1
        print("Common problem, hopefully good fix")
        SUDOKU_MATRIX_TESSERACT[ii,jj] = 1
    else:
        print("NOOOOO TESSERACT!")

# Assemble the overview: 9 tiles per row, 9 rows
mosaic_rows = [
    np.concatenate(mosaic_tiles[row*9:(row+1)*9], axis=1) for row in range(9)
]
img_all = np.concatenate(mosaic_rows, axis=0)

#------------------
print("Tesseract:\n", SUDOKU_MATRIX_TESSERACT)
# Stage 6.1 is already used for the blurred threshold image; a distinct stage
# number keeps that file from being overwritten.
printoutStage(6.3,img_all)

###################
# # Tesseract 
# # Page segmentation modes:
#  0    Orientation and script detection (OSD) only.
#  1    Automatic page segmentation with OSD.
#  2    Automatic page segmentation, but no OSD, or OCR.
#  3    Fully automatic page segmentation, but no OSD. (Default)
#  4    Assume a single column of text of variable sizes.
#  5    Assume a single uniform block of vertically aligned text.
#  6    Assume a single uniform block of text.
#  7    Treat the image as a single text line.
#  8    Treat the image as a single word.
#  9    Treat the image as a single word in a circle.
# 10    Treat the image as a single character.
# 11    Sparse text. Find as much text as possible in no particular order.
# 12    Sparse text with OSD.
# 13    Raw line. Treat the image as a single text line,
#                        bypassing hacks that are Tesseract-specific.  
# OCR Engine modes [oem] 
# 0 Legacy engine only.
# 1 Neural nets LSTM engine only.
# 2 Legacy + LSTM engines.
# 3 Default, based on what is available.

In [None]:
# Get each little square box and OCR the digit (another recognizer)
# -----------------------------------------------------------------

# Input for the recognizer (grayscale) and the black/white image used to
# decide whether a cell is empty.
image = img_gray_aligned_blur_closed
#image = img_threshold_aligned_blur_closed
image_bw = img_threshold_aligned_blur_closed

# Build the model path with os.path.join: a literal backslash separator only
# works on Windows, and '\C' is an invalid escape sequence in a normal string.
recognizer = cv2.dnn.readNet(os.path.join('opencv-onnx', 'CRNN_VGG_BiLSTM_CTC.onnx'))

SUDOKU_MATRIX_CNRR = np.zeros([9,9])

def decodeText(scores):
    """Greedy-decode recognizer scores into a string of digits 1-9.

    Parameters:
        scores: Array indexed as ``scores[t][0][class]``, with one score
            vector per step ``t``. Only class 0 (treated as the blank /
            background label) and classes 2..10 are compared; every other
            class is ignored so that only the digits 1-9 can be produced.

    Returns:
        The decoded text. Blanks are dropped and runs of the same label on
        adjacent steps are collapsed to a single character; a digit is
        repeated in the output only when its occurrences are separated by a
        different label or a blank.
    """
    # Position 0 is the blank; position k (1..9) holds class k + 1, which is
    # emitted as the character str(k).
    candidate_classes = [0, 2, 3, 4, 5, 6, 7, 8, 9, 10]
    digits = "0123456789"

    decoded = []
    previous = 0
    for i in range(scores.shape[0]):
        c = int(np.argmax(scores[i][0][candidate_classes]))
        # Emit only on a change of label, and never for the blank.
        if c != 0 and c != previous:
            decoded.append(digits[c])
        previous = c
    return ''.join(decoded)

# For every grid cell: crop it (minus a small margin), skip cells that are
# almost entirely white, run the recognizer on the rest and store the first
# decoded digit.
for idx in range(81):

    ii = idx//9
    jj = idx%9

    # Cell limits. Loop-specific names are used so the notebook-level
    # x_min / x_max / y_min / y_max from the perspective step stay untouched.
    cell_corners = generated_roi_boxes[idx][:, 0, :]
    cell_x_min, cell_y_min = cell_corners.min(axis=0)
    cell_x_max, cell_y_max = cell_corners.max(axis=0)

    # Cut a bit the edges so we eliminate black lines from the grid
    delta_x = floor(0.075*(cell_x_max-cell_x_min))
    delta_y = floor(0.075*(cell_y_max-cell_y_min))

    ROI = image[cell_y_min+delta_y:cell_y_max-delta_y, cell_x_min+delta_x:cell_x_max-delta_x]
    ROI_bw = image_bw[cell_y_min+delta_y:cell_y_max-delta_y, cell_x_min+delta_x:cell_x_max-delta_x]

    # A cell outside the image yields an empty crop: the ratio below would
    # divide by zero and cv2.resize would fail.
    if ROI.size == 0 or ROI_bw.size == 0:
        print("Empty ROI, cell skipped: ", idx)
        continue

    # Emptiness is judged on the black/white image
    non_white_pixels = ( ROI_bw.flatten() < 100 ).sum()
    total_pixels = ROI_bw.shape[0] * ROI_bw.shape[1]
    non_white_ratio = non_white_pixels / total_pixels
    print("Non White Ratio Result: ", idx, non_white_ratio)

    if non_white_ratio < 0.03:
        continue

    printoutStage(98*100+idx*10,ROI)

    # The blob below is built with size (100, 32), so resize to that first
    ROI_sized = cv2.resize(ROI, (100,32), interpolation = cv2.INTER_AREA)
    printoutStage(98*100+idx*10+2,ROI_sized)

    # Create a 4D blob from cropped image
    blob = cv2.dnn.blobFromImage(ROI_sized, size=(100, 32), mean=127.5, scalefactor=1.0 / 127.5)
    recognizer.setInput(blob)

    # Run the recognition model
    result = recognizer.forward()

    # decode the result into text
    wordRecognized = decodeText(result)
    print(wordRecognized)

    if len(wordRecognized)==0:
        continue

    # Only one digit fits in a cell: keep the first decoded character
    if len(wordRecognized)>1:
        print("Multiple Letters:\t", wordRecognized)
        wordRecognized = wordRecognized[0]

    SUDOKU_MATRIX_CNRR[ii,jj] = int(wordRecognized)

# ------------------------------------
print("CNNR:\n", SUDOKU_MATRIX_CNRR)

In [None]:
# Print the two matrices
print("Tesseract:\n", SUDOKU_MATRIX_TESSERACT)
print("CNNR:\n", SUDOKU_MATRIX_CNRR)