In [1]:
import cv2
import numpy as np
import os
from utils import *
from crossword_utils import *
import re

#### 1 - Preprocessing Image
def  pre_process(image):
    # Transform source image to gray if it is not already
    if len(image.shape) != 2:
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    else:
        gray = image

    # using a big blocksize seem to work well (blocksize = 51, c = 9)
    # to keep the details, 3 and 2 seem to work better?!
    thresh = cv2.adaptiveThreshold( 
        gray,
        maxValue=255.0,
        adaptiveMethod=cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        thresholdType=cv2.THRESH_BINARY_INV,
        blockSize=29,
        C=9
    )

    # Otsu's thresholding after Gaussian filtering
    # blur = cv2.GaussianBlur(gray,(3,3),0)
    # ret, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

    # store initial threshold
    threshInitial = thresh.copy()

    # remove borders
    threshNoBorders = removeBorder(thresh, 4)

    threshCleaned = threshNoBorders.copy()
    # Filter out all numbers and noise to isolate only boxes
    # (seem not to be needed, but keep it anyway?)
    removeNoise(threshCleaned, 20) # larger than 20 removes dashes, i.e. -

    roi = cv2.bitwise_not(threshCleaned)
    
    return roi, gray, threshInitial, threshNoBorders, threshCleaned 

def processFilepath(path):
    image = cv2.imread(path)
    roi, gray, threshInitial, threshNoBorders, threshCleaned = pre_process(image)
    
    txt = ocr(roi)
    print('file: %s\n%s' % (path, txt))

    h, w = gray.shape
    imgBlank = np.zeros((h, w, 3), np.uint8)
    fileNumber = re.search(r"(\d+)", path).group(1)
    ocrImage = get_ocr_image(imgBlank, txt, fileNumber)

    # Image Array for Display
    imageArray = ([
        [image, gray, threshInitial, threshNoBorders], 
        [threshCleaned, roi, ocrImage, imgBlank]
    ] )
    stackedImage = stackImages(imageArray, 2, 
    [
        ['raw', 'gray', 'threshold', 'no borders'] , 
        ['cleaned', 'roi', 'ocr', 'blank']
    ])

    show_wait_destroy("stacked", stackedImage)
    return txt


# ####################################
# MAIN
# ####################################

testRunSingle = False

if (testRunSingle):
    # test-run on one image
    
    # 567
    processFilepath('temp/roi567.png')

    # 410
    processFilepath('temp/roi410.png')

    # 8
    processFilepath('temp/roi8.png')

    # 561
    processFilepath('temp/roi561.png')

    # 188
    processFilepath('temp/roi188.png')

    # 428
    processFilepath('temp/roi428.png')

    # 161
    processFilepath('temp/roi161.png')

    # 174
    processFilepath('temp/roi174.png')

    # 97
    processFilepath('temp/roi97.png')

    # 83
    processFilepath('temp/roi83.png')

else:
    f = open("temp/ocr_output.txt", "w")
    for file in os.listdir("temp"):
        if file.startswith("roi"):
            filepath = os.path.join("temp", file)

            txt = processFilepath(filepath)

            f.write('file: %s\n%s\n' % (filepath, txt))
            f.flush()

    f.close()


file: temp/roi407.png
ELE-
GANT

file: temp/roi361.png
TYGGE

file: temp/roi375.png

file: temp/roi413.png

file: temp/roi349.png

file: temp/roi163.png
C

file: temp/roi177.png
E

file: temp/roi188.png
HUND

file: temp/roi57.png
Å

file: temp/roi80.png
STRØM



SystemExit: Pressed q - exiting ...

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
