In [None]:
import cv2
import numpy as np
import pytesseract
import os
from utils import *
from sys import exit

#### 1 - Preprocessing Image
def pre_process(image):
    """Binarise an image crop and derive the picture handed to OCR.

    Steps, in order:
      1. convert to grayscale unless the array is already 2-D,
      2. inverted Gaussian adaptive threshold (blockSize=19, C=9),
      3. mask the frame with ``removeBorder(thresh, 2)``,
      4. paint out every external contour whose area is below 2,
      5. invert the cleaned mask.

    Returns the tuple
    ``(roi, gray, threshInitial, threshNoBorders, threshCleaned)`` where
    ``roi`` is the result of step 5 and the remaining entries are the
    intermediate images of steps 1-4. When ``image`` is already 2-D,
    ``gray`` is the very same array object as ``image``.
    """
    # A 2-D array is taken to be grayscale already; anything else is
    # converted from BGR.
    gray = image if len(image.shape) == 2 else cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # A large neighbourhood (blockSize=19, C=9) was found to work well.
    thresh = cv2.adaptiveThreshold(
        gray,
        maxValue=255.0,
        adaptiveMethod=cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        thresholdType=cv2.THRESH_BINARY_INV,
        blockSize=19,
        C=9,
    )

    # Snapshot of the raw threshold, returned for inspection.
    threshInitial = thresh.copy()

    # Blank the frame around the crop.
    threshNoBorders = removeBorder(thresh, 2)

    # Noise removal works on a copy so the border-free stage stays intact.
    # It rarely seems to be needed, but is kept anyway.
    threshCleaned = threshNoBorders.copy()
    found = cv2.findContours(threshCleaned, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # findContours returns either a 2-tuple or a 3-tuple depending on the
    # OpenCV version; pick the element holding the contour list.
    contours = found[0] if len(found) == 2 else found[1]

    # Only contours with area < 2 are erased: a limit above 4 was seen to
    # remove dashes ("-") as well.
    for speck in (c for c in contours if cv2.contourArea(c) < 2):
        cv2.drawContours(threshCleaned, [speck], -1, (0, 0, 0), -1)

    roi = cv2.bitwise_not(threshCleaned)

    return roi, gray, threshInitial, threshNoBorders, threshCleaned

def show_wait_destroy(winname, img):
    """Show ``img`` in a window named ``winname`` and block until a key press.

    The window is moved to screen position (500, 0). It is always destroyed
    after the key press; if the key was ``q`` the program is then terminated
    through ``exit`` with a short message.
    """
    cv2.imshow(winname, img)
    cv2.moveWindow(winname, 500, 0)

    # waitKey(0) blocks indefinitely; only the low byte of the key code is
    # compared against 'q'.
    quit_requested = (cv2.waitKey(0) & 0xFF) == ord('q')

    cv2.destroyWindow(winname)
    if quit_requested:
        exit('Pressed q - exiting ...')

def getOcrImage(image, txt, h, w, bottomTxt=''):
    """Return a copy of ``image`` with the OCR text drawn onto it.

    ``txt`` is reduced to its ASCII characters and stripped, then each of its
    lines is written starting at the left edge, the first baseline at y=20
    and every following one 10 pixels lower. ``bottomTxt`` is written as-is
    with its baseline at ``h - 4``. ``w`` is accepted but not used.
    The input ``image`` is left unmodified.
    """
    canvas = image.copy()

    # Keep only ASCII characters so the text can be drawn with OpenCV.
    printable = "".join(ch for ch in txt if ord(ch) < 128).strip()

    font = cv2.FONT_HERSHEY_SIMPLEX
    baseline = 20
    for row in printable.split('\n'):
        cv2.putText(canvas, row, (0, baseline), font, 0.40, (255, 255, 0), 1)
        baseline += 10

    # Caption near the bottom edge of the canvas.
    cv2.putText(canvas, bottomTxt, (0, h - 4), font, 0.40, (0, 255, 255), 1)

    return canvas

def ocr(roi):
    """Run Tesseract on ``roi`` and return the recognised text.

    The call uses page segmentation mode 6 (``--psm 6``) and the ``nor``
    language data.
    """
    # An alternative that was tried instead of lang='nor': restricting
    # recognition with
    #   config="-c tessedit_char_whitelist=' 'ABCDEFGHIJKLMNOPQRSTUVWXYZÆØÅ- --psm 6"
    return pytesseract.image_to_string(roi, config="--psm 6", lang='nor')

def removeOuterBorder(image, thresh):
    """Suppress straight vertical/horizontal lines while keeping other strokes.

    Approach taken from
    https://stackoverflow.com/questions/58084229/remove-borders-from-image-but-keep-text-written-on-borders-preprocessing-before

    ``thresh`` is opened with a 1x10 and then a 10x1 rectangular kernel to
    isolate long vertical and horizontal runs; the outlines of those runs are
    painted white (thickness 5) on a copy of ``image``. The painted copy is
    inverted, dilated, converted with COLOR_BGR2GRAY and AND-ed with
    ``thresh``; the result is closed with a 3x3 elliptical kernel and AND-ed
    with ``thresh`` once more.

    Returns ``255 - final``, i.e. the inverse of that last mask.
    Neither ``image`` nor ``thresh`` is assigned to directly; all drawing
    happens on a copy of ``image``.
    """
    painted = image.copy()

    # Vertical lines first (1x10 kernel), then horizontal ones (10x1 kernel).
    for kernel_size in ((1, 10), (10, 1)):
        line_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, kernel_size)
        lines_only = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, line_kernel, iterations=2)
        found = cv2.findContours(lines_only, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        # 2-tuple or 3-tuple return value depending on the OpenCV version.
        contours = found[0] if len(found) == 2 else found[1]
        for contour in contours:
            cv2.drawContours(painted, [contour], -1, (255, 255, 255), 5)

    # Repair step: grow what is left, then restrict it to pixels that are
    # set in the threshold image.
    repair_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
    grown = cv2.dilate(255 - painted, repair_kernel, iterations=5)
    grown = cv2.cvtColor(grown, cv2.COLOR_BGR2GRAY)
    candidate = cv2.bitwise_and(grown, thresh)

    closed = cv2.morphologyEx(candidate, cv2.MORPH_CLOSE, repair_kernel, iterations=5)
    final = cv2.bitwise_and(closed, thresh)

    return 255 - final

def removeBorder(thresh, borderThickness = 2):
    """Return a copy of ``thresh`` with a frame of pixels set to zero.

    Exactly ``borderThickness`` rows/columns are blanked on each of the four
    sides; everything inside that frame is copied unchanged.

    The previous implementation passed ``(w - 2*t, h - 2*t)`` as the second
    corner of an inclusive ``cv2.rectangle`` mask, which blanked ``2*t - 1``
    pixels on the right/bottom but only ``t`` on the left/top, and produced a
    stray rectangle when the thickness exceeded half the image size.

    Parameters:
        thresh: NumPy image array; the first two axes are rows and columns.
        borderThickness: frame width in pixels. Values below 0 are treated
            as 0 (nothing is blanked).

    Returns:
        A new array with the same shape and dtype as ``thresh``. The input
        is not modified. If the frame would cover the whole image the result
        is entirely zero.
    """
    t = max(int(borderThickness), 0)
    h, w = thresh.shape[:2]

    masked = np.zeros(thresh.shape, thresh.dtype)
    # Copy the interior only when something is left after removing t pixels
    # from both sides of each axis.
    if h > 2 * t and w > 2 * t:
        masked[t:h - t, t:w - t] = thresh[t:h - t, t:w - t]

    return masked

# ####################################

# image = cv2.imread('temp/roi174.png')
# roi, gray, threshInitial, threshNoBorders, threshCleaned = pre_process(image)

# h, w = gray.shape
# imgBlank = np.zeros((h, w, 3), np.uint8)  # CREATE A BLANK IMAGE FOR TESTING DEBUGGING IF REQUIRED
        
# txt = ocr(roi)

# ocrImage = getOcrImage(imgBlank, txt, h, w)

# # Image Array for Display
# imageArray = ([[image, gray, threshInitial, threshNoBorders], [threshCleaned, roi, ocrImage, imgBlank]] )
# stackedImage = stackImages(imageArray, 4, 
#     [['raw', 'gray', 'threshold', 'no borders'] , ['cleaned', 'roi', 'ocr', '']])

# show_wait_destroy("stacked", stackedImage)

# f = open("temp/ocr_output.txt", "w")
# Run the OCR pipeline on every "roi*" file in the temp directory and show
# all intermediate stages side by side. Files are visited in sorted order so
# repeated runs step through them in the same sequence.
for file in sorted(os.listdir("temp")):
    if not file.startswith("roi"):
        continue

    path = os.path.join("temp", file)

    image = cv2.imread(path)
    if image is None:
        # cv2.imread does not raise for unreadable or non-image files, it
        # returns None; skip those instead of crashing in pre_process.
        print('%s : could not be read as an image - skipping' % file)
        continue

    roi, gray, threshInitial, threshNoBorders, threshCleaned = pre_process(image)

    h, w = gray.shape
    # Blank canvas: used as background for the OCR text and as filler tile.
    imgBlank = np.zeros((h, w, 3), np.uint8)

    txt = ocr(roi)

    print('%s : %s' % (file, txt))

    # Caption is the file name without its extension. str.strip('.png')
    # would remove any of the characters '.', 'p', 'n', 'g' from BOTH ends
    # of the name, so splitext is used instead.
    caption = os.path.splitext(file)[0]
    ocrImage = getOcrImage(imgBlank, txt, h, w, caption)

    # Image Array for Display
    imageArray = ([[image, gray, threshInitial, threshNoBorders], [threshCleaned, roi, ocrImage, imgBlank]] )
    stackedImage = stackImages(imageArray, 4,
        [['raw', 'gray', 'threshold', 'no borders'] , ['cleaned', 'roi', 'ocr', '']])

    show_wait_destroy("stacked", stackedImage)

    # f.write('file: %s ocr: %s \n' % (path, txt))
    # f.flush()

# f.close()
