In [None]:
import cv2
import math
import pytesseract
import sys
import numpy as np
%matplotlib inline
from matplotlib import pyplot as plt

# 1. Digit detection using OpenCV contours

In [None]:
def in_bounds(s, target):
    """Tell whether ``s`` lies within +/-30 % of ``target``.

    A ``target`` of 0 acts as a wildcard: every ``s`` is accepted and no
    division takes place.
    """
    if target == 0:
        return True
    ratio = s / target
    return 0.7 <= ratio <= 1.3

def digitGenerator(thresh, height_of_digit, width_of_digit, x_pos = 0, y_pos = 0):
    """Yield a binarised crop for every contour whose bounding box is digit-sized.

    Parameters
    ----------
    thresh : image searched for contours. It is passed straight to
        ``cv2.findContours`` / ``cv2.adaptiveThreshold``, so it is expected to
        be an 8-bit single-channel array. A frame is drawn around every
        accepted box directly on this array (the input is modified in place).
    height_of_digit, width_of_digit : expected box size in pixels; a box is
        accepted when each dimension is within +/-30 % of the expectation
        (see ``in_bounds``; 0 disables the check for that dimension).
    x_pos : when non-zero, boxes whose left edge lies more than 25 px to the
        left of ``x_pos`` are rejected.
    y_pos : when non-zero, boxes whose top edge is not within +/-30 % of
        ``y_pos`` are rejected.

    Yields
    ------
    The crop ``thresh[y:y+h, x:x+w]`` after an inverted Gaussian adaptive
    threshold (block size 21, constant 11).
    """
    # OpenCV 3.x returns (image, contours, hierarchy) whereas 2.x and 4.x
    # return (contours, hierarchy); the contour list is always second to last.
    contours = cv2.findContours(thresh, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)[-2]
    for cnt in contours:
        x, y, w, h = cv2.boundingRect(cnt)
        if not (in_bounds(w, width_of_digit) and in_bounds(h, height_of_digit)):
            continue
        # The original test was written `x >= x_pos - 25 == False`, which Python
        # parses as the chained comparison
        # `x >= x_pos - 25 and x_pos - 25 == False`; spell the intent out.
        if x_pos != 0 and x < x_pos - 25:
            continue
        if y_pos != 0 and not in_bounds(y, y_pos):
            continue
        # The frame sits one pixel outside the box, so it is not part of the crop.
        cv2.rectangle(thresh, (x - 1, y - 1), (x + 1 + w, y + 1 + h), (0, 0, 255), 1)
        roi = thresh[y:y + h, x:x + w]
        roi = cv2.adaptiveThreshold(roi,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV,21,11)
        yield roi

# 2. Manually configure convolution digit detection

In [None]:
def plot_img(name, img):
    """Display ``img`` with matplotlib under the title ``name``.

    The images in this notebook come from OpenCV, i.e. they are either
    single-channel or 3-channel in BGR order (they are converted elsewhere
    with ``cv2.COLOR_BGR2GRAY``). matplotlib expects RGB and applies a
    false-colour map to 2-D data, so both cases are adapted before plotting.
    """
    shape = np.shape(img)
    if len(shape) == 2:
        # Grayscale / binary image: use a gray colour map instead of the default one.
        plt.imshow(img, cmap='gray')
    elif len(shape) == 3 and shape[2] == 3:
        # Reverse the channel axis (BGR -> RGB) on a view; the input is untouched.
        plt.imshow(np.asarray(img)[:, :, ::-1])
    else:
        plt.imshow(img)
    plt.title(name)
    plt.show()

In [None]:
# Interactive training: show every digit-sized crop of the reference font image
# and store it under the key the user presses ('0'-'9', or 'y' for slot 10).
im = cv2.imread("./courrier-font.png")
if im is None:
    # cv2.imread reports a missing/unreadable file by returning None.
    raise FileNotFoundError("Cannot read training image ./courrier-font.png")
imgray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
ret, thresh = cv2.threshold(imgray, 150, 255, 0)
plot_img('thresh', thresh)
width_of_digit = 58
height_of_digit = 84

# One template slot per digit 0-9 plus slot 10 for the symbol entered with 'y'.
# Untrained slots stay None (the previous `[[None]] * 11` filled every slot
# with the *same* one-element list object).
digitsModels = [None] * 11
keys = list(range(48, 58))  # ASCII codes of '0'..'9'
keys.append(121)            # ASCII code of 'y'
try:
    for roi in digitGenerator(thresh, height_of_digit, width_of_digit):
        cv2.imshow('Training: Enter digits displayed in the red rectangle!', roi)
        # Keep only the low byte: some platforms set modifier bits in the key code.
        key = cv2.waitKey(0) & 0xFF
        if key in keys:
            if key != 121:
                digitsModels[key - 48] = roi
            else:
                digitsModels[10] = roi
finally:
    # Close the training window even if the loop is interrupted.
    cv2.destroyAllWindows()

def detect_number(digit, candidate):
    """Score how strongly the template ``digit`` overlaps ``candidate``.

    ``digit`` is resized to the candidate's size when the shapes differ, then
    binarised (values > 80 become 255, the rest 0) and multiplied element-wise
    with ``candidate``; the sum of the products is returned as a Python int.

    Two defects of the previous version are avoided:
    * the template passed in is no longer modified in place (it used to be
      overwritten whenever no resize happened);
    * the product is computed in int64, so 8-bit inputs no longer wrap around
      (255 * 255 used to overflow to 1).

    For purely 0/255 inputs the score is therefore 255 * 255 times the number
    of pixels set in both images; the ranking between templates is unchanged.
    """
    dgt = digit
    if candidate.shape != dgt.shape:
        # cv2.resize expects the target size as (width, height).
        dgt = cv2.resize(dgt, (candidate.shape[1], candidate.shape[0]))
    # Build the binarised template as a new array instead of editing `dgt`.
    template = np.where(np.asarray(dgt) > 80, 255, 0).astype(np.int64)
    conv = template * np.asarray(candidate, dtype=np.int64)
    return int(conv.sum())

def recognize_digit(candidate):
    """Return the index of the template in ``digitsModels`` that best matches ``candidate``.

    Indices 0-9 are the digit templates; index 10 is the template stored with
    the 'y' key during training. Slots that were never trained (anything that
    is not a numpy array) are scored ``-inf`` so they keep their position in
    the list but can never win, instead of crashing in ``detect_number``.

    The candidate shape and all scores are printed for inspection.

    Raises
    ------
    ValueError
        If no slot of ``digitsModels`` holds a trained template.
    """
    untrained = float("-inf")
    convs = [
        detect_number(digit, candidate) if isinstance(digit, np.ndarray) else untrained
        for digit in digitsModels
    ]
    if all(score == untrained for score in convs):
        raise ValueError("digitsModels contains no trained template")
    print(candidate.shape, convs)
    return convs.index(max(convs))

# Sanity check: feed the template stored in slot 8 back into the matcher and
# display the index it is recognised as.
recognize_digit(digitsModels[8])

# 3. Text detection using OpenCV's pre-trained EAST model

In [None]:
def decode(scores, geometry, scoreThresh):
    """Convert the two EAST output blobs into rotated boxes and confidences.

    ``scores`` must have shape (1, 1, H, W) and ``geometry`` shape
    (1, 5, H, W); this is enforced with assertions. Each cell whose score is
    not below ``scoreThresh`` contributes one entry.

    Returns a two-element list ``[detections, confidences]`` where every
    detection is ``((center_x, center_y), (width, height), angle_in_degrees)``
    and every confidence is a Python float.
    """
    # --- validate the layout of both blobs before reading any value ---
    assert len(scores.shape) == 4, "Incorrect dimensions of scores"
    assert len(geometry.shape) == 4, "Incorrect dimensions of geometry"
    assert scores.shape[0] == 1, "Invalid dimensions of scores"
    assert geometry.shape[0] == 1, "Invalid dimensions of geometry"
    assert scores.shape[1] == 1, "Invalid dimensions of scores"
    assert geometry.shape[1] == 5, "Invalid dimensions of geometry"
    assert scores.shape[2] == geometry.shape[2], "Invalid dimensions of scores and geometry"
    assert scores.shape[3] == geometry.shape[3], "Invalid dimensions of scores and geometry"

    rows = scores.shape[2]
    cols = scores.shape[3]
    boxes = []
    box_scores = []

    for row in range(rows):
        # Row slices of the score map, the four distance channels and the angle channel.
        row_scores = scores[0][0][row]
        d0 = geometry[0][0][row]
        d1 = geometry[0][1][row]
        d2 = geometry[0][2][row]
        d3 = geometry[0][3][row]
        row_angles = geometry[0][4][row]

        for col, cell_score in enumerate(row_scores):
            if cell_score < scoreThresh:
                continue

            # Each output cell corresponds to a 4-pixel step in the network input.
            base_x = col * 4.0
            base_y = row * 4.0

            theta = row_angles[col]
            cos_t = math.cos(theta)
            sin_t = math.sin(theta)

            box_h = d0[col] + d2[col]
            box_w = d1[col] + d3[col]

            # Reference corner from which the two box edges are measured.
            ref_x = base_x + cos_t * d1[col] + sin_t * d2[col]
            ref_y = base_y - sin_t * d1[col] + cos_t * d2[col]

            # Two opposite corners; the box centre is their midpoint.
            corner_a = (-sin_t * box_h + ref_x, -cos_t * box_h + ref_y)
            corner_b = (-cos_t * box_w + ref_x, sin_t * box_w + ref_y)
            midpoint = (0.5*(corner_a[0]+corner_b[0]), 0.5*(corner_a[1]+corner_b[1]))

            boxes.append((midpoint, (box_w, box_h), -1*theta * 180.0 / math.pi))
            box_scores.append(float(cell_score))

    return [boxes, box_scores]

In [None]:
# Path of the frozen EAST text-detection graph; display_detected_text passes it
# to detect_text, which loads it with cv2.dnn.readNet.
model = "./frozen_east_text_detection.pb"

def detect_text(frame, model):
    """Detect text regions in ``frame`` with the EAST network and yield them one by one.

    Parameters
    ----------
    frame : BGR image (it is converted with ``cv2.COLOR_BGR2GRAY``).
    model : path of the network file handed to ``cv2.dnn.readNet``. The
        network is loaded each time the generator is started.

    Yields
    ------
    ``(roi, x, y, w, h)`` where ``(x, y, w, h)`` is the axis-aligned bounding
    rectangle of a detection, clipped to the image, and ``roi`` is that region
    of the grayscale frame after a Gaussian adaptive threshold (block size 21,
    constant 5). Detections that fall completely outside the image are skipped.
    """
    inpWidth = 320
    inpHeight = 320
    confThreshold = 0.5
    nmsThreshold = 0.4

    # Load network
    net = cv2.dnn.readNet(model)
    outNames = ["feature_fusion/Conv_7/Sigmoid", "feature_fusion/concat_3"]

    # Ratios that map coordinates in the network input back onto the frame.
    height_, width_ = frame.shape[:2]
    rW = width_ / float(inpWidth)
    rH = height_ / float(inpHeight)

    # Create a 4D blob from frame.
    blob = cv2.dnn.blobFromImage(frame, 1.0, (inpWidth, inpHeight), (123.68, 116.78, 103.94), True, False)
    imgray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    # Run the model
    net.setInput(blob)
    outs = net.forward(outNames)

    # Get scores and geometry
    scores = outs[0]
    geometry = outs[1]
    [boxes, confidences] = decode(scores, geometry, confThreshold)

    # Apply NMS. Depending on the OpenCV version the kept indices come back as
    # an (N, 1) array, a flat array, or an empty tuple; flatten to handle all.
    indices = cv2.dnn.NMSBoxesRotated(boxes, confidences, confThreshold, nmsThreshold)
    for i in np.asarray(indices, dtype=int).reshape(-1):
        # get 4 corners of the rotated rect
        vertices = cv2.boxPoints(boxes[i])

        # scale the bounding box coordinates based on the respective ratios
        for j in range(4):
            vertices[j][0] *= rW
            vertices[j][1] *= rH

        x, y, w, h = cv2.boundingRect(vertices)

        # Clip to the image: a negative start would otherwise be read as a
        # from-the-end index and produce a wrong or empty crop.
        left = max(x, 0)
        top = max(y, 0)
        right = min(x + w, width_)
        bottom = min(y + h, height_)
        if right <= left or bottom <= top:
            continue

        roi = imgray[top:bottom, left:right]
        roi = cv2.adaptiveThreshold(roi,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY,21,5)
        yield (roi, left, top, right - left, bottom - top)

# 4. Tools

In [None]:
def display_detected_text(img, config = ("")):
    """Run EAST detection on ``img`` and OCR every detected region with Tesseract.

    For each region yielded by ``detect_text(img, model)`` the crop is plotted,
    its absolute and image-relative position are printed, and the text read by
    ``pytesseract.image_to_string`` (called with ``config``) is printed, or
    "Cannot read" when the OCR result contains nothing but whitespace.
    """
    # Only the first two dimensions are needed, so grayscale shapes work too.
    img_height, img_width = img.shape[:2]
    for (roi, x, y, w, h) in detect_text(img, model):
        if roi is None or roi.size == 0:
            continue
        plot_img('Detected text!', roi)
        print("Detected at (x,y,w,h) = ", x, y, w, h)
        print("Relative at (x,y,w,h) = ", x/img_width, y/img_height, w/img_width, h/img_height)
        # Tesseract output usually ends with whitespace / a form feed even when
        # nothing was recognised, so strip before testing for emptiness.
        text = pytesseract.image_to_string(roi, config=config).strip()
        if text:
            print(text)
        else:
            print("Cannot read")

In [None]:
def crop_img_price(img):
    """Return the bottom-right quadrant of ``img`` (rows and columns from the midpoint on).

    The result is a numpy view of ``img``, not a copy. The unused grayscale
    conversion of the previous version was dropped, so single-channel images
    are accepted as well.
    """
    height, width = img.shape[:2]
    return img[height // 2:, width // 2:]

def crop_img_id(img):
    """Return the top-right quadrant of ``img`` (rows before the midpoint, columns from it on).

    The result is a numpy view of ``img``, not a copy. The unused grayscale
    conversion of the previous version was dropped, so single-channel images
    are accepted as well.
    """
    height, width = img.shape[:2]
    return img[:height // 2, width // 2:]

In [None]:
def detect_digits_convolution(im, width_of_digit=15, height_of_digit=40, pos_x=20, pos_y=11):
    """Step through digit-sized regions of ``im`` and print Tesseract's reading of each.

    The BGR image is converted to grayscale and adaptively thresholded; every
    crop produced by ``digitGenerator`` is shown in an OpenCV window and its
    OCR result printed. Press any key for the next crop, Esc to stop.

    Parameters
    ----------
    im : BGR image.
    width_of_digit, height_of_digit : expected digit box size in pixels. The
        defaults (15, 40) are the values the function previously hard-coded
        under the comment "fapiao id"; the values (10, 20) were labelled
        "price" but were always overwritten.
    pos_x, pos_y : position filters forwarded to ``digitGenerator``. The
        defaults (20, 11) are the values that were in effect; (120, 515) was
        an earlier, overwritten pair.
    """
    imgray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
    thresh = cv2.adaptiveThreshold(imgray,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY,21,5)
    plot_img('thresh', thresh)

    config = ""
    try:
        for roi in digitGenerator(thresh, height_of_digit, width_of_digit, pos_x, pos_y):
            cv2.imshow('Training: Enter digits displayed in the red rectangle!', roi)
            print(pytesseract.image_to_string(roi, config=config))
            # Keep only the low byte so Esc (27) is recognised on every platform.
            key = cv2.waitKey(0) & 0xFF
            if key == 27:
                break
    finally:
        # Close the preview window even when OCR raises.
        cv2.destroyAllWindows()

# 5. Tests

In [None]:
# Interactive check: walk through the digit-sized regions of one sample image
# (opens an OpenCV window; press a key to advance, Esc to stop).
im = cv2.imread("./fapiao/electronic/031001800211-46.62.png")
detect_digits_convolution(im)

In [None]:
# EAST detection + Tesseract OCR on the full sample image, using the
# simplified-Chinese model from the local tessdata directory.
img = cv2.imread("./fapiao/electronic/031001800211-46.62.png")
# Tesseract selects the language with "-l"; "--l" is not one of its options.
config = ("--tessdata-dir ./tesseract/ -l chi_sim --psm 3")
display_detected_text(img, config)

In [None]:
# Crop the bottom-right quadrant of a photographed sample (crop_img_price),
# show it, then run detection + OCR with the default Tesseract configuration.
img = cv2.imread("./fapiao/IMG_20190312_102624.jpg")
crop_img = crop_img_price(img)
plot_img("cropped", crop_img)
display_detected_text(crop_img)

In [None]:
# Crop the top-right quadrant of a photographed sample (crop_img_id),
# show it, then run detection + OCR with the default Tesseract configuration.
img = cv2.imread("./fapiao/IMG_20190312_102653.jpg")
crop_img = crop_img_id(img)
plot_img("cropped", crop_img)
display_detected_text(crop_img)

# 6. Other Tests

In [None]:
def text_detect(img,ele_size=(8,2)):
    """Find candidate text boxes via a horizontal Sobel filter and morphological closing.

    ``img`` may be BGR or already single-channel. The Sobel response is
    binarised with Otsu's threshold, closed with a rectangular structuring
    element of size ``ele_size``, and the external contours of the result are
    collected. Only contours made of more than 100 points are kept.

    Returns a list of ``(x1, y1, x2, y2)`` integer tuples: each bounding
    rectangle moved up/left by 8 % of its size and extended to 110 % of it.
    """
    gray = img
    if len(img.shape) == 3:
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # First derivative along x, default kernel size.
    edges = cv2.Sobel(gray, cv2.CV_8U, 1, 0)
    _, binary = cv2.threshold(edges, 0, 255, cv2.THRESH_OTSU+cv2.THRESH_BINARY)

    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, ele_size)
    closed = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)

    # RETR_EXTERNAL == 0 and CHAIN_APPROX_NONE == 1, the literals used before.
    contours, _ = cv2.findContours(closed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)

    padded = []
    for contour in contours:
        if contour.shape[0] > 100:
            bx, by, bw, bh = cv2.boundingRect(contour)
            padded.append((int(bx-bw*0.08), int(by-bh*0.08), int(bx+bw*1.1), int(by+bh*1.1)))
    return padded

In [None]:
# Try the Sobel/morphology detector on the bottom-right quadrant of a sample
# photo and show the boxes it returns in an OpenCV window (any key closes it).
img = cv2.imread("./fapiao/IMG_20190312_102653.jpg")
crop_img = crop_img_price(img)
rect = text_detect(crop_img)
for i in rect:
    # i is (x1, y1, x2, y2); draw the box in red (BGR) on the crop itself.
    cv2.rectangle(crop_img,i[:2],i[2:],(0,0,255))
cv2.imshow("img", crop_img)
cv2.waitKey(0)
cv2.destroyAllWindows()

In [None]:
def contrib_text_detection(img):
    """Detect text with the opencv_contrib ``cv2.text`` extremal-region filters and show the result.

    The image is split into channels with ``computeNMChannels``; inverted
    copies of all but the last channel are added. For every channel the
    two-stage ER filter is run with the classifiers found under
    ``./openCV_Cpp_samples``, the regions are grouped into rectangles, and the
    rectangles are drawn on a copy of ``img`` that is shown in an OpenCV
    window until a key is pressed. Nothing is returned.
    """
    vis      = img.copy()

    pathname = "./openCV_Cpp_samples"
    # Extract channels to be processed individually. Recent OpenCV bindings
    # return a tuple here, so convert to a list before appending to it.
    channels = list(cv2.text.computeNMChannels(img))
    # Append negative channels to detect ER- (bright regions over dark background).
    # The last channel is deliberately left out by the `- 1`
    # (presumably the gradient-magnitude channel - TODO confirm).
    cn = len(channels)-1
    for c in range(0,cn):
        channels.append((255-channels[c]))

    # Apply the default cascade classifier to each independent channel (could be done in parallel)
    print("Extracting Class Specific Extremal Regions from "+str(len(channels))+" channels ...")
    print("    (...) this may take a while (...)")
    for channel in channels:
        erc1 = cv2.text.loadClassifierNM1(pathname+'/trained_classifierNM1.xml')
        er1 = cv2.text.createERFilterNM1(erc1,16,0.00015,0.13,0.2,True,0.1)

        erc2 = cv2.text.loadClassifierNM2(pathname+'/trained_classifierNM2.xml')
        er2 = cv2.text.createERFilterNM2(erc2,0.5)

        regions = cv2.text.detectRegions(channel,er1,er2)

        rects = cv2.text.erGrouping(img,channel,[r.tolist() for r in regions])

        # Visualization: black outline with a thinner white line on top.
        for rect in rects:
            # Plain Python ints: some OpenCV builds reject numpy integers as points.
            x, y, w, h = (int(v) for v in rect[:4])
            cv2.rectangle(vis, (x, y), (x + w, y + h), (0, 0, 0), 2)
            cv2.rectangle(vis, (x, y), (x + w, y + h), (255, 255, 255), 1)

    #Visualization
    cv2.imshow("Text detection result", vis)
    cv2.waitKey(0)
    cv2.destroyAllWindows()


In [None]:
# Run the cv2.text extremal-region detector on the top-right quadrant of a
# sample photo. The crop was previously computed but the uncropped image was
# passed on; use the crop, as the other test cells do.
img = cv2.imread("./fapiao/IMG_20190312_102653.jpg")
crop_img = crop_img_id(img)
contrib_text_detection(crop_img)