In [1]:
from datasets import load_dataset
import numpy as np
import cv2
import math
import pytesseract
import re
import time
import easyocr

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
ds = load_dataset("lansinuote/ocr_id_card")

In [3]:
# okay time for some actual functionality: let's make an accuracy checker
def accuracyChecker(words, testData):
    name = testData[0]['word']
    accuracy = 0
    yearFound = False
    monthFound = False
    dayFound = False
    IDFound = False

    for i in range(len(words)):
        if words[i] == name:
            accuracy += 1
        if re.fullmatch(r"\d{4}", words[i]) and not yearFound:
            if 2025 > int(words[i]) >= 1900:
                accuracy += 1
                yearFound = True
        if re.fullmatch(r"\d{1,2}", words[i]) and not monthFound:
            if 12 > int(words[i]) >= 1:
                accuracy += 1
                monthFound = True
        if re.fullmatch(r"\d{1,2}", words[i]) and not dayFound:
            if 31 > int(words[i]) >= 1:
                accuracy += 1
                dayFound = True
        if re.fullmatch(r"\d{18}", words[i]) and not IDFound:
            accuracy += 1
            IDFound = True
    return accuracy / 5


In [4]:
accuracy = []
latency = []
numImages = len(ds['train'])
numImages = 1
for imgNum in range(numImages):
    # collect an image from the dataset and turn it into an array for edge detection, use greyscale
    currImg = ds['train'][imgNum]['image']
    testingImageArray = np.array(currImg)
    edgeDetectionArray = cv2.cvtColor(testingImageArray, cv2.COLOR_RGB2GRAY)

    # apply canny edge detection
    edges = cv2.Canny(edgeDetectionArray, 100, 200)

    # do a hough lines transform
    lines = cv2.HoughLinesP(edges, 1, np.pi / 180, 75, None, 50, 10)
    linesTest = np.zeros(edges.shape) + 250
    for i in range(0, len(lines)):
        l = lines[i][0]
        cv2.line(linesTest, (l[0], l[1]), (l[2], l[3]), (0,0,255), 3, cv2.LINE_AA)

    # find the angles at which each line lies
    theta = np.zeros(len(lines))
    for i in range(0, len(lines)):
        l = lines[i][0]
        if abs(l[3] - l[1]) == 0:
            # horizontal line
            theta[i] = 0
        else:
            theta[i] = math.atan(abs(l[2] - l[0])/abs(l[3] - l[1]))

    # find smallest angle greater than pi/4
    optTheta = 100
    optIndex = 0
    for k in range(len(theta)):
        if theta[k] > np.pi/4:
            if theta[k] < optTheta:
                optTheta = theta[k]
                optIndex = k
    mostHorz = np.float32(lines[optIndex][0])
    # this actually ends up being the least horizontal of the still pretty horizontal lines, deals with noise from extra lines created from the text

    # find some points to warp the image, use the midpoint of the lines offset a bit, and the new destination of the endpoints of the least horizontal but still horizontal line
    mdpnt = [(mostHorz[0] + mostHorz[2])/2,(mostHorz[1] + mostHorz[3])/2]
    adjustment = 100
    if mdpnt[1] > 250:
        initialPts = np.float32([[mostHorz[0], mostHorz[1]],
                                 [mostHorz[2], mostHorz[3]],
                                 [mdpnt[0], mdpnt[1] - adjustment]])
        finalPts = np.float32([[mostHorz[0], mdpnt[1]],
                               [mostHorz[2], mdpnt[1]],
                               [mdpnt[0], mdpnt[1] - adjustment]])
    else:
        initialPts = np.float32([[mostHorz[0], mostHorz[1]],
                                 [mostHorz[2], mostHorz[3]],
                                 [mdpnt[0], mdpnt[1] + adjustment]])
        finalPts = np.float32([[mostHorz[0], mdpnt[1]],
                               [mostHorz[2], mdpnt[1]],
                               [mdpnt[0], mdpnt[1] + adjustment]])

    # warp it
    M = cv2.getAffineTransform(initialPts, finalPts)
    warpedImg = cv2.warpAffine(testingImageArray, M, (testingImageArray.shape[1], testingImageArray.shape[0]))

    # red it
    redshiftImg = warpedImg
    for m in range(warpedImg.shape[0]):
        for n in range(warpedImg.shape[1]):
            redshiftImg[m][n][1] = 0
            redshiftImg[m][n][2] = 0

    # tesseract it, also find latency
    start = time.time()
    wordsRaw = pytesseract.image_to_string(redshiftImg, lang='chi_sim')
    stop = time.time()
    latency.append(stop - start)

    # clean and tokenize the data from OCR
    words = wordsRaw.replace(" ","").splitlines()
    testingData = ds['train'][imgNum]['ocr']

    pattern = r'\d+|[\u4e00-\u9fff]+|[A-Za-z]+|\s|[^\w\s]'
    tokens = []
    for i in range(len(words)):
        tokens.append(re.findall(pattern, words[i]))

    # flatten it out
    wordsData = []
    for i in range(len(tokens)):
        for j in range(len(tokens[i])):
            wordsData.append(tokens[i][j])

    # run the accuracy checker on the final data
    accuracy.append(accuracyChecker(wordsData, testingData))

print(wordsData)

['e', '官加强', '女眷泉', '1988', '干', '1', '月', '21', '清南省衡阳市珠', '区', '1853198812215365', '河']


In [20]:
# here will be an easyOCR pipeline

accuracy = []
latency = []
numImages = len(ds['train'])
numImages = 1
reader = easyocr.Reader(['ch_sim'])
for imgNum in range(numImages):
    # collect an image from the dataset and turn it into an array for edge detection, use greyscale
    currImg = ds['train'][imgNum]['image']
    testingImageArray = np.array(currImg)
    # greyscale it
    grey = cv2.cvtColor(testingImageArray, cv2.COLOR_RGB2GRAY)

    # gaussian blur
    grey = cv2.GaussianBlur(grey, (5,5), 0)

    # binary it
    thresh = cv2.adaptiveThreshold(grey, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 31, 10)

    # easyocr it
    start = time.time()
    results = reader.readtext(thresh)
    stop = time.time()
    latency.append(stop - start)

    # snag words
    testing = []
    for m in range(len(results)):
        if results[m][2] > .25:
            testing.append(results[m][1])

    testingData = ds['train'][imgNum]['ocr']

    # check accuracy
    accuracy.append(accuracyChecker(testing, testingData))

print(accuracy)



Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


[0.4]


In [6]:
print(results)

[]


In [9]:
decipher = [([[np.int32(123), np.int32(139)], [np.int32(151), np.int32(139)], [np.int32(151), np.int32(159)], [np.int32(123), np.int32(159)]], '止#', np.float64(0.00021269982427132527)), ([[np.int32(206), np.int32(156)], [np.int32(264), np.int32(156)], [np.int32(264), np.int32(186)], [np.int32(206), np.int32(186)]], '艮 &', np.float64(0.03267718479037285)), ([[np.int32(125), np.int32(168)], [np.int32(196), np.int32(168)], [np.int32(196), np.int32(200)], [np.int32(125), np.int32(200)]], '1  女', np.float64(0.29385551937228865)), ([[np.int32(130), np.int32(216)], [np.int32(158), np.int32(216)], [np.int32(158), np.int32(240)], [np.int32(130), np.int32(240)]], '+1', np.float64(0.040283392915826326)), ([[np.int32(170), np.int32(206)], [np.int32(212), np.int32(206)], [np.int32(212), np.int32(236)], [np.int32(170), np.int32(236)]], '1帔', np.float64(0.005673569689775284)), ([[np.int32(135), np.int32(257)], [np.int32(161), np.int32(257)], [np.int32(161), np.int32(277)], [np.int32(135), np.int32(277)]], '卫牛', np.float64(0.022190657852689474)), ([[np.int32(177), np.int32(275)], [np.int32(197), np.int32(275)], [np.int32(197), np.int32(299)], [np.int32(177), np.int32(299)]], '区', np.float64(0.13102118594386614)), ([[np.int32(190), np.int32(346)], [np.int32(218), np.int32(346)], [np.int32(218), np.int32(370)], [np.int32(190), np.int32(370)]], '5', np.float64(0.780059804776613)), ([[np.float64(159.2802656784306), np.float64(128.3885579247043)], [np.float64(219.29805341057522), np.float64(122.73663001235035)], [np.float64(221.7197343215694), np.float64(156.6114420752957)], [np.float64(161.70194658942478), np.float64(162.26336998764967)]], '[', np.float64(0.0021989887930565916)), ([[np.float64(206.98198161801108), np.float64(201.78017977981216)], [np.float64(297.72193181960137), np.float64(189.73861696164815)], [np.float64(301.01801838198895), np.float64(217.21982022018784)], [np.float64(210.27806818039866), np.float64(230.26138303835185)]], '412031 #', np.float64(0.0389764017291661)), ([[np.float64(168.57002829714983), np.float64(246.9420169782899)], [np.float64(311.7178083825546), np.float64(224.52422873364245)], [np.float64(316.4299717028502), np.float64(257.05798302171013)], [np.float64(173.2821916174454), np.float64(279.4757712663576)]], '染裔箸[召市#耳', np.float64(7.434269243581871e-06)), ([[np.float64(224.0), np.float64(343.0)], [np.float64(381.84985206210285), np.float64(318.0628003285702)], [np.float64(386.0), np.float64(343.0)], [np.float64(227.15014793789715), np.float64(368.9371996714298)]], '64183158812215385', np.float64(0.43798913544342083)), ([[np.float64(139.65465441200737), np.float64(356.5201198532081)], [np.float64(194.7364862842489), np.float64(349.0077221232863)], [np.float64(197.34534558799263), np.float64(370.4798801467919)], [np.float64(142.2635137157511), np.float64(378.9922778767137)]], '4息阜', np.float64(0.002058092394927222))]

In [19]:
testing = []
for i in range(len(decipher)):
    if decipher[i][2] > .25:
        testing.append(decipher[i][1])
print(accuracyChecker(testing, testingData))

0.4
