In [2]:
import cv2
import numpy as np
import pytesseract
import os
import re
import imutils
from skimage.filters import threshold_local
import argparse
import utils
# Location of the Tesseract executable used by pytesseract. Setting the
# TESSERACT_CMD environment variable overrides it; otherwise the default
# Windows install path is used, as before.
pytesseract.pytesseract.tesseract_cmd = os.environ.get(
    'TESSERACT_CMD', 'C:\\Program Files\\Tesseract-OCR\\tesseract.exe'
)

# Document Scanner 1

In [None]:
def order_points(pts):
    """Arrange four (x, y) points in a fixed corner order.

    Returns a (4, 2) float32 array whose rows are, in order: top-left,
    top-right, bottom-right, bottom-left.
    """
    # x + y is smallest at the top-left corner and largest at the bottom-right
    coord_sum = pts.sum(axis = 1)
    # y - x is smallest at the top-right corner and largest at the bottom-left
    coord_diff = np.diff(pts, axis = 1)

    top_left = pts[np.argmin(coord_sum)]
    bottom_right = pts[np.argmax(coord_sum)]
    top_right = pts[np.argmin(coord_diff)]
    bottom_left = pts[np.argmax(coord_diff)]

    ordered = np.zeros((4, 2), dtype = "float32")
    for row, corner in enumerate((top_left, top_right, bottom_right, bottom_left)):
        ordered[row] = corner
    return ordered

In [None]:
def four_point_transform(image, pts):
    """Warp the quadrilateral described by ``pts`` to an upright rectangle.

    The four points are put into top-left, top-right, bottom-right,
    bottom-left order, the output size is taken from the longer of each
    pair of opposite edges, and ``image`` is warped with the resulting
    perspective matrix. Returns the warped image.
    """
    corners = order_points(pts)
    top_left, top_right, bottom_right, bottom_left = corners

    def edge_length(p, q):
        # Euclidean distance between two (x, y) points
        return np.sqrt(((p[0] - q[0]) ** 2) + ((p[1] - q[1]) ** 2))

    # output width: longer of the bottom and top edges, truncated to int
    out_width = max(
        int(edge_length(bottom_right, bottom_left)),
        int(edge_length(top_right, top_left)),
    )
    # output height: longer of the right and left edges, truncated to int
    out_height = max(
        int(edge_length(top_right, bottom_right)),
        int(edge_length(top_left, bottom_left)),
    )

    # destination corners of the top-down view, in the same order as `corners`
    target = np.array(
        [
            [0, 0],
            [out_width - 1, 0],
            [out_width - 1, out_height - 1],
            [0, out_height - 1],
        ],
        dtype = "float32",
    )

    transform = cv2.getPerspectiveTransform(corners, target)
    return cv2.warpPerspective(image, transform, (out_width, out_height))

In [None]:
def Doc_Scanner(image):
    """Find a document outline in ``image`` and return a scanned-looking version.

    Steps: edge detection on a copy resized to a height of 500, a search of
    the five largest contours for one that approximates to four points, a
    perspective warp of the original image to that quadrilateral, then local
    thresholding of a 600x600 grayscale version. The original and the result
    are shown in OpenCV windows until a key is pressed, after which the
    windows are closed.

    Args:
        image: BGR image array (it is converted with cv2.COLOR_BGR2GRAY).

    Returns:
        A 600x600 uint8 array containing only the values 0 and 255.

    Raises:
        ValueError: if ``image`` is None or no four-point contour is found.
    """
    if image is None:
        raise ValueError("Doc_Scanner received None instead of an image")

    # scale factor between the original image and the 500px-high working copy
    ratio = image.shape[0]/ 500.0
    orig = image.copy()
    image = imutils.resize(image, height = 500)

    #STEP 1 - Edge Detection
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray = cv2.GaussianBlur(gray, (5,5), 0)
    edged = cv2.Canny(gray, 75, 200)
    print("Step 1: Edge Detection")

    #STEP 2 - Finding Contour
    cnts = cv2.findContours(edged.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    cnts = imutils.grab_contours(cnts)
    # only the five largest contours are candidates for the document outline
    cnts = sorted(cnts, key = cv2.contourArea, reverse=True)[:5]

    screenCnt = None
    for c in cnts:
        peri = cv2.arcLength(c, True)
        approx = cv2.approxPolyDP(c, 0.02 * peri, True)

        if len(approx) == 4:
            screenCnt = approx
            break

    if screenCnt is None:
        # previously this fell through and failed on an unbound variable
        raise ValueError("Could not find a four-point document outline in the image")

    print("Step 2: Find Contours")
    cv2.drawContours(image, [screenCnt], -1, (0,255,0), 2)

    #STEP 3 - Perspective Transform
    # contour coordinates are in the resized image; scale them back to the original
    warped = four_point_transform(orig, screenCnt.reshape(4,2) * ratio)
    # convert warped image to grayscale, then threshold it to
    # give it that 'black and white' paper effect
    warped = cv2.cvtColor(warped, cv2.COLOR_BGR2GRAY)
    warped = cv2.resize(warped, (600,600) )
    T = threshold_local(warped, 11, offset = 15, method = "gaussian")
    warped = (warped > T).astype("uint8") * 255

    # show original and scanned images
    print("STEP 3: Apply perspective transform")
    cv2.imshow("Original", cv2.resize(orig, (600,600)))
    cv2.imshow("Scanned", warped)
    cv2.waitKey(0)
    # close the windows after the key press, as the other display cells do
    cv2.destroyAllWindows()

    return warped
    

# Image Alignment

## Here, imgQ is the template image and imgF is the image that gets aligned with respect to imgQ

In [29]:
# Make changes here to try out different test cases
# Template image: read into imgQ and used as the alignment reference
path_to_template_image = '0845.jpg'
# Image to be aligned: read into imgF and warped onto the template
path_to_non_aligned_image = 'non-aligned.jpg'

In [30]:
# Read both images and resize them to a common 600x600 size.
# cv2.imread returns None instead of raising when a file cannot be read, so
# check explicitly and fail with the offending path.
imgQ = cv2.imread(path_to_template_image)
if imgQ is None:
    raise FileNotFoundError(f"Could not read template image: {path_to_template_image}")
#imgQ = Doc_Scanner(imgQ)
imgQ = cv2.resize(imgQ, (600,600))

imgF = cv2.imread(path_to_non_aligned_image)
if imgF is None:
    raise FileNotFoundError(f"Could not read image to align: {path_to_non_aligned_image}")
#imgF = Doc_Scanner(imgF)
imgF = cv2.resize(imgF, (600,600))

In [31]:
# Template dimensions: rows, columns, channels
h, w, c = imgQ.shape

# ORB detector allowed up to 100000 features; compute the template's
# keypoints and descriptors, plus a keypoint overlay for optional inspection
orb = cv2.ORB_create(nfeatures=100000)
kp1, des1 = orb.detectAndCompute(imgQ, None)
imgKp1 = cv2.drawKeypoints(imgQ, kp1, None)

# Show the template; any key press closes the window
#cv2.imshow('Output',imgKp1)
cv2.imshow('Template', imgQ)
cv2.waitKey(0)
cv2.destroyAllWindows()

In [32]:
# Show the image that is about to be aligned; any key press closes the window
cv2.imshow('Test',imgF)
cv2.waitKey(0)
cv2.destroyAllWindows()

In [33]:
# Percentage of the best (lowest-distance) matches kept for the homography
per = 5

kp2, des2 = orb.detectAndCompute(imgF,None)
if des1 is None or des2 is None:
    # detectAndCompute yields no descriptors when it finds no keypoints
    raise ValueError("ORB produced no descriptors for one of the images; cannot match")

bf = cv2.BFMatcher(cv2.NORM_HAMMING)
# bf.match returns a tuple in recent OpenCV releases, so an in-place .sort()
# fails there; sorted() gives a list in either case
match = sorted(bf.match(des2,des1), key=lambda x:x.distance)
good = match[:int(len(match)*(per/100))]

# Visual check: draw at most the 40 best matches
imgMatch = cv2.drawMatches(imgF,kp2,imgQ,kp1,good[:40],None,flags=2)
cv2.imshow('Rect',imgMatch)
cv2.waitKey(0)
cv2.destroyAllWindows()



In [34]:
# A homography needs at least four point correspondences
if len(good) < 4:
    raise ValueError(f"Need at least 4 matches to estimate a homography, got {len(good)}")

# Matched keypoint coordinates: source in imgF, destination in the template imgQ
srcPoints = np.float32([kp2[m.queryIdx].pt for m in good]).reshape(-1,1,2)
desPoints = np.float32([kp1[m.trainIdx].pt for m in good]).reshape(-1,1,2)

M, _ = cv2.findHomography(srcPoints,desPoints,cv2.RANSAC,5.0)
if M is None:
    raise ValueError("Homography estimation failed for the selected matches")

# Take the output size from the template itself so that re-running this cell
# does not depend on the globals h and w still holding the template size
imgScan = cv2.warpPerspective(imgF,M,(imgQ.shape[1],imgQ.shape[0]))
cv2.imshow('Rect',imgScan)
cv2.waitKey(0)
cv2.destroyAllWindows()

## Script to capture ROIs

In [35]:
# Captured ROIs; the mouse callback appends one entry per drawn rectangle:
# [(x1, y1), (x2, y2), type, field name]
rectangleCoor = []

In [36]:
# State shared with the mouse callback: anchor corner of the current drag and
# whether a drag is in progress
ix = -1
iy = -1
drawing = False

# Load the clean template once. The callback copies this array instead of
# re-reading and resizing the file on every mouse event.
roiBaseImg = cv2.imread(path_to_template_image)
if roiBaseImg is None:
    raise FileNotFoundError(f"Could not read template image: {path_to_template_image}")
roiBaseImg = cv2.resize(roiBaseImg,(600,600))

def draw_reactangle_with_drag(event, x, y, flags, param):
    """Mouse callback that lets the user drag out ROIs on the template.

    Left button down records the anchor corner, mouse move while dragging
    shows a preview rectangle, and left button up asks for the ROI type and
    field name on stdin and appends
    [(left, top), (right, bottom), type, field name] to ``rectangleCoor``.
    The displayed image is published through the global ``imgQ``.
    """
    global ix, iy, drawing, imgQ
    if event == cv2.EVENT_LBUTTONDOWN:
        drawing = True
        ix = x
        iy = y

    elif event == cv2.EVENT_MOUSEMOVE:
        if drawing:
            # preview on a fresh copy so earlier preview rectangles do not pile up
            preview = roiBaseImg.copy()
            cv2.rectangle(preview, pt1=(ix,iy), pt2=(x, y),color=(0,255,0),thickness=1)
            imgQ = preview

    elif event == cv2.EVENT_LBUTTONUP:
        drawing = False
        # drop the preview rectangle from the displayed image
        imgQ = roiBaseImg.copy()
        if x == ix or y == iy:
            # a click without a drag has no area to crop; do not record it
            return
        Type = input('Enter type: ')
        Field_Name = input('Enter Field Name: ')
        # store corners as (left, top), (right, bottom) whatever the drag
        # direction, so that slicing with them never yields an empty crop
        rectangleCoor.append([(min(ix,x),min(iy,y)),(max(ix,x),max(iy,y)),Type,Field_Name])

cv2.namedWindow(winname= "Title of Popup Window")
cv2.setMouseCallback("Title of Popup Window", draw_reactangle_with_drag)

# Keep redrawing until 's' is pressed, then print the captured ROIs
while True:
    cv2.imshow("Title of Popup Window", imgQ)
    if cv2.waitKey(1) & 0xFF == ord('s'):
        print(rectangleCoor)
        break
cv2.destroyAllWindows()

Enter type: text
Enter Field Name: Buyer
Enter type: Int
Enter Field Name: Date
Enter type: Int
Enter Field Name: Awb No
[[(4, 107), (415, 193), 'text', 'Buyer'], [(5, 194), (160, 223), 'Int', 'Date'], [(467, 189), (540, 229), 'Int', 'Awb No']]


# Let's try to capture the above ROIs on the aligned image 'imgScan'

In [37]:
def img_processing_pipeline(cropped_img):
    """Pre-process a cropped BGR region: grayscale, sharpen, equalize.

    Returns the processed single-channel image. Adaptive thresholding and
    resizing are not applied.
    """
    gray = cv2.cvtColor(cropped_img, cv2.COLOR_BGR2GRAY)

    # 3x3 sharpening kernel: centre weight 9 against eight neighbours of -1
    sharpen_kernel = np.array([[-1, -1, -1], [-1, 9, -1], [-1, -1, -1]])
    sharpened = cv2.filter2D(gray, -1, sharpen_kernel)

    # histogram equalization as the final step
    return cv2.equalizeHist(sharpened)

In [38]:
# OCR every captured ROI on the aligned image and collect the text per field.
Result = {}

# Outlines are drawn on a copy. Drawing them on imgScan itself put the green
# border (and outlines of earlier ROIs) inside the crops handed to Tesseract.
imgScanAnnotated = imgScan.copy()

for roi in rectangleCoor:
    (x1, y1), (x2, y2), field, field_name = roi
    # normalise the corners so that a drag in any direction gives a valid slice
    left, right = sorted((x1, x2))
    top, bottom = sorted((y1, y2))

    cv2.rectangle(imgScanAnnotated,(left,top),(right,bottom),(0,255,0),1)
    cropped_img = imgScan[top:bottom, left:right]
    #cropped_img = img_processing_pipeline(cropped_img)

    if cropped_img.size == 0:
        # zero-area ROI: nothing to OCR or display
        Result[field_name] = ''
        continue

    cur_text = pytesseract.image_to_string(cropped_img)
    cur_text = cur_text.replace('\n','')

    # any ROI type other than 'text' has ASCII letters stripped from the OCR output
    if field != 'text':
        cur_text = re.sub('[a-zA-Z]','',cur_text)

    Result[field_name] = cur_text

    # show the crop that was just read; any key press moves on
    cv2.imshow('Rect',cropped_img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

# finally show the aligned image with all ROI outlines
cv2.imshow('Rect',imgScanAnnotated)
cv2.waitKey(0)
cv2.destroyAllWindows()

In [39]:
# Display the field name -> extracted text mapping
Result

{'Buyer': 'Ms Century Overseas17, DLF. Industrial Area,   New Dethi=110015State Code: 07',
 'Date': '',
 'Awb No': ''}

Save Result as an Excel sheet

In [None]:
import pandas as pd
# Build a one-row table (row label 0) from the Result dict. Nothing is
# written to disk in this cell.
df = pd.DataFrame(data=Result, index=[0])

In [None]:
# Display the resulting table
df