In [18]:
#Import the necessary packages
import os

import cv2
import imutils
import matplotlib.pyplot as plt
import numpy as np
import pytesseract
from PIL import Image       #Python Imaging Library
# Tell pytesseract where the Tesseract executable lives. The TESSERACT_CMD
# environment variable, when set, overrides the default Windows install path
# so the notebook can run on machines with a different installation.
pytesseract.pytesseract.tesseract_cmd = os.environ.get(
    'TESSERACT_CMD', r'C:\Program Files\Tesseract-OCR\tesseract'
)


In [19]:
# Path of the passport image to process. Nothing is loaded here; the file is
# read from disk inside process_image().
# NOTE(review): absolute, machine-specific path - point it at your own image.
Im = "C:/Users/Acer/Desktop/Passport/MRZ.jpeg"

In [28]:
def process_image(Im):
    """Locate the machine-readable zone (MRZ) of a passport image and OCR it.

    The image is read as grayscale, resized to a height of 390 px, blurred,
    passed through a morphological gradient, closed and thresholded so the
    MRZ text lines merge into one wide blob. The largest contour whose
    bounding box is wide enough is cropped (with a small padding) from the
    grayscale image and handed to Tesseract.

    Parameters
    ----------
    Im : str
        Path of the image file to process.

    Returns
    -------
    str
        The text Tesseract recognises inside the detected region.

    Raises
    ------
    OSError
        If OpenCV cannot read the image (missing file or unsupported format).
    ValueError
        If no contour satisfies the aspect-ratio and width-coverage criteria.
    """
    # Structuring elements: a wide rectangle to bridge gaps between letters,
    # a large square to bridge gaps between text lines, and a small square
    # for the morphological gradient.
    rectKernel = cv2.getStructuringElement(cv2.MORPH_RECT, (13, 5))
    sqKernel = cv2.getStructuringElement(cv2.MORPH_RECT, (21, 21))
    kernel = np.ones((5, 5), np.uint8)

    # cv2.imread signals failure by returning None instead of raising.
    img = cv2.imread(Im, cv2.IMREAD_GRAYSCALE)
    if img is None:
        raise OSError("Could not read image: {!r}".format(Im))
    img = imutils.resize(img, height=390)
    height, width = img.shape[:2]

    # Smooth with a 5x5 Gaussian, then take the morphological gradient
    # (note: this is MORPH_GRADIENT, not a blackhat operation).
    gausBlur = cv2.GaussianBlur(img, (5, 5), 0)
    gradient = cv2.morphologyEx(gausBlur, cv2.MORPH_GRADIENT, kernel)

    # Close gaps between letters with the rectangular kernel, then binarise
    # with Otsu's method.
    rectclosing = cv2.morphologyEx(gradient, cv2.MORPH_CLOSE, rectKernel)
    rectthresh = cv2.threshold(rectclosing, 0, 255,
                               cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]

    # Close gaps between the MRZ lines with the square kernel, threshold
    # again, then erode to break apart loosely connected components.
    sqclosing = cv2.morphologyEx(rectthresh, cv2.MORPH_CLOSE, sqKernel)
    sqthresh = cv2.threshold(sqclosing, 0, 255,
                             cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
    img_erosion = cv2.erode(sqthresh, None, iterations=4)

    # Border pixels may have survived thresholding, so blank a margin of
    # 5% of the image width on every side. Rows are indexed against the
    # image height and columns against the image width.
    p = int(width * 0.05)
    img_erosion[:, 0:p] = 0
    img_erosion[:, width - p:] = 0
    img_erosion[0:p, :] = 0
    img_erosion[height - p:, :] = 0

    # Find external contours and visit them from largest to smallest area.
    cnts = cv2.findContours(img_erosion.copy(), cv2.RETR_EXTERNAL,
                            cv2.CHAIN_APPROX_SIMPLE)
    cnts = imutils.grab_contours(cnts)
    cnts = sorted(cnts, key=cv2.contourArea, reverse=True)

    roi = None
    for c in cnts:
        # Aspect ratio of the bounding box and the fraction of the image
        # width it covers.
        (x, y, w, h) = cv2.boundingRect(c)
        ar = w / float(h)
        crWidth = w / float(width)
        if ar > 5 and crWidth > 0.75:
            # Pad the box to compensate for the erosions applied above.
            pX = int((x + w) * 0.03)
            pY = int((y + h) * 0.03)
            x, y = max(0, x - pX), max(0, y - pY)
            w, h = w + (pX * 2), h + (pY * 2)
            # Slicing clips silently at the image border, so an oversized
            # box is safe here.
            roi = img[y:y + h, x:x + w].copy()
            break

    if roi is None:
        raise ValueError("No MRZ-like region found in image: {!r}".format(Im))

    # OCR the cropped region and return the recognised text.
    return pytesseract.image_to_string(roi, lang='eng')
          
    
    

In [32]:
# Run the MRZ extraction on the image path stored in Im; pass a different
# path to process_image() to read another image.
MRZ = process_image(Im)
# Bare expression so the notebook displays the recognised text.
MRZ

'P<INDSINGRI<<SHANTHIPRIYAC<<<<ccccccccccccc<\nU1885269<31ND9710050F2912017<<<<<c<cccc<cc<d'