In [808]:
import os
import re

import cv2
import numpy as np
import pytesseract
from matplotlib import pyplot as plt
from PIL import Image

# Point pytesseract at the Tesseract executable. The value below is an absolute
# Windows path, so it must be edited on any machine where Tesseract lives elsewhere.
pytesseract.pytesseract.tesseract_cmd = 'C:\\Program Files\\Tesseract-OCR\\tesseract.exe'  # Adjust according to your installation path


## Straighten the Image
The first step is to straighten the image if it is rotated. You can use the ORB (Oriented FAST and Rotated BRIEF) algorithm to detect keypoints and match them between the original and a template image. If enough matches are found, you can estimate the homography and rectify the image.

**Key points:**

- **Feature detection and matching:** ORB is used here for its speed and efficiency. SIFT or SURF may give better-quality matches at the cost of more computation.
- **Homography estimation:** The homography matrix is estimated from the matched points; the RANSAC algorithm makes the estimate robust to outlier matches.
- **Perspective warping:** `cv2.warpPerspective` applies the computed homography to transform the image into the perspective of the template.

In [809]:
def straighten_image(image_path, template_path, show_matches=True, min_matches=10):
    """
    Warps an input image into the perspective of a template image using ORB matches.

    The result is kept in a local variable. Earlier versions stored it in a
    notebook-level global, which raised NameError on a fresh kernel (or silently
    returned the result of a previous run) whenever too few matches were found.

    :param image_path: path of the image to straighten
    :param template_path: path of the reference image that defines the target perspective
    :param show_matches: when True, open a blocking OpenCV window with the best matches
    :param min_matches: a homography is only estimated when more than this many matches exist
    :return: the warped image (template-sized) or, when alignment is not possible,
             the input image (downscaled if it was larger than the template)
    :raises FileNotFoundError: if either image cannot be read
    """
    original = cv2.imread(image_path)
    template = cv2.imread(template_path)

    if original is None or template is None:
        raise FileNotFoundError("One or both image paths are incorrect or the images cannot be read.")

    # Shrink the input when it exceeds the template in either dimension
    if original.shape[1] > template.shape[1] or original.shape[0] > template.shape[0]:
        original = resize_image(original, template.shape[1], template.shape[0])

    # ORB works on single-channel images
    original_gray = cv2.cvtColor(original, cv2.COLOR_BGR2GRAY)
    template_gray = cv2.cvtColor(template, cv2.COLOR_BGR2GRAY)

    orb = cv2.ORB_create()
    keypoints1, descriptors1 = orb.detectAndCompute(original_gray, None)
    keypoints2, descriptors2 = orb.detectAndCompute(template_gray, None)

    # No descriptors means no keypoints were found; the matcher cannot handle None
    if descriptors1 is None or descriptors2 is None:
        print("Not enough matches are found - {}/{}".format(0, min_matches))
        return original

    # Hamming distance suits ORB's binary descriptors; crossCheck keeps mutual best matches only
    bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
    matches = sorted(bf.match(descriptors1, descriptors2), key=lambda m: m.distance)

    rectified = None
    if len(matches) > min_matches:
        src_pts = np.float32([keypoints1[m.queryIdx].pt for m in matches]).reshape(-1, 1, 2)
        dst_pts = np.float32([keypoints2[m.trainIdx].pt for m in matches]).reshape(-1, 1, 2)

        # RANSAC with a 5 px reprojection threshold; the inlier mask is not needed here
        M, _ = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)

        if M is not None:
            height, width = template.shape[:2]
            rectified = cv2.warpPerspective(original, M, (width, height))
        else:
            print("Homography could not be estimated from {} matches".format(len(matches)))
    else:
        print("Not enough matches are found - {}/{}".format(len(matches), min_matches))

    if show_matches:
        # Draw only the N best matches; flags=2 skips keypoints that have no match
        N = 30
        matched_image = cv2.drawMatches(original, keypoints1, template, keypoints2, matches[:N], None, flags=2)
        cv2.imshow('Matches', matched_image)
        cv2.waitKey(0)
        cv2.destroyAllWindows()

    return rectified if rectified is not None else original


In [810]:
def resize_image(image, target_width, target_height):
    """
    Resizes the image to fit inside the target dimensions while preserving aspect ratio.

    The scaled size is clamped to at least one pixel per side: truncating with
    int() can otherwise yield a zero dimension for very elongated images, which
    cv2.resize rejects.

    :param image: input image array (height and width are read from its first two axes)
    :param target_width: target width to scale to (must be positive)
    :param target_height: target height to scale to (must be positive)
    :return: resized image array
    :raises ValueError: if the image is empty or a target dimension is not positive
    """
    src_height, src_width = image.shape[:2]
    if src_width <= 0 or src_height <= 0:
        raise ValueError("Cannot resize an empty image.")
    if target_width <= 0 or target_height <= 0:
        raise ValueError("Target width and height must be positive.")

    # The smaller factor guarantees both sides fit inside the target box
    scale = min(target_width / src_width, target_height / src_height)

    new_width = max(1, int(src_width * scale))
    new_height = max(1, int(src_height * scale))

    return cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_AREA)

## Histogram Analysis
Glare often results in areas of overexposure, where the pixel values approach the maximum intensity. You can analyze the histogram of the image to check for a spike at the high end of the intensity range, indicating overexposure.

In [811]:
def detect_glare(image, threshold_ratio=0.05):
    """
    Reports whether a noticeable share of the image is fully saturated.

    The fraction of pixels at maximum intensity (255) is compared against
    threshold_ratio. The previous implementation compared the raw pixel count of
    the last histogram bin against the ratio, so a single saturated pixel was
    enough to report glare.

    :param image: path of the image to analyse, or an already loaded image array
                  (2-D grayscale, or colour in the channel order cv2.imread produces)
    :param threshold_ratio: fraction of saturated pixels above which glare is reported
    :return: True if the saturated fraction exceeds threshold_ratio, otherwise False
    :raises FileNotFoundError: if a path is given and the image cannot be read
    """
    if isinstance(image, np.ndarray):
        pixels = image
    else:
        pixels = cv2.imread(str(image))
        if pixels is None:
            raise FileNotFoundError(f"Image cannot be read: {image}")

    # Intensity is analysed on a single channel
    gray = pixels if pixels.ndim == 2 else cv2.cvtColor(pixels, cv2.COLOR_BGR2GRAY)
    if gray.size == 0:
        return False

    saturated_fraction = np.count_nonzero(gray >= 255) / gray.size
    return bool(saturated_fraction > threshold_ratio)

In [812]:
def split_image(name, image_path, x, y, width=400, height=38, show=True, output_dir="split"):
    """
    Crops a rectangle out of an image file and saves it under output_dir.

    :param name: file name (including extension) of the saved crop
    :param image_path: path of the image to crop
    :param x: left edge of the crop as a fraction of the image width
    :param y: top edge of the crop as a fraction of the image height
    :param width: crop width in pixels
    :param height: crop height in pixels
    :param show: when True, display the crop in a blocking OpenCV window
    :param output_dir: directory the crop is written to (created if missing)
    :return: path of the saved crop
    :raises FileNotFoundError: if the image cannot be read
    :raises ValueError: if the requested rectangle lies outside the image
    :raises OSError: if the crop cannot be written
    """
    image = cv2.imread(image_path)
    if image is None:
        raise FileNotFoundError(f"Image cannot be read: {image_path}")

    h, w = image.shape[:2]

    # Convert the relative origin into pixel coordinates
    start_x = int(w * x)
    start_y = int(h * y)

    # Slicing clips silently at the image border, so an out-of-range origin gives an empty crop
    cropped_image = image[start_y:start_y + height, start_x:start_x + width]
    if cropped_image.size == 0:
        raise ValueError(f"Crop region for '{name}' lies outside the image.")

    if show:
        cv2.imshow(name, cropped_image)
        cv2.waitKey(0)
        cv2.destroyAllWindows()

    # cv2.imwrite reports failure through its return value, e.g. when the directory is missing
    os.makedirs(output_dir, exist_ok=True)
    path = f"{output_dir}/{name}"
    if not cv2.imwrite(path, cropped_image):
        raise OSError(f"Could not write cropped image to {path}")

    return path
    

In [813]:
# Function to preprocess the image
def preprocess_image(image):
    """
    Prepares a colour ID-card image for OCR and writes the result to disk.

    Steps: grayscale conversion, contrast report, masking of the photo, header
    and signature regions, Otsu binarisation, resize to 795x500, save as PNG.

    :param image: colour image array (converted with cv2.COLOR_BGR2GRAY)
    :return: path of the saved, preprocessed image
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Standard deviation of the intensities serves as a simple contrast measure
    print("Contrast: ", np.std(gray))

    # Box filter with a 1x1 kernel: each pixel is averaged with itself only,
    # so this currently leaves the image unchanged
    smoothed = cv2.blur(gray, (1, 1))

    # Region borders are fractions of the card size, assuming a consistent layout
    rows, cols = smoothed.shape
    photo_bottom, photo_right = int(rows * 0.87), int(cols * 0.31)
    header_bottom = int(rows * 0.21)
    signature_top, signature_left = int(rows * 0.68), int(cols * 0.335)

    # 255 keeps a pixel, 0 blanks it out
    keep = np.full_like(smoothed, 255)
    keep[:photo_bottom, :photo_right] = 0        # photo
    keep[:header_bottom, :] = 0                  # header
    keep[signature_top:, signature_left:] = 0    # signature (bottom right corner)

    masked = cv2.bitwise_and(smoothed, keep)

    # Otsu picks the threshold itself, so the explicit threshold value 0 is ignored
    binary = cv2.threshold(masked, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]

    output_path = 'temp_processed_image.png'
    cv2.imwrite(output_path, cv2.resize(binary, (795, 500)))

    return output_path

In [814]:
# Function to perform OCR using Tesseract
def perform_ocr(temp_processed_path, cat):
    """
    Runs Tesseract on an image file and post-processes the text for the given field.

    :param temp_processed_path: path of the image to read
    :param cat: field category; 'gender', 'nid' and 'dob' restrict the recognised
                characters, and 'nid' and 'dob' additionally validate the result.
                Any other value runs unrestricted OCR.
    :return: for 'nid' the matched ID or "Couldn't read NID"; for 'dob' the matched
             date or "No valid date found"; otherwise the raw OCR text
    """
    base_config = r'--oem 3 --psm 6'
    configs = {
        'gender': base_config + r' -c tessedit_char_whitelist=MF',
        'nid': base_config + r' -c tessedit_char_whitelist=ABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890',
        # Letters that occur in the month abbreviations, digits, and the space separator
        'dob': base_config + r' -c tessedit_char_whitelist="JFMASONDanebrpyulgpctovc0123456789 "',
    }
    custom_config = configs.get(cat, base_config)

    # The context manager closes the file handle once OCR has finished
    with Image.open(temp_processed_path) as ocr_input:
        extracted_text = pytesseract.image_to_string(ocr_input, config=custom_config)

    if cat == 'nid':
        # NID formats: "S123456789012D" or "S1234567890123" - a letter, 12 digits,
        # then a letter or digit. Both word boundaries apply to the whole token; the
        # earlier `\bA|B\b` form anchored each alternative on one side only and
        # therefore accepted fragments of longer strings.
        nid_pattern = r'\b[A-Z][0-9]{12}[A-Z0-9]\b'
        match = re.search(nid_pattern, extracted_text)
        return match.group(0) if match else "Couldn't read NID"

    if cat == 'dob':
        # Date format "DD MMM YYYY" with years 1900-2023
        date_pattern = r'\b(0[1-9]|[12][0-9]|3[01]) (Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) (19[0-9]{2}|20[0-1][0-9]|202[0-3])\b'
        match = re.search(date_pattern, extracted_text)
        return match.group(0) if match else "No valid date found"

    return extracted_text

In [815]:
# Reference image passed to straighten_image as the alignment template.
# NOTE(review): the variable name is misspelled ("stright"), but the pipeline cell
# refers to it by this exact name, so both must be renamed together.
#image_path = 'ID Summun Roshan.jpg'
stright_image_path = 'ID Summun Roshan.jpg'

# Input image to process; the commented-out lines are alternative sample inputs.
image_path = 'dicksen.jpg'
#image_path = 'mom.png'
#image_path = 'id_mom_rotated.png'
#image_path = 'id_roshan_rotated.png'

In [816]:
# Report whether the input photo looks overexposed
print("Glare detected." if detect_glare(image_path) else "No glare detected.")

# Align the input photo with the reference template
rectified_image = straighten_image(image_path, stright_image_path)

cv2.imshow('Rectified Image', rectified_image)
cv2.waitKey(0)
cv2.destroyAllWindows()

# Mask, binarize and save the aligned card; the returned value is a file path
preprocessed_image = preprocess_image(rectified_image)

# One crop per field: (output file, relative x, relative y, size overrides)
field_crops = (
    ('surname.jpg', 0.31, 0.20, {}),
    ('forename.jpg', 0.31, 0.33, {}),
    ('maiden.jpg', 0.31, 0.46, {}),
    ('gender.jpg', 0.31, 0.61, {'width': 45}),
    ('dob.jpg', 0.45, 0.6, {'width': 200, 'height': 50}),
    ('nid.jpg', 0, 0.88, {'width': 265, 'height': 50}),
)
surname, forename, maiden, gender, dob, nid = [
    split_image(file_name, preprocessed_image, rel_x, rel_y, **size)
    for file_name, rel_x, rel_y, size in field_crops
]

# OCR every crop with the character restrictions of its field
print("\nExtracted details:")
for crop_path, category in (
    (surname, 'surname'),
    (forename, 'forename'),
    (maiden, 'maiden'),
    (gender, 'gender'),
    (dob, 'dob'),
    (nid, 'nid'),
):
    print(perform_ocr(crop_path, cat=category).strip())


Glare detected.
Contrast:  43.8805520050961

Extracted details:
Veloopillay
Dicksen

M
31 Jul 1995
No valid NID found
