### Import the required libraries

In [2]:
from tensorflow.keras.layers.experimental.preprocessing import StringLookup
from tensorflow import keras
import cv2
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
import os
from tqdm import tqdm
import tensorflow as tf
from tensorflow.python.compiler.tensorrt import trt_convert as trt
import numpy as np
import onnxruntime as ort

### Create a function that loads the vocabulary and builds the lookup mappers

In [4]:
def load_StringLookup(vocab_path='vocabulary.txt'):
    '''
        Load the character vocabulary from a text file and rebuild the
        two lookup layers from it.

        Parameters
        ----------
        vocab_path : str, optional
            Path of the UTF-8 vocabulary file holding one token per line.
            Defaults to 'vocabulary.txt' (resolved against the current
            working directory).

        Returns
        -------
        tuple
            (char_to_num, num_to_char): a StringLookup layer mapping
            tokens to integer ids, and the inverted layer mapping ids
            back to tokens. Both are built from the same vocabulary list.
    '''
    # Split on '\n' only (not str.splitlines) so that tokens which are
    # themselves whitespace/separator characters are kept as they are.
    # NOTE(review): a trailing newline in the file would add a final ''
    # token to the vocabulary - confirm the file is written without one.
    with open(vocab_path, 'r', encoding='utf-8') as vocab_file:
        vocabulary = vocab_file.read().split('\n')

    # Forward mapping: token -> integer id.
    char_to_num = tf.keras.layers.StringLookup(
        vocabulary=vocabulary,
        mask_token=None,
    )

    # Inverse mapping: integer id -> token. Built from the same class and
    # the same vocabulary so the two layers always agree.
    num_to_char = tf.keras.layers.StringLookup(
        vocabulary=vocabulary,
        mask_token=None,
        invert=True,
    )
    return char_to_num, num_to_char

In [5]:
# Build both lookup layers once at module level; decode_batch_predictions
# reads num_to_char as a global when turning label ids back into text.
char_to_num, num_to_char = load_StringLookup()

### Create the image-processing functions

In [6]:
def segment_the_line(thresh):
    '''
        Find the vertical extent (row range) of the text in an image.

        A row counts as a text row when its smallest pixel value
        floor-divides by 255 to 0, i.e. the row minimum lies in [0, 255).

        Parameters
        ----------
        thresh : numpy.ndarray
            Image whose first axis indexes rows.

        Returns
        -------
        tuple of int
            (start, end) slice bounds: `start` is 3 rows above the first
            text row and `end` (exclusive) leaves 3 rows below the last
            text row, both clamped to [0, len(thresh)].
            When no row contains text (or the image has no pixels) the
            full extent (0, len(thresh)) is returned so that slicing with
            the result never produces an unintended empty crop.
    '''
    height = len(thresh)
    binary = np.asarray(thresh) // 255.0

    # Nothing to scan: keep whatever extent the input has.
    if binary.size == 0:
        return 0, height

    # Flatten everything except the row axis so each row's minimum is
    # taken over all of its pixels (and channels, if any).
    row_has_text = binary.reshape(height, -1).min(axis=1) == 0
    text_rows = np.flatnonzero(row_has_text)

    # Blank image: fall back to the whole image instead of leaking
    # sentinel values to the caller.
    if text_rows.size == 0:
        return 0, height

    first_row = int(text_rows[0])
    last_row = int(text_rows[-1])
    return max(first_row - 3, 0), min(last_row + 4, height)

def segmentTheLine(thresh):
    '''
        Crop a black-and-white image to the region that contains text,
        removing the surrounding white background on all four sides.

        The row range is found with segment_the_line; the column range is
        found by running the same scanner on a view of the image with its
        two spatial axes swapped. This yields the same crop as rotating
        the image by 90 degrees, cropping rows and rotating back, without
        moving any pixel data around.

        Parameters
        ----------
        thresh : numpy.ndarray
            Image with rows on axis 0 and columns on axis 1.

        Returns
        -------
        numpy.ndarray
            A C-contiguous copy of the cropped region, in the original
            orientation.
    '''
    # Vertical crop.
    top, bottom = segment_the_line(thresh)
    rows_cropped = thresh[top:bottom]

    # Horizontal crop: with the axes swapped, columns become rows, so the
    # row scanner reports the column bounds of the text.
    left, right = segment_the_line(np.swapaxes(rows_cropped, 0, 1))

    # Return a standalone contiguous array rather than a strided view of
    # the input, so downstream OpenCV calls receive a plain buffer.
    return np.ascontiguousarray(rows_cropped[:, left:right])

def process_nationalID_address(image):
    '''
        Turn a BGR image of a text line into a cropped black-and-white
        image.

        Steps, in order:
        - resize to 10000x1000 (width x height) and convert to grayscale
        - thresholding: binarize with a fixed threshold of 85
        - enhancement: 7x7 Gaussian smoothing followed by a 3x3
          sharpening kernel
        - erosion with a 9x3 kernel, applied twice
        - resize to 1000x100 and crop to the text with segmentTheLine
        - final binarization, because resizing reintroduces gray values

        Parameters
        ----------
        image : numpy.ndarray
            BGR image as returned by cv2.imread.

        Returns
        -------
        numpy.ndarray
            Single-channel image containing only the values 0 and 255.
    '''
    upscaled = cv2.resize(image, (10000, 1000))

    # Convert the image to grayscale
    gray = cv2.cvtColor(upscaled, cv2.COLOR_BGR2GRAY)

    # A single binary threshold is enough: its output only holds 0 and
    # 255, so thresholding that output again at 0 returns the same image.
    _, binary = cv2.threshold(gray, 85, 255, cv2.THRESH_BINARY)

    # Image enhancement: smooth first, then sharpen the edges again.
    smoothed_image = cv2.GaussianBlur(binary, (7, 7), 0)
    sharpen_kernel = np.array([[-1,-1,-1], [-1, 9,-1],[-1,-1,-1]])
    sharpened = cv2.filter2D(smoothed_image, -1, sharpen_kernel)

    # Morphological clean-up. Erosion is a local-minimum filter, so on a
    # white background it grows the dark regions.
    erode_kernel = np.ones((9, 3), np.uint8)
    img_erosion = cv2.erode(sharpened, erode_kernel, iterations=2)

    # Shrink, crop to the text, then force the result back to pure
    # black/white (any non-zero pixel becomes 255).
    result = cv2.resize(img_erosion, (1000, 100))
    result = segmentTheLine(result)
    _, result = cv2.threshold(result, 0, 255, cv2.THRESH_BINARY)
    return result

def preprocess_image(image_paths, width, hight):
    '''
        Load images from disk, preprocess each one with
        process_nationalID_address and stack them into a single batch.

        Parameters
        ----------
        image_paths : iterable of str
            Paths of the image files to load.
        width : int
            Target width of every preprocessed image, in pixels.
        hight : int
            Target height of every preprocessed image, in pixels.

        Returns
        -------
        numpy.ndarray
            Array of shape (number of paths, hight, width, 1). Each pixel
            is the source pixel floor-divided by 255.0, so only pixels
            equal to 255 map to 1.0 and every darker pixel maps to 0.0.

        Raises
        ------
        OSError
            If an image file is missing or cannot be decoded.
    '''
    images = []
    for path in tqdm(image_paths):
        image = cv2.imread(path)  # Load the image using OpenCV
        # cv2.imread reports failure by returning None instead of raising;
        # fail here with the offending path rather than deep in the pipeline.
        if image is None:
            raise OSError(f"cv2.imread could not read image: {path!r}")

        image = process_nationalID_address(image)
        image = cv2.resize(image, (width, hight))

        # Floor division binarizes rather than rescales: the gray values
        # introduced by the resize all collapse to 0.0.
        image = image // 255.0

        images.append(image)

    # Add the trailing channel axis expected by the convolutional network.
    images = np.array(images)
    images = images.reshape(images.shape[0], hight, width, 1)
    return images

### Create a function that decodes the model output and maps it to the final predicted text

In [7]:
# Upper bound on the number of decoded label positions kept per sample;
# decode_batch_predictions uses it to slice the ctc_decode output.
# NOTE(review): presumably the longest label length used in training - confirm.
max_len=68
def decode_batch_predictions(pred):
    '''
    Greedy CTC-decode a batch of model outputs and convert every decoded
    label sequence to a string.

    pred is indexed as (sample, time step, ...): its first dimension gives
    the batch size and its second the input length used for every sample.
    At most max_len label positions are kept per sample, entries equal to
    -1 are dropped, and the remaining ids are mapped to characters with
    num_to_char.

    Returns a list with one decoded string per sample.
    '''
    batch_size, time_steps = pred.shape[0], pred.shape[1]
    # Every sample is decoded over the full time axis.
    sequence_lengths = np.ones(batch_size) * time_steps

    # Greedy search. For complex tasks, beam search can be used instead.
    decoded = keras.backend.ctc_decode(
        pred, input_length=sequence_lengths, greedy=True
    )
    label_batch = decoded[0][0][:, :max_len]

    def labels_to_text(labels):
        # Keep only the positions that hold a real label id (not -1).
        valid_positions = tf.where(tf.math.not_equal(labels, -1))
        valid_labels = tf.gather(labels, valid_positions)
        joined = tf.strings.reduce_join(num_to_char(valid_labels))
        return joined.numpy().decode("utf-8")

    return [labels_to_text(labels) for labels in label_batch]

### Load the ONNX model and run inference

In [14]:
# Load the exported ONNX prediction model into an ONNX Runtime inference
# session; the path is relative to the notebook's working directory.
ort_session = ort.InferenceSession("./models/my_prediction_model_final.onnx")

In [15]:
# Look up the names of the model's first input and first output; the
# inference cell uses them to build the feed dictionary and to select
# which output to fetch.
input_name = ort_session.get_inputs()[0].name
output_name = ort_session.get_outputs()[0].name
# Bare last expression so the notebook displays both names.
input_name , output_name

('image', 'dense3')

In [16]:
# Paths of the sample image and of its ground-truth transcription.
image_path = './OCR_Text_Dataset/OCR_Text/1.jpg'
text_path = './OCR_Text_Dataset/OCR_Text/1.txt'

# Preprocess the image into a batch of one, resized to 300x20 (width x height).
image = preprocess_image([image_path], 300, 20)

# Read the expected text; the context manager closes the file handle
# as soon as the content has been read.
with open(text_path, "r",  encoding="utf8") as label_file:
    actual_text = label_file.read()

100%|████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 10.11it/s]


In [17]:
# Pass the input to the model and print the predicted text.

# The preprocessed batch holds floor-division results (float); cast it to
# float32 before feeding it to the session.
# NOTE(review): assumes the ONNX model's input is declared as float32 - confirm.
input_data = np.array(image, dtype=np.float32)  

# Request only the output named above; run returns a list with one entry
# per requested output.
result = ort_session.run([output_name], {input_name: input_data})

# Decode the batch and keep the text of its first (only) sample.
predicted_text = decode_batch_predictions(np.array(result[0]))[0]

print("actual text: ", actual_text) 
print("predicted text: ", predicted_text)

actual text:  1641 شارع أم الرخم متفرع من زحوم في ١٢٥٨ أردناستانغ
predicted text:  103 شارع أم الرمم متفرع من رموم في ١٠١١ أرالستان
