In [11]:
import os

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np

def decode_batch_predictions(pred , max_length = 5):
    """Greedy-CTC-decode a batch of model outputs into text strings.

    Args:
        pred: Batch of network outputs indexed as (batch, time, classes);
            ``pred.shape[0]`` is used as the batch size and ``pred.shape[1]``
            as the sequence length of every sample.
        max_length: Maximum number of decoded labels kept per sample.

    Returns:
        A list with one decoded Python ``str`` per sample in ``pred``.

    Note:
        Relies on the module-level ``num_to_char`` lookup layer, which must be
        defined before this function is called.
    """
    # Every sample is decoded over the full time dimension.
    input_len = np.ones(pred.shape[0]) * pred.shape[1]
    # Use greedy search. For complex tasks, you can use beam search
    results = keras.backend.ctc_decode(pred, input_length=input_len, greedy=True)[0][0][
        :, :max_length
    ]
    # Iterate over the results and get back the text
    output_text = []
    for res in results:
        # ctc_decode pads sequences shorter than the longest one with -1.
        # Drop that padding so it is not mapped to the lookup's OOV token.
        res = tf.boolean_mask(res, tf.not_equal(res, -1))
        res = tf.strings.reduce_join(num_to_char(res)).numpy().decode("utf-8")
        output_text.append(res)
    return output_text

# Restore the trained network from the "ocr-model" saved-model location.
model = keras.models.load_model("ocr-model")

# Inference model: runs from the `image` input layer to the `dense2` output,
# leaving out whatever the training graph attaches after `dense2`.
prediction_model = keras.models.Model(
    inputs=model.get_layer("image").input,
    outputs=model.get_layer("dense2").output,
)
prediction_model.summary()


Model: "model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 image (InputLayer)          [(None, 200, 50, 1)]      0         
                                                                 
 Conv1 (Conv2D)              (None, 200, 50, 32)       320       
                                                                 
 pool1 (MaxPooling2D)        (None, 100, 25, 32)       0         
                                                                 
 Conv2 (Conv2D)              (None, 100, 25, 64)       18496     
                                                                 
 pool2 (MaxPooling2D)        (None, 50, 12, 64)        0         
                                                                 
 reshape (Reshape)           (None, 50, 768)           0         
                                                                 
 dense1 (Dense)              (None, 50, 64)            4921

In [5]:
# Target image size in pixels; encode_single_sample resizes every image to
# (img_height, img_width) before transposing it for the model.
img_width = 200
img_height = 50
def encode_single_sample(img):
    """Turn one PNG image into a single-item batch for the OCR model.

    Args:
        img: Either a filesystem path (``str`` or ``os.PathLike``) to a PNG
            file, or the raw encoded PNG bytes / a scalar string tensor
            holding them.

    Returns:
        A float32 tensor of shape ``(1, img_width, img_height, 1)`` with
        values in the [0, 1] range.
    """
    # 1. Read image: a path must be loaded from disk first, otherwise the
    #    file name itself would be handed to the PNG decoder.
    if isinstance(img, (str, os.PathLike)):
        img = tf.io.read_file(os.fspath(img))
    else:
        img = tf.convert_to_tensor(img)
    # 2. Decode and convert to grayscale
    img = tf.io.decode_png(img, channels=1)
    # 3. Convert to float32 in [0, 1] range
    img = tf.image.convert_image_dtype(img, tf.float32)
    # 4. Resize to the desired size
    img = tf.image.resize(img, [img_height, img_width])
    # 5. Transpose the image because we want the time
    # dimension to correspond to the width of the image.
    img = tf.transpose(img, perm=[1, 0, 2])
    # 6. Add a leading batch dimension: (width, height, 1) -> (1, width, height, 1)
    return tf.expand_dims(img, axis=0)


# Preprocess the sample image into a one-item batch for prediction.
img = encode_single_sample('84458.png')

In [6]:
# Show the layer-by-layer architecture of the full loaded model.
model.summary()

Model: "ocr_model_v1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 image (InputLayer)             [(None, 200, 50, 1)  0           []                               
                                ]                                                                 
                                                                                                  
 Conv1 (Conv2D)                 (None, 200, 50, 32)  320         ['image[0][0]']                  
                                                                                                  
 pool1 (MaxPooling2D)           (None, 100, 25, 32)  0           ['Conv1[0][0]']                  
                                                                                                  
 Conv2 (Conv2D)                 (None, 100, 25, 64)  18496       ['pool1[0][0]']       

In [None]:
# Inspect the first (and only) preprocessed sample in the batch.
img[0]

In [8]:
# Raw network output for the sample batch: one row of class scores per
# time step. Decoding these scores into text is done in a later cell.
prediction_model.predict(img)
    

2023-02-27 17:07:17.625696: W tensorflow/stream_executor/gpu/asm_compiler.cc:80] Couldn't get ptxas version string: INTERNAL: Couldn't invoke ptxas --version
2023-02-27 17:07:17.626401: W tensorflow/stream_executor/gpu/redzone_allocator.cc:314] INTERNAL: Failed to launch ptxas
Relying on driver to perform ptx compilation. 
Modify $PATH to customize ptxas location.
This message will be only logged once.




array([[[4.85318878e-08, 6.53915833e-07, 3.77822926e-06, 2.60472802e-06,
         1.54487304e-06, 3.49422430e-06, 1.22856784e-06, 7.06033825e-06,
         5.77448372e-06, 8.81441679e-07, 9.01852388e-07, 9.99971986e-01],
        [1.71593162e-09, 3.57510359e-08, 2.36733754e-07, 1.72582787e-07,
         1.40003124e-07, 3.32044891e-07, 1.04682250e-07, 4.88339822e-07,
         3.25696192e-07, 7.88166119e-08, 7.16736039e-08, 9.99997973e-01],
        [6.30686170e-10, 1.42117642e-08, 1.00333516e-07, 8.66742127e-08,
         8.14766423e-08, 1.40929615e-07, 5.57380240e-08, 1.76568207e-07,
         1.31383132e-07, 4.86950071e-08, 3.74103912e-08, 9.99999046e-01],
        [5.25087029e-10, 1.06556186e-08, 8.30584526e-08, 8.00609428e-08,
         8.31833873e-08, 1.20712500e-07, 4.57857645e-08, 1.29013273e-07,
         1.18952947e-07, 5.50108616e-08, 3.28098508e-08, 9.99999285e-01],
        [6.15042128e-10, 1.01836930e-08, 9.14476317e-08, 8.21333828e-08,
         1.03881597e-07, 1.67583252e-07, 4.8743

In [14]:
# Vocabulary: the ten decimal digits as single-character strings, ascending.
characters = [str(digit) for digit in range(10)]

# Forward lookup: character -> integer label
char_to_num = layers.StringLookup(vocabulary=characters, mask_token=None)

# Inverse lookup: integer label -> character, sharing the forward vocabulary
num_to_char = layers.StringLookup(
    vocabulary=char_to_num.get_vocabulary(), mask_token=None, invert=True
)


# Run the inference model on the preprocessed batch, then CTC-decode the
# per-time-step scores into one text string per image.
preds = prediction_model.predict(img)
pred_texts = decode_batch_predictions(preds)



In [15]:
# Display the decoded text, one entry per image in the batch.
pred_texts

['84458']