In [1]:
import tensorflow as tf
from tensorflow import keras 




In [13]:
image_path = "./words/g06/g06-011j/g06-011j-06-06.png"
batch_size = 32
padding_token = 99
image_width = 128
image_height = 32

characters = {'B', 'j', 'W', ',', 'O', 'v', 'E', '+', '6', 'S', 'h', 'R', "'", '?', 'C', '(', 'n', 'o', '4', 'f', 'U', 'a', 'r', '0', '2', 'L', 'Y', 'A', '5', 'e', 't', 'y', 'q', 'I', '9', '7', '&', 'K', 'k', 'F', 'w', 'V', 'P', '1', 'x', 'b', 'G', '"', ':', 'Z', 'T', 'u', '!', '3', 'd', '-', '8', 'N', ';', 'Q', 'c', 'l', 'D', 'g', 'i', '/', 'M', 'H', 'm', 'X', 's', ')', '*', 'p', 'J', 'z', '#', '.'}

In [14]:
from tensorflow.keras.layers.experimental.preprocessing import StringLookup

AUTOTUNE = tf.data.AUTOTUNE
#TensorFlow will automatically decide the optimal values for characters.

#mapping character to integer
char_to_num = StringLookup(vocabulary = list(characters), mask_token=None)

#mapping integers back to original characters.
num_to_char = StringLookup(vocabulary = char_to_num.get_vocabulary(), mask_token=None, invert = True)
def distortion_free_resize(image, img_size):
  w,h = img_size
  image = tf.image.resize(image, size=(h,w),preserve_aspect_ratio=True)  # size paraeter takes height first and then width
  #The resulting image might not be exactly (h,w) pixels but will fit within these dimensions without any distortion.
  # if any pixel left we will use padding

  #checking the padding height and width
  pad_height = h-tf.shape(image)[0]
  pad_width = w-tf.shape(image)[1]

  #Only necessary if you want to do some amount of padding on both sides.
  if pad_height % 2 !=0:
    height=pad_height//2
    pad_height_top = height +1
    pad_height_bottom = height
  else:
    pad_height_top = pad_height_bottom = pad_height // 2

  if pad_width %2 != 0:
    width = pad_width //2
    pad_width_left = width +1
    pad_width_right = width
  else:
    pad_width_left = pad_width_right = pad_width //2

  image = tf.pad(
      image,
      paddings = [
          [pad_height_top, pad_height_bottom],
          [pad_width_left, pad_width_right],
          [0,0],
      ],

  )
  image = tf.transpose(image, perm=[1,0,2])
  image = tf.image.flip_left_right(image)
  # because tf.resize uses (h,w) way
  return image


In [4]:
def preprocessing_image(image_path, img_size=(image_width, image_height)):
  image = tf.io.read_file(image_path)
  image = tf.image.decode_png(image,channels=1) # decode the png_encoded images into tensor , channel 1 for gray scale
  image = distortion_free_resize(image,img_size)
  image = tf.cast(image, tf.float32)/255.0  # data type conversion in tensor
  return image

In [5]:
tensor = preprocessing_image(image_path)
print("A slice of the tensor:\n", tensor.numpy()[:60, :60])

A slice of the tensor:
 [[[0.        ]
  [0.        ]
  [0.        ]
  ...
  [0.        ]
  [0.        ]
  [0.        ]]

 [[0.        ]
  [0.        ]
  [0.        ]
  ...
  [0.        ]
  [0.        ]
  [0.        ]]

 [[0.        ]
  [0.        ]
  [0.        ]
  ...
  [0.        ]
  [0.        ]
  [0.        ]]

 ...

 [[0.93893987]
  [0.5960302 ]
  [0.33796838]
  ...
  [1.        ]
  [1.        ]
  [1.        ]]

 [[0.9843137 ]
  [0.9385173 ]
  [0.5268786 ]
  ...
  [1.        ]
  [1.        ]
  [1.        ]]

 [[0.95729166]
  [0.9576374 ]
  [0.9381595 ]
  ...
  [1.        ]
  [1.        ]
  [1.        ]]]


In [28]:
max_len=21
def decode_batch_prediction(pred):
  input_len = np.ones(pred.shape[0])*pred.shape[1]

  # Use greedy search For complex tasks , you can use beam search.
  results = keras.backend.ctc_decode(pred, input_length=input_len, greedy = True )[0][0][:,:max_len]
  print(results)
  #Iterate over the results and get back the text.
  output_text = []
  for res in results:
    res = tf.gather(res, tf.where(tf.math.not_equal(res, -1)))
    print(res)
    res = tf.strings.reduce_join(num_to_char(res)).numpy().decode("utf-8")
    print(res)
    output_text.append(res)
  return output_text

In [6]:
loaded_model = keras.models.load_model('model_V5.h5')





In [34]:
import numpy as np
image = preprocessing_image(image_path)
print(image.shape)
image = np.expand_dims(image, axis=0)
preds = loaded_model.predict(image)
pred_texts = decode_batch_prediction(preds)
print(pred_texts)

(128, 32, 1)
tf.Tensor([[22 17 55 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1]], shape=(1, 21), dtype=int64)
tf.Tensor(
[[22]
 [17]
 [55]], shape=(3, 1), dtype=int64)
and
['and']


In [33]:
import tensorflow as tf
from tensorflow.keras.layers.experimental.preprocessing import StringLookup
characters = {'5', 'o', 'e', 'T', 'f', 'N', 'h', '!', 'S', '1', 'm', 'b', 'c', '.', 'q', 'U', '-', 'K', 'k', 'I', 'M', '2', 'W', 'Q', 'a', 'H', 'P', 'd', '?', '&', '*', 'V', 'R', 'w', 'r', ')', 'l', 'J', '/', 'D', 'i', 't', 'v', 'Y', 'A', 'E', '0', 'B', '"', ';', 'p', '(', '+', 'L', ',', "'", 'j', 'n', 'C', '3', '9', 'g', '4', 'F', '8', 'G', 'x', 'Z', 'y', '#', 'X', '7', 's', ':', 'z', 'O', '6', 'u'}
AUTOTUNE = tf.data.AUTOTUNE
#TensorFlow will automatically decide the optimal values for characters.

#mapping character to integer
char_to_num = StringLookup(vocabulary = list(characters), mask_token=None)

#mapping integers back to original characters.
num_to_char = StringLookup(vocabulary = mapping, mask_token=None, invert = True)

In [31]:
mapping =['[UNK]',
 'B',
 'j',
 'W',
 ',',
 'O',
 'v',
 'E',
 '+',
 '6',
 'S',
 'h',
 'R',
 "'",
 '?',
 'C',
 '(',
 'n',
 'o',
 '4',
 'f',
 'U',
 'a',
 'r',
 '0',
 '2',
 'L',
 'Y',
 'A',
 '5',
 'e',
 't',
 'y',
 'q',
 'I',
 '9',
 '7',
 '&',
 'K',
 'k',
 'F',
 'w',
 'V',
 'P',
 '1',
 'x',
 'b',
 'G',
 '"',
 ':',
 'Z',
 'T',
 'u',
 '!',
 '3',
 'd',
 '-',
 '8',
 'N',
 ';',
 'Q',
 'c',
 'l',
 'D',
 'g',
 'i',
 '/',
 'M',
 'H',
 'm',
 'X',
 's',
 ')',
 '*',
 'p',
 'J',
 'z',
 '#',
 '.']

In [12]:
char_to_num

<keras.src.layers.preprocessing.string_lookup.StringLookup at 0x215ed9d6690>

In [13]:
num_to_char

<keras.src.layers.preprocessing.string_lookup.StringLookup at 0x215f0667c50>

In [15]:
char_to_num(tf.strings.unicode_split("what", input_encoding="UTF-8"))




<tf.Tensor: shape=(4,), dtype=int64, numpy=array([ 3, 37, 32, 42], dtype=int64)>

In [17]:
char_to_num.get_vocabulary()

['[UNK]',
 '-',
 'r',
 'w',
 'S',
 'D',
 'Z',
 'Q',
 'J',
 'I',
 '9',
 '0',
 'j',
 'F',
 'o',
 'L',
 'n',
 'f',
 'M',
 'g',
 'e',
 'Y',
 'u',
 'R',
 'q',
 ':',
 'P',
 'V',
 ';',
 'A',
 '?',
 '+',
 'a',
 'b',
 'K',
 '4',
 'B',
 'h',
 'O',
 'N',
 '&',
 ',',
 't',
 'C',
 '/',
 'd',
 'H',
 'T',
 '#',
 '5',
 '3',
 '6',
 '"',
 's',
 'l',
 'E',
 'x',
 'c',
 '7',
 'p',
 'z',
 'W',
 'X',
 ')',
 "'",
 '1',
 '2',
 'y',
 '*',
 '8',
 '(',
 '!',
 'U',
 'G',
 'k',
 '.',
 'i',
 'v',
 'm']