In [7]:
import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Activation, Reshape, BatchNormalization, Bidirectional, LSTM, Lambda
from tensorflow.keras.optimizers import SGD
import cv2
import numpy as np
import pandas as pd
import codecs
import math
import os

In [5]:
window_height = 64  #windown height
window_width = 64   #window width
window_shift = window_width - 2 #window shift

#CNN related configurations
MPoolLayers_ALL = 5	#Nbr of all maxpool layers
MPoolLayers_H = 2	#Nbr of maxpool in horizontal dimension
LastFilters = 512	#Nbr of feature maps at the last conv layer

#LSTM related configurations
NUnits = 256    #Number of units in forward/backward LSTM
NLayers = 3     #Number of layers in BLSTM

In [9]:
FV = int(window_height / math.pow(2, MPoolLayers_ALL))
NFeatures = FV * LastFilters

#%%
input_data = Input(shape=(window_height, window_width, 1))

convolution1 = Conv2D(filters=64, kernel_size=(1,1))(input_data)
convolution1 = BatchNormalization(axis = -1)(convolution1)
convolution1 = Activation("relu")(convolution1)

convolution2 = Conv2D(filters=64, kernel_size=(1,1))(convolution1)
convolution2 = BatchNormalization(axis = -1)(convolution2)
convolution2 = Activation("relu")(convolution2)

pooling1 = MaxPooling2D(pool_size=(2,2), strides=(2,2))(convolution2)

convolution3 = Conv2D(filters=128, kernel_size=(1,1))(pooling1)
convolution3 = BatchNormalization(axis = -1)(convolution3)
convolution3 = Activation("relu")(convolution3)

convolution4 = Conv2D(filters=128, kernel_size=(1,1))(convolution3)
convolution4 = BatchNormalization(axis = -1)(convolution4)
convolution4 = Activation("relu")(convolution4)

pooling2 = MaxPooling2D(pool_size=(2,2), strides=(2,2))(convolution4)

convolution5 = Conv2D(filters=256, kernel_size=(1,1))(pooling2)
convolution5 = BatchNormalization(axis = -1)(convolution5)
convolution5 = Activation("relu")(convolution5)

convolution6 = Conv2D(filters=256, kernel_size=(1,1))(convolution5)
convolution6 = BatchNormalization(axis = -1)(convolution6)
convolution6 = Activation("relu")(convolution6)

convolution7 = Conv2D(filters=256, kernel_size=(1,1))(convolution6)
convolution7 = BatchNormalization(axis = -1)(convolution7)
convolution7 = Activation("relu")(convolution7)

pooling3 = MaxPooling2D(pool_size=(2,1), strides=(2,1))(convolution7)

convolution8 = Conv2D(filters=512, kernel_size=(1,1))(pooling3)
convolution8 = BatchNormalization(axis = -1)(convolution8)
convolution8 = Activation("relu")(convolution8)

convolution9 = Conv2D(filters=512, kernel_size=(1,1))(convolution8)
convolution9 = BatchNormalization(axis = -1)(convolution9)
convolution9 = Activation("relu")(convolution9)

convolution10 = Conv2D(filters=512, kernel_size=(1,1))(convolution9)
convolution10= BatchNormalization(axis = -1)(convolution10)
convolution10 = Activation("relu")(convolution10)

pooling4 = MaxPooling2D(pool_size=(2,1), strides=(2,1))(convolution10)

convolution11 = Conv2D(filters=512, kernel_size=(1,1))(pooling4)
convolution11= BatchNormalization(axis = -1)(convolution11)
convolution11 = Activation("relu")(convolution11)

convolution12 = Conv2D(filters=512, kernel_size=(1,1))(convolution11)
convolution12= BatchNormalization(axis = -1)(convolution12)
convolution12 = Activation("relu")(convolution12)

convolution13 = Conv2D(filters=512, kernel_size=(1,1))(convolution12)
convolution13= BatchNormalization(axis = -1)(convolution13)
convolution13 = Activation("relu")(convolution13)

pooling5 = MaxPooling2D(pool_size=(2,1), strides=(2,1))(convolution13)

convolution_full = Reshape(target_shape=(LastFilters * FV, 16))(pooling5)

bidir_LSTM1 = Bidirectional(LSTM(units = NUnits, return_sequences=True))(convolution_full)
bidir_LSTM2 = Bidirectional(LSTM(units = NUnits, return_sequences=True))(bidir_LSTM1)
y_pred = Bidirectional(LSTM(units = NUnits))(bidir_LSTM2)

Model(inputs = input_data, outputs = y_pred).summary()

#%%
# the actual loss calc occurs here despite it not being an internal Keras loss function

def ctc_lambda_func(args):
    y_pred, labels, input_length, label_length = args
    # the 2 is critical here since the first couple outputs of the RNN tend to be garbage:
    #y_pred = y_pred[:, 2:, :]
    return K.ctc_batch_cost(labels, y_pred, input_length, label_length)

#%%
labels = Input(shape=[47], dtype='float32')
input_length = Input(shape=[1], dtype='int64')
label_length = Input(shape=[1], dtype='int64')
# Keras doesn't currently support loss funcs with extra parameters
# so CTC loss is implemented in a lambda layer
loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')([y_pred, labels, input_length, label_length])

# clipnorm seems to speeds up convergence
sgd = SGD(lr=0.02, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5)

model = Model(inputs=[input_data, labels, input_length, label_length], outputs=loss_out)

# the loss calc occurs elsewhere, so use a dummy lambda func for the loss
model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=sgd)

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 64, 64, 1)]       0         
                                                                 
 conv2d (Conv2D)             (None, 64, 64, 64)        128       
                                                                 
 batch_normalization (BatchN  (None, 64, 64, 64)       256       
 ormalization)                                                   
                                                                 
 activation (Activation)     (None, 64, 64, 64)        0         
                                                                 
 conv2d_1 (Conv2D)           (None, 64, 64, 64)        4160      
                                                                 
 batch_normalization_1 (Batc  (None, 64, 64, 64)       256       
 hNormalization)                                             

_________________________________________________________________


ValueError: Exception encountered when calling layer "ctc" (type Lambda).

Dimension must be 2 but is 3 for '{{node ctc/transpose_2}} = Transpose[T=DT_FLOAT, Tperm=DT_INT32](Placeholder, ctc/transpose_2/perm)' with input shapes: [?,512], [3].

Call arguments received by layer "ctc" (type Lambda):
  • inputs=['tf.Tensor(shape=(None, 512), dtype=float32)', 'tf.Tensor(shape=(None, 47), dtype=float32)', 'tf.Tensor(shape=(None, 1), dtype=int64)', 'tf.Tensor(shape=(None, 1), dtype=int64)']
  • mask=None
  • training=False

In [8]:
vec_per_window = window_width / math.pow(2, MPoolLayers_H)

#%%
#reading the class files
data = {}
with codecs.open("Data\\class.txt", 'r', encoding='utf-8') as cF:
    data = cF.read().split('\n')
    
#%%
def returnClasses(string):
    text = list(string)
    text = ["<SPACE>"] + ["<SPACE>" if x==" " else x for x in text] + ["<SPACE>"]
    classes = [data.index(x) if x in data else 2 for x in text]
    return classes
    
#%%
infile = pd.read_csv(".\\Data\\list.csv")

#%%
inputList = []
seqLens = []
targetList = []

for record in range(0, len(infile)):
    path = infile["Path"][record]
    annotation = infile["Annotation"][record]
    #reading images from the path
    print("Reading file: " + path)
    image = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    h, w = np.shape(image)
    print(np.shape(image))
    
    #setting factor if the image is greater than the window height
    if (h > window_height): factor = window_height/h
    else: factor = 1
    
    #resizing the image to the specified window height
    image = cv2.resize(image, None, fx=factor, fy=factor, interpolation = cv2.INTER_CUBIC)
    h, w = np.shape(image)
        
    featureSet = []
    
    winId = 0
    while True:
        s = winId * window_shift
        e = s + window_width
        
        if e > w:
            break
        
        wnd = image[:h, s:e]
        featureSet.append(wnd)
        winId = winId + 1
        
    inputList.append(featureSet)
    
    #taking the sequence length
    winId = 0
    wpd = 0
    while True:
        s = winId * window_shift
        e = s + window_width
        
        if e > w:
            sl = (winId+1) * vec_per_window
            seqLens.append(sl)
            break
        winId = winId + 1
        
    #taking the text annotations from the csv file and converting to labels
    targetList.append(returnClasses(annotation))
        

FileNotFoundError: [Errno 2] No such file or directory: 'Data\\class.txt'