In [13]:
from keras import backend as K
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Input, Dense, Activation, LeakyReLU, Permute, Bidirectional
from keras.layers import Reshape, Lambda, BatchNormalization
from keras.layers.merge import add, concatenate
from keras.models import Model
from keras.layers.recurrent import LSTM
import tensorflow as tf

def default_model_params(img_height, vocabulary_size):
    """Return the default hyper-parameter dictionary for the CRNN model.

    Args:
        img_height: stored under 'img_height'.
        vocabulary_size: stored under 'vocabulary_size'.

    Returns:
        A new dict on every call, with these keys: 'img_height', 'img_width'
        (None), 'batch_size', 'img_channels', 'conv_blocks', 'conv_filter_n',
        'conv_filter_size', 'conv_pooling_size', 'rnn_units', 'rnn_layers'
        and 'vocabulary_size'.
    """
    n_conv_blocks = 4
    return {
        'img_height': img_height,
        'img_width': None,
        'batch_size': 16,
        'img_channels': 1,
        'conv_blocks': n_conv_blocks,
        'conv_filter_n': [32, 64, 128, 256],
        # One [3, 3] kernel and one [2, 2] pooling window per conv block;
        # each entry is its own list object.
        'conv_filter_size': [[3, 3] for _ in range(n_conv_blocks)],
        'conv_pooling_size': [[2, 2] for _ in range(n_conv_blocks)],
        'rnn_units': 512,
        'rnn_layers': 2,
        'vocabulary_size': vocabulary_size,
    }

def ctc_loss(y_true, y_pred, input_length, label_length, real_y_true_ts):
    """Forward to ``tf.keras.backend.ctc_batch_cost``.

    Args:
        y_true: accepted but not used by this function; the labels handed to
            the backend come from ``real_y_true_ts`` instead.
        y_pred: passed through as the predictions.
        input_length: passed through as the prediction lengths.
        label_length: passed through as the label lengths.
        real_y_true_ts: passed to the backend as its ``y_true`` argument.

    Returns:
        Whatever ``ctc_batch_cost`` returns for the given batch.
    """
    batch_cost = tf.keras.backend.ctc_batch_cost(
        y_true=real_y_true_ts,
        y_pred=y_pred,
        input_length=input_length,
        label_length=label_length,
    )
    return batch_cost


def ctc_crnn(params, width_rem = 128):
    """Build the CRNN: conv stack -> stacked bidirectional LSTMs -> softmax.

    Args:
        params: hyper-parameter dict (see ``default_model_params``). Keys read
            here: 'img_height', 'img_width', 'img_channels', 'conv_blocks',
            'conv_filter_n', 'conv_filter_size', 'conv_pooling_size',
            'rnn_units', 'rnn_layers' and 'vocabulary_size'.
        width_rem: kept only so existing callers keep working. It used to pin
            the image width for a hard-coded reshape; the time axis is now
            taken from the tensor itself, so the value is ignored.

    Returns:
        A Keras ``Model`` mapping 'the_input', shaped
        (batch, img_height, img_width, img_channels), to per-frame class
        probabilities shaped (batch, frames, vocabulary_size + 1). ``frames``
        is the image width divided by the product of the pooling widths.
        The extra class is presumably the CTC blank -- confirm against the
        label encoding used for training.
    """
    input_shape = (params['img_height'], params['img_width'], params['img_channels'])
    inputs = Input(name='the_input', shape=input_shape, dtype='float32')

    inner = inputs
    n_channels = params['img_channels']
    height_reduction = 1

    # Convolutional blocks: Conv -> BatchNorm -> LeakyReLU -> MaxPool.
    for i in range(params['conv_blocks']):
        pool_size = tuple(params['conv_pooling_size'][i])
        n_channels = params['conv_filter_n'][i]

        inner = Conv2D(n_channels, params['conv_filter_size'][i], padding='same',
                       name='conv' + str(i + 1), kernel_initializer='he_normal')(inner)
        inner = BatchNormalization()(inner)
        inner = LeakyReLU(0.2)(inner)
        inner = MaxPooling2D(pool_size=pool_size, strides=pool_size,
                             name='max' + str(i + 1))(inner)

        height_reduction *= pool_size[0]

    # Integer division mirrors the flooring done by the pooling layers, and
    # n_channels tracks the filters of the last block that was actually built.
    feature_dim = n_channels * (params['img_height'] // height_reduction)

    # (batch, height, width, channels) -> (batch, width, channels, height):
    # width becomes the time axis and the batch axis stays first, which is
    # the layout Keras recurrent layers expect by default.
    inner = Permute((2, 3, 1))(inner)
    # Flatten channels x height into one feature vector per frame. The -1
    # keeps the number of frames dynamic, so neither the batch size nor the
    # image width has to be hard-coded.
    inner = Reshape((-1, feature_dim))(inner)

    # Recurrent block: each layer consumes the previous layer's output and
    # returns the full sequence so a prediction exists for every frame.
    for _ in range(params['rnn_layers']):
        inner = Bidirectional(LSTM(params['rnn_units'], return_sequences=True))(inner)

    inner = Dense(params['vocabulary_size'] + 1)(inner)
    y_pred = Activation('softmax', name='softmax')(inner)

    return Model(inputs=[inputs], outputs=y_pred)

    

In [14]:
# Smoke test: build the network for 128-px-high images with a one-symbol
# vocabulary and print its layer summary.
crnn_params = default_model_params(img_height=128, vocabulary_size=1)
temp = ctc_crnn(crnn_params)  # `temp` stays bound to the model
temp.summary()

0
1
Model: "model_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 the_input (InputLayer)      [(None, 128, None, 1)]    0         
                                                                 
 conv1 (Conv2D)              (None, 128, None, 32)     320       
                                                                 
 batch_normalization_24 (Bat  (None, 128, None, 32)    128       
 chNormalization)                                                
                                                                 
 leaky_re_lu_24 (LeakyReLU)  (None, 128, None, 32)     0         
                                                                 
 max1 (MaxPooling2D)         (None, 64, None, 32)      0         
                                                                 
 conv2 (Conv2D)              (None, 64, None, 64)      18496     
                                                       