In [31]:
import tensorflow as tf
from keras import backend as K
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Input, Dense, Activation, LeakyReLU, Permute
from keras.layers import Reshape, Lambda, BatchNormalization, Bidirectional
from keras.layers.merge import add, concatenate
from keras.models import Model
from keras.layers.recurrent import LSTM

from keras.utils.vis_utils import plot_model

def default_model_params(img_height, vocabulary_size):
    """Return the default hyper-parameter dictionary for the CRNN.

    Args:
        img_height: value stored under 'img_height'.
        vocabulary_size: value stored under 'vocabulary_size'.

    Returns:
        A new dict on every call; the nested lists are fresh objects too,
        so callers can mutate the result without affecting later calls.
    """
    n_blocks = 4
    return {
        'img_height': img_height,
        'img_width': None,
        'batch_size': 16,
        'img_channels': 1,
        'conv_blocks': n_blocks,
        'conv_filter_n': [32, 64, 128, 256],
        # One 3x3 kernel and one 2x2 pooling window per convolutional block.
        'conv_filter_size': [[3, 3] for _ in range(n_blocks)],
        'conv_pooling_size': [[2, 2] for _ in range(n_blocks)],
        'rnn_units': 512,
        'rnn_layers': 2,
        'vocabulary_size': vocabulary_size,
    }


# # Loss and train functions, network architecture
def ctc_lambda_func(args):
    """Compute the CTC batch cost; intended to be wrapped in a Lambda layer.

    Args:
        args: a 4-element sequence ``(y_pred, labels, input_length,
            label_length)``.

    Returns:
        The result of ``K.ctc_batch_cost`` evaluated on the trimmed
        predictions.
    """
    predictions, targets, prediction_lengths, target_lengths = args
    # Skip the first two time steps: the earliest RNN outputs tend to be
    # garbage, so they are excluded from the loss.
    # NOTE(review): presumably callers pass an input_length that already
    # accounts for the two dropped steps -- confirm against the data generator.
    trimmed_predictions = predictions[:, 2:, :]
    return K.ctc_batch_cost(
        targets, trimmed_predictions, prediction_lengths, target_lengths
    )

def ctc_crnn(params, training = True, width_rem=128, max_text_len=None):
    """Build the CRNN (conv stack followed by two bidirectional LSTM stages).

    Args:
        params: hyper-parameter dict (see ``default_model_params``). Keys read:
            'img_height', 'img_width', 'img_channels', 'conv_blocks',
            'conv_filter_n', 'conv_filter_size', 'conv_pooling_size',
            'vocabulary_size', and optionally 'rnn_units' (default 512) and
            'max_text_len'. 'rnn_layers' is not consulted: exactly two
            recurrent stages are always built.
        training: if True, return the model whose output is the CTC loss and
            whose inputs are ``[image, labels, input_length, label_length]``;
            otherwise return the inference model ``image -> softmax``.
        width_rem: accepted for backward compatibility only. The time axis is
            now inferred from the (possibly variable) image width, so this
            value is no longer used.
        max_text_len: length of the label vector fed to 'the_labels'. When
            None, ``params.get('max_text_len')`` is used; if that is also
            missing the label input has a variable length.

    Returns:
        A ``keras.models.Model``.
    """
    input_shape = (params['img_height'], params['img_width'], params['img_channels'])
    inputs = Input(name='the_input', shape=input_shape, dtype='float32')

    n_blocks = params['conv_blocks']
    height_reduction = 1

    # Convolutional feature extractor: Conv -> BatchNorm -> LeakyReLU -> MaxPool.
    inner = inputs
    for i in range(n_blocks):
        pool_size = params['conv_pooling_size'][i]
        inner = Conv2D(params['conv_filter_n'][i], params['conv_filter_size'][i],
                       padding='same', name='conv' + str(i+1),
                       kernel_initializer='he_normal')(inner)
        inner = BatchNormalization()(inner)
        inner = LeakyReLU(0.2)(inner)
        inner = MaxPooling2D(pool_size=pool_size, strides=pool_size,
                             name='max' + str(i+1))(inner)
        height_reduction *= pool_size[0]

    # Channels and height of the final feature map. Integer division matches
    # the flooring done by the successive 'valid' pooling layers.
    last_channels = params['conv_filter_n'][n_blocks - 1] if n_blocks else params['img_channels']
    pooled_height = params['img_height'] // height_reduction
    feature_dim = pooled_height * last_channels

    # Turn the feature map into a sequence over the image width:
    # (batch, height', width', channels) -> (batch, width', height', channels)
    # -> (batch, width', height' * channels). The -1 lets Keras infer the
    # number of time steps, so variable-width images are supported.
    inner = Permute((2, 1, 3), name='permute')(inner)
    inner = Reshape(target_shape=(-1, feature_dim), name='reshape')(inner)

    rnn_units = params.get('rnn_units', 512)

    # First recurrent stage: forward and backward passes are summed.
    lstm_1 = LSTM(rnn_units, return_sequences=True, kernel_initializer='he_normal', name='lstm1')(inner)
    lstm_1b = LSTM(rnn_units, return_sequences=True, go_backwards=True, kernel_initializer='he_normal', name='lstm1_b')(inner)
    # go_backwards emits the sequence in reversed time order; flip it back so
    # both directions are aligned before merging.
    reversed_lstm_1b = Lambda(lambda inputTensor: K.reverse(inputTensor, axes=1))(lstm_1b)

    lstm1_merged = add([lstm_1, reversed_lstm_1b])  # (batch, steps, rnn_units)
    lstm1_merged = BatchNormalization()(lstm1_merged)

    # Second recurrent stage: forward and backward passes are concatenated.
    lstm_2 = LSTM(rnn_units, return_sequences=True, kernel_initializer='he_normal', name='lstm2')(lstm1_merged)
    lstm_2b = LSTM(rnn_units, return_sequences=True, go_backwards=True, kernel_initializer='he_normal', name='lstm2_b')(lstm1_merged)
    reversed_lstm_2b = Lambda(lambda inputTensor: K.reverse(inputTensor, axes=1))(lstm_2b)

    lstm2_merged = concatenate([lstm_2, reversed_lstm_2b])  # (batch, steps, 2 * rnn_units)
    lstm2_merged = BatchNormalization()(lstm2_merged)

    # One extra output class on top of the vocabulary (the CTC blank).
    inner = Dense(params['vocabulary_size'] + 1, kernel_initializer='he_normal', name='dense2')(lstm2_merged)
    y_pred = Activation('softmax', name='softmax')(inner)

    if not training:
        return Model(inputs=[inputs], outputs=y_pred)

    # Resolve the label length explicitly instead of reading a notebook global,
    # so the function works on a freshly restarted kernel.
    if max_text_len is None:
        max_text_len = params.get('max_text_len')

    labels = Input(name='the_labels', shape=[max_text_len], dtype='float32')  # (batch, max_text_len)
    input_length = Input(name='input_length', shape=[1], dtype='int64')       # (batch, 1)
    label_length = Input(name='label_length', shape=[1], dtype='int64')       # (batch, 1)

    # Keras doesn't currently support loss funcs with extra parameters,
    # so the CTC loss is implemented in a Lambda layer.
    loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')([y_pred, labels, input_length, label_length])  # (batch, 1)

    return Model(inputs=[inputs, labels, input_length, label_length], outputs=loss_out)


In [32]:
# Smoke-test the architecture: build the training model for 128-pixel-high
# images with a one-symbol vocabulary and print its layer summary.
# The params dict gets its own name so `temp` only ever refers to the model.
crnn_params = default_model_params(128, 1)
temp = ctc_crnn(crnn_params)
temp.summary()

Model: "model_6"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 the_input (InputLayer)         [(None, 128, None,   0           []                               
                                1)]                                                               
                                                                                                  
 conv1 (Conv2D)                 (None, 128, None, 3  320         ['the_input[0][0]']              
                                2)                                                                
                                                                                                  
 batch_normalization_50 (BatchN  (None, 128, None, 3  128        ['conv1[0][0]']                  
 ormalization)                  2)                                                          

In [None]:
# Render the model graph (with tensor shapes and layer names) to a PNG file.
plot_model(
    temp,
    show_layer_names=True,
    show_shapes=True,
    to_file='model_plot.png',
)