In [1]:
from keras import backend as K
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Input, Dense, Activation, LeakyReLU, Permute, Bidirectional
from keras.layers import Reshape, Lambda, BatchNormalization
from keras.layers.merge import add, concatenate
from keras.models import Model
from keras.layers.recurrent import LSTM, CuDNNLSTM
import tensorflow as tf
import tensorflow as tf
from primus import CTC_PriMuS
import ctc_utils
import os
import tensorflow as tf
from primus import CTC_PriMuS
import ctc_utils
import os
import cv2

# Let TensorFlow grow GPU memory on demand instead of reserving it all up
# front. The setting is applied to every visible GPU (not only the first one)
# and the loop is simply skipped on CPU-only machines, where indexing the
# first GPU would raise IndexError.
for gpu in tf.config.list_physical_devices('GPU'):
    tf.config.experimental.set_memory_growth(gpu, True)

def default_model_params(img_height, vocabulary_size):
    """Return a fresh dictionary with the default model hyper-parameters.

    Args:
        img_height: Value stored under ``'img_height'``.
        vocabulary_size: Value stored under ``'vocabulary_size'``.

    Returns:
        A new dict (with new nested lists on every call) describing a
        four-block convolutional stack (3x3 kernels, 2x2 pooling, 32 to 256
        filters), the recurrent settings, a batch size of 16, one image
        channel and an unspecified (``None``) image width.
    """
    n_blocks = 4
    return {
        'img_height': img_height,
        'img_width': None,
        'batch_size': 16,
        'img_channels': 1,
        'conv_blocks': n_blocks,
        # Filter count doubles with every block: 32, 64, 128, 256.
        'conv_filter_n': [32 * 2 ** k for k in range(n_blocks)],
        'conv_filter_size': [[3, 3] for _ in range(n_blocks)],
        'conv_pooling_size': [[2, 2] for _ in range(n_blocks)],
        'rnn_units': 128,
        'rnn_layers': 2,
        'vocabulary_size': vocabulary_size,
    }

# def ctc_loss(y_true, y_pred, input_length, label_length, real_y_true_ts):
#     return tf.keras.backend.ctc_batch_cost(real_y_true_ts, y_pred, input_length, label_length)

def ctc_lambda_func(args):
    """Compute the CTC batch cost; intended as the body of a Keras Lambda layer.

    Args:
        args: Sequence of exactly four items, in this order: network
            predictions, labels, input lengths, label lengths.

    Returns:
        Whatever ``K.ctc_batch_cost`` returns for the labels, the trimmed
        predictions and the two length tensors.
    """
    predictions, targets, prediction_lengths, target_lengths = args
    # FIXME: Why?
    # Skip the first two steps along axis 1: the earliest RNN outputs tend
    # to be garbage.
    trimmed_predictions = predictions[:, 2:, :]
    return K.ctc_batch_cost(targets, trimmed_predictions, prediction_lengths, target_lengths)

def ctc_crnn(params, max_seq_len, width_rem = 128, training = True):
    """Build the convolutional-recurrent network with a CTC loss head.

    The convolutional feature map is rearranged into a batch-major sequence
    ``(batch, time, features)`` using Keras ``Permute``/``Reshape`` layers,
    where ``time`` is the pooled image width. Keras ``LSTM`` layers (with
    their default settings) and ``K.ctc_batch_cost`` both treat axis 0 as the
    batch axis, so the sequence must not be time-major. Because the time
    axis is inferred by ``Reshape``, the graph does not depend on a fixed
    batch size or a fixed image width.

    Args:
        params: Hyper-parameter dict such as the one returned by
            ``default_model_params``. Keys read here: ``img_height``,
            ``img_width``, ``img_channels``, ``conv_blocks``,
            ``conv_filter_n``, ``conv_filter_size``, ``conv_pooling_size``,
            ``rnn_units`` and ``vocabulary_size``.
        max_seq_len: Length of the label vector accepted by the
            ``the_labels`` input.
        width_rem: Unused. Kept only so existing calls keep working; the
            number of time steps is now inferred from the actual input width.
        training: If True, return the four-input model whose output is the
            CTC cost; otherwise return the single-input model whose output
            is the softmax activations.

    Returns:
        A Keras ``Model``.
    """
    input_shape = (params['img_height'], params['img_width'], params['img_channels'])
    inputs = Input(name='the_input', shape=input_shape, dtype='float32')

    # Convolutional block: conv -> batch norm -> leaky ReLU -> max pool.
    inner = inputs
    channels = params['img_channels']
    height_reduction = 1
    for i in range(params['conv_blocks']):
        pool_size = params['conv_pooling_size'][i]
        channels = params['conv_filter_n'][i]
        inner = Conv2D(channels, params['conv_filter_size'][i], padding='same', name='conv' + str(i+1), kernel_initializer='he_normal')(inner)
        inner = BatchNormalization()(inner)
        inner = LeakyReLU(0.2)(inner)
        inner = MaxPooling2D(pool_size=pool_size, strides=pool_size, name='max' + str(i+1))(inner)
        height_reduction *= pool_size[0]

    # (batch, H', W', C) -> (batch, W', C, H') -> (batch, W', C * H').
    # Integer division mirrors the flooring done by each pooling layer.
    feature_dim = channels * (params['img_height'] // height_reduction)
    features = Permute((2, 3, 1), name='permute_wch')(inner)
    features = Reshape((-1, feature_dim), name='features')(features)

    # RNN block. Each stage runs a forward and a backward LSTM; the backward
    # output is flipped along the time axis so both directions are aligned.
    lstm_1 = LSTM(params['rnn_units'], return_sequences=True, kernel_initializer='he_normal', name='lstm1', dropout=0.2)(features)
    lstm_1b = LSTM(params['rnn_units'], return_sequences=True, go_backwards=True, kernel_initializer='he_normal', name='lstm1_b', dropout=0.2)(features)
    reversed_lstm_1b = Lambda(lambda inputTensor: K.reverse(inputTensor, axes=1))(lstm_1b)

    # First stage merges by summation: (batch, time, rnn_units).
    lstm1_merged = add([lstm_1, reversed_lstm_1b])
    lstm1_merged = BatchNormalization()(lstm1_merged)

    lstm_2 = LSTM(params['rnn_units'], return_sequences=True, kernel_initializer='he_normal', name='lstm2', dropout=0.2)(lstm1_merged)
    lstm_2b = LSTM(params['rnn_units'], return_sequences=True, go_backwards=True, kernel_initializer='he_normal', name='lstm2_b', dropout=0.2)(lstm1_merged)
    reversed_lstm_2b = Lambda(lambda inputTensor: K.reverse(inputTensor, axes=1))(lstm_2b)

    # Second stage merges by concatenation: (batch, time, 2 * rnn_units).
    lstm2_merged = concatenate([lstm_2, reversed_lstm_2b])
    lstm2_merged = BatchNormalization()(lstm2_merged)

    # Per-time-step class activations; the extra class is the CTC blank.
    num_classes = params['vocabulary_size'] + 1
    inner = Dense(num_classes, kernel_initializer='he_normal', name='dense2')(lstm2_merged)
    y_pred = Activation('softmax', name='softmax')(inner)

    if not training:
        # Inference only needs the image input and the softmax output.
        return Model(inputs=[inputs], outputs=y_pred)

    # FIXME: Change based on data
    labels = Input(name='the_labels', shape=[max_seq_len], dtype='float32')
    input_length = Input(name='input_length', shape=[1], dtype='int64')
    label_length = Input(name='label_length', shape=[1], dtype='int64')

    # Keras doesn't currently support loss funcs with extra parameters
    # so CTC loss is implemented in a lambda layer
    loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')([y_pred, labels, input_length, label_length])

    return Model(inputs=[inputs, labels, input_length, label_length], outputs=loss_out)

    # rnn_hidden_units = params['rnn_units']
    # rnn_hidden_layers = params['rnn_layers']
    # for i in range(rnn_hidden_layers):
    #     inner = Bidirectional(LSTM(rnn_hidden_units,activation='softmax'))(features) # features if i == 0 else inner
  
    # inner = Dense(params['vocabulary_size'] + 1)(inner)
    # y_pred = Activation('softmax', name='softmax')(inner)

    # return Model(inputs=[inputs], outputs= lstm2_merged), loss_out

    

ImportError: cannot import name 'CuDNNLSTM' from 'keras.layers.recurrent' (c:\Users\steli\.conda\envs\tf-gpu\lib\site-packages\keras\layers\recurrent.py)

In [61]:
# Root folder of the dataset; edit this single line to relocate the data.
DATA_DIR = "C:/Users/steli/DeepLearning/Project/tf-end-to-end/Data/"

corpus_dirpath = DATA_DIR + "primusCalvoRizoAppliedSciences2018/"
corpus_filepath = DATA_DIR + "train.txt"
vocabulary = DATA_DIR + "vocabulary_semantic.txt"

# NOTE(review): the 4th positional argument is presumably the switch for the
# semantic encoding (it matches the semantic vocabulary file) - confirm
# against the CTC_PriMuS constructor.
primus = CTC_PriMuS(corpus_dirpath, corpus_filepath, vocabulary, True, val_split = 0.1)

Training with 70880 and validating with 7875


In [62]:
# Hyper-parameters
img_height = 128
max_epochs = 100
dropout = 0.5  # NOTE(review): not referenced by any other visible cell
params = default_model_params(vocabulary_size=primus.vocabulary_size, img_height=img_height)

# Training graph; the label input accepts vectors of length 5
model = ctc_crnn(params, max_seq_len=5)

In [63]:
# The 'ctc' Lambda layer already outputs the CTC cost, so the Keras loss just
# forwards the model output and ignores the target it is handed.
model.compile(
    loss={'ctc': lambda _unused_target, ctc_cost: ctc_cost},
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
)


In [53]:
# Display the hyper-parameter dictionary currently bound to `params`.
params

{'img_height': 128,
 'img_width': None,
 'batch_size': 16,
 'img_channels': 1,
 'conv_blocks': 4,
 'conv_filter_n': [32, 64, 128, 256],
 'conv_filter_size': [[3, 3], [3, 3], [3, 3], [3, 3]],
 'conv_pooling_size': [[2, 2], [2, 2], [2, 2], [2, 2]],
 'rnn_units': 512,
 'rnn_layers': 2,
 'vocabulary_size': 1781}

In [54]:
# testimg = cv2.imread("C:/Users/steli/DeepLearning/Project/tf-end-to-end/Data/primusCalvoRizoAppliedSciences2018/package_aa/000051650-1_1_2/000051650-1_1_2.png", cv2.IMREAD_GRAYSCALE)
# testimg.shape

AttributeError: 'NoneType' object has no attribute 'shape'

In [64]:

# Validation data plus (presumably) the number of validation samples - the
# second value is only used below to derive the number of validation steps.
val_data, val_len = primus.getValidation(params)

# Model.fit accepts Python generators directly; fit_generator is deprecated.
# The History object is kept so the loss curves can be inspected afterwards.
history = model.fit(primus.nextBatch(params),
                    steps_per_epoch=len(primus.training_list) // params['batch_size'],
                    epochs=max_epochs,
                    validation_data=val_data,  # FIXME: unsure
                    validation_steps=int(val_len // params['batch_size']))

MemoryError: Unable to allocate 1.04 MiB for an array with shape (128, 1067) and data type float64