In [1]:
import tensorflow as tf
import keras
from keras.models import Sequential
from keras.layers import Input
from keras.layers.convolutional import Conv3D, ZeroPadding3D
from keras.layers.pooling import MaxPooling3D
from keras.layers.core import SpatialDropout3D, Flatten, Dense, Activation
from keras.layers.normalization import BatchNormalization
from keras.layers.recurrent import GRU
from keras.layers.wrappers import Bidirectional, TimeDistributed
from keras import backend as K

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [3]:
tf.logging.set_verbosity(tf.logging.ERROR)

input_shape = (75, 100, 50, 3) #(Time X Height X Width X Channels)
input_data = Input(shape=input_shape, dtype='float32')

zero1 = ZeroPadding3D(padding=(1,2,2))(input_data)
conv1 = Conv3D(filters=32, kernel_size=(3,5,5), strides=(1,2,2), kernel_initializer='he_normal')(zero1)
batc1 = BatchNormalization()(conv1)
actv1 = Activation('relu')(batc1)
drop1 = SpatialDropout3D(0.5)(actv1)
maxp1 = MaxPooling3D(pool_size=(1, 2, 2), strides=(1, 2, 2))(drop1)

zero2 = ZeroPadding3D(padding=(1,2,2))(maxp1)
conv2 = Conv3D(filters=64, kernel_size=(3,5,5), strides=(1,1,1), kernel_initializer='he_normal')(zero2)
batc2 = BatchNormalization()(conv2)
actv2 = Activation('relu')(batc2)
drop2 = SpatialDropout3D(0.5)(actv2)
maxp2 = MaxPooling3D(pool_size=(1, 2, 2), strides=(1, 2, 2))(drop2)

zero3 = ZeroPadding3D(padding=(1,1,1))(maxp2)
conv3 = Conv3D(filters=96, kernel_size=(3,3,3), strides=(1,1,1), kernel_initializer='he_normal')(zero3)
batc3 = BatchNormalization()(conv3)
actv3 = Activation('relu')(batc3)
drop3 = SpatialDropout3D(0.5)(actv3)
maxp3 = MaxPooling3D(pool_size=(1, 2, 2), strides=(1, 2, 2))(drop3)

resh1 = TimeDistributed(Flatten())(maxp3)

gru1 = Bidirectional(GRU(units=256, kernel_initializer='Orthogonal', return_sequences=True), merge_mode='concat')(resh1)
gru2 = Bidirectional(GRU(units=256, kernel_initializer='Orthogonal', return_sequences=True), merge_mode='concat')(gru1)

dense1 = Dense(28, kernel_initializer='he_normal')(gru2)
y_pred = Activation('softmax')(dense1)

labels = Input(shape=[32], dtype='float32')
input_length = Input(shape=[1], dtype='int64')
label_length = Input(shape=[1], dtype='int64')

In [21]:
# CTC loss function -- custom objective function
from keras.layers.core import Lambda
from keras.models import Model

def ctc_lambda_func(args):
    y_pred, labels, input_length, label_length = args
    # From Keras example image_ocr.py:
    # the 2 is critical here since the first couple outputs of the RNN
    # tend to be garbage:
    # y_pred = y_pred[:, 2:, :]
    y_pred = y_pred[:, :, :]
    return K.ctc_batch_cost(labels, y_pred, input_length, label_length)

# CTC Layer implementation using Lambda layer
def CTC(args):
    return Lambda(ctc_lambda_func, output_shape=(1,))(args)

loss_out = CTC([y_pred, labels, input_length, label_length])
model = Model(inputs=[input_data, labels, input_length, label_length], outputs=loss_out)

In [4]:
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 75, 100, 50,  0                                            
__________________________________________________________________________________________________
zero_padding3d_1 (ZeroPadding3D (None, 77, 104, 54,  0           input_1[0][0]                    
__________________________________________________________________________________________________
conv3d_1 (Conv3D)               (None, 75, 50, 25, 3 7232        zero_padding3d_1[0][0]           
__________________________________________________________________________________________________
batch_normalization_1 (BatchNor (None, 75, 50, 25, 3 128         conv3d_1[0][0]                   
__________________________________________________________________________________________________
activation

In [14]:
import h5py
X, y = None, None
with h5py.File('../datasets/grid_lips_0-4_100x50.hdf5') as f:
    X = f["features"][:]
    y = f["labels"][:]
    X = X/255
    print(X.shape)
    print(y.shape)

(5, 75, 50, 100, 3)
(5, 75, 28)


In [16]:
X.reshape(5,75,100,50,3)

array([[[[[0.49803922, 0.49803922, 0.40392157],
          [0.49803922, 0.49803922, 0.40392157],
          [0.49803922, 0.49803922, 0.40784314],
          ...,
          [0.49803922, 0.4627451 , 0.34117647],
          [0.49803922, 0.4627451 , 0.34117647],
          [0.49803922, 0.46666667, 0.3372549 ]],

         [[0.49803922, 0.4745098 , 0.34117647],
          [0.49803922, 0.47843137, 0.34509804],
          [0.49803922, 0.48235294, 0.34901961],
          ...,
          [0.49803922, 0.49803922, 0.34509804],
          [0.49803922, 0.49803922, 0.34509804],
          [0.49803922, 0.49803922, 0.34509804]],

         [[0.49803922, 0.49803922, 0.4       ],
          [0.49803922, 0.49803922, 0.40392157],
          [0.49803922, 0.49803922, 0.40784314],
          ...,
          [0.49803922, 0.4627451 , 0.34117647],
          [0.49803922, 0.46666667, 0.34117647],
          [0.49803922, 0.47058824, 0.34117647]],

         ...,

         [[0.49803922, 0.38039216, 0.2627451 ],
          [0.49803922,

In [22]:
model.compile(optimizer='adam', loss=CTC)
model.fit(X[0],y,)

AttributeError: 'Lambda' object has no attribute 'get_shape'