In [1]:
import tensorflow as tf
import keras
from keras.models import Sequential
from keras.layers import Input
from keras.layers.convolutional import Conv3D, ZeroPadding3D
from keras.layers.pooling import MaxPooling3D
from keras.layers.core import SpatialDropout3D, Flatten, Dense, Activation
from keras.layers.normalization import BatchNormalization
from keras.layers.recurrent import GRU
from keras.layers.wrappers import Bidirectional, TimeDistributed
from keras import backend as K

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# Only surface TensorFlow log messages at ERROR level.
tf.logging.set_verbosity(tf.logging.ERROR)

# Shape of one input sample: (Time X Height X Width X Channels)
input_shape = (75, 50, 100, 3)
input_data = Input(dtype='float32', shape=input_shape)

# One row per convolutional stage:
#   (zero padding, conv filters, conv kernel size, conv strides)
# Every stage is: pad -> Conv3D -> BatchNorm -> ReLU -> SpatialDropout3D -> MaxPool.
conv_stages = (
    ((1, 2, 2), 32, (3, 5, 5), (1, 2, 2)),
    ((1, 2, 2), 64, (3, 5, 5), (1, 1, 1)),
    ((1, 1, 1), 96, (3, 3, 3), (1, 1, 1)),
)

model = Sequential()
for stage_index, (pad, n_filters, kernel, conv_strides) in enumerate(conv_stages):
    # Only the first layer added to the model is given the input shape.
    first_layer_kwargs = {'input_shape': input_shape} if stage_index == 0 else {}
    model.add(ZeroPadding3D(padding=pad, **first_layer_kwargs))
    model.add(Conv3D(filters=n_filters, kernel_size=kernel, strides=conv_strides,
                     kernel_initializer='he_normal'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(SpatialDropout3D(0.5))
    # Pool size/stride of 1 along the first (time) axis, 2 along height and width.
    model.add(MaxPooling3D(pool_size=(1, 2, 2), strides=(1, 2, 2)))

# Flatten the feature maps of each time step independently.
model.add(TimeDistributed(Flatten()))

# Two identical stacked bidirectional GRU layers, each returning the full sequence.
for gru_index in range(2):
    recurrent_layer = GRU(units=256, kernel_initializer='Orthogonal', return_sequences=True)
    model.add(Bidirectional(recurrent_layer, merge_mode='concat'))

# 28-way softmax output.
model.add(Dense(28, kernel_initializer='he_normal'))
model.add(Activation('softmax'))

# Extra input tensors (labels and sequence lengths); not attached to `model` in this cell.
labels = Input(shape=[32], dtype='float32')
input_length = Input(shape=[1], dtype='int64')
label_length = Input(shape=[1], dtype='int64')


In [None]:
# CTC loss function -- custom objective function
from keras.layers.core import Lambda

def ctc_lambda_func(y_pred, labels):
    """Return the batched CTC cost of `y_pred` against `labels`.

    Args:
        y_pred: Network output tensor, passed to `K.ctc_batch_cost` as the
            predictions.
        labels: Ground-truth label tensor, passed to `K.ctc_batch_cost` as
            the targets.

    Returns:
        The tensor produced by `K.ctc_batch_cost`.

    Note:
        The sequence lengths come from the notebook-level `input_length` and
        `label_length` Input tensors created in the model cell, so that cell
        must have been run first.
    """
    # Do NOT create new Input() placeholders here: they would shadow the
    # notebook-level length tensors, and a placeholder created inside the
    # loss is not an input of any model, so it can never be fed.
    #
    # The Keras example image_ocr.py drops the first two RNN outputs
    # (y_pred[:, 2:, :]) because they tend to be garbage. That trim is
    # intentionally not applied here: every time step of y_pred is used.
    return K.ctc_batch_cost(labels, y_pred, input_length, label_length)

# CTC layer implementation using a Lambda layer
def CTC(y_pred, label):
    """Wrap `ctc_lambda_func` in a Lambda layer and apply it to the given tensors.

    Args:
        y_pred: Network output tensor.
        label: Ground-truth label tensor.

    Returns:
        The output tensor of the Lambda layer applied to `[y_pred, label]`,
        i.e. the value computed by `ctc_lambda_func(y_pred, label)`.
    """
    # A Lambda layer hands its wrapped function ONE argument (the list of input
    # tensors), whereas ctc_lambda_func takes two positional arguments, so the
    # pair is unpacked here before delegating.
    ctc_layer = Lambda(
        lambda tensors: ctc_lambda_func(tensors[0], tensors[1]),
        output_shape=(1,),
    )
    # Apply the layer to the tensors that were passed in; returning the bare,
    # unapplied layer would silently ignore both arguments.
    return ctc_layer([y_pred, label])


In [3]:
# Configure training: Adam optimizer, categorical cross-entropy on the softmax
# outputs, and accuracy reported as an extra metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [4]:
import h5py

# Read only the first 10 samples of the "features" and "labels" datasets.
# The file is opened explicitly read-only ('r'): older h5py releases do not
# default to read-only, so omitting the mode could open the dataset writable
# (or create an empty file if the path is wrong).
with h5py.File('../datasets/grid_lips_0-499_100x50.hdf5', 'r') as f:
    X = f["features"][:10]
    y = f["labels"][:10]

# The slices above are already in memory, so the file can be closed before
# post-processing.
# Scale features by 1/255 (presumably 8-bit pixel values -> [0, 1]; TODO confirm).
X = X/255
print(X.shape)
print(y.shape)

(10, 75, 50, 100, 3)
(10, 75, 28)


In [5]:
# Short training run: 3 epochs, mini-batches of 2, with a 0.2 fraction of the
# samples held out for validation.
model.fit(
    x=X,
    y=y,
    batch_size=2,
    epochs=3,
    validation_split=0.2,
)

Train on 8 samples, validate on 2 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x7f1ec8139438>