In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
%cd /content/drive/MyDrive/Project-X-Lip-Reading

/content/drive/MyDrive/Project-X-Lip-Reading


In [3]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Layer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Bidirectional, MaxPool3D, MaxPooling3D
from tensorflow.keras.layers import Activation, Reshape, SpatialDropout3D, BatchNormalization, TimeDistributed, Flatten
from tensorflow.keras.optimizers import Adam

In [4]:
#  model.add(Conv3D(32, (3, 3, 3), strides=1, padding='valid', input_shape=input_shape))
class Conv3D(Layer):
    def __init__(self, filters, kernel_size, strides=1, padding='valid', **kwargs):
        super(Conv3D, self).__init__(**kwargs)
        self.filters = filters
        self.kernel_size = kernel_size
        self.strides = strides
        self.padding = padding

    def build(self, input_shape):
        self.W = self.add_weight(shape=(self.kernel_size[0], self.kernel_size[1], self.kernel_size[2], input_shape[-1], self.filters), initializer='random_normal', trainable=True)
        self.b = self.add_weight(shape=(self.filters,), initializer='zeros', trainable=True)

    def call(self, inputs):
        return self.conv3d(inputs, self.W, self.b, self.strides, self.padding)

    def conv3d(self, X, W, b, stride=1, padding='valid'):

        (n_B, n_H_prev, n_W_prev, n_D_prev, n_C_prev) = X.shape
        (f, f, f, n_C_prev, n_C) = W.shape

        if padding == 'same':
            pad = (f-1)//2
        else:
            pad = 0

        n_H = int((n_H_prev-f+2*pad)/stride)+1
        n_W = int((n_W_prev-f+2*pad)/stride)+1
        n_D = int((n_D_prev-f+2*pad)/stride)+1

        Z = np.zeros((n_B, n_H, n_W, n_D, n_C))

        X_pad = np.pad(X, ((0, 0), (pad, pad), (pad, pad), (pad, pad), (0, 0)), mode='constant', constant_values=(0, 0))

        for i in range(n_B):
            for c in range(n_C):
                for h in range(n_H):
                    for w in range(n_W):
                        for d in range(n_D):
                            h_start = h*stride
                            h_end = h_start+f
                            w_start = w*stride
                            w_end = w_start+f
                            d_start = d*stride
                            d_end = d_start+f

                            X_slice = X_pad[i, h_start:h_end, w_start:w_end, d_start:d_end, :]
                            Z[i, h, w, d, c] = np.sum(X_slice * W[:, :, :, :, c]) + b[c]

        return Z

    def compute_output_shape(self, input_shape):
        if self.padding == 'same':
            pad = (self.kernel_size[0]-1)//2
        else:
            pad = 0

        n_H = int((input_shape[1]-self.kernel_size[0]+2*pad)/self.strides)+1
        n_W = int((input_shape[2]-self.kernel_size[1]+2*pad)/self.strides)+1
        n_D = int((input_shape[3]-self.kernel_size[2]+2*pad)/self.strides)+1

        return (input_shape[0], n_H, n_W, n_D, self.filters)

In [5]:
def build_3d_cnn_model(input_shape, num_classes=10, learning_rate=0.01, dropout_rate=0.5):
    model = Sequential()

    # 1st Layer group
    model.add(Conv3D(32, (3, 3, 3), strides=1, padding='valid', input_shape=input_shape))
    model.add(MaxPooling3D(pool_size=(2, 2, 2), strides=2))

    # 2nd Layer group
    model.add(Conv3D(64, (3, 3, 3), strides=1, padding='valid'))
    model.add(MaxPooling3D(pool_size=(2, 2, 2), strides=2))

    # 3rd Layer group
    model.add(Conv3D(128, (3, 3, 3), strides=1, padding='valid'))
    model.add(MaxPooling3D(pool_size=(2, 2, 2), strides=2))

    # Reshape
    model.add(Reshape((128, 1*10*10)))

    # LSTMS Layer
    model.add(LSTM(32, return_sequences=True))
    model.add(Dropout(dropout_rate))

    # Flatten and Dense layers
    model.add(Flatten())

    model.add(Dense(2048, activation='relu'))
    model.add(Dropout(dropout_rate))
    model.add(Dense(1024, activation='relu'))
    model.add(Dropout(dropout_rate))

    model.add(Dense(num_classes, activation='softmax'))

    # Model Compilation
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    return model

In [6]:
# Tuning Hyperparameters
input = (22, 100, 100, 1)
num_classes = 10
learning_rate = 0.001
dropout_rate = 0.5

In [7]:
# Build Model
model = build_3d_cnn_model(input, num_classes, learning_rate, dropout_rate)

  super(Conv3D, self).__init__(**kwargs)


In [8]:
# Summary
model.summary()