# Gesture Recognition with Neural Network

- **Problem Statement:** Imagine you are working as a data scientist at a home electronics company that manufactures state-of-the-art smart televisions. You want to develop a feature for the smart TV that recognises five different gestures performed by the user, so that users can control the TV without a remote. The gestures are continuously monitored by the webcam mounted on the TV. Each gesture corresponds to a specific command:

  - *Thumbs up:*  Increase the volume
  - *Thumbs down:* Decrease the volume
  - *Left swipe:* 'Jump' backwards 10 seconds
  - *Right swipe:* 'Jump' forward 10 seconds  
  - *Stop:* Pause the movie
 
### 0. Importing Libraries

In [20]:
# import libraries
import numpy as np
import tensorflow as tf
import random as rn
import os
import cv2
import datetime
from tensorflow.keras.layers import Conv3D, Dense, Flatten, BatchNormalization, Dropout, TimeDistributed, GRU, GlobalMaxPooling2D, MaxPooling3D, MaxPooling2D, Activation
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import categorical_crossentropy
from tensorflow.keras.models import Sequential
import warnings
warnings.filterwarnings('ignore')
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau

### 1. Constants Definitions

In [2]:
# define dataset paths
# CSV index files (one line per video folder); these paths are relative to the working directory
TRAIN_DOC_PATH = 'datasets/Project_data/train.csv'
TEST_DOC_PATH = 'datasets/Project_data/val.csv'
# root folders that contain one sub-folder of frames per video
# NOTE(review): these start with '/' (absolute) while the CSV paths above are relative - confirm this is intended
# NOTE: the TEST_* names point at the validation split (val.csv / val)
TRAIN_DATA_PATH = '/datasets/Project_data/train'
TEST_DATA_PATH = '/datasets/Project_data/val'

In [3]:
# seed initializations: NumPy, TensorFlow and Python's `random` all get the same seed
SEED = 30
for set_seed in (np.random.seed, tf.random.set_seed, rn.seed):
    set_seed(SEED)

In [4]:
# number of frames used per video and the size every frame is resized to
frames = 30
print('# frames = ', frames)
dim = (100, 100)
print('# dim = ', dim)

# timestamp taken once; run_model uses it to name the checkpoint directory
curr_dt_time = datetime.datetime.now()

# each line of a doc file describes one video, so the line count is the
# sequence count; `with` closes the file handle, and no shuffle is needed
# (or performed) just to count lines
with open(TRAIN_DOC_PATH) as train_doc:
    num_train_sequences = len(train_doc.readlines())
print('# training sequences =', num_train_sequences)

with open(TEST_DOC_PATH) as val_doc:
    num_val_sequences = len(val_doc.readlines())
print('# validation sequences =', num_val_sequences)

# frames =  30
# dim =  (100, 100)
# training sequences = 663
# validation sequences = 100


### 2. Data Generator

In [5]:
# data generator function
def data_generator(data_dir_path, data_doc_path, batch_size):
    """
    Generator function to generate the data in batches, forever, reshuffling the folder list on every pass
    # param- data_dir_path: directory path that contains one sub-folder of images per video
    # param- data_doc_path: path of the .csv where all folders are listed (';'-separated, folder name first, class index last)
    # param- batch_size: size of a batch; the last batch of a pass holds only the remaining folders
    # uses- frames (module level): number of frames(images) used from each video
    # uses- dim (module level): dimension of a image in form of (w, h)
    # returns- batch_data of shape (n, frames, dim[0], dim[1], 3) scaled by 1/255 and one-hot batch_labels of shape (n, 5)
    # raises- ValueError for a non-positive batch_size, an empty .csv or a folder with fewer than `frames` images;
    #         IOError when an image cannot be read (all raised when the generator is iterated)
    """
    print('\nSource path = ', data_dir_path, '; batch size =', batch_size)

    if batch_size <= 0:
        raise ValueError('batch_size must be a positive integer, got %r' % (batch_size,))

    # read the folder list once and close the file handle straight away
    with open(data_doc_path) as doc_file:
        doc_lines = doc_file.readlines()

    # without this guard an empty list would spin forever without yielding
    if not doc_lines:
        raise ValueError('no folders listed in ' + str(data_doc_path))

    while True:
        doc = np.random.permutation(doc_lines)

        # step through the shuffled list in strides of batch_size; slicing makes
        # the last batch shorter without ever modifying batch_size itself, so the
        # row offsets stay correct and later passes keep the requested batch size
        for start in range(0, len(doc), batch_size):
            batch_rows = doc[start:start + batch_size]

            batch_data = np.zeros((len(batch_rows), frames, dim[0], dim[1], 3))
            batch_labels = np.zeros((len(batch_rows), 5))

            for folder, row in enumerate(batch_rows):
                fields = row.strip().split(';')
                folder_path = data_dir_path + '/' + fields[0]

                # os.listdir returns entries in arbitrary order; sort so frames are
                # taken in file-name order (assumes names sort chronologically - TODO confirm)
                images = sorted(os.listdir(folder_path))
                if len(images) < frames:
                    raise ValueError('%s holds %d images, %d required' % (folder_path, len(images), frames))

                for img_idx in range(frames):
                    image_path = folder_path + '/' + images[img_idx]
                    image = cv2.imread(image_path)
                    # cv2.imread signals failure by returning None instead of raising
                    if image is None:
                        raise IOError('could not read image: ' + image_path)

                    image = image_processing(image.astype(np.float32), dim)
                    # scale all three channels to [0, 1] in one assignment
                    batch_data[folder, img_idx] = image / 255.0

                # one-hot encode the class index stored in the last field
                batch_labels[folder, int(fields[-1])] = 1
            yield batch_data, batch_labels

## ----------------------------------------------------------------##

# utility for image processing
def image_processing(image, dim):
    """
    Preprocess one frame: crop it with crop_image, then resize the crop with resize_image
    # param- image: frame to preprocess
    # param- dim: target size handed to resize_image
    # returns- the cropped and resized frame
    """
    return resize_image(crop_image(image), dim)

## ----------------------------------------------------------------##

# utility to resize image
def resize_image(image, dim):
    """
    Resize an image by delegating to cv2.resize
    # param- image: image to resize
    # param- dim: target size, passed to cv2.resize unchanged
    # returns- the resized image
    """
    resized = cv2.resize(image, dim)
    return resized

## ----------------------------------------------------------------##

# utility to crop image
def crop_image(image, crop_width=120, crop_height=120):
    """
    Crop a box centred on the middle of the image
    # param- image: array whose first two axes are (height, width); any further axes (e.g. channels) are kept
    # param- crop_width: width of the cropped region (default 120)
    # param- crop_height: height of the cropped region (default 120)
    # returns- the centred crop; an axis that is shorter than the requested size is returned whole
    """
    # only the first two axes matter, so 2-D (grayscale) images work as well
    height, width = image.shape[:2]

    # top-left corner of the box, clamped to the image
    start_x = max(width // 2 - crop_width // 2, 0)
    start_y = max(height // 2 - crop_height // 2, 0)

    # derive the end from the start so odd sizes are not cut one pixel short
    end_x = min(start_x + crop_width, width)
    end_y = min(start_y + crop_height, height)

    return image[start_y:end_y, start_x:end_x]

### 3. 3D-CNN Architecture

In [18]:
# utility to run the model
def run_model(model, batch_size, num_epochs, model_init='model_init', save=False):
    """
    Train and validate a compiled model on the train/validation data generators
    # param- model: compiled Keras model to train
    # param- batch_size: number of videos per batch (must be positive)
    # param- num_epochs: number of epochs to train for
    # param- model_init: prefix of the checkpoint directory name (only used when save is truthy)
    # param- save: when truthy, write an .h5 checkpoint whenever val_categorical_accuracy improves
    # returns- the History object returned by model.fit
    # raises- ValueError when batch_size is not positive
    """
    if batch_size <= 0:
        raise ValueError('batch_size must be a positive integer, got %r' % (batch_size,))

    callbacks_list = []

    if save:
        # one directory per notebook run, named after the prefix and the start timestamp
        model_name = model_init + '_' + str(curr_dt_time).replace(' ','').replace(':','_') + '/'
        os.makedirs(model_name, exist_ok=True)

        filepath = model_name + 'model-{epoch:05d}-{loss:.4f}-{categorical_accuracy:.4f}-{val_loss:.4f}-{val_categorical_accuracy:.4f}.h5'
        checkpoint = ModelCheckpoint(filepath, monitor='val_categorical_accuracy', save_best_only=True, mode='auto', verbose=1)
        callbacks_list.append(checkpoint)

    # multiply the learning rate by 0.2 when val_loss has not improved for 4 epochs
    LR = ReduceLROnPlateau(monitor='val_loss', factor=0.2, verbose=1, patience=4)
    callbacks_list.append(LR)

    # ceil division: a trailing partial batch still needs its own step
    steps_per_epoch = (num_train_sequences + batch_size - 1) // batch_size
    validation_steps = (num_val_sequences + batch_size - 1) // batch_size

    print('Steps per epoch = ', steps_per_epoch)
    print('Validation steps = ', validation_steps)

    # data generation
    train_data_generator = data_generator(TRAIN_DATA_PATH, TRAIN_DOC_PATH, batch_size)
    test_data_generator = data_generator(TEST_DATA_PATH, TEST_DOC_PATH, batch_size)

    # Model.fit accepts generators directly; fit_generator is deprecated in TF 2.x.
    # The generators never stop, so both step counts must be given explicitly.
    history = model.fit(train_data_generator, steps_per_epoch=steps_per_epoch, epochs=num_epochs, verbose=1,
                        validation_data=test_data_generator, validation_steps=validation_steps,
                        callbacks=callbacks_list)
    return history

### Experiment-1: 3D CNN with 3 convolutions, kernel_size=(3,3,3), batch size=32, without padding

In [15]:
# start from a clean Keras session
tf.keras.backend.clear_session()

# hyperparameters
kernel_size = (3, 3, 3)
pool_size = (2, 2, 2)

# model definition: three conv/pool stages followed by a dense classifier head
model = Sequential([
    Conv3D(32, kernel_size, input_shape=(frames, dim[0], dim[1], 3), activation='relu'),
    MaxPooling3D(pool_size=pool_size),

    Conv3D(64, kernel_size, activation='relu'),
    MaxPooling3D(pool_size=pool_size),

    Conv3D(128, kernel_size, activation='relu'),
    Dropout(0.5),
    MaxPooling3D(pool_size=pool_size),

    Flatten(),

    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dropout(0.25),

    Dense(5, activation='softmax'),
])

# model summary
model.summary()

# compile model
model.compile(
    optimizer=Adam(),
    loss=categorical_crossentropy,
    metrics=['categorical_accuracy'],
)

# run model (no checkpoints are written)
run_model(model=model, batch_size=32, num_epochs=5, save=False)

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv3d (Conv3D)             (None, 28, 98, 98, 32)    2624      
                                                                 
 max_pooling3d (MaxPooling3D  (None, 14, 49, 49, 32)   0         
 )                                                               
                                                                 
 conv3d_1 (Conv3D)           (None, 12, 47, 47, 64)    55360     
                                                                 
 max_pooling3d_1 (MaxPooling  (None, 6, 23, 23, 64)    0         
 3D)                                                             
                                                                 
 conv3d_2 (Conv3D)           (None, 4, 21, 21, 128)    221312    
                                                                 
 dropout (Dropout)           (None, 4, 21, 21, 128)    0

2024-09-02 05:00:18.584763: I tensorflow/stream_executor/cuda/cuda_dnn.cc:377] Loaded cuDNN version 8302


Source path =  /datasets/Project_data/val ; batch size = 32
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


### Experiment-2: 3D CNN with 4 convolutions, kernel_size=(3,3,3), batch size=32, with padding

In [17]:
# start from a clean Keras session
tf.keras.backend.clear_session()

# hyperparameters
kernel_size = (3, 3, 3)
pool_size = (2, 2, 2)

# model definition: four 'same'-padded conv/pool stages followed by a dense classifier head
model = Sequential([
    Conv3D(16, kernel_size, input_shape=(frames, dim[0], dim[1], 3), activation='relu', padding='same'),
    MaxPooling3D(pool_size=pool_size),

    Conv3D(32, kernel_size, activation='relu', padding='same'),
    MaxPooling3D(pool_size=pool_size),

    Conv3D(64, kernel_size, activation='relu', padding='same'),
    MaxPooling3D(pool_size=pool_size),

    Conv3D(128, kernel_size, activation='relu', padding='same'),
    Dropout(0.5),
    MaxPooling3D(pool_size=pool_size),

    Flatten(),

    Dense(64, activation='relu'),
    Dense(64, activation='relu'),
    Dropout(0.25),

    Dense(5, activation='softmax'),
])

# model summary
model.summary()

# compile model
model.compile(
    optimizer=Adam(),
    loss=categorical_crossentropy,
    metrics=['categorical_accuracy'],
)

# run model (no checkpoints are written)
run_model(model=model, batch_size=32, num_epochs=5, save=False)

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv3d (Conv3D)             (None, 30, 100, 100, 16)  1312      
                                                                 
 max_pooling3d (MaxPooling3D  (None, 15, 50, 50, 16)   0         
 )                                                               
                                                                 
 conv3d_1 (Conv3D)           (None, 15, 50, 50, 32)    13856     
                                                                 
 max_pooling3d_1 (MaxPooling  (None, 7, 25, 25, 32)    0         
 3D)                                                             
                                                                 
 conv3d_2 (Conv3D)           (None, 7, 25, 25, 64)     55360     
                                                                 
 max_pooling3d_2 (MaxPooling  (None, 3, 12, 12, 64)    0

### Experiment-3: 3D CNN with 4 convolutions, kernel_size=(2,2,2), batch size=16, with batch normalization & without padding

In [21]:
# start from a clean Keras session
tf.keras.backend.clear_session()

# hyperparameters
kernel_size = (2, 2, 2)
pool_size = (2, 2, 2)

# model definition: two stages of (conv -> batch norm -> relu -> dropout) x 2,
# each stage closed by a max-pooling layer, then a dense classifier head
model = Sequential([
    Conv3D(32, kernel_size, input_shape=(frames, dim[0], dim[1], 3)),
    BatchNormalization(),
    Activation('relu'),
    Dropout(0.1),

    Conv3D(32, kernel_size),
    BatchNormalization(),
    Activation('relu'),
    Dropout(0.1),

    MaxPooling3D(pool_size=pool_size),

    Conv3D(64, kernel_size),
    BatchNormalization(),
    Activation('relu'),
    Dropout(0.1),

    Conv3D(64, kernel_size),
    BatchNormalization(),
    Activation('relu'),
    Dropout(0.1),

    MaxPooling3D(pool_size=pool_size),

    Flatten(),

    Dense(512, activation='relu'),
    Dropout(0.1),
    Dense(5, activation='softmax'),
])

# model summary
model.summary()

# compile model
model.compile(
    optimizer=Adam(),
    loss=categorical_crossentropy,
    metrics=['categorical_accuracy'],
)

# run model (no checkpoints are written)
run_model(model=model, batch_size=16, num_epochs=5, save=False)

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv3d (Conv3D)             (None, 29, 99, 99, 32)    800       
                                                                 
 batch_normalization (BatchN  (None, 29, 99, 99, 32)   128       
 ormalization)                                                   
                                                                 
 activation (Activation)     (None, 29, 99, 99, 32)    0         
                                                                 
 dropout (Dropout)           (None, 29, 99, 99, 32)    0         
                                                                 
 conv3d_1 (Conv3D)           (None, 28, 98, 98, 32)    8224      
                                                                 
 batch_normalization_1 (Batc  (None, 28, 98, 98, 32)   128       
 hNormalization)                                        

### Experiment-4: 3D CNN with 3 convolutions, kernel_size=(2,2,2), batch size=32, without padding & with batch normalization

In [22]:
# start from a clean Keras session
tf.keras.backend.clear_session()

# hyperparameters
kernel_size = (2, 2, 2)
pool_size = (2, 2, 2)

# model definition: three (conv -> batch norm -> relu -> pool) stages,
# with dropout before the last pooling layer, then a dense classifier head
model = Sequential([
    Conv3D(32, kernel_size, input_shape=(frames, dim[0], dim[1], 3)),
    BatchNormalization(),
    Activation('relu'),
    MaxPooling3D(pool_size=pool_size),

    Conv3D(64, kernel_size),
    BatchNormalization(),
    Activation('relu'),
    MaxPooling3D(pool_size=pool_size),

    Conv3D(128, kernel_size),
    BatchNormalization(),
    Activation('relu'),
    Dropout(0.5),
    MaxPooling3D(pool_size=pool_size),

    Flatten(),

    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dropout(0.25),

    Dense(5, activation='softmax'),
])

# model summary
model.summary()

# compile model
model.compile(
    optimizer=Adam(),
    loss=categorical_crossentropy,
    metrics=['categorical_accuracy'],
)

# run model (no checkpoints are written)
run_model(model=model, batch_size=32, num_epochs=5, save=False)

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv3d (Conv3D)             (None, 29, 99, 99, 32)    800       
                                                                 
 batch_normalization (BatchN  (None, 29, 99, 99, 32)   128       
 ormalization)                                                   
                                                                 
 activation (Activation)     (None, 29, 99, 99, 32)    0         
                                                                 
 max_pooling3d (MaxPooling3D  (None, 14, 49, 49, 32)   0         
 )                                                               
                                                                 
 conv3d_1 (Conv3D)           (None, 13, 48, 48, 64)    16448     
                                                                 
 batch_normalization_1 (Batc  (None, 13, 48, 48, 64)   2

### Experiment-5: 3D CNN with 3 convolutions, kernel_size=(2,2,2), batch size=32, without padding (Best model)

This model has following architecture:

- Input size = (100, 100, 3)
- Batch size = 32
- 3D convolutional layer with 32 filters, standard kernel size of (2,2,2) and `relu` activation function.
- 3D maxpooling with pool size of (2,2,2)
- 3D convolutional layer with 64 filters, standard kernel size of (2,2,2) and `relu` activation function.
- 3D maxpooling with pool size of (2,2,2)
- 3D convolutional layer with 128 filters, standard kernel size of (2,2,2) and `relu` activation function.
- 3D maxpooling with pool size of (2,2,2)
- Flatten layer
- 2 Dense layers with 128 and 64 neurons respectively with `relu` activation function.
- Output layer with 5 neurons and `softmax` activation function.
- Optimiser used is `Adam`
- Loss used is `categorical_crossentropy`
- Best accuracy
    - Train: 89.64%
    - Test:  87.50%
- Best loss
    - Train: 0.2785
    - Test:  0.5316

In [24]:
# start from a clean Keras session
tf.keras.backend.clear_session()

# hyperparameters
kernel_size = (2, 2, 2)
pool_size = (2, 2, 2)

# model architecture: three conv/pool stages followed by a dense classifier head
model = Sequential([
    Conv3D(32, kernel_size, input_shape=(frames, dim[0], dim[1], 3), activation='relu'),
    MaxPooling3D(pool_size=pool_size),

    Conv3D(64, kernel_size, activation='relu'),
    MaxPooling3D(pool_size=pool_size),

    Conv3D(128, kernel_size, activation='relu'),
    MaxPooling3D(pool_size=pool_size),

    Flatten(),

    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(5, activation='softmax'),
])

# model summary
model.summary()

# compile the model
model.compile(
    optimizer=Adam(),
    loss=categorical_crossentropy,
    metrics=['categorical_accuracy'],
)

# run the model and checkpoint it under a '3D_CNN_<timestamp>/' directory
run_model(
    model=model,
    batch_size=32,
    model_init='3D_CNN',
    num_epochs=15,
    save=True,
)

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv3d (Conv3D)             (None, 29, 99, 99, 32)    800       
                                                                 
 max_pooling3d (MaxPooling3D  (None, 14, 49, 49, 32)   0         
 )                                                               
                                                                 
 conv3d_1 (Conv3D)           (None, 13, 48, 48, 64)    16448     
                                                                 
 max_pooling3d_1 (MaxPooling  (None, 6, 24, 24, 64)    0         
 3D)                                                             
                                                                 
 conv3d_2 (Conv3D)           (None, 5, 23, 23, 128)    65664     
                                                                 
 max_pooling3d_2 (MaxPooling  (None, 2, 11, 11, 128)   0

### 4. CNN-RNN Stack Architecture

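Another architecture that can solve this problem is a CNN+RNN stack. Here, transfer learning is used for the CNN part: a network pre-trained on ImageNet extracts features from every frame, and a GRU models the frame sequence.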

### Experiment-1 ResNet152V2 + GRU

In [27]:
# start from a clean Keras session
tf.keras.backend.clear_session()

# ImageNet-pretrained feature extractor without its classification top
backbone = tf.keras.applications.ResNet152V2(weights='imagenet', include_top=False)

# define model: the backbone is applied to every frame, then a GRU reads the frame sequence
model = Sequential([
    TimeDistributed(backbone, input_shape=(frames, dim[0], dim[1], 3), name='ResNet152V2', trainable=False),
    TimeDistributed(GlobalMaxPooling2D()),

    TimeDistributed(Dense(64, activation='relu')),

    Dense(256, activation='relu'),
    BatchNormalization(),

    GRU(128, return_sequences=True),
    BatchNormalization(),

    Flatten(),

    Dense(256, activation='relu'),

    Dense(5, activation='softmax'),
])

# model summary
model.summary()

# compile the model
model.compile(
    optimizer=Adam(),
    loss=categorical_crossentropy,
    metrics=['categorical_accuracy'],
)

# run model (no checkpoints are written)
run_model(model=model, batch_size=64, num_epochs=5, save=False)

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 ResNet152V2 (TimeDistribute  (None, 30, 4, 4, 2048)   58331648  
 d)                                                              
                                                                 
 time_distributed (TimeDistr  (None, 30, 2048)         0         
 ibuted)                                                         
                                                                 
 time_distributed_1 (TimeDis  (None, 30, 64)           131136    
 tributed)                                                       
                                                                 
 dense_1 (Dense)             (None, 30, 256)           16640     
                                                                 
 batch_normalization (BatchN  (None, 30, 256)          1024      
 ormalization)                                          

### Experiment-2 ResNet50V2 + GRU

In [29]:
# start from a clean Keras session
tf.keras.backend.clear_session()

# ImageNet-pretrained feature extractor without its classification top
backbone = tf.keras.applications.ResNet50V2(weights='imagenet', include_top=False)

# define model: the backbone is applied to every frame, then a GRU reads the frame sequence
model = Sequential([
    TimeDistributed(backbone, input_shape=(frames, dim[0], dim[1], 3), name='ResNet50V2', trainable=False),
    TimeDistributed(GlobalMaxPooling2D()),

    TimeDistributed(Dense(64, activation='relu')),

    Dense(256, activation='relu'),
    BatchNormalization(),

    GRU(128, return_sequences=True),
    BatchNormalization(),

    Flatten(),

    Dense(256, activation='relu'),
    Dense(128, activation='relu'),
    Dropout(0.25),

    Dense(5, activation='softmax'),
])

# model summary
model.summary()

# compile the model
model.compile(
    optimizer=Adam(),
    loss=categorical_crossentropy,
    metrics=['categorical_accuracy'],
)

# run model (no checkpoints are written)
run_model(model=model, batch_size=64, num_epochs=5, save=False)

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 ResNet50V2 (TimeDistributed  (None, 30, 4, 4, 2048)   23564800  
 )                                                               
                                                                 
 time_distributed (TimeDistr  (None, 30, 2048)         0         
 ibuted)                                                         
                                                                 
 time_distributed_1 (TimeDis  (None, 30, 64)           131136    
 tributed)                                                       
                                                                 
 dense_1 (Dense)             (None, 30, 256)           16640     
                                                                 
 batch_normalization (BatchN  (None, 30, 256)          1024      
 ormalization)                                          

### Experiment-3 ResNet152V2 + GRU with extra dense layer

In [30]:
# start from a clean Keras session
tf.keras.backend.clear_session()

# ImageNet-pretrained feature extractor without its classification top
backbone = tf.keras.applications.ResNet152V2(weights='imagenet', include_top=False)

# define model: same stack as the first ResNet152V2 experiment plus an extra Dense(128) layer
model = Sequential([
    TimeDistributed(backbone, input_shape=(frames, dim[0], dim[1], 3), name='ResNet152V2', trainable=False),
    TimeDistributed(GlobalMaxPooling2D()),

    TimeDistributed(Dense(64, activation='relu')),

    Dense(256, activation='relu'),
    BatchNormalization(),

    GRU(128, return_sequences=True),
    BatchNormalization(),

    Flatten(),

    Dense(256, activation='relu'),
    Dense(128, activation='relu'),
    Dense(5, activation='softmax'),
])

# model summary
model.summary()

# compile the model
model.compile(
    optimizer=Adam(),
    loss=categorical_crossentropy,
    metrics=['categorical_accuracy'],
)

# run model (no checkpoints are written)
run_model(model=model, batch_size=64, num_epochs=5, save=False)

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 ResNet152V2 (TimeDistribute  (None, 30, 4, 4, 2048)   58331648  
 d)                                                              
                                                                 
 time_distributed (TimeDistr  (None, 30, 2048)         0         
 ibuted)                                                         
                                                                 
 time_distributed_1 (TimeDis  (None, 30, 64)           131136    
 tributed)                                                       
                                                                 
 dense_1 (Dense)             (None, 30, 256)           16640     
                                                                 
 batch_normalization (BatchN  (None, 30, 256)          1024      
 ormalization)                                          

### Experiment-4 MobileNet + GRU with dropouts (Best Model)

- Input size = (100, 100, 3)
- Batch size = 64
- Time distributed `MobileNet` CNN layer with `imagenet` weights and all layers frozen (non-trainable)
- Time distributed Batch normalization layer
- Time distributed 2D max pooling layer with pool size of (2,2)
- Time distributed Flatten layer
- `GRU` layer with 256 neurons
- Dropout layer with 0.2
- Dense layer with 256 neurons and `relu` activation function.
- Dropout layer with 0.2
- Output layer with 5 neurons and `softmax` activation function.
- Optimiser used is `Adam`
- Loss used is `categorical_crossentropy`
- Best accuracy
    - Train: 92.12%
    - Test: 87.50%
- Best loss
    - Train: 0.2935
    - Test:  0.7299

In [33]:
# start from a clean Keras session
tf.keras.backend.clear_session()

# hyperparameter
pool_size = (2,2)

# ImageNet-pretrained feature extractor without its classification top
backbone = tf.keras.applications.MobileNet(weights='imagenet', include_top=False)

# define model: per-frame MobileNet features are normalised, pooled and flattened,
# then a GRU summarises the frame sequence for the dense classifier head
model = Sequential([
    TimeDistributed(backbone, input_shape=(frames, dim[0], dim[1], 3), name='mobilenet_gru', trainable=False),

    TimeDistributed(BatchNormalization()),

    TimeDistributed(MaxPooling2D(pool_size=pool_size)),

    TimeDistributed(Flatten()),

    GRU(256),
    Dropout(0.2),

    Dense(256, activation='relu'),
    Dropout(0.2),

    Dense(5, activation='softmax'),
])

# model summary
model.summary()

# compile the model
model.compile(
    optimizer=Adam(),
    loss='categorical_crossentropy',
    metrics=['categorical_accuracy'],
)

# run the model and checkpoint it under a 'MobileNet_GRU_<timestamp>/' directory
run_model(
    model=model,
    batch_size=64,
    model_init='MobileNet_GRU',
    num_epochs=15,
    save=True,
)

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 mobilenet_gru (TimeDistribu  (None, 30, 3, 3, 1024)   3228864   
 ted)                                                            
                                                                 
 time_distributed (TimeDistr  (None, 30, 3, 3, 1024)   4096      
 ibuted)                                                         
                                                                 
 time_distributed_1 (TimeDis  (None, 30, 1, 1, 1024)   0         
 tributed)                                                       
                                                                 
 time_distributed_2 (TimeDis  (None, 30, 1024)         0         
 tributed)                                                       
                                                                 
 gru (GRU)                   (None, 256)               9