# Gesture Recognition


In [None]:
import numpy as np
import os
import datetime
import random
import datetime

def imread(path):
    """Load the image stored at ``path`` and return its pixels as a NumPy array.

    The file is opened inside a ``with`` block so the underlying file handle
    is released as soon as the pixel data has been copied into the array,
    instead of staying open until garbage collection.
    """
    from PIL import Image
    with Image.open(path) as img:
        return np.array(img)

def imresize(img, size):
    """Resize an image array with Pillow and return the result as a NumPy array.

    ``img`` is wrapped with ``Image.fromarray``, resized to ``size`` (passed
    unchanged to ``Image.resize``) and converted back with ``np.array``.
    """
    from PIL import Image
    pil_image = Image.fromarray(img)
    resized = pil_image.resize(size)
    return np.array(resized)

We set the random seed so that the results don't vary drastically.

In [None]:
# Seed NumPy, Python's `random` module and TensorFlow (in that order) with
# the same value so repeated runs start from the same random state.
import random as rn
import tensorflow as tf

for _seed_fn in (np.random.seed, rn.seed, tf.random.set_seed):
    _seed_fn(30)

In this block, you read the folder names for training and validation. You also set the `batch_size` here. Note that you set the batch size in such a way that you are able to use the GPU in full capacity. You keep increasing the batch size until the machine throws an error.

In [None]:
# Read the train/validation index files and shuffle their rows. Each row is
# one line of the CSV; generator() later splits it on ';' to get the video
# folder name (field 0) and the class label (field 2).
# The files are opened with `with` so their handles are closed right away.
with open('Project_data/train.csv') as train_csv:
    train_doc = np.random.permutation(train_csv.readlines())
with open('Project_data/val.csv') as val_csv:
    val_doc = np.random.permutation(val_csv.readlines())

# Number of videos per batch produced by the generators.
batch_size = 23

## Generator
This is one of the most important parts of the code. The overall structure of the generator has been given. In the generator, you are going to preprocess the images, as you have images of 2 different dimensions, as well as create a batch of video frames. You have to experiment with `img_idx`, `y`, `z` and normalization such that you get high accuracy.

In [None]:
def generator(source_path, folder_list, batch_size):
    """Yield ``(batch_data, batch_labels)`` batches of video frames forever.

    Args:
        source_path: Directory containing one sub-folder of frame images per
            video.
        folder_list: Sequence of CSV rows separated by ';'. Field 0 is the
            video folder name and field 2 is the integer class label (0-4).
        batch_size: Number of videos in every full batch.

    Yields:
        A tuple ``(batch_data, batch_labels)`` where ``batch_data`` has shape
        ``(n_videos, len(img_idx), 200, 200, 3)`` with pixel values divided
        by 255, and ``batch_labels`` is the one-hot label array of shape
        ``(n_videos, 5)``. ``n_videos`` equals ``batch_size`` except for the
        last batch of a pass, which holds the remaining videos.

    Raises:
        ValueError: If ``folder_list`` is empty (the generator would otherwise
            loop forever without yielding anything).
    """
    print( 'Source path = ', source_path, '; batch size =', batch_size)

    if len(folder_list) == 0:
        raise ValueError('folder_list is empty; there is nothing to generate')

    # Frame indices read from every video. The count is drawn once per
    # generator, so all batches of one generator share the same depth.
    # The upper bound is 30 rather than 31: generator_cnn_rnn treats 30
    # frames as the whole sequence, so index 30 would be out of range.
    # NOTE(review): assumes every video folder holds 30 frames - confirm.
    img_idx = list(range(random.randint(5, 30)))

    def load_batch(rows):
        """Read the frames of the videos in ``rows`` into one batch."""
        batch_data = np.zeros((len(rows), len(img_idx), 200, 200, 3))
        batch_labels = np.zeros((len(rows), 5))  # one-hot labels
        for video, row in enumerate(rows):
            fields = row.strip().split(';')
            video_dir = source_path + '/' + fields[0]
            # os.listdir returns names in arbitrary order; sorting makes the
            # frame indices stable. Presumably the file names sort in
            # temporal order - TODO confirm.
            frames = sorted(os.listdir(video_dir))
            for slot, frame_no in enumerate(img_idx):
                image = imread(video_dir + '/' + frames[frame_no])
                # Source images come in different sizes; bring them all to
                # 200x200 so they fit the fixed batch tensor.
                resized = imresize(image.astype(np.uint8), (200, 200))
                # Store the normalised RGB channels of the *loaded* image.
                batch_data[video, slot] = resized[:, :, :3] / 255
            batch_labels[video, int(fields[2])] = 1
        return batch_data, batch_labels

    while True:
        # Fresh shuffle of the videos for every pass over the data.
        t = np.random.permutation(folder_list)
        total_videos = len(t)

        # Walk the permutation in steps of batch_size. The final slice is
        # shorter when total_videos is not a multiple of batch_size, which
        # yields the leftover videos without touching batch_size itself.
        for start in range(0, total_videos, batch_size):
            yield load_batch(t[start:start + batch_size])


Note here that a video is represented above in the generator as (number of images, height, width, number of channels). Take this into consideration while creating the model architecture.

In [None]:
# Timestamp of this run; used further down to name the checkpoint directory.
curr_dt_time = datetime.datetime.now()

# Root folders passed to the generators as `source_path`.
train_path, val_path = 'Project_data/train', 'Project_data/val'

# Dataset sizes (one CSV row per video) and the number of training epochs.
num_train_sequences, num_val_sequences = len(train_doc), len(val_doc)
num_epochs = 10

print('# training sequences =', num_train_sequences)
print('# validation sequences =', num_val_sequences)
print ('# epochs =', num_epochs)

## Model
Here you make the model using different functionalities that Keras provides. Remember to use `Conv3D` and `MaxPooling3D` and not `Conv2D` and `MaxPooling2D` for a 3D convolution model. You would want to use `TimeDistributed` while building a Conv2D + RNN model. Also remember that the last layer is the softmax. Design the network in such a way that the model is able to give good accuracy on the least number of parameters so that it can fit in the memory of the webcam.

### 3D-CNN Version 1 

In [None]:
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, GRU, Flatten, TimeDistributed, Flatten, BatchNormalization, Activation
from tensorflow.keras.layers import Conv3D, MaxPooling3D, GlobalMaxPooling3D
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras import optimizers

# 3D CNN (version 1): two Conv3D stages, global max pooling, two dense layers
# and a 5-way softmax head.
# NOTE(review): the convolutions use the default (valid) padding, so the frame
# axis shrinks at every stage; very short clips may be too short - confirm.
model = Sequential([
    # Stage 1: 32 filters of size 3x3x3 with ReLU, batch normalisation,
    # then 2x2x2 max pooling.
    Conv3D(32, (3, 3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling3D((2, 2, 2)),

    # Stage 2: 64 filters of size 3x3x3 with ReLU, batch normalisation,
    # then 2x2x2 max pooling.
    Conv3D(64, (3, 3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling3D(pool_size=(2, 2, 2)),

    # Global max pooling (rather than Flatten) reduces the stage-2 output
    # to one value per channel.
    GlobalMaxPooling3D(),

    # Two fully connected layers.
    Dense(units=1024,activation="relu"),
    Dense(units=1024,activation="relu"),

    # Softmax output: one probability per gesture class.
    Dense(units=5, activation="softmax"),
])

Now that you have written the model, the next step is to `compile` the model. When you print the `summary` of the model, you'll see the total number of parameters you have to train.

In [None]:
# Compile the version-1 network with the Adam optimiser, selected by name.
optimiser = "adam"
# The frame axis is left as None, so the model is built without fixing the
# number of frames per video.
model.build((batch_size, None, 200, 200, 3))
model.compile(optimizer=optimiser, loss='categorical_crossentropy', metrics=['categorical_accuracy'])
# summary() prints the table itself and returns None, so wrapping it in
# print() would add a stray "None" line to the output.
model.summary()

Let us create the `train_generator` and the `val_generator` which will be used in `.fit`.

In [None]:
# Separate batch generators for the training and the validation videos.
# Calling generator() only creates the generator object; files are read
# once Keras starts pulling batches from it.
train_generator = generator(source_path=train_path, folder_list=train_doc, batch_size=batch_size)
val_generator = generator(source_path=val_path, folder_list=val_doc, batch_size=batch_size)

In [None]:
# Checkpoint directory for this run, named after the run timestamp with
# spaces removed and ':' replaced by '_'.
model_name = 'model_init' + '_' + str(curr_dt_time).replace(' ','').replace(':','_') + '/'

# exist_ok=True creates the directory if needed without the
# check-then-create race of os.path.exists() followed by os.mkdir().
os.makedirs(model_name, exist_ok=True)

# Keras fills the placeholders with the epoch number and logged metrics.
filepath = model_name + 'model-{epoch:05d}-{loss:.5f}-{categorical_accuracy:.5f}-{val_loss:.5f}-{val_categorical_accuracy:.5f}.h5'

# Save the full model after every epoch. save_freq='epoch' replaces the
# deprecated `period=1` argument and means the same thing.
checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=False, save_weights_only=False, mode='auto', save_freq='epoch')

# Multiply the learning rate by 0.2 when val_loss has stopped improving for
# 2 epochs, but never go below 0.0001.
LR = ReduceLROnPlateau(monitor='val_loss', factor=0.2,
                              patience=2, min_lr=0.0001)
callbacks_list = [checkpoint, LR]

The `steps_per_epoch` and `validation_steps` are used by `fit` to decide the number of `next()` calls it needs to make.

In [None]:
# Number of batches that make up one pass over each dataset. This is a
# ceiling division written with integer arithmetic: a final partial batch
# counts as one extra step.
steps_per_epoch = -(-num_train_sequences // batch_size)
validation_steps = -(-num_val_sequences // batch_size)

Let us now fit the model. This will start training the model and with the help of the checkpoints, you'll be able to save the model at the end of each epoch.

In [None]:
# Train the version-1 network from the generators; the callbacks save a
# checkpoint every epoch and lower the learning rate when val_loss plateaus.
model.fit(
    train_generator,
    steps_per_epoch=steps_per_epoch,
    epochs=num_epochs,
    verbose=1,
    callbacks=callbacks_list,
    validation_data=val_generator,
    validation_steps=validation_steps,
    class_weight=None,
    workers=1,
    initial_epoch=0,
)

### 3D-CNN Version 2

For version 1 we observe that the model complexity is low and the model underfits: the training and validation accuracies are very low. In this version, let us introduce more convolution layers to see if we can get any gain in the performance of the model.

In [None]:
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, GRU, Flatten, TimeDistributed, Flatten, BatchNormalization, Activation
from tensorflow.keras.layers import Conv3D, MaxPooling3D, GlobalMaxPooling3D
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras import optimizers

# 3D CNN (version 2): four Conv3D stages with "same" padding, global max
# pooling, two dense layers and a 5-way softmax head.
model_ccn_3d_2 = Sequential([
    # Stage 1: 32 filters of size 3x3x3 with ReLU, batch normalisation,
    # then 2x2x2 max pooling.
    Conv3D(32, (3, 3, 3), activation='relu', padding="same"),
    BatchNormalization(),
    MaxPooling3D((2, 2, 2)),

    # Stage 2: 32 filters of size 3x3x3 with ReLU, batch normalisation,
    # then 2x2x2 max pooling.
    Conv3D(32, (3, 3, 3), activation='relu', padding="same"),
    BatchNormalization(),
    MaxPooling3D(pool_size=(2, 2, 2)),

    # Stage 3: 64 filters of size 3x3x3 with ReLU and batch normalisation.
    # The 1x1x1 pooling window leaves the tensor size unchanged.
    Conv3D(64, (3, 3, 3), activation='relu', padding="same"),
    BatchNormalization(),
    MaxPooling3D(pool_size=(1, 1, 1)),

    # Stage 4: 128 filters of size 5x5x5 with ReLU, batch normalisation,
    # then 2x2x2 max pooling.
    Conv3D(128, (5, 5, 5), activation='relu', padding="same"),
    BatchNormalization(),
    MaxPooling3D(pool_size=(2, 2, 2)),

    # Global max pooling (rather than Flatten) reduces the stage-4 output
    # to one value per channel.
    GlobalMaxPooling3D(),

    # Two fully connected layers.
    Dense(units=1024,activation="relu"),
    Dense(units=786,activation="relu"),

    # Softmax output: one probability per gesture class.
    Dense(units=5, activation="softmax"),
])

In [None]:
# End-to-end training run for the version-2 3D CNN (model_ccn_3d_2).

# Hyperparameters
batch_size = 16
num_epochs = 40
optzer = optimizers.Nadam(learning_rate=0.01)

# Re-seed NumPy, Python's `random` module and TensorFlow for this run
np.random.seed(20)
import random as rn
rn.seed(20)
import tensorflow as tf
tf.random.set_seed(20)

# Shuffled train/validation index rows. The files are opened with `with`
# so their handles are closed as soon as the lines have been read.
with open('Project_data/train.csv') as train_csv:
    train_doc = np.random.permutation(train_csv.readlines())
with open('Project_data/val.csv') as val_csv:
    val_doc = np.random.permutation(val_csv.readlines())

# Printing the training, validation sequence and num_epochs
curr_dt_time = datetime.datetime.now()
train_path = 'Project_data/train'
val_path = 'Project_data/val'
num_train_sequences = len(train_doc)
print('# training sequences =', num_train_sequences)
num_val_sequences = len(val_doc)
print('# validation sequences =', num_val_sequences)
print ('# epochs =', num_epochs)


# Optimizer and Network compiling
optimiser = optzer
model_ccn_3d_2.build((batch_size, None, 200, 200, 3))
model_ccn_3d_2.compile(optimizer=optimiser, loss='categorical_crossentropy', metrics=['categorical_accuracy'])
# summary() prints the table itself and returns None, so it is not wrapped
# in print() (that would add a stray "None" line).
model_ccn_3d_2.summary()

# Generator Instantiation
train_generator = generator(train_path, train_doc, batch_size)
val_generator = generator(val_path, val_doc, batch_size)

# Checkpoint directory for this run, named after the run timestamp
model_name = 'model_init' + '_' + str(curr_dt_time).replace(' ','').replace(':','_') + '/'

# exist_ok=True avoids the check-then-create race of exists() + mkdir()
os.makedirs(model_name, exist_ok=True)

filepath = model_name + 'model-{epoch:05d}-{loss:.5f}-{categorical_accuracy:.5f}-{val_loss:.5f}-{val_categorical_accuracy:.5f}.h5'

# Save the full model after every epoch; save_freq='epoch' replaces the
# deprecated `period=1` argument and means the same thing.
checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=False, save_weights_only=False, mode='auto', save_freq='epoch')

# Multiply the learning rate by 0.2 when val_loss has stopped improving for
# 2 epochs, but never go below 0.0001.
LR = ReduceLROnPlateau(monitor='val_loss', factor=0.2,
                              patience=2, min_lr=0.0001)
callbacks_list = [checkpoint, LR]

# Batches per pass over each dataset (ceiling division, so a final partial
# batch counts as a step)
steps_per_epoch = -(-num_train_sequences // batch_size)
validation_steps = -(-num_val_sequences // batch_size)


# Fitting the model
model_ccn_3d_2.fit(train_generator, steps_per_epoch=steps_per_epoch, epochs=num_epochs, verbose=1, 
                    callbacks=callbacks_list, validation_data=val_generator, 
                    validation_steps=validation_steps, class_weight=None, workers=1, initial_epoch=0)

### CNN - RNN Version -1 [LSTM]

In [None]:
"""
To Run the below code in a new Kernel downgrade the version of the numpy :  pip install -U numpy==1.18.5
"""


from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, GRU, Flatten, TimeDistributed, Flatten, BatchNormalization, Activation
from tensorflow.keras.layers import Conv2D, MaxPooling2D, GlobalMaxPooling2D, LSTM, Dropout
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras import optimizers

# CNN + LSTM (version 1): a 2D CNN applied to every frame through
# TimeDistributed, one LSTM over the per-frame features, and a dense head.
model_cnn_rnn_1 = Sequential([
    # Stage 1: 32 filters of size 3x3 with ReLU on each of the 30 input
    # frames (200x200 RGB), batch normalisation, then 2x2 max pooling.
    TimeDistributed(Conv2D(32, (3, 3), activation='relu', padding="same"),
                    input_shape=(30, 200, 200, 3)),
    TimeDistributed(BatchNormalization()),
    TimeDistributed(MaxPooling2D((2, 2))),

    # Stage 2: 64 filters of size 3x3 with ReLU, batch normalisation,
    # then 2x2 max pooling.
    TimeDistributed(Conv2D(64, (3, 3), activation='relu', padding="same")),
    TimeDistributed(BatchNormalization()),
    TimeDistributed(MaxPooling2D(pool_size=(2, 2))),

    # Global max pooling per frame: one feature vector for every time step.
    TimeDistributed(GlobalMaxPooling2D()),

    # LSTM over the frame features (only the last output is kept), followed
    # by a dense layer, dropout and the softmax output.
    LSTM(512, activation="relu", return_sequences=False),
    Dense(512, activation="relu"),
    Dropout(0.2),
    Dense(units=5, activation="softmax"),
])

In [None]:
# Generator Function for the CNN RNN Architecture

def generator_cnn_rnn(source_path, folder_list, batch_size):
    """Yield ``(batch_data, batch_labels)`` batches of 30-frame videos forever.

    Args:
        source_path: Directory containing one sub-folder of frame images per
            video.
        folder_list: Sequence of CSV rows separated by ';'. Field 0 is the
            video folder name and field 2 is the integer class label (0-4).
        batch_size: Number of videos in every full batch.

    Yields:
        A tuple ``(batch_data, batch_labels)`` where ``batch_data`` has shape
        ``(n_videos, 30, 200, 200, 3)`` with pixel values divided by 255, and
        ``batch_labels`` is the one-hot label array of shape
        ``(n_videos, 5)``. ``n_videos`` equals ``batch_size`` except for the
        last batch of a pass, which holds the remaining videos.

    Raises:
        ValueError: If ``folder_list`` is empty (the generator would otherwise
            loop forever without yielding anything).
    """
    print( 'Source path = ', source_path, '; batch size =', batch_size)

    if len(folder_list) == 0:
        raise ValueError('folder_list is empty; there is nothing to generate')

    # Use frames 0-29 of every video, so each sequence has the same length
    # and no padding or masking is needed.
    img_idx = list(range(30))

    def load_batch(rows):
        """Read the frames of the videos in ``rows`` into one batch."""
        batch_data = np.zeros((len(rows), len(img_idx), 200, 200, 3))
        batch_labels = np.zeros((len(rows), 5))  # one-hot labels
        for video, row in enumerate(rows):
            fields = row.strip().split(';')
            video_dir = source_path + '/' + fields[0]
            # os.listdir returns names in arbitrary order; sorting makes the
            # frame indices stable. Presumably the file names sort in
            # temporal order - TODO confirm.
            frames = sorted(os.listdir(video_dir))
            for slot, frame_no in enumerate(img_idx):
                image = imread(video_dir + '/' + frames[frame_no])
                # Source images come in different sizes; bring them all to
                # 200x200 so they fit the fixed batch tensor.
                resized = imresize(image.astype(np.uint8), (200, 200))
                # Store the normalised RGB channels of the *loaded* image.
                batch_data[video, slot] = resized[:, :, :3] / 255
            batch_labels[video, int(fields[2])] = 1
        return batch_data, batch_labels

    while True:
        # Fresh shuffle of the videos for every pass over the data.
        t = np.random.permutation(folder_list)
        total_videos = len(t)

        # Walk the permutation in steps of batch_size. The final slice is
        # shorter when total_videos is not a multiple of batch_size, which
        # yields the leftover videos without touching batch_size itself.
        for start in range(0, total_videos, batch_size):
            yield load_batch(t[start:start + batch_size])


In [None]:
# Run this for the Training.
# End-to-end training run for the version-1 CNN + LSTM (model_cnn_rnn_1).

# Hyperparameters
batch_size = 16
num_epochs = 20
optzer = optimizers.Adam(learning_rate=0.01)

# Seeding the Network Run (currently commented out)
#np.random.seed(10)
#import random as rn
#rn.seed(10)
#import tensorflow as tf
#tf.random.set_seed(10)

# Shuffled train/validation index rows. The files are opened with `with`
# so their handles are closed as soon as the lines have been read.
with open('Project_data/train.csv') as train_csv:
    train_doc = np.random.permutation(train_csv.readlines())
with open('Project_data/val.csv') as val_csv:
    val_doc = np.random.permutation(val_csv.readlines())

# Printing the training, validation sequence and num_epochs
curr_dt_time = datetime.datetime.now()
train_path = 'Project_data/train'
val_path = 'Project_data/val'
num_train_sequences = len(train_doc)
print('# training sequences =', num_train_sequences)
num_val_sequences = len(val_doc)
print('# validation sequences =', num_val_sequences)
print ('# epochs =', num_epochs)


# Optimizer and Network compiling
optimiser = optzer
model_cnn_rnn_1.build((batch_size, None, 200, 200, 3))
model_cnn_rnn_1.compile(optimizer=optzer, loss='categorical_crossentropy', metrics=['categorical_accuracy'])
# summary() prints the table itself and returns None, so it is not wrapped
# in print() (that would add a stray "None" line).
model_cnn_rnn_1.summary()

# Generator Instantiation
train_generator = generator_cnn_rnn(train_path, train_doc, batch_size)
val_generator = generator_cnn_rnn(val_path, val_doc, batch_size)

# Checkpoint directory for this run, named after the run timestamp
model_name = 'model_init' + '_' + str(curr_dt_time).replace(' ','').replace(':','_') + '/'

# exist_ok=True avoids the check-then-create race of exists() + mkdir()
os.makedirs(model_name, exist_ok=True)

filepath = model_name + 'model-{epoch:05d}-{loss:.5f}-{categorical_accuracy:.5f}-{val_loss:.5f}-{val_categorical_accuracy:.5f}.h5'

# Save the full model after every epoch; save_freq='epoch' replaces the
# deprecated `period=1` argument and means the same thing.
checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=False, save_weights_only=False, mode='auto', save_freq='epoch')

# Multiply the learning rate by 0.2 when val_loss has stopped improving for
# 2 epochs, but never go below 0.0001.
LR = ReduceLROnPlateau(monitor='val_loss', factor=0.2,
                              patience=2, min_lr=0.0001)
callbacks_list = [checkpoint, LR]

# Batches per pass over each dataset (ceiling division, so a final partial
# batch counts as a step)
steps_per_epoch = -(-num_train_sequences // batch_size)
validation_steps = -(-num_val_sequences // batch_size)


# Fitting the model
model_cnn_rnn_1.fit(train_generator, steps_per_epoch=steps_per_epoch, epochs=num_epochs, verbose=1, 
                    callbacks=callbacks_list, validation_data=val_generator, 
                    validation_steps=validation_steps, class_weight=None, workers=1, initial_epoch=0)

### CNN - RNN Version -2 [LSTM]

In [None]:
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, GRU, Flatten, TimeDistributed, Flatten, BatchNormalization, Activation
from tensorflow.keras.layers import Conv2D, MaxPooling2D, GlobalMaxPooling2D, LSTM, Dropout
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras import optimizers

# CNN + LSTM (version 2): a three-stage 2D CNN applied to every frame through
# TimeDistributed, two stacked LSTMs, and a dense head.
model_cnn_rnn_2 = Sequential([
    # Stage 1: 32 filters of size 3x3 with ReLU on each of the 30 input
    # frames (200x200 RGB), batch normalisation, then 2x2 max pooling.
    TimeDistributed(Conv2D(32, (3, 3), activation='relu', padding="same"),
                    input_shape=(30, 200, 200, 3)),
    TimeDistributed(BatchNormalization()),
    TimeDistributed(MaxPooling2D((2, 2))),

    # Stage 2: 64 filters of size 3x3 with ReLU, batch normalisation,
    # then 2x2 max pooling.
    TimeDistributed(Conv2D(64, (3, 3), activation='relu', padding="same")),
    TimeDistributed(BatchNormalization()),
    TimeDistributed(MaxPooling2D(pool_size=(2, 2))),

    # Stage 3: 128 filters of size 3x3 with ReLU, batch normalisation,
    # then 2x2 max pooling.
    TimeDistributed(Conv2D(128, (3, 3), activation='relu', padding="same")),
    TimeDistributed(BatchNormalization()),
    TimeDistributed(MaxPooling2D(pool_size=(2, 2))),

    # Global max pooling per frame: one feature vector for every time step.
    TimeDistributed(GlobalMaxPooling2D()),

    # Two stacked LSTMs: the first returns the whole sequence for the second,
    # which keeps only its last output. Then a dense layer, dropout and the
    # softmax output.
    LSTM(512, activation="relu", return_sequences=True),
    LSTM(512, activation="relu", return_sequences=False),
    Dense(256, activation="relu"),
    Dropout(0.2),
    Dense(units=5, activation="softmax"),
])

In [None]:
# Network Run 
# End-to-end training run for the version-2 CNN + LSTM (model_cnn_rnn_2).

# Hyperparameters
batch_size = 16
num_epochs = 20
optzer = optimizers.Adam(learning_rate=0.01)

# Seeding the Network Run (currently commented out)
#np.random.seed(5)
#import random as rn
#rn.seed(5)
#import tensorflow as tf
#tf.random.set_seed(5)

# Shuffled train/validation index rows. The files are opened with `with`
# so their handles are closed as soon as the lines have been read.
with open('Project_data/train.csv') as train_csv:
    train_doc = np.random.permutation(train_csv.readlines())
with open('Project_data/val.csv') as val_csv:
    val_doc = np.random.permutation(val_csv.readlines())

# Printing the training, validation sequence and num_epochs
curr_dt_time = datetime.datetime.now()
train_path = 'Project_data/train'
val_path = 'Project_data/val'
num_train_sequences = len(train_doc)
print('# training sequences =', num_train_sequences)
num_val_sequences = len(val_doc)
print('# validation sequences =', num_val_sequences)
print ('# epochs =', num_epochs)


# Optimizer and Network compiling
optimiser = optzer
model_cnn_rnn_2.build((batch_size, None, 200, 200, 3))
model_cnn_rnn_2.compile(optimizer=optzer, loss='categorical_crossentropy', metrics=['categorical_accuracy'])
# summary() prints the table itself and returns None, so it is not wrapped
# in print() (that would add a stray "None" line).
model_cnn_rnn_2.summary()

# Generator Instantiation
train_generator = generator_cnn_rnn(train_path, train_doc, batch_size)
val_generator = generator_cnn_rnn(val_path, val_doc, batch_size)

# Checkpoint directory for this run, named after the run timestamp
model_name = 'model_init' + '_' + str(curr_dt_time).replace(' ','').replace(':','_') + '/'

# exist_ok=True avoids the check-then-create race of exists() + mkdir()
os.makedirs(model_name, exist_ok=True)

filepath = model_name + 'model-{epoch:05d}-{loss:.5f}-{categorical_accuracy:.5f}-{val_loss:.5f}-{val_categorical_accuracy:.5f}.h5'

# Save the full model after every epoch; save_freq='epoch' replaces the
# deprecated `period=1` argument and means the same thing.
checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=False, save_weights_only=False, mode='auto', save_freq='epoch')

# Multiply the learning rate by 0.2 when val_loss has stopped improving for
# 2 epochs, but never go below 0.0001.
LR = ReduceLROnPlateau(monitor='val_loss', factor=0.2,
                              patience=2, min_lr=0.0001)
callbacks_list = [checkpoint, LR]

# Batches per pass over each dataset (ceiling division, so a final partial
# batch counts as a step)
steps_per_epoch = -(-num_train_sequences // batch_size)
validation_steps = -(-num_val_sequences // batch_size)


# Fitting the model
model_cnn_rnn_2.fit(train_generator, steps_per_epoch=steps_per_epoch, epochs=num_epochs, verbose=1, 
                    callbacks=callbacks_list, validation_data=val_generator, 
                    validation_steps=validation_steps, class_weight=None, workers=1, initial_epoch=0)

### CNN - RNN Version -1 [GRU]

In [None]:
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, GRU, Flatten, TimeDistributed, Flatten, BatchNormalization, Activation
from tensorflow.keras.layers import Conv2D, MaxPooling2D, GlobalMaxPooling2D, LSTM, Dropout
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras import optimizers

# CNN + GRU (version 1): a 2D CNN applied to every frame through
# TimeDistributed, one GRU over the per-frame features, and a dense head.
model_cnn_rnn_11 = Sequential([
    # Stage 1: 32 filters of size 3x3 with ReLU on each of the 30 input
    # frames (200x200 RGB), batch normalisation, then 2x2 max pooling.
    TimeDistributed(Conv2D(32, (3, 3), activation='relu', padding="same"),
                    input_shape=(30, 200, 200, 3)),
    TimeDistributed(BatchNormalization()),
    TimeDistributed(MaxPooling2D((2, 2))),

    # Stage 2: 64 filters of size 3x3 with ReLU, batch normalisation,
    # then 2x2 max pooling.
    TimeDistributed(Conv2D(64, (3, 3), activation='relu', padding="same")),
    TimeDistributed(BatchNormalization()),
    TimeDistributed(MaxPooling2D(pool_size=(2, 2))),

    # Global max pooling per frame: one feature vector for every time step.
    TimeDistributed(GlobalMaxPooling2D()),

    # GRU over the frame features (only the last output is kept), followed
    # by a dense layer, dropout and the softmax output.
    GRU(512, activation="relu", return_sequences=False),
    Dense(512, activation="relu"),
    Dropout(0.2),
    Dense(units=5, activation="softmax"),
])

In [None]:
# End-to-end training run for the version-1 CNN + GRU (model_cnn_rnn_11).

# Hyperparameters
batch_size = 16
num_epochs = 10
optzer = optimizers.Adam(learning_rate=0.01)

# Seeding the Network Run (currently commented out)
#np.random.seed(10)
#import random as rn
#rn.seed(10)
#import tensorflow as tf
#tf.random.set_seed(10)

# Shuffled train/validation index rows. The files are opened with `with`
# so their handles are closed as soon as the lines have been read.
with open('Project_data/train.csv') as train_csv:
    train_doc = np.random.permutation(train_csv.readlines())
with open('Project_data/val.csv') as val_csv:
    val_doc = np.random.permutation(val_csv.readlines())

# Printing the training, validation sequence and num_epochs
curr_dt_time = datetime.datetime.now()
train_path = 'Project_data/train'
val_path = 'Project_data/val'
num_train_sequences = len(train_doc)
print('# training sequences =', num_train_sequences)
num_val_sequences = len(val_doc)
print('# validation sequences =', num_val_sequences)
print ('# epochs =', num_epochs)


# Optimizer and Network compiling
model_cnn_rnn_11.build((batch_size, None, 200, 200, 3))
model_cnn_rnn_11.compile(optimizer=optzer, loss='categorical_crossentropy', metrics=['categorical_accuracy'])
# summary() prints the table itself and returns None, so it is not wrapped
# in print() (that would add a stray "None" line).
model_cnn_rnn_11.summary()

# Generator Instantiation
train_generator = generator_cnn_rnn(train_path, train_doc, batch_size)
val_generator = generator_cnn_rnn(val_path, val_doc, batch_size)

# Checkpoint directory for this run, named after the run timestamp
model_name = 'model_init' + '_' + str(curr_dt_time).replace(' ','').replace(':','_') + '/'

# exist_ok=True avoids the check-then-create race of exists() + mkdir()
os.makedirs(model_name, exist_ok=True)

filepath = model_name + 'model-{epoch:05d}-{loss:.5f}-{categorical_accuracy:.5f}-{val_loss:.5f}-{val_categorical_accuracy:.5f}.h5'

# Save the full model after every epoch; save_freq='epoch' replaces the
# deprecated `period=1` argument and means the same thing.
checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=False, save_weights_only=False, mode='auto', save_freq='epoch')

# Multiply the learning rate by 0.2 when val_loss has stopped improving for
# 2 epochs, but never go below 0.0001.
LR = ReduceLROnPlateau(monitor='val_loss', factor=0.2,
                              patience=2, min_lr=0.0001)
callbacks_list = [checkpoint, LR]

# Batches per pass over each dataset (ceiling division, so a final partial
# batch counts as a step)
steps_per_epoch = -(-num_train_sequences // batch_size)
validation_steps = -(-num_val_sequences // batch_size)


# Fitting the model
model_cnn_rnn_11.fit(train_generator, steps_per_epoch=steps_per_epoch, epochs=num_epochs, verbose=1, 
                    callbacks=callbacks_list, validation_data=val_generator, 
                    validation_steps=validation_steps, class_weight=None, workers=1, initial_epoch=0)

### CNN - RNN Version -2[GRU]

In [None]:
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, GRU, Flatten, TimeDistributed, Flatten, BatchNormalization, Activation
from tensorflow.keras.layers import Conv2D, MaxPooling2D, GlobalMaxPooling2D, LSTM, Dropout
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras import optimizers

# Build the CNN-RNN (GRU) network one layer at a time.
model_cnn_rnn_22 = Sequential()

# Convolutional feature extractor, wrapped in TimeDistributed so the same
# layers run on every step of the sequence axis.
# Each of the three stages is:
#   3x3 "same"-padded convolution with relu -> batch normalisation -> 2x2 max pool
# using 32, 64 and 128 filters respectively.
# Only the very first layer declares the input shape
# (presumably 30 frames of 200x200 3-channel images -- confirm against the generator).
first_layer_kwargs = {'input_shape': (30, 200, 200, 3)}
for n_filters in (32, 64, 128):
    model_cnn_rnn_22.add(
        TimeDistributed(
            Conv2D(n_filters, (3, 3), activation='relu', padding='same'),
            **first_layer_kwargs
        )
    )
    model_cnn_rnn_22.add(TimeDistributed(BatchNormalization()))
    model_cnn_rnn_22.add(TimeDistributed(MaxPooling2D(pool_size=(2, 2))))
    # Later stages take their input shape from the preceding layer.
    first_layer_kwargs = {}

# Reduce every per-step feature map to a single vector (global max pooling).
model_cnn_rnn_22.add(TimeDistributed(GlobalMaxPooling2D()))

# Two stacked GRU layers: the first returns the full sequence so the second
# can consume it; the second returns only its final output.
for keep_sequence in (True, False):
    model_cnn_rnn_22.add(GRU(512, activation='relu', return_sequences=keep_sequence))

# Classification head: dense layer, dropout, then a 5-way softmax.
model_cnn_rnn_22.add(Dense(256, activation='relu'))
model_cnn_rnn_22.add(Dropout(0.2))
model_cnn_rnn_22.add(Dense(units=5, activation='softmax'))

In [None]:
# Hyperparameters
batch_size = 16
num_epochs = 60
optzer = optimizers.Adam(learning_rate=0.01)

# Seeding the Network Run (currently disabled)
# np.random.seed(10)
# import random as rn
# rn.seed(10)
# import tensorflow as tf
# tf.random.set_seed(10)

# creating train and validation doc
# The CSV files are read inside `with` blocks so the handles are closed
# right away instead of being leaked; the lines are then shuffled.
with open('Project_data/train.csv') as train_csv:
    train_doc = np.random.permutation(train_csv.readlines())
with open('Project_data/val.csv') as val_csv:
    val_doc = np.random.permutation(val_csv.readlines())

# Printing the training, validation sequence and num_epochs
curr_dt_time = datetime.datetime.now()
train_path = 'Project_data/train'
val_path = 'Project_data/val'
num_train_sequences = len(train_doc)
print('# training sequences =', num_train_sequences)
num_val_sequences = len(val_doc)
print('# validation sequences =', num_val_sequences)
print('# epochs =', num_epochs)


# Optimizer and Network compiling
# NOTE(review): the first layer of this model already declares
# input_shape=(30, 200, 200, 3); confirm this explicit build() is still needed.
model_cnn_rnn_22.build((batch_size, None, 200, 200, 3))
model_cnn_rnn_22.compile(optimizer=optzer, loss='categorical_crossentropy', metrics=['categorical_accuracy'])
# summary() prints the table itself and returns None, so wrapping it in
# print() only adds a stray "None" line to the output.
model_cnn_rnn_22.summary()

# Generator Instantiation
train_generator = generator_cnn_rnn(train_path, train_doc, batch_size)
val_generator = generator_cnn_rnn(val_path, val_doc, batch_size)

# Model storing and callback
# Checkpoints go into a directory named after the run's start time
# (spaces removed, ':' replaced by '_').
model_name = 'model_init' + '_' + str(curr_dt_time).replace(' ', '').replace(':', '_') + '/'
os.makedirs(model_name, exist_ok=True)

filepath = model_name + 'model-{epoch:05d}-{loss:.5f}-{categorical_accuracy:.5f}-{val_loss:.5f}-{val_categorical_accuracy:.5f}.h5'

# save_freq='epoch' writes one checkpoint per epoch; it replaces the
# deprecated `period=1` argument, which newer Keras releases reject.
checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=False,
                             save_weights_only=False, mode='auto', save_freq='epoch')

# Multiply the learning rate by 0.2 after 2 epochs without val_loss
# improvement, never going below 1e-4.
LR = ReduceLROnPlateau(monitor='val_loss', factor=0.2,
                       patience=2, min_lr=0.0001)
callbacks_list = [checkpoint, LR]

# Steps per epoch / validation steps: ceiling division, so a trailing
# partial batch is counted as one extra step.
steps_per_epoch = (num_train_sequences + batch_size - 1) // batch_size
validation_steps = (num_val_sequences + batch_size - 1) // batch_size


# Fitting the model
# `workers=1` was dropped: it is the default where supported and is no longer
# accepted by newer Keras versions of fit().
model_cnn_rnn_22.fit(train_generator, steps_per_epoch=steps_per_epoch, epochs=num_epochs, verbose=1,
                     callbacks=callbacks_list, validation_data=val_generator,
                     validation_steps=validation_steps, class_weight=None, initial_epoch=0)

### Final Model 

In [None]:
"""
After multiple runs with different architectures and hyperparameters, we found that the CNN-RNN version with the GRU variant
gave us the best result. Let's rerun that network and store it for later serving.
"""

from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, GRU, Flatten, TimeDistributed, Flatten, BatchNormalization, Activation
from tensorflow.keras.layers import Conv2D, MaxPooling2D, GlobalMaxPooling2D, LSTM, Dropout
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras import optimizers

# Build the final CNN-RNN (GRU) network one layer at a time.
model_cnn_rnn_11 = Sequential()

# Convolutional feature extractor, wrapped in TimeDistributed so the same
# layers run on every step of the sequence axis.
# Each of the two stages is:
#   3x3 "same"-padded convolution with relu -> batch normalisation -> 2x2 max pool
# using 32 and 64 filters respectively.
# Only the very first layer declares the input shape
# (presumably 30 frames of 200x200 3-channel images -- confirm against the generator).
first_layer_kwargs = {'input_shape': (30, 200, 200, 3)}
for n_filters in (32, 64):
    model_cnn_rnn_11.add(
        TimeDistributed(
            Conv2D(n_filters, (3, 3), activation='relu', padding='same'),
            **first_layer_kwargs
        )
    )
    model_cnn_rnn_11.add(TimeDistributed(BatchNormalization()))
    model_cnn_rnn_11.add(TimeDistributed(MaxPooling2D(pool_size=(2, 2))))
    # Later stages take their input shape from the preceding layer.
    first_layer_kwargs = {}

# Reduce every per-step feature map to a single vector (global max pooling).
model_cnn_rnn_11.add(TimeDistributed(GlobalMaxPooling2D()))

# A single GRU layer that returns only its final output, followed by the
# classification head: dense layer, dropout, then a 5-way softmax.
model_cnn_rnn_11.add(GRU(512, activation='relu', return_sequences=False))
model_cnn_rnn_11.add(Dense(512, activation='relu'))
model_cnn_rnn_11.add(Dropout(0.2))
model_cnn_rnn_11.add(Dense(units=5, activation='softmax'))

###################### MODEL RUN ###############################


# Hyperparameters
batch_size = 16
num_epochs = 40
optzer = optimizers.Adam(learning_rate=0.01)

# Seeding the Network Run (currently disabled)
# np.random.seed(10)
# import random as rn
# rn.seed(10)
# import tensorflow as tf
# tf.random.set_seed(10)

# creating train and validation doc
# The CSV files are read inside `with` blocks so the handles are closed
# right away instead of being leaked; the lines are then shuffled.
with open('Project_data/train.csv') as train_csv:
    train_doc = np.random.permutation(train_csv.readlines())
with open('Project_data/val.csv') as val_csv:
    val_doc = np.random.permutation(val_csv.readlines())

# Printing the training, validation sequence and num_epochs
curr_dt_time = datetime.datetime.now()
train_path = 'Project_data/train'
val_path = 'Project_data/val'
num_train_sequences = len(train_doc)
print('# training sequences =', num_train_sequences)
num_val_sequences = len(val_doc)
print('# validation sequences =', num_val_sequences)
print('# epochs =', num_epochs)


# Optimizer and Network compiling
# NOTE(review): the first layer of this model already declares
# input_shape=(30, 200, 200, 3); confirm this explicit build() is still needed.
model_cnn_rnn_11.build((batch_size, None, 200, 200, 3))
model_cnn_rnn_11.compile(optimizer=optzer, loss='categorical_crossentropy', metrics=['categorical_accuracy'])
# summary() prints the table itself and returns None, so wrapping it in
# print() only adds a stray "None" line to the output.
model_cnn_rnn_11.summary()

# Generator Instantiation
train_generator = generator_cnn_rnn(train_path, train_doc, batch_size)
val_generator = generator_cnn_rnn(val_path, val_doc, batch_size)

# Model storing and callback
# Checkpoints go into a directory named after the run's start time
# (spaces removed, ':' replaced by '_').
model_name = 'model_init' + '_' + str(curr_dt_time).replace(' ', '').replace(':', '_') + '/'
os.makedirs(model_name, exist_ok=True)

filepath = model_name + 'model-{epoch:05d}-{loss:.5f}-{categorical_accuracy:.5f}-{val_loss:.5f}-{val_categorical_accuracy:.5f}.h5'

# save_freq='epoch' writes one checkpoint per epoch; it replaces the
# deprecated `period=1` argument, which newer Keras releases reject.
checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=False,
                             save_weights_only=False, mode='auto', save_freq='epoch')

# Multiply the learning rate by 0.2 after 2 epochs without val_loss
# improvement, never going below 1e-4.
LR = ReduceLROnPlateau(monitor='val_loss', factor=0.2,
                       patience=2, min_lr=0.0001)
callbacks_list = [checkpoint, LR]

# Steps per epoch / validation steps: ceiling division, so a trailing
# partial batch is counted as one extra step.
steps_per_epoch = (num_train_sequences + batch_size - 1) // batch_size
validation_steps = (num_val_sequences + batch_size - 1) // batch_size


# Fitting the model
# `workers=1` was dropped: it is the default where supported and is no longer
# accepted by newer Keras versions of fit().
model_cnn_rnn_11.fit(train_generator, steps_per_epoch=steps_per_epoch, epochs=num_epochs, verbose=1,
                     callbacks=callbacks_list, validation_data=val_generator,
                     validation_steps=validation_steps, class_weight=None, initial_epoch=0)