In [1]:
import numpy as np
import os
from cv2 import resize
from imageio import imread
import datetime
import matplotlib.pyplot as plt
import math

In [3]:
from tensorflow import keras
import tensorflow as tf
from tensorflow import debugging
from keras.callbacks import ModelCheckpoint
from keras.layers import TimeDistributed
import warnings

In [4]:
warnings.filterwarnings('ignore')
tf.compat.v1.logging.set_verbosity('ERROR')
np.random.seed(30)

In [5]:
train_doc = np.random.permutation(open('./Project_data/train.csv').readlines())
val_doc = np.random.permutation(open('./Project_data/val.csv').readlines())

## Generator


In [6]:
# The custom generator is a class MyGen which extends the keras.utils.Sequence and overrides all the necessary methods
# This method was chosen over a simple generator as it provides better modularity as easier modification of methods.

class MyGen(keras.utils.Sequence):
    # The initializer for the MyGen class
    def __init__(self, source_path, batch_size, n_images= 20, im_size= 200):
        self.l = []
        self.batch_size = batch_size  # batch_size, i.e. no. of videos in a batch
        self.n_images= n_images       # no. of frames per video
        self.im_size= im_size         # hxw of the image to resize,'h'='w' as we have opted to resize the images to a square
        self.source_path = source_path # source path of the file (videos)
        self.folder_names = np.array(os.listdir(self.source_path))  # ndarray of 'folder_names'
        np.random.shuffle(self.folder_names)   # shuffling the 'folder_names'
        
        # A dictionary assigning values arbitarily to the gestures
        self.gestures= {'Swipe Right': 0, 'Swipe Left': 1, 'Stop': 2, 'Thumbs Down': 3, 'Thumbs Up': 4}
        
        # Creating a one_hot_vec for each gesture
        # this is a 5x5 ndarray each column represents a vector for the corresponding gesture
        self.one_hot_vec= np.zeros_like(0., shape= (5,5))
        for rw in range(0, 5):
            for cl in range(0, 5):
                if rw==cl:
                    self.one_hot_vec[rw, cl]+= 1.
                    break
        
        # Extracting the labels for the folders depending on their names using a custom method 'FLabels()'
        self.folder_labels= []
        for L in self.folder_names:
            self.folder_labels.append((self.one_hot_vec[self.gestures[self.FLabels(L)]]))
            
    # This method represents the no. of batches of a given size in the dataset
    def __len__(self):  
        return math.floor(len(self.folder_names)/ self.batch_size)
    
    # This method fetches the batches
    def __getitem__(self, idx): 
        self.l.append(idx)
        self.batch_count= 0
        self.batch_names= self.folder_names[idx*self.batch_size : (idx + 1)*self.batch_size]
        self.batch_labels= self.folder_labels[idx*self.batch_size : (idx + 1)*self.batch_size]
        
        self.batch_data= np.zeros_like(0., shape= (self.batch_size, self.n_images, self.im_size, self.im_size, 3))
        
        for name in self.batch_names:
            self.frm= os.listdir(f'{self.source_path}{name}/')
            if len(self.frm) > self.n_images:
                self.diff= len(self.frm) - self.n_images
                self.frm= self.frm[math.floor(self.diff/2):(len(self.frm) - math.ceil(self.diff/2))]
            self.frm_add= [ (f'{name}/' + f) for f in self.frm ] 
            
            # The frames are resized and normalized by dividing each pixel by 255. 
            self.frames= np.array([resize(imread(f'{self.source_path}{img}'), (self.im_size,self.im_size))/255. for img in self.frm_add])
        
            self.batch_data[self.batch_count]= self.frames
            self.batch_count+=1
        
        self.batch_x = self.batch_data.astype(np.float32)
        self.batch_y = np.array(self.batch_labels)
        
        return self.batch_x, self.batch_y
        
        
    
    # Method to extract the labels of the videos
    def FLabels(self, arr):
        self.arr = arr
        if ('Thumbs' in self.arr) or ('thumbs' in self.arr):
            if ('Down' in self.arr) or ('down' in self.arr):
                return 'Thumbs Down'
            else:
                return 'Thumbs Up'
    
        elif ('Swipe' in self.arr) or ('swipe' in self.arr):
            if ('Left' in self.arr) or ('left' in self.arr):
                return 'Swipe Left'
            else:
                return 'Swipe Right'
        elif ('Stop' in self.arr) or ('stop' in self.arr):
            return 'Stop'
        else:
            return np.NaN      

In [29]:
# Latest Date time, this will be used for the checkpoints
curr_dt_time = datetime.datetime.now()
time= str(curr_dt_time.time())
date= str(curr_dt_time.date())
T= time.split(':')
new_time= '_' + T[0]+ '_' + T[1]
new_date= date.replace('-', '_')
new_date_time= new_date + new_time


train_path = './Project_data/train/'
val_path = './Project_data/val/'
num_train_sequences = len(train_doc)
print('# training sequences =', num_train_sequences)
num_val_sequences = len(val_doc)
print('# validation sequences =', num_val_sequences)

# training sequences = 663
# validation sequences = 100


## Model 1

In [30]:
model= keras.Sequential([
    # Input layer for the model
    # '25' is the no. of images/frames of the video selected for analysis
    # '120' is the shape of the image, i.e. it is both the height and the width of the image
    # '3' is the no. of channels RGB
    keras.layers.InputLayer(input_shape= (25,120,120,3)),
    
    # TimeDistributed 2D Convolutions to convolve each frame respectively
    # Note: Channels here would be refering to the no. of 3D solids outputted by the convolutions keeping, 
    #       the 'RGB' channels constant, essentially we are increasing the no. of frames per video in these
    #       layers this convention will be followed in later models also
    
    # In this block the following are the layers:
    # 1.'TimeDistributed 3x3 Conv2D',outputting 25 channels, activation 'relu'
    # 2.'TimeDistributed 2x2 MaxPool2D'
    # 3.'TimeDistributed Dropout layer', rate '0.2'
    # This is repeated a second time on with the Conv2D layer outputting 35 channels
    # and the Dropout layer has a rate of '0.3'
    # This marks the end of the TimeDistributed 2D Convolutions
    
    TimeDistributed(keras.layers.Conv2D(25, 3, activation= 'relu')),
    TimeDistributed(keras.layers.MaxPool2D(pool_size=(2, 2), padding='same')),
    TimeDistributed(keras.layers.Dropout(rate= 0.2)),
    TimeDistributed(keras.layers.Conv2D(35, 3, activation= 'relu')),
    TimeDistributed(keras.layers.MaxPool2D(pool_size=(2, 2), padding= 'same')),
    TimeDistributed(keras.layers.Dropout(rate= 0.3)),
    
    #3D Convolutions to convolve over the time axis
    # 3D Convolutions to convolve over the time axis
    # Note: Channels here would be refering to the no. of 3D solids outputted by the convolutions, this convention will
    #       be followed in later models also.
    
    # Following are the layers:
    # 1. '3x3x3 Conv3D', outputting 10 channels with an activation of 'relu'
    # 2. '2x2x2 MaxPool3D'
    # 3. '1x2x2 MaxPool3D', This is aimed at preserving the data on the time axis, as 3D convolutions reduce data on all axes
    # This marks the end of the 3D convolutions
    keras.layers.Conv3D(10, 3, activation= 'relu'),
    keras.layers.MaxPool3D(pool_size=(2, 2, 2), padding= 'same'),
    keras.layers.MaxPool3D(pool_size=(1, 2, 2), padding= 'same'),
    
    # TimeDistributed Flattened layer
    TimeDistributed(keras.layers.Flatten()),
    
    # RNN Layers
    
    # GRU Layer with return_sequences= 'True' outputs the data for every timestep, ie it preserves the 'TimeDistributed'
    # property of the data
    
    # Following are the specifications for the GRU Layer:
    # output_dim: 10; Reason: Experimentally determine optimal value
    # return_sequences: True; Reason: Explained above
    # activation: 'relu'; Reason: Found to provide optimal results for image data
    # kernel_regularizer= 'l2'; Reason: Experimentally determine optimal value
    # recurrent_regularizer= 'l1'; Reason: Experimentally determine optimal value
    # use_bias= True; Reason: Experimentally determine optimal value
    # bias_initializer= 'zeros'; Reason: initializing bias with zeros helps the model to learn biases from scratch
    
    keras.layers.GRU(10, 
                     return_sequences= True, 
                     activation= 'relu', 
                     kernel_regularizer= 'l2',
                     recurrent_regularizer= 'l1',
                     use_bias= True,
                     bias_initializer= 'zeros',
                     ),
    
    # GRU Layer with return_sequences= 'False' outputs the data for the final timestep,ie it would be like 
    # watching the data in a sequence, and predicting the outcome
    
    # Following are the specifications for the GRU Layer:
    # output_dim: 5; Reason: Only 5 classes present
    # return_sequences: False; Reason: Explained above
    # activation: 'softmax'; Reason: Output layer must be softmax for crossentropy loss
    # kernel_regularizer= 'l2'; Reason: Experimentally determine optimal value
    # recurrent_regularizer= 'l1'; Reason: Experimentally determine optimal value
    # use_bias= True; Reason: Experimentally determine optimal value
    # bias_initializer= 'zeros'; Reason: initializing bias with zeros helps the model to learn biases from scratch
    
    keras.layers.GRU(5, 
                     return_sequences= False, 
                     activation= 'softmax', 
                     kernel_regularizer= 'l2',
                     recurrent_regularizer= 'l1',
                     use_bias= True,
                     bias_initializer= 'zeros',
                     )
])

In [31]:
# Compiling the model with:
# optimizer: 'adam'
# loss: 'categorical_crossentropy'
# metrics: 'categorical_accuracy'
# run_eagerly: True

model.compile(optimizer= 'adam', loss= 'categorical_crossentropy', metrics=['categorical_accuracy'], run_eagerly= True)
print (model.summary())

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
time_distributed_14 (TimeDis (None, 25, 118, 118, 25)  700       
_________________________________________________________________
time_distributed_15 (TimeDis (None, 25, 59, 59, 25)    0         
_________________________________________________________________
time_distributed_16 (TimeDis (None, 25, 59, 59, 25)    0         
_________________________________________________________________
time_distributed_17 (TimeDis (None, 25, 57, 57, 35)    7910      
_________________________________________________________________
time_distributed_18 (TimeDis (None, 25, 29, 29, 35)    0         
_________________________________________________________________
time_distributed_19 (TimeDis (None, 25, 29, 29, 35)    0         
_________________________________________________________________
conv3d_4 (Conv3D)            (None, 23, 27, 27, 10)   

In [32]:
#Using a batch size 20
batch_size= 20

In [33]:
# Initializing the train and val generators with:
# batch_size: 20
# im_size: 120
# n_images: 25
train_generator = MyGen(train_path, batch_size, im_size=120, n_images= 25)
val_generator = MyGen(val_path, batch_size, im_size=120, n_images= 25)

In [34]:
# Constructing the model name
model_name = 'model_1' + '_' + new_date_time + '.h5'
    
# Creating a path to store model
if not os.path.exists(f'./models/{model_name}/'):
    os.mkdir(f'./models/{model_name}/')
    filepath = f'./models/{model_name}/'    
else:
    os.mkdir(f'./models/{model_name}_2/')
    filepath = f'./models/{model_name}_2/'
        
# Setting up the model checkpoint and the callbacks_list
checkpoint = ModelCheckpoint(filepath, monitor='val_categorical_accuracy', verbose=2, save_best_only=True, save_weights_only=False, mode='max')
callbacks_list = [checkpoint]

In [35]:
# Calculating the steps per epoch for train and val generators
if (num_train_sequences%batch_size) == 0:
    steps_per_epoch = int(num_train_sequences/batch_size)
else:
    steps_per_epoch = int((num_train_sequences//batch_size) - 1)

if (num_val_sequences%batch_size) == 0:
    validation_steps = int(num_val_sequences/batch_size)
else:
    validation_steps = int((num_val_sequences//batch_size) - 1)

In [36]:
# Setting Number of epochs to 35
num_epochs = 35

In [37]:
# Fitting the first model
history= model.fit(
    train_generator, 
    epochs= num_epochs,
    steps_per_epoch= steps_per_epoch,
    verbose= 1, 
    callbacks= callbacks_list, 
    validation_data= val_generator,
    validation_steps= validation_steps
)

Epoch 1/35
Epoch 00001: val_categorical_accuracy improved from -inf to 0.23000, saving model to ./models\model_1_2021_01_04_22_43.h5
Epoch 2/35
Epoch 00002: val_categorical_accuracy improved from 0.23000 to 0.26000, saving model to ./models\model_1_2021_01_04_22_43.h5
Epoch 3/35
Epoch 00003: val_categorical_accuracy improved from 0.26000 to 0.41000, saving model to ./models\model_1_2021_01_04_22_43.h5
Epoch 4/35
Epoch 00004: val_categorical_accuracy improved from 0.41000 to 0.43000, saving model to ./models\model_1_2021_01_04_22_43.h5
Epoch 5/35
Epoch 00005: val_categorical_accuracy improved from 0.43000 to 0.46000, saving model to ./models\model_1_2021_01_04_22_43.h5
Epoch 6/35
Epoch 00006: val_categorical_accuracy did not improve from 0.46000
Epoch 7/35
Epoch 00007: val_categorical_accuracy improved from 0.46000 to 0.53000, saving model to ./models\model_1_2021_01_04_22_43.h5
Epoch 8/35
Epoch 00008: val_categorical_accuracy did not improve from 0.53000
Epoch 9/35
Epoch 00009: val_cat

Epoch 24/35
Epoch 00024: val_categorical_accuracy did not improve from 0.75000
Epoch 25/35
Epoch 00025: val_categorical_accuracy did not improve from 0.75000
Epoch 26/35
Epoch 00026: val_categorical_accuracy did not improve from 0.75000
Epoch 27/35
Epoch 00027: val_categorical_accuracy did not improve from 0.75000
Epoch 28/35
Epoch 00028: val_categorical_accuracy did not improve from 0.75000
Epoch 29/35
Epoch 00029: val_categorical_accuracy did not improve from 0.75000
Epoch 30/35
Epoch 00030: val_categorical_accuracy did not improve from 0.75000
Epoch 31/35
Epoch 00031: val_categorical_accuracy did not improve from 0.75000
Epoch 32/35
Epoch 00032: val_categorical_accuracy did not improve from 0.75000
Epoch 33/35
Epoch 00033: val_categorical_accuracy did not improve from 0.75000
Epoch 34/35
Epoch 00034: val_categorical_accuracy did not improve from 0.75000
Epoch 35/35
Epoch 00035: val_categorical_accuracy did not improve from 0.75000


In [38]:
# Validation accuracy and loss
vacc_1= history.history.get('val_categorical_accuracy')
vloss_1= history.history.get('val_loss') 

In [39]:
# Training accuracy and loss
cacc_1= history.history.get('categorical_accuracy')
closs_1= history.history.get('loss')

## Model 2

In [40]:
# Latest Date time, this will be used for the checkpoints
curr_dt_time = datetime.datetime.now()
time= str(curr_dt_time.time())
date= str(curr_dt_time.date())
T= time.split(':')
new_time= '_' + T[0]+ '_' + T[1]
new_date= date.replace('-', '_')
new_date_time= new_date + new_time


train_path = './Project_data/train/'
val_path = './Project_data/val/'
num_train_sequences = len(train_doc)
print('# training sequences =', num_train_sequences)
num_val_sequences = len(val_doc)
print('# validation sequences =', num_val_sequences)

# training sequences = 663
# validation sequences = 100


In [51]:
model= keras.Sequential([
    # Input layer for the model
    # '20' is the no. of images/frames of the video selected for analysis we have reduced it from '25'
    # '200' is the shape of the image, i.e. it is both the height and the width of the image
    # '3' is the no. of channels RGB
    keras.layers.InputLayer(input_shape= (25,200,200,3)),
    
    # TimeDistributed 2D Convolutions to convolve each frame respectively
    # Note: Channels here would be refering to the no. of 3D solids outputted by the convolutions keeping, 
    #       the 'RGB' channels constant, essentially we are increasing the no. of frames per video as mentioned above
    
    # In this block the following are the layers:
    # 1.'TimeDistributed 3x3 Conv2D',outputting 25 channels, activation 'relu'
    # 2.'TimeDistributed 2x2 MaxPool2D'
    # 3.'TimeDistributed Dropout layer', rate '0.2'
    # This is repeated a second time on with the Conv2D layer outputting 35 channels
    # and the Dropout layer has a rate of '0.3'
    # This marks the end of the TimeDistributed 2D Convolutions
    TimeDistributed(keras.layers.Conv2D(25, 3, activation= 'relu')),
    TimeDistributed(keras.layers.MaxPool2D(pool_size=(2, 2), padding='same')),
    TimeDistributed(keras.layers.Dropout(rate= 0.2)),
    TimeDistributed(keras.layers.Conv2D(35, 3, activation= 'relu')),
    TimeDistributed(keras.layers.MaxPool2D(pool_size=(2, 2), padding= 'same')),
    TimeDistributed(keras.layers.Dropout(rate= 0.3)),
    
    # 3D Convolutions to convolve over the time axis
    # Note: Channels here would be refering to the no. of 3D solids outputted by the convolutions,as mentioned above
    
    # Following are the layers:
    # 1. '3x3x3 Conv3D', outputting 20 channels with an activation of 'relu'
    # 2. '2x2x2 MaxPool3D',
    # This is repeated again only changing the output channels for the 'Conv3D' to '20'
    # This is followed by a Dropout layer with rate '0.2'
    # This marks the end of the 3D convolutions
    keras.layers.Conv3D(20, 3, activation= 'relu'),
    keras.layers.MaxPool3D(pool_size=(2, 2, 2), padding= 'same'),
    keras.layers.Conv3D(20, 3, activation= 'relu'),
    keras.layers.Dropout(rate= 0.2),
    
    # Convolution LSTM
    # In this layer we use a kernel of '2' which means 2 in every direction or (2x2x2x2), we use the same regularizers in 
    # the above model along with same bias values
    # we output 10 convolutions from this layer
    keras.layers.ConvLSTM2D(10, 
                            2,
                            kernel_initializer= 'normal',
                            unit_forget_bias= True,
                            activation= 'relu', 
                            kernel_regularizer= 'l2', 
                            recurrent_regularizer= 'l1', 
                            use_bias= True, 
                            bias_initializer= 'zeros',
                           ),
    
    # TimeDistributed Flatten
    TimeDistributed(keras.layers.Flatten()),
    
    # RNN Layers
    # GRU Layer with TimeDistributed Output of 5
    # activation= 'relu'; Reason: Proven to be good with image data also results above were favourable
    # kernel_regularize= 'l2'; Reason: Experimentally prove optimal value
    # recurrent_regularize= 'l2'; Reason: We changed it from 'l1' as above model had difficulty generalizing we also added
    #                                    an extra dropout layer
    # use_bias= True; Reason: Favorable results
    # bias_initializer= zeros; Reason: Same as above 
    keras.layers.GRU(5, 
                     return_sequences= True, 
                     activation= 'relu', 
                     kernel_regularizer= 'l2',
                     recurrent_regularizer= 'l2',
                     use_bias= True,
                     bias_initializer= 'zeros',
                     ),
    
    # Flattening data
    keras.layers.Flatten(),
    
    # Dense layer with softmax for output
    keras.layers.Dense(5, activation= 'softmax')
])

In [52]:
# Compiling the model with:
# optimizer: 'adam'
# loss: 'categorical_crossentropy'
# metrics: 'categorical_accuracy'
# run_eagerly: True
# Same as above

model.compile(optimizer= 'adam', loss= 'categorical_crossentropy', metrics=['categorical_accuracy'], run_eagerly= True)
print (model.summary())

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
time_distributed_28 (TimeDis (None, 25, 198, 198, 25)  700       
_________________________________________________________________
time_distributed_29 (TimeDis (None, 25, 99, 99, 25)    0         
_________________________________________________________________
time_distributed_30 (TimeDis (None, 25, 99, 99, 25)    0         
_________________________________________________________________
time_distributed_31 (TimeDis (None, 25, 97, 97, 35)    7910      
_________________________________________________________________
time_distributed_32 (TimeDis (None, 25, 49, 49, 35)    0         
_________________________________________________________________
time_distributed_33 (TimeDis (None, 25, 49, 49, 35)    0         
_________________________________________________________________
conv3d_7 (Conv3D)            (None, 23, 47, 47, 20)   

In [53]:
# Reduced batch size
batch_size= 15

In [54]:
# Initializing the train and val generators with:
# batch_size: 15
# im_size: 200
# n_images: 20
# we have set the default resize to 200 and n_images to 20
# so even if we donot enter these values they will still be used

train_generator = MyGen(train_path, batch_size, im_size=200, n_images= 25)
val_generator = MyGen(val_path, batch_size, im_size=200, n_images= 25)

In [55]:
# Constructing the model name
model_name = 'model_2' + '_' + new_date_time + '.h5'
    
# Creating a path to store model
if not os.path.exists(f'./models/{model_name}/'):
    os.mkdir(f'./models/{model_name}/')
    filepath = f'./models/{model_name}/'    
else:
    os.mkdir(f'./models/{model_name}_2/')
    filepath = f'./models/{model_name}_2/'
        
# Setting up the model checkpoint and the callbacks_list
checkpoint = ModelCheckpoint(filepath, monitor='val_categorical_accuracy', verbose=2, save_best_only=True, save_weights_only=False, mode='max')
callbacks_list = [checkpoint]

In [56]:
# Calculating the steps per epoch for train and val generators
if (num_train_sequences%batch_size) == 0:
    steps_per_epoch = int(num_train_sequences/batch_size)
else:
    steps_per_epoch = int((num_train_sequences//batch_size) - 1)

if (num_val_sequences%batch_size) == 0:
    validation_steps = int(num_val_sequences/batch_size)
else:
    validation_steps = int((num_val_sequences//batch_size) - 1)

In [57]:
num_epochs = 45

In [58]:
history= model.fit(
    train_generator, 
    epochs= num_epochs,
    steps_per_epoch= steps_per_epoch,
    verbose= 1, 
    callbacks= callbacks_list, 
    validation_data= val_generator,
    validation_steps= validation_steps
)

Epoch 1/45
Epoch 00001: val_categorical_accuracy improved from -inf to 0.21333, saving model to ./models/model_2_2021_01_04_23_10.h5_2\
Epoch 2/45
Epoch 00002: val_categorical_accuracy improved from 0.21333 to 0.24000, saving model to ./models/model_2_2021_01_04_23_10.h5_2\
Epoch 3/45
Epoch 00003: val_categorical_accuracy did not improve from 0.24000
Epoch 4/45
Epoch 00004: val_categorical_accuracy improved from 0.24000 to 0.30667, saving model to ./models/model_2_2021_01_04_23_10.h5_2\
Epoch 5/45

KeyboardInterrupt: 

In [48]:
vacc_2= history.history.get('val_categorical_accuracy')
vloss_2= history.history.get('val_loss')

In [49]:
cacc_2= history.history.get('categorical_accuracy')
closs_2= history.history.get('loss')

## Model 3

In [7]:
# Latest Date time, this will be used for the checkpoints
curr_dt_time = datetime.datetime.now()
time= str(curr_dt_time.time())
date= str(curr_dt_time.date())
T= time.split(':')
new_time= '_' + T[0]+ '_' + T[1]
new_date= date.replace('-', '_')
new_date_time= new_date + new_time


train_path = './Project_data/train/'
val_path = './Project_data/val/'
num_train_sequences = len(train_doc)
print('# training sequences =', num_train_sequences)
num_val_sequences = len(val_doc)
print('# validation sequences =', num_val_sequences)

# training sequences = 663
# validation sequences = 100


In [8]:
model= keras.Sequential([
    # Input layer for the model
    # '20' is the no. of images/frames of the video selected for analysis
    # '160' is the shape of the image, i.e. it is both the height and the width of the image
    # '3' is the no. of channels RGB
    
    keras.layers.InputLayer(input_shape= (20,160,160,3)),
    
    # TimeDistributed 2D Convolutions to convolve each frame respectively
    
    # In this block the following are the layers:
    # 1.'TimeDistributed 3x3 Conv2D',outputting 25 channels, activation 'relu'
    # 2.'TimeDistributed 2x2 MaxPool2D'
    # 3.'TimeDistributed Dropout layer', rate '0.2'
    # This is repeated a second time on with the Conv2D layer outputting 35 channels
    # and the Dropout layer has a rate of '0.3'
    # This marks the end of the TimeDistributed 2D Convolutions
    
    TimeDistributed(keras.layers.Conv2D(25, 3, activation= 'relu')),
    TimeDistributed(keras.layers.MaxPool2D(pool_size=(2, 2), padding='same')),
    TimeDistributed(keras.layers.Dropout(rate= 0.2)),
    
    TimeDistributed(keras.layers.Conv2D(35, 3, activation= 'relu')),
    TimeDistributed(keras.layers.MaxPool2D(pool_size=(2, 2), padding= 'same')),
    TimeDistributed(keras.layers.Dropout(rate= 0.3)),
    
    # 3D Convolutions to convolve over the time axis
    
    # Following are the layers:
    # 1. '3x3x3 Conv3D', outputting 25 channels with an activation of 'relu'
    # 2. '2x2x2 MaxPool3D',
    # This is repeated again only changing the output channels for the 'Conv3D' to 20 and reducing the kernel size to '3x3x3'
    # This is followed by a Dropout layer with rate '0.2'
    # This marks the end of the 3D convolutions
    keras.layers.Conv3D(25, 3, activation= 'relu'),
    keras.layers.MaxPool3D(pool_size=(2, 2, 2), padding= 'same'),
    
    keras.layers.Conv3D(20, 2,activation= 'relu'),
    keras.layers.MaxPool3D(pool_size=(2, 2, 2), padding= 'same'),
    keras.layers.Dropout(rate=0.2),
   
    # Flattening out the data in a TimeDistributed Layer
    # This is done to preserve the data on the time axis
    TimeDistributed(keras.layers.Flatten()),

    # RNN Layers
    
    # A GRU layer with return_sequences= True
    # This outputs each frame in a TimeDistributed manner
    # kernel_regularizer= 'l2'; Reason: Found to be optimal
    # recurrent_regularizer= 'l2'; Reason: Found to be optimal
    # dropout= 0.3; Reason: Attempt to make model more robust
    # recurrent_dropout= 0.2; Reason: Attempt to make model more robust
    
    keras.layers.GRU(30, 
                     return_sequences= True,
                     activation= 'relu',
                     kernel_regularizer= 'l2',
                     recurrent_regularizer= 'l2',
                     use_bias= True,
                     bias_initializer= 'zeros',
                     dropout= 0.3,
                     recurrent_dropout= 0.2
                     ),
    
    # A GRU layer with return_sequences= False
    # This only produces outputs without the time axis, this can be imagined as viewing the whole video as a sequence
    # This marks the end of the RNN layers
    keras.layers.GRU(5, 
                     return_sequences= False, 
                     activation= 'softmax', 
                     recurrent_activation= 'sigmoid',
                     kernel_regularizer= 'l2',
                     recurrent_regularizer= 'l2',
                     use_bias= True,
                     bias_initializer= 'zeros',
                     )
])

In [9]:
# Compiling the model with:
# optimizer: 'adam'
# loss: 'categorical_crossentropy'
# metrics: 'categorical_accuracy'
# run_eagerly: True
# Same as above

model.compile(optimizer= 'adam', loss= 'categorical_crossentropy', metrics=['categorical_accuracy'], run_eagerly= True)
print (model.summary())

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
time_distributed (TimeDistri (None, 20, 158, 158, 25)  700       
_________________________________________________________________
time_distributed_1 (TimeDist (None, 20, 79, 79, 25)    0         
_________________________________________________________________
time_distributed_2 (TimeDist (None, 20, 79, 79, 25)    0         
_________________________________________________________________
time_distributed_3 (TimeDist (None, 20, 77, 77, 35)    7910      
_________________________________________________________________
time_distributed_4 (TimeDist (None, 20, 39, 39, 35)    0         
_________________________________________________________________
time_distributed_5 (TimeDist (None, 20, 39, 39, 35)    0         
_________________________________________________________________
conv3d (Conv3D)              (None, 18, 37, 37, 25)    2

In [10]:
# Increased batch size
batch_size= 25

In [11]:
# Initializing the train and val generators with:
# batch_size: 25
# im_size: 160
# n_images: 20

train_generator = MyGen(train_path, batch_size, im_size=160, n_images= 20)
val_generator = MyGen(val_path, batch_size, im_size=160, n_images= 20)

In [12]:
# Constructing the model name
model_name = 'model_3' + '_' + new_date_time + '.h5'
    
# Creating a path to store model
if not os.path.exists(f'./models/{model_name}/'):
    os.mkdir(f'./models/{model_name}/')
    filepath = f'./models/{model_name}/'    
else:
    os.mkdir(f'./models/{model_name}_2/')
    filepath = f'./models/{model_name}_2/'
        
# Setting up the model checkpoint and the callbacks_list
checkpoint = ModelCheckpoint(filepath, monitor='val_categorical_accuracy', verbose=2, save_best_only=True, save_weights_only=False, mode='max')
callbacks_list = [checkpoint]

In [13]:
# Calculating the steps per epoch for train and val generators
if (num_train_sequences%batch_size) == 0:
    steps_per_epoch = int(num_train_sequences/batch_size)
else:
    steps_per_epoch = int((num_train_sequences//batch_size) - 1)

if (num_val_sequences%batch_size) == 0:
    validation_steps = int(num_val_sequences/batch_size)
else:
    validation_steps = int((num_val_sequences//batch_size) - 1)

In [14]:
num_epochs = 30

In [15]:
history= model.fit(
    train_generator, 
    epochs= num_epochs,
    steps_per_epoch= steps_per_epoch,
    verbose= 1, 
    callbacks= callbacks_list, 
    validation_data= val_generator,
    validation_steps= validation_steps
)

Epoch 1/30
Epoch 00001: val_categorical_accuracy improved from -inf to 0.33000, saving model to ./models/model_3_2021_01_04_21_53.h5\
Epoch 2/30
Epoch 00002: val_categorical_accuracy did not improve from 0.33000
Epoch 3/30
Epoch 00003: val_categorical_accuracy improved from 0.33000 to 0.43000, saving model to ./models/model_3_2021_01_04_21_53.h5\
Epoch 4/30
Epoch 00004: val_categorical_accuracy did not improve from 0.43000
Epoch 5/30
Epoch 00005: val_categorical_accuracy improved from 0.43000 to 0.65000, saving model to ./models/model_3_2021_01_04_21_53.h5\
Epoch 6/30
Epoch 00006: val_categorical_accuracy did not improve from 0.65000
Epoch 7/30
Epoch 00007: val_categorical_accuracy improved from 0.65000 to 0.68000, saving model to ./models/model_3_2021_01_04_21_53.h5\
Epoch 8/30
Epoch 00008: val_categorical_accuracy improved from 0.68000 to 0.70000, saving model to ./models/model_3_2021_01_04_21_53.h5\
Epoch 9/30
Epoch 00009: val_categorical_accuracy did not improve from 0.70000
Epoch 

Epoch 00024: val_categorical_accuracy did not improve from 0.81000
Epoch 25/30
Epoch 00025: val_categorical_accuracy did not improve from 0.81000
Epoch 26/30
Epoch 00026: val_categorical_accuracy improved from 0.81000 to 0.84000, saving model to ./models/model_3_2021_01_04_21_53.h5\
Epoch 27/30
Epoch 00027: val_categorical_accuracy did not improve from 0.84000
Epoch 28/30
Epoch 00028: val_categorical_accuracy did not improve from 0.84000
Epoch 29/30
Epoch 00029: val_categorical_accuracy did not improve from 0.84000
Epoch 30/30
Epoch 00030: val_categorical_accuracy did not improve from 0.84000


In [16]:
vacc_3= history.history.get('val_categorical_accuracy')
vloss_3= history.history.get('val_loss')

In [17]:
cacc_3= history.history.get('categorical_accuracy')
closs_3= history.history.get('loss')

## Plots

### Validation accuracy and training accuracy 

#### Model 1

In [None]:
plt.figure(figsize= (8, 4))
plt.plot(vacc_1, label= 'val_categorical_acc')
plt.plot(cacc_1, label= 'train_categorical_acc')
plt.legend(loc= 'lower right')

#### Model 2

In [None]:
plt.figure(figsize= (8, 4))
plt.plot(vacc_2, label= 'val_categorical_acc')
plt.plot(cacc_2, label= 'train_categorical_acc')
plt.legend(loc= 'lower right')

#### Model 3

In [None]:
plt.figure(figsize= (8, 4))
plt.plot(vacc_3, label= 'val_categorical_acc')
plt.plot(cacc_3, label= 'train_categorical_acc')
plt.legend(loc= 'lower right')

### Validation Loss and training Loss 

#### Model 1

In [None]:
plt.figure(figsize= (8, 4))
plt.plot(vloss_1, label= 'val_categorical_acc')
plt.plot(closs_1, label= 'train_categorical_acc')
plt.legend(loc= 'lower right')

#### Model 2

In [None]:
plt.figure(figsize= (8, 4))
plt.plot(vloss_2, label= 'val_categorical_acc')
plt.plot(closs_2, label= 'train_categorical_acc')
plt.legend(loc= 'lower right')

#### Model 3

In [None]:
plt.figure(figsize= (8, 4))
plt.plot(vloss_3, label= 'val_categorical_acc')
plt.plot(closs_3, label= 'train_categorical_acc')
plt.legend(loc= 'lower right')