# Gesture Recognition
In this group project, you are going to build a 3D Conv model that will be able to predict the 5 gestures correctly. Please import the following libraries to get started.

In [1]:
# !pip install scipy
# !pip install Pillow
# !pip install imageio
# !pip install tensorflow
# !pip install matplotlib
#!pip install opencv-python

In [1]:
import numpy as np
import os
import imageio
import datetime
import os
import cv2
import pathlib
from scipy import misc

We set the random seed so that the results don't vary drastically.

In [2]:
import random as rn
from keras import backend as K
import tensorflow as tf

rn.seed(30)
np.random.seed(30)
tf.compat.v1.random.set_random_seed(30)

Using TensorFlow backend.


In [3]:
## Use this to load the data if the data exists on gdrive and using google colab for 

# from google.colab import drive
# drive.mount('/content/gdrive/')

# data_dir_train = "/content/gdrive/MyDrive/datasets/Project_data/train/"
# data_dir_val = '/content/gdrive/MyDrive/datasets/Project_data/test/'

# train_doc = np.random.permutation(open('/content/gdrive/MyDrive/datasets/Project_data/train.csv').readlines())
# val_doc = np.random.permutation(open('/content/gdrive/MyDrive/datasets/Project_data/val.csv').readlines())


In [4]:
## Use this to load the data if the data exists on Azure ML
# azureml-core of version 1.0.72 or higher is required
from azureml.core import Workspace, Dataset

subscription_id = '925a5ad8-a21a-48c0-92a2-5a8a2a4dfc46'
resource_group = 'machinelearning-workbench'
workspace_name = 'sriks-azureml'

workspace = Workspace(subscription_id, resource_group, workspace_name)

dataset = Dataset.get_by_name(workspace, name='gesture_ds')

## Uncommenting the below will initiate the download, set Overwrite=True if overwriting existing files is intended. 
# doc: https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.data.filedataset?view=azure-ml-py#download-target-path-none--overwrite-false-
dataset.download(target_path='Project_data/', overwrite=False)

UserErrorException: UserErrorException:
	Message: File "/mnt/batch/tasks/shared/LS_root/mounts/clusters/sriks-azur-ins/code/Project_data/train.csv" already exists. Set overwrite=True to overwrite it.
	InnerException None
	ErrorResponse 
{
    "error": {
        "code": "UserError",
        "message": "File \"/mnt/batch/tasks/shared/LS_root/mounts/clusters/sriks-azur-ins/code/Project_data/train.csv\" already exists. Set overwrite=True to overwrite it."
    }
}

In this block, you read the folder names for training and validation. You also set the `batch_size` here. Note that you set the batch size in such a way that you are able to use the GPU in full capacity. You keep increasing the batch size until the machine throws an error.

In [3]:
## Use this if the data exists locally
train_doc = np.random.permutation(open('Project_data/train.csv').readlines())
val_doc = np.random.permutation(open('Project_data/val.csv').readlines())
data_dir_train = 'Project_data/train/'

FileNotFoundError: [Errno 2] No such file or directory: 'Project_data/train.csv'

## Visualization

Let us plot some sample images and run some transformations on the image so see the impact.

In [None]:
# ## Pick some random sequence
import matplotlib.pyplot as plt


random_sequence = train_doc[np.random.randint(len(train_doc))].strip().split(';')[0]+'/'
images = os.listdir(data_dir_train + random_sequence)
# Create a code to visualize one instance of all the 30 images present in the sequence
plt.figure(figsize=(10, 10))
i = 0
random_images = []
for img in images:
  ax = plt.subplot(6, 5, i + 1)
  i = i + 1
  random_images.append(os.path.join(data_dir_train, random_sequence, img))
  img_bgr = cv2.imread(os.path.join(data_dir_train, random_sequence, img))
  img_mp = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB) # This is needed because opencv uses BGR convention and matplotlib uses RGB
  imgplot= plt.imshow(img_mp)
  plt.axis("off")
  plt.plot()

In [21]:
# # Random Image - Original
# random_image = random_images[np.random.randint(30)]
# image_bgr = cv2.imread(random_image)
# plt.imshow(cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB))
# print(image_bgr.shape)

In [22]:
# ## Cropped Image Sample
# crop_img = image_bgr[10:120, 10:160]
# plt.imshow(cv2.cvtColor(crop_img, cv2.COLOR_BGR2RGB))
# print(crop_img.shape)

In [23]:
# # Resize Image Sample
# dim = (120, 120)
# resized_img = cv2.resize(crop_img, dim, interpolation = cv2.INTER_AREA)
# plt.imshow(cv2.cvtColor(resized_img, cv2.COLOR_BGR2RGB))
# print(resized_img.shape)

In [24]:
# # Normalized Image
# plt.imshow(cv2.cvtColor(cv2.normalize(resized_img, None, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_32F), cv2.COLOR_BGR2RGB))

## Generator

This is one of the most important part of the code. The overall structure of the generator has been given. In the generator, you are going to preprocess the images as you have images of 2 different dimensions as well as create a batch of video frames. You have to experiment with `img_idx`, `y`,`z` and normalization such that you get high accuracy.

In [25]:
def crop_image(image, size = 10):
     # cropping the image
    image_x = image.shape[0]
    image_y = image.shape[1]
    return image[size:image_x, size:image_y]

def normalize_minmax(image_resized, batch_data, folder, idx):
    batch_data[folder,idx,:,:,0] = cv2.normalize(image_resized[:,:,0], None, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_32F)
    batch_data[folder,idx,:,:,1] = cv2.normalize(image_resized[:,:,1], None, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_32F)
    batch_data[folder,idx,:,:,2] = cv2.normalize(image_resized[:,:,2], None, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_32F)
    return batch_data   

def crop_and_normalize(image,batch_data, folder, idx, y=120,z=120):
    
    #crop the images and resize them. Note that the images are of 2 different shape 
    #and the conv3D will throw error if the inputs in a batch have different shapes
    image_cropped = crop_image(image)
    image_resized = cv2.resize(image_cropped, (y,z), interpolation = cv2.INTER_AREA)
                    
    # using min max normalization.
    pending_batch_data = normalize_minmax(image_resized, batch_data, folder, idx)
    return pending_batch_data


def create_batch_data(t, source_path, img_idx, folder, batch, batch_size, batch_data, batch_labels):
    imgs = os.listdir(source_path+'/'+ t[folder + (batch*batch_size)].split(';')[0]) # read all the images in the folder
    for idx,item in enumerate(img_idx): #  Iterate iver the frames/images of a folder to read them in
                    
        image = cv2.imread(source_path+'/'+ t[folder + (batch*batch_size)].strip().split(';')[0]+'/'+
                           imgs[item]).astype(np.float32)
                    
        # using min max normalization.
        batch_data = crop_and_normalize(image, batch_data, folder, idx)
                    
    batch_labels[folder, int(t[folder + (batch*batch_size)].strip().split(';')[2])] = 1
    return batch_data, batch_labels

def generator(source_path, folder_list, dim=(120,120), batch_size=30, ablation=None, samples=30):
    print( 'Source path = ', source_path, '; batch size =', batch_size)
    x = samples # number of taken from each video
    img_idx = range(1,x) #create a list of image numbers you want to use for a particular video
    y = dim[0] # image dim
    z = dim[1] # image dim
    while True:
        if ablation is not None: 
            t = np.random.permutation(folder_list[:ablation])
        else:
            t = np.random.permutation(folder_list)
        num_batches =  len(t) // batch_size # calculate the number of batches
        for batch in range(num_batches): # we iterate over the number of batches
            
            batch_data = np.zeros((batch_size,x,y,z,3)) # x is the number of images you use for each video, (y,z) is the final size of the input images and 3 is the number of channels RGB
            batch_labels = np.zeros((batch_size, 5)) # batch_labels is the one hot representation of the output
            
            for folder in range(batch_size): # iterate over the batch_size
                
                ## get images from the folder
                batch_data_labels = create_batch_data(t, source_path, img_idx, folder, batch, batch_size, batch_data, batch_labels)
                batch_data = batch_data_labels[0]
                batch_labels = batch_data_labels[1]
                
            yield batch_data, batch_labels 
            
        # The length of the folder list could leave some residue folders, the below code deals with it
        # Number of pending batches
        pending_batches = len(t) % batch_size
        
        pending_batch_data = np.zeros((batch_size,x,y,z,3)) # x is the number of images you use for each video, (y,z) is the final size of the input images and 3 is the number of channels RGB
        pending_batch_labels = np.zeros((batch_size, 5)) # batch_labels is the one hot representation of the output
        
        for folder in range(pending_batches): # iterate over the batch_size

            # getimages from the folder
            pending_batch_data_labels = create_batch_data(t, source_path, img_idx, folder, batch, batch_size, pending_batch_data, pending_batch_labels)
            pending_batch_data = pending_batch_data_labels[0]
            pending_batch_labels = pending_batch_data_labels[1]       
            
            
        yield pending_batch_data, pending_batch_labels
            

# g = generator(data_dir_train, train_doc, 30)
# next(g)

In [26]:
# g = generator(data_dir_train, train_doc, (120,120), 30, ablation=None, samples=30)
# p = 0
# for k in g:
#     p+=1
# print(p)
# # next(g)

Note here that a video is represented above in the generator as (number of images, height, width, number of channels). Take this into consideration while creating the model architecture.

## Model
Here you make the model using different functionalities that Keras provides. Remember to use `Conv3D` and `MaxPooling3D` and not `Conv2D` and `Maxpooling2D` for a 3D convolution model. You would want to use `TimeDistributed` while building a Conv2D + RNN model. Also remember that the last layer is the softmax. Design the network in such a way that the model is able to give good accuracy on the least number of parameters so that it can fit in the memory of the webcam.

In [27]:
from keras.models import Sequential, Model
from keras.layers import Dense, GRU, Flatten, TimeDistributed, Flatten, BatchNormalization, Activation
from keras.layers.convolutional import Conv3D, MaxPooling3D
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from keras import optimizers

#write your model here

# Building a 3 D Convolution model.
model = Sequential()

model.add(Conv3D(32, kernel_size=(3, 3, 3), activation='relu', kernel_initializer='he_uniform', input_shape=(30,120,120,3)))
model.add(MaxPooling3D(pool_size=(2, 2, 2)))

model.add(Conv3D(64, kernel_size=(3, 3, 3), activation='relu', kernel_initializer='he_uniform'))
model.add(MaxPooling3D(pool_size=(2, 2, 2)))

model.add(Flatten())
model.add(Dense(256, activation='relu', kernel_initializer='he_uniform'))
model.add(Dense(256, activation='relu', kernel_initializer='he_uniform'))
model.add(Dense(5, activation='softmax'))

Now that you have written the model, the next step is to `compile` the model. When you print the `summary` of the model, you'll see the total number of parameters you have to train.

In [34]:
optimiser =  'sgd' #write your optimizer
model.compile(optimizer=optimiser, loss='categorical_crossentropy', metrics=['categorical_accuracy'])
print (model.summary())

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv3d_2 (Conv3D)            (None, 28, 118, 118, 32)  2624      
_________________________________________________________________
max_pooling3d_2 (MaxPooling3 (None, 14, 59, 59, 32)    0         
_________________________________________________________________
conv3d_3 (Conv3D)            (None, 12, 57, 57, 64)    55360     
_________________________________________________________________
max_pooling3d_3 (MaxPooling3 (None, 6, 28, 28, 64)     0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 301056)            0         
_________________________________________________________________
dense_3 (Dense)              (None, 256)               77070592  
_________________________________________________________________
dense_4 (Dense)              (None, 256)              

The `steps_per_epoch` and `validation_steps` are used by `fit_generator` to decide the number of next() calls it need to make.

## Ablation Experiment

Let us create the `train_generator` and the `val_generator` which will be used in `.fit_generator`. 
We will fit the model with 1 epoch just to validate if the model is working.
Notice I have not added any callbacks yet, we will do it in the later sections

In [29]:
ablation_size=100
num_epochs = 1

# Lets us see the input to the model
train_path = 'Project_data/train'
val_path = 'Project_data/val'

num_train_sequences = len(train_doc[:ablation_size])
print('# training sequences =', num_train_sequences)
num_val_sequences = len(val_doc[:ablation_size])
print('# validation sequences =', num_val_sequences)

print ('# epochs =', num_epochs)


if (num_train_sequences%batch_size) == 0:
    steps_per_epoch = int(num_train_sequences/batch_size)
else:
    steps_per_epoch = (num_train_sequences//batch_size) + 1

if (num_val_sequences%batch_size) == 0:
    validation_steps = int(num_val_sequences/batch_size)
else:
    validation_steps = (num_val_sequences//batch_size) + 1

print('steps_per_epoch: {}, validation_steps: {}'.format(steps_per_epoch, validation_steps))

train_generator = generator(train_path, train_doc, dim=(120,120), batch_size=30, ablation=ablation_size, samples=30)
val_generator = generator(val_path, val_doc, dim=(120,120), batch_size=30, ablation=ablation_size, samples=30)

# training sequences = 100
# validation sequences = 100
# epochs = 1
steps_per_epoch: 4, validation_steps: 4


In [30]:
model.fit_generator(train_generator, steps_per_epoch=steps_per_epoch, epochs=num_epochs, verbose=1, 
                    callbacks=[], validation_data=val_generator, 
                    validation_steps=validation_steps, class_weight=None, workers=1, initial_epoch=0)

W0424 09:20:19.582688 140360580888384 deprecation.py:323] From <ipython-input-30-b7ec57133ab4>:3: Model.fit_generator (from tensorflow.python.keras.engine.training) is deprecated and will be removed in a future version.
Instructions for updating:
Please use Model.fit, which supports generators.


Source path =  Project_data/train ; batch size = 30


<tensorflow.python.keras.callbacks.History at 0x7fa79bb43630>

The model seems to be working well, there are no errors. 

## Overfitting on training data. 

Let us overfit on the training data to see if the model is able to learn from the data. We are going to use less data and run for more epochs and see if the model is able to improve the accuracy and reduce the loss.

In [31]:
ablation_size=50
batch_size=30
num_epochs = 10

# Lets us see the input to the model
train_path = 'Project_data/train'
val_path = 'Project_data/val'
num_train_sequences = len(train_doc[:ablation_size])
print('# training sequences =', num_train_sequences)
num_val_sequences = len(val_doc[:ablation_size])
print('# validation sequences =', num_val_sequences)
print ('# epochs =', num_epochs)

if (num_train_sequences%batch_size) == 0:
    steps_per_epoch = int(num_train_sequences/batch_size)
else:
    steps_per_epoch = (num_train_sequences//batch_size) + 1

if (num_val_sequences%batch_size) == 0:
    validation_steps = int(num_val_sequences/batch_size)
else:
    validation_steps = (num_val_sequences//batch_size) + 1

print('steps_per_epoch: {}, validation_steps: {}'.format(steps_per_epoch, validation_steps))


train_generator = generator(train_path, train_doc,batch_size=batch_size, ablation=ablation_size, samples=30)
val_generator = generator(val_path, val_doc,batch_size=batch_size, ablation=ablation_size, samples=30)

# training sequences = 50
# validation sequences = 50
# epochs = 10
steps_per_epoch: 2, validation_steps: 2


In [32]:
history = model.fit_generator(train_generator, steps_per_epoch=steps_per_epoch, epochs=num_epochs, verbose=1, 
                    callbacks=[], validation_data=val_generator, 
                    validation_steps=validation_steps, class_weight=None, workers=1, initial_epoch=0)

Source path =  Project_data/train ; batch size = 30
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


The training loss reduced and accuracy increased, the model is able to learn well. 
The validation loss is high and accuracy is low which is expected at the moment as we did not use the complete model.

## Callbacks

Here we define few callbacks which we will use later when we fit the complete model.

In [36]:
curr_dt_time = datetime.datetime.now()
model_name = 'model_init' + '_' + str(curr_dt_time).replace(' ','').replace(':','_') + '/'
    
if not os.path.exists(model_name):
    os.mkdir(model_name)
        
filepath = model_name + 'model-{epoch:05d}-{loss:.5f}-{categorical_accuracy:.5f}-{val_loss:.5f}-{val_categorical_accuracy:.5f}.h5'

checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=False, save_weights_only=False, mode='auto', period=1)

LR = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=0.001)

ES = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=3)

callbacks_list = [checkpoint, LR, ES]

W0424 09:26:36.721858 140360580888384 callbacks.py:1071] `period` argument is deprecated. Please use `save_freq` to specify the frequency in number of batches seen.


## Final Experiment

In [37]:
ablation_size=None
batch_size=30
num_epochs = 30

# Lets us see the input to the model
train_path = 'Project_data/train'
val_path = 'Project_data/val'
num_train_sequences = len(train_doc)
print('# training sequences =', num_train_sequences)
num_val_sequences = len(val_doc)
print('# validation sequences =', num_val_sequences)
print ('# epochs =', num_epochs)

if (num_train_sequences%batch_size) == 0:
    steps_per_epoch = int(num_train_sequences/batch_size)
else:
    steps_per_epoch = (num_train_sequences//batch_size) + 1

if (num_val_sequences%batch_size) == 0:
    validation_steps = int(num_val_sequences/batch_size)
else:
    validation_steps = (num_val_sequences//batch_size) + 1

print('steps_per_epoch: {}, validation_steps: {}'.format(steps_per_epoch, validation_steps))


train_generator = generator(train_path, train_doc,batch_size=batch_size, ablation=ablation_size, samples=30)
val_generator = generator(val_path, val_doc,batch_size=batch_size, ablation=ablation_size, samples=30)

# training sequences = 663
# validation sequences = 100
# epochs = 30
steps_per_epoch: 23, validation_steps: 4


In [38]:
history = model.fit_generator(train_generator, steps_per_epoch=steps_per_epoch, epochs=num_epochs, verbose=1, 
                    callbacks=[], validation_data=val_generator, 
                    validation_steps=validation_steps, class_weight=None, workers=1, initial_epoch=0)

Source path =  Project_data/train ; batch size = 30
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30

KeyboardInterrupt: 