## Import Data from Google Drive

In [0]:
import os
from google_drive_downloader import GoogleDriveDownloader as gdd
  
# Download and unzip the dataset archive only when ./Project_data is not
# already present, so re-running this cell does not fetch it again.
if not os.path.isdir("./Project_data"):
  gdd.download_file_from_google_drive(
      file_id='1ehyrYBQ5rbQQe6yL4XbLWe3FMvuVUGiL',
      dest_path='./data',
      unzip=True,
  )

# Gesture Recognition
In this group project, you are going to build a 3D Conv model that will be able to predict the 5 gestures correctly. Please import the following libraries to get started.

In [0]:
import numpy as np
import os
from scipy.misc import imread, imresize
import datetime
import os
import warnings
# Install an 'ignore' filter for every warning category: from this point on
# no Python warnings (including deprecation notices) are shown in the notebook.
warnings.filterwarnings('ignore')

We set the random seed so that the results don't vary drastically.

In [3]:
# Seed every random number generator used by the notebook with the same value
# (30) so that shuffling and weight initialisation are repeatable across runs.
# The statement order is kept exactly as written.
np.random.seed(30)  # NumPy global RNG (used by np.random.permutation below)
import random as rn
rn.seed(30)  # Python's built-in `random` module
from keras import backend as K  # NOTE(review): `K` is not referenced in the visible cells
import tensorflow as tf
tf.set_random_seed(30)  # TensorFlow seed; NOTE(review): this is the TF 1.x spelling - confirm TF version

Using TensorFlow backend.


In this block, you read the folder names for training and validation. You also set the `batch_size` here. Note that you set the batch size in such a way that you are able to use the GPU in full capacity. You keep increasing the batch size until the machine throws an error.

In [0]:
# Read the training and validation index files (one line per video folder)
# and shuffle the lines. `with` guarantees the file handles are closed; the
# train file is still permuted before the val file, so the sequence of draws
# from the seeded NumPy RNG is unchanged.
with open('./Project_data/train.csv') as train_file:
    train_doc = np.random.permutation(train_file.readlines())
with open('./Project_data/val.csv') as val_file:
    val_doc = np.random.permutation(val_file.readlines())
batch_size = 40  # experiment with the batch size

Normalize the image

In [0]:
def normalize_image(image):
  """Return `image` divided by 255.

  Args:
    image: a number or NumPy array of pixel intensities.

  Returns:
    The element-wise quotient `image / 255`; an input in the range 0-255
    therefore maps to the range 0-1.
  """
  return image / 255

In [0]:
# Target height (rows) and width (cols), in pixels, that frames are resized to.
dest_image_row, dest_image_col = 90, 90

## Generator
This is one of the most important parts of the code. It resizes each input image to the size given by `dest_image_row` and `dest_image_col`.

In [0]:
# Indices of the frames used from each video: every second index in 0..29.
img_idx = list(range(0, 30, 2))
x = len(img_idx)  # number of frames used per video
# (y, z) is the final (rows, cols) size of each input frame.
y, z = dest_image_row, dest_image_col
def _load_batch(source_path, entries):
    """Load the frames and one-hot labels for a list of index-file lines.

    Args:
        source_path: directory containing one sub-folder per video.
        entries: lines of the index file; fields are separated by ';', with
            field 0 holding the folder name and field 2 the integer class id.

    Returns:
        (batch_data, batch_labels) where batch_data has shape
        (len(entries), x, y, z, 3) and batch_labels has shape (len(entries), 5).
    """
    batch_data = np.zeros((len(entries), x, y, z, 3))  # 3 is the number of channels RGB
    batch_labels = np.zeros((len(entries), 5))  # one-hot representation of the output
    for folder, entry in enumerate(entries):
        fields = entry.strip().split(';')
        folder_path = source_path + '/' + fields[0]
        # os.listdir returns names in arbitrary order; sort them so that the
        # indices in img_idx select the same frames on every machine.
        imgs = sorted(os.listdir(folder_path))
        for idx, item in enumerate(img_idx):
            # NOTE(review): the frame is cast to float32 before imresize;
            # scipy.misc.imresize may rescale non-uint8 input to the full
            # 0-255 range - confirm this is intended.
            image = imread(folder_path + '/' + imgs[item]).astype(np.float32)
            image = imresize(image, (y, z))
            batch_data[folder, idx, :, :] = normalize_image(image[:, :])
        batch_labels[folder, int(fields[2])] = 1
    return batch_data, batch_labels


def generator(source_path, folder_list, batch_size):
    """Endlessly yield (batch_data, batch_labels) pairs for `fit_generator`.

    On every pass the entries of `folder_list` are reshuffled, split into
    full batches of `batch_size`, and followed by one smaller batch holding
    the remaining entries, if there are any. No empty batch is emitted when
    the number of entries is an exact multiple of `batch_size`, so one pass
    always yields ceil(len(folder_list) / batch_size) batches.

    Args:
        source_path: directory containing one sub-folder per video.
        folder_list: lines of the index file describing the videos.
        batch_size: number of videos per full batch; must be positive.

    Raises:
        ValueError: if `folder_list` is empty or `batch_size` is not positive
            (raised when the first batch is requested).
    """
    if len(folder_list) == 0:
        raise ValueError('folder_list is empty; there is nothing to generate')
    if batch_size <= 0:
        raise ValueError('batch_size must be a positive integer')

    while True:
        t = np.random.permutation(folder_list)
        num_batches = len(folder_list) // batch_size
        for batch in range(num_batches):  # full batches
            start = batch * batch_size
            yield _load_batch(source_path, t[start:start + batch_size])

        # Remaining data points that are left after the full batches.
        leftover_start = num_batches * batch_size
        if leftover_start < len(folder_list):
            yield _load_batch(source_path, t[leftover_start:])

Note here that a video is represented above in the generator as (number of images, height, width, number of channels). Take this into consideration while creating the model architecture.

In [0]:
# Timestamp of this run.
curr_dt_time = datetime.datetime.now()

# Folders holding the training and validation videos.
train_path = './Project_data/train'
val_path = './Project_data/val'

# Dataset sizes (one index-file line per video) and training length.
num_train_sequences = len(train_doc)
num_val_sequences = len(val_doc)
num_epochs = 25  # choose the number of epochs

print('# training sequences =', num_train_sequences)
print('# validation sequences =', num_val_sequences)
print('# epochs =', num_epochs)

## Model
Here you make the model using different functionalities that Keras provides. 

In [0]:
# Input geometry and label count read by getModel.
img_rows, img_cols, img_channels = dest_image_row, dest_image_col, 3
num_of_frame = len(img_idx)  # frames per video, taken from the sampled index list
nb_classes = 5



In [0]:
def _build_cnn_rnn(recurrent_layer):
  """Build and compile a frozen-VGG16 + recurrent-layer video classifier.

  Every frame is encoded by an ImageNet-pretrained VGG16 (no top, global
  average pooled, not trainable) applied through TimeDistributed; the
  per-frame encodings are fed to `recurrent_layer(256)`, then to a
  1024-unit ReLU layer and a softmax over `nb_classes`.

  Args:
    recurrent_layer: Keras recurrent layer class to instantiate (LSTM or GRU).

  Returns:
    The model, compiled with SGD and categorical cross-entropy.
  """
  video = Input(shape=(num_of_frame, img_rows, img_cols, img_channels))
  cnn_base = VGG16(input_shape=(img_rows, img_cols, img_channels),
                   weights='imagenet',
                   include_top=False)
  cnn_out = GlobalAveragePooling2D()(cnn_base.output)
  cnn = Model(inputs=cnn_base.input, outputs=cnn_out)
  cnn.trainable = False  # keep the pretrained feature extractor fixed
  encoded_frames = TimeDistributed(cnn)(video)
  encoded_sequence = recurrent_layer(256)(encoded_frames)
  hidden_layer = Dense(activation="relu", units=1024)(encoded_sequence)
  outputs = Dense(activation="softmax", units=nb_classes)(hidden_layer)
  model = Model([video], outputs)
  optimizer = optimizers.SGD(lr=0.01, decay=0.0, momentum=0.0, nesterov=True)
  model.compile(loss="categorical_crossentropy",
                optimizer=optimizer,
                metrics=["categorical_accuracy"])
  return model


def getModel(type):
  """Return the gesture-recognition model selected by `type`.

  Args:
    type: 'conv3d' for a 3D-convolution network (returned uncompiled),
      'LSTM' or 'GRU' for a frozen VGG16 frame encoder followed by the
      corresponding recurrent layer (returned compiled).

  Returns:
    A Keras model whose input shape is
    (num_of_frame, img_rows, img_cols, img_channels) and whose output is a
    softmax over `nb_classes`.

  Raises:
    ValueError: if `type` is not one of the supported names.
  """
  if type == 'conv3d':
    model = Sequential()
    model.add(Conv3D(2, (3,3,3), padding = 'same', input_shape= [num_of_frame, img_rows,img_cols, img_channels] ))
    model.add(Activation('relu'))
    model.add(MaxPooling3D(pool_size=(2, 2, 2)))
    model.add(Conv3D(64, (3,3,3)))
    model.add(Activation('relu'))
    model.add(MaxPooling3D(pool_size=(2, 2, 2)))
    model.add(Conv3D(128, (1,3,3)))
    model.add(Activation('relu'))
    model.add(MaxPooling3D(pool_size=(2, 2, 2)))
    model.add(Dropout(0.25))

    model.add(Flatten())
    model.add(Dense(256))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(nb_classes))
    model.add(Activation('softmax'))
    return model
  if type == 'LSTM':
    return _build_cnn_rnn(LSTM)
  if type == 'GRU':
    # Uses an actual GRU layer here, not a second LSTM.
    return _build_cnn_rnn(GRU)
  raise ValueError("Unknown model type %r; expected 'conv3d', 'LSTM' or 'GRU'" % (type,))

In [0]:
from keras.models import Sequential, Model
from keras.layers import Dense, GRU, Flatten, TimeDistributed, Flatten, BatchNormalization, Activation, Dropout, LSTM, Input
from keras.layers.convolutional import Conv3D, MaxPooling3D
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from keras import optimizers
from keras.layers.pooling import GlobalAveragePooling2D

from keras.applications.vgg16 import VGG16
from keras.layers.wrappers import TimeDistributed


Now that you have written the model, the next step is to `compile` the model. When you print the `summary` of the model, you'll see the total number of parameters you have to train.

In [0]:
# Build the architecture registered under the 'GRU' key of getModel.
model = getModel(type='GRU')

In [0]:
# Compile the model with plain SGD and categorical cross-entropy, tracking
# categorical accuracy.
optimizer = optimizers.SGD(lr=0.01, decay=0.0, momentum=0.0, nesterov=True)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['categorical_accuracy'])
# summary() prints the layer table itself and returns None, so it is called
# directly; wrapping it in print() only adds a stray "None" line.
model.summary()

Let us create the `train_generator` and the `val_generator` which will be used in `.fit_generator`.

In [0]:
# One endless batch generator per data split; both use the same batch size.
train_generator = generator(source_path=train_path,
                            folder_list=train_doc,
                            batch_size=batch_size)
val_generator = generator(source_path=val_path,
                          folder_list=val_doc,
                          batch_size=batch_size)

In [0]:
# Checkpoints of this run go into their own directory, named after the run
# timestamp with spaces removed and ':' replaced by '_'.
model_name = 'model_init' + '_' + str(curr_dt_time).replace(' ','').replace(':','_') + '/'
if not os.path.exists(model_name):
    os.mkdir(model_name)

# The file name records the epoch number and the train/validation metrics.
filepath = model_name + 'model-{epoch:05d}-{loss:.5f}-{categorical_accuracy:.5f}-{val_loss:.5f}-{val_categorical_accuracy:.5f}.h5'

# Save the full model (not only the weights) after every epoch.
checkpoint = ModelCheckpoint(
    filepath,
    monitor='val_loss',
    verbose=1,
    save_best_only=False,
    save_weights_only=False,
    mode='auto',
    period=1,
)

# Multiply the learning rate by 0.2 when val_loss stops improving.
LR = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.2,
    patience=2,
    cooldown=1,
)

callbacks_list = [checkpoint, LR]

The `steps_per_epoch` and `validation_steps` are used by `fit_generator` to decide the number of `next()` calls it needs to make.

In [0]:
# Number of generator batches that make up one pass over each split:
# the sequence count divided by batch_size, rounded up so that a trailing
# partial batch is counted as one step.
steps_per_epoch = (num_train_sequences + batch_size - 1) // batch_size
validation_steps = (num_val_sequences + batch_size - 1) // batch_size

Let us now fit the model. This will start training the model and with the help of the checkpoints, you'll be able to save the model at the end of each epoch.

In [0]:
# Train from the batch generators; the callbacks save a checkpoint after each
# epoch and lower the learning rate when val_loss plateaus.
model.fit_generator(
    train_generator,
    steps_per_epoch=steps_per_epoch,
    epochs=num_epochs,
    verbose=1,
    callbacks=callbacks_list,
    validation_data=val_generator,
    validation_steps=validation_steps,
    class_weight=None,
    workers=1,
    initial_epoch=0,
)