# Gesture Recognition

In [1]:
import numpy as np
import os
import imageio
from skimage import transform as skimtr
import datetime
import os
import random as rn
from scipy.stats import geom

from keras import backend as K
import tensorflow as tf
from keras.models import Sequential, Model
from keras.layers import Dense, GRU, Flatten, TimeDistributed,\
    Flatten, BatchNormalization, Activation, Input, MaxPool3D, Dropout
from keras.layers.convolutional import Conv3D, MaxPooling3D
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from keras import optimizers
from keras import initializers
from keras import backend as K

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
# Seed NumPy, Python's `random` module and TensorFlow (in that order) with the
# same fixed value so that shuffling and weight initialisation are repeatable.
for _seed_fn in (np.random.seed, rn.seed, tf.set_random_seed):
    _seed_fn(30)

In [3]:
def _read_shuffled_lines(csv_path):
    """Return the lines of `csv_path` as a randomly permuted NumPy array.

    The file is read inside a `with` block so the handle is closed as soon as
    the lines are in memory (the previous bare `open(...)` never closed it).
    """
    with open(csv_path) as doc_file:
        return np.random.permutation(doc_file.readlines())


# Train is shuffled before val, matching the original RNG consumption order.
train_doc = _read_shuffled_lines('./Project_data/train.csv')
val_doc = _read_shuffled_lines('./Project_data/val.csv')

## Hyperparameters

In [11]:
# --- Frame sampling -------------------------------------------------------
# How the frames of a video are picked:
#   0: flipped geometric pdf (p=prob)
#   1: discrete uniform
#   2: equal steps sampling (pick up every nth value from distribution)
image_index_choice_distribution = 2
num_image_samples = 8  # number of frames kept per video
prob = 0.5             # p of the geometric distribution (only read in mode 0)

# --- Image pre-processing -------------------------------------------------
# Fraction of the image trimmed from each side before resizing (0 = no crop).
crop_top_pct = crop_right_pct = crop_bottom_pct = crop_left_pct = 0
ideal_size_img = (100, 100)  # (height, width) every frame is resized to
normalisation_type = 1  # 0: min-max scaling, 1: min-max scaling using 5th/95th pctl

# --- Training -------------------------------------------------------------
num_epochs = 100

## Utility Function 1: Generate indices for sampling images within a sequence (video)

In [24]:
def generate_image_index(image_index_choice_distribution, num_image_samples, prob, num_frames=30):
    """Choose which frame positions of a video are fed to the model.

    Parameters
    ----------
    image_index_choice_distribution : int
        0: sample without replacement from a flipped geometric pmf (p=prob);
        1: sample without replacement from a discrete uniform distribution;
        2: deterministic equal-step sampling anchored on the last frame.
    num_image_samples : int
        Number of frame indices to return (1 <= value <= num_frames).
    prob : float
        Parameter of the geometric distribution; only used in mode 0.
    num_frames : int, optional
        Number of frames available per video (default 30).

    Returns
    -------
    numpy.ndarray
        Sorted, unique frame indices in [0, num_frames) of length
        `num_image_samples`.

    Raises
    ------
    ValueError
        If `num_image_samples` is out of range or the distribution code is
        unknown (previously the function silently returned None in that case).
    """
    if not 1 <= num_image_samples <= num_frames:
        raise ValueError(
            'num_image_samples must be between 1 and %d, got %r' % (num_frames, num_image_samples))

    all_frames = np.arange(0, num_frames)

    # Nothing to sample when every frame is requested.
    if num_image_samples == num_frames:
        return all_frames

    if image_index_choice_distribution == 0:  # flipped geometric pdf (p=prob)

        # Reverse the pmf so the largest probabilities sit at the highest indices.
        prob_dist = geom.pmf(all_frames, prob)[::-1]
        balance = 1 - np.sum(prob_dist)
        prob_dist = prob_dist + balance / num_frames  # to force sum of probabilities to 1

        # Absorb any remaining floating-point residue into a single entry.
        if np.sum(prob_dist) < 1:
            prob_dist[-1] = prob_dist[-1] + 1 - np.sum(prob_dist)
        elif np.sum(prob_dist) > 1:
            prob_dist[0] = prob_dist[0] + 1 - np.sum(prob_dist)

        return np.sort(np.random.choice(all_frames, size=num_image_samples, replace=False, p=prob_dist))

    if image_index_choice_distribution == 1:  # discrete uniform distribution

        return np.sort(np.random.choice(all_frames, size=num_image_samples, replace=False))

    if image_index_choice_distribution == 2:  # equal steps sampling

        # First pass: walk backwards from the last frame in equal steps, then
        # restore ascending order. This never yields more than
        # num_image_samples indices.
        step = int(np.ceil(num_frames / num_image_samples))
        image_index = all_frames[::-1][::step][::-1]

        # Pad with the highest not-yet-selected frames until the count matches.
        shortfall = num_image_samples - len(image_index)
        if shortfall > 0:
            unused = np.setdiff1d(all_frames, image_index)  # ascending order
            image_index = np.sort(np.concatenate((image_index, unused[-shortfall:])))

        return image_index

    raise ValueError(
        'image_index_choice_distribution must be 0, 1 or 2, got %r' % (image_index_choice_distribution,))

## Utility Function 2: Normalise (Standardise) Image

In [6]:
def normalised_image(image, normalisation_type):
    """Rescale the pixel values of one image (or one colour channel).

    Parameters
    ----------
    image : numpy.ndarray
        Pixel values; converted to float before scaling.
    normalisation_type : int
        0: divide by 255;
        1: robust min-max scaling, mapping the 5th percentile to 0 and the
           95th percentile to 1 (values outside that range fall outside [0, 1]).
        Any other value returns the float-converted image unchanged.

    Returns
    -------
    numpy.ndarray
        Float array with the same shape as `image`.
    """
    image = image.astype('float')

    # Percentiles/min/max are undefined on an empty array; nothing to scale.
    if image.size == 0:
        return image

    if normalisation_type == 0:  # fixed-range scaling, assuming 8-bit pixel values

        image = image / 255

    elif normalisation_type == 1:  # percentile-based (5th/95th) min-max scaling

        low = np.percentile(image, 5)
        high = np.percentile(image, 95)

        # A flat or nearly flat channel makes both percentiles coincide, which
        # used to divide by zero and emit NaN/inf. Fall back to the full
        # min/max range, and to an all-zero image if that is degenerate too.
        if high == low:
            low, high = image.min(), image.max()
        if high == low:
            return np.zeros_like(image)

        image = (image - low) / (high - low)

    return image

In [7]:
# Timestamp of this run; it is later embedded in the checkpoint directory name.
curr_dt_time = datetime.datetime.now()

# Root folders holding one sub-folder of frames per video.
train_path, val_path = './Project_data/train', './Project_data/val'

# One CSV line per video, so the line counts are the dataset sizes.
num_train_sequences, num_val_sequences = len(train_doc), len(val_doc)

print('# training sequences =', num_train_sequences)
print('# validation sequences =', num_val_sequences)
print ('# epochs =', num_epochs)

# training sequences = 663
# validation sequences = 100
# epochs = 100


## Generator

In [14]:
def _load_sequence(source_path, folder_name, image_index):
    """Load, crop, resize and normalise the selected frames of one video folder.

    Returns a float array of shape
    (len(image_index), ideal_size_img[0], ideal_size_img[1], 3).
    Cropping, target size and normalisation come from the notebook-level
    hyperparameters (crop_*_pct, ideal_size_img, normalisation_type).
    """
    folder_path = source_path + '/' + folder_name

    # os.listdir returns entries in arbitrary order, so sort the names to make
    # position i refer to the same file on every run.
    # NOTE(review): assumes frame file names sort lexicographically into
    # temporal order - confirm against the dataset's naming scheme.
    image_file_names = sorted(os.listdir(folder_path))

    sequence = np.zeros((len(image_index), ideal_size_img[0], ideal_size_img[1], 3))

    for idx, item in enumerate(image_index):

        # read in image within sequence (video)
        image = np.asarray(imageio.imread(folder_path + '/' + image_file_names[item])).astype(np.float32)

        # crop using hyperparameters: "crop_top_pct", "crop_right_pct", "crop_bottom_pct", "crop_left_pct"
        crop_start_top = int(np.floor(image.shape[0] * crop_top_pct))
        crop_end_right = int(image.shape[1] - np.ceil(image.shape[1] * crop_right_pct))
        crop_end_bottom = int(image.shape[0] - np.ceil(image.shape[0] * crop_bottom_pct))
        crop_start_left = int(np.floor(image.shape[1] * crop_left_pct))
        image = image[crop_start_top:crop_end_bottom, crop_start_left:crop_end_right, :]

        # resize image
        image = skimtr.resize(image, (ideal_size_img[0], ideal_size_img[1]))

        # normalise each colour channel independently using hyperparameter "normalisation_type"
        for channel in range(3):
            sequence[idx, :, :, channel] = normalised_image(image[:, :, channel], normalisation_type)

    return sequence


def generator(source_path, folder_list, batch_size, ablation = False):
    """Endlessly yield (batch_data, batch_labels) pairs for Keras `fit_generator`.

    Parameters
    ----------
    source_path : str
        Directory containing one sub-folder of frames per video.
    folder_list : sequence of str
        Lines of the form 'folder;...;label' (first field = folder name, third
        field = integer class label).
    batch_size : int
        Number of videos per batch; the last batch of an epoch may be smaller.
    ablation : int or bool, optional
        0/False uses every entry of `folder_list`; otherwise only the first
        `ablation` entries of each epoch's random permutation are used.

    Yields
    ------
    batch_data : numpy.ndarray
        Shape (n, len(image_index), ideal_size_img[0], ideal_size_img[1], 3).
    batch_labels : numpy.ndarray
        One-hot labels of shape (n, 5).

    Raises
    ------
    ValueError
        If `batch_size` is not positive or there are no sequences to serve.

    Notes
    -----
    The frame indices are drawn once, before the first epoch, and reused for
    every video and every epoch. An epoch yields ceil(n_samples / batch_size)
    batches; an empty trailing batch is no longer emitted when the sample
    count is an exact multiple of `batch_size`.
    """
    print('Source path = ' + source_path)
    print('batch size = ', batch_size)

    if batch_size < 1:
        raise ValueError('batch_size must be a positive integer, got %r' % (batch_size,))

    num_classes = 5  # width of the one-hot label vector

    # sample of images to be used for training
    image_index = generate_image_index(image_index_choice_distribution, num_image_samples, prob)

    while True:

        # Reshuffle at the start of every epoch; optionally keep only a subset.
        randomised_folder_list = np.random.permutation(folder_list)
        if ablation != 0:
            randomised_folder_list = randomised_folder_list[0:ablation]

        # Without this guard an empty list would make the loop spin forever
        # without ever yielding.
        if len(randomised_folder_list) == 0:
            raise ValueError('generator has no sequences to serve from ' + source_path)

        # Slicing in steps of batch_size covers the full batches and the
        # (possibly shorter) final batch with the same code path.
        for start in range(0, len(randomised_folder_list), batch_size):

            batch_lines = randomised_folder_list[start:start + batch_size]

            batch_data = np.zeros((len(batch_lines), len(image_index), ideal_size_img[0], ideal_size_img[1], 3))
            batch_labels = np.zeros((len(batch_lines), num_classes))  # one hot representation of labels

            for row, line in enumerate(batch_lines):
                fields = line.strip().split(';')
                batch_data[row] = _load_sequence(source_path, fields[0], image_index)
                batch_labels[row, int(fields[2])] = 1

            yield batch_data, batch_labels

## Model

In [15]:
# Set Up Model
#
# 3D-CNN classifier: four Conv3D stages followed by two dense layers and a
# 5-way softmax. Every stage applies, in this order: convolution (ReLU),
# dropout, batch normalisation, max pooling, batch normalisation.

## Clear previous Keras sessions so layer names/graph state start fresh
K.clear_session()

## input layer: (frames, height, width, channels)
input_layer = Input((num_image_samples, ideal_size_img[0], ideal_size_img[1], 3))

## kernel initialiser (shared by every conv and dense layer below)
init = initializers.glorot_normal(seed=None)

## stage 1: conv (8 filters, 3x3x3) + dropout + batch normalization + max pool (3x3x3) + batch normalization
conv_layer1 = Conv3D(filters=8, kernel_size=(3, 3, 3), activation='relu', padding='same', kernel_initializer = init)(input_layer)
conv_layer1 = Dropout(0.2)(conv_layer1)
conv_layer1 = BatchNormalization()(conv_layer1)
pooling_layer1 = MaxPool3D(pool_size=(3, 3, 3), padding='same')(conv_layer1)
pooling_layer1 = BatchNormalization()(pooling_layer1)

## stage 2: conv (16 filters, 1x3x3 i.e. spatial only) + dropout + batch normalization + max pool (3x3x3) + batch normalization
conv_layer2 = Conv3D(filters=16, kernel_size=(1, 3, 3), activation='relu', padding='same', kernel_initializer = init)(pooling_layer1)
conv_layer2 = Dropout(0.2)(conv_layer2)
conv_layer2 = BatchNormalization()(conv_layer2)
pooling_layer2 = MaxPool3D(pool_size=(3, 3, 3), padding='same')(conv_layer2)
pooling_layer2 = BatchNormalization()(pooling_layer2)

## stage 3: conv (32 filters, 3x3x3) + dropout + batch normalization + max pool (1x3x3, spatial axes only) + batch normalization
conv_layer3 = Conv3D(filters=32, kernel_size=(3, 3, 3), activation='relu', padding='same', kernel_initializer = init)(pooling_layer2)
conv_layer3 = Dropout(0.2)(conv_layer3)
conv_layer3 = BatchNormalization()(conv_layer3)
pooling_layer3 = MaxPool3D(pool_size=(1, 3, 3), padding='same')(conv_layer3)
pooling_layer3 = BatchNormalization()(pooling_layer3)

## stage 4: conv (64 filters, 3x3x3) + dropout + batch normalization + max pool (1x3x3, spatial axes only) + batch normalization
conv_layer4 = Conv3D(filters=64, kernel_size=(3, 3, 3), activation='relu', padding='same', kernel_initializer = init)(pooling_layer3)
conv_layer4 = Dropout(0.2)(conv_layer4)
conv_layer4 = BatchNormalization()(conv_layer4)
pooling_layer4 = MaxPool3D(pool_size=(1, 3, 3), padding='same')(conv_layer4)
pooling_layer4 = BatchNormalization()(pooling_layer4)


## flatten + two fully connected layers (128 units each) with dropout (0.5) and batch normalization
flatten_layer = Flatten()(pooling_layer4)
dense_layer1 = Dense(units=128, activation='relu', kernel_initializer = init)(flatten_layer)
dense_layer1 = Dropout(0.5)(dense_layer1)
dense_layer1 = BatchNormalization()(dense_layer1)
dense_layer2 = Dense(units=128, activation='relu', kernel_initializer = init)(dense_layer1)
dense_layer2 = Dropout(0.5)(dense_layer2)
dense_layer2 = BatchNormalization()(dense_layer2)
## softmax over the 5 output classes (matches the 5-wide one-hot labels built by the generator)
output_layer = Dense(units=5, activation='softmax', kernel_initializer = init)(dense_layer2)

## define the model with input layer and output layer
model = Model(inputs=input_layer, outputs=output_layer)

model.summary()

## optimizer: Adam with an initial learning rate of 0.01

opt = optimizers.Adam(lr=0.01)

## one-hot labels -> categorical cross-entropy loss, tracked with categorical accuracy
model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['categorical_accuracy'])

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 8, 100, 100, 3)    0         
_________________________________________________________________
conv3d_1 (Conv3D)            (None, 8, 100, 100, 8)    656       
_________________________________________________________________
dropout_1 (Dropout)          (None, 8, 100, 100, 8)    0         
_________________________________________________________________
batch_normalization_1 (Batch (None, 8, 100, 100, 8)    32        
_________________________________________________________________
max_pooling3d_1 (MaxPooling3 (None, 3, 34, 34, 8)      0         
_________________________________________________________________
batch_normalization_2 (Batch (None, 3, 34, 34, 8)      32        
_________________________________________________________________
conv3d_2 (Conv3D)            (None, 3, 34, 34, 16)     1168      
__________

# Fit Model

In [26]:
## Choose num training samples (0 = use the full training set)
ablation = 0

## Set up Batch Size
batch_size = 2**4


## Compute steps_per_epoch
#
# Ablation is only passed to the training generator below, so only the
# training step count depends on it. The validation generator always serves
# the whole validation set, hence its step count always comes from
# num_val_sequences (it used to be derived from `ablation`, which made
# validation epochs drift out of sync with the generator).
# The counts are cast to int because np.ceil returns a float.

num_train_used = min(ablation, num_train_sequences) if ablation > 0 else num_train_sequences

train_steps_per_epoch = int(np.ceil(num_train_used / batch_size))
val_steps_per_epoch = int(np.ceil(num_val_sequences / batch_size))

## Instantiate generator

train_generator = generator(train_path, train_doc, batch_size, ablation)
val_generator = generator(val_path, val_doc, batch_size)

## Build Callback to Reduce LR on Plateau
## (halve the learning rate after 5 epochs without val_loss improvement, never below 0.001)

LROP = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=0.001, verbose=1)

## Build Checkpoint Callback to record Validation Dataset Loss
## (one .h5 file per epoch, written into a directory named after the run timestamp)

model_name = 'model_init' + '_' + str(curr_dt_time).replace(' ','').replace(':','_') + '/'

# exist_ok avoids the check-then-create race and makes the cell re-runnable
os.makedirs(model_name, exist_ok=True)

filepath = model_name + 'model-{epoch:05d}-{loss:.5f}-{categorical_accuracy:.5f}-{val_loss:.5f}-{val_categorical_accuracy:.5f}.h5'

checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=False, save_weights_only=False, mode='auto', period=1)


## Fit

model.fit_generator(train_generator, steps_per_epoch=train_steps_per_epoch,
                    epochs=num_epochs, verbose=1, callbacks=[checkpoint, LROP],
                    validation_data=val_generator, validation_steps=val_steps_per_epoch,
                    class_weight=None, workers=1, initial_epoch=0)

Source path = ./Project_data/val
batch size =  16
Source path = ./Project_data/train
batch size =  16
Epoch 1/100


  warn("The default mode, 'constant', will be changed to 'reflect' in "
  warn("Anti-aliasing will be enabled by default in skimage 0.15 to "



Epoch 00001: saving model to model_init_2020-01-3011_13_07.016235/model-00001-1.56693-0.30015-1.47666-0.42000.h5
Epoch 2/100

Epoch 00002: saving model to model_init_2020-01-3011_13_07.016235/model-00002-1.53379-0.29563-1.47714-0.37000.h5
Epoch 3/100

Epoch 00003: saving model to model_init_2020-01-3011_13_07.016235/model-00003-1.52332-0.32428-1.49608-0.30000.h5
Epoch 4/100

Epoch 00004: saving model to model_init_2020-01-3011_13_07.016235/model-00004-1.47389-0.35445-1.52613-0.28000.h5
Epoch 5/100

Epoch 00005: saving model to model_init_2020-01-3011_13_07.016235/model-00005-1.43752-0.36350-1.47613-0.34000.h5
Epoch 6/100

Epoch 00006: saving model to model_init_2020-01-3011_13_07.016235/model-00006-1.41555-0.39668-1.70555-0.23000.h5
Epoch 7/100

Epoch 00007: saving model to model_init_2020-01-3011_13_07.016235/model-00007-1.39578-0.38462-1.60686-0.25000.h5
Epoch 8/100

Epoch 00008: saving model to model_init_2020-01-3011_13_07.016235/model-00008-1.33524-0.42534-1.42450-0.38000.h5
Epoc


Epoch 00030: saving model to model_init_2020-01-3011_13_07.016235/model-00030-0.62386-0.75566-1.07065-0.47000.h5
Epoch 31/100

Epoch 00031: saving model to model_init_2020-01-3011_13_07.016235/model-00031-0.64523-0.73605-1.22575-0.45000.h5

Epoch 00031: ReduceLROnPlateau reducing learning rate to 0.001.
Epoch 32/100

Epoch 00032: saving model to model_init_2020-01-3011_13_07.016235/model-00032-0.66483-0.74962-1.07212-0.51000.h5
Epoch 33/100

Epoch 00033: saving model to model_init_2020-01-3011_13_07.016235/model-00033-0.64550-0.74208-1.11340-0.53000.h5
Epoch 34/100

Epoch 00034: saving model to model_init_2020-01-3011_13_07.016235/model-00034-0.62100-0.75867-1.24117-0.48000.h5
Epoch 35/100

Epoch 00035: saving model to model_init_2020-01-3011_13_07.016235/model-00035-0.62587-0.77828-2.05046-0.38000.h5
Epoch 36/100

Epoch 00036: saving model to model_init_2020-01-3011_13_07.016235/model-00036-0.56989-0.75113-1.04909-0.57000.h5

Epoch 00036: ReduceLROnPlateau reducing learning rate to 0


Epoch 00086: saving model to model_init_2020-01-3011_13_07.016235/model-00086-0.25573-0.91403-0.89363-0.64000.h5
Epoch 87/100

Epoch 00087: saving model to model_init_2020-01-3011_13_07.016235/model-00087-0.27940-0.90649-0.79040-0.66000.h5
Epoch 88/100

Epoch 00088: saving model to model_init_2020-01-3011_13_07.016235/model-00088-0.27090-0.91403-1.09378-0.61000.h5
Epoch 89/100

Epoch 00089: saving model to model_init_2020-01-3011_13_07.016235/model-00089-0.26416-0.90347-1.02265-0.58000.h5
Epoch 90/100

Epoch 00090: saving model to model_init_2020-01-3011_13_07.016235/model-00090-0.31133-0.88839-1.04782-0.60000.h5
Epoch 91/100

Epoch 00091: saving model to model_init_2020-01-3011_13_07.016235/model-00091-0.26311-0.90950-1.16593-0.57000.h5
Epoch 92/100

Epoch 00092: saving model to model_init_2020-01-3011_13_07.016235/model-00092-0.26425-0.90347-1.10245-0.54000.h5

Epoch 00092: ReduceLROnPlateau reducing learning rate to 0.001.
Epoch 93/100

Epoch 00093: saving model to model_init_2020-

<keras.callbacks.History at 0x7f4ec3edcb70>