In [1]:
# importing required libraries
import tensorflow as tf 
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D , Dense , Dropout , Flatten
import numpy as np 
from sklearn.model_selection import train_test_split, StratifiedKFold 
import matplotlib.pyplot as plt 
import cv2 
import pandas as pd 
from glob import glob 

In [2]:
!wget https://www.gti.ssr.upm.es/images/Data/Downloads/HandGestureDatabase/set_2.rar

--2021-09-30 00:03:55--  https://www.gti.ssr.upm.es/images/Data/Downloads/HandGestureDatabase/set_2.rar
Resolving www.gti.ssr.upm.es (www.gti.ssr.upm.es)... 138.4.32.2
Connecting to www.gti.ssr.upm.es (www.gti.ssr.upm.es)|138.4.32.2|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 33102454881 (31G) [application/rar]
Saving to: ‘set_2.rar’


2021-09-30 00:45:12 (12.8 MB/s) - ‘set_2.rar’ saved [33102454881/33102454881]



In [3]:
!unrar x "/content/set_2.rar"

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Extracting  set_2/training/g05/subject_1/sequence/frame_10480.png         87%  OK 
Extracting  set_2/training/g05/subject_1/sequence/frame_10481.png         87%  OK 
Extracting  set_2/training/g05/subject_1/sequence/frame_10482.png         87%  OK 
Extracting  set_2/training/g05/subject_1/sequence/frame_10483.png         87%  OK 
Extracting  set_2/training/g05/subject_1/sequence/frame_10484.png         87%  OK 
Extracting  set_2/training/g05/subject_1/sequence/frame_10485.png         87%  OK 
Extracting  set_2/training/g05/subject_1/sequence/frame_10486.png         87%  OK 
Extracting  set_2/training/g05/subject_1/sequence/frame_10487.png         87%  OK 
Extracting  set_2/training/g05/subject_1/sequence/frame_10488.png         87%  OK 
Extracting  set_2/training/g05/subject_1/sequence/frame_10489.png         87%  OK 
Extracting  set

In [4]:
import os
os.remove('/content/set_2.rar')

In [5]:
subjectList = ['subject_1','subject_2','subject_3','subject_4','subject_5','subject_6']
gList       = ['g01','g02','g03','g04','g05']
dffinal     = pd.DataFrame(columns = ['filename','class'])

for i in subjectList:
  for j in gList:
    # defining image folders
    base_subfolder          =  '/content/set_2/training/'
    activation_subfolder    =  base_subfolder + j + '/' + i +'/sequence'
    cursor_subfolder        =  base_subfolder + j + '/' + i +'/sequence'
    left_click_subfolder    =  base_subfolder + j + '/' + i +'/sequence'
    right_click_subfolder   =  base_subfolder + j + '/' + i +'/sequence'
    deactivation_subfolder  =  base_subfolder + j + '/' + i +'/sequence'

    # finding filenames in respective subfolders
    activation_filenames    = glob(activation_subfolder + '/*')
    cursor_filenames        = glob(cursor_subfolder + '/*')
    left_click_filenames    = glob(left_click_subfolder + '/*')
    right_click_filenames   = glob(right_click_subfolder + '/*')
    deactivation_filenames  = glob(deactivation_subfolder + '/*')

    # printing filename info 
    print("Total {} images found in {} folder".format(len(activation_filenames), activation_subfolder))
    print("Total {} images found in {} folder".format(len(cursor_filenames), cursor_subfolder))
    print("Total {} images found in {} folder".format(len(left_click_filenames), left_click_subfolder))
    print("Total {} images found in {} folder".format(len(right_click_filenames), right_click_subfolder))
    print("Total {} images found in {} folder".format(len(deactivation_filenames), deactivation_subfolder))

    # creating a dataframe for containing the filepaths (complete absolute filepaths) and corresponding class labels
    label_names   = ['activation','cursor','left_click','right_click','deactivation']
    all_filepaths = activation_filenames + cursor_filenames + left_click_filenames + right_click_filenames + deactivation_filenames
    all_labels    = [label_names[0]]*len(activation_filenames) + [label_names[1]]*len(cursor_filenames) + [label_names[2]]*len(left_click_filenames) + [label_names[3]]*len(right_click_filenames) + [label_names[4]]*len(deactivation_filenames)   
    df            = pd.DataFrame({'filename':all_filepaths, 'class':all_labels})
    dffinal       = pd.concat([dffinal,df], axis=0)

Total 794 images found in /content/set_2/training/g01/subject_1/sequence folder
Total 794 images found in /content/set_2/training/g01/subject_1/sequence folder
Total 794 images found in /content/set_2/training/g01/subject_1/sequence folder
Total 794 images found in /content/set_2/training/g01/subject_1/sequence folder
Total 794 images found in /content/set_2/training/g01/subject_1/sequence folder
Total 404 images found in /content/set_2/training/g02/subject_1/sequence folder
Total 404 images found in /content/set_2/training/g02/subject_1/sequence folder
Total 404 images found in /content/set_2/training/g02/subject_1/sequence folder
Total 404 images found in /content/set_2/training/g02/subject_1/sequence folder
Total 404 images found in /content/set_2/training/g02/subject_1/sequence folder
Total 1041 images found in /content/set_2/training/g03/subject_1/sequence folder
Total 1041 images found in /content/set_2/training/g03/subject_1/sequence folder
Total 1041 images found in /content/se

In [6]:
df = dffinal.copy()
del dffinal
print(df.head())
print(df.describe())

                                            filename       class
0  /content/set_2/training/g01/subject_1/sequence...  activation
1  /content/set_2/training/g01/subject_1/sequence...  activation
2  /content/set_2/training/g01/subject_1/sequence...  activation
3  /content/set_2/training/g01/subject_1/sequence...  activation
4  /content/set_2/training/g01/subject_1/sequence...  activation
                                                 filename         class
count                                              181570        181570
unique                                              36314             5
top     /content/set_2/training/g02/subject_3/sequence...  deactivation
freq                                                    5         36314


In [7]:
# performing train, test set splits
train_val_df, test_df = train_test_split(df, test_size=0.1, random_state=0, shuffle=True, stratify=df['class'])
print("Training+Validation set")
print(train_val_df.head())
print(train_val_df.describe())
print('*'*50)
print("Testing set")
print(test_df.head())
print(test_df.describe())
test_df['filename']

Training+Validation set
                                               filename         class
1581  /content/set_2/training/g04/subject_3/sequence...        cursor
3879  /content/set_2/training/g03/subject_1/sequence...   right_click
1022  /content/set_2/training/g02/subject_1/sequence...    left_click
1847  /content/set_2/training/g01/subject_2/sequence...        cursor
8155  /content/set_2/training/g01/subject_5/sequence...  deactivation
                                                 filename         class
count                                              163413        163413
unique                                              36314             5
top     /content/set_2/training/g01/subject_2/sequence...  deactivation
freq                                                    5         32683
**************************************************
Testing set
                                               filename         class
7315  /content/set_2/training/g04/subject_2/sequence...  deacti

7315    /content/set_2/training/g04/subject_2/sequence...
4286    /content/set_2/training/g03/subject_1/sequence...
516     /content/set_2/training/g03/subject_2/sequence...
3498    /content/set_2/training/g05/subject_5/sequence...
1505    /content/set_2/training/g04/subject_6/sequence...
                              ...                        
2080    /content/set_2/training/g04/subject_3/sequence...
5153    /content/set_2/training/g03/subject_6/sequence...
4715    /content/set_2/training/g03/subject_1/sequence...
5813    /content/set_2/training/g01/subject_5/sequence...
2397    /content/set_2/training/g04/subject_6/sequence...
Name: filename, Length: 18157, dtype: object

In [8]:
# Defining K-fold stratification for train and validation set splits for cross-validation
num_cv_splits = 5
skf = StratifiedKFold(n_splits=num_cv_splits, random_state=0, shuffle=True)
list_train_idxs = []
list_val_idxs   = []
for train_idxs, val_idxs in skf.split(train_val_df['class'], train_val_df['class']):
  list_train_idxs.append(train_idxs)
  list_val_idxs.append(val_idxs)
print(len(list_train_idxs), len(list_val_idxs))
print(len(list_train_idxs[0]), len(list_val_idxs[0]))

5 5
130730 32683


In [9]:
# Defining ImageDataGenerators for train, val and test sets 
train_data_generator = ImageDataGenerator( rotation_range=40, width_shift_range=0.2, height_shift_range=0.2, rescale=1./255,
                        shear_range=0.2, zoom_range=0.2, horizontal_flip=True, fill_mode='nearest')
val_data_generator   =  ImageDataGenerator(rescale=1./255)
test_data_generator  =  ImageDataGenerator(rescale=1./255)

In [10]:
# Defining image resolution, batch size, etc 
IMAGE_WIDTH=150
IMAGE_HEIGHT=150
BATCH_SIZE=20

In [11]:
# define finetune cnn model
from tensorflow.python.keras.applications.resnet import ResNet50
from keras.models import Model
def finetuning_model():
  # load model
  model = ResNet50(include_top=False, input_shape=(IMAGE_WIDTH, IMAGE_HEIGHT,3))
  # mark loaded layers as not trainable
  for layer in model.layers:
    layer.trainable=False
  # add new classifier layers
  flat1=Flatten()(model.layers[-1].output)
  class1=Dense(128, activation='relu',kernel_initializer='he_uniform')(flat1)
  #bn = BatchNormalization()(flatten)
  #dropout = Dropout(0.5)(bn)
  num_output_neurons = 4
  output=Dense(num_output_neurons, activation='softmax')(class1)
  # define new model
  model = Model(inputs=model.inputs, outputs=output)
  # compile model
  model.compile(loss="categorical_crossentropy",optimizer="rmsprop",metrics=["accuracy"])
  
  return model

In [12]:
# Defining the model architecture and compiling the model 

def get_model():
  model = Sequential()
  model.add(Conv2D(32, (3,3),activation="relu",input_shape=(IMAGE_HEIGHT,IMAGE_WIDTH,3)))
  model.add(MaxPooling2D(pool_size=(2,2)))
  model.add(Conv2D(32, (3,3),activation="relu"))
  model.add(MaxPooling2D(pool_size=(2,2)))
  model.add(Flatten())
  model.add(Dense(16,activation="relu"))
  model.add(Dropout(0.1))
  model.add(Dense(4,activation="sigmoid"))
  model.compile(loss="categorical_crossentropy",optimizer="rmsprop",metrics=["accuracy"])
  return model 
  
model = get_model()
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 148, 148, 32)      896       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 74, 74, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 72, 72, 32)        9248      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 36, 36, 32)        0         
_________________________________________________________________
flatten (Flatten)            (None, 41472)             0         
_________________________________________________________________
dense (Dense)                (None, 16)                663568    
_________________________________________________________________
dropout (Dropout)            (None, 16)                0

In [23]:
import os
import random
import shutil

classes = list(set(test_df['class'].unique()))
folders = [activation_subfolder,deactivation_subfolder,right_click_subfolder,left_click_subfolder,cursor_subfolder]
for i in range(4):
  training = base_subfolder + 'train/' + classes[i] + '/'
  validation = base_subfolder + 'validation/' + classes[i] + '/'
  test = base_subfolder + 'test/' + classes[i] + '/'
  #files = os.listdir(folders[i])

  if not os.path.exists(training): # create a tempory folder 'preview' to save generated images
    os.makedirs(training)
  if not os.path.exists(validation): # create a tempory folder 'preview' to save generated images
    os.makedirs(validation)
  if not os.path.exists(test): # create a tempory folder 'preview' to save generated images
    os.makedirs(test)

  for m in range(800):
    try:
      files = [filenames for (filenames) in os.listdir(folders[i])]
      random_file = random.choice(files)
      shutil.move(os.path.join(folders[i], random_file), training)
    except:
      pass
  for m in range(250):
    try:
      files = [filenames for (filenames) in os.listdir(folders[i])]
      random_file = random.choice(files)
      shutil.move(os.path.join(folders[i], random_file), validation)    
    except:
      pass
  for m in range(250):
    try:
      files = [filenames for (filenames) in os.listdir(folders[i])]
      random_file = random.choice(files)
      shutil.move(os.path.join(folders[i], random_file), test)
    except:
      pass

In [25]:
# Performing K-fold(5-fold) cross-validation 
train_losses     = []
train_accuracies = []
val_losses       = []
val_accuracies   = []
resnetlist       = []
NUM_EPOCHS = 1
training_data_dir   = base_subfolder +'train/'
validation_data_dir = base_subfolder +'validation/'
test_data_dir       = base_subfolder +'test/'
IMAGE_WIDTH=150
IMAGE_HEIGHT=150
BATCH_SIZE=10

test_data_generator  =  ImageDataGenerator(rescale=1./255)
test_generator = test_data_generator.flow_from_directory(
    test_data_dir,
    target_size=(IMAGE_WIDTH, IMAGE_HEIGHT),
    batch_size=1,
    class_mode="categorical",
    shuffle=False)

for i in range(num_cv_splits):
  train_idxs = list_train_idxs[i]
  val_idxs   = list_val_idxs[i]
  train_df = train_val_df.iloc[train_idxs]
  val_df   = train_val_df.iloc[val_idxs]
  
  train_gen = train_data_generator.flow_from_dataframe(train_df, target_size=(IMAGE_HEIGHT, IMAGE_WIDTH), batch_size=BATCH_SIZE, validation_data_dir=True)
  val_gen   = val_data_generator.flow_from_dataframe(val_df, target_size=(IMAGE_HEIGHT, IMAGE_WIDTH), batch_size=BATCH_SIZE, validation_data_dir=True)

  modelRes = finetuning_model()  
  history = modelRes.fit(train_gen, epochs=NUM_EPOCHS, steps_per_epoch=len(train_df)//BATCH_SIZE, validation_data=val_gen, validation_steps=len(val_gen)//BATCH_SIZE) # switching to model.fit since model.fit_generator is being deprecated
  # Check classification accuracy on the test set
  _, acc=modelRes.evaluate_generator(test_generator, steps=len(test_generator),verbose=0)
  resnetlist.append(acc)
  train_losses.append(history.history['loss'][-1]) 
  val_losses.append(history.history['val_loss'][-1]) 
  train_accuracies.append(history.history['accuracy'][-1]) 
  val_accuracies.append(history.history['val_accuracy'][-1])   


Found 0 images belonging to 5 classes.


  .format(n_invalid, x_col)


Found 128504 validated image filenames belonging to 5 classes.


  .format(n_invalid, x_col)


Found 32113 validated image filenames belonging to 5 classes.
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5


ValueError: ignored

In [None]:
# printing cross validation losses and accuracies
print(train_losses)
print(val_losses)
print(train_accuracies)
print(val_accuracies)