In [1]:
import tensorflow as tf

In [2]:
!conda list

# packages in environment at C:\Users\Bilal\.conda\envs\tf:
#
# Name                    Version                   Build  Channel
_tflow_select             2.2.0                     eigen  
absl-py                   0.9.0                    py37_0  
argon2-cffi               20.1.0           py37h4ab8f01_1    conda-forge
astor                     0.8.1                    py37_0  
attrs                     19.3.0                     py_0    conda-forge
backcall                  0.2.0              pyh9f0ad1d_0    conda-forge
backports                 1.0                        py_2    conda-forge
backports.functools_lru_cache 1.6.1                      py_0    conda-forge
blas                      1.0                         mkl  
bleach                    3.1.5              pyh9f0ad1d_0    conda-forge
blinker                   1.4                      py37_0  
brotlipy                  0.7.0           py37he774522_1000  
ca-certificates           2020.6.20            hecda079_0    conda-

In [3]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
import time

import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Dense,MaxPool2D,Dropout,Flatten,Conv2D,GlobalAveragePooling2D,Activation
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

from random import choice,shuffle
from scipy import stats as st

from collections import deque

#from google.colab import drive
#drive.mount('/content/drive')

In [4]:
#os.chdir('/content/drive/My Drive/Colab Notebooks/Gesture Recognition')

# Create Dataset

In [None]:
# Create dataset
def gather_data(num_samples):
    """Interactively record labelled gesture images from the default webcam.

    A preview window is shown with a square region of interest (ROI) drawn in
    the top-right corner of the mirrored frame. Pressing one of the class keys
    starts recording the ROI of every following frame until ``num_samples``
    images have been stored:

        'u' -> ThumbUp, 'd' -> ThumbDown, 's' -> StopSign,
        'p' -> Point,   'i' -> Pick,      'n' -> Nogestures

    Each sample is stored as ``[roi, class_name]`` in a module-level list named
    after the class (e.g. the global ``ThumbUp``). Pressing a class key always
    replaces that class's list with a fresh empty one; pressing a class key
    while a recording is running stops the recording. Press 'q' to quit.

    Args:
        num_samples: Number of images to record per key press.

    Returns:
        None. Results are published as module-level lists (see above).
    """
    # Keyboard key code -> name of the module-level list receiving the samples.
    key_to_class = {
        ord('u'): 'ThumbUp',
        ord('d'): 'ThumbDown',
        ord('s'): 'StopSign',
        ord('p'): 'Point',
        ord('i'): 'Pick',
        ord('n'): 'Nogestures',
    }

    window_name = "Collecting images"

    # Initialize the camera
    cap = cv2.VideoCapture(0)

    # trigger tells us whether we are currently recording
    trigger = False

    # Number of samples recorded in the current run
    counter = 0

    # Class currently being recorded and the list its samples are appended to
    class_name = None
    samples = None

    # This is the ROI size; the saved images are (box_size - 10) pixels square
    box_size = 234

    # Width of the frame, read from the camera properties
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))

    try:
        # Make a resizable window (created once, not on every frame).
        cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)

        while True:
            # Read frame by frame
            ret, frame = cap.read()

            # Stop if the frame could not be read. This must be checked before
            # the frame is used: on failure `frame` is None and cv2.flip raises.
            if not ret:
                break

            # Flip the frame laterally
            frame = cv2.flip(frame, 1)

            # Stop recording once the requested number of samples is reached
            if trigger and counter >= num_samples:
                trigger = False
                counter = 0

            # Draw the ROI used for capturing samples
            cv2.rectangle(frame, (width - box_size, 0), (width, box_size), (0, 250, 150), 2)

            if trigger:
                # Grab only the selected ROI (5 px inside the drawn rectangle).
                # Copy it so the stored sample does not keep the whole frame
                # alive in memory.
                roi = frame[5: box_size - 5, width - box_size + 5: width - 5].copy()

                # Store the image together with its class name
                samples.append([roi, class_name])
                counter += 1

                text = "Collected Samples of {}: {}".format(class_name, counter)
            else:
                text = "Press 'u' to collect Thumb Up samples, 'd' for Thumb Down, 's' for Stop Sign,'p' for Point', 'i' for Pick and 'n' for No gestures"

            # Show the status text on the image
            cv2.putText(frame, text, (3, 350), cv2.FONT_HERSHEY_SIMPLEX, 0.45, (0, 0, 255), 1, cv2.LINE_AA)

            # Display the window
            cv2.imshow(window_name, frame)

            # Wait 1 ms for a key press; mask to the low byte so the comparison
            # also works on platforms that set higher bits in the key code.
            k = cv2.waitKey(1) & 0xFF

            # Exit if user presses 'q'
            if k == ord('q'):
                break

            if k in key_to_class:
                # Toggle recording and start from a clean state: a fresh list
                # for the class and a zeroed counter, so an interrupted run
                # cannot shorten the next one.
                trigger = not trigger
                counter = 0
                class_name = key_to_class[k]
                samples = []
                # Publish the list under the class name in the module namespace
                # (what the original `global` assignments did, without eval).
                globals()[class_name] = samples
    finally:
        # Release the camera and destroy the window, even if an error occurred
        cap.release()
        cv2.destroyAllWindows()

In [4]:
# Run this cell only when you want to record your own dataset.
# `no_of_samples` images are recorded for each gesture key you press.
no_of_samples = 400
gather_data(num_samples=no_of_samples)

In [None]:
# Save dataset on disk so that it can be used again later.
# One file per gesture class is written to the working directory: <gesture>.pickle
import pickle

gesture_names = ['ThumbUp', 'ThumbDown', 'StopSign', 'Point', 'Pick', 'Nogestures']

# Check up front that every class has been recorded, so we never leave a
# partially written dataset behind.
missing = [gesture for gesture in gesture_names if gesture not in globals()]
if missing:
    raise NameError(
        "No samples recorded for: {}. Run gather_data() and record these gestures first.".format(
            ', '.join(missing)
        )
    )

for gesture in gesture_names:
    with open(f'{gesture}.pickle', 'wb') as f:
        # Look the list up by name in the notebook namespace instead of
        # eval(): same object, but no string is executed as code.
        pickle.dump(globals()[gesture], f)

We keep the entire dataset in memory during training, so a large amount of RAM is required. If training raises a MemoryError on your device, use Google Colab instead: upload this notebook together with the pickle files that contain the dataset.

In [3]:
# Only run this cell if you are training on Google Colab or on a different device than the one you collected the dataset on.
# NOTE: unpickling can execute arbitrary code, so only load pickle files that you created yourself.
import pickle

temp = []
for gesture in ['ThumbUp', 'ThumbDown', 'StopSign', 'Point', 'Pick', 'Nogestures']:
    print(gesture)
    # Use a context manager so every file handle is closed after loading.
    with open(f'{gesture}.pickle', 'rb') as f:
        temp.append(pickle.load(f))

# The unpacking order must match the order of the list iterated above.
ThumbUp, ThumbDown, StopSign, Point, Pick, Nogestures = temp

ThumbUp
ThumbDown
StopSign
Point
Pick
Nogestures


In [5]:
# All gesture classes; every element of each list is an [image, label] pair.
gesture_sets = [ThumbUp, ThumbDown, StopSign, Point, Pick, Nogestures]

# Combine the labels of all classes together
labels = [sample[1] for gesture_set in gesture_sets for sample in gesture_set]

# Combine the images of all classes together (same order as the labels)
images = [sample[0] for gesture_set in gesture_sets for sample in gesture_set]

# Normalize the images by dividing by 255, now our images are in range 0-1. This will help in training.
# float32 is used instead of float64: it halves the RAM needed for the dataset.
images = np.array(images, dtype="float32") / 255.0

# Print out the total number of images and labels.
print('Total images: {} , Total Labels: {}'.format(len(images), len(labels)))

# Create an encoder Object
encoder = LabelEncoder()

# Convert labels to integers. Mapping is done in alphabetical order.
Int_labels = encoder.fit_transform(labels)

# Now convert the integer labels into one-hot format, i.e. 0 = [1,0,0,0,0,0] etc.
# The number of classes is taken from the encoder rather than hard-coded.
one_hot_labels = to_categorical(Int_labels, len(encoder.classes_))

# Now we're splitting the data, 75% for training and 25% for testing.
(trainX, testX, trainY, testY) = train_test_split(images, one_hot_labels, test_size=0.25, random_state=50)

# Drop the reference to the full array so its RAM can be reclaimed
images = []

Total images: 2400 , Total Labels: 2400


In [5]:
# Side length (in pixels) of the square RGB images the model accepts.
image_size = 224

# Pre-trained NASNetMobile backbone with ImageNet weights.
# include_top=False leaves out its original classification head.
N_mobile = tf.keras.applications.NASNetMobile(
    weights='imagenet',
    include_top=False,
    input_shape=(image_size, image_size, 3),
)

# Freeze the whole backbone
N_mobile.trainable = False

# Custom head, stacked on the backbone's output feature maps:
#   1. Global Average Pooling turns the feature maps into a single vector
#      (Flatten() would also work; GAP needs fewer parameters and helps
#      against overfitting).
x = GlobalAveragePooling2D()(N_mobile.output)

#   2. A dense layer with 712 units
x = Dense(712, activation='relu')(x)

#   3. Dropout of 40% of the activations, which helps reduce overfitting
x = Dropout(0.40)(x)

#   4. The final layer: 6 softmax units (number of units = number of classes)
preds = Dense(6, activation='softmax')(x)

# Construct the full model
model = Model(inputs=N_mobile.input, outputs=preds)

# Check the number of layers in the final model
print("Number of Layers in Model: {}".format(len(model.layers)))

Downloading data from https://github.com/titu1994/Keras-NASNet/releases/download/v1.2/NASNet-mobile-no-top.h5
Number of Layers in Model: 773


In [6]:
# Data augmentation in order to increase the size of our dataset and improve model performance.

# Random transformations applied to the training images.
augmentation_settings = dict(
    # rotation_range=30,  (rotation is currently disabled)
    zoom_range=0.25,
    width_shift_range=0.10,
    height_shift_range=0.10,
    shear_range=0.10,
    horizontal_flip=False,
    fill_mode="nearest",
)

augment = ImageDataGenerator(**augmentation_settings)

In [5]:
# Configure training: Adam optimizer with a learning rate of 1e-4,
# categorical cross-entropy loss, and accuracy as the reported metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer=Adam(learning_rate=0.0001),
    metrics=['accuracy'],
)

NameError: name 'model' is not defined

In [8]:
# Training settings. Set batchsize according to your system
# (an alternative value is kept below, commented out).
# batchsize = 5
batchsize = 16
epochs = 10

# Batches of augmented training images
train_batches = augment.flow(trainX, trainY, batch_size=batchsize)

# Start training; the held-out test split is used for validation
history = model.fit(
    x=train_batches,
    epochs=epochs,
    steps_per_epoch=len(trainX) // batchsize,
    validation_data=(testX, testY),
)

NameError: name 'trainX' is not defined

In [4]:
# Plot the accuracy and loss curves recorded during training

acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

# Use a dedicated name for the x-axis values so that the `epochs`
# hyper-parameter of the training cell is not overwritten by a range object.
epoch_range = range(len(acc))

plt.plot(epoch_range, acc, 'b', label='Training acc')
plt.plot(epoch_range, val_acc, 'r', label='Validation acc')
plt.title('Training and validation accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()

# Second figure for the loss curves
plt.figure()

plt.plot(epoch_range, loss, 'b', label='Training loss')
plt.plot(epoch_range, val_loss, 'r', label='Validation loss')
plt.title('Training and validation loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()

plt.show()

NameError: name 'history' is not defined

In [11]:
# Uncomment the next line to save the trained model to the file model.h5 so it can be reused later.
#model.save("model.h5")

In [5]:
# Restore the saved weights from disk. `model` must already exist: if this
# raises an error, first run the cells above where the model is defined and
# compiled.
weights_file = 'model.h5'
model.load_weights(weights_file)

NameError: name 'model' is not defined

In [6]:
# Live demo: classify the gesture shown inside the ROI of the webcam image.
# Press 'q' to quit.

# This list will be used to map probabilities to class names, Label names are in alphabetical order.
label_names = ['No gestures', 'Pick', 'Point', 'Stop Sign', 'Thumb Down', 'Thumb Up']

# One name for both creating and updating the window. Previously the window
# was created as "Gestures" but frames were shown in "Final Year Project",
# which opened a second window and left the resizable one empty.
window_name = "Gestures"

# Size of the square ROI drawn in the top-right corner of the frame
box_size = 234

cap = cv2.VideoCapture(0)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))

try:
    # Make a resizable window (created once, not on every frame).
    cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)

    while True:

        ret, frame = cap.read()
        if not ret:
            break

        # Mirror the frame
        frame = cv2.flip(frame, 1)

        cv2.rectangle(frame, (width - box_size, 0), (width, box_size), (0, 250, 150), 2)

        # Cut out the ROI, 5 px inside the drawn rectangle
        roi = frame[5: box_size - 5, width - box_size + 5: width - 5]

        # Normalize the image like we did in the preprocessing step; wrapping it
        # in a list adds the batch dimension expected by the model.
        roi = np.array([roi]).astype('float64') / 255.0

        # Get model's prediction.
        pred = model.predict(roi)

        # Get the index of the target class.
        target_index = np.argmax(pred[0])

        # Get the probability of the target class
        prob = pred[0][target_index]

        # Show results
        cv2.putText(frame, "prediction: {} {:.2f}%".format(label_names[target_index], prob * 100),
                    (10, 40), cv2.FONT_HERSHEY_SIMPLEX, 0.90, (0, 0, 255), 2, cv2.LINE_AA)

        cv2.imshow(window_name, frame)

        # Mask to the low byte so the comparison also works on platforms that
        # set higher bits in the key code.
        k = cv2.waitKey(1) & 0xFF
        if k == ord('q'):
            break
finally:
    # Always free the camera and close the window, even if prediction fails
    cap.release()
    cv2.destroyAllWindows()

NameError: name 'model' is not defined