In [1]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp

In [2]:
mp_holistic= mp.solutions.holistic
mp_drawing= mp.solutions.drawing_utils

In [3]:
def mediapipe_detection(image, model):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    
    #Image is not writable
    image.flags.writeable=False
    #For media pipe processing and make prediction
    results=model.process(image)
    image.flags.writeable=True
    image= cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    return image, results

In [5]:
#Definig Landmarks

def draw_landmarks(image,results):
    mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_TESSELATION)
    mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_CONTOURS)

    mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS)
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS)
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS)
cap= cv2.VideoCapture(0)

In [6]:
#Definig Landmarks
def draw_styled_landmarks(image,results):
    # mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACE_CONNECTIONS)
    # mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACE_CONNECTIONS,
    #                           mp_drawing.DrawingSpec(color=(80,110,10), thickness=1, circle_radius=1),
    #                           mp_drawing.DrawingSpec(color=(80,256,121), thickness=1, circle_radius=1))
    landmarks_style = mp_drawing.DrawingSpec(color=(80, 110, 10), thickness=1, circle_radius=1)
    connections_style = mp_drawing.DrawingSpec(color=(80, 256, 121), thickness=1, circle_radius=1)

    mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_TESSELATION, landmarks_style, connections_style)
    mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_CONTOURS, landmarks_style, connections_style)

    mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=(80,22,10), thickness=1, circle_radius=1),
                              mp_drawing.DrawingSpec(color=(80,44,121), thickness=1, circle_radius=1))
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=(121,22,76), thickness=1, circle_radius=4),
                              mp_drawing.DrawingSpec(color=(121,44,250), thickness=1, circle_radius=2))
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=(245,117,66), thickness=1, circle_radius=4),
                              mp_drawing.DrawingSpec(color=(245,66,230), thickness=1, circle_radius=2))


In [6]:
cap= cv2.VideoCapture(1)
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():
        ret, frame= cap.read()
        #Making Detection
        image, results= mediapipe_detection(frame, holistic)
        #Drawing Landmarks
        draw_styled_landmarks(image, results)
        cv2.imshow('OpenCV Window', image)
        if cv2.waitKey(20) & 0xFF == ord('a'):
            break
    cap.release()
    cv2.destroyAllWindows() 

In [7]:
pose=[]
for res in results.pose_landmarks.landmark:
    test = np.array([res.x, res.y, res.z, res.visibility])
    pose.append(test)

NameError: name 'results' is not defined

In [14]:
def extract_key_points(results):
    pose= np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33*4)
    face= np.array([[res.x, res.y, res.z] for res in results.face_landmarks.landmark]).flatten() if results.face_landmarks else np.zeros(468*3)
    lh= np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*3)
    rh= np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*3)
    # print(results.isempty())
    return np.concatenate([pose, face, lh, rh])

In [16]:
result_test= extract_key_points(results)


In [10]:
result_test

array([ 0.41674811,  0.35199425, -0.95747966, ...,  0.        ,
        0.        ,  0.        ])

In [11]:
np.save('0', result_test)

In [12]:
np.load("0.npy")

array([ 0.41674811,  0.35199425, -0.95747966, ...,  0.        ,
        0.        ,  0.        ])

In [13]:
len(extract_key_points(results)[:-10])

1652

In [14]:
#data_path= os.path.join("HandRecog Dataset")
data_path = os.path.join("HandRecog Dataset")
actions=np.array(["Hello", "Thanks", "I Like You"])
number_sequences= 30
sequence_length= 30
start_folder= 30

In [15]:
for action in actions: 
    dirmax = np.max(np.array(os.listdir(os.path.join(data_path, action))).astype(int))
    for sequence in range(1,number_sequences+1):
        try: 
            os.makedirs(os.path.join(data_path, action, str(dirmax+sequence)))
        except:
            pass

In [32]:
cap= cv2.VideoCapture(1)
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:

    for action in actions:
        for sequence in range(number_sequences):
            for frame_num in range(sequence_length):
                
                    ret, frame= cap.read()
                    #Making Detection
                    image, results= mediapipe_detection(frame, holistic)
                    #Drawing Landmarks
                    draw_styled_landmarks(image, results)
                    #Data Collection
                    if frame_num==0:
                        cv2.putText(image, "Starting Collection", (120,200),
                                    cv2.FONT_HERSHEY_COMPLEX, 1, (120,255,100), 4, cv2.LINE_AA)
                        cv2.putText(image, "Collecting Frames for {} Video Number {}". format(action, sequence), (15,12),
                                    cv2.FONT_HERSHEY_COMPLEX,0.5, (0,100,255), 1, cv2.LINE_AA)
                        cv2.waitKey(3000)
                    else:
                        cv2.putText(image, "Collecting Frames for {} Video Number {}". format(action, sequence), (15,12),
                                    cv2.FONT_HERSHEY_COMPLEX,0.5, (0,0,255), 1, cv2.LINE_AA)
                    
                    #Extracting Key Points
                    keypoints= extract_key_points(results)
                    numpy_path= os.path.join(data_path, action, str(sequence), str(frame_num))
                    np.save(numpy_path, keypoints)

                    
                    cv2.imshow('OpenCV Window', image)
                    if cv2.waitKey(20) & 0xFF == ord('a'):
                        break
    cap.release()
    cv2.destroyAllWindows() 

FileNotFoundError: [Errno 2] No such file or directory: 'C:\\Users\\Naman Sharma\\Desktop\\Folders\\ML Projects\\Hand Gesture Recognition\\Hand Gesture\\HandRecog Dataset\\Hello\\0\\0.npy'

In [16]:
#Pre Processing Data and Creating Labels and Features
import sklearn
# import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.python.keras.utils.np_utils import to_categorical

In [17]:
label_map= {label:num for num, label in enumerate(actions)}

In [18]:
label_map

{'Hello': 0, 'Thanks': 1, 'I Like You': 2}

In [20]:
sequences, labels= [], []
for action in actions:
    for sequence in np.array(os.listdir(os.path.join(data_path, action))).astype(int):
        window=[] #represent different frames
        for frame_num in range(sequence_length):               #looping the frame as seq length is frame length
            res= np.load(os.path.join(data_path, action, str(sequence), "{}.npy".format(frame_num)))
            window.append(res)   #Grabbing a window and then putting it together
        sequences.append(window)
        labels.append(label_map[action]) 

In [21]:
np.array(sequences).shape

(90, 30, 1662)

In [22]:
X=np.array(sequences)


In [None]:
# print(labels)

In [23]:
y= to_categorical(labels).astype(int) 


In [24]:
X_train, X_test, y_train, y_test= train_test_split(X,y, test_size=0.05)


In [25]:
import keras
from tensorflow.python.keras import Sequential
from tensorflow.python.keras.layers import LSTM, Dense


In [26]:
from tensorflow.python.keras.callbacks import TensorBoard
import tensorflow as tf
# log_dir= str(os.path.join('Logs'))  #to check the model training at real time
log_dir= os.path.join('Logs')
tb_callback = tf.keras.callbacks.TensorBoard(log_dir='Logs', histogram_freq=1) 

In [None]:
from tensorflow.python.keras.layers import Conv2D, MaxPooling2D, LSTM, Dense, Flatten
def create_cnn_lstm_model(input_shape):
    model = Sequential()

    # CNN layers
    model.add(Conv2D(32, (3, 3), activation='relu', input_shape=input_shape))
    model.add(MaxPooling2D((2, 2)))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2, 2)))
    model.add(Conv2D(128, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2, 2)))
    model.add(Flatten())

    # LSTM layers
    model.add(LSTM(64, return_sequences=True, activation='relu'))
    model.add(LSTM(128, return_sequences=True, activation='relu'))
    model.add(LSTM(64, return_sequences=False, activation='relu'))

    # Dense layers
    model.add(Dense(64, activation='relu'))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(actions.shape[0], activation='softmax'))

    return model

In [None]:
input_shape=(sequence_length,result_test*3)
model = create_cnn_lstm_model(input_shape)

In [27]:
# #Adding bunch of layers
# model= Sequential()
# model.add(LSTM(64, return_sequences=True, activation='relu', input_shape=(30,1662)))
# model.add(LSTM(128, return_sequences=True, activation='relu'))
# model.add(LSTM(64, return_sequences=False, activation='relu')) 
# model.add(Dense(64,activation='relu'))  
# model.add(Dense(32,activation='relu'))
# model.add(Dense(actions.shape[0], activation='softmax')) 


In [21]:
res=[0.7, 0.2, 0.1] #multi class classification model type


In [22]:
actions[np.argmax(res)] 


NameError: name 'actions' is not defined

In [None]:
#We took this model as mediapipe and LSTM model, the models out there like CNN which is in major usage as it need n number of CNN layers
#some used number of Cnn layers or mobile net layer then LSTM layer we trained with similar number of sequences as of 30 different sequences per class which equlas to 90 sequence in total
#and then due to compelexity we were not getting any accuracy which can be useful
#So finally researched and came to the conclusion that mediapipe holistic and LSTM is the most useful one for this reasons are
# 1. Less data required 
# 2. Faster to train (Apart for having to calculate 30million parameters we were working with half a million parameter)
# 3. Simple and becomes fater at detection 

In [30]:
from tensorflow.python.keras.metrics import Precision
model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['categorical_accuracy']) 
# This function is used when we have multiple class classification model
# When we use binary classification then we use binary classification model then we use binarycrossentropy 
# for regression type model we use mean squared error


In [31]:
# model.fit(X_train, y_train, epochs=2000, callbacks=[tb_callbacks])
model.fit(X_train, y_train, epochs=2000, callbacks=[tb_callback])
# The good thing about using this holistic model is that data is not going to feed in memory so that we didnt needed data 
#generative or build up pipeline of data

Epoch 1/2000
Epoch 2/2000
Epoch 3/2000
Epoch 4/2000
Epoch 5/2000
Epoch 6/2000
Epoch 7/2000
Epoch 8/2000
Epoch 9/2000
Epoch 10/2000
Epoch 11/2000
Epoch 12/2000
Epoch 13/2000
Epoch 14/2000
Epoch 15/2000
Epoch 16/2000
Epoch 17/2000
Epoch 18/2000
Epoch 19/2000
Epoch 20/2000
Epoch 21/2000
Epoch 22/2000
Epoch 23/2000
Epoch 24/2000
Epoch 25/2000
Epoch 26/2000
Epoch 27/2000
Epoch 28/2000
Epoch 29/2000
Epoch 30/2000
Epoch 31/2000
Epoch 32/2000
Epoch 33/2000
Epoch 34/2000
Epoch 35/2000
Epoch 36/2000
Epoch 37/2000
Epoch 38/2000
Epoch 39/2000
Epoch 40/2000
Epoch 41/2000
Epoch 42/2000
Epoch 43/2000
Epoch 44/2000
Epoch 45/2000
Epoch 46/2000
Epoch 47/2000
Epoch 48/2000
Epoch 49/2000
Epoch 50/2000
Epoch 51/2000
Epoch 52/2000
Epoch 53/2000
Epoch 54/2000
Epoch 55/2000
Epoch 56/2000
Epoch 57/2000
Epoch 58/2000
Epoch 59/2000
Epoch 60/2000
Epoch 61/2000
Epoch 62/2000
Epoch 63/2000
Epoch 64/2000
Epoch 65/2000
Epoch 66/2000
Epoch 67/2000
Epoch 68/2000
Epoch 69/2000
Epoch 70/2000
Epoch 71/2000
Epoch 72/2000
E

KeyboardInterrupt: 

In [32]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 30, 64)            442112    
_________________________________________________________________
lstm_1 (LSTM)                (None, 30, 128)           98816     
_________________________________________________________________
lstm_2 (LSTM)                (None, 64)                49408     
_________________________________________________________________
dense (Dense)                (None, 64)                4160      
_________________________________________________________________
dense_1 (Dense)              (None, 32)                2080      
_________________________________________________________________
dense_2 (Dense)              (None, 3)                 99        
Total params: 596,675
Trainable params: 596,675
Non-trainable params: 0
__________________________________________________

In [33]:
model.predict(X_test)

array([[1.8735344e-09, 9.9999690e-01, 3.0838839e-06],
       [1.8731383e-10, 9.9999964e-01, 3.3938849e-07],
       [2.4891391e-03, 7.0962483e-06, 9.9750382e-01],
       [9.3642610e-01, 2.4401972e-06, 6.3571453e-02],
       [2.5783424e-04, 9.6913189e-01, 3.0610245e-02]], dtype=float32)

In [34]:
actions[np.argmax(res[2])]

'Hello'

In [35]:
actions[np.argmax(y_test[2])]

'I Like You'

In [36]:
# model.save('action.h5')

In [None]:
#Save in loaded_model
from tensorflow.python.keras.models import load_model
loaded_model = load_model('action.h5')


In [None]:
#Delete the model
# del model

In [8]:
model.load_weights('action.h5')

NameError: name 'model' is not defined

In [38]:
#Evaluation of the model using confusion matrix and it will be giving confusion matrixs of each of the label so thats what is been detected as true positive and true negative 
from sklearn.metrics import multilabel_confusion_matrix, accuracy_score

In [39]:
yhat= model.predict(X_train)

In [40]:
#It will convert  the prediction from their one hot encoded represeantation to a categorical label eg 0, 1, 2 as [1,0,0], [0,1,0], 0,1,1
ytrue= np.argmax(y_train, axis=1).tolist()
yhat= np.argmax(yhat, axis=1).tolist()

In [41]:

multilabel_confusion_matrix(yhat, ytrue)

array([[[56,  0],
        [ 0, 29]],

       [[57,  0],
        [ 0, 28]],

       [[57,  0],
        [ 0, 28]]], dtype=int64)

In [42]:
accuracy_score(ytrue, yhat)

1.0

In [19]:
#Test in real time
colors= [(245,117,16), (117,245,16), (16,117,245)]
def prob_viz(res, actions, input_frame, colors):
    output_frame = input_frame.copy()
    for num, prob in enumerate(res):         #Getting different probabilites 
        cv2.rectangle(output_frame, (0,60+num*40), (int(prob*100), 90+num*40), colors[num], -1)
        cv2.putText(output_frame, action[num], (0,85+num*40), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2, cv2.LINE_AA)

    return output_frame


In [24]:
plt.figure(figsize=(10,10))
plt.imshow(prob_viz(res, actions, image, colors))

NameError: name 'actions' is not defined

<Figure size 1000x1000 with 0 Axes>

In [2]:
from tensorflow.python.keras.models import load_model

# Load the saved model
loaded_model = load_model('action.h5')

# We can use this to reload the model

In [25]:

#New detection variable 
sequence=[]
sentence= []
predictions= []
threshold=0.6

cap= cv2.VideoCapture(0)
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():
        ret, frame= cap.read()
        #Making Detection
        image, results= mediapipe_detection(frame, holistic)
        #Drawing Landmarks
        draw_styled_landmarks(image, results)

        #Prediction logic
        keypoints= extract_key_points(results)
        #sequence.append(keypoints)
        #insert vs append: We need to grab 30 last values instead of the first 30, which is what the next line is currently doing
        sequence.append(keypoints)
        sequence=sequence[-30:]
        
        if len(sequence)==30:
            res= model.predict(np.expand_dims(sequence, axis=0))[0]
            print(actions[np.argmax(res)])
            predictions.append(np.argmax(res))

            #Viz logic
        if len(predictions) >= 10 and np.unique(predictions[-10:])[0] == np.argmax(res):
            if res[np.argmax(res)] > threshold:
                if len(sentence) > 0:
                    if actions[np.argmax(res)] != sentence[-1]:   #its checking if the sentence is not equal to previous action
                        sentence.append(actions[np.argmax(res)])
                        #First its checking if we have words in the sentences array. if not, append it. If we do, check the current predicted word isnt the same. If its not, then append. IF its the same then skip the append to prevent duplication.
                else:
                    sentence.append(actions[np.argmax(res)])

                    
        if len(sentence) > 5:
            sentence=sentence[-5:]

        #Viz probability
        image= prob_viz(res, actions, image, colors)
        cv2.rectangle(image, (0,0), (640,40), (245,117,16), -1)
        cv2.putText(image, ' '.join(sentence), (3,30),
                   cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2, cv2.LINE_AA)
        cv2.imshow('OpenCV Window', image)
        if cv2.waitKey(20) & 0xFF == ord('a'):
            break
    cap.release()
    cv2.destroyAllWindows() 

NameError: name 'actions' is not defined

In [12]:
predictions

[]

In [12]:
# from keras.models import load_model
# model = load_model('action.h5')