In [3]:
import cv2
import numpy as np
import mediapipe as mp
import tensorflow as tf
from tensorflow.keras.models import load_model

In [4]:
# MediaPipe helpers: the hands solution module and the drawing utilities
# used later to overlay the detected landmarks on each frame.
mpHands = mp.solutions.hands
mpDraw = mp.solutions.drawing_utils

# Hand detector configured to track at most one hand and to ignore
# detections whose confidence is below 0.7.
hands = mpHands.Hands(
    max_num_hands=1,
    min_detection_confidence=0.7,
)

In [5]:
#Load the gesture recognizer model saved at 'mp_hand_gestures'
model = load_model('mp_hand_gestures')

#Load class names (one label per line). The context manager closes the file
#even if reading fails, and splitlines() avoids a spurious empty label when
#the file ends with a newline (or uses Windows line endings).
with open('gesture.names', 'r') as f:
    classNames = f.read().splitlines()
print(classNames)

['okay', 'peace', 'thumbs up', 'thumbs down', 'call me', 'stop', 'rock', 'live long', 'fist', 'smile']


In [9]:
# Real-time gesture recognition loop: read webcam frames, detect hand
# landmarks with MediaPipe, classify them with the gesture model and draw the
# predicted label on the frame. Press 'q' in the output window to stop.

#Initialize the webcam
cap = cv2.VideoCapture(0)

try:
    while True:
        #Read each frame from the webcam
        ret, frame = cap.read()
        if not ret:
            #No frame was delivered (camera unavailable or stream ended);
            #stop instead of crashing on frame.shape below.
            break

        # NOTE(review): frame.shape is (height, width, channels), so x holds
        # the image height and y the image width. The scaling below is kept
        # exactly as before because the pretrained model presumably expects
        # coordinates computed this way — TODO confirm before swapping.
        x, y, _ = frame.shape

        #Flip the frame horizontally (flip code 1 mirrors left/right)
        frame = cv2.flip(frame, 1)

        #MediaPipe expects RGB input, OpenCV delivers BGR
        framergb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        #Get hand landmark prediction
        result = hands.process(framergb)
        className = ''

        #post process the result
        if result.multi_hand_landmarks:   #checking if any hand is detected or not
            landmarks = []
            for handslms in result.multi_hand_landmarks:
                for lm in handslms.landmark:
                    #The detector returns normalized coordinates (each value
                    #is between 0 and 1), so they are scaled by the frame
                    #dimensions to get pixel-like integer values.
                    lmx = int(lm.x * x)
                    lmy = int(lm.y * y)
                    landmarks.append([lmx, lmy])

                #Drawing landmarks on frames
                mpDraw.draw_landmarks(frame, handslms, mpHands.HAND_CONNECTIONS)

            #Predict gesture. This runs only when a hand was detected in the
            #current frame; otherwise `landmarks` would be undefined (first
            #frame) or stale values from an earlier frame.
            #model.predict() takes the list of landmarks and returns one row
            #with a score for each of the 10 gesture classes.
            prediction = model.predict([landmarks])
            print(prediction)
            classID = np.argmax(prediction) #index of the highest class score
            className = classNames[classID] #look up the label for that index

        #show the prediction on the frame (empty string when no hand is visible)
        cv2.putText(frame, className, (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, cv2.LINE_AA)

        #Show the final output
        cv2.imshow("Output", frame)
        if cv2.waitKey(1) == ord('q'):
            break
finally:
    #Release the webcam and destroy all active windows, even if the loop
    #above stopped because of an exception or a kernel interrupt.
    cap.release()
    cv2.destroyAllWindows()

[[2.0449363e-12 5.3040718e-04 1.7253183e-06 5.0134468e-01 4.9801326e-01
  2.5397436e-22 6.9524035e-08 2.0638351e-08 8.6388916e-08 1.0981747e-04]]
[[2.0449363e-12 5.3040718e-04 1.7253183e-06 5.0134468e-01 4.9801326e-01
  2.5397436e-22 6.9524035e-08 2.0638351e-08 8.6388916e-08 1.0981747e-04]]
[[2.0449363e-12 5.3040718e-04 1.7253183e-06 5.0134468e-01 4.9801326e-01
  2.5397436e-22 6.9524035e-08 2.0638351e-08 8.6388916e-08 1.0981747e-04]]
[[2.0449363e-12 5.3040718e-04 1.7253183e-06 5.0134468e-01 4.9801326e-01
  2.5397436e-22 6.9524035e-08 2.0638351e-08 8.6388916e-08 1.0981747e-04]]
[[2.0449363e-12 5.3040718e-04 1.7253183e-06 5.0134468e-01 4.9801326e-01
  2.5397436e-22 6.9524035e-08 2.0638351e-08 8.6388916e-08 1.0981747e-04]]
[[2.0449363e-12 5.3040718e-04 1.7253183e-06 5.0134468e-01 4.9801326e-01
  2.5397436e-22 6.9524035e-08 2.0638351e-08 8.6388916e-08 1.0981747e-04]]
[[2.0449363e-12 5.3040718e-04 1.7253183e-06 5.0134468e-01 4.9801326e-01
  2.5397436e-22 6.9524035e-08 2.0638351e-08 8.638891

[[2.5996892e-11 1.0917412e-25 6.6193317e-15 3.4922295e-23 9.2797319e-13
  2.9046726e-16 2.3965480e-05 4.1266418e-23 9.9997604e-01 2.6707097e-23]]
[[2.5542275e-05 4.4710507e-16 1.2049144e-17 9.9349261e-13 5.2594387e-13
  8.3097268e-07 9.9953151e-01 3.5457653e-10 4.4217586e-04 6.1404800e-17]]
[[5.4643893e-05 6.5549502e-06 5.1490695e-19 2.4480814e-02 9.8557718e-10
  3.0952203e-04 7.6933211e-01 2.0581637e-01 4.3551800e-08 2.4364055e-10]]
[[2.2063404e-19 9.8851647e-09 4.0058355e-18 1.0869946e-16 1.4823855e-01
  2.1190682e-25 1.9267806e-15 3.4165283e-20 8.5176140e-01 5.4652578e-11]]
[[9.5322274e-21 1.8452572e-03 8.6131478e-24 9.9787831e-01 2.7642550e-04
  3.3759968e-23 2.2477351e-13 4.8580873e-09 7.8505717e-14 6.9353778e-10]]
[[3.6571949e-21 1.1113628e-15 1.9793096e-35 1.0000000e+00 2.2742978e-21
  3.3294998e-18 7.9357536e-12 1.3846809e-08 3.0305427e-30 5.5690495e-22]]
[[7.6029554e-24 5.6673472e-17 3.3176439e-34 1.0000000e+00 2.7898171e-19
  1.8181450e-22 2.0477960e-12 1.8667323e-11 9.464785

[[3.0007001e-09 5.5017495e-16 1.1265785e-10 1.9489237e-17 7.2305700e-10
  1.0203282e-19 6.6809119e-10 2.8432899e-19 1.0000000e+00 6.6923316e-16]]
[[3.0007001e-09 5.5017495e-16 1.1265785e-10 1.9489237e-17 7.2305700e-10
  1.0203282e-19 6.6809119e-10 2.8432899e-19 1.0000000e+00 6.6923316e-16]]
[[3.0007001e-09 5.5017495e-16 1.1265785e-10 1.9489237e-17 7.2305700e-10
  1.0203282e-19 6.6809119e-10 2.8432899e-19 1.0000000e+00 6.6923316e-16]]
[[3.0007001e-09 5.5017495e-16 1.1265785e-10 1.9489237e-17 7.2305700e-10
  1.0203282e-19 6.6809119e-10 2.8432899e-19 1.0000000e+00 6.6923316e-16]]
[[3.0007001e-09 5.5017495e-16 1.1265785e-10 1.9489237e-17 7.2305700e-10
  1.0203282e-19 6.6809119e-10 2.8432899e-19 1.0000000e+00 6.6923316e-16]]
[[3.0007001e-09 5.5017495e-16 1.1265785e-10 1.9489237e-17 7.2305700e-10
  1.0203282e-19 6.6809119e-10 2.8432899e-19 1.0000000e+00 6.6923316e-16]]
[[3.0007001e-09 5.5017495e-16 1.1265785e-10 1.9489237e-17 7.2305700e-10
  1.0203282e-19 6.6809119e-10 2.8432899e-19 1.000000

[[2.2329535e-01 2.5667710e-08 6.7798139e-10 2.7375314e-05 2.8403783e-14
  1.7371384e-08 7.6382411e-01 1.2853118e-02 2.8836473e-12 4.5359438e-10]]
[[2.2329535e-01 2.5667710e-08 6.7798139e-10 2.7375314e-05 2.8403783e-14
  1.7371384e-08 7.6382411e-01 1.2853118e-02 2.8836473e-12 4.5359438e-10]]
[[2.2329535e-01 2.5667710e-08 6.7798139e-10 2.7375314e-05 2.8403783e-14
  1.7371384e-08 7.6382411e-01 1.2853118e-02 2.8836473e-12 4.5359438e-10]]
[[2.2329535e-01 2.5667710e-08 6.7798139e-10 2.7375314e-05 2.8403783e-14
  1.7371384e-08 7.6382411e-01 1.2853118e-02 2.8836473e-12 4.5359438e-10]]


In [24]:
# Manual cleanup cell: releases the webcam handle and closes every OpenCV window.
# Presumably kept for the case where the capture-loop cell stops with an error
# before reaching its own release/destroy calls — TODO confirm it is still needed.
cap.release()
cv2.destroyAllWindows()