# Hand gesture recognition using Mediapipe API
Before using:
1. [Download a pretrained model](https://storage.googleapis.com/mediapipe-tasks/gesture_recognizer/gesture_recognizer.task) and store it in the root folder.
2. Modify the `model_asset_path` passed to `BaseOptions` when creating the `GestureRecognizerOptions` so that it points to this location.

In [1]:
import cv2
import mediapipe as mp
from mediapipe.tasks.python.vision import GestureRecognizer, GestureRecognizerOptions, GestureRecognizerResult, RunningMode
from mediapipe.framework.formats import landmark_pb2
import numpy as np

# Short alias for the MediaPipe Tasks base options class; it receives the model path
# when the recognizer options are built.
BaseOptions = mp.tasks.BaseOptions

# hands, drawing, and styles are required to visualize the hand landmarks
# (hands supplies the landmark connection table, drawing_utils the drawing routine)
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

In [2]:
# async output will be handled as a FIFO queue list data structure, initiate the queue
# (the result callback appends at the end, the capture loop reads and removes from the front)
detect_gesture_output = []

# append results to the queue
def append_gesture_result(result: GestureRecognizerResult, output_image: mp.Image, timestamp_ms: int) -> None:
    """Result callback for the recognizer: store ``result`` at the end of the queue.

    ``output_image`` and ``timestamp_ms`` are part of the callback signature but are
    not used here.
    """
    detect_gesture_output.append(result)

# set up gesture recognizer options
gesture_options = GestureRecognizerOptions(
    # path to the downloaded model file, relative to the working directory
    base_options=BaseOptions(model_asset_path='gesture_recognizer.task'),
    # live-stream mode: frames are submitted asynchronously and results come back via the callback
    running_mode=RunningMode.LIVE_STREAM,
    # detect up to two hands per frame
    num_hands=2,
    # every result is pushed onto detect_gesture_output by this callback
    result_callback=append_gesture_result)

In [3]:
# Use opencv to read the detection and apply an overlay to the image
FONT_SIZE = 1
FONT_THICKNESS = 1
TEXT_COLOR = (255, 255, 255)  # white


def visualize_gesture(image: np.ndarray, detection_result: list) -> np.ndarray:
    """Write the top gesture and handedness of the first queued result onto ``image``.

    Only the first hand (index 0) of the first queue entry is reported. If the
    queue is empty, or the result holds no gesture/handedness entry, the
    fallback texts 'No gesture detected' / 'No hand detected' are drawn instead.

    Args:
        image: Frame to annotate; it is passed to ``cv2.putText``.
        detection_result: Queue (list) of recognizer results; element 0 is read.

    Returns:
        The image returned by ``cv2.putText`` after both text lines are drawn.
    """
    # try to get the category name & hand
    try:
        result = detection_result[0]
        category_name = result.gestures[0][0].category_name
        cat_probability = round(result.gestures[0][0].score, 2)
        handedness = result.handedness[0][0].category_name
        hand_probability = round(result.handedness[0][0].score, 2)
        category_result_text = f'Gesture: {category_name} ({cat_probability})'
        hand_result_text = f'Hand: {handedness} ({hand_probability})'

    # Only the failures that mean "nothing usable in this result" are handled;
    # a bare except would also swallow KeyboardInterrupt and SystemExit.
    except (AttributeError, IndexError, TypeError):
        category_result_text = 'No gesture detected'
        hand_result_text = 'No hand detected'

    # Draw text: gesture on the first line, handedness 20 px below it
    gesture_text_location = (50, 50)
    hand_text_location = (gesture_text_location[0], gesture_text_location[1] + 20)
    image = cv2.putText(img=image, text=category_result_text, org=gesture_text_location,
                        fontFace=cv2.FONT_HERSHEY_PLAIN, fontScale=FONT_SIZE,
                        color=TEXT_COLOR, thickness=FONT_THICKNESS)
    image = cv2.putText(img=image, text=hand_result_text, org=hand_text_location,
                        fontFace=cv2.FONT_HERSHEY_PLAIN, fontScale=FONT_SIZE,
                        color=TEXT_COLOR, thickness=FONT_THICKNESS)

    return image

In [4]:
# build the landmark proto that the drawing utilities need from the queued detection result
def visualize_hand(detection_result: list) -> landmark_pb2.NormalizedLandmarkList:
    """
    Convert the first hand of the first queued result into a NormalizedLandmarkList proto message,
    which is the format the solution drawing utils accept.

    Indexing an empty queue or a result without hands raises IndexError.
    """
    first_hand = detection_result[0].hand_landmarks[0]

    # copy each landmark's coordinates into a proto landmark, coercing to plain floats
    converted = []
    for point in first_hand:
        converted.append(
            landmark_pb2.NormalizedLandmark(x=float(point.x), y=float(point.y), z=float(point.z))
        )

    landmark_list = landmark_pb2.NormalizedLandmarkList()
    landmark_list.landmark.extend(converted)
    return landmark_list

The `GestureRecognizer` returns the following output:
* gestures=[[Category(index=-1, score=0.6, display_name='', category_name='Thumb_Up')]], 

* handedness=[[Category(index=1, score=0.9, display_name='Right', category_name='Right')]], 

* hand_landmarks=[[NormalizedHandLandmark()]], a list of the 21 hand landmarks, with coordinates normalized to the image dimensions

* hand_world_landmarks=[[Landmark()]], a list of the 21 hand landmarks in world coordinates

Each result is appended to a list that is used as a FIFO queue; the capture loop then removes older entries from the front with `pop(0)`.

Figure 1. Hand Landmarks from Mediapipe
![Hand Landmarks](https://developers.google.com/static/mediapipe/images/solutions/hand-landmarks.png)

In [5]:
# initiate cv2 capture with selected live stream video device
cap = cv2.VideoCapture(0)
# the live stream recognizer requires a monotonically increasing 'time stamp' per frame;
# a frame counter acts as that timestamp
start = 0

# try/finally guarantees the camera and the preview window are released even if
# the loop is interrupted or an error is raised inside it
try:
    # Initiate the recognizer
    with GestureRecognizer.create_from_options(gesture_options) as recognizer:
        while cap.isOpened():
            success, image = cap.read()
            if not success:
                print("Ignoring empty camera frame.")
                continue

            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # convert color
            image = cv2.flip(image, 1)  # flip the image for proper handedness detection
            start += 1  # increment the frame
            mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=image)

            recognizer.recognize_async(mp_image, start)  # run gesture recognition

            # handle the FIFO detection queue: drop every stale result so that only the
            # newest one remains; removing just one per frame lets the overlay lag behind
            while len(detect_gesture_output) > 1:
                detect_gesture_output.pop(0)

            if detect_gesture_output:
                result_image = visualize_gesture(image, detect_gesture_output)
            else:
                # no result has been delivered yet, show the plain frame
                result_image = image

            return_image = cv2.cvtColor(result_image, cv2.COLOR_RGB2BGR)

            # try to draw the hand landmarks, uses the same queue as the gesture recognition
            try:
                hand = visualize_hand(detect_gesture_output)
            except IndexError:
                # empty queue or no hand in the latest result: nothing to draw
                pass
            else:
                mp_drawing.draw_landmarks(
                    image=return_image,
                    landmark_list=hand,
                    connections=mp_hands.HAND_CONNECTIONS,
                    landmark_drawing_spec=mp_drawing.DrawingSpec(color=(121, 22, 76), thickness=2, circle_radius=4),
                    connection_drawing_spec=mp_drawing.DrawingSpec(color=(250, 44, 250), thickness=2, circle_radius=2))

            cv2.imshow('Gesture Recognizer', return_image)

            # press 'q' in the preview window to stop
            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    cap.release()
    cv2.destroyAllWindows()

W20230508 20:09:47.358242 48742 gesture_recognizer_graph.cc:128] Hand Gesture Recognizer contains CPU only ops. Sets HandGestureRecognizerGraph acceleration to Xnnpack.
I20230508 20:09:47.360633 48742 hand_gesture_recognizer_graph.cc:249] Custom gesture classifier is not defined.
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
qt.qpa.plugin: Could not find the Qt platform plugin "wayland" in "/home/robert/miniconda3/envs/mpipe/lib/python3.9/site-packages/cv2/qt/plugins"
