# OpenVINO Hand Pose Demo

Source: https://github.com/PINTO0309/PINTO_model_zoo model 033_Hand_Detection_and_Tracking

Model License: Apache

This notebook demonstrates running inference with a hand landmark (hand pose) model on live webcam video.

This is a work-in-progress demo. To run it, you need to install `openvino` and `opencv-python`; the notebook also imports `numpy` and Pillow (`PIL`).

You may need to adjust `camera_index` in the Settings section below if your webcam is not at index 0.

Click the Jupyter "stop" button (the red square at the top of the notebook) to stop the webcam.

In [1]:
import time
from io import BytesIO

import cv2
import IPython.display
import numpy as np
import PIL.Image
from openvino.inference_engine import IECore

# Print floating-point arrays in fixed-point notation instead of scientific
# notation, which makes printed arrays easier to read.
np.set_printoptions(suppress=True)

## Settings

In [2]:
# Path to the model's .xml file. The matching .bin weights file is expected
# next to it, with the same base name.
# Alternative model: "./models/hand_landmark_fp16/hand_landmark.xml"
modelxml = "./models/hand_landmark_new_fp16/hand_landmark_new.xml"
if not modelxml.endswith(".xml"):
    raise ValueError(f"modelxml must point to an .xml file, got: {modelxml!r}")
# Swap only the trailing extension. str.replace("xml", "bin") would also
# rewrite any "xml" that happens to appear elsewhere in the path.
modelbin = modelxml[: -len(".xml")] + ".bin"

# One RGB color per finger; landmark 0 (the palm) is drawn in black instead.
colors = [(255, 0, 0), (255, 255, 0), (255, 0, 255), (0, 255, 255), (0, 255, 0)]
# Index of the camera passed to cv2.VideoCapture.
camera_index = 0

## Set up Inference Engine

In [3]:
# Create the Inference Engine core and read the model description and weights.
ie = IECore()
net = ie.read_network(model=modelxml, weights=modelbin)

# Load the network onto the CPU device so it can be executed.
exec_net = ie.load_network(network=net, device_name="CPU")

# Use the first input of the loaded network and the last of its outputs.
input_blob = next(iter(exec_net.input_info))
output_key = list(exec_net.outputs)[-1]

## Define functions for inference and showing results

In [4]:
def inference(frame):
    """Run the loaded network on one image and return the landmark arrays.

    The image is divided by 255, its last axis is moved to the front
    (height-width-channel to channel-height-width for an OpenCV image) and a
    leading batch axis of size 1 is added before it is passed to the network.

    Returns the squeezed network output split into a list of 21 equally
    sized arrays, one per landmark.
    """
    scaled = frame / 255
    batch = scaled.transpose(2, 0, 1)[np.newaxis, ...]
    outputs = exec_net.infer({input_blob: batch})
    landmarks = outputs[output_key].squeeze()
    return np.split(landmarks, 21)

In [5]:
def showarray(a, prev_display_id=None, fmt="jpeg"):
    """Show an image array in the notebook and return its display id.

    The array is encoded with PIL in the given format (JPEG by default).
    If prev_display_id is given, the existing output with that id is updated
    in place and the same id is returned; otherwise a new output is created
    and its display id is returned.
    """
    buffer = BytesIO()
    PIL.Image.fromarray(a).save(buffer, fmt)
    image = IPython.display.Image(data=buffer.getvalue())

    if prev_display_id is None:
        handle = IPython.display.display(image, display_id=True)
        return handle.display_id

    IPython.display.update_display(image, display_id=prev_display_id)
    return prev_display_id

## Go!

In [6]:
prev = 0  # Timestamp (time.time()) at which the last frame was processed
frame_rate = 5  # Maximum number of frames processed per second
ret = True  # Success flag returned by the most recent cap.read()
display_id = None  # Id of the notebook output showing the video; None until the first frame
input_size = 256  # Frames are resized to input_size x input_size before inference


def _to_frame_xy(point, width, height):
    """Scale a landmark from model-input coordinates to integer frame pixels."""
    return (
        int(point[0] / input_size * width),
        int(point[1] / input_size * height),
    )


def _draw_landmarks(frame, landmarks):
    """Draw landmark points and connecting lines on frame, in place.

    Landmark 0 is the palm (black). Every following group of four landmarks
    is one finger, drawn in its own color from `colors`.
    """
    height, width = frame.shape[:2]
    radius = int(2 * width / 256)

    for i, point in enumerate(landmarks):
        finger = (i - 1) // 4
        color = colors[finger] if i > 0 else (0, 0, 0)

        cv2.circle(frame, _to_frame_xy(point, width, height), radius, color, -1)

        # Lines are drawn from the second landmark of each finger onwards.
        # The explicit i > 0 check keeps the palm from ever being connected
        # to landmarks addressed with negative indices.
        if i > 0 and (i - 1) % 4 != 0:
            # Palm to the first landmark of this finger
            cv2.line(
                frame,
                _to_frame_xy(landmarks[0], width, height),
                _to_frame_xy(landmarks[finger * 4 + 1], width, height),
                color,
            )
            # Previous landmark of this finger to the current one
            cv2.line(
                frame,
                _to_frame_xy(landmarks[i - 1], width, height),
                _to_frame_xy(point, width, height),
                color,
            )


cap = cv2.VideoCapture(camera_index)

try:
    if not cap.isOpened():
        raise RuntimeError(f"Could not open camera with index {camera_index}")

    while ret:
        # Limit the processing rate by sleeping instead of spinning the CPU
        remaining = 1.0 / frame_rate - (time.time() - prev)
        if remaining > 0:
            time.sleep(remaining)
        prev = time.time()

        # Read frame from webcam; stop cleanly if no frame could be read
        ret, frame = cap.read()
        if not ret:
            print("No frame received from the camera; stopping.")
            break

        # Convert to RGB for display and mirror the image horizontally
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame = cv2.flip(frame, 1)

        # Resize to input size and run the model
        resized_frame = cv2.resize(frame, (input_size, input_size))
        result = inference(resized_frame)

        # Draw points and lines over the full-size frame and show it
        _draw_landmarks(frame, result)
        display_id = showarray(frame, display_id)

except KeyboardInterrupt:
    # The Jupyter "stop" button raises KeyboardInterrupt: clear the video output
    IPython.display.clear_output(wait=False)
finally:
    # Always free the camera, whether the loop ended normally, was
    # interrupted, or failed with an error
    cap.release()