In [1]:
%pip install tensorflow==2.4.1 tensorflow-gpu==2.4.1 tensorflow-hub opencv-python matplotlib

Note: you may need to restart the kernel to use updated packages.


ERROR: Could not find a version that satisfies the requirement tensorflow==2.4.1 (from versions: 2.5.0, 2.5.1, 2.5.2, 2.5.3, 2.6.0rc0, 2.6.0rc1, 2.6.0rc2, 2.6.0, 2.6.1, 2.6.2, 2.6.3, 2.6.4, 2.6.5, 2.7.0rc0, 2.7.0rc1, 2.7.0, 2.7.1, 2.7.2, 2.7.3, 2.7.4, 2.8.0rc0, 2.8.0rc1, 2.8.0, 2.8.1, 2.8.2, 2.8.3, 2.8.4, 2.9.0rc0, 2.9.0rc1, 2.9.0rc2, 2.9.0, 2.9.1, 2.9.2, 2.9.3, 2.10.0rc0, 2.10.0rc1, 2.10.0rc2, 2.10.0rc3, 2.10.0, 2.10.1, 2.11.0rc0, 2.11.0rc1, 2.11.0rc2, 2.11.0)
ERROR: No matching distribution found for tensorflow==2.4.1


In [2]:
import tensorflow as tf
import tensorflow_hub as hub
import cv2
import numpy as np
from matplotlib import pyplot as plt
import math

In [3]:
# Turn on memory growth for every GPU TensorFlow can see, then report whether
# this TensorFlow build was compiled with GPU support.
gpus = tf.config.experimental.list_physical_devices('GPU')
for physical_gpu in gpus:
    tf.config.experimental.set_memory_growth(physical_gpu, True)

built_with_gpu = tf.test.is_built_with_gpu_support()
print(built_with_gpu)

True


##### LOAD MODEL

In [4]:
# Local directory holding the MoveNet multipose model that hub.load reads.
MOVENET_MODEL_DIR = "./movenet_multipose_lightning_v1/"

model = hub.load(MOVENET_MODEL_DIR)
# `movenet` is the callable signature used for inference further down.
movenet = model.signatures["serving_default"]

##### DRAW KEYPOINTS

In [5]:
# Keypoint names from the docs; the list position is the keypoint index.
# https://storage.googleapis.com/movenet/MoveNet.MultiPose%20Model%20Card.pdf
keypoints = [
    "nose",            # 0
    "left eye",        # 1
    "right eye",       # 2
    "left ear",        # 3
    "right ear",       # 4
    "left shoulder",   # 5
    "right shoulder",  # 6
    "left elbow",      # 7
    "right elbow",     # 8
    "left wrist",      # 9
    "right wrist",     # 10
    "left hip",        # 11
    "right hip",       # 12
    "left knee",       # 13
    "right knee",      # 14
    "left ankle",      # 15
    "right ankle",     # 16
]

In [6]:
def draw_keypoints(frame, keypoints, confidence_threshold):
    """Draw a filled dot on `frame` for every sufficiently confident keypoint.

    Args:
        frame: Image array with a 3-element shape (height, width, channels);
            it is drawn on in place.
        keypoints: Rows of (y, x, confidence). y and x are multiplied by the
            frame height and width, so they are presumably normalised
            coordinates - verify against the caller.
        confidence_threshold: A keypoint is drawn only when its confidence is
            strictly greater than this value.

    Returns:
        None.
    """
    frame_height, frame_width, _ = frame.shape
    # Scale y by the frame height and x by the frame width; leave the score as is.
    pixel_points = np.squeeze(np.multiply(keypoints, [frame_height, frame_width, 1]))

    for row_px, col_px, score in pixel_points:
        if score > confidence_threshold:
            # OpenCV points are (x, y), the reverse of the (y, x) keypoint layout.
            center = (int(col_px), int(row_px))
            cv2.circle(frame, center, 6, (0, 0, 255), -1)

##### DRAW EDGES

In [7]:
# Edges from the docs
# https://storage.googleapis.com/movenet/MoveNet.MultiPose%20Model%20Card.pdf
# Each key is a pair of keypoint indices to connect; each value is a colour
# code: "m" on left-side links, "c" on right-side links, "y" on links that
# join a left keypoint to a right keypoint.
edges = {
    # head
    (0, 1): "m",    # nose - left eye
    (0, 2): "c",    # nose - right eye
    (1, 3): "m",    # left eye - left ear
    (2, 4): "c",    # right eye - right ear
    (0, 5): "m",    # nose - left shoulder
    (0, 6): "c",    # nose - right shoulder
    # arms
    (5, 7): "m",    # left shoulder - left elbow
    (7, 9): "m",    # left elbow - left wrist
    (6, 8): "c",    # right shoulder - right elbow
    (8, 10): "c",   # right elbow - right wrist
    # torso
    (5, 6): "y",    # left shoulder - right shoulder
    (5, 11): "m",   # left shoulder - left hip
    (6, 12): "c",   # right shoulder - right hip
    (11, 12): "y",  # left hip - right hip
    # legs
    (11, 13): "m",  # left hip - left knee
    (13, 15): "m",  # left knee - left ankle
    (12, 14): "c",  # right hip - right knee
    (14, 16): "c",  # right knee - right ankle
}

In [8]:
def get_edge_color(color):
    """Translate an edge colour code into a 3-tuple of channel values.

    Args:
        color: Colour code; "m" and "c" have dedicated colours.

    Returns:
        The tuple for "m" or "c", or the fallback tuple for any other value
        (for example "y").
    """
    known_colors = (
        ("m", (255, 102, 51)),
        ("c", (52, 235, 113)),
    )
    for code, channels in known_colors:
        if color == code:
            return channels
    return (117, 50, 125)


In [9]:
def draw_edges(frame, keypoints, edges, confidence_threshold):
    """Draw a line on `frame` for every edge whose two endpoints are confident.

    Args:
        frame: Image array with a 3-element shape (height, width, channels);
            it is drawn on in place.
        keypoints: Rows of (y, x, confidence). y and x are multiplied by the
            frame height and width, so they are presumably normalised
            coordinates - verify against the caller.
        edges: Mapping from a pair of keypoint indices to a colour code that
            is passed to get_edge_color.
        confidence_threshold: An edge is drawn only when both endpoint
            confidences are strictly greater than this value.

    Returns:
        None.
    """
    frame_height, frame_width, _ = frame.shape
    # Scale y by the frame height and x by the frame width; leave the score as is.
    pixel_points = np.squeeze(np.multiply(keypoints, [frame_height, frame_width, 1]))

    for (start_idx, end_idx), color_code in edges.items():
        start_y, start_x, start_score = pixel_points[start_idx]
        end_y, end_x, end_score = pixel_points[end_idx]
        if start_score > confidence_threshold and end_score > confidence_threshold:
            # OpenCV points are (x, y), the reverse of the (y, x) keypoint layout.
            start_point = (int(start_x), int(start_y))
            end_point = (int(end_x), int(end_y))
            cv2.line(frame, start_point, end_point, get_edge_color(color_code), 2)

In [10]:
def draw_keypoints_and_edges(frame, keypoints_and_scores, confidence_threshold):
    """Overlay the skeleton of every detection in `keypoints_and_scores` on `frame`.

    Args:
        frame: Image that is drawn on in place.
        keypoints_and_scores: Iterable with one keypoint array per detection.
        confidence_threshold: Threshold forwarded to draw_edges and
            draw_keypoints.

    Returns:
        None.
    """
    for detection in keypoints_and_scores:
        # Lines go down first so the keypoint dots are painted on top of them.
        draw_edges(frame, detection, edges, confidence_threshold)
        draw_keypoints(frame, detection, confidence_threshold)

In [11]:
def keep_aspect_ratio_resizer(image, target_size):
    """Compute a (height, width) that keeps the image's aspect ratio.

    The longer side becomes `target_size`; the shorter side is scaled by the
    same factor and then rounded up to the next multiple of 32. When both
    sides are equal, the width is treated as the longer side.

    Args:
        image: Object whose `shape` is (height, width, channels).
        target_size: Length to give the longer side.

    Returns:
        Tuple (target_height, target_width).
    """
    def _round_up_to_32(length):
        return int(math.ceil(length / 32) * 32)

    rows, cols, _ = image.shape
    if rows > cols:
        # Portrait: height is the longer side.
        ratio = float(target_size / rows)
        return target_size, _round_up_to_32(math.ceil(cols * ratio))

    # Landscape or square: width is the longer side.
    ratio = float(target_size / cols)
    return _round_up_to_32(math.ceil(rows * ratio)), target_size

##### PREDICTIONS

In [22]:
capture = cv2.VideoCapture("./people_dancing.mp4")

# Documentation for model - https://tfhub.dev/google/movenet/multipose/lightning/1
# Input images height and width should be multiple of 32
# Transformed image should preserve original aspect ratio
# Larger side should be multiples of 256
# Input channels are expected in RGB order, while OpenCV decodes frames as BGR

try:
    while capture.isOpened():
        retval, frame = capture.read()
        if not retval:
            # No frame came back (end of the video or a read failure), so there
            # is nothing to process. Stop before touching `frame`.
            break

        # Resize Image
        # Convert to RGB for the model; `frame` itself stays BGR for drawing
        # and display.
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        img = tf.expand_dims(rgb_frame, axis=0)
        height, width = keep_aspect_ratio_resizer(frame, 512)
        img = tf.image.resize_with_pad(img, height, width)
        input_img = tf.cast(img, dtype=tf.int32)

        # Make Detections
        output = movenet(input_img) # Shape: (1,6,56)
        # The first 51 of the 56 values per detection are regrouped as
        # 17 keypoints x (y, x, score).
        keypoints_and_scores = output["output_0"].numpy()[:,:,:51].reshape((6,17,3)) # Shape: (6,17,3)

        # Render keypoints and edges
        # NOTE(review): the keypoints are scaled straight to the frame size, but
        # the model saw an input padded by resize_with_pad. Overlays may be
        # slightly offset along the padded axis - TODO confirm and compensate.
        draw_keypoints_and_edges(frame, keypoints_and_scores, 0.3)

        cv2.imshow("Multi Person Pose Detection", frame)

        pressed_key = cv2.waitKey(10)
        if pressed_key in (ord("q"), ord("Q")):
            break
finally:
    # Always free the video handle and close the window, even if a frame fails.
    capture.release()
    cv2.destroyAllWindows()