In [40]:
!pip install -q imageio
!pip install -q opencv-python
!pip install -q git+https://github.com/tensorflow/docs

  Preparing metadata (setup.py) ... [?25l[?25hdone


In [41]:
from IPython import get_ipython
from IPython.display import display
!pip install -q imageio
!pip install -q opencv-python
!pip install -q git+https://github.com/tensorflow/docs
!pip install --upgrade matplotlib
import tensorflow as tf
import tensorflow_hub as hub
import numpy as np
import cv2
from matplotlib import pyplot as plt
import matplotlib
matplotlib.use('Agg')
from matplotlib.collections import LineCollection
import matplotlib.patches as patches
import os
import urllib.request
from IPython.display import HTML, display
import imageio
import os
import io

  Preparing metadata (setup.py) ... [?25l[?25hdone


In [46]:
KEYPOINT_DICT = {
    'nose': 0, 'left_eye': 1, 'right_eye': 2, 'left_ear': 3, 'right_ear': 4,
    'left_shoulder': 5, 'right_shoulder': 6, 'left_elbow': 7, 'right_elbow': 8,
    'left_wrist': 9, 'right_wrist': 10, 'left_hip': 11, 'right_hip': 12,
    'left_knee': 13, 'right_knee': 14, 'left_ankle': 15, 'right_ankle': 16
}

KEYPOINT_EDGE_INDS_TO_COLOR = {
    (0, 1): 'm', (0, 2): 'c', (1, 3): 'm', (2, 4): 'c', (0, 5): 'm', (0, 6): 'c',
    (5, 7): 'm', (7, 9): 'm', (6, 8): 'c', (8, 10): 'c', (5, 6): 'y', (5, 11): 'm',
    (6, 12): 'c', (11, 12): 'y', (11, 13): 'm', (13, 15): 'm', (12, 14): 'c', (14, 16): 'c'
}

def get_keypoints_and_edges(keypoints_with_scores, height, width, keypoint_threshold=0.11):
    keypoints_all, keypoint_edges_all, edge_colors = [], [], []

    num_instances, _, _, _ = keypoints_with_scores.shape

    for idx in range(num_instances):
        kpts_x = keypoints_with_scores[0, idx, :, 1]
        kpts_y = keypoints_with_scores[0, idx, :, 0]
        kpts_scores = keypoints_with_scores[0, idx, :, 2]

        kpts_absolute_xy = np.stack([width * kpts_x, height * kpts_y], axis=-1)
        keypoints_above_thresh = kpts_absolute_xy[kpts_scores > keypoint_threshold]
        keypoints_all.append(keypoints_above_thresh)

        for edge_pair, color in KEYPOINT_EDGE_INDS_TO_COLOR.items():
            if kpts_scores[edge_pair[0]] > keypoint_threshold and kpts_scores[edge_pair[1]] > keypoint_threshold:
                keypoint_edges_all.append(
                    np.array([
                        [kpts_absolute_xy[edge_pair[0], 0], kpts_absolute_xy[edge_pair[0], 1]],
                        [kpts_absolute_xy[edge_pair[1], 0], kpts_absolute_xy[edge_pair[1], 1]]
                    ])
                )
                edge_colors.append(color)

    keypoints_xy = np.concatenate(keypoints_all, axis=0) if keypoints_all else np.zeros((0, 2))
    edges_xy = np.stack(keypoint_edges_all, axis=0) if keypoint_edges_all else np.zeros((0, 2, 2))

    return keypoints_xy, edges_xy, edge_colors

def draw_pose(image, keypoints_with_scores):
    height, width, _ = image.shape
    fig, ax = plt.subplots(figsize=(8, 8))
    plt.axis('off')

    ax.imshow(image)
    line_segments = LineCollection([], linewidths=4)
    ax.add_collection(line_segments)

    keypoint_locs, keypoint_edges, edge_colors = get_keypoints_and_edges(keypoints_with_scores, height, width)

    if keypoint_edges.size > 0:
        line_segments.set_segments(keypoint_edges)
        line_segments.set_color(edge_colors)

    if keypoint_locs.size > 0:
        ax.scatter(*zip(*keypoint_locs), s=60, color='#FF1493', zorder=3)

    output_path = "output_pose.png"
    plt.savefig(output_path)
    plt.close()
    print(f"Pose output saved at {output_path}")
    return imageio.imread(output_path)

In [47]:
model_url = "https://tfhub.dev/google/movenet/singlepose/lightning/4"
try:
    movenet_model = hub.load(model_url)
    print("MoveNet model loaded successfully.")
except Exception as e:
    print(f"Error loading model: {e}")
    raise

def movenet(input_image):
    model = movenet_model.signatures['serving_default']
    input_image = tf.cast(input_image, dtype=tf.int32)
    outputs = model(input_image)
    return outputs['output_0'].numpy()


MoveNet model loaded successfully.


In [48]:
image_url = "https://images.pexels.com/photos/4384679/pexels-photo-4384679.jpeg"
image_path = "input_image.jpeg"

print("Downloading image...")
try:
    req = urllib.request.Request(image_url, headers={'User-Agent': 'Mozilla/5.0'})
    with urllib.request.urlopen(req) as response, open(image_path, 'wb') as f:
        f.write(response.read())
    print("Image downloaded successfully.")
except urllib.error.HTTPError as e:
    print(f"Failed to download image: {e}")
except Exception as e:
    print(f"An error occurred: {e}")
    raise


Downloading image...
Image downloaded successfully.


In [49]:
image = tf.io.read_file(image_path)
image = tf.image.decode_jpeg(image)
input_size = 192
input_image = tf.image.resize_with_pad(image, input_size, input_size)
input_image = tf.expand_dims(input_image, axis=0)

keypoints_with_scores = movenet(input_image)

output_image = draw_pose(image.numpy(), keypoints_with_scores)

Pose output saved at output_pose.png


  return imageio.imread(output_path)


In [50]:
MIN_CROP_KEYPOINT_SCORE = 0.2

def init_crop_region(image_height, image_width):
    """Defines the initial crop region."""
    if image_width > image_height:
        box_height = image_width / image_height
        box_width = 1.0
        y_min = (image_height / 2 - image_width / 2) / image_height
        x_min = 0.0
    else:
        box_height = 1.0
        box_width = image_height / image_width
        y_min = 0.0
        x_min = (image_width / 2 - image_height / 2) / image_width

    return {
        'y_min': y_min,
        'x_min': x_min,
        'y_max': y_min + box_height,
        'x_max': x_min + box_width,
        'height': box_height,
        'width': box_width
    }

def torso_visible(keypoints):
    """Checks whether there are enough torso keypoints."""
    return ((keypoints[0, 0, KEYPOINT_DICT['left_hip'], 2] >
             MIN_CROP_KEYPOINT_SCORE or
             keypoints[0, 0, KEYPOINT_DICT['right_hip'], 2] >
             MIN_CROP_KEYPOINT_SCORE) and
            (keypoints[0, 0, KEYPOINT_DICT['left_shoulder'], 2] >
             MIN_CROP_KEYPOINT_SCORE or
             keypoints[0, 0, KEYPOINT_DICT['right_shoulder'], 2] >
             MIN_CROP_KEYPOINT_SCORE))


def determine_torso_and_body_range(
        keypoints, target_keypoints, center_y, center_x):
    """Calculates the maximum range of torso and body keypoints."""
    torso_joints = ['left_shoulder', 'right_shoulder', 'left_hip', 'right_hip']
    max_torso_yrange = 0.0
    max_torso_xrange = 0.0
    max_body_yrange = 0.0
    max_body_xrange = 0.0
    for joint in torso_joints:
        dist_y = abs(center_y - target_keypoints[joint][0])
        dist_x = abs(center_x - target_keypoints[joint][1])
        if dist_y > max_torso_yrange:
            max_torso_yrange = dist_y
        if dist_x > max_torso_xrange:
            max_torso_xrange = dist_x

    for joint in KEYPOINT_DICT.keys():
        if keypoints[0, 0, KEYPOINT_DICT[joint], 2] < MIN_CROP_KEYPOINT_SCORE:
            continue
        dist_y = abs(center_y - target_keypoints[joint][0])
        dist_x = abs(center_x - target_keypoints[joint][1])
        if dist_y > max_body_yrange:
            max_body_yrange = dist_y

        if dist_x > max_body_xrange:
            max_body_xrange = dist_x

    return [max_torso_yrange, max_torso_xrange, max_body_yrange, max_body_xrange]


def determine_crop_region(
        keypoints, image_height, image_width):
    """Determines the region to crop the image."""
    target_keypoints = {}
    for joint in KEYPOINT_DICT.keys():
        target_keypoints[joint] = [
            keypoints[0, 0, KEYPOINT_DICT[joint], 0] * image_height,
            keypoints[0, 0, KEYPOINT_DICT[joint], 1] * image_width
        ]
    if torso_visible(keypoints):
        center_y = (target_keypoints['left_hip'][0] +
                    target_keypoints['right_hip'][0]) / 2
        center_x = (target_keypoints['left_hip'][1] +
                    target_keypoints['right_hip'][1]) / 2

        (max_torso_yrange, max_torso_xrange,
         max_body_yrange, max_body_xrange) = determine_torso_and_body_range(
            keypoints, target_keypoints, center_y, center_x)

        crop_length_half = np.amax(
            [max_torso_xrange * 1.9, max_torso_yrange * 1.9,
             max_body_yrange * 1.2, max_body_xrange * 1.2])

        tmp = np.array(
            [center_x, image_width - center_x, center_y, image_height - center_y])
        crop_length_half = np.amin(
            [crop_length_half, np.amax(tmp)])

        crop_corner = [center_y - crop_length_half, center_x - crop_length_half]

        if crop_length_half > max(image_width, image_height) / 2:
            return init_crop_region(image_height, image_width)
        else:
            crop_length = crop_length_half * 2
            return {
                'y_min': crop_corner[0] / image_height,
                'x_min': crop_corner[1] / image_width,
                'y_max': (crop_corner[0] + crop_length) / image_height,
                'x_max': (crop_corner[1] + crop_length) / image_width,
                'height': (crop_corner[0] + crop_length) / image_height - crop_corner[0] / image_height,
                'width': (crop_corner[1] + crop_length) / image_width - crop_corner[1] / image_width
            }
    else:
        return init_crop_region(image_height, image_width)


def crop_and_resize(image, crop_region, crop_size):
    """Crops and resizes the image based on the crop region."""
    boxes = [[crop_region['y_min'], crop_region['x_min'],
              crop_region['y_max'], crop_region['x_max']]]
    output_image = tf.image.crop_and_resize(
        image, box_indices=[0], boxes=boxes, crop_size=crop_size)
    return output_image


def run_interface(movenet, image, crop_region, crop_size):
    """Runs the model interface to estimate keypoints."""
    image_height, image_width, _ = image.shape
    input_image = crop_and_resize(
        tf.expand_dims(image, axis=0), crop_region, crop_size=crop_size)
    keypoints_with_scores = movenet(input_image)
    for idx in range(17):
        keypoints_with_scores[0, 0, idx, 0] = (
                crop_region['y_min'] * image_height +
                crop_region['height'] * image_height * keypoints_with_scores[0, 0, idx, 0]) / image_height
        keypoints_with_scores[0, 0, idx, 1] = (
                crop_region['x_min'] * image_width +
                crop_region['width'] * image_width * keypoints_with_scores[0, 0, idx, 1]) / image_width

    return keypoints_with_scores


In [51]:
!wget -q -O dance.gif https://github.com/tensorflow/tfjs-models/raw/master/pose-detection/assets/dance_input.gif

In [52]:
image_path = 'dance.gif'
image = tf.io.read_file(image_path)
image = tf.image.decode_gif(image)

num_frames, image_height, image_width, _ = image.shape

crop_region = init_crop_region(image_height, image_width)
output_images = []

from tqdm import tqdm
for frame_idx in tqdm(range(num_frames), total=num_frames, desc="Processing frames"):
    keypoints_with_scores = run_interface(
        movenet, image[frame_idx, :, :, :], crop_region,
        crop_size=[input_size, input_size])
    output_images.append(draw_prediction_on_image(
        image[frame_idx, :, :, :].numpy().astype(np.int32),
        keypoints_with_scores, crop_region=None,
        close_figure=True, output_image_height=300))
    crop_region = determine_crop_region(
        keypoints_with_scores, image_height, image_width)

output = np.stack(output_images, axis=0)
imageio.mimsave('output.gif', output, fps=10)

  output_image = imageio.imread("output_image.png")
Processing frames:   2%|▏         | 1/42 [00:03<02:38,  3.87s/it]

Output saved as 'output_image.png'


Processing frames:   5%|▍         | 2/42 [00:04<01:25,  2.13s/it]

Output saved as 'output_image.png'


Processing frames:   7%|▋         | 3/42 [00:05<00:59,  1.52s/it]

Output saved as 'output_image.png'


Processing frames:  10%|▉         | 4/42 [00:06<00:52,  1.38s/it]

Output saved as 'output_image.png'


Processing frames:  12%|█▏        | 5/42 [00:07<00:46,  1.25s/it]

Output saved as 'output_image.png'


Processing frames:  14%|█▍        | 6/42 [00:08<00:44,  1.23s/it]

Output saved as 'output_image.png'


Processing frames:  17%|█▋        | 7/42 [00:10<00:46,  1.33s/it]

Output saved as 'output_image.png'


Processing frames:  19%|█▉        | 8/42 [00:11<00:46,  1.35s/it]

Output saved as 'output_image.png'


Processing frames:  21%|██▏       | 9/42 [00:12<00:37,  1.15s/it]

Output saved as 'output_image.png'


Processing frames:  24%|██▍       | 10/42 [00:13<00:33,  1.03s/it]

Output saved as 'output_image.png'


Processing frames:  26%|██▌       | 11/42 [00:14<00:29,  1.05it/s]

Output saved as 'output_image.png'


Processing frames:  29%|██▊       | 12/42 [00:15<00:30,  1.03s/it]

Output saved as 'output_image.png'


Processing frames:  31%|███       | 13/42 [00:16<00:27,  1.06it/s]

Output saved as 'output_image.png'


Processing frames:  33%|███▎      | 14/42 [00:17<00:26,  1.04it/s]

Output saved as 'output_image.png'


Processing frames:  36%|███▌      | 15/42 [00:17<00:23,  1.13it/s]

Output saved as 'output_image.png'


Processing frames:  38%|███▊      | 16/42 [00:18<00:22,  1.18it/s]

Output saved as 'output_image.png'


Processing frames:  40%|████      | 17/42 [00:19<00:18,  1.36it/s]

Output saved as 'output_image.png'


Processing frames:  43%|████▎     | 18/42 [00:19<00:15,  1.53it/s]

Output saved as 'output_image.png'


Processing frames:  45%|████▌     | 19/42 [00:19<00:13,  1.68it/s]

Output saved as 'output_image.png'


Processing frames:  48%|████▊     | 20/42 [00:20<00:12,  1.78it/s]

Output saved as 'output_image.png'


Processing frames:  50%|█████     | 21/42 [00:20<00:11,  1.89it/s]

Output saved as 'output_image.png'


Processing frames:  52%|█████▏    | 22/42 [00:21<00:10,  1.96it/s]

Output saved as 'output_image.png'


Processing frames:  55%|█████▍    | 23/42 [00:21<00:09,  1.95it/s]

Output saved as 'output_image.png'


Processing frames:  57%|█████▋    | 24/42 [00:22<00:10,  1.79it/s]

Output saved as 'output_image.png'


Processing frames:  60%|█████▉    | 25/42 [00:23<00:09,  1.70it/s]

Output saved as 'output_image.png'


Processing frames:  62%|██████▏   | 26/42 [00:23<00:09,  1.61it/s]

Output saved as 'output_image.png'


Processing frames:  64%|██████▍   | 27/42 [00:24<00:09,  1.59it/s]

Output saved as 'output_image.png'


Processing frames:  67%|██████▋   | 28/42 [00:25<00:08,  1.72it/s]

Output saved as 'output_image.png'


Processing frames:  69%|██████▉   | 29/42 [00:25<00:07,  1.82it/s]

Output saved as 'output_image.png'


Processing frames:  71%|███████▏  | 30/42 [00:25<00:06,  1.92it/s]

Output saved as 'output_image.png'


Processing frames:  74%|███████▍  | 31/42 [00:26<00:05,  1.97it/s]

Output saved as 'output_image.png'


Processing frames:  76%|███████▌  | 32/42 [00:26<00:04,  2.04it/s]

Output saved as 'output_image.png'


Processing frames:  79%|███████▊  | 33/42 [00:27<00:04,  2.07it/s]

Output saved as 'output_image.png'


Processing frames:  81%|████████  | 34/42 [00:27<00:03,  2.09it/s]

Output saved as 'output_image.png'


Processing frames:  83%|████████▎ | 35/42 [00:28<00:03,  2.08it/s]

Output saved as 'output_image.png'


Processing frames:  86%|████████▌ | 36/42 [00:28<00:02,  2.09it/s]

Output saved as 'output_image.png'


Processing frames:  88%|████████▊ | 37/42 [00:29<00:02,  2.13it/s]

Output saved as 'output_image.png'


Processing frames:  90%|█████████ | 38/42 [00:29<00:01,  2.12it/s]

Output saved as 'output_image.png'


Processing frames:  93%|█████████▎| 39/42 [00:30<00:01,  2.11it/s]

Output saved as 'output_image.png'


Processing frames:  95%|█████████▌| 40/42 [00:30<00:00,  2.09it/s]

Output saved as 'output_image.png'


Processing frames:  98%|█████████▊| 41/42 [00:31<00:00,  2.11it/s]

Output saved as 'output_image.png'


Processing frames: 100%|██████████| 42/42 [00:31<00:00,  1.33it/s]

Output saved as 'output_image.png'



