# Pose Estimation

In [13]:
!git clone https://github.com/orittmann/ssdl_body_language.git

fatal: destination path 'ssdl_body_language' already exists and is not an empty directory.


We start by loading some dependencies

In [14]:
# import dependencies
import tensorflow as tf
import numpy as np
from matplotlib import pyplot as plt
import cv2

# to store resulting data aas json
import json

# to get filenames in directory
import os
import fnmatch

We need to load the pose estimation model

In [15]:
interpreter = tf.lite.Interpreter(model_path='ssdl_body_language/model/lite-model_movenet_singlepose_thunder_3.tflite')
interpreter.allocate_tensors()

Here is a function that makes the keypoint detection for us:

In [16]:
def make_keypoint_detection(video_path):

    output_images = []
    cap = cv2.VideoCapture(video_path)
    while cap.isOpened():
        ret, frame = cap.read()

        # if frame is read correctly ret is True
        if not ret:
            print("Stream end.")
            break

        # Reshape image
        img = frame.copy()

        # 192x192 for lightning, 256x256 for thunder
        # img = tf.image.resize_with_pad(np.expand_dims(img, axis=0), 192, 192)

        img = tf.image.resize_with_pad(np.expand_dims(img, axis=0), 256, 256)

        input_image = tf.cast(img, dtype=tf.float32)

        # Setup input and output
        input_details = interpreter.get_input_details()
        output_details = interpreter.get_output_details()

        # Make predictions
        #interpreter.set_tensor(input_details[0]['index'], np.array(input_image))
        interpreter.set_tensor(input_details[0]['index'], input_image.numpy())
        interpreter.invoke()
        keypoints_with_scores = interpreter.get_tensor(output_details[0]['index'])
        #print(keypoints_with_scores)

        # store keypoints of frame
        # (transform numpy array to a list first, makes it easier to store as json later
        output_images.append(keypoints_with_scores.tolist())

    cap.release()
    cv2.destroyAllWindows()

    return output_images

Get video files

In [17]:
video_files = fnmatch.filter(os.listdir("ssdl_body_language/videos"), "*.mp4")

print(video_files)

['speech2_klaus_ernst.mp4', 'speech1_gabriela_heinrich.mp4']


Apply the function

In [18]:
len(video_files)

np.arange(0, len(video_files))

array([0, 1])

In [22]:
# loop over all videos
for i in np.arange(0, len(video_files)):
    # specify current file path
    current_file = video_files[i]
    current_path = "ssdl_body_language/videos/" + current_file

    print("Start inference for video " + str(i) + ": " + current_file)

    # execute keypoint detection
    keypoints_result_tmp = make_keypoint_detection(current_path)

    # Store data
    res_json_file = current_file.replace("mp4", "json")
    res_json_file_path = "ssdl_body_language/movenet_results/" + res_json_file

    with open(res_json_file_path, 'w') as fp:
        json.dump(keypoints_result_tmp, fp)

    # delete temporary keypoint results
    del keypoints_result_tmp

    print("End inference for video " + str(i) + ": " + current_file)


Start inference for video 0: speech2_klaus_ernst.mp4
Stream end.
End inference for video 0: speech2_klaus_ernst.mp4
Start inference for video 1: speech1_gabriela_heinrich.mp4
Stream end.
End inference for video 1: speech1_gabriela_heinrich.mp4


# Working With Pose Estimation Results