In [23]:
from pathlib import Path
from openvino.runtime import Core
import cv2
import numpy as np
import sys
from pathlib import Path

sys.path.append("./engine")
#import engine.engine3js as engine
from engine.parse_poses import parse_poses

In [24]:
# directory where model will be downloaded
base_model_dir = "model"

# model name as named in Open Model Zoo
model_name = "human-pose-estimation-3d-0001"
# selected precision (FP32, FP16)
precision = "FP32"

BASE_MODEL_NAME = f"{base_model_dir}/public/{model_name}/{model_name}"
model_path = Path(BASE_MODEL_NAME).with_suffix(".pth")
onnx_path = Path(BASE_MODEL_NAME).with_suffix(".onnx")

ir_model_path = f"model/public/{model_name}/{precision}/{model_name}.xml"
model_weights_path = f"model/public/{model_name}/{precision}/{model_name}.bin"

if not model_path.exists():
    download_command = (
        f"omz_downloader " f"--name {model_name} " f"--output_dir {base_model_dir}"
    )
    ! $download_command

In [25]:
if not onnx_path.exists():
    convert_command = (
        f"omz_converter "
        f"--name {model_name} "
        f"--precisions {precision} "
        f"--download_dir {base_model_dir} "
        f"--output_dir {base_model_dir}"
    )
    ! $convert_command

In [26]:
# initialize inference engine
ie_core = Core()
# read the network and corresponding weights from file
model = ie_core.read_model(model=ir_model_path, weights=model_weights_path)
# load the model on the CPU (you can use GPU or MYRIAD as well)
compiled_model = ie_core.compile_model(model=model, device_name="CPU")
infer_request = compiled_model.create_infer_request()
input_tensor_name = model.inputs[0].get_any_name()

# get input and output names of nodes
input_layer = compiled_model.input(0)
output_layers = list(compiled_model.outputs)

In [27]:
cap = cv2.VideoCapture('0327.mp4')

In [28]:
body_edges = np.array(
    [
        [0, 1], 
        [0, 9], [9, 10], [10, 11],    # neck - r_shoulder - r_elbow - r_wrist
        [0, 3], [3, 4], [4, 5],       # neck - l_shoulder - l_elbow - l_wrist
        [1, 15], [15, 16],            # nose - l_eye - l_ear
        [1, 17], [17, 18],            # nose - r_eye - r_ear
        [0, 6], [6, 7], [7, 8],       # neck - l_hip - l_knee - l_ankle
        [0, 12], [12, 13], [13, 14],  # neck - r_hip - r_knee - r_ankle
    ]
)

focal_length = -1  # default
stride = 8
player = None
skeleton_set = None

In [29]:
poses = []
i = 0
offset = []
while cap.isOpened():
    # Read the next frame
    ret, frame = cap.read()
    if not ret:
        break

    input_image = cv2.resize(frame, (input_layer.shape[3], input_layer.shape[2]))
    input_image = input_image.transpose((2, 0, 1))  # change data layout from HWC to CHW
    input_image = input_image.reshape(input_layer.shape)  # reshape to input shape
    # run inference
    infer_request.infer({input_tensor_name: input_image})

    # A set of three inference results is obtained
    results = {
        name: infer_request.get_tensor(name).data[:]
        for name in {"features", "heatmaps", "pafs"}
    }
    # Get the results
    results = (results["features"][0], results["heatmaps"][0], results["pafs"][0])
    poses_3d, poses_2d = parse_poses(results, 1, stride, focal_length, True)
    if len(poses_3d) > 0:
                # From here, you can rotate the 3D point positions using the function "draw_poses",
                # or you can directly make the correct mapping below to properly display the object image on the screen
                poses_3d_copy = poses_3d.copy()
                x = poses_3d_copy[:, 0::4]
                y = poses_3d_copy[:, 1::4]
                z = poses_3d_copy[:, 2::4]
                poses_3d[:, 0::4], poses_3d[:, 1::4], poses_3d[:, 2::4] = (
                    -z + np.ones(poses_3d[:, 2::4].shape) * 200,
                    -y + np.ones(poses_3d[:, 2::4].shape) * 100,
                    -x,
                )

                poses_3d = poses_3d.reshape(poses_3d.shape[0], 19, -1)[:, :, 0:3]
                poses_3d = poses_3d.astype(float)
                if len(offset) == 0:
                    # Find the center of the skeleton
                    center = np.mean(poses_3d, axis=(0, 1))

                    # Compute the offset to move the skeleton to the origin
                    offset = -center

                # Apply the offset to all points of the skeleton
                poses_3d += offset

    for pose_3d in poses_3d:
        frame_pose = {
            "frame": i,
            "pose": {
                "neck": {"y":pose_3d[0][0], "z":pose_3d[0][1], "x":pose_3d[0][2]},
                "r_shoulder": {"y":pose_3d[9][0], "z":pose_3d[9][1], "x":pose_3d[9][2]},
                "r_elbow": {"y":pose_3d[10][0], "z":pose_3d[10][1], "x":pose_3d[10][2]},
                "r_wrist": {"y":pose_3d[11][0], "z":pose_3d[11][1], "x":pose_3d[11][2]},
                "l_shoulder": {"y":pose_3d[3][0], "z":pose_3d[3][1], "x":pose_3d[3][2]},
                "l_elbow": {"y":pose_3d[4][0], "z":pose_3d[4][1], "x":pose_3d[4][2]},
                "l_wrist": {"y":pose_3d[5][0], "z":pose_3d[5][1], "x":pose_3d[5][2]},
                "l_eye": {"y":pose_3d[15][0], "z":pose_3d[15][1], "x":pose_3d[15][2]},
                "l_ear": {"y":pose_3d[16][0], "z":pose_3d[16][1], "x":pose_3d[16][2]},
                "r_eye": {"y":pose_3d[17][0], "z":pose_3d[17][1], "x":pose_3d[17][2]},
                "r_ear": {"y":pose_3d[18][0], "z":pose_3d[18][1], "x":pose_3d[18][2]},
                "nose": {"y":pose_3d[1][0], "z":pose_3d[1][1], "x":pose_3d[1][2]},
                "l_hip": {"y":pose_3d[6][0], "z":pose_3d[6][1], "x":pose_3d[6][2]},
                "l_knee": {"y":pose_3d[7][0], "z":pose_3d[7][1], "x":pose_3d[7][2]},
                "l_ankle": {"y":pose_3d[8][0], "z":pose_3d[8][1], "x":pose_3d[8][2]},
                "r_hip": {"y":pose_3d[12][0], "z":pose_3d[12][1], "x":pose_3d[12][2]},
                "r_knee": {"y":pose_3d[13][0], "z":pose_3d[13][1], "x":pose_3d[13][2]},
                "r_ankle": {"y":pose_3d[14][0], "z":pose_3d[14][1], "x":pose_3d[14][2]}
            }
        }
        poses.append(frame_pose)
    i += 1
    if cv2.waitKey(1) == ord('q'):
        break
cap.release()
cv2.destroyAllWindows()

In [30]:
bone_name = ["neck", "r_shoulder", "r_elbow", "r_wrist", "l_shoulder", "l_elbow", "l_wrist", "l_eye", "l_ear", "r_eye", "r_ear", "nose", "l_hip", "l_knee", "l_ankle", "r_hip", "r_knee", "r_ankle"]

In [31]:
def moving_average(data : list, window_size):
    new_data = data.copy()
    for name in bone_name:
        x_array = np.zeros(len(data))
        y_array = np.zeros(len(data))
        z_array = np.zeros(len(data))
        for i in range(len(data)):
            x_array[i] = data[i]["pose"][name]["x"]
            y_array[i] = data[i]["pose"][name]["y"]
            z_array[i] = data[i]["pose"][name]["z"]
        window = np.ones(window_size) / window_size
        x_array = np.convolve(x_array, window, mode='same')
        y_array = np.convolve(y_array, window, mode='same')
        z_array = np.convolve(z_array, window, mode='same')
        for i in range(len(data)):
            new_data[i]["pose"][name]["x"] = x_array[i]
            new_data[i]["pose"][name]["y"] = y_array[i]
            new_data[i]["pose"][name]["z"] = z_array[i]
    return new_data

In [32]:
import json
poses = moving_average(poses, 15)
with open("poses.json", "w") as f:
	json.dump(poses, f)

In [33]:
poses_file = []
with open("C:\\Users\\mievst\\Desktop\\masters\\pose_export\\poses.json", "r") as f:
    poses_file = json.loads(f.read())
poses_file[0]

{'frame': 0,
 'pose': {'neck': {'y': -1.8215934351870904,
   'z': 14.39837817811129,
   'x': -1.1863718911221155},
  'r_shoulder': {'y': -2.1462557073225073,
   'z': 14.798808475962854,
   'x': 5.840554594575313},
  'r_elbow': {'y': -0.9495414332339652,
   'z': 8.853952722382125,
   'x': 17.04074660100435},
  'r_wrist': {'y': 5.129904977898847,
   'z': 12.239664392304,
   'x': 16.383362491507278},
  'l_shoulder': {'y': -2.0685426946271943,
   'z': 15.056831547251917,
   'x': -8.905923051060292},
  'l_elbow': {'y': 1.1696592096696805,
   'z': 8.392819909882125,
   'x': -17.86967508834705},
  'l_wrist': {'y': 8.401175729851971,
   'z': 10.674493086965459,
   'x': -19.457883239210695},
  'l_eye': {'y': 7.276687407911869,
   'z': 22.516839278371705,
   'x': -0.8804706180304814},
  'l_ear': {'y': 1.7990649306983262,
   'z': 20.9259447064316,
   'x': -3.7591081861864057},
  'r_eye': {'y': 6.859045195997806,
   'z': 23.287898441783167,
   'x': 1.4878085211703649},
  'r_ear': {'y': 3.367016387