In [1]:
from utils.multicamera_tools import parse_camera_xml, triangulate_poses
from utils.video_tools import get_camera_calibration_files, get_video_files
from utils.mediapipe_estimator import MediaPipeEstimator
from scripts.frame_iterator import video_frame_iterator
from scripts.parsers import parse_sequences as parse_sequence_info
import numpy as np
import bvhio

In [2]:
file_path = 'gait3d\\ListOfSequences.txt'
sequences = parse_sequence_info(file_path)

In [3]:
SEQ_KEY = "p1s3"

def calc_camera_params_for_seq(sequence_key):
    camera_files_paths = get_camera_calibration_files(sequence_key)
    return [parse_camera_xml(camera_path) for camera_path in camera_files_paths]
    
cameras_params = calc_camera_params_for_seq(SEQ_KEY)
cameras_params

[{'name': 'c1',
  'width': 960.0,
  'height': 540.0,
  'R': array([[ 0.00120615,  0.99999022,  0.0042542 ],
         [ 0.21610251,  0.00389302, -0.97636292],
         [-0.97636994,  0.00209698, -0.2160957 ]]),
  'T': array([[  21.1943],
         [ 613.873 ],
         [3393.53  ]]),
  'f': 546.711,
  'c': array([480., 270.]),
  'k': array([3.05309e-07, 0.00000e+00, 0.00000e+00]),
  'p': array([0., 0.])},
 {'name': 'c2',
  'width': 960.0,
  'height': 540.0,
  'R': array([[-0.99992544, -0.01190427,  0.002721  ],
         [-0.00425836,  0.13109134, -0.99136115],
         [ 0.01144473, -0.99129882, -0.13113225]]),
  'T': array([[ -62.0904],
         [ 618.389 ],
         [5207.65  ]]),
  'f': 553.247,
  'c': array([480., 270.]),
  'k': array([6.18469e-07, 0.00000e+00, 0.00000e+00]),
  'p': array([0., 0.])},
 {'name': 'c3',
  'width': 960.0,
  'height': 540.0,
  'R': array([[-0.01314138, -0.99977742,  0.01650498],
         [-0.22256503, -0.01316765, -0.97482892],
         [ 0.97482927, -0.01

In [4]:
video_files = get_video_files(SEQ_KEY)
video_files

['./gait3d/Sequences/p1s3/Images/c1_0435.avi',
 './gait3d/Sequences/p1s3/Images/c2_0435.avi',
 './gait3d/Sequences/p1s3/Images/c3_0435.avi',
 './gait3d/Sequences/p1s3/Images/c4_0435.avi']

In [5]:
frames_iterator = [video_frame_iterator(avi_file) for avi_file in video_files]
mp_estimators = [MediaPipeEstimator() for _ in range(4)]
predicted = [{} for _ in range(4)]
combined_cameras_with_landmarks = []

VIDEO_FPS = 25
MOCAP_FPS = 100
FRAME_TIME = 1000/VIDEO_FPS
MAX_FRAMES = sequences[SEQ_KEY]['number_of_frames']

for frame_i in range(MAX_FRAMES):
    cameras_with_landmarks = [True for _ in range(4)]
    
    for camera_ind, (f_iterator, mp_estimator) in enumerate(zip(frames_iterator, mp_estimators)):
        frame_ts, frame = next(f_iterator)
        estimations = mp_estimator.predict_for_frame(frame_i, frame)
        if not estimations:
            print(f'Camera {camera_ind} | Frame {frame_i} - landmarks not found')
            cameras_with_landmarks[camera_ind] = False

        predicted[camera_ind][frame_i] = estimations
        
    print(cameras_with_landmarks)
    combined_cameras_with_landmarks.append(cameras_with_landmarks)



[Frame 0] Landmarks not found!
Camera 1 | Frame 0 - landmarks not found
[True, False, True, True]
[Frame 1] Landmarks not found!
Camera 1 | Frame 1 - landmarks not found
[True, False, True, True]
[Frame 2] Landmarks not found!
Camera 1 | Frame 2 - landmarks not found
[True, False, True, True]
[Frame 3] Landmarks not found!
Camera 1 | Frame 3 - landmarks not found
[True, False, True, True]
[Frame 4] Landmarks not found!
Camera 1 | Frame 4 - landmarks not found
[True, False, True, True]
[Frame 5] Landmarks not found!
Camera 1 | Frame 5 - landmarks not found
[True, False, True, True]
[Frame 6] Landmarks not found!
Camera 1 | Frame 6 - landmarks not found
[True, False, True, True]
[Frame 7] Landmarks not found!
Camera 1 | Frame 7 - landmarks not found
[True, False, True, True]
[True, True, True, True]
[Frame 9] Landmarks not found!
Camera 1 | Frame 9 - landmarks not found
[True, False, True, True]
[Frame 10] Landmarks not found!
Camera 1 | Frame 10 - landmarks not found
[True, False, True,

In [6]:
len(cameras_params)

4

In [7]:
combined_triangulation_results = []

for frame_i in range(0, sequences[SEQ_KEY]['number_of_frames'], 1):
    print(frame_i)
    found_landmarks_cameras_idx = ([camera_i for camera_i, camera_l_found in 
                                    enumerate(combined_cameras_with_landmarks[frame_i])
                                    if camera_l_found])

    print(found_landmarks_cameras_idx)
    
    selected_cameras_params = [cameras_params[camera_i] for camera_i in found_landmarks_cameras_idx]
    print(selected_cameras_params)
    found_2d_points = np.array([predicted[camera_i][frame_i] for camera_i in found_landmarks_cameras_idx])
    triangulation_result = triangulate_poses(selected_cameras_params, found_2d_points)
    combined_triangulation_results.append(triangulation_result)


0
[0, 2, 3]
[{'name': 'c1', 'width': 960.0, 'height': 540.0, 'R': array([[ 0.00120615,  0.99999022,  0.0042542 ],
       [ 0.21610251,  0.00389302, -0.97636292],
       [-0.97636994,  0.00209698, -0.2160957 ]]), 'T': array([[  21.1943],
       [ 613.873 ],
       [3393.53  ]]), 'f': 546.711, 'c': array([480., 270.]), 'k': array([3.05309e-07, 0.00000e+00, 0.00000e+00]), 'p': array([0., 0.])}, {'name': 'c3', 'width': 960.0, 'height': 540.0, 'R': array([[-0.01314138, -0.99977742,  0.01650498],
       [-0.22256503, -0.01316765, -0.97482892],
       [ 0.97482927, -0.01648403, -0.22234245]]), 'T': array([[   7.11863],
       [ 503.15   ],
       [3684.76   ]]), 'f': 536.266, 'c': array([480., 270.]), 'k': array([3.43876e-07, 0.00000e+00, 0.00000e+00]), 'p': array([0., 0.])}, {'name': 'c4', 'width': 960.0, 'height': 540.0, 'R': array([[ 0.99995614, -0.00491679, -0.00797109],
       [-0.00869044, -0.16984445, -0.98543256],
       [ 0.00349131,  0.98545862, -0.16987973]]), 'T': array([[ 151.378

In [8]:
selected_cameras_params

[{'name': 'c1',
  'width': 960.0,
  'height': 540.0,
  'R': array([[ 0.00120615,  0.99999022,  0.0042542 ],
         [ 0.21610251,  0.00389302, -0.97636292],
         [-0.97636994,  0.00209698, -0.2160957 ]]),
  'T': array([[  21.1943],
         [ 613.873 ],
         [3393.53  ]]),
  'f': 546.711,
  'c': array([480., 270.]),
  'k': array([3.05309e-07, 0.00000e+00, 0.00000e+00]),
  'p': array([0., 0.])},
 {'name': 'c2',
  'width': 960.0,
  'height': 540.0,
  'R': array([[-0.99992544, -0.01190427,  0.002721  ],
         [-0.00425836,  0.13109134, -0.99136115],
         [ 0.01144473, -0.99129882, -0.13113225]]),
  'T': array([[ -62.0904],
         [ 618.389 ],
         [5207.65  ]]),
  'f': 553.247,
  'c': array([480., 270.]),
  'k': array([6.18469e-07, 0.00000e+00, 0.00000e+00]),
  'p': array([0., 0.])},
 {'name': 'c3',
  'width': 960.0,
  'height': 540.0,
  'R': array([[-0.01314138, -0.99977742,  0.01650498],
         [-0.22256503, -0.01316765, -0.97482892],
         [ 0.97482927, -0.01

In [9]:
triangulation_result

array([[[-117.53598594, 2288.47376615, 1449.33833281],
        [-131.52325226, 2278.77883466, 1477.02968835],
        [-142.49424028, 2276.02085374, 1475.86397196],
        [-153.424189  , 2273.01514574, 1477.37667854],
        [-105.24950313, 2277.99536889, 1480.11807733],
        [ -96.04873055, 2274.62457848, 1482.79911488],
        [ -86.83719606, 2271.18901004, 1484.2629102 ],
        [-165.77693473, 2241.91253792, 1468.06418352],
        [ -71.1740008 , 2245.76765285, 1473.05515019],
        [-131.61502241, 2280.52562389, 1421.7156166 ],
        [-103.08604119, 2277.07126938, 1424.19516484],
        [-279.20280218, 2188.9771818 , 1278.68162654],
        [  53.9330545 , 2194.06410498, 1289.39879833],
        [-289.17312208, 2220.60632854, 1020.22796522],
        [  57.17085803, 2225.37460921, 1015.99722634],
        [-296.9892068 , 2287.6935431 ,  797.56838456],
        [  65.17242268, 2314.85329145,  785.95024573],
        [-299.37723572, 2293.93461422,  732.97016145],
        [ 

In [10]:
sequences[SEQ_KEY]

{'start_frame': 435,
 'number_of_frames': 130,
 'frame_offset': 0,
 'MoCap_data': True}

In [11]:
VIDEO_FPS = 25
MOCAP_FPS = 100

In [12]:
bvh_file_path = f'gait3d/Sequences/{SEQ_KEY}/MoCap/{SEQ_KEY}.bvh'

root = bvhio.readAsHierarchy(bvh_file_path)

restPose=bvhio.Transform(scale=1)
root = bvhio.Joint('Root', restPose=bvhio.Transform(scale=1)).attach(root)

step = int(MOCAP_FPS/VIDEO_FPS)
start_frame = sequences[SEQ_KEY]['start_frame'] * step + sequences[SEQ_KEY]['frame_offset']
end_frame = start_frame + sequences[SEQ_KEY]['number_of_frames'] * step

position_bvh = [joint.PositionWorld for (joint, index, depth) in root.loadPose(start_frame).layout()]
print(position_bvh)

[vec3( 0, 0, 0 ), vec3( -10.6319, 3.82494, -0.289513 ), vec3( -10.6319, 3.82494, -0.289513 ), vec3( -10.6142, 3.7092, -0.820986 ), vec3( -10.7348, 1.90243, -0.697607 ), vec3( -11.0781, 0.387988, -0.678994 ), vec3( -10.5988, 0.221539, -0.637019 ), vec3( -10.6319, 3.82494, -0.289513 ), vec3( -10.6276, 3.67394, 0.213116 ), vec3( -10.8079, 1.84738, 0.0136224 ), vec3( -11.0485, 0.295754, -0.133059 ), vec3( -10.5591, 0.27068, -0.122417 ), vec3( -10.6319, 3.82494, -0.289513 ), vec3( -10.5731, 4.27976, -0.308113 ), vec3( -10.651, 4.73477, -0.354809 ), vec3( -10.7231, 5.21179, -0.372311 ), vec3( -10.7208, 5.5756, -0.352015 ), vec3( -10.633, 5.92576, -0.353485 ), vec3( -10.7231, 5.21179, -0.372311 ), vec3( -10.6891, 5.42089, -0.947508 ), vec3( -10.7286, 4.22626, -0.998492 ), vec3( -10.6033, 3.60274, -1.04575 ), vec3( -10.5407, 3.29098, -1.06938 ), vec3( -10.7231, 5.21179, -0.372311 ), vec3( -10.6654, 5.38927, 0.168759 ), vec3( -10.8439, 4.11959, 0.296021 ), vec3( -10.6817, 3.5233, 0.358281 ), ve

In [13]:
# position = [joint.PositionWorld for (joint, index, depth) in root.loadRestPose().layout()]
# print(position)

In [14]:
# root.loadRestPose().layout()

In [15]:
start_frame

1740

In [16]:
labels = [joint.Name for joint, _, _ in root.layout()][1:]
parent_children = [(joint.Parent.Name, joint.Name) for (joint, _, _) in root.loadPose(0).layout()[2:]]
parent_children_idxs = [(labels.index(parent), labels.index(child)) for parent, child in parent_children]

In [17]:
import mediapipe as mp
mp_pose = mp.solutions.pose
landmarks_num = 33

pose_landmark_names = {i: mp_pose.PoseLandmark(i).name for i in range(landmarks_num)}
connections = list(mp_pose.POSE_CONNECTIONS)

print(pose_landmark_names)
print(connections)

{0: 'NOSE', 1: 'LEFT_EYE_INNER', 2: 'LEFT_EYE', 3: 'LEFT_EYE_OUTER', 4: 'RIGHT_EYE_INNER', 5: 'RIGHT_EYE', 6: 'RIGHT_EYE_OUTER', 7: 'LEFT_EAR', 8: 'RIGHT_EAR', 9: 'MOUTH_LEFT', 10: 'MOUTH_RIGHT', 11: 'LEFT_SHOULDER', 12: 'RIGHT_SHOULDER', 13: 'LEFT_ELBOW', 14: 'RIGHT_ELBOW', 15: 'LEFT_WRIST', 16: 'RIGHT_WRIST', 17: 'LEFT_PINKY', 18: 'RIGHT_PINKY', 19: 'LEFT_INDEX', 20: 'RIGHT_INDEX', 21: 'LEFT_THUMB', 22: 'RIGHT_THUMB', 23: 'LEFT_HIP', 24: 'RIGHT_HIP', 25: 'LEFT_KNEE', 26: 'RIGHT_KNEE', 27: 'LEFT_ANKLE', 28: 'RIGHT_ANKLE', 29: 'LEFT_HEEL', 30: 'RIGHT_HEEL', 31: 'LEFT_FOOT_INDEX', 32: 'RIGHT_FOOT_INDEX'}
[(15, 21), (16, 20), (18, 20), (3, 7), (14, 16), (23, 25), (28, 30), (11, 23), (27, 31), (6, 8), (15, 17), (24, 26), (16, 22), (4, 5), (5, 6), (29, 31), (12, 24), (23, 24), (0, 1), (9, 10), (1, 2), (0, 4), (11, 13), (30, 32), (28, 32), (15, 19), (16, 18), (25, 27), (26, 28), (12, 14), (17, 19), (2, 3), (11, 12), (27, 29), (13, 15)]


In [18]:
import plotly.graph_objects as go
import plotly.io as pio
pio.renderers.default = 'iframe'

step = 4
frame_num = sequences[SEQ_KEY]['number_of_frames'] - 1

position_bvh = [joint.PositionWorld for (joint, index, depth) in root.loadPose(start_frame + frame_num * step).layout()]

x = [vec[2] for vec in position_bvh[1:]]
y = [vec[0] for vec in position_bvh[1:]]
z = [vec[1] for vec in position_bvh[1:]]

x_lines = []
y_lines = []
z_lines = []

for i, j in parent_children_idxs:
    x_lines += [x[i], x[j], None]
    y_lines += [y[i], y[j], None]
    z_lines += [z[i], z[j], None]


SCALE_FACTOR = 254

triangulation_result = combined_triangulation_results[frame_num]

x_t = triangulation_result[:,:, 0][0]/SCALE_FACTOR
y_t = triangulation_result[:,:, 1][0]/SCALE_FACTOR
z_t = triangulation_result[:,:, 2][0]/SCALE_FACTOR

x_t_lines = []
y_t_lines = []
z_t_lines = []

for i, j in connections:
    x_t_lines += [x_t[i], x_t[j], None]
    y_t_lines += [y_t[i], y_t[j], None]
    z_t_lines += [z_t[i], z_t[j], None]
    
    
fig = go.Figure(
    data=[
        go.Scatter3d(
            x=x, y=y, z=z,
            mode='markers',
            marker=dict(size=5, color='blue'),
            text=labels,
            hoverinfo='text',
            name='Joints BVH'),
        go.Scatter3d(
            x=x_lines, y=y_lines, z=z_lines,
            mode='lines',
            line=dict(color='royalblue', width=3),
            name='Bones BVH'),
        go.Scatter3d(
            x=x_t, y=y_t, z=z_t,
            mode='markers',
            marker=dict(size=5, color='red'),
            text=labels,
            hoverinfo='text',
            name='Joints triangulation mediapipe'),
        go.Scatter3d(
            x=x_t_lines, y=y_t_lines, z=z_t_lines,
            mode='lines',
            line=dict(color='indianred', width=3),
            name='Bones triangulation mediapipe'),
        ]
)

fig.update_layout(scene=dict(
    xaxis_title='X',
    yaxis_title='Y',
    zaxis_title='Z',
    xaxis=dict(range=[-15, 15]),
    yaxis=dict(range=[-15, 15]),
    zaxis=dict(range=[-15, 15]),
    aspectmode='cube', 
),
title='3D joints plot from bvh file',
width=800,
height=800
)

fig.show()

In [19]:
sequences[SEQ_KEY]['number_of_frames']

130

In [23]:
import plotly.graph_objects as go
import plotly.io as pio
pio.renderers.default = 'iframe'

step = 4
total_frames = sequences[SEQ_KEY]['number_of_frames']
SCALE_FACTOR_TRIANG = 0.1
SCALE_FACTOR_BVH = 25.4

def get_frame_data(frame_idx):
    pose = root.loadPose(start_frame + frame_idx * step).layout()
    position_bvh = [joint.PositionWorld for (joint, index, depth) in pose]

    x = np.array([vec[2] for vec in position_bvh[1:]]) * SCALE_FACTOR_BVH
    y = np.array([vec[0] for vec in position_bvh[1:]]) * SCALE_FACTOR_BVH
    z = np.array([vec[1] for vec in position_bvh[1:]]) * SCALE_FACTOR_BVH

    x_lines, y_lines, z_lines = [], [], []
    for i, j in parent_children_idxs:
        x_lines += [x[i], x[j], None]
        y_lines += [y[i], y[j], None]
        z_lines += [z[i], z[j], None]

    triangulation_result = combined_triangulation_results[frame_idx]
    x_t = triangulation_result[:, :, 0][0] * SCALE_FACTOR_TRIANG
    y_t = triangulation_result[:, :, 1][0] * SCALE_FACTOR_TRIANG
    z_t = triangulation_result[:, :, 2][0] * SCALE_FACTOR_TRIANG

    x_t_lines, y_t_lines, z_t_lines = [], [], []
    for i, j in connections:
        x_t_lines += [x_t[i], x_t[j], None]
        y_t_lines += [y_t[i], y_t[j], None]
        z_t_lines += [z_t[i], z_t[j], None]

    return x, y, z, x_lines, y_lines, z_lines, x_t, y_t, z_t, x_t_lines, y_t_lines, z_t_lines

x, y, z, x_lines, y_lines, z_lines, x_t, y_t, z_t, x_t_lines, y_t_lines, z_t_lines = get_frame_data(0)

fig = go.Figure(
    data=[
        go.Scatter3d(x=x, y=y, z=z, mode='markers', marker=dict(size=5, color='blue'), name='Joints BVH'),
        go.Scatter3d(x=x_lines, y=y_lines, z=z_lines, mode='lines', line=dict(color='royalblue', width=3), name='Bones BVH'),
        go.Scatter3d(x=x_t, y=y_t, z=z_t, mode='markers', marker=dict(size=5, color='red'), name='Joints triangulation mediapipe'),
        go.Scatter3d(x=x_t_lines, y=y_t_lines, z=z_t_lines, mode='lines', line=dict(color='indianred', width=3), name='Bones triangulation mediapipe'),
    ],
    layout=go.Layout(
        title="3D joints animation from BVH and triangulation",
        scene=dict(
            xaxis_title='X',
            yaxis_title='Y',
            zaxis_title='Z',
            xaxis=dict(range=[-300, 300]),
            yaxis=dict(range=[-300, 300]),
            zaxis=dict(range=[-300, 300]),
            aspectmode='cube'
        ),
        width=800,
        height=800,
        updatemenus=[dict(
            type="buttons",
            buttons=[dict(label="Play", method="animate", args=[None, {"frame": {"duration": 400, "redraw": True}, "fromcurrent": True}]),
                     dict(label="Pause", method="animate", args=[[None], {"frame": {"duration": 0}, "mode": "immediate", "transition": {"duration": 0}}])]
        )]
    ),
    frames=[
        go.Frame(
            data=[
                go.Scatter3d(x=x, y=y, z=z, mode='markers', marker=dict(size=5, color='blue'), name='Joints BVH'),
                go.Scatter3d(x=x_lines, y=y_lines, z=z_lines, mode='lines', line=dict(color='royalblue', width=3), name='Bones BVH'),
                go.Scatter3d(x=x_t, y=y_t, z=z_t, mode='markers', marker=dict(size=5, color='red'), name='Joints triangulation mediapipe'),
                go.Scatter3d(x=x_t_lines, y=y_t_lines, z=z_t_lines, mode='lines', line=dict(color='indianred', width=3), name='Bones triangulation mediapipe')
            ]
        )
        for frame_idx in range(0, total_frames, step)
        for x, y, z, x_lines, y_lines, z_lines, x_t, y_t, z_t, x_t_lines, y_t_lines, z_t_lines in [get_frame_data(frame_idx)]
    ]
)

fig.show()
