In [1]:
import torch

print(f"CUDA available: {torch.cuda.is_available()}")
print(f"CUDA device: {torch.cuda.get_device_name(0)}")

CUDA available: True
CUDA device: NVIDIA GeForce RTX 3050 Ti Laptop GPU


In [2]:
import torch
import torch.nn as nn
import numpy as np

# x = torch.randn(1, 5, 2)
x = torch.from_numpy(np.array([[[1,2], [3,4], [5,6], [7, 8], [9, 10]]])).float()
print(x.shape)
print(x)

conv1d = nn.Conv1d(in_channels=5, out_channels=3, kernel_size=2)

output = conv1d(x)
print(output.shape)
print(output)

torch.Size([1, 5, 2])
tensor([[[ 1.,  2.],
         [ 3.,  4.],
         [ 5.,  6.],
         [ 7.,  8.],
         [ 9., 10.]]])
torch.Size([1, 3, 1])
tensor([[[ 0.5859],
         [-4.1594],
         [ 2.1191]]], grad_fn=<ConvolutionBackward0>)


In [3]:
print(x)

tensor([[[ 1.,  2.],
         [ 3.,  4.],
         [ 5.,  6.],
         [ 7.,  8.],
         [ 9., 10.]]])


In [4]:
print(output)

tensor([[[ 0.5859],
         [-4.1594],
         [ 2.1191]]], grad_fn=<ConvolutionBackward0>)


In [5]:
from scripts.parsers import parse_sequences as parse_sequence_info

file_path = 'gait3d\\ListOfSequences.txt'
sequences = parse_sequence_info(file_path)

mocap_keys = []
par_cam_keys = []
par_cam_person = set()
par_after_cloth_change_keys = []
par_after_cloth_change_person = set()

for key, params in sequences.items():
    if params['MoCap_data']:
        mocap_keys.append(key)
        if key[-1] in ["1", "3", "5", "7"]:
            par_cam_keys.append(key)
            par_cam_person.add(key[:-2])
        if key[-1] in ["5", "7"]:
            par_after_cloth_change_keys.append(key)
            par_after_cloth_change_person.add(key[:-2])

print(f"Number of sequences with mocap data: {len(mocap_keys)}")
print(f"Number of sequences with mocap data and parallel cameras: {len(par_cam_keys)}")
print(f"Number of sequences with mocap data, parallel cameras and after clothing change: {len(par_after_cloth_change_keys)}")
print(f"Number of unique participants with mocap data and parallel cameras: {len(par_cam_person)}")
print(f"Number of unique participants with mocap data, parallel cameras and after clothing change: {len(par_after_cloth_change_person)}")
par_after_cloth_change_person

Number of sequences with mocap data: 152
Number of sequences with mocap data and parallel cameras: 76
Number of sequences with mocap data, parallel cameras and after clothing change: 12
Number of unique participants with mocap data and parallel cameras: 32
Number of unique participants with mocap data, parallel cameras and after clothing change: 6


{'p26', 'p27', 'p28', 'p29', 'p30', 'p31'}

In [6]:
import random

random.seed(42)

without_clothing_change = []
while len(without_clothing_change) < 6:
    random_person = random.choice(list(par_cam_person))
    if random_person not in par_after_cloth_change_person:
        without_clothing_change.append(random_person)
        par_cam_person.remove(random_person)

with_clothing_change = []
while len(with_clothing_change) < 4:
    random_person = random.choice(list(par_after_cloth_change_person))
    with_clothing_change.append(random_person)
    par_after_cloth_change_person.remove(random_person)


test_seq_set = ([f'{p_seq}s{seq_idx}' for p_seq in without_clothing_change[:3] for seq_idx in [1, 3]] +
                [f'{p_seq}s{seq_idx}' for p_seq in with_clothing_change[:2] for seq_idx in [5, 7]])

valid_seq_set = ([f'{p_seq}s{seq_idx}' for p_seq in without_clothing_change[3:] for seq_idx in [1, 3]] +
                [f'{p_seq}s{seq_idx}' for p_seq in with_clothing_change[2:] for seq_idx in [5, 7]])

print(f"test sequences: {test_seq_set}")
print(f"valid sequences: {valid_seq_set}")
# without_clothing_change + [only_after_clothing_change] + [only_before_clothing_change]

test sequences: ['p13s1', 'p13s3', 'p14s1', 'p14s3', 'p32s1', 'p32s3', 'p28s5', 'p28s7', 'p31s5', 'p31s7']
valid sequences: ['p24s1', 'p24s3', 'p21s1', 'p21s3', 'p1s1', 'p1s3', 'p26s5', 'p26s7', 'p27s5', 'p27s7']


In [7]:
train_seq_set = ([f'{p_seq}s{seq_idx}' for p_seq in list(par_cam_person) for seq_idx in [1, 3]] +
                 [f'{p_seq}s{seq_idx}' for p_seq in list(par_after_cloth_change_person) for seq_idx in [5, 7]])

print(f"train sequences: {train_seq_set}")

train sequences: ['p15s1', 'p15s3', 'p16s1', 'p16s3', 'p7s1', 'p7s3', 'p26s1', 'p26s3', 'p11s1', 'p11s3', 'p18s1', 'p18s3', 'p29s1', 'p29s3', 'p9s1', 'p9s3', 'p28s1', 'p28s3', 'p6s1', 'p6s3', 'p12s1', 'p12s3', 'p10s1', 'p10s3', 'p20s1', 'p20s3', 'p3s1', 'p3s3', 'p27s1', 'p27s3', 'p22s1', 'p22s3', 'p2s1', 'p2s3', 'p23s1', 'p23s3', 'p30s1', 'p30s3', 'p25s1', 'p25s3', 'p17s1', 'p17s3', 'p31s1', 'p31s3', 'p5s1', 'p5s3', 'p8s1', 'p8s3', 'p19s1', 'p19s3', 'p4s1', 'p4s3', 'p29s5', 'p29s7', 'p30s5', 'p30s7']


In [8]:
for key, params in sequences.items():
    if params['MoCap_data']:
        if key[-1] in ["1", "3", "5", "7"]:
            print(f"{key} | {'train' if key in train_seq_set else '     '} | {'valid' if key in valid_seq_set else '     '} | {'test' if key in test_seq_set else '    '} |")

p1s1 |       | valid |      |
p1s3 |       | valid |      |
p2s1 | train |       |      |
p2s3 | train |       |      |
p3s1 | train |       |      |
p3s3 | train |       |      |
p4s1 | train |       |      |
p4s3 | train |       |      |
p5s1 | train |       |      |
p5s3 | train |       |      |
p6s1 | train |       |      |
p6s3 | train |       |      |
p7s1 | train |       |      |
p7s3 | train |       |      |
p8s1 | train |       |      |
p8s3 | train |       |      |
p9s1 | train |       |      |
p9s3 | train |       |      |
p10s1 | train |       |      |
p10s3 | train |       |      |
p11s1 | train |       |      |
p11s3 | train |       |      |
p12s1 | train |       |      |
p12s3 | train |       |      |
p13s1 |       |       | test |
p13s3 |       |       | test |
p14s1 |       |       | test |
p14s3 |       |       | test |
p15s1 | train |       |      |
p15s3 | train |       |      |
p16s1 | train |       |      |
p16s3 | train |       |      |
p17s1 | train |       |   

In [9]:
print(f"Train size: {len(train_seq_set)} | {100*len(train_seq_set)/72:.2f}%")
print(f"Test size: {len(test_seq_set)} | {100*len(test_seq_set)/72:.2f}%")
print(f"Valid size: {len(valid_seq_set)} | {100*len(valid_seq_set)/72:.2f}%")

Train size: 56 | 77.78%
Test size: 10 | 13.89%
Valid size: 10 | 13.89%


In [10]:
import json

selected_names_file = "./datasets/mediapipe/selected_joint_names.json"
input_data_file = "./datasets/mediapipe/dataset_v2.json"
output_data_file = "./datasets/mocap/dataset_v2.json"

with open(input_data_file, 'r') as file:
    raw_input = json.load(file)

with open(output_data_file, 'r') as file:
    raw_output = json.load(file)

with open(selected_names_file, 'r') as file:
    selected_names = json.load(file)

In [30]:
triang_data_file = "./datasets/mediapipe/triangulation.json"

with open(triang_data_file, 'r') as file:
    triangulation_data = json.load(file)


In [11]:
sequences['p1s1']

{'start_frame': 195,
 'number_of_frames': 135,
 'frame_offset': 0,
 'MoCap_data': True}

In [12]:
input_frames_data = {f"c{c_idx}": [] for c_idx in range(1, 5)}
output_frames_data = []
img_width = 960
img_height = 540

not_found = 0
seq_keys_list = train_seq_set + test_seq_set + valid_seq_set

for seq_key in seq_keys_list:
    for f_idx in range(sequences[seq_key]['number_of_frames']):
    # for f_idx in range(2):
        curr_output_array = []
        output_frame_dict = raw_output[seq_key][f_idx]
        for point_idx, joint_name in selected_names.items():
            curr_output_array.append(output_frame_dict[joint_name])

        curr_output_array_np = np.array(curr_output_array)
        # print(curr_output_array_np)
        
        curr_input_arrays = {f"c{c_idx}": [] for c_idx in range(1, 5)}

        all_found = True
        
        for c_idx in range(1, 5):
            input_frame_list = raw_input[seq_key][f"c{c_idx}"][str(f_idx)]
            if [None, None] in input_frame_list:
                all_found = False
                break
                
            for point_idx, joint_name in selected_names.items(): 
                pixel_coords = input_frame_list[int(point_idx)]
                curr_input_arrays[f"c{c_idx}"].append(pixel_coords)

                # curr_input_arrays[f"c{c_idx}"].append([pixel_coords[0]/img_width, pixel_coords[1]/img_height])
                # conversion from pixels to propotions if needed

        # print(curr_input_arrays)

        if all_found:
            for c_idx in range(1, 5):
                input_frames_data[f"c{c_idx}"].append(np.array(curr_input_arrays[f"c{c_idx}"]))
            #     print(np.array(curr_input_arrays[f"c{c_idx}"]).shape)

            # print(curr_output_array_np.shape)    
            output_frames_data.append(curr_output_array_np)
        else:
            not_found += 1

print(f"Frames with all found mocaps: {len(output_frames_data)}")
print(f"Frames with at least one not found mocap: {not_found}")
print(f"Proportion: {100*len(output_frames_data)/(len(output_frames_data) + not_found):.2f}%")
# print(input_frames_data['c4'][0])

Frames with all found mocaps: 6035
Frames with at least one not found mocap: 3495
Proportion: 63.33%


In [13]:
import torch
from torch.utils.data import Dataset

class MoCapInputDataset(Dataset):
    def __init__(self, seq_keys_list, sequences, selected_names, raw_input, raw_output):
        self.img_width = 960
        self.img_height = 540
        self.input_frames_data = {f"c{c_idx}": [] for c_idx in range(1, 5)}
        self.output_frames_data = []
        self.not_found = 0
              
        for seq_key in seq_keys_list:
            for f_idx in range(sequences[seq_key]['number_of_frames']):
                curr_output_array = []
                output_frame_dict = raw_output[seq_key][f_idx]
                for point_idx, joint_name in selected_names.items():
                    curr_output_array.append(output_frame_dict[joint_name])
        
                curr_output_array_np = np.array(curr_output_array)*255
                # 255 multiplier added to mocap to obtain distance in mm
                curr_input_arrays = {f"c{c_idx}": [] for c_idx in range(1, 5)}
        
                all_found = True
                
                for c_idx in range(1, 5):
                    input_frame_list = raw_input[seq_key][f"c{c_idx}"][str(f_idx)]
                    if [None, None] in input_frame_list:
                        all_found = False
                        break
                        
                    for point_idx, joint_name in selected_names.items(): 
                        pixel_coords = input_frame_list[int(point_idx)]
                        curr_input_arrays[f"c{c_idx}"].append(pixel_coords)
                        # curr_input_arrays[f"c{c_idx}"].append(
                        #     [pixel_coords[0]/self.img_width, 
                        #      pixel_coords[1]/self.img_height])
        
                if all_found:
                    for c_idx in range(1, 5):
                        self.input_frames_data[f"c{c_idx}"].append(np.array(curr_input_arrays[f"c{c_idx}"]))
 
                    self.output_frames_data.append(curr_output_array_np)
                else:
                    self.not_found += 1

        self.length = len(self.output_frames_data)

    def __len__(self):
        return self.length

    def __getitem__(self, idx):
        inputs = [torch.from_numpy(self.input_frames_data[f"c{c_idx}"][idx]).float() for c_idx in range(1, 5)]  # each: (12, 2)
        target = torch.from_numpy(self.output_frames_data[idx]).float()  # (12, 3)
        return inputs, target

In [14]:
from torch.utils.data import DataLoader

train_ds = MoCapInputDataset(train_seq_set, sequences, selected_names, raw_input, raw_output)
valid_ds = MoCapInputDataset(valid_seq_set, sequences, selected_names, raw_input, raw_output)
test_ds = MoCapInputDataset(test_seq_set, sequences, selected_names, raw_input, raw_output)

train_loader = DataLoader(train_ds, batch_size=32, shuffle=True)
val_loader = DataLoader(valid_ds, batch_size=32, shuffle=False)
test_loader = DataLoader(test_ds, batch_size=32, shuffle=False)

In [39]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class CustomNet(nn.Module):
    def __init__(self):
        super(CustomNet, self).__init__()
        # shape (12, 2) -> reshape to (2, 12) 
        self.conv1d = nn.Conv1d(in_channels=2, out_channels=1, kernel_size=3)  # (2, 12) -> (1, 10)
        # flatten 4 x 10 -> 40 x 1
        self.fc1 = nn.Linear(40, 64)
        self.dropout1 = nn.Dropout(p=0.3)
        self.fc2 = nn.Linear(64, 72)
        self.dropout2 = nn.Dropout(p=0.3)
        self.fc3 = nn.Linear(72, 36)

    def forward(self, x):
        # x: 4 tensors of shape (batch, 12, 2)
        conv_outs = []
        for xi in x:
            xi = xi.permute(0, 2, 1)  # reshape to (batch, 2, 12) 
            conv = self.conv1d(xi)     # (batch, 1, 10)
            conv = conv.squeeze(1)     # (batch, 10)
            conv_outs.append(conv)

        concat = torch.cat(conv_outs, dim=1)  # (batch, 40)

        out = F.relu(self.fc1(concat))
        out = self.dropout1(out)
        out = F.relu(self.fc2(out))
        out = self.dropout2(out)
        out = self.fc3(out)  # (batch, 36)
        out = out.view(-1, 12, 3)  # reshape to (batch, 12, 3)
        return out


In [40]:
import torch
import torch.nn as nn

class MPJPE(nn.Module):
    def __init__(self):
        super().__init__()

    def forward(self, predictions, targets):
        # shape (batch, 12, 3)
        # compute euclidean distance for each point pair
        distances = torch.norm(predictions - targets, dim=2)
        mean_distance = distances.mean()
        return mean_distance


In [41]:
model = CustomNet()
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)
# criterion = torch.nn.MSELoss()
criterion = MPJPE()

num_epochs = 50

for epoch in range(num_epochs):
    model.train()
    train_loss = 0.0

    for inputs, targets in train_loader:
        inputs = [inp.float() for inp in inputs]
        targets = targets.float()

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        train_loss += loss.item() * targets.size(0)

    avg_train_loss = train_loss / len(train_loader.dataset)

    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for inputs, targets in val_loader:
            inputs = [inp.float() for inp in inputs]
            targets = targets.float()

            outputs = model(inputs)
            loss = criterion(outputs, targets)
            val_loss += loss.item() * targets.size(0)

    avg_val_loss = val_loss / len(val_loader.dataset)

    print(f"Epoch {epoch+1}: Train MPJPE = {avg_train_loss:.4f}, Val MPJPE = {avg_val_loss:.4f}")


Epoch 1: Train MPJPE = 1580.5001, Val MPJPE = 1281.9271
Epoch 2: Train MPJPE = 1331.4666, Val MPJPE = 1260.6357
Epoch 3: Train MPJPE = 1327.2034, Val MPJPE = 1254.5521
Epoch 4: Train MPJPE = 1320.0655, Val MPJPE = 1244.5883
Epoch 5: Train MPJPE = 1301.3921, Val MPJPE = 1210.2246
Epoch 6: Train MPJPE = 1206.0619, Val MPJPE = 1034.7743
Epoch 7: Train MPJPE = 1001.4151, Val MPJPE = 814.1994
Epoch 8: Train MPJPE = 754.1588, Val MPJPE = 454.2428
Epoch 9: Train MPJPE = 605.9131, Val MPJPE = 373.6592
Epoch 10: Train MPJPE = 556.0895, Val MPJPE = 359.2036
Epoch 11: Train MPJPE = 536.9649, Val MPJPE = 344.6037
Epoch 12: Train MPJPE = 522.0633, Val MPJPE = 363.2411
Epoch 13: Train MPJPE = 511.3151, Val MPJPE = 290.5614
Epoch 14: Train MPJPE = 505.8306, Val MPJPE = 311.0294
Epoch 15: Train MPJPE = 499.4427, Val MPJPE = 310.6782
Epoch 16: Train MPJPE = 491.0160, Val MPJPE = 300.7964
Epoch 17: Train MPJPE = 483.4201, Val MPJPE = 308.7988
Epoch 18: Train MPJPE = 474.3529, Val MPJPE = 302.2669
Epoch 

In [27]:
model.eval()
test_loss = 0.0
with torch.no_grad():
    for inputs, targets in test_loader:
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        test_loss += loss.item() * targets.size(0)
test_loss /= len(test_loader.dataset)

print(f"Test loss = {test_loss:.4f}")

Test loss = 234.9538


In [28]:
def predict_single(model, inputs_list, device="cpu"):
    model.eval()
    inputs = [torch.from_numpy(inp).float().unsqueeze(0).to(device) for inp in inputs_list]
    
    with torch.no_grad():
        output = model(inputs) 
    
    return output.squeeze(0).cpu().numpy()

In [29]:
list(raw_output[test_seq_set[0]][0].values())

[[8.313590049743652, 3.798708915710449, 0.7660826444625854],
 [8.503310203552246, 1.9807288646697998, 0.5631764531135559],
 [8.67428207397461, 0.3377190828323364, 0.38371729850769043],
 [8.385348320007324, 3.7756404876708984, -0.361005961894989],
 [8.568338394165039, 1.9954566955566406, -0.2797110378742218],
 [8.740983963012695, 0.31866610050201416, -0.20210732519626617],
 [8.44154167175293, 5.744988918304443, 0.8683866262435913],
 [8.519126892089844, 4.909095287322998, 1.8944714069366455],
 [8.377540588378906, 4.756703853607178, 2.5212531089782715],
 [8.439878463745117, 5.685400485992432, -0.36643746495246887],
 [8.516109466552734, 4.839725494384766, -1.3964648246765137],
 [8.319049835205078, 4.764559745788574, -2.0099925994873047]]

In [35]:
test_seq = test_seq_set[-1]
print(test_seq)
frame = 100

bvh_sample_data = list(raw_output[test_seq][frame].values())
triangulation_sample_all_data = triangulation_data[test_seq][frame]
triangulation_sample_data = [triangulation_sample_all_data[int(j_idx)] for j_idx in selected_names.keys()]
print(bvh_sample_data)
print()
print(triangulation_sample_data)

p31s7
[[6.9573774337768555, 3.990058183670044, -0.9458487033843994], [6.790740966796875, 2.108768939971924, -0.6557729244232178], [6.641436576843262, 0.4229142665863037, -0.39575472474098206], [6.779537200927734, 3.968749761581421, 0.7932784557342529], [7.422176361083984, 2.181001901626587, 0.5760002732276917], [6.857697010040283, 0.5481630563735962, 0.29319626092910767], [6.661077976226807, 5.7644195556640625, -1.014063835144043], [6.555835247039795, 4.422194957733154, -1.3695333003997803], [7.137636184692383, 4.12354850769043, -1.546125888824463], [6.542348384857178, 5.848693370819092, 0.5578198432922363], [5.999451637268066, 4.781178951263428, 0.8451588153839111], [6.1203436851501465, 4.0845947265625, 1.1242176294326782]]

[[-118.12306939199553, 1673.1732366172698, 123.56820019962382], [76.37843527666733, 1675.2784158202187, 185.91329433397286], [-120.98718373185523, 1689.6438855787865, 539.2161825129382], [112.75039950315202, 1737.7942719420732, 564.7950584474041], [-178.3228227932

In [36]:
img_width = 960
img_height = 540

mp_input_sample = []

for c_idx in range(1, 5):
    all_frames_for_camera = raw_input[test_seq][f"c{c_idx}"][str(frame)]
    camera_mp_input_sample = []
    
    for point_idx, joint_name in selected_names.items(): 
        pixel_coords = all_frames_for_camera[int(point_idx)]
        camera_mp_input_sample.append(pixel_coords)
        # camera_mp_input_sample.append([pixel_coords[0]/img_width, pixel_coords[1]/img_height])

    mp_input_sample.append(np.array(camera_mp_input_sample))
    
mp_input_sample

[array([[0.76316017, 0.68825501],
        [0.77828461, 0.69167244],
        [0.7688598 , 0.57809174],
        [0.80211681, 0.58096778],
        [0.74845439, 0.48069626],
        [0.77233142, 0.47910824],
        [0.7542181 , 0.32151055],
        [0.79155314, 0.33582935],
        [0.75089961, 0.41073531],
        [0.783611  , 0.40402964],
        [0.76022077, 0.48667321],
        [0.79006279, 0.47789776]]),
 array([[0.5083378 , 0.69301528],
        [0.47600701, 0.67435449],
        [0.50878209, 0.57964247],
        [0.46967804, 0.58347642],
        [0.51285273, 0.46745655],
        [0.46622449, 0.45879248],
        [0.53639412, 0.30874494],
        [0.46251309, 0.29740098],
        [0.55061233, 0.38244581],
        [0.44634911, 0.375168  ],
        [0.56295902, 0.43414575],
        [0.43660882, 0.43939573]]),
 array([[0.22855243, 0.69278949],
        [0.23841311, 0.65525663],
        [0.21662265, 0.57509148],
        [0.23195376, 0.55330348],
        [0.21496983, 0.46950275],
        [0

In [37]:
predicted = predict_single(model, mp_input_sample, 'cpu')
predicted

array([[1637.281   ,  104.79422 ,  -68.8728  ],
       [1645.0094  ,  105.59387 ,   20.323442],
       [1700.0118  ,  465.3472  , -103.194374],
       [1705.5361  ,  460.60898 ,   46.954372],
       [1654.883   ,  864.1387  , -134.34045 ],
       [1651.0089  ,  861.4031  ,   79.19628 ],
       [1634.5836  , 1304.4253  , -173.01857 ],
       [1625.2422  , 1304.3583  ,  100.3998  ],
       [1621.0388  , 1023.0364  , -205.30948 ],
       [1596.331   , 1020.94617 ,  125.41394 ],
       [1680.657   ,  886.38727 , -216.04456 ],
       [1646.4257  ,  877.7934  ,  145.09079 ]], dtype=float32)

In [38]:
import plotly.graph_objects as go
import plotly.io as pio
pio.renderers.default = 'iframe'

SCALE_FACTOR = 255

x = [vec[2]*SCALE_FACTOR for vec in bvh_sample_data]
y = [vec[0]*SCALE_FACTOR for vec in bvh_sample_data]
z = [vec[1]*SCALE_FACTOR for vec in bvh_sample_data]

# x_t = [vec[0]/SCALE_FACTOR for vec in triangulation_sample_data]
# y_t = [vec[1]/SCALE_FACTOR for vec in triangulation_sample_data]
# z_t = [vec[2]/SCALE_FACTOR for vec in triangulation_sample_data]

x_t = [vec[0] for vec in triangulation_sample_data]
y_t = [vec[1] for vec in triangulation_sample_data]
z_t = [vec[2] for vec in triangulation_sample_data]

x_p = [vec[2] for vec in predicted]
y_p = [vec[0] for vec in predicted]
z_p = [vec[1] for vec in predicted]
    
fig = go.Figure(
    data=[
        go.Scatter3d(
            x=x, y=y, z=z,
            mode='markers',
            marker=dict(size=5, color='blue'),
            hoverinfo='text',
            name='Joints BVH'),
        go.Scatter3d(
            x=x_t, y=y_t, z=z_t,
            mode='markers',
            marker=dict(size=5, color='red'),
            hoverinfo='text',
            name='Joints triangulation mediapipe'),
        go.Scatter3d(
            x=x_p, y=y_p, z=z_p,
            mode='markers',
            marker=dict(size=5, color='green'),
            hoverinfo='text',
            name='Predicted by NN'),
        ]
)

fig.update_layout(scene=dict(
    xaxis_title='X',
    yaxis_title='Y',
    zaxis_title='Z',
    xaxis=dict(range=[-6000, 6000]),
    yaxis=dict(range=[-6000, 6000]),
    zaxis=dict(range=[-6000, 6000]),
    aspectmode='cube', 
),
title='3D joints plot from bvh file',
width=800,
height=800
)

fig.show()