In [1]:
import torch

print(f"CUDA available: {torch.cuda.is_available()}")
print(f"CUDA device: {torch.cuda.get_device_name(0)}")

CUDA available: True
CUDA device: NVIDIA GeForce RTX 3050 Ti Laptop GPU


In [2]:
import torch
import torch.nn as nn
import numpy as np

# x = torch.randn(1, 5, 2)
x = torch.from_numpy(np.array([[[1,2], [3,4], [5,6], [7, 8], [9, 10]]])).float()
print(x.shape)
print(x)

conv1d = nn.Conv1d(in_channels=5, out_channels=3, kernel_size=2)

output = conv1d(x)
print(output.shape)
print(output)

torch.Size([1, 5, 2])
tensor([[[ 1.,  2.],
         [ 3.,  4.],
         [ 5.,  6.],
         [ 7.,  8.],
         [ 9., 10.]]])
torch.Size([1, 3, 1])
tensor([[[ 0.5524],
         [ 5.2233],
         [-3.0487]]], grad_fn=<ConvolutionBackward0>)


In [3]:
print(x)

tensor([[[ 1.,  2.],
         [ 3.,  4.],
         [ 5.,  6.],
         [ 7.,  8.],
         [ 9., 10.]]])


In [4]:
print(output)

tensor([[[ 0.5524],
         [ 5.2233],
         [-3.0487]]], grad_fn=<ConvolutionBackward0>)


In [5]:
from scripts.parsers import parse_sequences as parse_sequence_info

file_path = 'gait3d\\ListOfSequences.txt'
sequences = parse_sequence_info(file_path)

mocap_keys = []
par_cam_keys = []
par_cam_person = set()
par_after_cloth_change_keys = []
par_after_cloth_change_person = set()

for key, params in sequences.items():
    if params['MoCap_data']:
        mocap_keys.append(key)
        if key[-1] in ["1", "3", "5", "7"]:
            par_cam_keys.append(key)
            par_cam_person.add(key[:-2])
        if key[-1] in ["5", "7"]:
            par_after_cloth_change_keys.append(key)
            par_after_cloth_change_person.add(key[:-2])

print(f"Number of sequences with mocap data: {len(mocap_keys)}")
print(f"Number of sequences with mocap data and parallel cameras: {len(par_cam_keys)}")
print(f"Number of sequences with mocap data, parallel cameras and after clothing change: {len(par_after_cloth_change_keys)}")
print(f"Number of unique participants with mocap data and parallel cameras: {len(par_cam_person)}")
print(f"Number of unique participants with mocap data, parallel cameras and after clothing change: {len(par_after_cloth_change_person)}")
par_after_cloth_change_person

Number of sequences with mocap data: 152
Number of sequences with mocap data and parallel cameras: 76
Number of sequences with mocap data, parallel cameras and after clothing change: 12
Number of unique participants with mocap data and parallel cameras: 32
Number of unique participants with mocap data, parallel cameras and after clothing change: 6


{'p26', 'p27', 'p28', 'p29', 'p30', 'p31'}

In [6]:
import random

random.seed(42)

without_clothing_change = []
while len(without_clothing_change) < 6:
    random_person = random.choice(list(par_cam_person))
    if random_person not in par_after_cloth_change_person:
        without_clothing_change.append(random_person)
        par_cam_person.remove(random_person)

with_clothing_change = []
while len(with_clothing_change) < 4:
    random_person = random.choice(list(par_after_cloth_change_person))
    with_clothing_change.append(random_person)
    par_after_cloth_change_person.remove(random_person)


test_seq_set = ([f'{p_seq}s{seq_idx}' for p_seq in without_clothing_change[:3] for seq_idx in [1, 3]] +
                [f'{p_seq}s{seq_idx}' for p_seq in with_clothing_change[:2] for seq_idx in [5, 7]])

valid_seq_set = ([f'{p_seq}s{seq_idx}' for p_seq in without_clothing_change[3:] for seq_idx in [1, 3]] +
                [f'{p_seq}s{seq_idx}' for p_seq in with_clothing_change[2:] for seq_idx in [5, 7]])

print(f"test sequences: {test_seq_set}")
print(f"valid sequences: {valid_seq_set}")
# without_clothing_change + [only_after_clothing_change] + [only_before_clothing_change]

test sequences: ['p16s1', 'p16s3', 'p13s1', 'p13s3', 'p4s1', 'p4s3', 'p29s5', 'p29s7', 'p26s5', 'p26s7']
valid sequences: ['p8s1', 'p8s3', 'p19s1', 'p19s3', 'p3s1', 'p3s3', 'p31s5', 'p31s7', 'p27s5', 'p27s7']


In [7]:
train_seq_set = ([f'{p_seq}s{seq_idx}' for p_seq in list(par_cam_person) for seq_idx in [1, 3]] +
                 [f'{p_seq}s{seq_idx}' for p_seq in list(par_after_cloth_change_person) for seq_idx in [5, 7]])

print(f"train sequences: {train_seq_set}")

train sequences: ['p29s1', 'p29s3', 'p17s1', 'p17s3', 'p25s1', 'p25s3', 'p18s1', 'p18s3', 'p10s1', 'p10s3', 'p30s1', 'p30s3', 'p28s1', 'p28s3', 'p14s1', 'p14s3', 'p21s1', 'p21s3', 'p23s1', 'p23s3', 'p31s1', 'p31s3', 'p7s1', 'p7s3', 'p6s1', 'p6s3', 'p15s1', 'p15s3', 'p12s1', 'p12s3', 'p1s1', 'p1s3', 'p11s1', 'p11s3', 'p32s1', 'p32s3', 'p5s1', 'p5s3', 'p20s1', 'p20s3', 'p27s1', 'p27s3', 'p22s1', 'p22s3', 'p26s1', 'p26s3', 'p24s1', 'p24s3', 'p9s1', 'p9s3', 'p2s1', 'p2s3', 'p30s5', 'p30s7', 'p28s5', 'p28s7']


In [8]:
for key, params in sequences.items():
    if params['MoCap_data']:
        if key[-1] in ["1", "3", "5", "7"]:
            print(f"{key} | {'train' if key in train_seq_set else '     '} | {'valid' if key in valid_seq_set else '     '} | {'test' if key in test_seq_set else '    '} |")

p1s1 | train |       |      |
p1s3 | train |       |      |
p2s1 | train |       |      |
p2s3 | train |       |      |
p3s1 |       | valid |      |
p3s3 |       | valid |      |
p4s1 |       |       | test |
p4s3 |       |       | test |
p5s1 | train |       |      |
p5s3 | train |       |      |
p6s1 | train |       |      |
p6s3 | train |       |      |
p7s1 | train |       |      |
p7s3 | train |       |      |
p8s1 |       | valid |      |
p8s3 |       | valid |      |
p9s1 | train |       |      |
p9s3 | train |       |      |
p10s1 | train |       |      |
p10s3 | train |       |      |
p11s1 | train |       |      |
p11s3 | train |       |      |
p12s1 | train |       |      |
p12s3 | train |       |      |
p13s1 |       |       | test |
p13s3 |       |       | test |
p14s1 | train |       |      |
p14s3 | train |       |      |
p15s1 | train |       |      |
p15s3 | train |       |      |
p16s1 |       |       | test |
p16s3 |       |       | test |
p17s1 | train |       |   

In [9]:
print(f"Train size: {len(train_seq_set)} | {100*len(train_seq_set)/72:.2f}%")
print(f"Test size: {len(test_seq_set)} | {100*len(test_seq_set)/72:.2f}%")
print(f"Valid size: {len(valid_seq_set)} | {100*len(valid_seq_set)/72:.2f}%")

Train size: 56 | 77.78%
Test size: 10 | 13.89%
Valid size: 10 | 13.89%


In [10]:
import json

train_test_split = {"test": test_seq_set,
                    "valid": valid_seq_set,
                    "train": train_seq_set}

with open("./datasets/train_test_split.json", "w") as f:
    json.dump(train_test_split, f, indent=4)

In [11]:
import json

selected_names_file = "./datasets/mediapipe/selected_joint_names.json"
input_data_file = "./datasets/mediapipe/dataset_v2.json"
output_data_file = "./datasets/mocap/dataset_v2.json"

with open(input_data_file, 'r') as file:
    raw_input = json.load(file)

with open(output_data_file, 'r') as file:
    raw_output = json.load(file)

with open(selected_names_file, 'r') as file:
    selected_names = json.load(file)

# selected_names.pop('15')
# selected_names.pop('16')
# selected_names.pop('13')
# selected_names.pop('14')
# selected_names

with open(selected_names_file, "w") as f:
    json.dump(selected_names, f, indent=4)

In [12]:
triang_data_file = "./datasets/mediapipe/triangulation.json"

with open(triang_data_file, 'r') as file:
    triangulation_data = json.load(file)


In [13]:
sequences['p1s1']

{'start_frame': 195,
 'number_of_frames': 135,
 'frame_offset': 0,
 'MoCap_data': True}

In [14]:
input_frames_data = {f"c{c_idx}": [] for c_idx in range(1, 5)}
output_frames_data = []
img_width = 960
img_height = 540

not_found = 0
seq_keys_list = train_seq_set + test_seq_set + valid_seq_set

for seq_key in seq_keys_list:
    for f_idx in range(sequences[seq_key]['number_of_frames']):
    # for f_idx in range(2):
        curr_output_array = []
        output_frame_dict = raw_output[seq_key][f_idx]
        for point_idx, joint_name in selected_names.items():
            curr_output_array.append(output_frame_dict[joint_name])

        curr_output_array_np = np.array(curr_output_array)
        # print(curr_output_array_np)
        
        curr_input_arrays = {f"c{c_idx}": [] for c_idx in range(1, 5)}

        all_found = True
        
        for c_idx in range(1, 5):
            input_frame_list = raw_input[seq_key][f"c{c_idx}"][str(f_idx)]
            if [None, None] in input_frame_list:
                all_found = False
                break
                
            for point_idx, joint_name in selected_names.items(): 
                pixel_coords = input_frame_list[int(point_idx)]
                curr_input_arrays[f"c{c_idx}"].append(pixel_coords)

                # curr_input_arrays[f"c{c_idx}"].append([pixel_coords[0]/img_width, pixel_coords[1]/img_height])
                # conversion from pixels to propotions if needed

        # print(curr_input_arrays)

        if all_found:
            for c_idx in range(1, 5):
                input_frames_data[f"c{c_idx}"].append(np.array(curr_input_arrays[f"c{c_idx}"]))
            #     print(np.array(curr_input_arrays[f"c{c_idx}"]).shape)

            # print(curr_output_array_np.shape)    
            output_frames_data.append(curr_output_array_np)
        else:
            not_found += 1

print(f"Frames with all found mocaps: {len(output_frames_data)}")
print(f"Frames with at least one not found mocap: {not_found}")
print(f"Proportion: {100*len(output_frames_data)/(len(output_frames_data) + not_found):.2f}%")
# print(input_frames_data['c4'][0])

Frames with all found mocaps: 6035
Frames with at least one not found mocap: 3495
Proportion: 63.33%


In [15]:
import torch
from torch.utils.data import Dataset

class MoCapInputDataset(Dataset):
    def __init__(self, seq_keys_list, sequences, selected_names, raw_input, raw_output):
        self.img_width = 960
        self.img_height = 540
        self.input_frames_data = {f"c{c_idx}": [] for c_idx in range(1, 5)}
        self.output_frames_data = []
        self.not_found = 0
              
        for seq_key in seq_keys_list:
            for f_idx in range(sequences[seq_key]['number_of_frames']):
                curr_output_array = []
                output_frame_dict = raw_output[seq_key][f_idx]
                for point_idx, joint_name in selected_names.items():
                    curr_output_array.append(output_frame_dict[joint_name])
        
                curr_output_array_np = np.array(curr_output_array)*255
                # 255 multiplier added to mocap to obtain distance in mm
                curr_input_arrays = {f"c{c_idx}": [] for c_idx in range(1, 5)}
        
                all_found = True
                
                for c_idx in range(1, 5):
                    input_frame_list = raw_input[seq_key][f"c{c_idx}"][str(f_idx)]
                    if [None, None] in input_frame_list:
                        all_found = False
                        break
                        
                    for point_idx, joint_name in selected_names.items(): 
                        pixel_coords = input_frame_list[int(point_idx)]
                        curr_input_arrays[f"c{c_idx}"].append(pixel_coords)
                        # curr_input_arrays[f"c{c_idx}"].append(
                        #     [pixel_coords[0]/self.img_width, 
                        #      pixel_coords[1]/self.img_height])
        
                if all_found:
                    for c_idx in range(1, 5):
                        self.input_frames_data[f"c{c_idx}"].append(np.array(curr_input_arrays[f"c{c_idx}"]))
 
                    self.output_frames_data.append(curr_output_array_np)
                else:
                    self.not_found += 1

        self.length = len(self.output_frames_data)

    def __len__(self):
        return self.length

    def __getitem__(self, idx):
        inputs = [torch.from_numpy(self.input_frames_data[f"c{c_idx}"][idx]).float() for c_idx in range(1, 5)]  # each: (12, 2)
        target = torch.from_numpy(self.output_frames_data[idx]).float()  # (12, 3)
        return inputs, target

In [16]:
# Optuna results
best_params = {'lr': 0.005589010994074508, 'weight_decay': 1.1906353862455155e-05, 'dropout': 0.2214033785244307, 'batch_size': 64, 'activation': 'gelu'}

In [17]:
from torch.utils.data import DataLoader

batch_size = best_params['batch_size']

train_ds = MoCapInputDataset(train_seq_set, sequences, selected_names, raw_input, raw_output)
valid_ds = MoCapInputDataset(valid_seq_set, sequences, selected_names, raw_input, raw_output)
test_ds = MoCapInputDataset(test_seq_set, sequences, selected_names, raw_input, raw_output)

train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(valid_ds, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_ds, batch_size=batch_size, shuffle=False)

In [18]:
train_ds[1]

([tensor([[0.4650, 0.6808],
          [0.4615, 0.6887],
          [0.4484, 0.5939],
          [0.4334, 0.5923],
          [0.4552, 0.4929],
          [0.4559, 0.4906],
          [0.4586, 0.3395],
          [0.4598, 0.3398]]),
  tensor([[0.4985, 0.5462],
          [0.5093, 0.5628],
          [0.4982, 0.4780],
          [0.5129, 0.4851],
          [0.4989, 0.4136],
          [0.5176, 0.4131],
          [0.4890, 0.3128],
          [0.5227, 0.3134]]),
  tensor([[0.5259, 0.6577],
          [0.5332, 0.6965],
          [0.5461, 0.5803],
          [0.5527, 0.5841],
          [0.5303, 0.4755],
          [0.5338, 0.4780],
          [0.5319, 0.3294],
          [0.5367, 0.3307]]),
  tensor([[0.5049, 0.5729],
          [0.4981, 0.5827],
          [0.5050, 0.5119],
          [0.4937, 0.5094],
          [0.5078, 0.4394],
          [0.4880, 0.4350],
          [0.5195, 0.3234],
          [0.4811, 0.3169]])],
 tensor([[-193.1270,  202.0149,  -73.8722],
         [-235.4661,   92.5106, -151.9323],
       

In [19]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class CustomNet(nn.Module):
    def __init__(self):
        super(CustomNet, self).__init__()
        dropout = 0
        # shape (8, 2) -> reshape to (2, 8) 
        self.conv1d1 = nn.ModuleList([
            nn.Conv1d(in_channels=2, out_channels=8, kernel_size=2, padding=1) for _ in range(4)
        ])
        self.conv1d2 = nn.ModuleList([
            nn.Conv1d(in_channels=8, out_channels=1, kernel_size=2) for _ in range(4)
        ])
        
        self.bn1 = nn.BatchNorm1d(32)
        self.fc1 = nn.Linear(32, 48)
        self.dropout1 = nn.Dropout(p=dropout)
        self.bn2 = nn.BatchNorm1d(48)
        self.fc2 = nn.Linear(48, 32)
        self.dropout2 = nn.Dropout(p=dropout)
        self.bn3 = nn.BatchNorm1d(32)
        self.fc3 = nn.Linear(32, 24)

    def forward(self, x):
        # x: 4 tensors of shape (batch, 8, 2)
        conv_outs = []
        for i, xi in enumerate(x):
            xi = xi.permute(0, 2, 1)  # reshape to (batch, 2, 8) 
            # conv = self.conv1d[i](xi)     # (batch, 1, 8)
            conv = self.conv1d1[i](xi)
            conv = self.conv1d2[i](conv)
            conv = conv.squeeze(1)     # (batch, 8)
            conv_outs.append(conv)

        concat = torch.cat(conv_outs, dim=1)  # (batch, 28)

        out = self.bn1(concat)
        out = F.gelu(self.bn2(self.fc1(out)))
        out = self.dropout1(out)
        out = F.gelu(self.bn3(self.fc2(out)))
        out = self.dropout2(out)
        out = self.fc3(out)  # (batch, 24)
        out = out.view(-1, 8, 3)  # reshape to (batch, 8, 3)
        return out


In [20]:
import torch
import torch.nn as nn

class MPJPE(nn.Module):
    def __init__(self):
        super().__init__()

    def forward(self, predictions, targets):
        # shape (batch, 8, 3)
        # compute euclidean distance for each point pair
        distances = torch.norm(predictions - targets, dim=2)
        mean_distance = distances.mean()
        return mean_distance


In [21]:
model = CustomNet()
optimizer = torch.optim.AdamW(model.parameters(), lr=best_params['lr'], weight_decay=best_params['weight_decay'])
# criterion = torch.nn.MSELoss()
criterion = MPJPE()

num_epochs = 200

for epoch in range(num_epochs):
    model.train()
    train_loss = 0.0

    for inputs, targets in train_loader:
        inputs = [inp.float() for inp in inputs]
        targets = targets.float()

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        train_loss += loss.item() * targets.size(0)

    avg_train_loss = train_loss / len(train_loader.dataset)

    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for inputs, targets in val_loader:
            inputs = [inp.float() for inp in inputs]
            targets = targets.float()

            outputs = model(inputs)
            loss = criterion(outputs, targets)
            val_loss += loss.item() * targets.size(0)

    avg_val_loss = val_loss / len(val_loader.dataset)

    print(f"Epoch {epoch+1}: Train MPJPE = {avg_train_loss:.4f}, Val MPJPE = {avg_val_loss:.4f}")


Epoch 1: Train MPJPE = 1627.9130, Val MPJPE = 1557.4268
Epoch 2: Train MPJPE = 1599.3458, Val MPJPE = 1505.0625
Epoch 3: Train MPJPE = 1542.8411, Val MPJPE = 1456.2364
Epoch 4: Train MPJPE = 1461.3585, Val MPJPE = 1361.4021
Epoch 5: Train MPJPE = 1356.3216, Val MPJPE = 1236.5474
Epoch 6: Train MPJPE = 1231.0650, Val MPJPE = 1058.0344
Epoch 7: Train MPJPE = 1089.2551, Val MPJPE = 820.4271
Epoch 8: Train MPJPE = 938.8463, Val MPJPE = 792.6554
Epoch 9: Train MPJPE = 790.3421, Val MPJPE = 716.9544
Epoch 10: Train MPJPE = 647.4425, Val MPJPE = 517.6722
Epoch 11: Train MPJPE = 522.8971, Val MPJPE = 382.8199
Epoch 12: Train MPJPE = 428.6085, Val MPJPE = 377.9130
Epoch 13: Train MPJPE = 356.8749, Val MPJPE = 347.4649
Epoch 14: Train MPJPE = 310.8297, Val MPJPE = 256.4222
Epoch 15: Train MPJPE = 286.3256, Val MPJPE = 271.2785
Epoch 16: Train MPJPE = 247.1435, Val MPJPE = 238.0213
Epoch 17: Train MPJPE = 253.8007, Val MPJPE = 203.0062
Epoch 18: Train MPJPE = 239.7046, Val MPJPE = 198.9723
Epoch 

In [22]:
model.eval()
test_loss = 0.0
with torch.no_grad():
    for inputs, targets in test_loader:
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        test_loss += loss.item() * targets.size(0)
test_loss /= len(test_loader.dataset)

print(f"Test loss = {test_loss:.4f}")

Test loss = 275.6595


In [23]:
def predict_single(model, inputs_list, device="cpu"):
    model.eval()
    inputs = [torch.from_numpy(inp).float().unsqueeze(0).to(device) for inp in inputs_list]
    
    with torch.no_grad():
        output = model(inputs) 
    
    return output.squeeze(0).cpu().numpy()

In [24]:
list(raw_output[test_seq_set[0]][0].values())

[[6.90322208404541, 3.6994009017944336, -3.9581351280212402],
 [7.050997257232666, 1.9678819179534912, -4.278520107269287],
 [7.196775913238525, 0.25871431827545166, -4.5949249267578125],
 [6.4067840576171875, 3.712695360183716, -5.024971008300781],
 [6.647609710693359, 1.9436155557632446, -5.044170379638672],
 [6.8671674728393555, 0.3317227363586426, -5.061561107635498],
 [7.128551483154297, 5.347268104553223, -3.8196592330932617],
 [7.222786903381348, 4.209889888763428, -3.9270873069763184],
 [7.1465840339660645, 3.58955454826355, -3.8218801021575928],
 [6.4526262283325195, 5.434737682342529, -4.97758674621582],
 [6.611799716949463, 4.247878551483154, -5.248648166656494],
 [6.399296283721924, 3.629312753677368, -5.209601402282715]]

In [25]:
test_seq = test_seq_set[2]
print(test_seq)
frame = 110

bvh_sample_data = list(raw_output[test_seq][frame].values())
triangulation_sample_all_data = triangulation_data[test_seq][frame]
triangulation_sample_data = [triangulation_sample_all_data[int(j_idx)] for j_idx in selected_names.keys()]
print(bvh_sample_data)
print()
print(triangulation_sample_data)

p13s1
[[-8.251530647277832, 3.844074249267578, 0.7468725442886353], [-8.001875877380371, 2.0354771614074707, 0.5258227586746216], [-7.313557147979736, 0.5287668704986572, 0.39559948444366455], [-8.215202331542969, 3.807081937789917, -0.3815429210662842], [-8.524124145507812, 2.0546159744262695, -0.1751963347196579], [-8.814669609069824, 0.403894305229187, 0.020141497254371643], [-8.087128639221191, 5.645803451538086, 0.9305919408798218], [-8.333471298217773, 4.343874931335449, 0.9740116000175476], [-8.786253929138184, 3.8658547401428223, 1.0251011848449707], [-8.0086669921875, 5.5879950523376465, -0.4180036783218384], [-7.919554710388184, 4.258255958557129, -0.49406713247299194], [-8.123318672180176, 3.650397300720215, -0.5934985280036926]]

[[102.1602198670949, -1822.8256934190747, 131.24888403694953], [9.131663350769033, -2158.727828409204, 123.0044462706963], [144.36682717263608, -1982.0813164831154, 504.9846829937159], [-6.836637753528645, -2106.9630355077556, 512.7716947321197], [

In [26]:
img_width = 960
img_height = 540

mp_input_sample = []

for c_idx in range(1, 5):
    all_frames_for_camera = raw_input[test_seq][f"c{c_idx}"][str(frame)]
    camera_mp_input_sample = []
    
    for point_idx, joint_name in selected_names.items(): 
        pixel_coords = all_frames_for_camera[int(point_idx)]
        camera_mp_input_sample.append(pixel_coords)
        # camera_mp_input_sample.append([pixel_coords[0]/img_width, pixel_coords[1]/img_height])

    mp_input_sample.append(np.array(camera_mp_input_sample))
    
mp_input_sample

[array([[0.20813051, 0.70449907],
        [0.16183747, 0.69482696],
        [0.17008203, 0.60033315],
        [0.16713762, 0.59231079],
        [0.16044874, 0.49173686],
        [0.1793174 , 0.48987851],
        [0.14938203, 0.34869745],
        [0.18506591, 0.3496685 ]]),
 array([[0.49516344, 0.52987403],
        [0.50331253, 0.52303612],
        [0.49279758, 0.47022182],
        [0.50423014, 0.46748048],
        [0.49058369, 0.41310054],
        [0.5052231 , 0.41280025],
        [0.48446709, 0.33909434],
        [0.51155484, 0.34280893]]),
 array([[0.78400618, 0.66194117],
        [0.84437943, 0.67142469],
        [0.81011289, 0.55996567],
        [0.84866661, 0.56853324],
        [0.81925732, 0.45377782],
        [0.83465594, 0.46060836],
        [0.81674045, 0.30456683],
        [0.83668095, 0.31386459]]),
 array([[0.52538806, 0.66325176],
        [0.50655919, 0.72549403],
        [0.53693497, 0.56182033],
        [0.50632167, 0.56894732],
        [0.54407269, 0.41606939],
        

In [27]:
predicted = predict_single(model, mp_input_sample, 'cpu')
predicted

array([[-1812.9342  ,   129.25957 ,   108.4758  ],
       [-2149.7168  ,   129.08055 ,   -11.530312],
       [-1947.7455  ,   506.13046 ,   146.98146 ],
       [-2143.4973  ,   497.92108 ,   -33.88203 ],
       [-1990.8002  ,   899.36646 ,   182.06152 ],
       [-2001.2983  ,   892.08813 ,   -64.33371 ],
       [-1989.7083  ,  1355.3257  ,   233.01064 ],
       [-1970.1957  ,  1355.3562  ,   -92.08473 ]], dtype=float32)

In [28]:
import plotly.graph_objects as go
import plotly.io as pio
pio.renderers.default = 'iframe'

SCALE_FACTOR = 255

x = [vec[2]*SCALE_FACTOR for vec in bvh_sample_data]
y = [vec[0]*SCALE_FACTOR for vec in bvh_sample_data]
z = [vec[1]*SCALE_FACTOR for vec in bvh_sample_data]

# x_t = [vec[0]/SCALE_FACTOR for vec in triangulation_sample_data]
# y_t = [vec[1]/SCALE_FACTOR for vec in triangulation_sample_data]
# z_t = [vec[2]/SCALE_FACTOR for vec in triangulation_sample_data]

x_t = [vec[0] for vec in triangulation_sample_data]
y_t = [vec[1] for vec in triangulation_sample_data]
z_t = [vec[2] for vec in triangulation_sample_data]

x_p = [vec[2] for vec in predicted]
y_p = [vec[0] for vec in predicted]
z_p = [vec[1] for vec in predicted]
    
fig = go.Figure(
    data=[
        go.Scatter3d(
            x=x, y=y, z=z,
            mode='markers',
            marker=dict(size=5, color='blue'),
            hoverinfo='text',
            name='Joints BVH'),
        go.Scatter3d(
            x=x_t, y=y_t, z=z_t,
            mode='markers',
            marker=dict(size=5, color='red'),
            hoverinfo='text',
            name='Joints triangulation mediapipe'),
        go.Scatter3d(
            x=x_p, y=y_p, z=z_p,
            mode='markers',
            marker=dict(size=5, color='green'),
            hoverinfo='text',
            name='Predicted by NN'),
        ]
)

fig.update_layout(scene=dict(
    xaxis_title='X',
    yaxis_title='Y',
    zaxis_title='Z',
    xaxis=dict(range=[-6000, 6000]),
    yaxis=dict(range=[-6000, 6000]),
    zaxis=dict(range=[-6000, 6000]),
    aspectmode='cube', 
),
title='3D joints plot from bvh file',
width=800,
height=800
)

fig.show()

In [29]:
torch.save(model.state_dict(), './models/custom_net_v1.pth')