In [1]:
import os
import warnings

import bvhio
import numpy as np

from utils.multicamera_tools import parse_camera_xml, triangulate_poses
from utils.video_tools import get_camera_calibration_files, get_video_files
from utils.mediapipe_estimator import MediaPipeEstimator
from scripts.frame_iterator import video_frame_iterator
from scripts.parsers import parse_sequences as parse_sequence_info

# Suppress every warning for the rest of the session.
# NOTE(review): this is a blanket filter, so it also hides deprecation and
# numerical warnings from every library used below - narrow it if one matters.
warnings.filterwarnings('ignore')

# Build the path with os.path.join so the separator matches the host OS.
# The previous hard-coded 'gait3d\\ListOfSequences.txt' only resolves on Windows.
file_path = os.path.join('gait3d', 'ListOfSequences.txt')

# Parsed sequence list; the cells below index it by sequence key and read the
# 'MoCap_data' and 'number_of_frames' entries of each sequence.
sequences = parse_sequence_info(file_path)

In [2]:
# Mapping from integer landmark index to the joint name it is exported under
# (presumably MediaPipe pose landmark indices - TODO confirm).
# Insertion order is significant: it is the order the entries are written in
# when the mapping is serialised later in the notebook.
selected_joint_names = dict([
    # legs: feet, tibiae, femora
    (27, "lfoot"), (28, "rfoot"),
    (25, "ltibia"), (26, "rtibia"),
    (23, "lfemur"), (24, "rfemur"),
    # arms: humeri, radii, wrists
    (11, "lhumerus"), (12, "rhumerus"),
    (13, "lradius"), (14, "rradius"),
    (15, "lwrist"), (16, "rwrist"),
])
selected_joint_names

{27: 'lfoot',
 28: 'rfoot',
 25: 'ltibia',
 26: 'rtibia',
 23: 'lfemur',
 24: 'rfemur',
 11: 'lhumerus',
 12: 'rhumerus',
 13: 'lradius',
 14: 'rradius',
 15: 'lwrist',
 16: 'rwrist'}

In [3]:
# --- Configuration ---------------------------------------------------------
VIDEO_FPS = 25
MOCAP_FPS = 100
FRAME_TIME = 1000/VIDEO_FPS   # 1000 / fps - presumably milliseconds per video frame
MP_LANDMARKS_NUM = 33         # length of the per-frame landmark list (MediaPipe Pose defines 33 landmarks)
NUM_CAMERAS = 4               # cameras handled per sequence (was a literal 4 repeated three times)

# Value of `pixels` that marks "no landmarks found in this frame".
# Built once here instead of being rebuilt for every camera of every frame.
NO_LANDMARKS_PIXELS = [(None, None)] * MP_LANDMARKS_NUM

# Results, keyed by sequence key:
#   mediapipe_selection[seq]     -> {"c1".."c4": {frame index: `norm` prediction}}
#   mediapipe_triangulation[seq] -> one triangulated result (nested lists) per frame
mediapipe_selection = {}
mediapipe_triangulation = {}

for seq_key in sequences.keys():
    print(seq_key, end=" | ")
    seq_info = sequences[seq_key]

    # Only sequences flagged as having MoCap data are processed.
    if not seq_info['MoCap_data']:
        continue

    video_files = get_video_files(seq_key)
    frames_iterator = [video_frame_iterator(avi_file) for avi_file in video_files]
    # One estimator per camera, so each video stream gets its own instance.
    mp_estimators = [MediaPipeEstimator() for _ in range(NUM_CAMERAS)]

    camera_files_paths = get_camera_calibration_files(seq_key)
    cameras_params = [parse_camera_xml(camera_path) for camera_path in camera_files_paths]

    predicted_for_seq = {f"c{i+1}": {} for i in range(NUM_CAMERAS)}

    combined_cameras_with_landmarks = []
    combined_triangulation_results = []

    max_frames = seq_info['number_of_frames']

    # ---- Pass 1: run the estimator on every frame of every camera ----------
    # NOTE(review): zip() stops at the shorter of the two lists, so a sequence
    # with a different number of video files than NUM_CAMERAS is silently
    # truncated (or leaves a camera flagged True with no prediction stored).
    # NOTE(review): next() raises StopIteration if a video holds fewer frames
    # than 'number_of_frames' - confirm the sequence list matches the videos.
    for frame_i in range(max_frames):
        # A camera counts as "landmarks found" until its prediction says otherwise.
        cameras_with_landmarks = [True] * NUM_CAMERAS

        for camera_ind, (f_iterator, mp_estimator) in enumerate(zip(frames_iterator, mp_estimators)):
            _frame_ts, frame = next(f_iterator)  # the first tuple element is not used here
            norm, pixels = mp_estimator.predict_for_frame_to_dataset(frame_i, frame)

            if pixels == NO_LANDMARKS_PIXELS:
                cameras_with_landmarks[camera_ind] = False

            # `norm` is stored even for frames without landmarks; those frames are
            # filtered out through `combined_cameras_with_landmarks` in pass 2.
            predicted_for_seq[f"c{camera_ind+1}"][frame_i] = norm

        combined_cameras_with_landmarks.append(cameras_with_landmarks)

    mediapipe_selection[seq_key] = predicted_for_seq

    # ---- Pass 2: triangulate each frame from the cameras that saw landmarks --
    # NOTE(review): nothing here guards against frames where fewer than two
    # cameras found landmarks - confirm triangulate_poses copes with that input.
    for frame_i in range(max_frames):
        found_landmarks_cameras_idx = [
            camera_i
            for camera_i, camera_l_found in enumerate(combined_cameras_with_landmarks[frame_i])
            if camera_l_found
        ]

        selected_cameras_params = [cameras_params[camera_i] for camera_i in found_landmarks_cameras_idx]
        found_2d_points = np.array(
            [predicted_for_seq[f"c{camera_i+1}"][frame_i] for camera_i in found_landmarks_cameras_idx]
        )
        triangulation_result = triangulate_poses(selected_cameras_params, found_2d_points)
        # Only the first element of the result is kept, converted to plain lists
        # so it can be serialised to JSON.
        combined_triangulation_results.append(triangulation_result[0].tolist())

    mediapipe_triangulation[seq_key] = combined_triangulation_results


p1s1 | p1s2 | p1s3 | p1s4 | p2s1 | p2s2 | p2s3 | p2s4 | p3s1 | p3s2 | p3s3 | p3s4 | p4s1 | p4s2 | p4s3 | p4s4 | p5s1 | p5s2 | p5s3 | p5s4 | p6s1 | p6s2 | p6s3 | p6s4 | p7s1 | p7s2 | p7s3 | p7s4 | p8s1 | p8s2 | p8s3 | p8s4 | p9s1 | p9s2 | p9s3 | p9s4 | p10s1 | p10s2 | p10s3 | p10s4 | p11s1 | p11s2 | p11s3 | p11s4 | p12s1 | p12s2 | p12s3 | p12s4 | p13s1 | p13s2 | p13s3 | p13s4 | p14s1 | p14s2 | p14s3 | p14s4 | p15s1 | p15s2 | p15s3 | p15s4 | p16s1 | p16s2 | p16s3 | p16s4 | p17s1 | p17s2 | p17s3 | p17s4 | p18s1 | p18s2 | p18s3 | p18s4 | p19s1 | p19s2 | p19s3 | p19s4 | p20s1 | p20s2 | p20s3 | p20s4 | p21s1 | p21s2 | p21s3 | p21s4 | p22s1 | p22s2 | p22s3 | p22s4 | p23s1 | p23s2 | p23s3 | p23s4 | p24s1 | p24s2 | p24s3 | p24s4 | p25s1 | p25s2 | p25s3 | p25s4 | p26s1 | p26s2 | p26s3 | p26s4 | p26s5 | p26s6 | p26s7 | p26s8 | p26s9 | p26s10 | p27s1 | p27s2 | p27s3 | p27s4 | p27s5 | p27s6 | p27s7 | p27s8 | p27s9 | p27s10 | p28s1 | p28s2 | p28s3 | p28s4 | p28s5 | p28s6 | p28s7 | p28s8 | p28s9 | p2

In [4]:
import json

# Write the per-camera, per-frame `norm` predictions gathered in
# `mediapipe_selection` to disk as indented JSON.
dataset_json_path = "./datasets/mediapipe/dataset_v2.json"
with open(dataset_json_path, mode="w") as dataset_file:
    json.dump(mediapipe_selection, dataset_file, indent=4)

In [5]:
import json

# Write the per-frame triangulation results gathered in
# `mediapipe_triangulation` to disk as indented JSON.
triangulation_json_path = "./datasets/mediapipe/triangulation_v2.json"
with open(triangulation_json_path, mode="w") as triangulation_file:
    json.dump(mediapipe_triangulation, triangulation_file, indent=4)

In [6]:
# Write the landmark-index -> joint-name mapping next to the datasets.
# JSON object keys are always strings, so the integer indices are written as
# "27", "28", ... and must be converted back to int by whoever loads the file.
joint_names_json_path = "./datasets/mediapipe/selected_joint_names.json"
with open(joint_names_json_path, mode="w") as joint_names_file:
    json.dump(selected_joint_names, joint_names_file, indent=4)