In [1]:
# ! pip install ultralytics

In [2]:
from utils.multicamera_tools import parse_camera_xml, triangulate_poses
from utils.video_tools import get_camera_calibration_files, get_video_files
from scripts.frame_iterator import video_frame_iterator
from scripts.parsers import parse_sequences as parse_sequence_info
import numpy as np
import bvhio
import warnings

# NOTE: this silences *every* warning for the rest of the session, including
# deprecation and numerical warnings raised by the libraries used below.
warnings.filterwarnings('ignore')

# Forward slashes are accepted on Windows as well as POSIX systems and match
# the separator used by every other path in this notebook.
file_path = 'gait3d/ListOfSequences.txt'
sequences = parse_sequence_info(file_path)

In [3]:
from ultralytics import YOLO

# Pose-estimation model shared by every prediction cell below.
# Alternative checkpoint kept for quick experiments (presumably the smaller,
# faster variant of the same family -- confirm against the Ultralytics docs):
# model = YOLO("yolo11n-pose.pt")
model = YOLO("yolo11x-pose.pt")

In [4]:
# Keypoints that must be present for a frame to be usable, as
# (keypoint index, joint label) pairs. The indices address rows of the model's
# per-person keypoint array; the labels are the names this project uses for
# them (presumably skeleton joint names from the MoCap data -- TODO confirm).
selected_joint_names = dict([
    (5, 'lhumerus'),
    (6, 'rhumerus'),
    (11, 'lfemur'),
    (12, 'rfemur'),
    (13, 'ltibia'),
    (14, 'rtibia'),
    (15, 'lfoot'),
    (16, 'rfoot'),
])

selected_joint_names

{5: 'lhumerus',
 6: 'rhumerus',
 11: 'lfemur',
 12: 'rfemur',
 13: 'ltibia',
 14: 'rtibia',
 15: 'lfoot',
 16: 'rfoot'}

In [5]:
FRAME_WIDTH = 960
FRAME_HEIGHT = 540

# Smoke-test the pose model on the c1..c4 videos of one sample sequence and
# convert the normalized keypoints of each frame to pixel coordinates.
for i in range(1, 5):
    results = model.predict(
        source=f'./gait3d/Sequences/p5s1/Images/c{i}_0195.avi',
        show=False,  # do not display during processing
        save=False,  # do not save annotated video
        project='sample_vids',
        name='yolo11',
        # exist_ok=True,
        verbose=False,
        stream=True  # results are produced lazily, one per frame
    )

    for result in results:
        if len(result.keypoints.xyn) == 1:
            # Exactly one person detected: use their keypoints.
            xy_n = result.keypoints.xyn[0].cpu().numpy()
        else:
            # No person or several people: fall back to an all-zero pose so
            # the scaling below still works. (The original used `==` here,
            # which compared instead of assigning, and then indexed [0]
            # unconditionally, raising on frames without a detection.)
            xy_n = np.zeros((17, 2), dtype=np.float32)
        xy_abs = xy_n * [FRAME_WIDTH, FRAME_HEIGHT]

In [6]:
FRAME_WIDTH = 960
FRAME_HEIGHT = 540

# A streamed prediction can only be iterated once, so open a fresh stream
# here instead of reusing a `results` generator that an earlier cell has
# already consumed (iterating that one would silently do nothing).
results = model.predict(
    source='./gait3d/Sequences/p5s1/Images/c1_0195.avi',
    show=False,
    save=False,
    verbose=False,
    stream=True
)

# Print the keypoints of the first frame that contains a detection.
for result in results:
    if len(result.keypoints.xyn) == 0:
        continue  # nobody detected in this frame; try the next one

    # The attribute is `xyn`; `xy_n` does not exist on the keypoints object.
    xy_n = result.keypoints.xyn[0].cpu().numpy()  # normalized
    xy_abs = xy_n * [FRAME_WIDTH, FRAME_HEIGHT]

    print(f"{xy_n = }")
    print(f"{xy_abs }")
    break

In [7]:
# Length of the first row of the most recent `xy_n`, i.e. how many
# coordinates are stored per keypoint.
len(xy_n[0])

2

In [14]:
# Preview the sequence keys from position 105 to the end of the parsed listing.
list(sequences.keys())[105:]

['p26s6',
 'p26s7',
 'p26s8',
 'p26s9',
 'p26s10',
 'p27s1',
 'p27s2',
 'p27s3',
 'p27s4',
 'p27s5',
 'p27s6',
 'p27s7',
 'p27s8',
 'p27s9',
 'p27s10',
 'p28s1',
 'p28s2',
 'p28s3',
 'p28s4',
 'p28s5',
 'p28s6',
 'p28s7',
 'p28s8',
 'p28s9',
 'p28s10',
 'p29s1',
 'p29s2',
 'p29s3',
 'p29s4',
 'p29s5',
 'p29s6',
 'p29s7',
 'p29s8',
 'p29s9',
 'p29s10',
 'p30s1',
 'p30s2',
 'p30s3',
 'p30s4',
 'p30s5',
 'p30s6',
 'p30s7',
 'p30s8',
 'p30s9',
 'p30s10',
 'p31s1',
 'p31s2',
 'p31s3',
 'p31s4',
 'p31s5',
 'p31s6',
 'p31s7',
 'p31s8',
 'p31s9',
 'p31s10',
 'p32s1',
 'p32s2',
 'p32s3',
 'p32s4',
 'p32s9',
 'p32s10']

In [15]:
VIDEO_FPS = 25
MOCAP_FPS = 100
FRAME_TIME = 1000/VIDEO_FPS
FRAME_WIDTH = 960
FRAME_HEIGHT = 540
YOLO_LANDMARKS_NUM = 17

# Per sequence: {"c1".."c4": {frame index: 17 x [x, y] normalized keypoints}}.
# Frames without a usable pose hold [None, None] for every keypoint.
yolo_selection = {}
# Per sequence: one triangulated pose per frame, as nested lists.
yolo_triangulation = {}

for seq_key in list(sequences.keys())[105:]:
    print(seq_key, end=" | ")
    if not sequences[seq_key]['MoCap_data']:
        # Sequences without MoCap data are skipped entirely.
        continue

    video_files = get_video_files(seq_key)
    max_frames = sequences[seq_key]['number_of_frames']

    # assumes calibration files are returned in the same camera order as
    # `video_files` -- TODO confirm against the helpers in utils.video_tools
    camera_files_paths = get_camera_calibration_files(seq_key)
    cameras_params = [parse_camera_xml(camera_path) for camera_path in camera_files_paths]

    predicted_for_seq = {f"c{i+1}": {} for i in range(4)}
    # camera_landmarks_found[frame][camera] is True only when that camera
    # yielded a usable pose for that frame. Starting from False also covers
    # videos that are shorter than `max_frames`: their missing frames are
    # simply never marked as found.
    camera_landmarks_found = [[False, False, False, False] for _ in range(max_frames)]

    combined_cameras_with_landmarks = []
    combined_triangulation_results = []

    for c_idx, c_file in enumerate(video_files):
        results = model.predict(
            source=c_file,
            show=False,  # do not display during processing
            save=False,  # do not save annotated video
            project='sample_vids',
            name='yolo11',
            verbose=False,
            stream=True
        )

        for f_idx, result in enumerate(results):
            keypoints_n = result.keypoints.xyn
            xy_n = None

            # A frame is usable only if exactly one person was detected with
            # the full keypoint set ...
            if len(keypoints_n) == 1 and len(keypoints_n[0]) == YOLO_LANDMARKS_NUM:
                candidate = keypoints_n[0].cpu().numpy().tolist()
                # ... and none of the joints we depend on sits at [0, 0],
                # which is how the loop treats an undetected keypoint.
                if all(candidate[joint_idx] != [0, 0] for joint_idx in selected_joint_names):
                    xy_n = candidate

            if xy_n is None:
                # Store an explicit placeholder. (The original compared with
                # `==` instead of assigning, so rejected frames kept partial
                # or stale keypoints from an earlier frame.)
                xy_n = [[None, None] for _ in range(YOLO_LANDMARKS_NUM)]
            elif f_idx < max_frames:
                # Guard the index: a video may contain more frames than the
                # sequence listing declares.
                camera_landmarks_found[f_idx][c_idx] = True

            predicted_for_seq[f"c{c_idx+1}"][f_idx] = xy_n

    yolo_selection[seq_key] = predicted_for_seq

    for f_idx in range(max_frames):
        # Triangulate using only the cameras that saw a usable pose.
        found_landmarks_cameras_idx = [
            camera_i
            for camera_i, camera_l_found in enumerate(camera_landmarks_found[f_idx])
            if camera_l_found
        ]

        selected_cameras_params = [cameras_params[camera_i] for camera_i in found_landmarks_cameras_idx]
        # Scale normalized keypoints to pixel coordinates for each camera.
        found_2d_points = np.array([
            np.array(predicted_for_seq[f"c{camera_i+1}"][f_idx]) * [FRAME_WIDTH, FRAME_HEIGHT]
            for camera_i in found_landmarks_cameras_idx
        ])
        # NOTE(review): frames seen by fewer than two cameras are still passed
        # through -- confirm how triangulate_poses handles that case.
        triangulation_result = triangulate_poses(selected_cameras_params, found_2d_points)
        combined_triangulation_results.append(triangulation_result[0].tolist())

    yolo_triangulation[seq_key] = combined_triangulation_results


p26s6 | p26s7 | p26s8 | p26s9 | p26s10 | p27s1 | p27s2 | p27s3 | p27s4 | p27s5 | p27s6 | p27s7 | p27s8 | p27s9 | p27s10 | p28s1 | p28s2 | p28s3 | p28s4 | p28s5 | p28s6 | p28s7 | p28s8 | p28s9 | p28s10 | p29s1 | p29s2 | p29s3 | p29s4 | p29s5 | p29s6 | p29s7 | p29s8 | p29s9 | p29s10 | p30s1 | p30s2 | p30s3 | p30s4 | p30s5 | p30s6 | p30s7 | p30s8 | p30s9 | p30s10 | p31s1 | p31s2 | p31s3 | p31s4 | p31s5 | p31s6 | p31s7 | p31s8 | p31s9 | p31s10 | p32s1 | p32s2 | p32s3 | p32s4 | p32s9 | p32s10 | 

In [18]:
# Imported here because this cell is the first to use `json`; without it the
# cell fails with NameError on a fresh kernel (Restart & Run All).
import json

# Load the previously exported keypoints and triangulation results ...
with open("./datasets/yolo/dataset.json", 'r') as file:
    yolo_selection_prev = json.load(file)

with open("./datasets/yolo/triangulation.json", 'r') as file:
    yolo_triangulation_prev = json.load(file)

# ... and merge in the sequences processed in this session. Entries for a
# sequence key that already exists are overwritten by the new results.
yolo_selection_prev.update(yolo_selection)
yolo_triangulation_prev.update(yolo_triangulation)

In [19]:
import json

# Write the merged keypoints, the merged triangulation results and the joint
# mapping to disk, one pretty-printed JSON file each. JSON object keys are
# always strings, so the integer keys of `selected_joint_names` are written
# as strings.
outputs = (
    ("./datasets/yolo/dataset_v2.json", yolo_selection_prev),
    ("./datasets/yolo/triangulation_v2.json", yolo_triangulation_prev),
    ("./datasets/yolo/selected_joint_names.json", selected_joint_names),
)

for out_path, payload in outputs:
    with open(out_path, "w") as f:
        json.dump(payload, f, indent=4)