- frame_dir (str): The identifier of the corresponding video. (name of file)
- total_frames (int): The number of frames in this video. (len of 'keypoints')
- img_shape (tuple[int]): The shape of a video frame, a tuple with two elements, in the format of (height, width). Only required for 2D skeletons. (got it)
- original_shape (tuple[int]): Same as img_shape. (got it)
- label (int): The action label. ('overhead press')
- keypoint (np.ndarray, with shape [M x T x V x C]): The keypoint annotation. M: number of persons; T: number of frames (same as total_frames); V: number of keypoints (25 for NTURGB+D 3D skeleton, 17 for CoCo, 18 for OpenPose, etc. ); C: number of dimensions for keypoint coordinates (C=2 for 2D keypoint)
- keypoint_score (np.ndarray, with shape [M x T x V]): The confidence score of keypoints. Only required for 2D skeletons.

In [3]:
import pandas as pd
import json
import pickle
import os

In [4]:
# Settings
base_dir = '../../../data'
sample_class = 'correct' # 'knees_error', 'elbows_error'

extract_main_person = False


In [5]:

# Path to the folder with JSON files
json_folder = os.path.join(base_dir, 'ohp_poses', sample_class)

# Dictionary to store all loaded JSON data
all_data = {}

# Loop through all .json files in the folder
for filename in os.listdir(json_folder):
    if filename.endswith('.json'):
        filepath = os.path.join(json_folder, filename)
        with open(filepath, 'r') as file:
            try:
                data = json.load(file)
                key = os.path.splitext(filename)[0]  # filename without .json
                all_data[key] = data
            except json.JSONDecodeError:
                print(f"⚠️ Could not parse {filename}, skipping.")

# Example: print one loaded entry
print(all_data.keys())  # prints all filenames loaded


dict_keys(['77473_1', '76525_4', '63414_1', '78547_3', '79905_5', '63655_1', '64024_2', '64188_4', '79702_2', '68718_1', '78738_2', '76760_4', '63745_2', '69630_8', '76289_6', '79770_1', '63043_1', '75500_1', '79497_3', '65810_12', '70430_1', '78921_4', '75184_4', '73356_1', '79193_4', '65543_4', '63240_2', '77165_1', '75408_2', '67526_1', '68800_6', '69377_7', '70509_1', '67200_5', '78436_2', '65211_15', '68777_1', '78193_2', '74504_1', '67865_5', '79144_4', '66606_1', '80055_1', '80467_3', '75360_1', '77004_4', '64890_1', '80186_1', '63538_1', '77676_1', '76593_2', '65616_2', '80660_6', '63973_1', '71938_1', '73964_3', '68153_4', '64915_2', '72376_9', '78715_6', '73405_3', '68856_1', '68052_2', '77188_1', '76213_4', '70819_1', '79339_7', '66636_1', '72126_5', '63482_4', '69574_6', '68157_1', '71346_4', '76086_3', '64498_1', '76183_2', '78182_2', '69026_1', '75100_1', '77562_2', '64069_6', '69624_3', '76310_1', '73147_1', '79269_1', '78772_5', '68641_1', '69448_1', '77192_1', '66479_3

In [6]:
#print('No. people: ',len(all_data.get('62794_6').get('keypoints')[0].keys()))

all_data

In [7]:
#print('No. frames: ', len(all_data.get('62794_6').get('keypoints')))

In [8]:
import cv2

# Path to the folder containing .mp4 videos
video_folder = os.path.join(base_dir, 'ohp_labeled', sample_class)

# Dictionary to hold video metadata
video_info = {}

# Loop through all files in the folder
for filename in os.listdir(video_folder):
    if filename.lower().endswith('.mp4'):
        video_path = os.path.join(video_folder, filename)
        video_name = os.path.splitext(filename)[0]

        # Open video file
        vid = cv2.VideoCapture(video_path)

        if not vid.isOpened():
            print(f"❌ Failed to open: {filename}")
            continue

        # Get properties
        width = vid.get(cv2.CAP_PROP_FRAME_WIDTH)
        height = vid.get(cv2.CAP_PROP_FRAME_HEIGHT)
        fps = vid.get(cv2.CAP_PROP_FPS)
        frame_count = vid.get(cv2.CAP_PROP_FRAME_COUNT)
        duration = frame_count / fps if fps else 0

        # Store in dictionary
        video_info[video_name] = {
            "width": int(width),
            "height": int(height),
            "fps": round(fps, 2),
            "frame_count": int(frame_count),
            "duration_sec": round(duration, 2)
        }

        vid.release()

# Print or save the results
output_path = os.path.join(video_folder, 'video_properties.json')
with open(output_path, 'w') as f:
    json.dump(video_info, f, indent=2)

print(f"✅ Processed {len(video_info)} videos. Info saved to: {output_path}")

✅ Processed 1024 videos. Info saved to: ../../../data/ohp_labeled/correct/video_properties.json


In [9]:
with open(os.path.join(video_folder, 'video_properties.json'), 'r') as file:
    video_properties = json.load(file)

In [10]:
video_properties

{'66980_1': {'width': 480,
  'height': 480,
  'fps': 30.0,
  'frame_count': 290,
  'duration_sec': 9.67},
 '68147_2': {'width': 480,
  'height': 270,
  'fps': 30.0,
  'frame_count': 50,
  'duration_sec': 1.67},
 '69392_1': {'width': 480,
  'height': 600,
  'fps': 30.0,
  'frame_count': 614,
  'duration_sec': 20.47},
 '68153_4': {'width': 480,
  'height': 480,
  'fps': 30.0,
  'frame_count': 42,
  'duration_sec': 1.4},
 '77488_4': {'width': 480,
  'height': 480,
  'fps': 30.0,
  'frame_count': 75,
  'duration_sec': 2.5},
 '68641_1': {'width': 480,
  'height': 270,
  'fps': 30.0,
  'frame_count': 173,
  'duration_sec': 5.77},
 '77747_1': {'width': 480,
  'height': 480,
  'fps': 30.0,
  'frame_count': 241,
  'duration_sec': 8.03},
 '72145_3': {'width': 480,
  'height': 600,
  'fps': 30.0,
  'frame_count': 85,
  'duration_sec': 2.83},
 '68052_2': {'width': 480,
  'height': 270,
  'fps': 30.0,
  'frame_count': 69,
  'duration_sec': 2.3},
 '73132_2': {'width': 480,
  'height': 600,
  'fps': 

In [11]:
people_lst = []
for i in all_data.keys():
    print('No. people: ',len(all_data.get(i).get('keypoints')[0].keys()))
    people_lst.append([i, len(all_data.get(i).get('keypoints')[0].keys())])
    

No. people:  1
No. people:  1
No. people:  1
No. people:  2
No. people:  1
No. people:  1
No. people:  1
No. people:  1
No. people:  1
No. people:  2
No. people:  1
No. people:  1
No. people:  1
No. people:  1
No. people:  1
No. people:  1
No. people:  1
No. people:  1
No. people:  1
No. people:  1
No. people:  1
No. people:  1
No. people:  1
No. people:  1
No. people:  1
No. people:  1
No. people:  1
No. people:  1
No. people:  1
No. people:  1
No. people:  2
No. people:  1
No. people:  1
No. people:  2
No. people:  1
No. people:  3
No. people:  1
No. people:  1
No. people:  1
No. people:  1
No. people:  1
No. people:  1
No. people:  1
No. people:  1
No. people:  2
No. people:  1
No. people:  1
No. people:  1
No. people:  1
No. people:  1
No. people:  1
No. people:  2
No. people:  1
No. people:  2
No. people:  1
No. people:  1
No. people:  2
No. people:  1
No. people:  1
No. people:  1
No. people:  2
No. people:  1
No. people:  1
No. people:  1
No. people:  1
No. people:  1
No. people

In [12]:
for i in people_lst:
    if i[1]==0:
        print(i)

['68572_2', 0]
['65531_4', 0]
['79695_1', 0]
['75901_2', 0]
['74890_14', 0]
['65394_1', 0]
['69065_1', 0]


In [13]:
people_lst = []
for i in all_data.keys():
    people_visible = []
    for j in range(len(all_data.get(i).get('keypoints'))):
        if len(all_data.get(i).get('keypoints')[j])>0:
            people_visible.append(len(all_data.get(i).get('keypoints')[j].keys()))
    people_lst.append([i, max(people_visible)])
people_lst

[['77473_1', 1],
 ['76525_4', 1],
 ['63414_1', 2],
 ['78547_3', 2],
 ['79905_5', 2],
 ['63655_1', 1],
 ['64024_2', 1],
 ['64188_4', 1],
 ['79702_2', 1],
 ['68718_1', 3],
 ['78738_2', 1],
 ['76760_4', 3],
 ['63745_2', 1],
 ['69630_8', 1],
 ['76289_6', 1],
 ['79770_1', 2],
 ['63043_1', 1],
 ['75500_1', 1],
 ['79497_3', 1],
 ['65810_12', 1],
 ['70430_1', 1],
 ['78921_4', 1],
 ['75184_4', 1],
 ['73356_1', 1],
 ['79193_4', 2],
 ['65543_4', 1],
 ['63240_2', 1],
 ['77165_1', 1],
 ['75408_2', 2],
 ['67526_1', 4],
 ['68800_6', 2],
 ['69377_7', 1],
 ['70509_1', 1],
 ['67200_5', 3],
 ['78436_2', 1],
 ['65211_15', 3],
 ['68777_1', 1],
 ['78193_2', 3],
 ['74504_1', 1],
 ['67865_5', 1],
 ['79144_4', 2],
 ['66606_1', 2],
 ['80055_1', 1],
 ['80467_3', 1],
 ['75360_1', 3],
 ['77004_4', 1],
 ['64890_1', 1],
 ['80186_1', 2],
 ['63538_1', 1],
 ['77676_1', 1],
 ['76593_2', 1],
 ['65616_2', 3],
 ['80660_6', 1],
 ['63973_1', 2],
 ['71938_1', 2],
 ['73964_3', 1],
 ['68153_4', 3],
 ['64915_2', 1],
 ['72376_9',

In [14]:
for i in people_lst:
    if i[1]>3:
        print(i)

['67526_1', 4]
['79269_1', 4]
['78104_3', 4]
['73959_5', 6]
['72616_2', 6]
['76846_3', 4]
['79062_2', 4]
['80322_1', 4]
['77848_1', 4]
['64849_4', 5]
['80091_3', 4]
['63566_2', 4]
['71718_11', 4]
['73259_8', 4]
['71318_2', 5]
['77161_2', 4]
['70183_7', 4]
['69392_1', 4]
['79032_1', 4]
['64354_1', 4]
['71795_4', 4]
['64253_14', 4]
['64303_1', 4]
['77344_3', 5]
['78930_2', 4]
['68761_1', 5]
['71697_1', 4]
['73835_6', 4]
['68611_1', 4]
['79592_1', 4]
['78782_1', 4]
['69988_6', 5]
['76696_2', 4]
['78967_2', 4]
['69556_2', 5]
['77113_7', 4]
['78095_2', 4]
['80557_5', 5]
['75565_2', 4]
['69777_3', 4]
['71951_9', 5]
['79619_3', 4]
['67396_2', 4]


In [15]:
def get_bounding_box(keypoints, threshold=0.0):
    """
    keypoints: list of (x, y, confidence) or (x, y)
    """
    valid_points = []
    for kp in keypoints:
        if len(kp) == 3:
            x, y, conf = kp
            if conf >= threshold:
                valid_points.append((x, y))
        elif len(kp) == 2:
            x, y = kp
            valid_points.append((x, y))

    if not valid_points:
        return None

    xs, ys = zip(*valid_points)
    return min(xs), min(ys), max(xs), max(ys)

In [16]:
def bbox_area(bbox):
    x_min, y_min, x_max, y_max = bbox
    return (x_max - x_min) * (y_max - y_min)

In [17]:
#all_data.get('80557_5').get('keypoints')

In [18]:
#video_properties.get('80557_5')

In [19]:
len(all_data.keys())

1024

In [None]:
main_person = None
counter = 0
main_person_keypoints = {}

if not extract_main_person:
    exit()

for video in all_data.keys():
    main_person = None
    largest_area = 0
    for frame in all_data.get(video).get('keypoints'):
        if len(frame.keys())>0:
            for (person, person_keypoints) in frame.items():  # each is a list of keypoints
                bbox = get_bounding_box(person_keypoints, threshold=0.2)  # optional threshold
                if bbox:
                    area = bbox_area(bbox)
                    if area > largest_area:
                        largest_area = area
                        main_person = {
                            "bbox": bbox,
                            "keypoints": person_keypoints,
                            "area": area,
                            "person_id": person
                        }
            counter+=1
            if main_person:
                print("Main person bounding box:", main_person["bbox"], video, counter)
                print(main_person['area'])
                print(main_person['person_id'])
            break
    main_persons_frames = []
    for frame in all_data.get(video).get('keypoints'):
        if frame.get(main_person['person_id']):
            main_persons_frames.append(frame.get(main_person['person_id'])[:17])
        
    main_person_keypoints[video] = {main_person['person_id']: main_persons_frames}


Main person bounding box: (136.37591552734375, 234.962158203125, 604.0657958984375, 384.6919860839844) 68679_6 326
70027.12528958358
823
Main person bounding box: (186.01382446289062, 195.05838012695312, 482.10430908203125, 317.0549011230469) 68683_1 327
36122.00902358256
818
Main person bounding box: (180.77671813964844, 77.41246032714844, 486.28955078125, 342.7523498535156) 68689_1 328
81064.74126201007
3
Main person bounding box: (104.00260925292969, 15.4814453125, 455.77947998046875, 173.87713623046875) 68718_1 329
55719.940487849526
867
Main person bounding box: (146.926025390625, 112.95146179199219, 480.00177001953125, 287.18536376953125) 68720_3 330
58033.086640768684
822
Main person bounding box: (35.81121826171875, 192.36090087890625, 256.98016357421875, 250.99481201171875) 68755_1 331
12968.000284790993
884
Main person bounding box: (206.88912963867188, 212.92697143554688, 450.66973876953125, 297.44622802734375) 68761_1 332
20604.155855235644
4
Main person bounding box: (202.

Main person bounding box: (161.8844451904297, 129.51243591308594, 482.9296875, 370.0581970214844) 71357_3 476
77226.07216158579
556
Main person bounding box: (173.23776245117188, 172.8264617919922, 534.99755859375, 313.90814208984375) 71363_2 477
51037.67990400316
578
Main person bounding box: (122.73760986328125, 183.00286865234375, 268.9639892578125, 280.4691467285156) 71382_2 478
14252.140956139192
566
Main person bounding box: (175.5814971923828, 170.15957641601562, 565.4210815429688, 272.7928466796875) 71418_4 479
40010.511420131195
570
Main person bounding box: (276.8362731933594, 187.71826171875, 602.3541870117188, 324.1500244140625) 71425_1 480
44410.98277113959
580
Main person bounding box: (166.2648468017578, 264.7011413574219, 482.57275390625, 363.9401550292969) 71433_6 481
31390.084717664868
591
Main person bounding box: (91.64382934570312, 170.72686767578125, 479.34130859375, 292.69140625) 71450_1 482
47285.34416287579
572
Main person bounding box: (183.92312622070312, 150

Main person bounding box: (116.96694946289062, 102.1796875, 483.11566162109375, 233.30545043945312) 75265_4 689
48011.5292310426
512
Main person bounding box: (61.086334228515625, 195.18569946289062, 420.34326171875, 289.6539611816406) 75292_2 690
33938.37745042145
486
Main person bounding box: (87.77558898925781, 279.5751647949219, 475.9063720703125, 372.7276611328125) 75293_2 691
36155.35134958057
453
Main person bounding box: (263.99224853515625, 172.1409912109375, 602.9464111328125, 300.0357360839844) 75299_4 692
43350.45614908449
464
Main person bounding box: (282.76397705078125, 241.8470458984375, 434.98956298828125, 323.926513671875) 75317_2 693
12494.595075249672
495
Main person bounding box: (217.50018310546875, 212.95230102539062, 530.7982177734375, 349.8548278808594) 75319_2 694
42891.29260489717
470
Main person bounding box: (99.56672668457031, 110.93217468261719, 483.33441162109375, 272.431884765625) 75330_1 695
61978.36985647562
475
Main person bounding box: (328.30749511

Main person bounding box: (109.98915100097656, 213.02456665039062, 253.884521484375, 293.4987487792969) 80651_10 1014
11579.862251787446
1841
Main person bounding box: (150.29571533203125, 226.17950439453125, 425.7340087890625, 348.6500244140625) 80660_6 1015
33733.07103297487
1858
Main person bounding box: (114.54934692382812, 133.26034545898438, 479.3975830078125, 237.57757568359375) 80672_2 1016
38059.95744061563
1852
Main person bounding box: (120.14279174804688, 214.93109130859375, 418.638916015625, 373.03277587890625) 80676_3 1017
47192.74008441344
1839
Main person bounding box: (129.13446044921875, 178.634521484375, 479.29119873046875, 300.46575927734375) 80677_3 1018
42660.02884635329
1821
Main person bounding box: (223.20602416992188, 251.46253967285156, 602.8665771484375, 383.9990234375) 80717_2 1019
50318.874715914484
1855
Main person bounding box: (171.90570068359375, 129.21343994140625, 437.2290344238281, 290.850830078125) 80719_4 1020
42886.1712081451
1829
Main person bou

In [21]:
all_keypoints= {}
for video in all_data.keys():
    persons_frames = {}
    all_keypoints[video] = []
    for person in video.keys()[]:
            persons_frames[person] = []
    
    for frame in all_data.get(video).get('keypoints'):
        
        for person in frame.keys():
            persons_frames[person] = []
        for person in frame.keys():
            persons_frames[person].append(frame.get(person)[:17])
            
    all_keypoints[video].append(persons_frames)

    

In [22]:
all_keypoints

{'77473_1': [{'1026': [[[122.98733520507812,
      117.8563232421875,
      0.8428214192390442],
     [116.77931213378906, 112.776611328125, 0.7452430129051208],
     [117.30815124511719, 111.83099365234375, 0.8920747637748718],
     [142.47830200195312, 70.981201171875, 0.5564297437667847],
     [126.81362915039062, 102.12994384765625, 0.9882397651672363],
     [201.21792602539062, 71.40676879882812, 0.8673635125160217],
     [185.5972900390625, 139.3697509765625, 0.7845907211303711],
     [274.85479736328125, 80.365966796875, 0.798898458480835],
     [255.9560546875, 201.08743286132812, 0.8864536881446838],
     [298.4752197265625, 113.3426513671875, 0.3911632001399994],
     [179.85678100585938, 204.2281494140625, 0.8377419710159302],
     [332.26666259765625, 112.42718505859375, 0.6382679343223572],
     [327.34063720703125, 159.76248168945312, 0.6316418647766113],
     [451.75732421875, 113.61151123046875, 0.7016116380691528],
     [457.11798095703125, 165.61859130859375, 0.721979

In [23]:
#main_person_keypoints.get('80756_1').get('1860')[0]

In [24]:
if extract_main_person:
    keypoints = main_person_keypoints.keys()
else:
    pass

In [25]:
import random
from sklearn.model_selection import train_test_split

video_ids = list(keypoints.keys())
random.seed(42)

train_val, test = train_test_split(video_ids, test_size=0.10, random_state=42)

val_size = 0.1111
train, val = train_test_split(train_val, test_size=val_size, random_state=42)

split = {
    'train': train,
    'val': val,
    'test': test
}

NameError: name 'keypoints' is not defined

In [None]:
print(len(train), len(test), len(val))

In [None]:
coords = {}
for video_id, video in main_person_keypoints.items():
    coords[video_id] = []
    for frame in video.get(list(video.keys())[0]):
        coord2d = []
        for keypoint in frame:
            coord2d.append(keypoint[:2])
        coords[video_id].append(coord2d)  

In [None]:
coords[video_id]

In [None]:
confidences = {}

for video_id, video in main_person_keypoints.items():
    confidences[video_id] = []
    for frame in video.get(list(video.keys())[0]):
        conf = []
        for keypoint in frame:
            conf.append(keypoint[-1])
        confidences[video_id].append(conf) 
        

In [None]:
import numpy as np

In [None]:
final_dict ={}
final_dict['split'] = {'train': train, 'test': test, 'val': val}
final_dict['annotations'] = []

for video_id, video in main_person_keypoints.items():
    final_dict['annotations'].append({'frame_dir': video_id, 
                                      'total_frames': video_properties.get(video_id).get('frame_count'), 
                                      'img_shape': (video_properties.get(video_id).get('height'), video_properties.get(video_id).get('width')), 
                                      'original_shape': (video_properties.get(video_id).get('height'), video_properties.get(video_id).get('width')), 
                                      'label': 0, 'keypoint': np.array([coords[video_id]], dtype='float16'), 
                                      'keypoint_score': np.array([confidences[video_id]], dtype='float16')})

In [None]:
final_dict['annotations'][0]

In [None]:
import pickle


with open(os.path.join(f'{sample_class}.pkl'), 'wb') as handle:
    pickle.dump(final_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)


In [None]:
from joblib import load

obj = load("correct.pkl")
print(type(obj))

In [None]:
obj.get('split')

In [None]:
obj.get('annotations')