- frame_dir (str): The identifier of the corresponding video. (name of file)
- total_frames (int): The number of frames in this video. (len of 'keypoints')
- img_shape (tuple[int]): The shape of a video frame, a tuple with two elements, in the format of (height, width). Only required for 2D skeletons. (got it)
- original_shape (tuple[int]): Same as img_shape. (got it)
- label (int): The action label. ('overhead press')
- keypoint (np.ndarray, with shape [M x T x V x C]): The keypoint annotation. M: number of persons; T: number of frames (same as total_frames); V: number of keypoints (25 for NTURGB+D 3D skeleton, 17 for CoCo, 18 for OpenPose, etc. ); C: number of dimensions for keypoint coordinates (C=2 for 2D keypoint)
- keypoint_score (np.ndarray, with shape [M x T x V]): The confidence score of keypoints. Only required for 2D skeletons.

In [1]:
import pandas as pd
import json
import pickle
import os

In [2]:
# Settings
base_dir = '../../../data'
sample_class = 'correct' # 'knees_error', 'elbows_error'

extract_main_person = False


In [6]:

# Path to the folder with JSON files
json_folder = os.path.join(base_dir, 'ohp_poses', sample_class)

# Dictionary to store all loaded JSON data
all_data = {}

# Loop through all .json files in the folder
for filename in os.listdir(json_folder):
    if filename.endswith('.json'):
        filepath = os.path.join(json_folder, filename)
        with open(filepath, 'r') as file:
            try:
                data = json.load(file)
                key = os.path.splitext(filename)[0]  # filename without .json
                all_data[key] = data
            except json.JSONDecodeError:
                print(f"⚠️ Could not parse {filename}, skipping.")

# Example: print one loaded entry
print(all_data.keys())  # prints all filenames loaded


dict_keys(['62794_6', '62824_1', '62830_1', '62938_3', '62941_3', '62943_6', '62945_2', '62959_3', '63000_2', '63016_1', '63037_3', '63043_1', '63065_1', '63161_1', '63178_1', '63206_1', '63208_2', '63240_2', '63243_5', '63252_1', '63291_2', '63308_6', '63321_8', '63375_5', '63387_3', '63414_1', '63469_2', '63479_5', '63482_4', '63499_3', '63511_6', '63533_5', '63538_1', '63553_2', '63556_1', '63566_2', '63614_10', '63639_5', '63655_1', '63692_5', '63695_5', '63745_2', '63751_3', '63759_5', '63777_2', '63824_1', '63832_1', '63887_8', '63894_1', '63906_2', '63925_7', '63951_1', '63973_1', '63975_7', '63984_3', '63990_5', '64010_7', '64024_2', '64040_5', '64054_8', '64057_1', '64069_6', '64111_8', '64119_4', '64168_1', '64188_4', '64218_8', '64253_14', '64303_1', '64322_3', '64332_1', '64354_1', '64373_2', '64380_6', '64394_1', '64415_4', '64424_1', '64434_3', '64453_1', '64462_2', '64491_1', '64495_2', '64498_1', '64508_8', '64567_3', '64571_1', '64611_4', '64620_1', '64625_3', '64717_1

In [None]:
#print('No. people: ',len(all_data.get('62794_6').get('keypoints')[0].keys()))

all_data

In [None]:
#print('No. frames: ', len(all_data.get('62794_6').get('keypoints')))

In [7]:
import cv2

# Path to the folder containing .mp4 videos
video_folder = os.path.join(base_dir, 'ohp_labeled', sample_class)

# Dictionary to hold video metadata
video_info = {}

# Loop through all files in the folder
for filename in os.listdir(video_folder):
    if filename.lower().endswith('.mp4'):
        video_path = os.path.join(video_folder, filename)
        video_name = os.path.splitext(filename)[0]

        # Open video file
        vid = cv2.VideoCapture(video_path)

        if not vid.isOpened():
            print(f"❌ Failed to open: {filename}")
            continue

        # Get properties
        width = vid.get(cv2.CAP_PROP_FRAME_WIDTH)
        height = vid.get(cv2.CAP_PROP_FRAME_HEIGHT)
        fps = vid.get(cv2.CAP_PROP_FPS)
        frame_count = vid.get(cv2.CAP_PROP_FRAME_COUNT)
        duration = frame_count / fps if fps else 0

        # Store in dictionary
        video_info[video_name] = {
            "width": int(width),
            "height": int(height),
            "fps": round(fps, 2),
            "frame_count": int(frame_count),
            "duration_sec": round(duration, 2)
        }

        vid.release()

# Print or save the results
output_path = os.path.join(video_folder, 'video_properties.json')
with open(output_path, 'w') as f:
    json.dump(video_info, f, indent=2)

print(f"✅ Processed {len(video_info)} videos. Info saved to: {output_path}")

✅ Processed 1024 videos. Info saved to: ../../data_videos/correct\video_properties.json


In [8]:
with open(os.path.join(video_folder, 'video_properties.json'), 'r') as file:
    video_properties = json.load(file)

In [9]:
video_properties

{'62794_6': {'width': 480,
  'height': 270,
  'fps': 30.0,
  'frame_count': 71,
  'duration_sec': 2.37},
 '62824_1': {'width': 480,
  'height': 480,
  'fps': 30.0,
  'frame_count': 162,
  'duration_sec': 5.4},
 '62830_1': {'width': 480,
  'height': 600,
  'fps': 30.0,
  'frame_count': 310,
  'duration_sec': 10.33},
 '62938_3': {'width': 480,
  'height': 480,
  'fps': 30.0,
  'frame_count': 55,
  'duration_sec': 1.83},
 '62941_3': {'width': 480,
  'height': 270,
  'fps': 30.0,
  'frame_count': 58,
  'duration_sec': 1.93},
 '62943_6': {'width': 480,
  'height': 600,
  'fps': 30.0,
  'frame_count': 125,
  'duration_sec': 4.17},
 '62945_2': {'width': 480,
  'height': 600,
  'fps': 30.0,
  'frame_count': 75,
  'duration_sec': 2.5},
 '62959_3': {'width': 480,
  'height': 600,
  'fps': 30.0,
  'frame_count': 115,
  'duration_sec': 3.83},
 '63000_2': {'width': 480,
  'height': 270,
  'fps': 30.0,
  'frame_count': 139,
  'duration_sec': 4.63},
 '63016_1': {'width': 480,
  'height': 600,
  'fps'

In [10]:
people_lst = []
for i in all_data.keys():
    print('No. people: ',len(all_data.get(i).get('keypoints')[0].keys()))
    people_lst.append([i, len(all_data.get(i).get('keypoints')[0].keys())])
    

No. people:  2
No. people:  1
No. people:  1
No. people:  1
No. people:  2
No. people:  1
No. people:  1
No. people:  1
No. people:  2
No. people:  1
No. people:  1
No. people:  1
No. people:  1
No. people:  2
No. people:  1
No. people:  1
No. people:  3
No. people:  1
No. people:  2
No. people:  1
No. people:  1
No. people:  1
No. people:  1
No. people:  2
No. people:  1
No. people:  1
No. people:  1
No. people:  1
No. people:  1
No. people:  1
No. people:  1
No. people:  1
No. people:  1
No. people:  1
No. people:  1
No. people:  2
No. people:  1
No. people:  1
No. people:  1
No. people:  1
No. people:  1
No. people:  1
No. people:  1
No. people:  1
No. people:  1
No. people:  1
No. people:  1
No. people:  1
No. people:  3
No. people:  1
No. people:  1
No. people:  1
No. people:  2
No. people:  2
No. people:  1
No. people:  1
No. people:  2
No. people:  1
No. people:  1
No. people:  1
No. people:  1
No. people:  1
No. people:  1
No. people:  1
No. people:  3
No. people:  1
No. people

In [11]:
for i in people_lst:
    if i[1]==0:
        print(i)

['65394_1', 0]
['65531_4', 0]
['68572_2', 0]
['69065_1', 0]
['74890_14', 0]
['75901_2', 0]
['79695_1', 0]


In [12]:
people_lst = []
for i in all_data.keys():
    people_visible = []
    for j in range(len(all_data.get(i).get('keypoints'))):
        if len(all_data.get(i).get('keypoints')[j])>0:
            people_visible.append(len(all_data.get(i).get('keypoints')[j].keys()))
    people_lst.append([i, max(people_visible)])
people_lst

[['62794_6', 2],
 ['62824_1', 3],
 ['62830_1', 1],
 ['62938_3', 1],
 ['62941_3', 2],
 ['62943_6', 3],
 ['62945_2', 1],
 ['62959_3', 2],
 ['63000_2', 2],
 ['63016_1', 2],
 ['63037_3', 2],
 ['63043_1', 1],
 ['63065_1', 1],
 ['63161_1', 2],
 ['63178_1', 1],
 ['63206_1', 1],
 ['63208_2', 3],
 ['63240_2', 1],
 ['63243_5', 3],
 ['63252_1', 1],
 ['63291_2', 1],
 ['63308_6', 1],
 ['63321_8', 1],
 ['63375_5', 2],
 ['63387_3', 1],
 ['63414_1', 2],
 ['63469_2', 2],
 ['63479_5', 1],
 ['63482_4', 1],
 ['63499_3', 1],
 ['63511_6', 1],
 ['63533_5', 1],
 ['63538_1', 1],
 ['63553_2', 1],
 ['63556_1', 2],
 ['63566_2', 4],
 ['63614_10', 1],
 ['63639_5', 1],
 ['63655_1', 1],
 ['63692_5', 1],
 ['63695_5', 1],
 ['63745_2', 1],
 ['63751_3', 2],
 ['63759_5', 1],
 ['63777_2', 1],
 ['63824_1', 1],
 ['63832_1', 1],
 ['63887_8', 1],
 ['63894_1', 3],
 ['63906_2', 1],
 ['63925_7', 1],
 ['63951_1', 2],
 ['63973_1', 2],
 ['63975_7', 2],
 ['63984_3', 1],
 ['63990_5', 2],
 ['64010_7', 2],
 ['64024_2', 1],
 ['64040_5', 

In [13]:
for i in people_lst:
    if i[1]>3:
        print(i)

['63566_2', 4]
['64253_14', 4]
['64303_1', 4]
['64354_1', 4]
['64849_4', 5]
['67396_2', 4]
['67526_1', 4]
['68611_1', 4]
['68761_1', 5]
['69392_1', 4]
['69556_2', 5]
['69777_3', 4]
['69988_6', 5]
['70183_7', 4]
['71318_2', 5]
['71697_1', 4]
['71718_11', 4]
['71795_4', 4]
['71951_9', 5]
['72616_2', 6]
['73259_8', 4]
['73835_6', 4]
['73959_5', 6]
['75565_2', 4]
['76696_2', 4]
['76846_3', 4]
['77113_7', 4]
['77161_2', 4]
['77344_3', 5]
['77848_1', 4]
['78095_2', 4]
['78104_3', 4]
['78782_1', 4]
['78930_2', 4]
['78967_2', 4]
['79032_1', 4]
['79062_2', 4]
['79269_1', 4]
['79592_1', 4]
['79619_3', 4]
['80091_3', 4]
['80322_1', 4]
['80557_5', 5]


In [None]:
def get_bounding_box(keypoints, threshold=0.0):
    """
    keypoints: list of (x, y, confidence) or (x, y)
    """
    valid_points = []
    for kp in keypoints:
        if len(kp) == 3:
            x, y, conf = kp
            if conf >= threshold:
                valid_points.append((x, y))
        elif len(kp) == 2:
            x, y = kp
            valid_points.append((x, y))

    if not valid_points:
        return None

    xs, ys = zip(*valid_points)
    return min(xs), min(ys), max(xs), max(ys)

In [None]:
def bbox_area(bbox):
    x_min, y_min, x_max, y_max = bbox
    return (x_max - x_min) * (y_max - y_min)

In [None]:
#all_data.get('80557_5').get('keypoints')

In [None]:
#video_properties.get('80557_5')

In [None]:
len(all_data.keys())

In [None]:
main_person = None
counter = 0
main_person_keypoints = {}

if not extract_main_person:
    exit()

for video in all_data.keys():
    main_person = None
    largest_area = 0
    for frame in all_data.get(video).get('keypoints'):
        if len(frame.keys())>0:
            for (person, person_keypoints) in frame.items():  # each is a list of keypoints
                bbox = get_bounding_box(person_keypoints, threshold=0.2)  # optional threshold
                if bbox:
                    area = bbox_area(bbox)
                    if area > largest_area:
                        largest_area = area
                        main_person = {
                            "bbox": bbox,
                            "keypoints": person_keypoints,
                            "area": area,
                            "person_id": person
                        }
            counter+=1
            if main_person:
                print("Main person bounding box:", main_person["bbox"], video, counter)
                print(main_person['area'])
                print(main_person['person_id'])
            break
    main_persons_frames = []
    for frame in all_data.get(video).get('keypoints'):
        if frame.get(main_person['person_id']):
            main_persons_frames.append(frame.get(main_person['person_id'])[:17])
        
    main_person_keypoints[video] = {main_person['person_id']: main_persons_frames}


In [33]:
import itertools

all_keypoints= {}
for video in all_data.keys():
    persons_frames = {}
    all_keypoints[video] = []
    people_lst = list([list(all_data.get(video).get('keypoints')[i].keys()) for i in range(len(all_data.get(video).get('keypoints')))])
    people_set = list(set(itertools.chain.from_iterable(people_lst)))
    for person in people_set:
            persons_frames[person] = []
    
    for frame in all_data.get(video).get('keypoints'):
        for person in frame.keys():
            persons_frames[person].append(frame.get(person)[:17])
            
    all_keypoints[video].append(persons_frames)


In [44]:
len(all_keypoints['62794_6'][0]['44'])

71

In [23]:
#main_person_keypoints.get('80756_1').get('1860')[0]

In [24]:
if extract_main_person:
    keypoints = main_person_keypoints.keys()
else:
    pass

In [25]:
import random
from sklearn.model_selection import train_test_split

video_ids = list(keypoints.keys())
random.seed(42)

train_val, test = train_test_split(video_ids, test_size=0.10, random_state=42)

val_size = 0.1111
train, val = train_test_split(train_val, test_size=val_size, random_state=42)

split = {
    'train': train,
    'val': val,
    'test': test
}

NameError: name 'keypoints' is not defined

In [None]:
print(len(train), len(test), len(val))

In [None]:
coords = {}
for video_id, video in main_person_keypoints.items():
    coords[video_id] = []
    for frame in video.get(list(video.keys())[0]):
        coord2d = []
        for keypoint in frame:
            coord2d.append(keypoint[:2])
        coords[video_id].append(coord2d)  

In [None]:
coords[video_id]

In [None]:
confidences = {}

for video_id, video in main_person_keypoints.items():
    confidences[video_id] = []
    for frame in video.get(list(video.keys())[0]):
        conf = []
        for keypoint in frame:
            conf.append(keypoint[-1])
        confidences[video_id].append(conf) 
        

In [None]:
import numpy as np

In [None]:
final_dict ={}
final_dict['split'] = {'train': train, 'test': test, 'val': val}
final_dict['annotations'] = []

for video_id, video in main_person_keypoints.items():
    final_dict['annotations'].append({'frame_dir': video_id, 
                                      'total_frames': video_properties.get(video_id).get('frame_count'), 
                                      'img_shape': (video_properties.get(video_id).get('height'), video_properties.get(video_id).get('width')), 
                                      'original_shape': (video_properties.get(video_id).get('height'), video_properties.get(video_id).get('width')), 
                                      'label': 0, 'keypoint': np.array([coords[video_id]], dtype='float16'), 
                                      'keypoint_score': np.array([confidences[video_id]], dtype='float16')})

In [None]:
final_dict['annotations'][0]

In [None]:
import pickle


with open(os.path.join(f'{sample_class}.pkl'), 'wb') as handle:
    pickle.dump(final_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)


In [None]:
from joblib import load

obj = load("correct.pkl")
print(type(obj))

In [None]:
obj.get('split')

In [None]:
obj.get('annotations')