In [1]:
# imports
import os
import json
import pandas as pd
import pandas as pd
import numpy as np
from scipy.ndimage import median_filter

In [3]:
# loading keypoint data for PANDA 3
class KeypointsDataset:
    def __init__(self, json_dir):
        self.json_dir = json_dir
        self.json_files = [f for f in os.listdir(json_dir) if f.endswith('.json')]
        self.data = self.load_data_yt() # change depending on dataset

    def keypoint_to_face_part(self, index):
        if 1 <= index <= 33:
            return "Chin"
        elif 34 <= index <= 42:
            return "Right_brow"
        elif 43 <= index <= 51:
            return "Left_brow"
        elif 52 <= index <= 66:
            return "Nose"
        elif 67 <= index <= 75:
            return "Right_Eye"
        elif 76 <= index <= 84:
            return "Left_Eye"
        elif 85 <= index <= 104:
            return "Mouth"
        elif index == 105:
            return "Right_Pupil"
        elif index == 106:
            return "Left_Pupil"
        else:
            return "Unknown"

    def load_data(self):
        data = []
        for json_file in self.json_files:
            video_id = os.path.splitext(json_file)[0]
            parts = video_id.split('_')
            date = '_'.join(parts[:3])
            infant_id = parts[4]  # to extract the number before "cam"
            #cam = parts[5]
            #print(f"Processing infant ID: {infant_id} on date: {date}, camera: {cam}")
            if "vid3" in video_id:
                print(f"Processing infant ID: {infant_id} on date: {date}")
                with open(os.path.join(self.json_dir, json_file), 'r') as f:
                    frames = json.load(f)
                    for frame_index, frame_data in enumerate(frames):
                        frame_id = frame_data["frame_id"]
                        for instance_index, instance in enumerate(frame_data["instances"]):
                            keypoints = instance["keypoints"]
                            keypoint_scores = instance["keypoint_scores"]
                            for idx, (kp, score) in enumerate(zip(keypoints, keypoint_scores)):
                                face_part = self.keypoint_to_face_part(idx + 1)
                                data.append({
                                    "infant_id": int(infant_id),
                                    "date": date,
                                    #"cam": cam,
                                    "frame_id": frame_id,
                                    "keypoint": tuple(kp),
                                    "keypoint_score": score,
                                    "face_part": face_part,
                                    "keypoint_index": idx + 1
                                    
                                })
    def load_data_yt(self):
        data = []
        for json_file in self.json_files:
            video_id = os.path.splitext(json_file)[0]
            infant_id = video_id.split('_')[-1]  # to extract the number after "video_"
            print(f"Processing infant ID: {infant_id}")
            with open(os.path.join(self.json_dir, json_file), 'r') as f:
                frames = json.load(f)
                for frame_index, frame_data in enumerate(frames):
                    frame_id = frame_data["frame_id"]
                    # print(f"  Processing frame {frame_index + 1}/{len(frames)}")
                    for instance_index, instance in enumerate(frame_data["instances"]):
                        keypoints = instance["keypoints"]
                        keypoint_scores = instance["keypoint_scores"]
                        for idx, (kp, score) in enumerate(zip(keypoints, keypoint_scores)):
                            # indexing the datapoints for future groupping purposes
                            face_part = self.keypoint_to_face_part(idx + 1)
                            data.append({
                                "infant_id": int(infant_id),
                                "frame_id": frame_id,
                                "keypoint": tuple(kp),
                                "keypoint_score": score,
                                "face_part": face_part,
                                "keypoint_index": idx + 1
                            })

        return data

In [4]:
# working with PANDA 3/Yt data
json_dir = r'/workspaces/wiggle-face/data-ioana/PANDA3/annotations' 
json_dir_yt = r'/workspaces/wiggle-face/data-ioana/YT/annotations' 
dataset = KeypointsDataset(json_dir_yt)
df = pd.DataFrame(dataset.data)

Processing infant ID: 000343
Processing infant ID: 000285
Processing infant ID: 000244
Processing infant ID: 000079
Processing infant ID: 000339
Processing infant ID: 000345
Processing infant ID: 000340
Processing infant ID: 000031
Processing infant ID: 000179
Processing infant ID: 000342
Processing infant ID: 000282
Processing infant ID: 000086
Processing infant ID: 000346
Processing infant ID: 000348
Processing infant ID: 000365
Processing infant ID: 000070
Processing infant ID: 000366
Processing infant ID: 000360
Processing infant ID: 000352
Processing infant ID: 000000
Processing infant ID: 000191
Processing infant ID: 000047
Processing infant ID: 000353
Processing infant ID: 000071
Processing infant ID: 000347
Processing infant ID: 000072
Processing infant ID: 000358
Processing infant ID: 000088
Processing infant ID: 000369
Processing infant ID: 000052
Processing infant ID: 000089
Processing infant ID: 000090
Processing infant ID: 000073
Processing infant ID: 000077
Processing inf

In [5]:
df = df.sort_values(by=['infant_id', 'frame_id']).reset_index(drop=True)
print(df)

         infant_id  frame_id                                  keypoint  \
0                0         0  (351.60113525390625, 463.11529541015625)   
1                0         0    (358.9483947753906, 466.7889404296875)   
2                0         0    (367.3977355957031, 470.8299255371094)   
3                0         0    (375.1123352050781, 473.4014587402344)   
4                0         0    (384.2964172363281, 476.3403625488281)   
...            ...       ...                                       ...   
7860849        369      2699    (422.5567932128906, 379.5511779785156)   
7860850        369      2699    (420.531982421875, 393.72503662109375)   
7860851        369      2699   (418.50714111328125, 406.8865051269531)   
7860852        369      2699   (333.4638977050781, 425.11004638671875)   
7860853        369      2699     (349.66259765625, 339.05438232421875)   

         keypoint_score    face_part  keypoint_index  
0              0.577999         Chin               1  
1

In [6]:
## Smooth the data & interpolate (mean filter + median filter)

def process_infant_data(group):
    # Convert 'keypoint' to separate columns for x and y coordinates
    group[['keypoint_x', 'keypoint_y']] = pd.DataFrame(group['keypoint'].tolist(), index=group.index)
    
    # Apply mean filter
    group['mean_keypoint_x'] = group['keypoint_x'].rolling(window=3, min_periods=1).mean()
    group['mean_keypoint_y'] = group['keypoint_y'].rolling(window=3, min_periods=1).mean()
    
    # Apply median filter on the mean-filtered data
    group['median_keypoint_x'] = median_filter(group['mean_keypoint_x'], size=3)
    group['median_keypoint_y'] = median_filter(group['mean_keypoint_y'], size=3)
    
    # Combine the median-filtered coordinates back into a tuple
    group['processed_keypoint'] = list(zip(group['median_keypoint_x'], group['median_keypoint_y']))
    
    # Drop intermediate columns
    group.drop(columns=['keypoint_x', 'keypoint_y', 'mean_keypoint_x', 'mean_keypoint_y', 'median_keypoint_x', 'median_keypoint_y'], inplace=True)
    
    return group

# apply the process_infant_data function to each group
# df_processed = df.groupby(['infant_id', 'date'], group_keys=False).apply(process_infant_data).reset_index(drop=True)
df_processed = df.groupby(['infant_id'], group_keys=False).apply(process_infant_data).reset_index(drop=True)

In [7]:
print("Original DataFrame:")
df.head()

Original DataFrame:


Unnamed: 0,infant_id,frame_id,keypoint,keypoint_score,face_part,keypoint_index
0,0,0,"(351.60113525390625, 463.11529541015625)",0.577999,Chin,1
1,0,0,"(358.9483947753906, 466.7889404296875)",0.752206,Chin,2
2,0,0,"(367.3977355957031, 470.8299255371094)",0.598173,Chin,3
3,0,0,"(375.1123352050781, 473.4014587402344)",0.759949,Chin,4
4,0,0,"(384.2964172363281, 476.3403625488281)",0.610798,Chin,5


In [8]:
print("\nProcessed DataFrame:")
df_processed =  df_processed.drop(columns="keypoint")
df_processed.head()


Processed DataFrame:


Unnamed: 0,infant_id,frame_id,keypoint_score,face_part,keypoint_index,processed_keypoint
0,0,0,0.577999,Chin,1,"(351.60113525390625, 463.11529541015625)"
1,0,0,0.752206,Chin,2,"(355.27476501464844, 464.9521179199219)"
2,0,0,0.598173,Chin,3,"(359.3157552083333, 466.91138712565106)"
3,0,0,0.759949,Chin,4,"(367.15282185872394, 470.34010823567706)"
4,0,0,0.610798,Chin,5,"(375.60216267903644, 473.52391560872394)"


In [9]:
## Find all frames where all keypoints are visible & average confidence above 0.8

# group by 'infant_id', 'date', and 'frame_id' and calculate the mean keypoint score for each group

#grouped = df_processed .groupby(['infant_id', 'date', 'frame_id']).agg(average_confidence=('keypoint_score', 'mean')).reset_index()
grouped = df_processed.groupby(['infant_id', 'frame_id']).agg(average_confidence=('keypoint_score', 'mean')).reset_index()

filtered = grouped[grouped['average_confidence'] > 0.8]

# merge back with the original dataframe

#filtered_frames = df_processed.merge(filtered, on=['infant_id', 'date', 'frame_id'], how='inner')
filtered_frames = df_processed.merge(filtered, on=['infant_id', 'frame_id'], how='inner')

#filtered_frames = filtered_frames.sort_values(by=['infant_id', 'date', 'frame_id'])
filtered_frames = filtered_frames.sort_values(by=['infant_id', 'frame_id'])
filtered_frames = filtered_frames.reset_index(drop = True)
print(filtered_frames)

         infant_id  frame_id  keypoint_score    face_part  keypoint_index  \
0                0        17        0.595462         Chin               1   
1                0        17        0.845258         Chin               2   
2                0        17        0.669217         Chin               3   
3                0        17        0.959256         Chin               4   
4                0        17        0.735723         Chin               5   
...            ...       ...             ...          ...             ...   
2354149        369      2699        0.843840        Mouth             102   
2354150        369      2699        0.890791        Mouth             103   
2354151        369      2699        0.839571        Mouth             104   
2354152        369      2699        0.845012  Right_Pupil             105   
2354153        369      2699        0.961737   Left_Pupil             106   

                               processed_keypoint  average_confidence  
0  

In [11]:
## Identify continuous segments that are at least 20 frames long (roughly 1 second)

#filtered_frames = filtered_frames.sort_values(by=['infant_id', 'date', 'keypoint_index', 'frame_id'])
filtered_frames = filtered_frames.sort_values(by=['infant_id', 'keypoint_index', 'frame_id'])

# identify continuous segments for each 'infant_id' and 'keypoint_index'
filtered_frames['frame_diff'] = filtered_frames.groupby(['infant_id', 'keypoint_index'])['frame_id'].diff().fillna(1)
filtered_frames['block'] = (filtered_frames['frame_diff'] != 1).cumsum()
blocks = filtered_frames.groupby(['infant_id', 'keypoint_index', 'block']).filter(lambda x: len(x) >= 20)

blocks = blocks.drop(columns=['block', 'frame_diff', 'average_confidence'])
blocks = blocks.rename(columns={"processed_keypoint":"keypoint"})
blocks = blocks[['infant_id', 'frame_id', 'keypoint_index', 'keypoint', 'keypoint_score', 'face_part']]
blocks = blocks.sort_values(by=['infant_id', 'frame_id']).reset_index(drop = True)
blocks.head()

Unnamed: 0,infant_id,frame_id,keypoint_index,keypoint,keypoint_score,face_part
0,0,1052,1,"(347.1287536621094, 387.8479817708333)",0.714166,Chin
1,0,1052,2,"(340.10350545247394, 411.12335205078125)",0.803278,Chin
2,0,1052,3,"(337.18776448567706, 454.8063659667969)",0.724702,Chin
3,0,1052,4,"(337.18776448567706, 459.24217732747394)",0.764241,Chin
4,0,1052,5,"(347.53797403971356, 463.0058898925781)",0.72245,Chin


In [12]:
print(blocks)

         infant_id  frame_id  keypoint_index  \
0                0      1052               1   
1                0      1052               2   
2                0      1052               3   
3                0      1052               4   
4                0      1052               5   
...            ...       ...             ...   
1723025        369      1922             102   
1723026        369      1922             103   
1723027        369      1922             104   
1723028        369      1922             105   
1723029        369      1922             106   

                                         keypoint  keypoint_score    face_part  
0          (347.1287536621094, 387.8479817708333)        0.714166         Chin  
1        (340.10350545247394, 411.12335205078125)        0.803278         Chin  
2         (337.18776448567706, 454.8063659667969)        0.724702         Chin  
3        (337.18776448567706, 459.24217732747394)        0.764241         Chin  
4         (347.537

In [13]:
# save as csv files to be processed by "Data_Inspection_Emotion_Analysis"
blocks.to_csv('keypoints_clean_yt_dataset.csv', index=False)
print("Dataset saved successfully as keypoints_clean_yt_dataset.csv.")

Dataset saved successfully as keypoints_clean_yt_dataset.csv.
