In [3]:
# install torch and pandas before running
%reset -f

import sys, os, cv2, glob, json, gc
import pandas as pd
import multiprocessing
from multiprocessing import Pool, cpu_count
from functools import partial
import gc

import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
import numpy as np
import itertools
from itertools import chain
from tqdm import tqdm
import circstat as CS
import scipy as sc
import math

from processing import *
from kinematics import *
from skeleton import *

gc.collect()

# Run switches. NOTE: the "Constants" cell further down assigns these three
# names again, so whichever cell ran last decides what process_file sees.
OVERWRITE = True
USE_CENTER_INSTANCE = False
USE_BEST_INSTANCE = True
dataset = 'CHOP_face'

# Input: one annotation JSON per video.
json_path = f'./data/Infant Pose Data/{dataset}/annotations'
json_files = os.listdir(json_path)
directory = './data'

# Output directory for the per-video keypoint CSVs.
save_path = f'./pose_estimates/{dataset}_norm'

# exist_ok=True replaces the separate existence check (no check-then-create
# race) and matches how the sub-directories are created later on.
os.makedirs(save_path, exist_ok=True)

# Keypoint index (0-105) -> face region label written to the 'bp' column.
kp_mapping = {
    **{i: "Chin" for i in range(0, 33)},
    **{i: "Right_brow" for i in range(33, 42)},
    **{i: "Left_brow" for i in range(42, 51)},
    **{i: "Nose" for i in range(51, 66)},
    **{i: "Right_Eye" for i in range(66, 75)},
    **{i: "Left_Eye" for i in range(75, 84)},
    **{i: "Mouth" for i in range(84, 104)},
    104: "Right_Pupil",
    105: "Left_Pupil"
}

# Define the DataFrame columns as specified
# (the "Constants" cell below rebinds `columns` with different names).
columns = ['video_number', 'video', 'bp', 'frame', 'x', 'y', 'c','fps', 'pixel_x', 'pixel_y', 'time', 'part_idx']
data = []  # This will hold the data to be loaded into the DataFrame

# Per-video metadata; process_file reads the 'video', 'fps', 'width' and
# 'height' columns from it.
vid_info = pd.read_csv(f'./data/{dataset}_video_info.csv')

In [None]:
# Run-time switches read by process_file. They rebind the values assigned in
# the first cell; process_file checks USE_CENTER_INSTANCE before
# USE_BEST_INSTANCE, so the centre instance wins when both are True.
OVERWRITE = False
USE_BEST_INSTANCE = False
USE_CENTER_INSTANCE = True

# Column order of every row process_file writes to the per-video CSV.
columns = [
    "file_number", "fname", "bp", "frame_id",
    "x", "y", "c",
    "fps", "pixel_x", "pixel_y",
    "time", "part_idx",
]

def process_file(args):
    """Convert one annotation JSON file into a long-format keypoint CSV.

    Args:
        args (tuple): ``(file_number, file, json_path, save_path, vid_info,
            kp_mapping)`` packed into a single tuple so the function can be
            handed to ``Pool.map``. ``file`` is a file name inside
            ``json_path``; ``vid_info`` is a DataFrame with 'video', 'fps',
            'width' and 'height' columns; ``kp_mapping`` maps a keypoint
            index to its region label.

    Returns:
        None. On success ``{save_path}/{fname}.csv`` is written with one row
        per keypoint per frame. Frames without instances contribute no rows.
        Any failure is reported on stdout and the file is skipped, so one bad
        file does not abort the whole pool.
    """
    file_number, file, json_path, save_path, vid_info, kp_mapping = args

    file_path = os.path.join(json_path, file)
    fname = file.split('.')[0]
    out_path = f'{save_path}/{fname}.csv'

    if not OVERWRITE and os.path.exists(out_path):
        return  # Skip if already exists and not overwriting

    try:
        # Only the parsing needs the file handle; release it before the loop.
        with open(file_path, 'r') as f:
            frames = json.load(f)

        info = vid_info[vid_info['video'] == fname]
        if info.empty:
            # Previously surfaced as an IndexError that was swallowed silently.
            raise LookupError(f'no row for video {fname!r} in vid_info')

        fps = info['fps'].values[0]
        pixel_x = info['width'].values[0]
        pixel_y = info['height'].values[0]
        center_x = pixel_x / 2
        center_y = pixel_y / 2

        interim = []
        for frame in frames:
            frame_id = frame['frame_id']
            if 'instances' not in frame or len(frame['instances']) == 0:
                continue

            # The selection helpers are not defined in this notebook
            # (they come from the star imports at the top).
            if USE_CENTER_INSTANCE:
                instance_id = get_center_instance(frame['instances'], center_x, center_y)
            elif USE_BEST_INSTANCE:
                instance_id = get_best_instance(frame['instances'])
            else:
                instance_id = 0

            instance = frame['instances'][instance_id]
            keypoints = instance['keypoints']
            confidence = instance['keypoint_scores']

            # The timestamp only depends on the frame, so compute it once.
            timestamp = frame_id / fps

            for part_idx, (x, y) in enumerate(keypoints):
                interim.append([
                    file_number, fname, kp_mapping[part_idx], frame_id,
                    x, y, confidence[part_idx],
                    fps, pixel_x, pixel_y, timestamp, part_idx,
                ])

        pd.DataFrame(interim, columns=columns).to_csv(out_path, index=False)
        return

    except Exception as e:
        # Best effort: keep going with the other files, but say what failed.
        print(f'could not process {file}: {e!r}')
        return
    
def process_annotations_multiprocess(json_files, json_path, save_path, vid_info, kp_mapping, processes=20):
    """Run process_file over every annotation file using a worker pool.

    Args:
        json_files (list of str): Annotation file names inside ``json_path``.
        json_path (str): Directory holding the annotation files.
        save_path (str): Directory that receives the per-video CSVs.
        vid_info (DataFrame): Per-video metadata forwarded to process_file.
        kp_mapping (dict): Keypoint index -> label, forwarded to process_file.
        processes (int): Upper bound on worker processes (default 20, the
            value that used to be hard-coded).

    Returns:
        None.
    """
    # The position in json_files becomes the file_number column.
    args = [
        (file_number, file, json_path, save_path, vid_info, kp_mapping)
        for file_number, file in enumerate(json_files)
    ]

    if not args:
        return  # nothing to do; do not start a pool for zero tasks

    # Never start more workers than there are files.
    with Pool(processes=min(processes, len(args))) as pool:
        pool.map(process_file, args)


In [None]:
# Convert every annotation file listed in json_files into a per-video CSV.
process_annotations_multiprocess(
    json_files=json_files,
    json_path=json_path,
    save_path=save_path,
    vid_info=vid_info,
    kp_mapping=kp_mapping,
)

In [None]:
def rotate_coordinates(points, center, angle_degrees):
    """
    Rotate (x, y) points about a pivot using the standard 2-D rotation matrix.

    Args:
        points (list of tuples): The (x, y) coordinates to rotate.
        center (tuple): The (cx, cy) pivot the points are rotated around.
        angle_degrees (float): Rotation angle in degrees; positive values
            rotate counter-clockwise in a y-up coordinate system.

    Returns:
        list of tuples: One rotated (x, y) tuple per input point, same order.
    """
    pivot_x, pivot_y = center
    theta = math.radians(angle_degrees)
    cos_t, sin_t = math.cos(theta), math.sin(theta)

    def _spin(px, py):
        # Shift so the pivot is the origin, rotate, then shift back.
        dx, dy = px - pivot_x, py - pivot_y
        return (
            dx * cos_t - dy * sin_t + pivot_x,
            dx * sin_t + dy * cos_t + pivot_y,
        )

    return [_spin(px, py) for px, py in points]

def find_alignment_angle(points):
    """
    Compute the rotation angle (degrees) used to level a pair of reference points.

    The angle of the vector points[0] -> points[1] against the x axis is
    measured with atan2. When points[0] lies strictly to the left of
    points[1] a further 180 degrees is added. The negated result is returned
    so it can be passed straight to rotate_coordinates.

    Args:
        points (list of tuples): (x, y) coordinates; only points[0] and
            points[1] are read.

    Returns:
        float: The angle in degrees to rotate by.
    """
    (x1, y1), (x2, y2) = points[0], points[1]

    angle = math.degrees(math.atan2(y2 - y1, x2 - x1))

    # NOTE(review): with this extra half turn the two branches appear to
    # leave the pair in opposite left/right order after rotation. Confirm
    # this is the intended "head up" convention.
    if x1 < x2:
        angle += 180

    return -angle

def scale_coordinates(points):
    """
    Rescale points so that their vertical extent (max y - min y) becomes 1.

    y is shifted so the smallest y maps to 0 before dividing; x is only
    divided by the same height and is not shifted.

    Args:
        points (list of tuples): List of (x, y) coordinates.

    Returns:
        list of tuples: Scaled (x, y) coordinates, in input order.
    """
    y_values = pd.DataFrame(points, columns=['x', 'y'])['y']
    y_floor = y_values.min()
    span = y_values.max() - y_floor

    return [(px / span, (py - y_floor) / span) for px, py in points]

def move_to_center(points, center):
    """
    Translate points so that ``center`` ends up at the origin (0, 0).

    Args:
        points (list of tuples): List of (x, y) coordinates.
        center (tuple): The (cx, cy) offset subtracted from every point.

    Returns:
        list of tuples: Translated (x, y) coordinates, in input order.
    """
    offset_x, offset_y = center
    shifted = []
    for px, py in points:
        shifted.append((px - offset_x, py - offset_y))
    return shifted


def move_and_rotate_keypoints_updated(df):
    """
    Rotate and rescale the keypoints of every (video, frame) group.

    For each frame the keypoints are rotated about keypoint 54 by the angle
    find_alignment_angle derives from keypoints 70 and 75, then passed through
    scale_coordinates. Frames that lack any of those three keypoints are
    skipped.

    Parameters:
    - df (DataFrame): Must contain 'video', 'frame', 'part_idx', 'x', 'y'.

    Returns:
    - DataFrame with columns 'x' and 'y' holding the normalized coordinates.
      Each row keeps the index label of the ``df`` row it was computed from,
      so ``df['x'] = result['x']`` lines up even when frames were skipped
      (skipped rows become NaN). Returns None when no frame was processed.

    Raises:
    - ValueError: if a required column is missing.
    """
    required_columns = ['video', 'frame', 'part_idx', 'x', 'y']
    if not all(col in df.columns for col in required_columns):
        raise ValueError(f"Input CSV must contain the following columns: {required_columns}")

    reference_parts = (54, 70, 75)  # rotation centre, and the two alignment points
    row_labels = []
    normalized = []

    for _, group in df.groupby(['video', 'frame']):
        points = list(zip(group['x'], group['y']))
        # Look keypoints up by part_idx rather than by position in the group,
        # so a missing or reordered row cannot select the wrong keypoint.
        by_part = dict(zip(group['part_idx'], points))
        if not all(part in by_part for part in reference_parts):
            continue

        alignment_angle = find_alignment_angle([by_part[70], by_part[75]])
        rotated = rotate_coordinates(points, by_part[54], alignment_angle)

        normalized.extend(scale_coordinates(rotated))
        row_labels.extend(group.index)

    if not normalized:
        print("No frames were processed. Check input data.")
        return

    return pd.DataFrame(normalized, columns=['x', 'y'], index=row_labels)


In [None]:
# Smooth and interpolate the data

def process_dataframe(file):
    """Normalize, interpolate and smooth one per-video keypoint CSV.

    Reads ``file`` from the module-level ``pose_estimate_path`` (assigned in
    a later cell, so that cell must have run first), replaces x/y with the
    output of move_and_rotate_keypoints_updated, shifts x so every frame
    starts at 0, then applies ``interpolate_df`` and ``smooth`` per
    (video, part_idx). Both helpers come from the star imports and are not
    defined in this notebook. The result is written to
    ``{pose_estimate_path}/smooth/``.

    Args:
        file (str): CSV file name inside ``pose_estimate_path``.

    Returns:
        None. Failures are reported on stdout and the file is skipped.
    """
    df = pd.read_csv(os.path.join(pose_estimate_path, file))
    df = df.rename(columns={'file_number': 'video_number', 'fname': 'video', 'frame_id': 'frame'})

    if df.empty:
        return

    try:
        # The video name is expected to look like '<infant>_<session>_<age>...'.
        infant, session, age = df['video'].unique()[0].split('_')[:3]

        print(f'infant: {infant} {session} {age}')

        norm = move_and_rotate_keypoints_updated(df)
        if norm is None:
            # No frame carried the reference keypoints; there is nothing to smooth.
            print(f'could not process video {file}: no frame could be normalized')
            return

        df['x'] = norm['x']
        df['y'] = norm['y']

        # Shift each frame so its left-most keypoint sits at x = 0.
        df['x'] = df['x'] - df.groupby('frame')['x'].transform('min')

        df['x'] = pd.to_numeric(df['x'])
        df['y'] = pd.to_numeric(df['y'])

        # Interpolate
        df = df.groupby(['video', 'part_idx']).apply(interpolate_df).reset_index(drop=True)

        # Median and mean filter (window units are defined by smooth(); not visible here)
        median_window = 0.5
        mean_window = 0.5

        df = df.groupby(['video', 'part_idx']).apply(lambda x: smooth(x, 'y', median_window, mean_window)).reset_index(drop=True)
        df = df.groupby(['video', 'part_idx']).apply(lambda x: smooth(x, 'x', median_window, mean_window)).reset_index(drop=True)

        # NOTE(review): the index is written as well, so readers of this file
        # get an extra unnamed column. Kept as-is to preserve the file format.
        df.to_csv(f'{pose_estimate_path}/smooth/{infant}_{session}_{age}_smooth_pose_estimates_coords.csv')

    except Exception as e:
        # The original message was a bare f-string expression and was never shown.
        print(f'could not process video {file}: {e!r}')
        return



In [5]:
# Smooth detections and compute features
pose_estimate_path = f'./pose_estimates/{dataset}_norm'
csv_path = f'{pose_estimate_path}/pose_estimates_{dataset}.csv'
# NOTE(review): save_path was the per-video output *directory* in the first
# cell; from here on it names a single CSV file. Re-running the annotation
# cell after this one would use the file path as a directory.
save_path = f'{pose_estimate_path}/pose_estimates_{dataset}_processed.csv'

# Output layout below pose_estimate_path ("" is the directory itself).
subdirs = [
    "",
    "xdf", "adf",
    "xy_features", "angle_features",
    "xy_features/total", "angle_features/total",
    "xy_features/windows", "angle_features/windows",
    "smooth", "symmetrical", "anim",
]

# Create whatever is missing; existing directories are left untouched.
for subdir in subdirs:
    target_dir = f'{pose_estimate_path}/{subdir}'
    os.makedirs(target_dir, exist_ok=True)


In [None]:
# Every CSV sitting directly in the pose-estimate directory is a candidate.
faces_files = [entry for entry in os.listdir(f'{pose_estimate_path}') if entry.endswith('.csv')]

# process only files that haven't been processed
output_names = [entry for entry in os.listdir(f'{pose_estimate_path}/smooth') if entry.endswith('.csv')]

# An input counts as done when the first three '_'-separated tokens of an
# existing output name, plus '.csv', equal the input file name.
match = []
for parts in (name.split('_') for name in output_names):
    match.append(f'{parts[0]}_{parts[1]}_{parts[2]}.csv')

faces_files = [file for file in faces_files if file not in match]
print(f'processing {len(faces_files)} files')


In [None]:
# Normalize and smooth every pending file, one file per task, on 20 workers.
with Pool(processes=20) as workers:
    workers.map(func=process_dataframe, iterable=faces_files)

In [None]:
from matplotlib.animation import FuncAnimation

def init_animation(scatter, text):
    """Blank both artists before the first frame: no points, empty label."""
    no_points = np.empty(shape=(0, 2))  # set_offsets is given a 2-D (0, 2) array
    scatter.set_offsets(no_points)
    text.set_text('')
    return scatter, text

def update_animation(frame, df, scatter, text):
    """Draw the keypoints of one frame and show its frame number.

    Args:
        frame: Value matched against the 'frame' column of ``df``.
        df (DataFrame): Must contain 'frame', 'x' and 'y'.
        scatter: Artist whose offsets are replaced with the frame's (x, y) rows.
        text: Artist whose text is set to ``"Frame: <frame>"``.

    Returns:
        tuple: ``(scatter, text)``.
    """
    rows = df[df['frame'] == frame]

    # A frame without rows clears the plot with an explicit (0, 2) array.
    if rows.empty:
        xy = np.empty((0, 2))
    else:
        xy = rows[['x', 'y']].values

    scatter.set_offsets(xy)
    text.set_text(f"Frame: {frame}")
    return scatter, text

def animate_coordinates(file_path, output_gif=None, fps=10):
    """
    Animates the time series of (x, y) coordinates from a normalized, rotated file.

    Parameters:
    - file_path (str): Path to the normalized and rotated CSV file.
    - output_gif (str, optional): Path to save the animation as a GIF file. If None, displays the animation.
    - fps (int, optional): Frame rate of the saved GIF (default 10, the value
      that used to be hard-coded). Ignored when the animation is only displayed.

    Raises:
    - ValueError: if the CSV lacks one of 'frame', 'x', 'y', 'bp'.
    """
    df = pd.read_csv(file_path)
    # rename() ignores keys that are absent, so no try/except is needed for
    # files that already use the new column names.
    df = df.rename(columns={'file_number': 'video_number', 'fname': 'video', 'frame_id': 'frame'})

    # Ensure required columns exist
    required_columns = ['frame', 'x', 'y', 'bp']
    if not all(col in df.columns for col in required_columns):
        raise ValueError("Input CSV is missing required columns: 'frame', 'x', 'y', 'bp'")

    # One animation step per distinct frame number, in ascending order.
    df = df.sort_values(by='frame')
    frame_ids = df['frame'].unique()

    # Initialize the figure
    fig, ax = plt.subplots(figsize=(6, 6))
    ax.set_xlim(0, 2.4)  # fixed view window for the normalized coordinates
    ax.set_ylim(0, 1.2)

    ax.set_title("Time Series of Keypoints")
    ax.set_xlabel("Normalized X")
    ax.set_ylabel("Normalized Y")

    # Initialize scatter plot and text
    scatter = ax.scatter([], [], s=50, color='blue')  # Placeholder for keypoints
    text = ax.text(0, 1, '', fontsize=12)

    print('starting animation process')
    ani = FuncAnimation(
        fig,
        update_animation,
        frames=frame_ids,
        init_func=lambda: init_animation(scatter, text),
        fargs=(df, scatter, text),
        blit=True,
        repeat=True
    )

    # Save as GIF or display
    if output_gif:
        try:
            ani.save(output_gif, writer='pillow', fps=fps)
        finally:
            # pyplot keeps every figure alive until it is closed; without this
            # each call made from a loop or worker pool leaks one figure.
            plt.close(fig)
        print(f"Animation saved to {output_gif}")
    else:
        plt.show()

def animate_face(file):
    """Render the 'symmetrical' CSV named ``file`` to a GIF in the 'anim' directory.

    Both directories live under the module-level ``pose_estimate_path``; the
    GIF keeps the CSV's base name.
    """
    stem = os.path.splitext(file)[0]
    animate_coordinates(
        f'{pose_estimate_path}/symmetrical/{file}',
        output_gif=f'{pose_estimate_path}/anim/{stem}.gif',
    )



In [None]:
import random

# Only CSV outputs can be animated; ignore anything else in the directory.
smooth_files = [entry for entry in os.listdir(f'{pose_estimate_path}/smooth') if entry.endswith('.csv')]

# Animate at most five randomly chosen files. Capping the sample size keeps
# random.sample from raising when fewer than five files exist.
# NOTE(review): names are sampled from 'smooth/' but animate_face reads the
# same name from 'symmetrical/'; the filtering cell must have run first.
smooth_files = random.sample(smooth_files, min(5, len(smooth_files)))

with Pool(processes=5) as pool:
    pool.map(animate_face, smooth_files)

In [8]:
# clean data

# Define midline indices and symmetry pairs
midline_index = [54, 53, 52]
symmetry_pairs = [(30, 4), (8, 27), (12, 22)]
symmetry_threshold = 0.3 #smaller is more strict

# Function to compute symmetry score for a single frame
def compute_symmetry_score(frame_df):
    """Score how evenly the paired keypoints of one frame sit around the midline.

    The midline is the mean x of the ``midline_index`` keypoints. For every
    pair in ``symmetry_pairs`` the score adds the absolute difference between
    the two keypoints' horizontal distances to that midline; the mean over all
    pairs is returned (0 means perfectly mirrored).

    Args:
        frame_df (DataFrame): Rows of a single frame with 'part_idx' and 'x'.

    Returns:
        float: Mean asymmetry, or NaN when a paired keypoint is missing from
        the frame.
    """
    # Compute the midline X-coordinate
    mid_x = frame_df.loc[frame_df['part_idx'].isin(midline_index), 'x'].mean()

    symmetry_diffs = []
    for left, right in symmetry_pairs:
        left_x = frame_df.loc[frame_df['part_idx'] == left, 'x'].values
        right_x = frame_df.loc[frame_df['part_idx'] == right, 'x'].values

        # Explicit check instead of a bare except: a frame without one of the
        # paired keypoints cannot be scored.
        if len(left_x) == 0 or len(right_x) == 0:
            return np.nan

        symmetry_diffs.append(abs(abs(left_x[0] - mid_x) - abs(right_x[0] - mid_x)))

    return np.mean(symmetry_diffs)

def get_symmetrical_frames(df):
    """Return one symmetry score per frame.

    No filtering happens here: the result has a 'frame' column and a
    'symmetry_score' column for every frame in ``df``; the caller applies
    the threshold.
    """
    per_frame = df.groupby('frame').apply(compute_symmetry_score)
    return per_frame.reset_index(name='symmetry_score')

def filter_dataframe(file):
    """Keep only the frames of a smoothed CSV whose symmetry score is below the threshold.

    Reads ``smooth/<file>`` and writes the surviving rows to
    ``symmetrical/<file>`` (same file name), both under ``pose_estimate_path``.
    """
    smoothed = pd.read_csv(f'{pose_estimate_path}/smooth/{file}')

    scores = get_symmetrical_frames(smoothed)
    is_symmetrical = scores['symmetry_score'] < symmetry_threshold
    keep_frames = scores.loc[is_symmetrical, 'frame']

    kept = smoothed[smoothed['frame'].isin(keep_frames)]
    kept.to_csv(f'{pose_estimate_path}/symmetrical/{file}', index=False)



In [15]:
# Work out which smoothed files still need the symmetry filter.
smooth_files = [entry for entry in os.listdir(f'{pose_estimate_path}/smooth') if entry.endswith('.csv')]

# filter_dataframe writes its output under the same file name, so the names
# already present in 'symmetrical/' can be compared directly.
output_names = [entry for entry in os.listdir(f'{pose_estimate_path}/symmetrical') if entry.endswith('.csv')]

smooth_files = [file for file in smooth_files if file not in output_names]
print(f'processing {len(smooth_files)} files')


processing 168 files


In [16]:
# Apply the symmetry filter to every pending file, one file per task, on 10 workers.
with Pool(processes=10) as workers:
    workers.map(func=filter_dataframe, iterable=smooth_files)

In [None]:
#compute facial features
# Keypoint index -> upper/lower contour label used by the facial features.
# Keypoint 37 was missing: the left brow contributes 42-46 (five points) to
# "upper_brow" while the right brow only contributed 33-36, so 37 belonged
# to no contour. It is added here so both brows are treated alike.
# NOTE(review): confirm against the keypoint layout of the pose model.
kp_mapping2 = {
        **{i: "upper_brow" for i in [33,34,35,36,37,42,43,44,45,46]},
        **{i: "lower_brow" for i in [47,48,49,50,38,39,40,41]},
        **{i: "upper_eyelid" for i in [66,67,68,69,70,75,76,77,78,79]},
        **{i: "lower_eyelid" for i in [71,72,73,80,81,82]},
        **{i: "upper_lip" for i in [84,85,86,87,88,89,90,99,98,97,96]},
        **{i: "lower_lip" for i in [103,102,101,91,92,93,94,95]},
    }

# Names of the per-frame features derived from the contours above.
feature_list = ['mean_eyelid_dist',
                'mean_lip_dist',
                'mean_brow_height',
                'mean_brow_curvature',
                'mean_eyelid_curvature',
                'mean_lip_curvature'
                ]

def get_y_distance(df):
    """Return the largest 'upper_eyelid' y minus the smallest 'lower_eyelid' y.

    ``df`` needs an 'upper_lower' label column and a 'y' column. Despite the
    generic name, only the eyelid labels are read.
    """
    upper_y = df.loc[df['upper_lower'] == 'upper_eyelid', 'y']
    lower_y = df.loc[df['upper_lower'] == 'lower_eyelid', 'y']
    return upper_y.max() - lower_y.min()


def get_mean_curvature(points):
    """Average ``CS.circ_std`` over a collection of value groups.

    Each element of ``points`` is converted to an array and passed to
    ``CS.circ_std`` with the range [-pi, pi]; the mean of those results is
    returned. NOTE(review): circ_std comes from the ``circstat`` import and is
    presumably a circular standard deviation of angles; confirm what the
    inputs represent.
    """
    spreads = []
    for group in points:
        spreads.append(CS.circ_std(np.array(group), high=math.pi, low=-math.pi))
    return np.mean(spreads)

In [None]:
# Spot check: load one smoothed file (position 20 in smooth_files), drop
# incomplete rows and plot the keypoints of frame 17.
sample_path = f'{pose_estimate_path}/smooth/{smooth_files[20]}'
df = pd.read_csv(sample_path).dropna()

face = df[df['frame'] == 17]
plt.scatter(face['x'], face['y'])

In [67]:
# Eye opening = vertical extent (max y - min y) of each eye's keypoints in
# the inspected frame; print the mean of the two eyes.
right_eye_y = face.loc[face['bp'] == 'Right_Eye', 'y']
left_eye_y = face.loc[face['bp'] == 'Left_Eye', 'y']

r_eye_opening = right_eye_y.max() - right_eye_y.min()
l_eye_opening = left_eye_y.max() - left_eye_y.min()

print(np.mean([r_eye_opening, l_eye_opening]))

0.04909447506659975
