# In [None]:
import os
import h5py
import pandas as pd
import numpy as np
from scipy.interpolate import interp1d

# Input and output directories.
# input_dir is scanned (non-recursively) for files ending in ".h5"; for each one a
# "<name>_locs.csv" file is written to output_dir. Both currently point at the same
# folder, so the CSVs end up next to the prediction files they were derived from.
input_dir = r"C:\Users\topohl\Documents\SLEAP\Projects\AnalysisSocP\predictions\animal"
output_dir = r"C:\Users\topohl\Documents\SLEAP\Projects\AnalysisSocP\predictions\animal"

# Body part names, keyed by their position along the node axis of the tracks array
# (the position in the tuple below is the index used to slice the array).
# NOTE(review): presumably this order has to match the node order of the skeleton
# used for the predictions - verify before adding or reordering entries.
part_indices = dict(enumerate((
    'nose',
    'leftEar',
    'rightEar',
    'leftFlank',
    'rightFlank',
    'tailbase',
    'spine1',
    'leftArm',
    'rightArm',
    'center',
    'spine2',
    'tail1',
    'tail2',
    'tailtip',
)))

def fill_missing(Y, kind="linear"):
    """Fill NaN gaps independently along each dimension after the first.

    The array is treated as a set of 1-D series running along axis 0 (one
    series per combination of the remaining indices). For every series:

    * if it is entirely NaN it is left as is;
    * a NaN first / last sample is replaced by the nearest valid value, so
      that every remaining gap is bounded by valid samples on both sides
      (with ``kind="linear"`` this makes leading / trailing runs constant);
    * the remaining NaNs are filled with ``scipy.interpolate.interp1d``.

    Args:
        Y: NumPy array whose first axis is the one to interpolate along.
        kind: Interpolation kind, passed straight through to ``interp1d``.

    Returns:
        A new array with the same shape as ``Y`` and the gaps filled. The
        input array is never modified. An input of length 0 is returned
        unchanged.
    """
    if len(Y) == 0:
        return Y

    initial_shape = Y.shape
    # Work on a private C-contiguous copy. A plain reshape returns a view for
    # contiguous input and a copy otherwise, which would make it depend on the
    # memory layout whether the caller's array gets overwritten.
    filled = np.array(Y, order="C", copy=True).reshape((initial_shape[0], -1))

    for i in range(filled.shape[1]):
        y = filled[:, i]  # view: the writes below land directly in `filled`
        missing = np.isnan(y)
        # Nothing to interpolate from (all NaN) or nothing to fill (no NaN).
        if missing.all() or not missing.any():
            continue

        valid = np.flatnonzero(~missing)
        # Pin both ends to the nearest valid sample so that no query point
        # lies outside the interpolation range.
        if missing[0]:
            y[0] = y[valid[0]]
            missing[0] = False
        if missing[-1]:
            y[-1] = y[valid[-1]]
            missing[-1] = False

        xq = np.flatnonzero(missing)
        if xq.size == 0:
            # The only gaps were the end samples, which are filled already.
            continue

        # Build the interpolant from the valid samples and fill the gaps.
        x = np.flatnonzero(~missing)
        f = interp1d(x, y[x], kind=kind, fill_value=np.nan, bounds_error=False)
        y[xq] = f(xq)

    # Restore the initial shape.
    return filled.reshape(initial_shape)

# Convert every .h5 file found in the input directory into a CSV of coordinates
for filename in os.listdir(input_dir):
    # Anything that is not an .h5 file is ignored
    if not filename.endswith(".h5"):
        continue

    # Source file, and the "<name>_locs.csv" file it is converted to
    input_path = os.path.join(input_dir, filename)
    stem, _ = os.path.splitext(filename)
    output_path = os.path.join(output_dir, stem + '_locs.csv')

    # Read the whole "tracks" dataset and reverse its axis order
    with h5py.File(input_path, "r") as f:
        locations = f["tracks"][:].T

    # Interpolate over missing (NaN) values along the first axis
    locations = fill_missing(locations)

    # One "<part>_x" / "<part>_y" column pair per body part, in part_indices order
    columns = {}
    for index, body_part in part_indices.items():
        part = locations[:, index, :, :]
        # Indices 0 and 1 of the next axis are taken as x and y; ravel() flattens
        # whatever axes remain into a single column.
        # NOTE(review): if the file holds more than one track, the tracks end up
        # interleaved in the same column - confirm this is intended.
        columns[f"{body_part}_x"] = part[:, 0].ravel()
        columns[f"{body_part}_y"] = part[:, 1].ravel()

    # Assemble the table and write it out without the index column
    df = pd.DataFrame(columns)
    df.to_csv(output_path, index=False)

    print(f"Processed file {input_path}")
