In [7]:
import h5py
import numpy as np
import os
import pandas as pd
from scipy.interpolate import interp1d

def fill_missing(Y, kind="linear"):
    """Fill NaNs independently along the first axis of every trailing slice.

    The array is flattened to 2-D (first axis kept, everything else merged),
    each resulting column is filled on its own, and the original shape is
    restored.

    Filling happens in two passes per column:
      1. interior gaps are interpolated with ``scipy.interpolate.interp1d``
         using ``kind``;
      2. anything still NaN (leading/trailing gaps) is filled with the
         nearest valid value via ``np.interp``.

    Degenerate columns are handled without raising:
      * a column with no valid samples is left as all-NaN;
      * a column with a single valid sample is filled with that value.

    Args:
        Y: Array with at least one dimension. Interpolation runs along
            axis 0.
        kind: Interpolation kind forwarded to ``interp1d``. Kinds other than
            the low-order ones need more than two valid samples per column;
            that requirement is left to ``interp1d`` to enforce.

    Returns:
        Array of the same shape as ``Y`` with the gaps filled. When the
        internal reshape yields a view (e.g. for a contiguous input), the
        input array itself is modified in place.
    """

    # Store initial shape.
    initial_shape = Y.shape

    # Flatten after first dim.
    Y = Y.reshape((initial_shape[0], -1))

    # Interpolate along each slice.
    for i in range(Y.shape[-1]):
        y = Y[:, i]

        missing = np.isnan(y)
        valid_idx = np.flatnonzero(~missing)

        # Nothing to interpolate from (all NaN) or nothing to fill (no NaN).
        if valid_idx.size == 0 or valid_idx.size == y.size:
            continue

        # interp1d needs at least two support points; with a single valid
        # sample we skip straight to the nearest-value fill below.
        if valid_idx.size >= 2:
            f = interp1d(
                valid_idx,
                y[valid_idx],
                kind=kind,
                fill_value=np.nan,
                bounds_error=False,
            )

            # Fill missing
            xq = np.flatnonzero(missing)
            y[xq] = f(xq)

        # Fill leading or trailing NaNs with the nearest non-NaN values
        mask = np.isnan(y)
        y[mask] = np.interp(np.flatnonzero(mask), np.flatnonzero(~mask), y[~mask])

        # Save slice
        Y[:, i] = y

    # Restore to initial shape.
    Y = Y.reshape(initial_shape)

    return Y

# Position of each body part along the node axis (axis 1) of the transposed
# "tracks" array.
# NOTE(review): these indices are hard-coded and assume every input file uses
# this node order; the file's own "node_names" dataset is not consulted —
# confirm against the skeleton used for tracking.
NODE_INDICES = {
    "nose": 0,
    "left_ear": 1,
    "right_ear": 2,
    "left_flank": 3,
    "right_flank": 4,
    "tailbase": 5,
    "spine_1": 6,
    "left_arm": 7,
    "right_arm": 8,
    "center": 9,
    "spine_2": 10,
    "tail_1": 11,
    "tail_2": 12,
    "tail_tip": 13,
}

# Body parts written to the CSV, in column order.
# NOTE(review): "tailbase" has an index above but is not part of the export
# list; the list is kept unchanged — confirm whether the omission is intended.
EXPORTED_BODY_PARTS = [
    "nose",
    "left_ear",
    "right_ear",
    "spine_1",
    "left_arm",
    "right_arm",
    "center",
    "spine_2",
    "left_flank",
    "right_flank",
    "tail_1",
    "tail_2",
    "tail_tip",
]


def process_file(filename):
    """Export gap-filled body-part coordinates from one HDF5 file to CSV.

    Reads the "tracks" dataset, transposes it, fills missing values with
    ``fill_missing``, and writes one ``<part>_x`` / ``<part>_y`` column pair
    per entry of ``EXPORTED_BODY_PARTS`` to ``<basename>_locs.csv`` in the
    current working directory.

    Args:
        filename: Path to the ``.h5`` file to process.

    Returns:
        The ``pandas.DataFrame`` that was written to disk.
    """
    with h5py.File(filename, "r") as f:
        # After the transpose the array is indexed as
        # [axis 0, node, coordinate (0 -> x, 1 -> y), axis 3].
        # presumably axis 0 is frames and axis 3 is tracks — TODO confirm.
        locations = f["tracks"][:].T

    # The data is fully loaded into memory, so the file can be closed before
    # the gaps are filled.
    locations = fill_missing(locations)

    # Look the node index up in a dict instead of eval()-ing variable names.
    columns = {}
    for body_part in EXPORTED_BODY_PARTS:
        part_loc = locations[:, NODE_INDICES[body_part], :, :]
        # ravel() flattens whatever remains after selecting the coordinate
        # into a single 1-D column.
        columns[f"{body_part}_x"] = part_loc[:, 0].ravel()
        columns[f"{body_part}_y"] = part_loc[:, 1].ravel()

    df = pd.DataFrame(columns)

    file_name = os.path.splitext(os.path.basename(filename))[0]

    # save the DataFrame as a csv file
    df.to_csv(f"{file_name}_locs.csv", index=False)

    return df

In [5]:
import os

# Path to the folder containing the .h5 files to process.
folder = r"C:\Users\topohl\Documents\SLEAP\Projects\AnalysisOFT\export_h5_files\OFT_animals"

# Names of all entries in that folder whose name ends with ".h5".
files = [
    entry_name
    for entry_name in os.listdir(folder)
    if entry_name.endswith(".h5")
]

# Run the per-file processing on each of them, one after another.
for file in files:
    # Build the full path from the folder and the bare file name.
    file_path = os.path.join(folder, file)
    process_file(file_path)