In [23]:
from pathlib import Path
from typing import Optional, Callable, Dict, List

import numpy as np
import torch
from torch.utils.data import DataLoader, Dataset
from torchvision.transforms import Compose, Normalize

from projectaria_tools.core import data_provider
from projectaria_tools.core.sensor_data import ImageData
from projectaria_tools.core.sophus import SE3, SO3
from projectaria_tools.core.stream_id import StreamId

from nymeria.data_provider import NymeriaDataProvider
from nymeria.recording_data_provider import RecordingDataProvider, AriaStream

In [21]:
class NymeriaPoseDataset(Dataset):
    """Per-frame RGB image plus body joints for a single Nymeria sequence.

    Indexing yields a tuple ``(frame, joints_cam, joints_2d)``:

    * ``frame``      -- float32 image tensor, channels first, divided by 255
      and then passed through ``transform`` when one was given.
    * ``joints_cam`` -- float32 tensor with one 3D row per joint, expressed
      in the RGB camera frame.
    * ``joints_2d``  -- float32 tensor with one pixel coordinate per joint;
      ``[-1, -1]`` marks joints for which ``cam_calib.project`` returned
      ``None``.

    Args:
        seq_root:  root directory of the sequence, handed to
                   ``NymeriaDataProvider``.
        transform: optional callable applied to the image tensor.
        half:      when True the image is subsampled by two along both
                   spatial axes and pixel coordinates are halved to match.

    Raises:
        AssertionError: if the sequence has no head recording with RGB.
    """

    # Rows returned by `_unique_joints`: the parent of the first bone plus
    # one child per bone.
    NUM_JOINTS = 23

    def __init__(
        self,
        seq_root   : Path,
        transform  : Optional[Callable] = None,
        half       : bool = True,
    ):
        super().__init__()

        self.seq_root  = Path(seq_root)
        self.transform = transform
        self.half      = half

        # Only the head recording is needed; wrist/observer devices are skipped.
        self.dp = NymeriaDataProvider(
            sequence_rootdir=self.seq_root, load_wrist=False, load_observer=False
        )
        self.rec_head : RecordingDataProvider = self.dp.recording_head
        assert self.rec_head and self.rec_head.has_rgb, "no RGB stream found"

        self.rgb_sid = StreamId(AriaStream.camera_rgb.value)
        self.vrs_dp  = self.rec_head.vrs_dp
        self._num_frames = self.vrs_dp.get_num_data(self.rgb_sid)

        self.cam_calib = self.vrs_dp.get_device_calibration().get_camera_calib("camera-rgb")
        w, h = self.cam_calib.get_image_size()
        if half:
            w, h = w // 2, h // 2
        self.img_size = (h, w)   # stored as (height, width)

    def __len__(self) -> int:
        """Number of RGB frames in the head recording."""
        return self._num_frames

    @staticmethod
    def _unique_joints(bones: np.ndarray) -> np.ndarray:
        """Collapse per-bone endpoint pairs into one position per joint.

        Args:
            bones: sequence of ``(first, second)`` 3D endpoint pairs, one per
                bone. NOTE(review): presumably shape (22, 2, 3) ordered
                (child, parent) -- confirm against the nymeria body provider.

        Returns:
            ``(23, 3)`` float32 array. Row 0 is the second endpoint of bone 0,
            row ``b + 1`` is the first endpoint of bone ``b``; rows without a
            corresponding bone stay zero.

        Raises:
            IndexError: if more than 22 bones are supplied.
        """
        n_joints = NymeriaPoseDataset.NUM_JOINTS
        pts = np.zeros((n_joints, 3), np.float32)
        bones = np.asarray(bones, dtype=np.float32)
        n_bones = len(bones)
        if n_bones == 0:
            return pts
        if n_bones + 1 > n_joints:
            raise IndexError(
                f"got {n_bones} bones, at most {n_joints - 1} are supported"
            )
        pts[0] = bones[0, 1]
        pts[1:n_bones + 1] = bones[:, 0]
        return pts

    @staticmethod
    def _world_to_camera(joints_w, R_WC, t_WC) -> np.ndarray:
        """Express world-frame points in the camera frame.

        Computes ``R_WC^T (p - t_WC)`` for every row ``p`` of ``joints_w``.
        ``R_WC`` may be anything reshapeable to (3, 3) and ``t_WC`` anything
        reshapeable to (1, 3), so a leading batch axis of size one is fine.
        """
        rot   = np.asarray(R_WC, dtype=np.float64).reshape(3, 3)
        trans = np.asarray(t_WC, dtype=np.float64).reshape(1, 3)
        # (R^T @ X^T)^T == X @ R
        return (np.asarray(joints_w) - trans) @ rot

    def _project_joints(self, joints_c) -> torch.Tensor:
        """Project camera-frame joints to pixel coordinates.

        Joints for which the calibration returns ``None`` get ``[-1, -1]``.
        When ``self.half`` is set the pixel coordinates are halved so they
        line up with the subsampled image.
        """
        scale = 0.5 if self.half else 1.0
        uv = []
        for point in joints_c:
            pix = self.cam_calib.project(point)
            if pix is None:
                uv.append([-1, -1])
            else:
                u, v = pix
                uv.append([u * scale, v * scale])
        return torch.tensor(uv, dtype=torch.float32)

    def __getitem__(self, idx : int):
        """Return ``(frame, joints_cam, joints_2d)`` for RGB frame ``idx``.

        Negative indices count from the end.

        Raises:
            IndexError: if ``idx`` is outside ``[-len(self), len(self))``.
        """
        # An explicit IndexError is what lets plain `for item in dataset`
        # iteration terminate.
        if idx < 0:
            idx += self._num_frames
        if not 0 <= idx < self._num_frames:
            raise IndexError(
                f"frame index {idx} out of range for {self._num_frames} frames"
            )

        # Only the image data and its record are needed; index instead of
        # unpacking so the call does not depend on the tuple length.
        record = self.vrs_dp.get_image_data_by_index(self.rgb_sid, idx)
        img_data, meta = record[0], record[1]

        # Body poses are looked up by timecode time, not device time.
        t_code_ns = self.vrs_dp.convert_from_device_time_to_timecode_ns(
            meta.capture_timestamp_ns
        )
        poses = self.dp.get_synced_poses(t_code_ns)
        joints_w = self._unique_joints(poses["xsens"])

        # world <- device <- camera
        T_W_D : SE3 = poses["recording_head"].transform_world_device
        T_D_C : SE3 = self.cam_calib.get_transform_device_camera()
        T_W_C = T_W_D * T_D_C
        joints_c = self._world_to_camera(
            joints_w, T_W_C.rotation().to_matrix(), T_W_C.translation()
        )
        joints_2d = self._project_joints(joints_c)

        arr = img_data.to_numpy_array()
        if self.half:
            arr = arr[::2, ::2]      # keep every second row and column
        frame = torch.from_numpy(arr).permute(2, 0, 1).float() / 255.0
        if self.transform:
            frame = self.transform(frame)

        return frame, torch.from_numpy(joints_c).float(), joints_2d

In [24]:
# Sequence to load.
# NOTE(review): machine-specific absolute path -- point this at your local copy.
root    = Path(r"C:\Users\Damir\nymeria_dataset\d"
               r"\20230622_s0_john_solomon_act2_8urygm")

# Per-channel (x - 0.5) / 0.5, applied to the image tensor the dataset returns.
norm = Compose([Normalize(mean=[0.5,0.5,0.5], std=[0.5,0.5,0.5])])
ds      = NymeriaPoseDataset(root, transform=norm, half=True)

# num_workers=0: worker processes on Windows are spawned and must unpickle the
# dataset, which fails for a class defined inside the notebook (and for the
# native VRS provider it holds). Load in the main process instead.
# pin_memory only helps when batches are copied to a CUDA device.
loader  = DataLoader(ds, batch_size=8, shuffle=True,
                     num_workers=0, pin_memory=torch.cuda.is_available())

[32m2025-05-07 14:41:20.299[0m | [1mINFO    [0m | [36mnymeria.body_motion_provider[0m:[36m__init__[0m:[36m31[0m - [1mloading xsens from npzfile='C:\\Users\\Damir\\nymeria_dataset\\d\\20230622_s0_john_solomon_act2_8urygm\\body\\xdata.npz'[0m
[32m2025-05-07 14:41:20.716[0m | [1mINFO    [0m | [36mnymeria.body_motion_provider[0m:[36m__init__[0m:[36m34[0m - [1mk='segment_qWXYZ', v.shape=(144289, 92)[0m
[32m2025-05-07 14:41:20.716[0m | [1mINFO    [0m | [36mnymeria.body_motion_provider[0m:[36m__init__[0m:[36m34[0m - [1mk='segment_tXYZ', v.shape=(144289, 69)[0m
[32m2025-05-07 14:41:20.716[0m | [1mINFO    [0m | [36mnymeria.body_motion_provider[0m:[36m__init__[0m:[36m34[0m - [1mk='segment_velocity', v.shape=(144289, 69)[0m
[32m2025-05-07 14:41:20.725[0m | [1mINFO    [0m | [36mnymeria.body_motion_provider[0m:[36m__init__[0m:[36m34[0m - [1mk='segment_acceleration', v.shape=(144289, 69)[0m
[32m2025-05-07 14:41:20.725[0m | [1mINFO    [0