In [1]:
import os
import sys
from pathlib import Path

import numpy as np
import torch
import torchvision
from torch.utils.data import Dataset,DataLoader
from torchvision.transforms import ToTensor,Normalize,Compose
from tqdm.auto import tqdm

from nymeria.data_provider import NymeriaDataProvider
from nymeria.recording_data_provider import AriaStream
from nymeria.xsens_constants import XSensConstants
from projectaria_tools.core.stream_id import StreamId
from projectaria_tools.core.sophus import SE3

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def bones_to_unique_joints(b):
    """Collapse a list of bones into one position per joint.

    Each element of ``b`` is unpacked as a pair of 3-vectors. The first member
    of bone ``k`` is written to row ``k + 1`` of the result, and the second
    member of the very first bone is written to row 0. Rows that no bone
    touches stay at zero.

    Args:
        b: iterable of two-element bones; each member must be assignable to a
            length-3 float row. Presumably ordered (child, parent) — confirm
            against the pose provider.

    Returns:
        ``np.float32`` array of shape ``(XSensConstants.num_parts, 3)``.
    """
    joints = np.zeros((XSensConstants.num_parts, 3), dtype=np.float32)
    for bone_idx, bone in enumerate(b):
        first, second = bone
        joints[bone_idx + 1] = first
        if bone_idx == 0:
            # Only the first bone contributes its second endpoint (row 0).
            joints[0] = second
    return joints
class TimeAlignedLoader:
    """Index-addressable view of one sequence sampled at a fixed frame rate.

    Item ``i`` is read at timestamp ``t0 + i * dt`` where ``(t0, t1)`` is
    ``dp.timespan_ns`` and ``dt = int(1e9 / fps)``. Each item is an
    ``(rgb, joints)`` pair of float tensors.
    """

    def __init__(self, dp, fps=10, down=True):
        """Bind the loader to a data provider.

        Args:
            dp: data provider exposing ``timespan_ns``, ``recording_head``,
                ``get_synced_rgb_videos`` and ``get_synced_poses``.
            fps: sampling rate in frames per second; must be positive and small
                enough that the per-frame step is at least one time unit.
            down: when true, keep only every second row and column of the image.

        Raises:
            ValueError: if ``fps`` does not yield a positive time step.
        """
        if fps <= 0:
            raise ValueError(f"fps must be positive, got {fps!r}")
        step = int(1e9 / fps)
        if step <= 0:
            raise ValueError(f"fps={fps!r} is too high: time step rounds to zero")
        self.dp = dp
        self.down = down
        self.dt = step
        self.t0, self.t1 = dp.timespan_ns
        head = dp.recording_head
        self.sid = StreamId(AriaStream.camera_rgb.value)
        self.vrs = head.vrs_dp
        self.cam = self.vrs.get_device_calibration().get_camera_calib("camera-rgb")
        # Clamp at zero so len() stays valid even for a degenerate timespan.
        self.n = max(0, int((self.t1 - self.t0) // self.dt))

    def world_to_camera(self, P, Twd, Tdc):
        """Express world-frame points in the camera frame.

        The two transforms are composed as ``T_wc = T_wd * T_dc`` and its
        inverse is applied to every row of ``P``.

        Args:
            P: array of points, one 3-vector per row.
            Twd: transform exposing ``rotation().to_matrix()`` and
                ``translation()`` (device-to-world, judging by the name).
            Tdc: transform with the same interface (camera-to-device, judging
                by the name).

        Returns:
            Array with the same row layout as ``P``, in camera coordinates.
        """
        R_wd = Twd.rotation().to_matrix()
        t_wd = Twd.translation().reshape(3)
        R_dc = Tdc.rotation().to_matrix()
        t_dc = Tdc.translation().reshape(3)
        R_wc = R_wd @ R_dc
        t_wc = R_wd @ t_dc + t_wd
        return (R_wc.T @ (P.T - t_wc[:, None])).T

    def __len__(self):
        """Number of samples available at the configured frame rate."""
        return self.n

    def __getitem__(self, i):
        """Return the ``(rgb, joints)`` pair for sample ``i``.

        Negative indices count from the end. Out-of-range indices raise
        ``IndexError`` so that plain ``for item in loader`` iteration stops at
        the end of the sequence instead of querying timestamps past ``t1``.

        Returns:
            rgb: float tensor, channels moved to the front and scaled by 1/255
                (the source image is assumed to be H x W x C).
            joints: flattened float tensor of joint positions relative to
                joint 0.

        Raises:
            IndexError: if ``i`` is outside ``[-len(self), len(self))``.
        """
        idx = i + self.n if i < 0 else i
        if not 0 <= idx < self.n:
            raise IndexError(f"index {i} out of range for {self.n} samples")
        t = self.t0 + idx * self.dt
        rgb = self.dp.get_synced_rgb_videos(t)["recording_head"][0].to_numpy_array()
        if self.down:
            rgb = rgb[::2, ::2]
        rgb = torch.from_numpy(rgb).permute(2, 0, 1).float() / 255
        poses = self.dp.get_synced_poses(t)
        jw = bones_to_unique_joints(poses["xsens"])
        # Root-relative positions. NOTE(review): world_to_camera is not applied
        # here, so the target keeps the world-axis orientation — confirm this
        # is the intended frame for the regression target.
        jw = jw - jw[0]
        jw = torch.from_numpy(jw.reshape(-1)).float()
        return rgb, jw
class EgocentricPoseDataset(Dataset):
    """Torch ``Dataset`` yielding ``(image, joints)`` pairs from one sequence.

    Frames and poses come from a ``TimeAlignedLoader`` built over a
    ``NymeriaDataProvider``; the image is passed through a transform before
    being returned.
    """

    def __init__(self, seq_dir, fps=10, down=True, tf=None):
        """Open the sequence and set up the image transform.

        Args:
            seq_dir: root directory of the sequence (string or path).
            fps: sampling rate forwarded to ``TimeAlignedLoader``.
            down: downsampling flag forwarded to ``TimeAlignedLoader``.
            tf: callable applied to each image tensor. ``None`` selects the
                default ``Normalize(0.5, 0.5)``.
        """
        self.dp = NymeriaDataProvider(
            sequence_rootdir=Path(seq_dir), load_wrist=False, load_observer=False
        )
        self.loader = TimeAlignedLoader(self.dp, fps, down)
        # Compare against None rather than truthiness: a valid transform can be
        # falsy (e.g. an empty torch.nn.Sequential has length 0) and must not be
        # silently replaced by the default.
        self.tf = tf if tf is not None else Compose([Normalize(0.5, 0.5)])

    def __len__(self):
        """Number of samples, as reported by the underlying loader."""
        return len(self.loader)

    def __getitem__(self, i):
        """Return the transformed image and the joint target for sample ``i``."""
        img, j3d = self.loader[i]
        img = self.tf(img)
        return img, j3d


In [3]:
class PoseNet(torch.nn.Module):
    """ResNet-18 backbone with its classifier replaced by a regression head."""

    def __init__(self, num_outputs=69, pretrained=True):
        """Build the network.

        Args:
            num_outputs: size of the regression output. The default of 69
                matches the flattened joint vector used in this notebook
                (presumably 23 parts x 3 coordinates — confirm against
                ``XSensConstants.num_parts``).
            pretrained: when true, initialise the backbone from the
                ``IMAGENET1K_V1`` weights; when false, use random
                initialisation (no weight download required).
        """
        super().__init__()
        weights = (
            torchvision.models.ResNet18_Weights.IMAGENET1K_V1 if pretrained else None
        )
        backbone = torchvision.models.resnet18(weights=weights)
        # Read the feature width from the existing head instead of hard-coding it.
        backbone.fc = torch.nn.Linear(backbone.fc.in_features, num_outputs)
        self.net = backbone

    def forward(self, x):
        """Run the backbone on ``x`` and return the regression output."""
        return self.net(x)

In [None]:
# --- Configuration -----------------------------------------------------------
SEED = 0             # fixed seed so weight init and shuffling are repeatable
FPS = 10             # sampling rate passed to the dataset
BATCH_SIZE = 16
LEARNING_RATE = 1e-4
epochs = 10
# Sequence directory. Set NYMERIA_SEQ_DIR to run on another machine; the
# fallback is the original author's local path.
seq = os.environ.get(
    "NYMERIA_SEQ_DIR",
    r"C:\Users\Damir\nymeria_dataset\d\20230622_s0_john_solomon_act2_8urygm",
)

# Seed before the model and the shuffling DataLoader are created.
torch.manual_seed(SEED)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# NOTE(review): the backbone uses ImageNet weights while inputs are normalised
# with mean/std 0.5 — confirm this normalisation is intentional.
ds = EgocentricPoseDataset(seq, fps=FPS, down=True, tf=Compose([Normalize(0.5, 0.5)]))
dl = DataLoader(ds, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)
model = PoseNet().to(device)
opt = torch.optim.AdamW(model.parameters(), LEARNING_RATE)
loss_fn = torch.nn.MSELoss()

# --- Training ----------------------------------------------------------------
model.train()  # explicit: batch-norm layers must be in training mode here
for epoch in range(1, epochs + 1):
    running = 0.0
    for img, j in tqdm(dl, desc=f"epoch {epoch}/{epochs}", leave=False):
        img, j = img.to(device), j.to(device)
        pred = model(img)
        loss = loss_fn(pred, j)
        opt.zero_grad()
        loss.backward()
        opt.step()
        # Weight by batch size so the epoch figure is a per-sample mean even
        # when the last batch is smaller.
        running += loss.item() * img.size(0)
    print(f"epoch {epoch}: loss {running/len(ds):.4f}")

[32m2025-05-18 02:09:21.899[0m | [1mINFO    [0m | [36mnymeria.body_motion_provider[0m:[36m__init__[0m:[36m31[0m - [1mloading xsens from npzfile='C:\\Users\\Damir\\nymeria_dataset\\d\\20230622_s0_john_solomon_act2_8urygm\\body\\xdata.npz'[0m
[32m2025-05-18 02:09:22.500[0m | [1mINFO    [0m | [36mnymeria.body_motion_provider[0m:[36m__init__[0m:[36m34[0m - [1mk='segment_qWXYZ', v.shape=(144289, 92)[0m
[32m2025-05-18 02:09:22.502[0m | [1mINFO    [0m | [36mnymeria.body_motion_provider[0m:[36m__init__[0m:[36m34[0m - [1mk='segment_tXYZ', v.shape=(144289, 69)[0m
[32m2025-05-18 02:09:22.503[0m | [1mINFO    [0m | [36mnymeria.body_motion_provider[0m:[36m__init__[0m:[36m34[0m - [1mk='segment_velocity', v.shape=(144289, 69)[0m
[32m2025-05-18 02:09:22.505[0m | [1mINFO    [0m | [36mnymeria.body_motion_provider[0m:[36m__init__[0m:[36m34[0m - [1mk='segment_acceleration', v.shape=(144289, 69)[0m
[32m2025-05-18 02:09:22.506[0m | [1mINFO    [0