# Quick Dataset Examination

Check dataset structure and verify observation groups.

In [17]:
import h5py
import numpy as np
from pathlib import Path

# Dataset path - CHANGE THIS to your dataset filename
dataset_filename = "trajectories_Isaac-Dexsuite-Kuka-Allegro-Lift-IK-Play-v0_20251029_231415_truncated75_train.hdf5"

# Construct absolute path (notebook is in scripts/analysis/)
dataset_path = str(Path("/home/raymond/projects/policy_translation/IsaacLab/trajectory_data") / dataset_filename)

print(f"Dataset: {Path(dataset_path).name}")
print(f"Full path: {dataset_path}")
print(f"Exists: {Path(dataset_path).exists()}")

Dataset: trajectories_Isaac-Dexsuite-Kuka-Allegro-Lift-IK-Play-v0_20251029_231415_truncated75_train.hdf5
Full path: /home/raymond/projects/policy_translation/IsaacLab/trajectory_data/trajectories_Isaac-Dexsuite-Kuka-Allegro-Lift-IK-Play-v0_20251029_231415_truncated75_train.hdf5
Exists: True


## 1. Complete Structure

In [18]:
def print_structure(group, prefix="", max_depth=5, current_depth=0):
    """Print HDF5 structure"""
    if current_depth >= max_depth:
        return
    for key in sorted(group.keys()):
        item = group[key]
        if isinstance(item, h5py.Group):
            print(f"{prefix}{key}/")
            print_structure(item, prefix + "  ", max_depth, current_depth + 1)
        else:
            print(f"{prefix}{key}: {item.shape}, {item.dtype}")

with h5py.File(dataset_path, 'r') as f:
    demo = f['data']['demo_0']
    print("Structure of demo_0:")
    print_structure(demo, max_depth=3)
    
    print(f"\nTotal demos: {len([k for k in f['data'].keys() if k.startswith('demo_')])}")

Structure of demo_0:
actions: (75, 22), float32
initial_state/
  articulation/
    robot/
  rigid_object/
    object/
    table/
obs: (75, 165), float32
proprio_obs: (75, 615), float32
rewards: (75,), float32
states/
  articulation/
    robot/
  rigid_object/
    object/
    table/

Total demos: 89


## 2. Observation Breakdown

In [21]:
with h5py.File(dataset_path, 'r') as f:
    demo = f['data']['demo_0']
    
    print("=" * 60)
    print("OBSERVATION BREAKDOWN")
    print("=" * 60)
    
    # Policy observations (obs)
    if 'obs' in demo:
        policy_obs = demo['obs'][:]
        T, dims = policy_obs.shape
        print(f"\nPolicy Observations (obs): {dims}D across {T} timesteps")
        print("-" * 60)
        
        per_timestep = 33
        history_length = 5
        
        print(f"History: {history_length} timesteps x {per_timestep}D = {dims}D total")
        print()
        print("Per-timestep structure (33D):")
        print(f"  [0:4]    object_quat_b      (quaternion, robot base frame)")
        print(f"  [4:11]   target_pose_b      (3D pos + 4D quat, robot base frame)")
        print(f"  [11:33]  last_actions       (22D joint commands)")
        print()
        
        latest_start = (history_length - 1) * per_timestep
        print(f"Latest timestep starts at index {latest_start}")
        print(f"  Object quat:  [{latest_start}:{latest_start+4}]")
        print(f"  Target pose:  [{latest_start+4}:{latest_start+11}]")
        print(f"  Actions:      [{latest_start+11}:{latest_start+33}]")
        print()
        
        # Show samples from start, middle, end
        mid_t = T // 2
        print(f"Samples from trajectory:")
        print(f"  t=0 (start):")
        print(f"    obj_quat = {policy_obs[0, latest_start:latest_start+4]}")
        print(f"    target   = {policy_obs[0, latest_start+4:latest_start+11]}")
        print(f"  t={mid_t} (middle):")
        print(f"    obj_quat = {policy_obs[mid_t, latest_start:latest_start+4]}")
        print(f"    target   = {policy_obs[mid_t, latest_start+4:latest_start+11]}")
        print(f"  t={T-1} (end):")
        print(f"    obj_quat = {policy_obs[T-1, latest_start:latest_start+4]}")
        print(f"    target   = {policy_obs[T-1, latest_start+4:latest_start+11]}")
    
    # Proprio observations
    if 'proprio_obs' in demo:
        proprio_obs = demo['proprio_obs'][:]
        T, dims = proprio_obs.shape
        print(f"\n\nProprio Observations (proprio_obs): {dims}D across {T} timesteps")
        print("-" * 60)
        
        per_timestep = 123
        history_length = 5
        
        print(f"History: {history_length} timesteps x {per_timestep}D = {dims}D total")
        print()
        print("Per-timestep structure (123D):")
        print(f"  [0:23]      joint_pos          (23D)")
        print(f"  [23:46]     joint_vel          (23D)")
        print(f"  [46:111]    hand_tips_state_b  (5 bodies x 13D)")
        print(f"              [46:59]   palm   (pos(3) + quat(4) + lin_vel(3) + ang_vel(3))")
        print(f"              [59:72]   index  (13D)")
        print(f"              [72:85]   middle (13D)")
        print(f"              [85:98]   ring   (13D)")
        print(f"              [98:111]  thumb  (13D)")
        print(f"  [111:123]   contact_forces     (4 fingertips x 3D)")
        print()
        
        latest_start = (history_length - 1) * per_timestep
        print(f"Latest timestep starts at index {latest_start}")
        print(f"  Joint pos:    [{latest_start}:{latest_start+23}]")
        print(f"  Joint vel:    [{latest_start+23}:{latest_start+46}]")
        print(f"  Hand tips:    [{latest_start+46}:{latest_start+111}]")
        print(f"    Index tip pos:  [{latest_start+59}:{latest_start+62}]")
        print(f"    Middle tip pos: [{latest_start+72}:{latest_start+75}]")
        print(f"    Ring tip pos:   [{latest_start+85}:{latest_start+88}]")
        print(f"    Thumb tip pos:  [{latest_start+98}:{latest_start+101}]")
        print(f"  Contact:      [{latest_start+111}:{latest_start+123}]")
        print()
        
        # Show samples from start, middle, end
        mid_t = T // 2
        print(f"Samples from trajectory:")
        print(f"  t=0 (start):")
        print(f"    joint_pos[0:3] = {proprio_obs[0, latest_start:latest_start+3]}")
        print(f"    contact        = {proprio_obs[0, latest_start+111:latest_start+123]}")
        print(f"  t={mid_t} (middle):")
        print(f"    joint_pos[0:3] = {proprio_obs[mid_t, latest_start:latest_start+3]}")
        print(f"    contact        = {proprio_obs[mid_t, latest_start+111:latest_start+123]}")
        print(f"  t={T-1} (end):")
        print(f"    joint_pos[0:3] = {proprio_obs[T-1, latest_start:latest_start+3]}")
        print(f"    contact        = {proprio_obs[T-1, latest_start+111:latest_start+123]}")
    
    # Actions
    if 'actions' in demo:
        actions = demo['actions'][:]
        print(f"\n\nActions: {actions.shape}")
        print("-" * 60)
        print(f"{actions.shape[0]} timesteps x {actions.shape[1]} joint commands")
        mid_t = actions.shape[0] // 2
        print(f"Sample action[0]: {actions[0]}")
        print(f"Sample action[{mid_t}]: {actions[mid_t]}")
    
    # Rewards
    if 'rewards' in demo:
        rewards = demo['rewards'][:]
        print(f"\n\nRewards: {rewards.shape}")
        print("-" * 60)
        print(f"Total: {rewards.sum():.2f}, Mean: {rewards.mean():.4f}, Min: {rewards.min():.4f}, Max: {rewards.max():.4f}")

OBSERVATION BREAKDOWN

Policy Observations (obs): 165D across 75 timesteps
------------------------------------------------------------
History: 5 timesteps x 33D = 165D total

Per-timestep structure (33D):
  [0:4]    object_quat_b      (quaternion, robot base frame)
  [4:11]   target_pose_b      (3D pos + 4D quat, robot base frame)
  [11:33]  last_actions       (22D joint commands)

Latest timestep starts at index 132
  Object quat:  [132:136]
  Target pose:  [136:143]
  Actions:      [143:165]

Samples from trajectory:
  t=0 (start):
    obj_quat = [0. 0. 0. 0.]
    target   = [0. 0. 0. 0. 0. 0. 0.]
  t=37 (middle):
    obj_quat = [-1.8388604  -0.90503275  1.3865818   0.81466496]
    target   = [ 0.29392153  1.9043146   0.391958   -0.646743    1.5064652   4.3504767
 -3.532603  ]
  t=74 (end):
    obj_quat = [-1.5881258 -0.5660581 -0.5109298  3.4098852]
    target   = [-0.5225026  -0.5821446   3.6205854   1.2152443   0.50296575 -0.09230008
 -4.472432  ]


Proprio Observations (proprio

## 3. State Extractor

In [22]:
import sys
sys.path.append('/home/raymond/projects/policy_translation/IsaacLab/scripts')
from lib.state_extraction import StateExtractor

with h5py.File(dataset_path, 'r') as f:
    demo = f['data']['demo_0']
    
    print("=" * 60)
    print("STATE EXTRACTION TEST")
    print("=" * 60)
    
    # Try to extract states
    extractor = StateExtractor()
    try:
        states, actions = extractor.extract_from_dataset(demo)
        print(f"\nExtraction successful!")
        print(f"States shape: {states.shape} (expected: (T, 77))")
        print(f"Actions shape: {actions.shape}")
        print()
        print(f"State breakdown (77D):")
        print(f"  [0:23]    joint_pos       {states[0, 0:23]}")
        print(f"  [23:46]   joint_vel       {states[0, 23:46]}")
        print(f"  [46:50]   obj_quat        {states[0, 46:50]}")
        print(f"  [50:62]   fingertip_pos   {states[0, 50:62]}")
        print(f"  [62:74]   contact_forces  {states[0, 62:74]}")
        print(f"  [74:77]   target_pose     {states[0, 74:77]}")
    except Exception as e:
        print(f"\nExtraction failed: {e}")
        print()
        print("Dataset structure:")
        print(f"  Has policy_data/obs: {'policy_data' in demo and 'obs' in demo['policy_data']}")
        print(f"  Has proprio_data/obs: {'proprio_data' in demo and 'obs' in demo['proprio_data']}")
        print(f"  Has obs: {'obs' in demo}")
        print(f"  Has proprio_obs: {'proprio_obs' in demo}")

STATE EXTRACTION TEST

Extraction failed: "Expected 'policy_data/obs' in demo group. Found keys: ['actions', 'initial_state', 'obs', 'proprio_obs', 'rewards', 'states']"

Dataset structure:
  Has policy_data/obs: False
  Has proprio_data/obs: False
  Has obs: True
  Has proprio_obs: True


## 4. Summary

In [None]:
with h5py.File(dataset_path, 'r') as f:
    demo = f['data']['demo_0']
    
    print("=" * 60)
    print("DATASET SUMMARY")
    print("=" * 60)
    print(f"File: {Path(dataset_path).name}")
    print(f"Demos: {len([k for k in f['data'].keys() if k.startswith('demo_')])}")
    print(f"Trajectory length: {demo['actions'].shape[0]} steps")
    print()
    print("Saved data:")
    for key in sorted(demo.keys()):
        if isinstance(demo[key], h5py.Dataset):
            print(f"  {key}: {demo[key].shape}")
        elif isinstance(demo[key], h5py.Group):
            if 'obs' in demo[key]:
                print(f"  {key}/obs: {demo[key]['obs'].shape}")
            else:
                print(f"  {key}/: (group)")

## 5. Interactive Exploration

In [None]:
# Open for interactive exploration
f = h5py.File(dataset_path, 'r')
demo = f['data']['demo_0']

print("Dataset opened. Available: f, demo")
print("Access data like: demo['policy_data']['obs'][:]")
print("Remember to close: f.close()")

In [None]:
# Close when done
f.close()
print("Closed.")