In [4]:
import os
import numpy as np

smooth_traj_dir = "smoothed_trajs"
raw_traj_dir = "individual_trajs"

# Get all trajectory files in the directories
# Collect all .npz files in each directory
smooth_traj_files = [os.path.join(smooth_traj_dir, f) for f in os.listdir(smooth_traj_dir) if f.endswith('.npz')]
raw_traj_files = [os.path.join(raw_traj_dir, f) for f in os.listdir(raw_traj_dir) if f.endswith('.npz')]

In [5]:
# Load the first file, print its keys, then aggregate from all files under those keys.
# POSITION TARGETS VERSION: Extract position targets from next_observations
merged_data_states_position = {}

if len(smooth_traj_files) == 0:
    raise RuntimeError("No trajectory files found!")

# Get keys from the first file
first_file = smooth_traj_files[0]
first_data = np.load(first_file, allow_pickle=True)
print("Keys in first file:", list(first_data.keys()))
for key in first_data.keys():
    merged_data_states_position[key] = {}

# Prepare lists to aggregate per traj and then stack at the end
obs_list = []
smoothed_obs_list = []
actions_position_list = []  # Position targets instead of velocity actions
rewards_list = []
terminals_list = []
masks_list = []
next_obs_list = []

# Now iterate through all files, populate the merged_data dict,
# and build arrays for observations, actions, rewards, etc. as we go
for filename in smooth_traj_files:
    data = np.load(filename, allow_pickle=True)
    basename = os.path.basename(filename)
    for key in data.keys():
        if key not in merged_data_states_position:
            merged_data_states_position[key] = {}
        merged_data_states_position[key][basename] = data[key]
    
    # Now process arrays for this trajectory
    obj_world = data['aruco_object_in_world']
    obj_ee = data['aruco_object_in_ee']
    ee_world = data['aruco_ee_in_world']

    smoothed_obj_world = data['smoothed_aruco_object_in_world']
    smoothed_obj_ee = data['smoothed_aruco_object_in_ee']
    smoothed_ee_world = data['smoothed_aruco_ee_in_world']
    
    ee_poses_debug = data['ee_poses_debug']
    
    states = data['states']
    n = ee_poses_debug.shape[0]  # Use ee_poses_debug length as reference
    # Sanity: all arrays have same length n
    assert obj_world.shape[0] == n
    assert obj_ee.shape[0] == n
    assert ee_world.shape[0] == n
    assert smoothed_obj_world.shape[0] == n
    assert smoothed_obj_ee.shape[0] == n
    assert smoothed_ee_world.shape[0] == n
    assert states.shape[0] == n

    # Each timestep: [obj_world (7D), ee_poses_debug (7D), states (6D)] = 20D
    obs = np.concatenate([
        obj_world,
        ee_poses_debug,
        states
    ], axis=1)
    obs_list.append(obs)

    smoothed_obs = np.concatenate([
        smoothed_obj_world,
        ee_poses_debug,
        states
    ], axis=1) 
    smoothed_obs_list.append(smoothed_obs)
    
    # Build rewards, terminals, masks
    rewards = -np.ones(n, dtype=np.float32)
    rewards[-1] = 0
    terminals = np.zeros(n, dtype=np.float32)
    terminals[-1] = 1
    masks = np.ones(n, dtype=np.float32)
    masks[-1] = 0

    rewards_list.append(rewards)
    terminals_list.append(terminals)
    masks_list.append(masks)

    # For next_observations, slide observations by 1 forward in this trajectory, last step same as last
    next_obs = np.empty_like(obs)
    next_obs[:-1] = obs[1:]
    next_obs[-1] = obs[-1]
    next_obs_list.append(next_obs)
    
    # Extract position targets from next_observations
    # Observation structure: [obj_world (7D), ee_poses_debug (7D), states (6D)] = 20D
    # ee_poses_debug is at indices 7-13: [x, y, z, qw, qx, qy, qz]
    # gripper is at index 19 (last element of states)
    ee_pose_debug_next = next_obs[:, 7:14]  # [x, y, z, qw, qx, qy, qz] = 7D
    gripper_next = next_obs[:, 19:20]  # Gripper (last element of states) = 1D
    
    # Combine into position target actions: [x, y, z, qw, qx, qy, qz, gripper] = 8D
    actions_position = np.concatenate([
        ee_pose_debug_next,  # [x, y, z, qw, qx, qy, qz] = 7D
        gripper_next  # [gripper] = 1D
    ], axis=1)
    actions_position_list.append(actions_position)

# Stack all
observations = np.concatenate(obs_list, axis=0)  # (N, 20)
smoothed_observations = np.concatenate(smoothed_obs_list, axis=0)  # (N, 20)
actions_position_targets = np.concatenate(actions_position_list, axis=0)  # (N, 8)
rewards = np.concatenate(rewards_list, axis=0)
terminals = np.concatenate(terminals_list, axis=0)
masks = np.concatenate(masks_list, axis=0)
next_observations = np.concatenate(next_obs_list, axis=0)

# Save results in merged_data-like dict for downstream code if needed
merged_data_states_position['observations'] = observations
merged_data_states_position['smoothed_observations'] = smoothed_observations
merged_data_states_position['actions_flat'] = actions_position_targets  # Position targets (8D)
merged_data_states_position['rewards'] = rewards
merged_data_states_position['terminals'] = terminals
merged_data_states_position['next_observations'] = next_observations
merged_data_states_position['masks'] = masks

# Optionally print shapes for confirmation
print("observations", observations.shape)
print("smoothed_observations", smoothed_observations.shape)
print("actions (position targets)", actions_position_targets.shape)
print("rewards", rewards.shape)
print("terminals", terminals.shape)
print("next_observations", next_observations.shape)
print("masks", masks.shape)
print(f"\nPosition targets sample (first 3):")
print(f"  Position [x, y, z]: {actions_position_targets[:3, :3]}")
print(f"  Quaternion [qw, qx, qy, qz]: {actions_position_targets[:3, 3:7]}")
print(f"  Gripper: {actions_position_targets[:3, 7]}")

Keys in first file: ['timestamps', 'states', 'actions', 'ee_poses_debug', 'object_pose', 'object_visible', 'aruco_ee_in_world', 'aruco_object_in_world', 'aruco_ee_in_object', 'aruco_object_in_ee', 'aruco_visibility', 'augmented_actions', 'metadata', 'smoothed_aruco_ee_in_world', 'smoothed_aruco_object_in_world', 'smoothed_aruco_ee_in_object', 'smoothed_aruco_object_in_ee']
observations (21163, 20)
smoothed_observations (21163, 20)
actions (position targets) (21163, 8)
rewards (21163,)
terminals (21163,)
next_observations (21163, 20)
masks (21163,)

Position targets sample (first 3):
  Position [x, y, z]: [[0.25399086 0.01049076 0.14473542]
 [0.25399086 0.01049076 0.14473542]
 [0.25399086 0.01049076 0.14473542]]
  Quaternion [qw, qx, qy, qz]: [[ 0.64023268 -0.19780934 -0.19510119  0.71617673]
 [ 0.64023268 -0.19780934 -0.19510119  0.71617673]
 [ 0.64023268 -0.19780934 -0.19510119  0.71617673]]
  Gripper: [-0.02525226 -0.02525226 -0.02525226]


In [6]:
import numpy as np

np.savez("merged_data_states_position_targets.npz", **merged_data_states_position)
print("merged_data_states_position_targets saved to merged_data_states_position_targets.npz")

merged_data_states_position_targets saved to merged_data_states_position_targets.npz


In [None]:
# Load the first file, print its keys, then aggregate from all files under those keys.
# POSITION TARGETS VERSION: Extract position targets from next_observations
merged_data_aruco_pos_ac = {}

if len(smooth_traj_files) == 0:
    raise RuntimeError("No trajectory files found!")

# Get keys from the first file
first_file = smooth_traj_files[0]
first_data = np.load(first_file, allow_pickle=True)
print("Keys in first file:", list(first_data.keys()))
for key in first_data.keys():
    merged_data_aruco_pos_ac[key] = {}

# Prepare lists to aggregate per traj and then stack at the end
obs_list = []
smoothed_obs_list = []
actions_position_list = []  # Position targets instead of velocity actions
rewards_list = []
terminals_list = []
masks_list = []
next_obs_list = []

# Now iterate through all files, populate the merged_data dict,
# and build arrays for observations, actions, rewards, etc. as we go
for filename in smooth_traj_files:
    data = np.load(filename, allow_pickle=True)
    basename = os.path.basename(filename)
    for key in data.keys():
        if key not in merged_data_aruco_pos_ac:
            merged_data_aruco_pos_ac[key] = {}
        merged_data_aruco_pos_ac[key][basename] = data[key]
    
    # Now process arrays for this trajectory
    obj_world = data['aruco_object_in_world']
    obj_ee = data['aruco_object_in_ee']
    ee_world = data['aruco_ee_in_world']

    smoothed_obj_world = data['smoothed_aruco_object_in_world']
    smoothed_obj_ee = data['smoothed_aruco_object_in_ee']
    smoothed_ee_world = data['smoothed_aruco_ee_in_world']
    
    ee_poses_debug = data['ee_poses_debug']
    
    states = data['states']
    n = ee_poses_debug.shape[0]  # Use ee_poses_debug length as reference
    # Sanity: all arrays have same length n
    assert obj_world.shape[0] == n
    assert obj_ee.shape[0] == n
    assert ee_world.shape[0] == n
    assert smoothed_obj_world.shape[0] == n
    assert smoothed_obj_ee.shape[0] == n
    assert smoothed_ee_world.shape[0] == n
    assert states.shape[0] == n

    # Each timestep: (obj_world (7), obj_ee (7), ee_world (7), last state dim) -> (22,)
    obs = np.concatenate([
        obj_world,
        obj_ee,
        ee_world,
        states
    ], axis=1)  # (n,22)
    obs_list.append(obs)

    smoothed_obs = np.concatenate([
        smoothed_obj_world,
        smoothed_obj_ee,
        smoothed_ee_world,
        states
    ], axis=1)  # (n,22)
    smoothed_obs_list.append(smoothed_obs)
    
    # Build rewards, terminals, masks
    rewards = -np.ones(n, dtype=np.float32)
    rewards[-1] = 0
    terminals = np.zeros(n, dtype=np.float32)
    terminals[-1] = 1
    masks = np.ones(n, dtype=np.float32)
    masks[-1] = 0

    rewards_list.append(rewards)
    terminals_list.append(terminals)
    masks_list.append(masks)

    # For next_observations, slide observations by 1 forward in this trajectory, last step same as last
    next_obs = np.empty_like(obs)
    next_obs[:-1] = obs[1:]
    next_obs[-1] = obs[-1]
    next_obs_list.append(next_obs)
    
    # Extract position targets from next_observations
    # Observation structure: [obj_world (7D), ee_poses_debug (7D), states (6D)] = 20D
    # ee_poses_debug is at indices 7-13: [x, y, z, qw, qx, qy, qz]
    # gripper is at index 19 (last element of states)
    ee_pose_debug_next = next_obs[:, 7:14]  # [x, y, z, qw, qx, qy, qz] = 7D
    gripper_next = next_obs[:, 19:20]  # Gripper (last element of states) = 1D
    
    # Combine into position target actions: [x, y, z, qw, qx, qy, qz, gripper] = 8D
    actions_position = np.concatenate([
        ee_pose_debug_next,  # [x, y, z, qw, qx, qy, qz] = 7D
        gripper_next  # [gripper] = 1D
    ], axis=1)
    actions_position_list.append(actions_position)

# Stack all
observations = np.concatenate(obs_list, axis=0)  # (N, 20)
smoothed_observations = np.concatenate(smoothed_obs_list, axis=0)  # (N, 20)
actions_position_targets = np.concatenate(actions_position_list, axis=0)  # (N, 8)
rewards = np.concatenate(rewards_list, axis=0)
terminals = np.concatenate(terminals_list, axis=0)
masks = np.concatenate(masks_list, axis=0)
next_observations = np.concatenate(next_obs_list, axis=0)

# Save results in merged_data-like dict for downstream code if needed
merged_data_aruco_pos_ac['observations'] = observations
merged_data_aruco_pos_ac['smoothed_observations'] = smoothed_observations
merged_data_aruco_pos_ac['actions_flat'] = actions_position_targets  # Position targets (8D)
merged_data_aruco_pos_ac['rewards'] = rewards
merged_data_aruco_pos_ac['terminals'] = terminals
merged_data_aruco_pos_ac['next_observations'] = next_observations
merged_data_aruco_pos_ac['masks'] = masks

# Optionally print shapes for confirmation
print("observations", observations.shape)
print("smoothed_observations", smoothed_observations.shape)
print("actions (position targets)", actions_position_targets.shape)
print("rewards", rewards.shape)
print("terminals", terminals.shape)
print("next_observations", next_observations.shape)
print("masks", masks.shape)
print(f"\nPosition targets sample (first 3):")
print(f"  Position [x, y, z]: {actions_position_targets[:3, :3]}")
print(f"  Quaternion [qw, qx, qy, qz]: {actions_position_targets[:3, 3:7]}")
print(f"  Gripper: {actions_position_targets[:3, 7]}")

Keys in first file: ['timestamps', 'states', 'actions', 'ee_poses_debug', 'object_pose', 'object_visible', 'aruco_ee_in_world', 'aruco_object_in_world', 'aruco_ee_in_object', 'aruco_object_in_ee', 'aruco_visibility', 'augmented_actions', 'metadata', 'smoothed_aruco_ee_in_world', 'smoothed_aruco_object_in_world', 'smoothed_aruco_ee_in_object', 'smoothed_aruco_object_in_ee']
observations (21163, 22)
smoothed_observations (21163, 22)
actions (position targets) (21163, 8)
rewards (21163,)
terminals (21163,)
next_observations (21163, 22)
masks (21163,)

Position targets sample (first 3):
  Position [x, y, z]: [[ 0.14693005  0.05928251 -0.06355061]
 [ 0.14832026  0.06227169 -0.06019267]
 [ 0.14598011  0.06177107 -0.06482117]]
  Quaternion [qw, qx, qy, qz]: [[ 0.80818778 -0.09448856 -0.52745102  0.24433553]
 [ 0.80745548 -0.07909728 -0.53841875  0.22773784]
 [ 0.81801993 -0.08619011 -0.51709516  0.23670076]]
  Gripper: [-0.0485098  -0.07134474 -0.06197049]


In [8]:
import numpy as np

np.savez("merged_data_aruco_pos_ac_targets.npz", **merged_data_aruco_pos_ac)
print("merged_data_aruco_pos_ac_targets saved to merged_data_aruco_pos_ac_targets.npz")

merged_data_aruco_pos_ac_targets saved to merged_data_aruco_pos_ac_targets.npz
