In [2]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F

from lerobot.datasets.lerobot_dataset import LeRobotDataset


# ======================== Configuration ========================
# Frame-level table read with pd.read_parquet; it must contain every column
# listed in `need_cols` further down.
parquet_path = "/home/joy4mj/Feel2Grasp/train.parquet"  # NOTE(review): absolute, machine-specific path
# Arguments forwarded to LeRobotDataset(...), which supplies the camera frames.
repo_id = "mjkim00/Feel2Grasp"
revision = "main"
video_backend = "pyav"

# Checkpoint loaded with torch.load; the keys read from it are "latent_dim",
# "encoder_state_dict" and "resize_hw".
encoder_ckpt_path = "./ae_out/encoder.pt"
# Number of frames encoded per call to encode_front_batch.
batch_size = 256
# The encoder and every image batch are moved to this device.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Destination of the compressed .npz replay buffer written at the end of the cell.
out_path = "replay_buffer_iql_72d.npz"


# Encoder
class ConvEncoder(nn.Module):
    """Convolutional image encoder that maps a (B, 3, H, W) batch to (B, latent_dim).

    Four Conv2d layers (kernel 4, stride 2, padding 1), each followed by an
    in-place ReLU, widen the channels 3 -> 32 -> 64 -> 128 -> 256. An adaptive
    average pool then fixes the spatial size at 4x4, so the final linear layer
    always receives 256 * 4 * 4 features.

    The attribute names (`net`, `fc`) and the layer order inside `net`
    determine the state_dict keys, so they must stay as they are for saved
    checkpoints to load.
    """

    def __init__(self, latent_dim: int):
        super().__init__()
        widths = (3, 32, 64, 128, 256)

        stages = []
        for c_in, c_out in zip(widths[:-1], widths[1:]):
            stages.append(nn.Conv2d(c_in, c_out, kernel_size=4, stride=2, padding=1))
            stages.append(nn.ReLU(inplace=True))
        stages.append(nn.AdaptiveAvgPool2d((4, 4)))

        self.net = nn.Sequential(*stages)
        self.fc = nn.Linear(widths[-1] * 4 * 4, latent_dim)

    def forward(self, x):
        """Return the latent vectors for the image batch `x`."""
        pooled = self.net(x)
        return self.fc(pooled.flatten(start_dim=1))

# ---- Restore the image encoder from its checkpoint and freeze it ----
# NOTE(review): depending on the installed torch version, torch.load may
# unpickle arbitrary objects; only point this at a trusted checkpoint.
ckpt = torch.load(encoder_ckpt_path, map_location="cpu")

encoder = ConvEncoder(ckpt["latent_dim"])
encoder.load_state_dict(ckpt["encoder_state_dict"])
encoder.to(device)
encoder.eval()

# The encoder is only used for inference here, so no parameter needs gradients.
for weight in encoder.parameters():
    weight.requires_grad = False

# Spatial size (H, W) passed as `size=` to F.interpolate before encoding.
resize_hw = tuple(ckpt["resize_hw"])

# ---- Read the frame table and turn its columns into arrays ----
df = pd.read_parquet(parquet_path)

# Every column this cell reads; fail early and list what is available otherwise.
need_cols = [
    "action",
    "observation.state",
    "episode_index",
    "frame_index",
    "index",
    "left_image_circle",
    "right_image_circle",
    "circle_reward",
]
missing = [col for col in need_cols if col not in df.columns]
if len(missing) > 0:
    raise KeyError(f"Missing columns: {missing}\nAvailable: {list(df.columns)}")

# Order rows by episode and then by frame, so that row k + 1 is the temporal
# successor of row k whenever both belong to the same episode.
df = df.sort_values(["episode_index", "frame_index"]).reset_index(drop=True)

N = df.shape[0]
print("N =", N)

ep, fr, global_idx = (
    df[col].to_numpy() for col in ("episode_index", "frame_index", "index")
)

# Each (episode, frame) pair must identify exactly one row.
dup = df.duplicated(["episode_index", "frame_index"]).any()
if dup:
    raise ValueError("Duplicated (episode_index, frame_index) rows exist in parquet. Fix this first.")

# Per-row vectors are stacked into 2-D float32 arrays.
obs_state = np.stack(df["observation.state"].tolist()).astype(np.float32)  # must be (N, 6), checked below
actions = np.stack(df["action"].tolist()).astype(np.float32)               # (N, Da)
lr01 = df[["left_image_circle", "right_image_circle"]].to_numpy().astype(np.float32)  # (N, 2)
rewards = df["circle_reward"].to_numpy().astype(np.float32)                # (N,)

if obs_state.shape[1:] != (6,):
    raise ValueError(f"observation.state expected (N,6) but got {obs_state.shape}")

# Reward rescaling: divide the raw circle_reward by 10.
rewards = rewards / 10.0


# Load the LeRobotDataset; its samples provide the "observation.images.front"
# frames that are encoded below, plus episode_index / frame_index per sample.
ds = LeRobotDataset(repo_id, revision=revision, video_backend=video_backend)
print("len(ds) =", len(ds))
print("ds.meta =", ds.meta)

def get_sample(ds, i: int):
    """Return `ds[int(i)]`.

    The index is coerced to a built-in int first, so NumPy integer scalars
    (or anything else `int()` accepts) can be passed directly.
    """
    return ds[int(i)]

def get_key(sample, key):
    """Return `sample[key]`.

    Raises:
        KeyError: if `key` is not in `sample`. The message lists up to the
            first 30 keys of the sample to help spot a misnamed feature.
    """
    if key not in sample:
        raise KeyError(f"Key {key} not found in sample keys: {list(sample.keys())[:30]} ...")
    return sample[key]

# ---- Map every parquet row to its LeRobotDataset index (ds_indices) ----
# Fast path: use the parquet "index" column directly when
#   (a) every value is a valid dataset position, and
#   (b) a spot check of up to `check_k` evenly spaced rows agrees with the
#       dataset on (episode_index, frame_index).
# Slow path: scan the dataset once and look each row up by (episode, frame).
# NOTE(review): every get_sample call loads a full sample, which presumably
# includes decoding the camera frames; the slow path may therefore take a
# long time on large datasets.
check_k = min(200, N)
n_ds = len(ds)

# A value outside [0, len(ds)) can never be a direct dataset position. Testing
# the range first routes such data to the slow path, instead of letting ds[i]
# fail (or, depending on how the dataset handles negative indices, fetch an
# unintended row) during the spot check or during encoding later on.
ok_direct = N == 0 or (int(global_idx.min()) >= 0 and int(global_idx.max()) < n_ds)

if ok_direct:
    for k in np.linspace(0, N - 1, check_k, dtype=int):
        i = int(global_idx[k])
        samp = get_sample(ds, i)
        ep_ds = int(get_key(samp, "episode_index"))
        fr_ds = int(get_key(samp, "frame_index"))
        if ep_ds != int(ep[k]) or fr_ds != int(fr[k]):
            ok_direct = False
            break

if not ok_direct:
    print("[WARN] parquet.index != ds index mapping. Building (episode,frame)->ds_index map...")
    map_epfr_to_dsidx = {}
    for i in range(n_ds):
        samp = get_sample(ds, i)
        ep_i = int(get_key(samp, "episode_index"))
        fr_i = int(get_key(samp, "frame_index"))
        map_epfr_to_dsidx[(ep_i, fr_i)] = i

    ds_indices = np.empty((N,), dtype=np.int64)
    for k in range(N):
        key = (int(ep[k]), int(fr[k]))
        if key not in map_epfr_to_dsidx:
            raise KeyError(f"Cannot find ds index for (episode,frame)={key}")
        ds_indices[k] = map_epfr_to_dsidx[key]
else:
    ds_indices = global_idx.astype(np.int64)

# Front camera image -> frozen encoder -> latent vector z
@torch.no_grad()
def encode_front_batch(ds, idx_batch):
    """Encode the front-camera frames at dataset indices `idx_batch`.

    Each frame is read from `ds` under the key "observation.images.front",
    converted to a float tensor scaled to [0, 1], stacked into one batch,
    resized to `resize_hw` with bilinear interpolation and passed through the
    module-level `encoder` on `device`.

    Args:
        ds: indexable dataset whose samples contain "observation.images.front".
        idx_batch: iterable of dataset indices.

    Returns:
        float32 NumPy array with one encoder output row per index.

    Assumes all frames in the batch share one shape (torch.stack needs that).
    """

    def _as_unit_float(img):
        # NumPy input: wrap as a tensor, moving channels first when the array
        # is laid out as (H, W, 3).
        if isinstance(img, np.ndarray):
            channels_last = img.ndim == 3 and img.shape[-1] == 3
            img = torch.from_numpy(img)
            if channels_last:
                img = img.permute(2, 0, 1)

        # uint8 data is always treated as 0..255. Other dtypes are rescaled
        # only when their maximum exceeds 1.5, i.e. they look like 0..255.
        was_uint8 = img.dtype == torch.uint8
        img = img.float()
        if was_uint8 or img.max() > 1.5:
            img = img / 255.0
        return img

    frames = [
        _as_unit_float(get_key(get_sample(ds, int(i)), "observation.images.front"))
        for i in idx_batch
    ]

    batch = torch.stack(frames, dim=0).to(device)  # (B, 3, H, W)
    batch = F.interpolate(batch, size=resize_hw, mode="bilinear", align_corners=False)
    latents = encoder(batch)  # (B, latent_dim)
    return latents.detach().cpu().numpy().astype(np.float32)

# Latent codes for every row, filled in chunks of `batch_size`.
Z = np.empty((N, ckpt["latent_dim"]), dtype=np.float32)

for batch_no, lo in enumerate(range(0, N, batch_size)):
    hi = min(lo + batch_size, N)
    Z[lo:hi] = encode_front_batch(ds, ds_indices[lo:hi])
    # Progress line for the first batch and every 20th batch after it.
    if batch_no % 20 == 0:
        print(f"encoded {hi}/{N}")

# State vector layout: [latent z | left circle flag, right circle flag | observation.state (6)]
states = np.concatenate((Z, lr01, obs_state), axis=1).astype(np.float32)
# The replay buffer is expected to be 72-dimensional (64 + 2 + 6).
if not states.shape[1] == 72:
    raise ValueError(f"states dim expected 72 but got {states.shape}")


# ---- Terminal flags and successor states ----
# Row t continues into row t + 1 only when both rows belong to the same
# episode and the frame index advances by exactly one. Every other row,
# and always the last row, is terminal.
same_episode = ep[:-1] == ep[1:]
frame_advances = fr[:-1] + 1 == fr[1:]
cont = same_episode & frame_advances

terminals = np.zeros((N,), dtype=np.float32)
terminals[:-1] = 1.0 - cont.astype(np.float32)
terminals[-1] = 1.0

# Non-terminal rows take the following row as their next state; terminal rows
# point back at their own state, so no transition crosses an episode boundary.
next_states = np.empty_like(states)
terminal_idx = np.where(terminals > 0.5)[0]
moving_idx = np.where(terminals <= 0.5)[0]  # never contains the last row
next_states[terminal_idx] = states[terminal_idx]
next_states[moving_idx] = states[moving_idx + 1]


# ---- Write the replay buffer and report array shapes ----
replay_arrays = {
    "observations": states,
    "actions": actions.astype(np.float32),
    "rewards": rewards.astype(np.float32),
    "next_observations": next_states,
    "terminals": terminals,
    # Bookkeeping columns, so each transition can be traced back to its source row.
    "episode_index": ep.astype(np.int32),
    "frame_index": fr.astype(np.int32),
    "ds_index": ds_indices.astype(np.int64),
}
np.savez_compressed(out_path, **replay_arrays)

print("Saved:", out_path)

shape_report = []
for tag, arr in (
    ("S", states),
    ("A", actions),
    ("R", rewards),
    ("S'", next_states),
    ("D", terminals),
):
    shape_report.extend((tag, arr.shape))
print("Shapes:", *shape_report)


N = 53936
len(ds) = 53936
ds.meta = LeRobotDatasetMetadata({
    Repository ID: 'mjkim00/Feel2Grasp',
    Total episodes: '200',
    Total frames: '53936',
    Features: '['action', 'observation.state', 'observation.images.front', 'observation.images.left', 'observation.images.right', 'timestamp', 'frame_index', 'episode_index', 'index', 'task_index']',
})',

encoded 256/53936


KeyboardInterrupt: 

In [None]:
import numpy as np

# Sanity check of the saved replay buffer: the terminal flags must agree with
# the stored (episode_index, frame_index) sequence.
#
# np.load on an .npz file returns a lazily-read archive that holds an open
# file handle. Reading inside a `with` block closes it as soon as the arrays
# are in memory.
with np.load("replay_buffer_iql_72d.npz") as d:
    ep = d["episode_index"]
    fr = d["frame_index"]
    done = d["terminals"]

# check episode/timestep alignment
# done == 0 at row t: row t + 1 must be the next frame of the same episode.
# The last row is excluded because it has no successor.
idx = np.where(done[:-1] < 0.5)[0]
assert np.all(ep[idx] == ep[idx+1]), "Found done=0 but episode changes!"
assert np.all(fr[idx+1] == fr[idx] + 1), "Found done=0 but frame_index not consecutive!"

# done == 1 at row t: row t + 1 must NOT be a valid continuation of row t.
idx = np.where(done[:-1] > 0.5)[0]
bad = np.where((ep[idx] == ep[idx+1]) & (fr[idx+1] == fr[idx] + 1))[0]
assert len(bad) == 0, "Found done=1 but next is still a valid continuation!"

print("OK: episode/timestep alignment is consistent.")


OK: episode/timestep alignment is consistent.


In [None]:
import numpy as np
# Reward statistics of a saved replay buffer.
# NOTE(review): this reads "replay_buffer_iql_normalized.npz", whereas the
# cell that builds the buffer writes "replay_buffer_iql_72d.npz". Nothing
# visible in this notebook creates the normalized file -- confirm it is the
# intended input.
#
# np.load on an .npz file keeps a file handle open until the archive is
# closed; the `with` block releases it once the array has been read.
with np.load("replay_buffer_iql_normalized.npz") as d:
    r = d["rewards"]

print("N:", len(r))
print("reward min/max:", r.min(), r.max())
# Rewards above 0.9 are counted as successes (presumably reward 1.0 marks a
# successful step after rescaling -- TODO confirm).
print("count success:", (r > 0.9).sum())
print("success ratio:", (r > 0.9).mean())

N: 53936
reward min/max: -0.1 1.0
count success: 5932
success ratio: 0.10998220112726194
