In [1]:
%env CUDA_VISIBLE_DEVICES=4

env: CUDA_VISIBLE_DEVICES=4


In [2]:
# 누락된 import 추가
from collections import deque
import collections

In [3]:
import sys, os
from pathlib import Path

# Register the project roots on sys.path (presumably where `finetune` and
# `prismatic` live — confirm). Only entries that are not already present are
# appended, so re-running this cell does not accumulate duplicates.
sys.path += [
    project_dir
    for project_dir in (
        "/home/lcw/openvla-oft",
        "/home/lcw/openvla-oft/vla-scripts",
    )
    if project_dir not in sys.path
]

import tensorflow_datasets as tfds
from transformers import AutoProcessor

from finetune import (
    FinetuneConfig,
    RLDSBatchTransform_epi,
    PaddedCollatorForActionPrediction,
    ActionTokenizer,
    PurePromptBuilder,
    EpisodicRLDSDataset,   # ← PyTorch Dataset (RLDS 래핑)
)


  from .autonotebook import tqdm as notebook_tqdm
2025-06-30 22:03:52.143005: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-06-30 22:03:52.180928: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-06-30 22:03:52.180965: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-06-30 22:03:52.182267: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-06-30 22:03:52.1

Using LIBERO constants:
  NUM_ACTIONS_CHUNK = 8
  ACTION_DIM = 7
  PROPRIO_DIM = 8
  ACTION_PROPRIO_NORMALIZATION_TYPE = bounds_q99
If needed, manually set the correct constants in `prismatic/vla/constants.py`!


KeyboardInterrupt: 

In [None]:
# Fine-tuning configuration used by this data-inspection notebook. Fields not
# listed here (e.g. shuffle_buffer_size, which is read when the dataset is
# built) are left at whatever FinetuneConfig provides.
cfg = FinetuneConfig(
    vla_path="openvla/openvla-7b",
    data_root_dir=Path("/home/lcw/openvla-oft/datasets/modified_libero_rlds"),
    dataset_name="libero_10_no_noops",
    num_images_in_input=2,
    use_proprio=True,
    image_aug=False,
)


In [None]:
# Point at the version directory, i.e. the one that contains dataset_info.json
version_dir = cfg.data_root_dir / cfg.dataset_name / "1.0.0"
builder = tfds.builder_from_directory(version_dir)
# NOTE(review): presumably a no-op when the prepared data already exists on
# disk — confirm against the TFDS documentation.
builder.download_and_prepare()


In [None]:
# Load the processor for the checkpoint named in cfg.vla_path; its tokenizer
# and image processor are reused by everything below.
processor = AutoProcessor.from_pretrained(cfg.vla_path, trust_remote_code=True)
action_tok = ActionTokenizer(processor.tokenizer)

# Transform that is handed to the episodic dataset as `batch_transform`.
batch_transform = RLDSBatchTransform_epi(
    action_tokenizer=action_tok,
    base_tokenizer=processor.tokenizer,
    image_transform=processor.image_processor.apply_transform,
    prompt_builder_fn=PurePromptBuilder,
    use_wrist_image=(cfg.num_images_in_input > 1),
    use_proprio=cfg.use_proprio,
)

# Drop the leading entry of the first input size, keeping (H, W).
resize_hw = tuple(processor.image_processor.input_sizes[0][1:])


In [None]:
from prismatic.vla.datasets.datasets import EpisodicDataset

# Build the episodic dataset. The resize resolution reuses `resize_hw`, which
# was computed from the processor in the previous cell, instead of repeating
# the same expression here.
train_dataset = EpisodicDataset(
    data_root_dir       = cfg.data_root_dir,         # "datasets/modified_libero_rlds"
    data_mix            = cfg.dataset_name,          # "libero_10_no_noops"
    batch_transform     = batch_transform,
    resize_resolution   = resize_hw,                 # (H, W)
    shuffle_buffer_size = cfg.shuffle_buffer_size,   # e.g. 0
    image_aug           = cfg.image_aug,             # True/False
)

2025-06-30 21:52:32.702581: I tensorflow/core/grappler/optimizers/data/replicate_on_split.cc:32] Running replicate on split optimization


2025-06-30 21:52:33.386604: I tensorflow/core/grappler/optimizers/data/replicate_on_split.cc:32] Running replicate on split optimization


In [None]:
# Print the total number of steps reported by len(train_dataset).
print(f"데이터셋의 총 스텝(Step) 개수: {len(train_dataset)}")


데이터셋의 총 스텝(Step) 개수: 379


In [None]:
import matplotlib.pyplot as plt
import torchvision.transforms.functional as TF

def visualize_grid_from_step(step, title):
    """Show every image stored in ``step["pixel_values"]`` side by side.

    Args:
        step: Mapping with a ``"pixel_values"`` tensor that can be reshaped to
            ``(-1, 3, 224, 224)``.
        title: Figure-level title passed to ``plt.suptitle``.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    grid_tensor = step["pixel_values"]
    grid_tensor = grid_tensor.reshape(-1, 3, 224, 224)
    # NOTE(review): if pixel_values were normalized by the image transform,
    # to_pil_image will not reproduce the original colors — confirm.
    imgs_pil = [TF.to_pil_image(img) for img in grid_tensor]

    # squeeze=False keeps `axs` two-dimensional even for a single image; with
    # the default, plt.subplots returns a bare Axes and indexing it fails.
    fig, axs = plt.subplots(1, len(imgs_pil), figsize=(12, 3), squeeze=False)
    for i, (ax, img) in enumerate(zip(axs[0], imgs_pil)):
        ax.imshow(img)
        ax.set_title(f"Img {i}")
        ax.axis("off")
    plt.suptitle(title)
    plt.tight_layout()
    plt.show()


In [None]:
# Positional arguments, in order: max_length, pad_token_id, padding_side.
collator = PaddedCollatorForActionPrediction(
    processor.tokenizer.model_max_length,
    processor.tokenizer.pad_token_id,
    "right",
)

In [None]:
from torch.utils.data import DataLoader

# Single-process loader over the episodic dataset: four items per batch,
# unshuffled, collated with `collator`.
loader = DataLoader(
    train_dataset,
    collate_fn=collator,
    batch_size=4,
    num_workers=0,
    shuffle=False,
)


In [None]:
# Add deque to builtins so that every module can use it.
# NOTE(review): this is a workaround — presumably some project module uses
# `deque` without importing it; the cleaner fix is to add the import in that
# module. Confirm and remove this patch once that is done.
import builtins
from collections import deque

builtins.deque = deque

print("✅ deque를 전역으로 추가했습니다.")

# Check the type and structure of first_item.
print("deque 추가 후 재테스트:")
try:
    dataset_iter = iter(train_dataset)
    
    first_item = next(dataset_iter)
    print(f"first_item 타입: {type(first_item)}")
    print(f"first_item 길이: {len(first_item) if hasattr(first_item, '__len__') else 'N/A'}")
    
    # If it is a list, inspect its first element.
    if isinstance(first_item, list):
        print(f"리스트의 첫 번째 요소 타입: {type(first_item[0])}")
        if hasattr(first_item[0], 'keys'):
            print(f"첫 번째 요소의 키들: {list(first_item[0].keys())}")
            print(f"Dataset name: {first_item[0].get('dataset_name', 'no name')}")
        else:
            print(f"첫 번째 요소: {first_item[0]}")
    
    # If it is a dictionary, access it directly.
    elif hasattr(first_item, 'keys'):
        print(f"Dictionary 키들: {list(first_item.keys())}")
        print(f"Dataset name: {first_item.get('dataset_name', 'no name')}")
    
    else:
        print(f"예상과 다른 타입입니다: {first_item}")
        
except Exception as e:
    # Debug cell: report the failure and traceback instead of propagating it.
    print(f"❌ 오류: {e}")
    import traceback
    traceback.print_exc()

✅ deque를 전역으로 추가했습니다.
deque 추가 후 재테스트:
[EPISODE 1] Processing 190 steps
[FIRST EPISODE] Starting episode 1 with task: 'pick up the book and place it in the back compartm...'
[EPISODE TRANSITION] None -> libero_10_no_noops_ep_1
[STEP 10] Episode libero_10_no_noops_ep_1: 8 primary + 8 wrist → dual memory grid
[STEP 20] Episode libero_10_no_noops_ep_1: 8 primary + 8 wrist → dual memory grid
[STEP 30] Episode libero_10_no_noops_ep_1: 8 primary + 8 wrist → dual memory grid
[STEP 40] Episode libero_10_no_noops_ep_1: 8 primary + 8 wrist → dual memory grid
[STEP 50] Episode libero_10_no_noops_ep_1: 8 primary + 8 wrist → dual memory grid
[STEP 60] Episode libero_10_no_noops_ep_1: 8 primary + 8 wrist → dual memory grid
[STEP 70] Episode libero_10_no_noops_ep_1: 8 primary + 8 wrist → dual memory grid
[STEP 80] Episode libero_10_no_noops_ep_1: 8 primary + 8 wrist → dual memory grid
[STEP 90] Episode libero_10_no_noops_ep_1: 8 primary + 8 wrist → dual memory grid
[STEP 100] Episode libero_10_no_noo

In [None]:
# Option 1: Modify the dataset to return individual items instead of lists
# Add this to your dataset creation/loading code:

def flatten_dataset_items(dataset):
    """Yield single samples from a dataset whose elements may be lists of samples.

    Elements that are ``list`` instances are unpacked one level; every other
    element is yielded unchanged.
    """
    for entry in dataset:
        if not isinstance(entry, list):
            yield entry
            continue
        yield from entry

# Materialize every flattened sample in memory before building the DataLoader.
flattened_dataset = [*flatten_dataset_items(train_dataset)]
print(f"Flattened dataset length: {len(flattened_dataset)}")

# Create a simple wrapper dataset
class FlattenedDataset:
    """Minimal index-addressable wrapper around a pre-built sequence of samples."""

    def __init__(self, items):
        # Keeps a reference to the caller's sequence; nothing is copied.
        self.items = items

    def __getitem__(self, idx):
        """Return the sample stored at position ``idx``."""
        sample = self.items[idx]
        return sample

    def __len__(self):
        """Return how many samples are stored."""
        count = len(self.items)
        return count

# Use the flattened dataset. The collate callable defined in this notebook is
# `collator`; the name `collate_fn` is never defined and raised NameError.
flat_dataset = FlattenedDataset(flattened_dataset)
loader = DataLoader(flat_dataset, batch_size=1, collate_fn=collator)

# Option 2: Modify the collate function to handle list inputs
# In your data_utils.py, modify the _find_episode_pairs method:

def _find_episode_pairs(self, instances):
    """Find pairs of instances that belong to different episodes."""
    pairs = []
    
    # Handle case where instances might be lists
    flat_instances = []
    for item in instances:
        if isinstance(item, list):
            flat_instances.extend(item)
        else:
            flat_instances.append(item)
    
    # Group by episode
    episodes = {}
    for item in flat_instances:
        episode_id = item.get('episode_id', 'unknown')
        if episode_id not in episodes:
            episodes[episode_id] = []
        episodes[episode_id].append(item)
    
    # Continue with existing logic...
    episode_keys = list(episodes.keys())
    for i in range(len(episode_keys)):
        for j in range(i + 1, len(episode_keys)):
            ep1_items = episodes[episode_keys[i]]
            ep2_items = episodes[episode_keys[j]]
            pairs.extend([(item1, item2) for item1 in ep1_items for item2 in ep2_items])
    
    return pairs

# Option 3: Quick test to verify the fix works
def test_dataloader_fix(dataset=None, collate=None):
    """Flatten a list-yielding dataset and smoke-test collation on a small batch.

    Args:
        dataset: Iterable whose elements are single samples or lists of
            samples. Defaults to the notebook-level ``train_dataset``.
        collate: Callable applied to a list of up to four flattened samples.
            Defaults to the notebook-level ``collator`` (the previously used
            name ``collate_fn`` is not defined in this notebook).

    Returns:
        The list of flattened samples, or ``None`` if anything raised; the
        error and its traceback are printed rather than propagated.
    """
    try:
        # Resolve the notebook globals inside the try block so that a missing
        # global is reported like any other failure.
        if dataset is None:
            dataset = train_dataset
        if collate is None:
            collate = collator

        # Single pass: the first element is inspected while flattening, so a
        # one-shot iterator does not lose it and the dataset is not iterated twice.
        flattened_items = []
        for position, batch in enumerate(dataset):
            if isinstance(batch, list):
                if position == 0:
                    print(f"Raw item is list with {len(batch)} elements")
                    # Peek at the first few samples of the first element.
                    for i, item in enumerate(batch[:3]):
                        print(f"Item {i}: episode_id = {item.get('episode_id', 'N/A')}")
                        print(f"Item {i}: step_in_episode = {item.get('step_in_episode', 'N/A')}")
                flattened_items.extend(batch)
            else:
                flattened_items.append(batch)

        if not flattened_items:
            # An empty dataset is still a failure, as before, but with a clear message.
            raise ValueError("dataset yielded no items")

        print(f"Total flattened items: {len(flattened_items)}")

        # Collate a small batch only; the result itself is not needed.
        test_items = flattened_items[:4]
        collate(test_items)
        print("Collation successful!")

        return flattened_items

    except Exception as e:
        print(f"❌ Test failed: {e}")
        import traceback
        traceback.print_exc()
        return None

# Run the test; `flattened_items` is None when the test failed.
flattened_items = test_dataloader_fix()

[EPISODE 1] Processing 237 steps
[NEW EPISODE 3] Task changed at global step 369
  From: 'put the yellow and white mug in the microwave and ...'
  To:   'pick up the book and place it in the back compartm...'
[EPISODE TRANSITION] libero_10_no_noops_ep_2 -> libero_10_no_noops_ep_3
[PREV EPISODE] libero_10_no_noops_ep_2 had 1 images
[STEP 10] Episode libero_10_no_noops_ep_3: 8 primary + 8 wrist → dual memory grid
[STEP 20] Episode libero_10_no_noops_ep_3: 8 primary + 8 wrist → dual memory grid
[STEP 30] Episode libero_10_no_noops_ep_3: 8 primary + 8 wrist → dual memory grid
[STEP 40] Episode libero_10_no_noops_ep_3: 8 primary + 8 wrist → dual memory grid
[STEP 50] Episode libero_10_no_noops_ep_3: 8 primary + 8 wrist → dual memory grid
[STEP 60] Episode libero_10_no_noops_ep_3: 8 primary + 8 wrist → dual memory grid
[STEP 70] Episode libero_10_no_noops_ep_3: 8 primary + 8 wrist → dual memory grid
[STEP 80] Episode libero_10_no_noops_ep_3: 8 primary + 8 wrist → dual memory grid
[STEP 90] E

In [None]:
# Debug batch_transform: pull one raw step out of the TFDS builder, convert it
# to plain Python/numpy values, and feed it to batch_transform, printing the
# structure at each stage. Failures are printed, not raised.
print("🔧 batch_transform 디버깅:")
try:
    # Grab a single step from the raw RLDS data.
    ds = builder.as_dataset(split='train')
    
    for episode in ds.take(1):
        for i, step in enumerate(episode['steps'].take(1)):
            print(f"\n=== Step {i} 원본 분석 ===")
            print(f"원본 step 키들: {list(step.keys())}")
            
            # Inspect the structure inside observation.
            obs = step['observation']
            print(f"Observation 키들: {list(obs.keys())}")
            
            # Details for each key.
            # NOTE(review): the `shape` check comes before the `numpy` check, so
            # any value exposing both is printed as shape/dtype and never decoded.
            for key, val in step.items():
                if key == 'observation':
                    for obs_key, obs_val in obs.items():
                        if hasattr(obs_val, 'shape'):
                            print(f"  observation.{obs_key}: {obs_val.shape} {obs_val.dtype}")
                elif hasattr(val, 'shape'):
                    print(f"  {key}: {val.shape} {val.dtype}")
                elif hasattr(val, 'numpy'):
                    val_np = val.numpy()
                    if isinstance(val_np, bytes):
                        print(f"  {key}: '{val_np.decode('utf-8')}'")
                    else:
                        print(f"  {key}: {val_np}")
                else:
                    print(f"  {key}: {type(val)}")
            
            # Convert the step into a Python dict (values via .numpy() when available).
            print(f"\n=== Step 변환 과정 ===")
            step_dict = {}
            for key, val in step.items():
                if key == 'observation':
                    step_dict[key] = {obs_key: obs_val.numpy() if hasattr(obs_val, 'numpy') else obs_val 
                                     for obs_key, obs_val in obs.items()}
                else:
                    step_dict[key] = val.numpy() if hasattr(val, 'numpy') else val
            
            print(f"변환된 step_dict 키들: {list(step_dict.keys())}")
            print(f"observation 내부: {list(step_dict['observation'].keys())}")
            
            # Pull information out of episode_metadata.
            episode_metadata = episode['episode_metadata']
            print(f"\nEpisode metadata 키들: {list(episode_metadata.keys())}")
            
            # Add episode_id (if it is missing).
            if 'episode_id' not in step_dict:
                # Build episode_id from file_path or other info in episode_metadata.
                if 'file_path' in episode_metadata:
                    file_path = episode_metadata['file_path'].numpy().decode('utf-8')
                    episode_id = file_path.split('/')[-1]  # use the file name as episode_id
                    step_dict['episode_id'] = episode_id
                    print(f"생성된 episode_id: {episode_id}")
                else:
                    # NOTE(review): `i` is the step index from the inner enumerate
                    # (always 0 with take(1)), not an episode index.
                    step_dict['episode_id'] = f"episode_{i}"
                    print(f"기본 episode_id: episode_{i}")
            
            # Add dataset_name.
            step_dict['dataset_name'] = cfg.dataset_name
            
            print(f"\n=== batch_transform 적용 ===")
            # Now apply batch_transform.
            # NOTE(review): step_dict is built from the raw TFDS step; whether
            # batch_transform accepts this layout is not visible here — confirm.
            try:
                transformed = batch_transform(step_dict)
                print(f"✅ Transform 성공!")
                print(f"Transform 결과 타입: {type(transformed)}")
                
                if isinstance(transformed, dict):
                    print(f"Transform 결과 키들: {list(transformed.keys())}")
                    for key, val in transformed.items():
                        if hasattr(val, 'shape'):
                            print(f"  {key}: {val.shape}")
                        else:
                            print(f"  {key}: {type(val)}")
                elif isinstance(transformed, list):
                    print(f"⚠️  Transform이 list를 반환했습니다! 길이: {len(transformed)}")
                    if len(transformed) > 0:
                        print(f"첫 번째 요소 타입: {type(transformed[0])}")
                        if hasattr(transformed[0], 'keys'):
                            print(f"첫 번째 요소 키들: {list(transformed[0].keys())}")
                
            except Exception as transform_error:
                print(f"❌ Transform 오류: {transform_error}")
                import traceback
                traceback.print_exc()
            
            break  # analyze only the first step
        break  # analyze only the first episode
        
except Exception as e:
    print(f"❌ 전체 디버깅 실패: {e}")
    import traceback
    traceback.print_exc()

In [None]:
# Retry the DataLoader.
# This rebinds the notebook-level `loader` (now batch_size=2) and, on success,
# binds `batch`, which the following cells read. If an exception is raised
# before `batch` is assigned, it is only printed and `batch` stays unbound.
print("\n🚀 DataLoader 테스트:")
try:
    loader = DataLoader(
        train_dataset,
        batch_size=2,
        shuffle=False,
        collate_fn=collator,
        num_workers=0,
    )
    
    batch = next(iter(loader))
    print("✅ DataLoader 성공!")
    
    print(f"배치 타입: {type(batch)}")
    if hasattr(batch, 'keys'):
        print(f"배치 키들: {list(batch.keys())}")
        for key, val in batch.items():
            # Only entries that expose `.shape` are listed.
            if hasattr(val, "shape"):
                print(f"  {key}: {val.shape}")
    
except Exception as e:
    print(f"❌ DataLoader 실패: {e}")
    import traceback
    traceback.print_exc()

In [None]:
# List the shape of every batch entry that has one; other entries are skipped.
for key, val in batch.items():
    if not hasattr(val, "shape"):
        continue
    print(f"{key:20s} → {tuple(val.shape)}")

In [None]:
import torch
import matplotlib.pyplot as plt
import torchvision.transforms.functional as TF

# Pick one sample from the batch. The batch built in the DataLoader test cell
# uses batch_size=2, so the previously hard-coded index 3 was out of range;
# clamp it to the last available sample.
sample_idx = min(3, len(batch["pixel_values"]) - 1)
imgs = batch["pixel_values"][sample_idx]
imgs = imgs.reshape(-1, 3, 224, 224)

# One subplot per image instead of a fixed count of 4; squeeze=False keeps
# `axs` two-dimensional even when there is a single image.
fig, axs = plt.subplots(1, len(imgs), figsize=(12, 3), squeeze=False)
for i, img_t in enumerate(imgs):
    img_pil = TF.to_pil_image(img_t)
    axs[0, i].imshow(img_pil)
    axs[0, i].set_title(f"Cam {i}")
    axs[0, i].axis("off")

plt.tight_layout()
plt.show()


In [None]:
from transformers import CLIPProcessor, CLIPModel
import torchvision.transforms.functional as TF
import torch
import matplotlib.pyplot as plt

# 1. Extract the image stack of one sample.
#    The batch built in the DataLoader test cell uses batch_size=2, so the
#    previously hard-coded index 3 was out of range; clamp to the last sample.
sample_idx = min(3, len(batch["pixel_values"]) - 1)
imgs = batch["pixel_values"][sample_idx]  # presumably (num_views * 3, 224, 224) — TODO confirm
imgs = imgs.reshape(-1, 3, 224, 224)      # (num_views, 3, 224, 224)

# 2. Prepare CLIP
clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").eval().cuda()
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

# 3. Convert to PIL, then to CLIP inputs
# NOTE(review): if pixel_values were already normalized by the model's image
# transform, to_pil_image will not reproduce the original images — confirm.
imgs_pil = [TF.to_pil_image(img) for img in imgs]
clip_inputs = clip_processor(images=imgs_pil, return_tensors="pt").to("cuda")

# 4. Extract CLIP embeddings and compute their norms
with torch.no_grad():
    clip_embeds = clip_model.get_image_features(**clip_inputs)  # (num_views, embed_dim)

norms = torch.norm(clip_embeds, dim=-1)  # (num_views,)
print("📊 Norms:", norms.tolist())

# 5. Mask by norm threshold
threshold = 5.0
mask = norms > threshold  # BoolTensor of shape (num_views,)
print("Keep mask:", mask.tolist())

# 6. Keep only the images that pass the mask
valid_imgs = [img for img, keep in zip(imgs_pil, mask) if keep]

# 7. Visualize
# squeeze=False keeps `axs` two-dimensional even when there is a single image.
fig, axs = plt.subplots(1, len(imgs_pil), figsize=(12, 3), squeeze=False)
for i, (img_pil, norm_val) in enumerate(zip(imgs_pil, norms)):
    ax = axs[0, i]
    ax.imshow(img_pil)
    ax.set_title(f"Cam {i}\nNorm: {norm_val:.2f}")
    # axis("off") would also hide the spines, so the red marker below would
    # never be drawn. Hide ticks and spines individually instead.
    ax.set_xticks([])
    ax.set_yticks([])
    for spine in ax.spines.values():
        spine.set_visible(False)
    if not mask[i]:
        # Highlight a removed view with a thick red bottom edge.
        ax.spines['bottom'].set_visible(True)
        ax.spines['bottom'].set_color('red')
        ax.spines['bottom'].set_linewidth(3)
plt.tight_layout()
plt.show()

# 8. When building the memory grid, use only valid_imgs
# memory_img, memory_mask = generator.make_memory_grid(valid_imgs)
