In [1]:
import torch
from pprint import pprint

print("Debugging...")


def custom_repr(self):
    """Return a compact tensor repr that shows only the shape.

    Example: a 2x3 tensor is rendered as ``{Tensor:(2, 3)}``. This function is
    installed as ``torch.Tensor.__repr__`` below, so every tensor printed
    after this cell runs uses the short form.

    To also show the values, append ``original_repr(self)``:
        return f'{{Tensor:{tuple(self.shape)}}} {original_repr(self)}'
    """
    return f'{{Tensor:{tuple(self.shape)}}}'


# Capture the stock repr only the first time this cell runs in a kernel.
# A plain `original_repr = torch.Tensor.__repr__` would, on a re-run, grab the
# already-patched custom_repr and lose the real implementation (and make the
# "show values" variant above recurse forever).
original_repr = globals().get('original_repr', torch.Tensor.__repr__)
torch.Tensor.__repr__ = custom_repr

Debugging...


In [6]:
from typing import List, Optional
from pathlib import Path
import os
import pickle

import tap
import cv2
import numpy as np
import torch
import blosc
from PIL import Image
import einops

from calvin_env.envs.play_table_env import get_env
from utils.utils_with_calvin import (
    keypoint_discovery,
    deproject,
    get_gripper_camera_view_matrix,
    convert_rotation
)

In [22]:
import cv2
import numpy as np

def write_video_from_array(array, output_file, fps=30, is_rgb=False):
    """
    Writes an array of frames of shape (n_frames, h, w, c) to a video file.

    Parameters:
    - array: array-like of shape (n_frames, h, w, c) with c == 3 and dtype uint8.
      Frames are handed to OpenCV, which treats the channel order as BGR.
    - output_file: The path of the output video file (e.g., 'output_video.mp4').
      May be a str or a path-like object; it is converted with str().
    - fps: Frames per second of the output video (default is 30).
    - is_rgb: If True, frames are assumed to be RGB and are flipped to BGR
      before writing. The default (False) passes frames through unchanged,
      which is the original behaviour.

    Raises:
    - ValueError: if the array is not 4-D, does not have 3 channels, or is not uint8.
    - OSError: if the video writer could not be opened for output_file.
    """
    array = np.asarray(array)

    # Validate up front so failures are explicit instead of a silent bad file
    if array.ndim != 4:
        raise ValueError(
            f"Array must have shape (n_frames, h, w, c); got {array.ndim} dimension(s)."
        )
    n_frames, h, w, c = array.shape

    # Ensure the array has 3 color channels (RGB or BGR)
    if c != 3:
        raise ValueError("Array must have 3 color channels (RGB/BGR).")

    # The writer is fed 8-bit frames; other dtypes would not be encoded correctly
    if array.dtype != np.uint8:
        raise ValueError(
            f"Array must have dtype uint8; got {array.dtype}. Convert before writing."
        )

    # Define the codec and create VideoWriter object
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Codec for mp4
    out = cv2.VideoWriter(str(output_file), fourcc, fps, (w, h))

    # VideoWriter does not raise on failure (bad path, missing codec); check explicitly
    if not out.isOpened():
        out.release()
        raise OSError(f"Could not open video writer for {output_file}")

    try:
        # Write each frame to the video
        for frame in array:
            if is_rgb:
                frame = frame[..., ::-1]  # RGB -> BGR
            # Reversed/sliced views are not contiguous; hand OpenCV a contiguous buffer
            out.write(np.ascontiguousarray(frame))
    finally:
        # Release the video writer even if a write fails
        out.release()
    print(f"Video saved as {output_file}")

# Example usage:
# array = np.random.randint(0, 256, (100, 480, 640, 3), dtype=np.uint8)
# write_video_from_array(array, 'output_video.mp4', fps=30)


In [3]:
# Notebook configuration: dataset root (relative to the notebook's working
# directory) and the sub-directory (split) of that root to read from.
root_dir = Path("../calvin/dataset/calvin_debug_dataset")
split = "training"

In [24]:
# Load the language-annotation index for this split.
# NOTE: allow_pickle=True unpickles arbitrary Python objects; only load
# annotation files that come from a trusted source.
annotations = np.load(
    f'{root_dir}/{split}/lang_annotations/auto_lang_ann.npy',
    allow_pickle=True
).item()

# Each entry of annotations['info']['indx'] is an inclusive (start_id, end_id)
# range of episode frame ids belonging to one annotation.
len_anno = len(annotations['info']['indx'])
for anno_ind, (start_id, end_id) in enumerate(annotations['info']['indx']):
    print(f'Processing {anno_ind}/{len_anno}, start_id:{start_id}, end_id:{end_id}, len:{end_id-start_id+1}')

    ep_imgs = []
    for ep_id in range(start_id, end_id + 1):
        episode = 'episode_{:07d}.npz'.format(ep_id)
        # np.load on an .npz returns a lazy NpzFile that keeps the file open;
        # the context manager closes it once the frame has been read, so we
        # do not leave one open handle per frame.
        with np.load(f'{root_dir}/{split}/{episode}') as data:
            rgb_static = data['rgb_static']
        ep_imgs.append(rgb_static)

    ep_imgs = np.stack(ep_imgs, axis=0)
    # Frames stacked vertically into a single tall image. Not used by the
    # cells visible here; kept so any later cell relying on it still works.
    ep_img = einops.rearrange(ep_imgs, 'b h w c -> (b h) w c')

    # OpenCV's writer treats frames as BGR, while these frames come from the
    # 'rgb_static' key (presumably RGB — confirm against the dataset), so the
    # channel axis is reversed to avoid swapped red/blue in the video.
    write_video_from_array(
        np.ascontiguousarray(ep_imgs[..., ::-1]), f'anno_{anno_ind}.mp4', fps=30
    )
    # Only the first annotation is rendered; remove this break to export all.
    break




Processing 0/9, start_id:358656, end_id:358720, len:65
Video saved as ep_0.mp4


In [None]:
# Per-frame preprocessing: load one episode file, rebuild point clouds from the
# depth maps, normalize the RGB images and append everything to `datas`.
# NOTE(review): this cell reads `episode`, `env`, `datas` and `ann_id` without
# defining them; `env`, `datas` and `ann_id` are not created in any cell
# visible here — confirm they exist before running.


def _deproject_to_image_grid(cam, depth):
    """Deproject `depth` through `cam` and reshape the points to (H, W, 3)."""
    points = deproject(
        cam, depth,
        homogeneous=False, sanity_check=False
    )
    # presumably deproject returns (3, H*W); the transpose makes it (H*W, 3)
    # — TODO confirm against utils.utils_with_calvin.deproject
    points = points.transpose(1, 0)
    return np.reshape(points, (depth.shape[0], depth.shape[1], 3))


data = np.load(f'{root_dir}/{split}/{episode}')

# Shapes below are the original author's annotations (not checked here)
rgb_static = data['rgb_static']  # (200, 200, 3)
rgb_gripper = data['rgb_gripper']  # (84, 84, 3)
depth_static = data['depth_static']  # (200, 200)
depth_gripper = data['depth_gripper']  # (84, 84)

# Put the simulator in the recorded state so the camera objects match this frame
# data['robot_obs'] is (15,), data['scene_obs'] is (24,)
env.reset(robot_obs=data['robot_obs'], scene_obs=data['scene_obs'])
static_cam, gripper_cam = env.cameras[0], env.cameras[1]
gripper_cam.viewMatrix = get_gripper_camera_view_matrix(gripper_cam)

# Point clouds laid out on the same pixel grid as their depth maps
static_pcd = _deproject_to_image_grid(static_cam, depth_static)
gripper_pcd = _deproject_to_image_grid(gripper_cam, depth_gripper)

# map RGB to [-1, 1]
rgb_static = rgb_static / 255. * 2 - 1
rgb_gripper = rgb_gripper / 255. * 2 - 1

# Proprioception: robot_obs[0:3], robot_obs[3:6], and the last robot_obs entry
# binarized (1.0 if > 0 else 0.0) — 3 + 3 + 1 = 7 values for a 1-D robot_obs
robot_obs = data['robot_obs']
gripper_open = (robot_obs[[-1]] > 0).astype(np.float32)
proprio = np.concatenate(
    [robot_obs[:3], robot_obs[3:6], gripper_open],
    axis=-1
)

# Put them into a dict
for _key, _value in (
    ('static_pcd', static_pcd),      # (200, 200, 3)
    ('static_rgb', rgb_static),      # (200, 200, 3)
    ('gripper_pcd', gripper_pcd),    # (84, 84, 3)
    ('gripper_rgb', rgb_gripper),    # (84, 84, 3)
    ('proprios', proprio),           # (7,)
    ('annotation_id', ann_id),       # int
):
    datas[_key].append(_value)