In [None]:
import os
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'
import copy
import time
import cv2 
import numpy as np 
import random

from copy import deepcopy 
from collections import deque, defaultdict
from typing import Any, Dict, List, Optional, Set, Tuple, Union
from numpy.typing import ArrayLike, NDArray
from pydantic import dataclasses, validator

import dm_control
from dm_control import mujoco as dm_mujoco
from dm_control.utils.transformations import mat_to_quat, quat_to_euler
import mujoco
from rocobench.envs import SortOneBlockTask, CabinetTask, MoveRopeTask, SweepTask, MakeSandwichTask, PackGroceryTask, MujocoSimEnv, SimRobot, visualize_voxel_scene
from lavis.models.eva_vit import create_eva_vit_g
from lavis.common.registry import registry
from omegaconf import OmegaConf
from PIL import Image
from tqdm import tqdm
import gc

In [None]:
# Camera list handed to `point_feature_cameras`; intentionally empty here.
tracked = []

# Constructor arguments are collected in one place so they are easy to scan
# and tweak. The notes below restate the original author's remarks and have
# not been verified against the rocobench implementation.
env_kwargs = dict(
    render_freq=2000,
    # Possibly important for getting RGBD images later on.
    image_hw=(400, 400),
    # Author's note: number of MuJoCo steps simulated forward before deciding
    # that the LLMs need to pick an easier-to-optimize plan.
    sim_forward_steps=300,
    error_freq=30,
    error_threshold=1e-5,
    randomize_init=True,
    # Possibly useful for speeding up point fusion.
    render_point_cloud=0,
    render_cameras=["face_panda", "face_ur5e", "teaser"],
    point_feature_cameras=tracked,
    # TODO: understand what this flag controls.
    one_obj_each=True,
)
env = PackGroceryTask(**env_kwargs)

In [None]:
# Ask the environment for its current point cloud and keep it for the cells
# below, which call `.show()` on it and index it by object name.
pointcloud = env.get_point_cloud()

In [None]:
# Call the point cloud's own `show()` with default arguments
# (presumably opens a visualisation of the whole scene - confirm in rocobench).
pointcloud.show()

In [None]:
# Rotate a saved RGB scene shot, build a pinhole intrinsic matrix for it, run
# it through OpenCV's undistortion with all-zero distortion coefficients, crop
# to the valid region and write the result to disk.
# `cv2` and `np` come from the import cell at the top of the notebook.
SCENESHOT_PATH = "rgb_image_sceneshot.png"
UNDISTORTED_PATH = "undistorted_sceneshot.png"
HALF_FOV_DEG = 45  # NOTE(review): implies a 90 degree horizontal FOV - confirm against the camera used

img = cv2.imread(SCENESHOT_PATH)
if img is None:
    # cv2.imread signals a missing/unreadable file by returning None rather
    # than raising, which would otherwise surface as an opaque error below.
    raise FileNotFoundError(f"could not read image: {SCENESHOT_PATH}")
img = cv2.rotate(img, cv2.ROTATE_90_CLOCKWISE)
h, w = img.shape[:2]

# Pinhole intrinsics with the principal point at the image centre.
focal_length = w / (2 * np.tan(HALF_FOV_DEG * np.pi / 180))
K = np.array([[focal_length, 0, w / 2],
              [0, focal_length, h / 2],
              [0, 0, 1]])
dist_coeffs = np.zeros(4)
new_camera_matrix, roi = cv2.getOptimalNewCameraMatrix(K, dist_coeffs, (w, h), 1, (w, h))
undistorted_img = cv2.undistort(img, K, dist_coeffs, None, new_camera_matrix)

# Use dedicated names for the ROI so the image dimensions `w`/`h` computed
# above are not silently overwritten.
roi_x, roi_y, roi_w, roi_h = roi
if roi_w <= 0 or roi_h <= 0:
    raise ValueError(f"getOptimalNewCameraMatrix returned an empty ROI: {roi}")
undistorted_img = undistorted_img[roi_y:roi_y + roi_h, roi_x:roi_x + roi_w]

# cv2.imwrite reports failure through its boolean return value.
if not cv2.imwrite(UNDISTORTED_PATH, undistorted_img):
    raise IOError(f"could not write image: {UNDISTORTED_PATH}")

In [None]:
# Show a single object's points inside a cube-shaped viewing volume: take the
# object's axis-aligned extents, then build a cube with side equal to the
# largest extent, centred on the extents' midpoint.
obj = "cereal"

# Fetch the points once and reduce each column with NumPy instead of running
# Python's built-in min/max element by element over the array.
# assumes xyz_pts is an (N, >=3) array-like with x, y, z in columns 0-2 - TODO confirm
obj_xyz = np.asarray(pointcloud[obj].xyz_pts)
if obj_xyz.shape[0] == 0:
    raise ValueError(f"point cloud for {obj!r} is empty; cannot compute bounds")

minx, miny, minz = obj_xyz[:, :3].min(axis=0)
maxx, maxy, maxz = obj_xyz[:, :3].max(axis=0)

# Extent along each axis.
width = maxx - minx
height = maxy - miny
depth = maxz - minz

# Find the maximum dimension; it becomes the side length of the cube.
max_dim = max(width, height, depth)

# Calculate the center of the bounding box.
center_x = (minx + maxx) / 2
center_y = (miny + maxy) / 2
center_z = (minz + maxz) / 2

# Calculate the minimum and maximum corners of the cubic bounding box.
bbox_min = (center_x - max_dim/2, center_y - max_dim/2, center_z - max_dim/2)
bbox_max = (center_x + max_dim/2, center_y + max_dim/2, center_z + max_dim/2)
bounds = (bbox_min, bbox_max)
pointcloud[obj].show(pts_size=50, bounds=bounds)

In [None]:
# Ad-hoc probes into the simulation model: a body name by id, a geom accessor
# by id, and a geom id by name. The ids 48 and 171 are hard-coded. The
# surrounding pipes presumably make leading/trailing whitespace visible.
body_48_name = env.model.id2name(48, 'body')
print(f"|{body_48_name}|")

geom_171 = env.model.geom(171)
print(geom_171)

apple_visual_geom_id = env.model.name2id('apple_visual', 'geom')
print(f"|{apple_visual_geom_id}|")