In [1]:
from perception import realsense_sensor
from datetime import datetime
import numpy as np
%matplotlib
import matplotlib.pyplot as plt
import open3d as o3d
from os.path import join
import pickle as pkl
import time
from time import sleep
from segment_anything import SamAutomaticMaskGenerator, sam_model_registry
from belief_srs.utils.mdetr_object_detector import MDETRObjectDetector




Using matplotlib backend: <object object at 0x7f80ba753560>
Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.




In [2]:
# Run configuration: flip these flags to choose what the playback cell does.
#   PLAYBACK - replay frames from the recorded pickle file
#   SAM      - run Segment Anything mask generation on each replayed frame
#   MDETR    - run the MDETR object detector on each replayed frame
PLAYBACK = True
# PLAYBACK = False
SAM = True
# SAM = False
# MDETR = True
MDETR = False

filename = join("../data/household_shelves", "realsense_data_25-10-2023-18-45-23.pkl")
sam_ckp = "../data/sam_vit_h_4b8939.pth"

# NOTE(security): pickle.load can execute arbitrary code while unpickling;
# only load files that come from a trusted source.
# Context managers make sure the file handles are closed right after loading.
with open('../data/realsense_intrinsics.pkl', 'rb') as intrinsics_file:
    realsense_intrinsics = pkl.load(intrinsics_file)

with open(filename, 'rb') as recording_file:
    recording = pkl.load(recording_file)

# The recorded frames; the recording also carries 'video_length' and 'freq'.
data = recording['data']

In [3]:
if SAM:
    def show_anns(anns, ax):
        """Draw all segmentation masks in ``anns`` as one RGBA overlay on ``ax``.

        Args:
            anns: sequence of dicts, each with an ``'area'`` value used for
                ordering and a ``'segmentation'`` array used to index the
                overlay (presumably a boolean H x W mask - confirm against
                the mask generator's output).
            ax: axes-like object providing ``set_autoscale_on`` and ``imshow``.

        Returns:
            None. Nothing is drawn when ``anns`` is empty.
        """
        if not len(anns):
            return

        # Paint the largest masks first so smaller ones drawn later stay on top.
        by_area_desc = sorted(anns, key=lambda entry: entry['area'], reverse=True)
        ax.set_autoscale_on(False)

        mask_shape = by_area_desc[0]['segmentation'].shape
        overlay = np.ones((mask_shape[0], mask_shape[1], 4))
        overlay[..., 3] = 0  # alpha 0: fully transparent wherever no mask is painted
        for entry in by_area_desc:
            # Random RGB colour per mask with a fixed alpha of 0.35.
            rgba = np.concatenate([np.random.random(3), [0.35]])
            overlay[entry['segmentation']] = rgba
        ax.imshow(overlay)

if MDETR:
    # Only build the detector when detection is requested.
    mdetr = MDETRObjectDetector()

if PLAYBACK:
    print("Playing recording ", filename, "of length ", recording['video_length'], "recorded with freq ", recording['freq'])
    if SAM:
        # Load the SAM checkpoint and move the model to the GPU (needs a CUDA device).
        sam = sam_model_registry["default"](checkpoint=sam_ckp)
        sam.to("cuda")
        mask_generator = SamAutomaticMaskGenerator(sam, min_mask_region_area=10000)

    plt.ion()
    fig, axs = plt.subplots(2)

    # The camera intrinsics are the same for every frame, so build the Open3D
    # intrinsic object once instead of on each loop iteration.
    # (The misspelled name is kept in case other cells refer to it.)
    pinhole_intrisics = o3d.camera.PinholeCameraIntrinsic(
        realsense_intrinsics.width, realsense_intrinsics.height,
        realsense_intrinsics.fx, realsense_intrinsics.fy,
        realsense_intrinsics.cx, realsense_intrinsics.cy,
    )

    # Negates the y and z axes so the cloud looks upright, not upside-down.
    flip_transform = [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]

    # Only a single frame (index 20) of the recording is replayed here.
    for frame in data[20:21]:
        rgb = frame['rgb'].data
        depth = frame['depth'].data

        if SAM:
            # Masks are computed but only drawn if the show_anns call below is enabled.
            masks = mask_generator.generate(rgb)

        if MDETR:
            labels, bboxs, probs = mdetr.detect(rgb, object_str="book", caption="book shelf with books")
            if labels is not None:
                mdetr.plot_results(rgb, probs, bboxs, labels, ax=axs[0])
            else:
                print("No detection")

        # axs[0].imshow(frame['rgb'].data)

        # Build a coloured point cloud from the RGB-D pair.
        color_img = o3d.geometry.Image(rgb.astype(np.uint8))
        depth_img = o3d.geometry.Image(depth)
        rgbd = o3d.geometry.RGBDImage.create_from_color_and_depth(color_img, depth_img, convert_rgb_to_intensity=False)
        pcd = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd, pinhole_intrisics)

        pcd.transform(flip_transform)
        o3d.visualization.draw_geometries([pcd])
        # if SAM:
            # show_anns(masks, axs[0])
        # axs[1].imshow(frame['depth'].data)

        # Wait one recording period so playback follows the recorded frequency.
        plt.pause(1.0/recording['freq'])

Playing recording  ../data/household_shelves/realsense_data_25-10-2023-18-45-23.pkl of length  15 recorded with freq  5
