In [1]:
import os
import sys

# Ask CUDA to enumerate GPUs by PCI bus ID, so the "cuda:N" indices used
# further down refer to a stable ordering.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"

# Put the directory two levels above the working directory on the import
# path (presumably the repository root that provides the `core` package).
module_path = os.path.abspath("../..")
if module_path not in sys.path:
    sys.path.append(module_path)

In [2]:
import json

import matplotlib.pyplot as plt
import numpy as np
import open3d as o3d
import torch
from PIL import Image

from core.integrate import FeatureFusionScalableTSDFVolume
from core.labeler import CLIPTextQuerier, KMeansLabeler

In [3]:
# Display the ENABLE_HEADLESS_RENDERING entry of Open3D's build configuration;
# the render loops below create Visualizer windows, so this is a quick sanity
# check of the installed build before running them.
o3d._build_config["ENABLE_HEADLESS_RENDERING"]

True

In [4]:
# Scene identifier; it selects both the view-config JSON and the data folder.
scene = "scannet_scene0000_00"
# Per-scene experiment directory holding the fused TSDF volume and features.
save_dir = f"/storage/quanta/Experiments/feature-instance-fusion/{scene}/"

In [5]:
# Load the camera view presets for this scene; only the "views" entry of the
# JSON document is kept (views[0]["view"] is used by the render loops below).
json_pth = os.path.abspath(f"../../config/views/{scene}.json")
with open(json_pth, "r") as view_file:
    view_config = json.load(view_file)
views = view_config['views']

# Single Word and Other Query

In [6]:
# Device on which the TSDF volume lives. It is passed to the constructor below
# so the device only has to be changed in one place.
tsdf_device = "cuda:1"
tsdf_volume = FeatureFusionScalableTSDFVolume(
    voxel_size=0.015,
    sdf_trunc=0.075,
    margin=0.08,
    device=tsdf_device,
)

# NOTE: `save_dir` must still be the experiment directory set in cell In [4];
# later cells rebind it to image output folders, so re-running this cell after
# them would look for the files in the wrong place.
tsdf_volume.load(save_dir + "tsdf/tsdf_volume_unpruned.pt")
# Pre-extracted mesh geometry stored next to the volume.
verts = np.load(save_dir + "tsdf/verts.npy")
faces = np.load(save_dir + "tsdf/faces.npy")
# Attach the fused per-voxel features (LSeg, judging by the folder name).
tsdf_volume.load_feats(save_dir + "tsdf_feature_lseg/feats.pt")

In [7]:
# Sample the fused features at the mesh vertices on the CPU and keep only the
# first element of the returned sequence (later cells wrap it with
# torch.from_numpy, so it is expected to be a NumPy array).
verts_feats = tsdf_volume.extract_feat_on_grid(
    verts=verts,
    device="cpu",
)[0]

In [8]:
# Text querier used by every render loop below; it is created on GPU index 1
# and its model is loaded once up front.
clip_querier = CLIPTextQuerier(device="cuda:1")
clip_querier.load_model()

In [9]:
# Single-noun queries. Each string is sent verbatim to the text encoder and is
# also used in the output filename, so spelling matters: the earlier
# "refridgerator" and "bad" entries were typos for "refrigerator" and "bed".
noun = [
    "floor",
    "refrigerator",
    "sink",
    "sofa",
    "wall",
    "window",
    "bed",
    "cabinet",
    "counter",
    "curtain",
    "desk",
    "door",
    "chair",
]

In [10]:
# Wrap the loaded vertex/face arrays in Open3D containers and build the mesh
# that every query below recolours and renders.
mesh_vertices = o3d.utility.Vector3dVector(verts)
mesh_triangles = o3d.utility.Vector3iVector(faces)
mesh = o3d.geometry.TriangleMesh(vertices=mesh_vertices, triangles=mesh_triangles)
# Left as the last expression so the cell still displays the mesh summary.
mesh.compute_vertex_normals()

TriangleMesh with 1120153 points and 2091538 triangles.

In [11]:
# Output folder for the noun renders.
# NOTE(review): this rebinds `save_dir`, which cell In [4] set to the
# experiment data directory; from here on it names an image output folder.
save_dir = "../01_LSeg/01_single_word_and_other/01_noun/"
os.makedirs(name=save_dir, exist_ok=True)

In [12]:
# Render one image per noun query: score every mesh vertex for the query text
# against the prompt "other", colour the mesh by that score, and save a
# screenshot taken from the first stored camera view.

# Loop-invariant inputs: the feature tensor and the serialized camera view are
# the same for every query, so build them once.
img_feats = torch.from_numpy(verts_feats)
view_status = json.dumps(views[0]["view"])

for i, text in enumerate(noun):
    score = clip_querier.single_text_others_score(
        text=text,
        other_text="other",
        img_feats=img_feats,
        temperature=0.03,
    )
    color = clip_querier.score_to_color(score=score, scheme="by")
    mesh.vertex_colors = o3d.utility.Vector3dVector(color)

    vis = o3d.visualization.Visualizer()
    vis.create_window()
    try:
        vis.add_geometry(mesh)
        vis.set_view_status(view_status)
        buffer = vis.capture_screen_float_buffer(do_render=True)
        # The float buffer is scaled by 255 and truncated to 8-bit for PIL.
        pixels = (np.asarray(buffer) * 255).astype(np.uint8)
    finally:
        # A new window is opened for every query; close it so windows and
        # their rendering contexts do not pile up across iterations.
        vis.destroy_window()

    image = Image.fromarray(pixels)
    image.save(os.path.join(save_dir, "{:02d}_".format(i) + text + ".png"))

## Scope

In [13]:
# Queries that name a region or a part rather than a single object.
scope = [
    "living room",
    "bathroom",
    "legs of chair",
]

In [14]:
# Output folder for the scope renders (rebinds `save_dir` for the next loop).
save_dir = "../01_LSeg/01_single_word_and_other/02_scope/"
os.makedirs(name=save_dir, exist_ok=True)

In [15]:
# Render one image per scope query: score every mesh vertex for the query text
# against the prompt "other", colour the mesh by that score, and save a
# screenshot taken from the first stored camera view.

# Loop-invariant inputs, built once instead of once per query.
img_feats = torch.from_numpy(verts_feats)
view_status = json.dumps(views[0]["view"])

for i, text in enumerate(scope):
    score = clip_querier.single_text_others_score(
        text=text,
        other_text="other",
        img_feats=img_feats,
        temperature=0.03,
    )
    color = clip_querier.score_to_color(score=score, scheme="by")
    mesh.vertex_colors = o3d.utility.Vector3dVector(color)

    vis = o3d.visualization.Visualizer()
    vis.create_window()
    try:
        vis.add_geometry(mesh)
        vis.set_view_status(view_status)
        buffer = vis.capture_screen_float_buffer(do_render=True)
        # The float buffer is scaled by 255 and truncated to 8-bit for PIL.
        pixels = (np.asarray(buffer) * 255).astype(np.uint8)
    finally:
        # Close the per-query window so windows and rendering contexts are
        # not leaked across iterations.
        vis.destroy_window()

    image = Image.fromarray(pixels)
    image.save(os.path.join(save_dir, "{:02d}_".format(i) + text + ".png"))

## Relative Position

In [16]:
# Queries that describe a location relative to one or more objects.
relative_position = [
    "front panel of fridge",
    "middle of the bed",
    "edge of sofa",
    "backpack on the bed",
    "left of chairs",
    "middle between bed and sofa",
]

In [17]:
# Output folder for the relative-position renders (rebinds `save_dir` for the
# next loop).
save_dir = "../01_LSeg/01_single_word_and_other/03_relative_position/"
os.makedirs(name=save_dir, exist_ok=True)

In [18]:
# Render one image per relative-position query: score every mesh vertex for
# the query text against the prompt "other", colour the mesh by that score,
# and save a screenshot taken from the first stored camera view.

# Loop-invariant inputs, built once instead of once per query.
img_feats = torch.from_numpy(verts_feats)
view_status = json.dumps(views[0]["view"])

for i, text in enumerate(relative_position):
    score = clip_querier.single_text_others_score(
        text=text,
        other_text="other",
        img_feats=img_feats,
        temperature=0.03,
    )
    color = clip_querier.score_to_color(score=score, scheme="by")
    mesh.vertex_colors = o3d.utility.Vector3dVector(color)

    vis = o3d.visualization.Visualizer()
    vis.create_window()
    try:
        vis.add_geometry(mesh)
        vis.set_view_status(view_status)
        buffer = vis.capture_screen_float_buffer(do_render=True)
        # The float buffer is scaled by 255 and truncated to 8-bit for PIL.
        pixels = (np.asarray(buffer) * 255).astype(np.uint8)
    finally:
        # Close the per-query window so windows and rendering contexts are
        # not leaked across iterations.
        vis.destroy_window()

    image = Image.fromarray(pixels)
    image.save(os.path.join(save_dir, "{:02d}_".format(i) + text + ".png"))

## Verb

In [19]:
# Action queries: the text names an activity instead of an object.
verb = [
    "sit",
    "sleep",
    "brush teeth",
    "ride",
    "read",
    "travel",
]

In [20]:
# Output folder for the verb renders (rebinds `save_dir` for the next loop).
save_dir = "../01_LSeg/01_single_word_and_other/04_verb/"
os.makedirs(name=save_dir, exist_ok=True)

In [21]:
# Render one image per verb query: score every mesh vertex for the query text
# against the prompt "other", colour the mesh by that score, and save a
# screenshot taken from the first stored camera view.

# Loop-invariant inputs, built once instead of once per query.
img_feats = torch.from_numpy(verts_feats)
view_status = json.dumps(views[0]["view"])

for i, text in enumerate(verb):
    score = clip_querier.single_text_others_score(
        text=text,
        other_text="other",
        img_feats=img_feats,
        temperature=0.03,
    )
    color = clip_querier.score_to_color(score=score, scheme="by")
    mesh.vertex_colors = o3d.utility.Vector3dVector(color)

    vis = o3d.visualization.Visualizer()
    vis.create_window()
    try:
        vis.add_geometry(mesh)
        vis.set_view_status(view_status)
        buffer = vis.capture_screen_float_buffer(do_render=True)
        # The float buffer is scaled by 255 and truncated to 8-bit for PIL.
        pixels = (np.asarray(buffer) * 255).astype(np.uint8)
    finally:
        # Close the per-query window so windows and rendering contexts are
        # not leaked across iterations.
        vis.destroy_window()

    image = Image.fromarray(pixels)
    image.save(os.path.join(save_dir, "{:02d}_".format(i) + text + ".png"))

## Adjective

In [22]:
# Property and affordance queries: the text describes a quality or a
# capability instead of naming an object.
adjective = [
    "warm",
    "cold",
    "soft",
    "metal",
    "can be burned",
    "can be closed",
    "can be closed or opened",
    "can be used",
]

In [23]:
# Output folder for the adjective renders (rebinds `save_dir` for the next
# loop).
save_dir = "../01_LSeg/01_single_word_and_other/05_adjective/"
os.makedirs(name=save_dir, exist_ok=True)

In [24]:
# Render one image per adjective query: score every mesh vertex for the query
# text against the prompt "other", colour the mesh by that score, and save a
# screenshot taken from the first stored camera view.

# Loop-invariant inputs, built once instead of once per query.
img_feats = torch.from_numpy(verts_feats)
view_status = json.dumps(views[0]["view"])

for i, text in enumerate(adjective):
    score = clip_querier.single_text_others_score(
        text=text,
        other_text="other",
        img_feats=img_feats,
        temperature=0.03,
    )
    color = clip_querier.score_to_color(score=score, scheme="by")
    mesh.vertex_colors = o3d.utility.Vector3dVector(color)

    vis = o3d.visualization.Visualizer()
    vis.create_window()
    try:
        vis.add_geometry(mesh)
        vis.set_view_status(view_status)
        buffer = vis.capture_screen_float_buffer(do_render=True)
        # The float buffer is scaled by 255 and truncated to 8-bit for PIL.
        pixels = (np.asarray(buffer) * 255).astype(np.uint8)
    finally:
        # Close the per-query window so windows and rendering contexts are
        # not leaked across iterations.
        vis.destroy_window()

    image = Image.fromarray(pixels)
    image.save(os.path.join(save_dir, "{:02d}_".format(i) + text + ".png"))

## Indirect word, inference

In [25]:
# Indirect queries: the target has to be inferred from the description.
indirect = [
    "food",
    "cold food inside",
    "place for cooking",
    "place for sleeping",
    "sleeping",
]

In [26]:
# Output folder for the indirect-query renders (rebinds `save_dir` for the
# next loop).
save_dir = "../01_LSeg/01_single_word_and_other/06_indirect/"
os.makedirs(name=save_dir, exist_ok=True)

In [27]:
# Render one image per indirect query: score every mesh vertex for the query
# text against the prompt "other", colour the mesh by that score, and save a
# screenshot taken from the first stored camera view.

# Loop-invariant inputs, built once instead of once per query.
img_feats = torch.from_numpy(verts_feats)
view_status = json.dumps(views[0]["view"])

for i, text in enumerate(indirect):
    score = clip_querier.single_text_others_score(
        text=text,
        other_text="other",
        img_feats=img_feats,
        temperature=0.03,
    )
    color = clip_querier.score_to_color(score=score, scheme="by")
    mesh.vertex_colors = o3d.utility.Vector3dVector(color)

    vis = o3d.visualization.Visualizer()
    vis.create_window()
    try:
        vis.add_geometry(mesh)
        vis.set_view_status(view_status)
        buffer = vis.capture_screen_float_buffer(do_render=True)
        # The float buffer is scaled by 255 and truncated to 8-bit for PIL.
        pixels = (np.asarray(buffer) * 255).astype(np.uint8)
    finally:
        # Close the per-query window so windows and rendering contexts are
        # not leaked across iterations.
        vis.destroy_window()

    image = Image.fromarray(pixels)
    image.save(os.path.join(save_dir, "{:02d}_".format(i) + text + ".png"))

## Logical combination

In [28]:
# Queries that combine or negate object names with "and" / "not" / "but not".
logical_combination = [
    "bed and sofa",
    "toilet and curtain",
    "not on the floor",
    "toilet but not sink",
]

In [29]:
# Output folder for the logical-combination renders (rebinds `save_dir` for
# the next loop).
save_dir = "../01_LSeg/01_single_word_and_other/07_logical_combination/"
os.makedirs(name=save_dir, exist_ok=True)

In [30]:
# Render one image per logical-combination query: score every mesh vertex for
# the query text against the prompt "other", colour the mesh by that score,
# and save a screenshot taken from the first stored camera view.

# Loop-invariant inputs, built once instead of once per query.
img_feats = torch.from_numpy(verts_feats)
view_status = json.dumps(views[0]["view"])

for i, text in enumerate(logical_combination):
    score = clip_querier.single_text_others_score(
        text=text,
        other_text="other",
        img_feats=img_feats,
        temperature=0.03,
    )
    color = clip_querier.score_to_color(score=score, scheme="by")
    mesh.vertex_colors = o3d.utility.Vector3dVector(color)

    vis = o3d.visualization.Visualizer()
    vis.create_window()
    try:
        vis.add_geometry(mesh)
        vis.set_view_status(view_status)
        buffer = vis.capture_screen_float_buffer(do_render=True)
        # The float buffer is scaled by 255 and truncated to 8-bit for PIL.
        pixels = (np.asarray(buffer) * 255).astype(np.uint8)
    finally:
        # Close the per-query window so windows and rendering contexts are
        # not leaked across iterations.
        vis.destroy_window()

    image = Image.fromarray(pixels)
    image.save(os.path.join(save_dir, "{:02d}_".format(i) + text + ".png"))

: 