In [1]:
import os
import sys

# Ask CUDA to number devices by PCI bus id, so the "cuda:N" indices used in this
# notebook refer to a stable physical ordering.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"

# Put the directory two levels above the working directory on the import path
# (presumably the repository root that holds the `core` package imported below).
module_path = os.path.abspath("../..")
if module_path not in sys.path:
    sys.path.append(module_path)

In [2]:
import json

import matplotlib.pyplot as plt
import numpy as np
import open3d as o3d
import torch
from PIL import Image

from core.integrate import FeatureFusionScalableTSDFVolume
from core.dataset import ScanNet
from core.labeler import CLIPTextQuerier, KMeansLabeler

In [3]:
# Display the "ENABLE_HEADLESS_RENDERING" entry of Open3D's build configuration.
# NOTE(review): `_build_config` is a private attribute, so this lookup may not be
# stable across Open3D releases — confirm when upgrading.
o3d._build_config["ENABLE_HEADLESS_RENDERING"]

True

# Render and save figures

In [4]:
# ScanNet dataset wrapper plus the NYU40 lookup tables used later for vertex
# coloring and for the text queries.
dataset = ScanNet("/home/quanta/Datasets/ScanNet/")
nyu40_color = dataset.nyu40id_to_color
# Class names ordered by NYU40 id (0..40), so a list position equals its class id.
nyu40_class = [dataset.nyu40_id_to_class[class_id] for class_id in range(41)]

In [5]:
# Scene identifier and the directory holding this scene's precomputed artifacts.
scene = "scannet_scene0000_00"
save_dir = f"/storage/quanta/Experiments/feature-instance-fusion/{scene}/"

In [6]:
# Load this scene's saved view settings; each entry is later handed to the
# Open3D visualizer and its "tag" is used in the output file name.
json_pth = os.path.abspath(f"../../config/views/{scene}.json")
with open(json_pth, "r") as view_file:
    views = json.load(view_file)["views"]

In [7]:
# Device on which the TSDF volume lives. It is defined once and passed to the
# constructor, so changing the GPU only requires editing this line.
tsdf_device = "cuda:1"
tsdf_volume = FeatureFusionScalableTSDFVolume(
    voxel_size=0.015,
    sdf_trunc=0.075,
    margin=0.08,
    device=tsdf_device,
)
# Restore the previously saved volume, the mesh extracted from it, and the
# LSeg features stored alongside it.
tsdf_volume.load(save_dir + "tsdf/tsdf_volume_unpruned.pt")
verts = np.load(save_dir + "tsdf/verts.npy")
faces = np.load(save_dir + "tsdf/faces.npy")
tsdf_volume.load_feats(save_dir + "tsdf_feature_lseg/feats.pt")

In [8]:
# Query the volume's features at the mesh vertices on the CPU; only the first
# element of the returned value is kept.
verts_feats = tsdf_volume.extract_feat_on_grid(
    verts=verts,
    device="cpu",
)[0]

In [9]:
# Create the CLIP text querier on the second GPU and load its model weights.
clip_querier = CLIPTextQuerier(device="cuda:1")
clip_querier.load_model()

In [10]:
# Match every vertex feature against the NYU40 class names; the result is used
# below as a per-vertex index into the NYU40 color table.
vertex_feat_tensor = torch.from_numpy(verts_feats)
labels = clip_querier.multi_text_query(texts=nyu40_class, img_feats=vertex_feat_tensor)

In [15]:
# Wrap the vertex/face arrays in Open3D containers and build the triangle mesh.
mesh_vertices = o3d.utility.Vector3dVector(verts)
mesh_triangles = o3d.utility.Vector3iVector(faces)
mesh = o3d.geometry.TriangleMesh(vertices=mesh_vertices, triangles=mesh_triangles)
mesh.compute_vertex_normals()

# Paint each vertex with the NYU40 color of its predicted class, scaled from
# 0-255 down to the 0-1 range.
color = nyu40_color[labels] / 255
mesh.vertex_colors = o3d.utility.Vector3dVector(color)

In [18]:
# Directory that receives the rendered PNGs; created if it does not exist yet.
# NOTE(review): this rebinds `save_dir`, which earlier pointed at the experiment
# directory used to load the TSDF artifacts. A later cell assigns the experiment
# path again before reloading them — a separate name would avoid the hidden state.
save_dir = "../01_LSeg/02_nyu40_multi_class_query/"
os.makedirs(save_dir, exist_ok=True)

In [19]:
# Render the colored mesh once per saved view and write each render to disk as
# "<index>_<tag>.png" inside save_dir.
for i, view in enumerate(views):
    vis = o3d.visualization.Visualizer()
    vis.create_window()
    try:
        vis.add_geometry(mesh)
        # set_view_status expects the view description as a JSON string.
        vis.set_view_status(json.dumps(view["view"]))
        buffer = vis.capture_screen_float_buffer(do_render=True)
        # Copy the pixels out while the window is still alive.
        pixels = (np.asarray(buffer) * 255).astype(np.uint8)
    finally:
        # Release the window even if rendering fails; otherwise every
        # iteration leaves another visualizer window open.
        vis.destroy_window()
    image = Image.fromarray(pixels)
    image.save(save_dir + "{:02d}_".format(i) + view["tag"] + ".png")

# Compare to ground-truth labels on the ScanNet mesh vertices

In [24]:
# Fetch the dataset entry for one scan, looked up by its position in the
# dataset's scan-id list.
dataset = ScanNet("/home/quanta/Datasets/ScanNet/")
scan_id = "scene0000_00"
# Named `scan_index` rather than `id` so the builtin `id()` is not shadowed for
# the rest of the kernel session.
scan_index = dataset.scan_id_list.index(scan_id)
single_instance = dataset[scan_index]

In [32]:
# Switch to the mesh and ground-truth labels supplied by the dataset entry.
# Note that `verts` and `faces` are rebound here and no longer refer to the
# TSDF mesh loaded earlier.
verts = single_instance["vertices"]
faces = single_instance["faces"]
gt_labels = single_instance["ground_truth_labels"]

# Predict a class for each of these vertices: sample the volume's features at
# the vertex positions, then run the multi-class text query.
verts_feats = tsdf_volume.extract_feat_on_grid(verts=verts, device="cpu")[0]
gt_vertex_feat_tensor = torch.from_numpy(verts_feats)
predicted = clip_querier.multi_text_query(texts=nyu40_class, img_feats=gt_vertex_feat_tensor)
labels = predicted.numpy()

In [38]:
def per_class_iou(gt, pred, num_classes):
    """Compute the intersection-over-union of every class from two label arrays.

    Counts are taken with ``np.bincount`` instead of dense one-hot matrices, so
    memory stays proportional to the number of labels rather than
    ``len(labels) * num_classes``.

    Args:
        gt: Array of ground-truth class ids in ``[0, num_classes)``.
        pred: Array of predicted class ids with the same number of elements.
        num_classes: Number of classes; fixes the length of the result.

    Returns:
        Float array of length ``num_classes`` holding ``tp / (tp + fp + fn)``
        per class, as a fraction. Classes absent from both arrays get 0.

    Raises:
        ValueError: If the arrays differ in size or contain ids outside
            ``[0, num_classes)``.
    """
    gt = np.asarray(gt).astype(np.int64).ravel()
    pred = np.asarray(pred).astype(np.int64).ravel()
    if gt.shape != pred.shape:
        raise ValueError(
            "gt and pred must have the same number of labels, got {} and {}".format(
                gt.size, pred.size
            )
        )
    for name, ids in (("gt", gt), ("pred", pred)):
        if ids.size and (ids.min() < 0 or ids.max() >= num_classes):
            raise ValueError(
                "{} contains class ids outside [0, {})".format(name, num_classes)
            )

    tp = np.bincount(gt[gt == pred], minlength=num_classes)
    gt_count = np.bincount(gt, minlength=num_classes)
    pred_count = np.bincount(pred, minlength=num_classes)
    # tp + fp + fn == |gt| + |pred| - tp for each class.
    union = gt_count + pred_count - tp
    # The small epsilon keeps classes with an empty union at 0 instead of NaN.
    return tp / (union + 1e-16)


# `miou` holds one IoU value per class id (not yet averaged over classes).
miou = per_class_iou(gt_labels, labels, num_classes=len(nyu40_class))

# Report the per-class IoU in percent, keyed by class name.
mIoU = {}
for cls in dataset.nyu40_id_to_class.keys():
    mIoU[dataset.nyu40_id_to_class[cls]] = miou[cls] * 100
mIoU

{'unknown': 7.680285778075463,
 'wall': 62.62359090622836,
 'chair': 0.0,
 'books': 0.0,
 'floor': 57.099949941598524,
 'door': 28.727434811678183,
 'otherprop': 0.0,
 'window': 54.7700754975978,
 'table': 17.768311117218634,
 'otherfurniture': 0.0,
 'pillow': 0.0,
 'picture': 0.0,
 'ceiling': 73.85645221271848,
 'box': 0.0,
 'cabinet': 45.34689540003036,
 'desk': 0.0,
 'shelves': 2.007057785619762,
 'towel': 0.0,
 'sofa': 79.50922443130933,
 'sink': 35.960591133004925,
 'lamp': 0.0,
 'bed': 61.720554272517326,
 'bookshelf': 0.0,
 'mirror': 15.853658536585366,
 'curtain': 59.18951132300357,
 'whiteboard': 0.0,
 'toilet': 39.157706093189965,
 'bag': 0.0,
 'clothes': 0.0,
 'night stand': 0.0,
 'television': 37.138621200889546,
 'dresser': 0.0,
 'refridgerator': 69.22060766182298,
 'shower curtain': 0.0,
 'bathtub': 0.0,
 'counter': 25.511811023622048,
 'otherstructure': 0.0,
 'floor mat': 0.0,
 'paper': 0.0,
 'person': 0.0,
 'blinds': 0.0}

# Compare to ground-truth labels on the finer TSDF mesh vertices

In [40]:
# Point save_dir back at the experiment directory (an earlier cell rebound it
# to the figure output directory).
save_dir = f"/storage/quanta/Experiments/feature-instance-fusion/{scene}/"

# Reload the TSDF mesh and predict a class for each of its vertices.
verts = np.load(save_dir + "tsdf/verts.npy")
faces = np.load(save_dir + "tsdf/faces.npy")
verts_feats = tsdf_volume.extract_feat_on_grid(verts=verts, device="cpu")[0]
tsdf_vertex_feat_tensor = torch.from_numpy(verts_feats)
predicted = clip_querier.multi_text_query(texts=nyu40_class, img_feats=tsdf_vertex_feat_tensor)
labels = predicted.numpy()

In [42]:
# Load the ground-truth semantic features into the same volume object.
# NOTE(review): presumably this replaces the LSeg features loaded earlier, so
# the LSeg-based predictions must already have been computed — confirm.
tsdf_volume.load_feats(save_dir + "tsdf_feature_gt_semantic/feats.pt")

In [43]:
# Sample the currently loaded features at the TSDF vertices; only the first
# element of the returned value is kept.
# NOTE(review): unlike the other extract_feat_on_grid calls in this notebook, no
# `device` is passed here — confirm the default yields something np.argmax accepts.
gt_score = tsdf_volume.extract_feat_on_grid(verts=verts)[0]

In [48]:
# Ground-truth class id per vertex: the index of the largest score along axis 1.
gt_labels = np.argmax(gt_score, axis=1)

In [49]:
# One-hot encode both label arrays by picking rows of a 41x41 identity matrix.
class_identity = np.eye(41, dtype=np.int32)
one_hot_gt = class_identity[gt_labels]
one_hot_pred = class_identity[labels]

# Per-class counts over all vertices:
#   tp - predicted as the class and labeled as the class
#   fp - predicted as the class but labeled otherwise
#   fn - labeled as the class but predicted otherwise
tp = (one_hot_gt * one_hot_pred).sum(axis=0)
fp = (one_hot_pred * (1 - one_hot_gt)).sum(axis=0)
fn = (one_hot_gt * (1 - one_hot_pred)).sum(axis=0)

# Per-class IoU; the epsilon keeps classes with no samples at 0 instead of NaN.
miou = tp / (tp + fp + fn + 1e-16)

# Report the per-class IoU in percent, keyed by class name.
mIoU = {
    class_name: miou[class_id] * 100
    for class_id, class_name in dataset.nyu40_id_to_class.items()
}
mIoU

{'unknown': 0.0,
 'wall': 66.55031065279276,
 'chair': 0.0,
 'books': 0.0,
 'floor': 61.54319686768637,
 'door': 22.666771061697464,
 'otherprop': 0.0,
 'window': 46.198402536430706,
 'table': 16.35894983613058,
 'otherfurniture': 0.0,
 'pillow': 0.0,
 'picture': 0.0,
 'ceiling': 86.85286513138115,
 'box': 0.0,
 'cabinet': 40.47151852394201,
 'desk': 0.0,
 'shelves': 3.3074186322308603,
 'towel': 0.0,
 'sofa': 76.32380905943732,
 'sink': 25.971004116699483,
 'lamp': 0.0,
 'bed': 72.00018896447467,
 'bookshelf': 0.0,
 'mirror': 8.730847065625904,
 'curtain': 60.89980185710401,
 'whiteboard': 0.0,
 'toilet': 35.92628051699378,
 'bag': 0.0,
 'clothes': 0.0,
 'night stand': 0.0,
 'television': 41.128127527459625,
 'dresser': 0.0,
 'refridgerator': 79.79410388394946,
 'shower curtain': 0.0,
 'bathtub': 0.0,
 'counter': 31.92039978582902,
 'otherstructure': 0.0,
 'floor mat': 0.0,
 'paper': 0.0,
 'person': 0.0,
 'blinds': 0.0}

: 