In [1]:
import os
import sys

# Number CUDA devices by PCI bus position so that indices such as "cuda:1"
# refer to the same physical card on every run.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"

# Put the directory two levels above the working directory on the import path
# (once only), so the project-local `core` package can be imported.
module_path = os.path.abspath("../..")
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

In [35]:
import gc
import json

import matplotlib.pyplot as plt
import numpy as np
import open3d as o3d
import pandas as pd
import torch
from PIL import Image

from core.captioner import ClipCapCaptioner, DeCapCaptioner
from core.dataset import ScanNet
from core.integrate import FeatureFusionScalableTSDFVolume
from core.labeler import CLIPTextQuerier, KMeansLabeler

# STEP 1: Get the ground-truth semantic segmentation and mesh vertices

In [3]:
# Open the ScanNet dataset and pull out its NYU40 lookup tables:
# the colour table as-is, and the class names for label ids 0..40 as a list.
dataset = ScanNet('/home/quanta/Datasets/ScanNet/')
nyu40_color = dataset.nyu40id_to_color
nyu40_class = [
    dataset.nyu40_id_to_class[label_id]
    for label_id in range(41)
]

In [7]:
# Scene under analysis and the directory holding its precomputed fusion results.
scene = "scannet_scene0000_00"
load_dir = f"/home/quanta/Experiments/feature-instance-fusion/{scene}/"

In [5]:
# Read the "views" entry from this scene's view-configuration JSON file.
json_pth = os.path.abspath(f"../../config/views/{scene}.json")
with open(json_pth, "r") as view_file:
    view_config = json.load(view_file)
views = view_config['views']

In [8]:
# Load the saved mesh arrays (vertices and faces) from the scene's TSDF output.
verts, faces = (
    np.load(f"{load_dir}tsdf/{stem}.npy")
    for stem in ("verts", "faces")
)

In [13]:
# Device setting shared by the TSDF volume here and the captioner created later.
tsdf_device = "cuda:1"

# Rebuild the fusion volume with these settings, then restore its saved state
# and the ground-truth semantic features.
tsdf_volume = FeatureFusionScalableTSDFVolume(
    voxel_size=0.015,
    sdf_trunc=0.075,
    margin=0.08,
    device=tsdf_device,
)
tsdf_volume.load(f"{load_dir}tsdf/tsdf_volume_unpruned.pt")
tsdf_volume.load_feats(f"{load_dir}tsdf_feature_gt_semantic/feats.pt")

In [14]:
# Query the ground-truth semantic features at every mesh vertex and use the
# index of the largest entry along axis 1 as that vertex's label id.
# NOTE(review): presumably axis 1 holds one score per NYU40 id — confirm.
gt_labels = np.argmax(
    tsdf_volume.extract_feat_on_grid(verts=verts)[0],
    axis=1,
)

In [17]:
# Count the vertices assigned to each label id; minlength=41 guarantees one
# entry for every id 0..40 even when the highest ids never occur.
gt_counts = np.bincount(
    gt_labels,
    minlength=41,
)

# STEP 2: Get the LSeg features

In [20]:
# Load the LSeg features into the same volume.
# NOTE(review): presumably this replaces the ground-truth features loaded earlier — confirm.
tsdf_volume.load_feats(f"{load_dir}tsdf_feature_lseg/feats.pt")

In [21]:
# Query the LSeg features at every mesh vertex, asking for the result on the CPU.
lseg_feats = tsdf_volume.extract_feat_on_grid(
    verts=verts,
    device='cpu',
)[0]

In [25]:
# Release the TSDF volume before the captioning model is loaded.
# `del` only removes the name; objects caught in reference cycles stay alive
# until the cyclic garbage collector runs, so collect explicitly first —
# otherwise empty_cache() may have nothing it is allowed to hand back.
del tsdf_volume
gc.collect()
torch.cuda.empty_cache()

In [27]:
# Build the DeCap captioner from its checkpoint, using the same device setting
# as the TSDF volume, and load the model.
decap_kwargs = dict(
    decap_ckpt="/home/quanta/Models/decap/coco_prefix-009.pt",
    device=tsdf_device,
)
decap = DeCapCaptioner(**decap_kwargs)
decap.load_model()

## Random choice

In [32]:
# For every label id 0..40, caption `repeat` randomly chosen vertices that carry
# that ground-truth label. The result is one list of `repeat` strings per id.
repeat = 10

# Fixed seed: the same vertices are sampled on every run, so the saved table
# can be regenerated.
rng = np.random.default_rng(0)

random_feat_decode = []
for class_id in range(41):
    # Indices of all vertices whose ground-truth label is `class_id`.
    choices = np.flatnonzero(gt_labels == class_id)

    if choices.size == 0:
        # Label absent from this scene: emit blanks so every row keeps
        # `repeat` columns.
        random_feat_decode.append([""] * repeat)
        continue

    # The loop counter is deliberately `_`: the original reused `i` here, which
    # shadowed the outer class index.
    decoded_sents = [
        decap.get_caption(torch.from_numpy(lseg_feats[rng.choice(choices), :]))
        for _ in range(repeat)
    ]
    random_feat_decode.append(decoded_sents)

In [37]:
# Turn the list of per-label caption lists into a table:
# one row per label id, one column per random draw.
random_decode = pd.DataFrame(data=np.asarray(random_feat_decode))

In [56]:
# Display the table of randomly sampled captions (rows follow label ids, columns are draws).
random_decode

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,a wall with the other,a table,the wall of a single,a chair,a couch,window of open,table with a,a table that has been used,a refrigerator,a wall of the
1,"wall with the side of a wall , one of the other","wall with the side of a wall , one of the other",some door .,the wall of a cluttered and - fashioned cabinet,some kind of door .,"wall with the side of a wall , one of the other",some door .,window of an open window,"wall with the side of a wall , one of the other",a wall with it
2,rug that is very large and rug,a floor of a room of the floor,a rug that is very long and floor,a floor of a floor of a flat - over,a floor of a floor of a flat - over,a bicycle,a floor of a floor of a new,a floor of a floor of a flat - over,a floor of a floor of a flat,a floor of a floor of a flat - over
3,a shelf of the cabinet,a television,a curtain,a curtain,a shelf of the cabinet,a tv,the cabinet of the electronic equipment in par...,a cabinet of the door,a bicycle,a wall with the word
4,a couch,a bed,a couch,a bed,a bed,a bed,a bed,a bed,a rug that is very long and,a bed with some sheets
5,,,,,,,,,,
6,a couch,a couch,a couch,a couch,a bicycle,a couch,the cabinet of a cluttered cabinet,a couch,a couch,a couch
7,a table that has been used,a table,a counter with the wall,a table,a table,a wall with the door open,a back,a table with the top,a table,a table
8,some kind of door that is door .,some kind of door that is door .,a few mirror in it .,some kind of door that is door .,some kind of door that is door .,some door that has been made and door,some kind of door that is door .,some kind of door that is door .,some kind of door that is door .,some kind of door that is door .
9,window of open,window of open,window of open,window of open,window of open,window of open,window of open,window of open,window of an open wall with the other,window of open and window


## Mean decode

In [46]:
# For every label id 0..40, average the LSeg features of all vertices with that
# ground-truth label and caption the averaged feature. Labels with no vertices
# get an empty string so the list keeps one entry per id.
mean_decode = []
for i in range(41):
    choices = np.argwhere(gt_labels == i).ravel()

    if choices.size > 0:
        class_feats = torch.from_numpy(lseg_feats[choices, :])
        feat = class_feats.mean(dim=0)
        text = decap.get_caption(feat)
    else:
        text = ""

    mean_decode.append(text)

In [47]:
# Wrap the per-label captions in a single-column table.
# Note: this rebinds `mean_decode` from a list to a DataFrame.
mean_decode = pd.DataFrame(data=mean_decode)

In [48]:
# Display the table of captions decoded from the per-label mean features.
mean_decode

Unnamed: 0,0
0,a back of a
1,the wall of a wall with a lot of other
2,a floor of a room with the floor
3,a cabinet with the top
4,a bed
5,
6,a couch
7,a wall
8,some kind of door .
9,window of open


In [49]:
# Per-label vertex counts as a single-column table, ready for CSV export.
feat_count = pd.DataFrame(data=gt_counts)

In [51]:
# NYU40 class names as a single-column table, ready for CSV export.
class_name = pd.DataFrame(data=nyu40_class)

In [53]:
# Output directory for the caption tables; create it (and any missing parents)
# without failing when it already exists.
save_dir = '../03_caption/03_LSeg_3D_caption/'
os.makedirs(name=save_dir, exist_ok=True)

In [57]:
# Write each result table to its own CSV file in `save_dir`, with neither the
# row index nor a header line.
csv_outputs = {
    'random_decode.csv': random_decode,
    'mean_decode.csv': mean_decode,
    'semantic_count.csv': feat_count,
    'semantic_name.csv': class_name,
}
for csv_name, frame in csv_outputs.items():
    frame.to_csv(save_dir + csv_name, index=False, header=False)