In [1]:
import os
import torch
import numpy as np
from glob import glob
from tqdm import tqdm
from os.path import join, exists
import open3d as o3d
import matplotlib.pyplot as plt

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


In [2]:
# Location of the preprocessed scene file to inspect (edit to match your setup).
sample_path_0 = (
    "/mnt/project/AT3DCV_Data/Preprocessed_OpenScene/data/augmented/"
    "scannet_3d/example/scene0000_00_vh_clean_2.pth"
)
# Optional additional partial scans of the same scene:
#sample_path_1 = "D:/AT3DCV_Data/Preprocessed_OpenScene/data/scannet_3d/train/scene0000_01_vh_clean_2.pth"
#sample_path_2 = "D:/AT3DCV_Data/Preprocessed_OpenScene/data/scannet_3d/train/scene0000_02_vh_clean_2.pth"

In [3]:
# Load the preprocessed scene. Later cells index it positionally:
# [0] is used as point coordinates and [1] as per-point colors.
# NOTE(review): torch.load unpickles the file, which can execute arbitrary
# code -- only load files that come from a trusted source.
sample_0 = torch.load(sample_path_0) # coords,colors,labels
#sample_1 = torch.load(sample_path_1) # coords,colors,labels
#sample_2 = torch.load(sample_path_2) # coords,colors,labels

In [4]:
# Number of entries in sample_0[0] (the coordinates, going by the
# "coords,colors,labels" note on the load cell), i.e. the point count.
len(sample_0[0])

81915

In [5]:
# Work with a single partial point cloud: entry 0 holds the points, entry 1 the colors.
sample_points, sample_colors = sample_0[0], sample_0[1]

# Alternative: merge all partial point clouds of the same scene
# (they don't overlap perfectly).
#sample_points = np.concatenate((sample_0[0], sample_1[0], sample_2[0]))
#sample_colors = np.concatenate((sample_0[1], sample_1[1], sample_2[1]))

In [6]:
# Show the original scene in an Open3D viewer window.
scene_xyz = o3d.utility.Vector3dVector(np.asarray(sample_points))
scene_rgb = o3d.utility.Vector3dVector(np.asarray(sample_colors))  # original colors

pcd = o3d.geometry.PointCloud()
pcd.points, pcd.colors = scene_xyz, scene_rgb

# To paint the whole cloud in one uniform color instead, uncomment:
#sample_paint_uniform = np.asarray([200,200,200])/255.0 #redish
#pcd.paint_uniform_color(sample_paint_uniform)

o3d.visualization.draw_geometries([pcd])

# load fused features

In [9]:
# Location of the fused per-point feature file for the same scene
# (edit to match your setup).
feature_path = (
    "/mnt/project/AT3DCV_Data/Preprocessed_OpenScene/data/augmented/"
    "fused/scene0000_00_0.pt"
)

In [10]:
# Load the fused features. Later cells read the keys "mask_full" and "feat".
# NOTE(review): torch.load unpickles the file, which can execute arbitrary
# code -- only load files that come from a trusted source.
feature = torch.load(feature_path)

In [11]:
# 1-D mask whose length matches the point count of the loaded scene.
# Presumably it flags the points that received a fused feature -- TODO confirm.
feature["mask_full"].shape

torch.Size([81915])

In [12]:
# 2-D feature matrix: one row per feature vector, 768 values per row.
# The row count is smaller than the mask length -- presumably one row per
# True entry of the mask (TODO confirm).
feature["feat"].shape

torch.Size([78191, 768])

In [13]:
# Get the indices where the mask is True.
# torch.nonzero returns one row per hit with one column per mask dimension;
# for the 1-D mask that is shape (K, 1). Only that column axis is squeezed so
# the result stays 1-D even when exactly one entry is True (a bare squeeze()
# would collapse it to a 0-d tensor and the row indexing in the next cell
# would drop the point axis).
indices = torch.nonzero(feature["mask_full"]).squeeze(1)

In [14]:
# Keep only the rows selected by the mask, for coordinates and colors alike,
# so both stay aligned row-for-row.
filtered_point_cloud, filtered_point_cloud_colors = (
    per_point[indices, :] for per_point in (sample_points, sample_colors)
)

In [12]:
# Sanity check: the number of rows should equal feature["feat"].shape[0]
# so that points and features line up one-to-one.
filtered_point_cloud.shape

(78191, 3)

# using clip model

In [7]:
# Load the CLIP model whose text encoder embeds the queries in the cells below.
# NOTE(review): this import sits mid-notebook; consider moving it to the
# import cell at the top so dependencies are visible in one place.
# NOTE(review): `preprocess` (the second return value) is not used in the
# visible cells.
import clip
model, preprocess = clip.load("ViT-L/14@336px")

In [45]:
# highlight with a threshold
# Points whose similarity to the text query is close to the best score are
# painted red; everything else keeps its original color.
# type the query here
query = ["pillows"]

# Run on whatever device the CLIP model lives on instead of assuming CUDA.
device = next(model.parameters()).device

with torch.no_grad():
    all_text_embeddings = []
    for category in tqdm(query):
        texts = clip.tokenize(category).to(device)  # tokenize
        text_embeddings = model.encode_text(texts)  # embed with text encoder
        text_embeddings /= text_embeddings.norm(dim=-1, keepdim=True)
        # average the prompts of one category, then re-normalize the mean
        text_embedding = text_embeddings.mean(dim=0)
        text_embedding /= text_embedding.norm()
        all_text_embeddings.append(text_embedding)

    # one column per query entry: (embedding_dim, len(query))
    all_text_embeddings = torch.stack(all_text_embeddings, dim=1)

# normalizing (the epsilon guards against all-zero feature rows), then match
# the device and dtype of the text embeddings so the matmul is well defined
fused_f = feature["feat"] / (feature["feat"].norm(dim=-1, keepdim=True) + 1e-5)
fused_f = fused_f.to(device=all_text_embeddings.device, dtype=all_text_embeddings.dtype)
# calculating similarity matrix: (num_points, len(query))
similarity_matrix = fused_f @ all_text_embeddings

# set higher to increase the certainty (not always correct)
# NOTE: the cut-off is a fraction of the best score, which assumes the best
# score is positive.
threshold_percentage = 0.9
cap = similarity_matrix.max().item()
# A point is selected when it passes the cut-off for any query column.
# squeeze(1) (not a bare squeeze()) keeps the result 1-D even when a single
# point passes, so tolist() always yields a list for select_by_index.
above_threshold = (similarity_matrix > cap * threshold_percentage).any(dim=1)
found_indices = torch.nonzero(above_threshold).squeeze(1)
found_list = found_indices.tolist()

# creating pc
pcd = o3d.geometry.PointCloud()
pcd.points = o3d.utility.Vector3dVector(np.asarray(filtered_point_cloud))
pcd.colors = o3d.utility.Vector3dVector(np.asarray(filtered_point_cloud_colors))

found_region = pcd.select_by_index(found_list)
found_region.paint_uniform_color([1.0, 0, 0]) # paint related points to red
rest = pcd.select_by_index(found_list, invert=True)
o3d.visualization.draw_geometries([rest, found_region])

100%|█████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 65.22it/s]


In [28]:
# highlight with a heatmap
# The scene is shown twice: once with its original colors and once, shifted
# to the side, colored by similarity to the text query.
# type the query here; each inner list is one category whose prompts are averaged
query = [["pillars", "legs"]]

# Run on whatever device the CLIP model lives on instead of assuming CUDA.
device = next(model.parameters()).device

with torch.no_grad():
    all_text_embeddings = []
    for category in tqdm(query):
        texts = clip.tokenize(category).to(device)  # tokenize
        text_embeddings = model.encode_text(texts)  # embed with text encoder
        text_embeddings /= text_embeddings.norm(dim=-1, keepdim=True)
        # average the prompts of one category, then re-normalize the mean
        text_embedding = text_embeddings.mean(dim=0)
        text_embedding /= text_embedding.norm()
        all_text_embeddings.append(text_embedding)

    # one column per query entry: (embedding_dim, len(query))
    all_text_embeddings = torch.stack(all_text_embeddings, dim=1)

# normalizing (the epsilon guards against all-zero feature rows), then match
# the device and dtype of the text embeddings so the matmul is well defined
fused_f = feature["feat"] / (feature["feat"].norm(dim=-1, keepdim=True) + 1e-5)
fused_f = fused_f.to(device=all_text_embeddings.device, dtype=all_text_embeddings.dtype)
# calculating similarity matrix: (num_points, len(query))
similarity_matrix = fused_f @ all_text_embeddings

# creating pc
pcd = o3d.geometry.PointCloud()
pcd.points = o3d.utility.Vector3dVector(np.asarray(filtered_point_cloud))
pcd.colors = o3d.utility.Vector3dVector(np.asarray(filtered_point_cloud_colors))

# heatmap
cmap = plt.get_cmap('cividis')

# normalize the tensor to the range [0, 1]
# Done in float32 so the color scale is not quantized by half precision; the
# range is clamped away from zero so a constant similarity field maps to 0
# instead of producing NaN colors.
similarity_f32 = similarity_matrix.float()
sim_min = similarity_f32.min()
sim_range = (similarity_f32.max() - sim_min).clamp_min(1e-12)
normalized_tensor = (similarity_f32 - sim_min) / sim_range

# color by the first (here: only) query column; cmap returns RGBA rows
colors = cmap(normalized_tensor[:, 0].detach().cpu().numpy())

#transform heatmap to the side
heatmap_offset = [0, 10, 0]
pcd_heatmap = o3d.geometry.PointCloud()
pcd_heatmap.points = o3d.utility.Vector3dVector(np.asarray(pcd.points) + heatmap_offset)
pcd_heatmap.colors = o3d.utility.Vector3dVector(colors[:, :3])  # drop alpha

o3d.visualization.draw_geometries([pcd, pcd_heatmap])

100%|█████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 11.62it/s]
