## Imports

In [8]:
# Imports and set torch device
import numpy as np
import meshplot as mp
import torch
import sys
from scripts.helper_functions import segment
import kaolin as kal
import matplotlib.pyplot as plt
from meshseg.models.GLIP.glip import GLIPModel
import igl

# Use the first CUDA device when one is available, otherwise run on the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
if device.type == "cuda":
    # Make the chosen GPU the current CUDA device.
    torch.cuda.set_device(device)

print('Torch will run on:', device)

# Name of the demo object; it selects the .obj file below and is read by
# the later cells to build the config and output paths.
object = 'bed'
obj_path = 'data/demo/' + object + '.obj'

Torch will run on: cuda:0


## Read Mesh

In [9]:
# Load the OBJ file with kaolin, requesting normals and skipping materials.
mesh = kal.io.obj.import_mesh(obj_path, with_normals=True, with_materials=False)

# Copies of the geometry on the selected torch device.
vertices_tensor, faces_tensor = mesh.vertices.to(device), mesh.faces.to(device)

# Host-side NumPy copies, which are what meshplot is given below.
vertices, faces = (
    tensor.detach().cpu().numpy() for tensor in (vertices_tensor, faces_tensor)
)
# The vertex normals are passed to the plot as its colour argument.
color = mesh.vertex_normals.cpu().numpy()

print('Number of vertices: ', vertices.shape[0])
print('Number of faces: ', faces.shape[0])

# Show the mesh. No matplotlib figure is created in this cell; the trailing
# plt.show() is kept so the cell does not end on the mp.plot(...) expression.
mp.plot(vertices, faces, color)
plt.show()

Number of vertices:  11973
Number of faces:  19568


Renderer(camera=PerspectiveCamera(children=(DirectionalLight(color='white', intensity=0.6, position=(-0.001309…

## Call SATR

In [10]:
# Object names that end in a digit reuse the config of their base name:
# the last two characters are dropped before looking up the YAML file.
# NOTE(review): the two-character slice presumably targets names like
# '<base>_<digit>'; multi-digit suffixes would not be handled - confirm.
config_name = object[:-2] if object[-1].isdigit() else object
config_path = f'configs/demo/{config_name}.yaml'

# Run segmentation with the chosen config, the object's .obj file name and
# the per-object output directory.
segment(config_path, f'{object}.obj', f'outputs/demo/ABO/{object}/')

Reading the mesh...
Reading the mesh with path: data/demo\bed.obj
	having 19568 faces and 11973 vertices
Sampling the mesh...


only got 23710/23946 samples!


Sampled 23710 points
Creating the renderer...
Random rendering: False
Rendering the views...
Num views: 12
Rendering the views...done
Initializing GLIP...
VISION BACKBONE USE GRADIENT CHECKPOINTING:  False
LANGUAGE BACKBONE USE GRADIENT CHECKPOINTING:  False
EARLY FUSION ON, USING MHA-B
EARLY FUSION ON, USING MHA-B
EARLY FUSION ON, USING MHA-B
EARLY FUSION ON, USING MHA-B
EARLY FUSION ON, USING MHA-B
EARLY FUSION ON, USING MHA-B
EARLY FUSION ON, USING MHA-B
EARLY FUSION ON, USING MHA-B
Finished Initializing GLIP


  0%|          | 0/23710 [00:00<?, ?it/s]

Per Sample Point scores
Getting samples neighborhood
Prompts: ['the bed frame of a bed.', 'the headboard of a bed.', 'the legs of a bed.', 'the mattress of a bed.']
Feeding the views to GLIP...
Num views: 12
GLIP - View: 0 Prompt: 0 



inference time per image: 0.8685755000001336
GLIP - View: 0 Prompt: 1 inference time per image: 0.6027295000003505
GLIP - View: 0 Prompt: 2 inference time per image: 0.6048329000000194
GLIP - View: 0 Prompt: 3 inference time per image: 0.6015929000000142
GLIP - View: 1 Prompt: 0 inference time per image: 0.6121003000002929
GLIP - View: 1 Prompt: 1 inference time per image: 0.6083807999998498
GLIP - View: 1 Prompt: 2 inference time per image: 0.5999583000002531
GLIP - View: 1 Prompt: 3 inference time per image: 0.6071596000001591
GLIP - View: 2 Prompt: 0 inference time per image: 0.6014647000001787
GLIP - View: 2 Prompt: 1 inference time per image: 0.6043635999999424
GLIP - View: 2 Prompt: 2 inference time per image: 0.6073489000000336
GLIP - View: 2 Prompt: 3 inference time per image: 0.6030077000000347
GLIP - View: 3 Prompt: 0 inference time per image: 0.6040815999999722
GLIP - View: 3 Prompt: 1 inference time per image: 0.5983166000000892
GLIP - View: 3 Prompt: 2 inference time per i

0it [00:00, ?it/s]

Processing view: 0, Prompt: 0
Processing view: 0, Prompt: 1
Processing view: 0, Prompt: 2
Processing view: 0, Prompt: 3
Processing view: 1, Prompt: 0
Processing view: 1, Prompt: 1
Processing view: 1, Prompt: 2
Processing view: 1, Prompt: 3
Processing view: 2, Prompt: 0
Processing view: 2, Prompt: 1
Processing view: 2, Prompt: 2
Processing view: 2, Prompt: 3
Processing view: 3, Prompt: 0
Processing view: 3, Prompt: 1
Processing view: 3, Prompt: 2
Processing view: 3, Prompt: 3
Processing view: 4, Prompt: 0
Processing view: 4, Prompt: 1
Processing view: 4, Prompt: 2
Processing view: 4, Prompt: 3
Processing view: 5, Prompt: 0
Processing view: 5, Prompt: 1
Processing view: 5, Prompt: 2
Processing view: 5, Prompt: 3
Processing view: 6, Prompt: 0
Processing view: 6, Prompt: 1
Processing view: 6, Prompt: 2
Processing view: 6, Prompt: 3
Processing view: 7, Prompt: 0
Processing view: 7, Prompt: 1
Processing view: 7, Prompt: 2
Processing view: 7, Prompt: 3
Processing view: 8, Prompt: 0
Processing

## Visualize Output

In [11]:
import json

# Palette used to colour the segments: the i-th distinct label (in the sorted
# order returned by np.unique) gets colors_dict[i], whose human-readable name
# is colors_lst[i].
colors_dict = {
    0: [1, 0, 0],   # Red
    1: [0, 1, 0],   # Green
    2: [0, 0, 1],   # Blue
    3: [1, 1, 0],   # Yellow
    4: [1, 0, 1],   # Magenta
    5: [0, 1, 1],   # Cyan
    6: [0.5, 0, 0], # Dark Red
    7: [0, 0.5, 0], # Dark Green
    8: [0, 0, 0.5], # Dark Blue
    9: [0.5, 0.5, 0.5] # Grey
}
colors_lst = ['Red', 'Green', 'Blue', 'Yellow', 'Magenta', 'Cyan', 'Dark Red', 'Dark Green', 'Dark Blue', 'Grey']

# Load the predicted labels (presumably one label per face - confirm against
# the writer of face_preds.json). The context manager closes the file handle
# as soon as the JSON has been parsed.
with open('./outputs/demo/ABO/' + object + '/face_preds.json') as output_file:
    output = np.array(json.load(output_file))

# A single np.unique call returns both the sorted distinct labels and, for
# every entry of `output`, the index of its label in that sorted list.
segments, segments_idx = np.unique(output, return_inverse=True)

# Fail with an explicit message instead of a bare KeyError when the
# predictions contain more distinct labels than the palette has colours.
if len(segments) > len(colors_dict):
    raise ValueError(
        f'{len(segments)} distinct labels found but only '
        f'{len(colors_dict)} colours are defined in colors_dict'
    )

# Label names without the 'unknown' bucket.
prompts = segments.tolist()
if 'unknown' in prompts:
    prompts.remove('unknown')

# One RGB triple per entry of `output`, followed by a colour -> label legend.
colors = np.array([colors_dict[segment_id] for segment_id in segments_idx])
for i in range(len(segments)):
    print(colors_lst[i],':\t',segments[i])
mp.plot(vertices, faces, colors)
plt.show()

Red :	 bed frame
Green :	 headboard
Blue :	 legs
Yellow :	 mattress
Magenta :	 unknown


Renderer(camera=PerspectiveCamera(children=(DirectionalLight(color='white', intensity=0.6, position=(-0.001309…