In [1]:
import os
# Switch the kernel's working directory to /dust3r and print it to confirm.
# NOTE(review): presumably this is the DUSt3R checkout, so that the `dust3r`
# package imports in the next cell resolve — confirm.
os.chdir("/dust3r")
print(os.getcwd())

/dust3r


In [10]:
import open3d as o3d
import numpy as np
import torch

from dust3r.inference import inference
from dust3r.model import AsymmetricCroCo3DStereo
from dust3r.utils.image import load_images
from dust3r.image_pairs import make_pairs
from dust3r.cloud_opt import global_aligner, GlobalAlignerMode

# Run-wide settings. Later cells read these names directly, so keep them stable.

# Prefer the GPU, but fall back to the CPU so the notebook still runs
# (more slowly) on a machine without a usable CUDA device.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
batch_size = 1  # forwarded to inference()

# Arguments forwarded to scene.compute_global_alignment().
schedule = 'cosine'
lr = 0.01
niter = 300

# Dataset root; the frames are read from <DATA_PATH>/rgb/<index>.png.
DATA_PATH = "/scratchdata/indoor_short"

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


In [3]:
# Local checkpoint file handed to from_pretrained().
model_name = "/scratchdata/MonST3R_PO-TA-S-W_ViTLarge_BaseDecoder_512_dpt.pth"

# Build the network from that checkpoint, then move it onto the configured device.
model = (
    AsymmetricCroCo3DStereo
    .from_pretrained(model_name)
    .to(device)
)

  ckpt = torch.load(model_path, map_location='cpu')


... loading model from /scratchdata/MonST3R_PO-TA-S-W_ViTLarge_BaseDecoder_512_dpt.pth
instantiating : AsymmetricCroCo3DStereo(pos_embed='RoPE100', patch_embed_cls='PatchEmbedDust3R', img_size=(512, 512), head_type='dpt', output_mode='pts3d', depth_mode=('exp', -inf, inf), conf_mode=('exp', 1, inf), enc_embed_dim=1024, enc_depth=24, enc_num_heads=16, dec_embed_dim=768, dec_depth=12, dec_num_heads=12, freeze='encoder', landscape_only=False)
<All keys matched successfully>


In [4]:
# Paths of the first two frames: <DATA_PATH>/rgb/0.png and <DATA_PATH>/rgb/1.png.
images_array = [
    os.path.join(DATA_PATH, "rgb/{}.png".format(frame_idx))
    for frame_idx in range(2)
]

# Load the frames with size=512; verbose=True makes the loader report each file.
images = load_images(images_array, size=512, verbose=True)

>> Loading a list of 2 images
 - adding /scratchdata/indoor_short/rgb/0.png with resolution 640x400 --> 512x320
 - adding /scratchdata/indoor_short/rgb/1.png with resolution 640x400 --> 512x320
 (Found 2 images)


In [5]:
# Build the image pairs from the loaded views: complete scene graph,
# no prefiltering, symmetrized.
pairs = make_pairs(
    images,
    scene_graph='complete',
    prefilter=None,
    symmetrize=True,
)

# Run the model over the pairs on the configured device.
output = inference(
    pairs,
    model,
    device,
    batch_size=batch_size,
)

>> Inference with model on 2 image pairs


  with torch.cuda.amp.autocast(enabled=bool(use_amp)):
  with torch.cuda.amp.autocast(enabled=False):
  with torch.cuda.amp.autocast(enabled=False):
100%|██████████| 2/2 [00:00<00:00,  3.07it/s]


In [6]:
# Wrap the pairwise predictions in a PointCloudOptimizer scene.
scene = global_aligner(
    output,
    device=device,
    mode=GlobalAlignerMode.PointCloudOptimizer,
)

# Optimise the scene for `niter` steps, starting from the "mst" initialisation
# and using the schedule and learning rate set in the config cell.
loss = scene.compute_global_alignment(
    init="mst",
    niter=niter,
    schedule=schedule,
    lr=lr,
)

 init edge (1*,0*) score=37.101654052734375
 init loss = 0.008739943616092205
Global alignement - optimizing for:
['pw_poses', 'im_depthmaps', 'im_poses', 'im_focals']


100%|██████████| 300/300 [00:06<00:00, 48.64it/s, lr=1.27413e-06 loss=0.00674731]


In [7]:
# Pull the results out of the aligned scene so later cells can use them.
# `imgs` and `pts3d` are indexed per view by the point-cloud cell below.
# NOTE(review): focals / poses / confidence_masks are not used in the cells
# visible here — presumably kept for inspection or later cells; confirm.
imgs = scene.imgs
focals = scene.get_focals()
poses = scene.get_im_poses()
pts3d = scene.get_pts3d()
confidence_masks = scene.get_masks()

In [9]:
# Merge every view's points into a single coloured cloud and display it.
pcd = o3d.geometry.PointCloud()

for view_idx, view_pts in enumerate(pts3d):
    # Detach from autograd, copy to host memory, and flatten to rows of (x, y, z).
    xyz = view_pts.detach().cpu().numpy().reshape(-1, 3)
    # Flatten the matching image the same way so each point gets one colour row.
    rgb = imgs[view_idx].reshape(-1, 3)

    view_pcd = o3d.geometry.PointCloud()
    view_pcd.points = o3d.utility.Vector3dVector(xyz)
    view_pcd.colors = o3d.utility.Vector3dVector(rgb)

    # Append this view's points and colours to the combined cloud.
    pcd += view_pcd

o3d.visualization.draw_geometries([pcd])

2
