In [1]:
import argparse
import math
import builtins
import datetime
import gradio
import os
import torch
import numpy as np
import functools
import trimesh
import copy
from scipy.spatial.transform import Rotation

from dust3r.inference import inference
from dust3r.image_pairs import make_pairs
from dust3r.utils.image import load_images, rgb
from dust3r.utils.device import to_numpy
from dust3r.viz import add_scene_cam, CAM_COLORS, OPENGL, pts3d_to_trimesh, cat_meshes
from dust3r.cloud_opt import global_aligner, GlobalAlignerMode

import matplotlib.pyplot as pl

from dust3r.demo import get_reconstructed_scene, get_3D_model_from_scene
from dust3r.model import AsymmetricCroCo3DStereo

  from .autonotebook import tqdm as notebook_tqdm




In [2]:
# --- Run configuration ---------------------------------------------------
# Every tunable of the notebook lives in this cell. Apart from weights_path,
# all of these values are forwarded positionally to get_reconstructed_scene().

# Paths
outdir = "output"          # output directory handed to get_reconstructed_scene
filelist = "./images_in"   # where the input images are read from
weights_path = "./docker/files/checkpoints/DUSt3R_ViTLarge_BaseDecoder_512_dpt.pth"

# Compute device: prefer the GPU, but fall back to the CPU so the notebook
# still runs (slowly) on machines without CUDA instead of failing in .to().
device = "cuda" if torch.cuda.is_available() else "cpu"
silent = False
image_size = 512

# Optimisation settings
schedule = "linear" # or "cosine"
niter = 300 # number of iters

# Filtering / export options
min_conf_thr = 3
as_pointcloud = True
mask_sky = False
clean_depth = True
transparent_cams = False
cam_size = 0.05

# Image pairing options
scenegraph_type = "complete"
winsize = 1
refid = 0

# Load the pretrained network from the checkpoint and move it to the chosen device.
model = AsymmetricCroCo3DStereo.from_pretrained(weights_path).to(device)


... loading model from ./docker/files/checkpoints/DUSt3R_ViTLarge_BaseDecoder_512_dpt.pth
instantiating : AsymmetricCroCo3DStereo(enc_depth=24, dec_depth=12, enc_embed_dim=1024, dec_embed_dim=768, enc_num_heads=16, dec_num_heads=12, pos_embed='RoPE100', patch_embed_cls='PatchEmbedDust3R', img_size=(512, 512), head_type='dpt', output_mode='pts3d', depth_mode=('exp', -inf, inf), conf_mode=('exp', 1, inf), landscape_only=False)
<All keys matched successfully>


In [3]:
# Run the reconstruction with the settings from the configuration cell.
# The arguments are passed positionally, so their order must stay as listed.
scene, pts3d, rgbimg, cams2world, confs = get_reconstructed_scene(
    outdir,
    model,
    device,
    silent,
    image_size,
    filelist,
    schedule,
    niter,
    min_conf_thr,
    as_pointcloud,
    mask_sky,
    clean_depth,
    transparent_cams,
    cam_size,
    scenegraph_type,
    winsize,
    refid,
)


>> Loading images from ./images_in
 - adding 0.png with resolution 640x512 --> 512x400
 - adding 1.png with resolution 640x512 --> 512x400
 - adding 2.png with resolution 640x512 --> 512x400
 - adding 3.png with resolution 640x512 --> 512x400
 - adding 4.png with resolution 640x512 --> 512x400
 - adding 5.png with resolution 640x512 --> 512x400
 - adding 6.png with resolution 640x512 --> 512x400
 - adding 7.png with resolution 640x512 --> 512x400
 - adding 8.png with resolution 640x512 --> 512x400
 - adding 9.png with resolution 640x512 --> 512x400
 (Found 10 images)
>> Inference with model on 90 image pairs


100%|███████████████████████████████████████████████████████████████████████████████████| 90/90 [01:08<00:00,  1.31it/s]


 init edge (1*,0*) score=77.61289978027344
 init edge (1,7*) score=46.66046905517578
 init edge (1,3*) score=45.55316162109375
 init edge (2*,3) score=40.528236389160156
 init edge (2,6*) score=66.45589447021484
 init edge (2,9*) score=57.878299713134766
 init edge (8*,6) score=51.02920150756836
 init edge (2,5*) score=43.53687286376953
 init edge (2,4*) score=42.88144302368164
 init loss = 0.014015794731676579
Global alignement - optimizing for:
['pw_poses', 'im_depthmaps', 'im_poses', 'im_focals']


100%|████████████████████████████████████████████████████| 300/300 [01:40<00:00,  3.00it/s, lr=3.433e-05 loss=0.0077132]


In [5]:
# Export the reconstruction as CSV files inside `outdir`:
#   out.csv  - masked points, colours and confidences, stacked column-wise
#   <i>.csv  - one file per entry of cams2world

# np.savetxt does not create missing folders, so make sure the target exists.
os.makedirs(outdir, exist_ok=True)

mask = to_numpy(scene.get_masks())

pts3d = to_numpy(pts3d)

# For every view keep only the entries selected by its mask, then merge all views.
pts = np.concatenate([p[m] for p, m in zip(pts3d, mask)])
col = np.concatenate([p[m] for p, m in zip(rgbimg, mask)])
conf = np.concatenate([p[m] for p, m in zip(confs, mask)])

# column_stack matches hstack for 2-D inputs and additionally accepts a 1-D
# `conf` (turning it into a single column) instead of raising on mixed ranks.
np.savetxt(os.path.join(outdir, "out.csv"), np.column_stack((pts, col, conf)), delimiter=",")

for i, cam2world in enumerate(cams2world):
    np.savetxt(os.path.join(outdir, f"{i}.csv"), to_numpy(cam2world))


In [15]:
# Inspect the first entry of cams2world as a NumPy array
# (the recorded output below is a 4x4 float32 matrix).
to_numpy(cams2world[0])

array([[ 0.7855781 ,  0.35925955, -0.5037852 ,  0.14591828],
       [-0.37338138,  0.92447436,  0.0770289 , -0.02858463],
       [ 0.4934099 ,  0.1275918 ,  0.8603876 ,  0.0330322 ],
       [ 0.        ,  0.        ,  0.        ,  1.        ]],
      dtype=float32)