In [None]:
import sys

# Make the sibling checkouts importable. The paths are relative, so they are
# resolved against the current working directory of the kernel.
# The membership test keeps the cell idempotent: re-running it does not pile
# up duplicate entries in sys.path.
for _extra_path in ('../dust3r', '../gaussian-splatting', '../src'):
    if _extra_path not in sys.path:
        sys.path.append(_extra_path)

In [None]:
from dust3r.inference import inference, load_model
from dust3r.utils.image import load_images
from dust3r.utils.device import to_numpy
from dust3r.image_pairs import make_pairs
from dust3r.cloud_opt import global_aligner, GlobalAlignerMode

import torch
import numpy as np
import matplotlib.pyplot as plt

try:
    import lovely_tensors as lt
except:
    ! pip install --upgrade lovely-tensors
    import lovely_tensors as lt
    
lt.monkey_patch()

In [None]:
# --- Model / runtime ---------------------------------------------------------
model_path = "../dust3r/checkpoints/DUSt3R_ViTLarge_BaseDecoder_512_dpt.pth"
device = 'cuda:0'
batch_size = 1  # batch size handed to inference()

# --- Global alignment optimisation (forwarded to compute_global_alignment) ---
schedule = 'cosine'
lr = 0.01
niter = 300

# --- Scene graph ---------------------------------------------------------------
scenegraph_type = "complete"
winsize = 1  # suffix appended when scenegraph_type == "swin"
refid = 0    # suffix appended when scenegraph_type == "oneref"

# --- Export options ------------------------------------------------------------
norm_scene = False   # when True, the normalised points/poses replace the raw ones
mask_images = False  # mask images by confidence threshold for 3DGS

# "swin" and "oneref" carry a numeric argument encoded as "<type>-<value>";
# every other scene-graph name is left untouched.
_graph_suffix = {"swin": winsize, "oneref": refid}
if scenegraph_type in _graph_suffix:
    scenegraph_type = f"{scenegraph_type}-{_graph_suffix[scenegraph_type]}"

In [None]:
from pathlib import Path
# Convenience helper: list the entries of a directory as Path objects.
Path.ls = lambda x: list(x.iterdir())

image_dir = Path('../data/images/turtle_imgs/')

# Keep PNG/JPG files only; the suffix is lower-cased so that files such as
# "3.JPG" are not silently skipped.
image_paths = [p for p in image_dir.ls() if p.suffix.lower() in ('.png', '.jpg')]

# File names are expected to start with an integer ("12.png"); sort numerically
# so that 10 comes after 9. Path.name is used instead of splitting on '/', so
# the key does not depend on the platform's path separator.
image_paths.sort(key=lambda p: int(p.name.split('.')[0]))

image_files = [str(p) for p in image_paths]

In [None]:
# Load the DUSt3R checkpoint onto the configured device.
model = load_model(model_path, device)

# Read the input images (loaded with size=512).
images = load_images(image_files, size=512)

# Build the image pairs from the scene graph chosen in the configuration cell
# (previously hard-coded to 'complete', which made scenegraph_type / winsize /
# refid have no effect). With the default configuration this is still 'complete'.
pairs = make_pairs(images, scene_graph=scenegraph_type, prefilter=None, symmetrize=True)

# Run the network on every pair.
output = inference(pairs, model, device, batch_size=batch_size)

In [None]:
# Build the global-alignment scene from the pairwise predictions, using the
# PointCloudOptimizer mode.
scene = global_aligner(
    output,
    device=device,
    mode=GlobalAlignerMode.PointCloudOptimizer,
)

# Optimise the alignment with the configured iteration count, schedule and
# learning rate, starting from the "mst" initialisation. The returned value is
# kept in `loss`.
loss = scene.compute_global_alignment(
    init="mst",
    niter=niter,
    schedule=schedule,
    lr=lr,
)

In [None]:
from colmap_dataset_utils import inv


def _as_numpy(tensor):
    """Detach `tensor` from autograd, move it to the CPU and return a NumPy array."""
    return tensor.detach().cpu().numpy()


# Camera parameters estimated by the aligner, as NumPy arrays.
intrinsics = _as_numpy(scene.get_intrinsics())
cam2world = _as_numpy(scene.get_im_poses())
world2cam = inv(cam2world)  # presumably the inverse pose matrices; see colmap_dataset_utils.inv
principal_points = _as_numpy(scene.get_principal_points())
focals = _as_numpy(scene.get_focals())

# Images, per-view 3D points and depth maps (the latter two stay as detached tensors).
imgs = np.array(scene.imgs)
pts3d = [view_pts.detach() for view_pts in scene.get_pts3d()]
depth_maps = [view_depth.detach() for view_depth in scene.get_depthmaps()]

# The confidence threshold is set on the scene *before* the masks are requested;
# presumably get_masks() reads scene.min_conf_thr, so the order matters.
min_conf_thr = 20
scene.min_conf_thr = float(scene.conf_trf(torch.tensor(min_conf_thr)))
masks = to_numpy(scene.get_masks())

In [None]:
from colmap_dataset_utils import normalize_scene
from copy import deepcopy

# normalize_scene receives deep copies, so pts3d / masks / cam2world themselves
# are never handed to it.
pts_norm, c2w_norm = normalize_scene(
    deepcopy(pts3d),
    deepcopy(masks),
    deepcopy(cam2world),
)

# Optionally make the normalised scene the one that gets exported.
if norm_scene:
    pts3d, world2cam = pts_norm, inv(np.array(c2w_norm))

In [None]:
from visualization import visualize_pcd, visualize_cameras
fig = None

# Subsample the point cloud so that roughly `num_to_show` points are drawn.
num_to_show = 10_000
num_of_valid = sum([m.sum() for m in masks])
# Clamp to 1: with fewer than `num_to_show` valid points the integer division
# yields 0, which is presumably not a usable stride for visualize_pcd.
skip = max(1, num_of_valid // num_to_show)

# Add every view's masked points and its camera to the same figure.
for pts, img, mask, c2w in zip(pts_norm, imgs, masks, c2w_norm):
    fig = visualize_pcd(pts[mask].cpu().numpy(), img[mask], skip=skip, show=False, size=2, fig=fig)
    # R is the transposed 3x3 rotation block; T is the full last column.
    # NOTE(review): `c2w[None, :, 3]` keeps every row of column 3 -- if c2w is
    # 4x4 this includes the homogeneous entry; confirm visualize_cameras expects that.
    R, T = np.transpose(c2w[None, :3, :3], (0, 2, 1)), c2w[None, :, 3]
    fig = visualize_cameras(R, T, fig=fig, show=False, radius=2, size=0.2)

fig

# Construct colmap dataset

After conversion, the following directory structure should appear:

```
│   │   │   ├── images
│   │   │   ├── masks
│   │   │   ├── sparse/0
|   |   |   |    |------cameras.bin
|   |   |   |    |------images.bin
|   |   |   |    |------points3D.bin
|   |   |   |    |------points3D.ply
```

In [None]:
# Root directory of the exported scene; created together with any missing
# parents, and left alone if it already exists.
save_dir = Path('../data/scenes/turtle')
save_dir.mkdir(parents=True, exist_ok=True)

In [None]:
from colmap_dataset_utils import (
    init_filestructure,
    save_images_masks,
    save_cameras,
    save_imagestxt,
    save_pointcloud_with_normals
)

# Prepare the output layout under save_dir and get back the paths used below.
save_path, images_path, masks_path, sparse_path = init_filestructure(save_dir)
# Write the images and their confidence masks; `mask_images` is the flag from
# the configuration cell.
save_images_masks(imgs, masks, images_path, masks_path, mask_images)
# Write the camera parameters (focals, principal points, image shape).
save_cameras(focals, principal_points, sparse_path, imgs_shape=imgs.shape)
# Write the per-image world-to-camera poses.
save_imagestxt(world2cam, sparse_path)
# Alternative exporter, not imported in this cell:
# save_pointcloud(imgs, pts3d, masks, sparse_path)
save_pointcloud_with_normals(imgs, pts3d, masks, sparse_path)