In this notebook, we will build a 3D map of a scene from a small set of images and refine it with the featuremetric optimization. We then localize an image downloaded from the Internet and show the effect of the refinement.

# Setup
We start by defining some output paths: where the intermediate files will be stored.

In [24]:
%load_ext autoreload
%autoreload 2
import tqdm, tqdm.notebook
tqdm.tqdm = tqdm.notebook.tqdm  # notebook-friendly progress bars
from pathlib import Path
import os
import time
import sys
from hloc import extract_features, match_features, reconstruction, pairs_from_exhaustive, visualization
from hloc.visualization import plot_images, read_image
from hloc.utils.viz_3d import init_figure, plot_points, plot_reconstruction, plot_camera_colmap

from pixsfm.util.visualize import init_image, plot_points2D
from pixsfm.refine_hloc import PixSfM
from pixsfm import ostream_redirect
from PIL import Image, ImageDraw
import pycolmap
import numpy as np
#import visualize_model
# redirect the C++ outputs to notebook cells
cpp_out = ostream_redirect(stderr=True, stdout=True)
cpp_out.__enter__()

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [25]:
import torch 
print(torch.__version__)
print(torch.cuda.get_arch_list())

1.9.1+cu111
['sm_37', 'sm_50', 'sm_60', 'sm_70', 'sm_75', 'sm_80', 'sm_86']


In [26]:
images = Path('datasets/monarch/')
outputs = Path('outputs/monarch-demo/')
!rm -rf $outputs
sfm_pairs = outputs / 'pairs-sfm.txt'
loc_pairs = outputs / 'pairs-loc.txt'
features = outputs / 'features.h5'
matches = outputs / 'matches.h5'
raw_dir = outputs / "raw"
ref_dir = outputs / "ref"
''' model location in case of intrinsics locked '''
ref_dir_locked = outputs / "ref_locked"
''' model location in case of intrinsics not locked '''
ref_dir_not_locked = outputs / "ref_dir_not_locked" 

Here we will use SuperPoint local features with the SuperGlue matcher, but it's easy to switch to other features like SIFT or R2D2.

### Analysing Sparse Pointcloud Data

In [27]:
sparse_dir = Path("/home/skumar/stereo_colmap_cli_output/sparse/")
print(f"sparse_dir: {sparse_dir.as_posix()}")
sparse_images = sparse_dir / "images.bin"
sparse_points3D = sparse_dir / "points3D.bin"
sparse_cameras = sparse_dir / "cameras.bin"

sparse_dir: /home/skumar/stereo_colmap_cli_output/sparse


In [28]:
sparse_model = pycolmap.Reconstruction()
sparse_model.read_binary(sparse_dir.as_posix())
print(f"sparse_model.summary(): {sparse_model.summary()}")

sparse_model.summary(): Reconstruction:
	num_reg_images = 42
	num_cameras = 1
	num_points3D = 4915
	num_observations = 21377
	mean_track_length = 4.34934
	mean_observations_per_image = 508.976
	mean_reprojection_error = 0.93475


### Baseline check

In [29]:
sys.path.append("/home/skumar/colmap/scripts/python")
from read_write_model import read_images_binary 
sparse_img_dict = read_images_binary(sparse_images)
print(sparse_img_dict.keys())

dict_keys([29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 30, 1, 31, 2, 32, 3, 33, 4, 34, 5, 35, 6, 36, 7, 37, 8, 38, 9, 39, 10, 40, 11, 41, 12, 42, 13, 14, 15, 16])


In [30]:
def cam_extrinsics(img):
    from read_write_model import qvec2rotmat
    R = qvec2rotmat(img.qvec)
    t = img.tvec.reshape(3,-1)
    R_t = np.concatenate((R,t), axis = 1)
    R_t = np.vstack([np.array([0,0,0,1]), R_t])
    return R_t    #  4 * 4 matrix
    

In [31]:
'''
e_lw => left camera pose in world frame (4 * 4)
e_rw => right camera pose in world frame (4 * 4)
'''
def calculate_relative_pose(e_lw, e_rw): 
    from scipy.spatial.transform import Rotation
    e_rl = e_rw * np.linalg.inv(e_lw) #right camera in the frame of the left camera
    R = e_rl[:3,:3] #extracting the rotation matrix
    dx = e_rl[0,3]
    dy = e_rl[1,3]
    dz = e_rl[2,3]
    dquat = Rotation.from_matrix(R).as_quat()
    #rel_pose =  [dx, dy] + dquat
    rel_pose = [dx,dy,dz]
    for q in dquat: 
        rel_pose.append(q)
    return rel_pose
    #return [dx,dy]
    #print(f"dx: {dx} dy: {dy} dquat: {dquat}")

In [34]:
print(f"type(sparse_images): {type(sparse_images)}")

type(sparse_images): <class 'pathlib.PosixPath'>


In [35]:
rel_poses = []
num_images = len(sparse_img_dict.keys())
for idx in range(0, num_images - 1,2): 
    '''
    if idx > 1:
        break
    '''
    left_img = sparse_img_dict[idx + 1]
    right_img = sparse_img_dict[idx + 2]
    e_lw = cam_extrinsics(left_img)  #left camera pose w.r.t. world
    e_rw = cam_extrinsics(right_img) #right camera pose w.r.t world
    rel_pose = calculate_relative_pose(e_lw, e_rw)
    rel_poses.append(rel_pose)

In [37]:
!pip install pandas

Collecting pandas
  Downloading pandas-2.0.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.4 MB)
[K     |████████████████████████████████| 12.4 MB 2.8 MB/s eta 0:00:01
[?25hCollecting tzdata>=2022.1
  Downloading tzdata-2024.1-py2.py3-none-any.whl (345 kB)
[K     |████████████████████████████████| 345 kB 13.7 MB/s eta 0:00:01
Installing collected packages: tzdata, pandas
Successfully installed pandas-2.0.3 tzdata-2024.1


In [41]:
import pandas as pd
pd.set_option('display.max_colwidth', None)
pd.set_option('display.notebook_repr_html', True)
df = pd.DataFrame(rel_poses, columns=['dx', 'dy', 'dz', 'qx' , 'qy', 'qz' , 'qw'])
df.style

Unnamed: 0,dx,dy,dz,qx,qy,qz,qw
0,0.030344,0.000144,-2.558585,-0.00032,0.025174,0.929903,0.366942
1,0.026785,0.000165,-2.360786,-0.000251,0.025709,0.919528,0.392182
2,0.023489,0.000236,-2.141947,-0.000343,0.025543,0.904628,0.425437
3,0.021343,0.00045,-1.913895,-0.000333,0.035647,0.883807,0.466491
4,0.0319,5e-05,-1.659279,-0.000453,0.02535,0.855529,0.517133
5,0.045837,-0.000183,-1.420006,-0.000358,0.002115,0.814802,0.579736
6,0.051569,-0.000169,-1.166423,-0.00053,-0.005964,0.757526,0.652777
7,0.048789,-0.000256,-0.915282,-0.000437,-0.016428,0.672911,0.739541
8,0.042881,-0.000348,-0.64736,0.000168,-0.00842,0.540121,0.841545
9,0.022884,-0.000105,-0.377612,-0.000447,-0.003004,0.353077,0.935589


### Testing Arena 

In [9]:
import sys
from pathlib import Path
sys.path.append("/home/skumar/colmap/scripts/python")
from read_write_model import read_cameras_binary
project_path = Path("/home/skumar/stereo_colmap_cli_output")

In [10]:
bin_cameras_path = project_path / "cameras.bin" 
bin_cameras = read_cameras_binary(bin_cameras_path)

In [11]:
print(f"bin_cameras: {bin_cameras}")

bin_cameras: {1: Camera(id=1, model='OPENCV', width=1920, height=1080, params=array([1093.2768, 1093.2768,  964.989 ,  569.276 ,    0.    ,    0.    ,
          0.    ,    0.    ]))}
