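In this notebook, we will build a 3D map of a scene from a small set of images and refine it with featuremetric optimization. We then localize an image downloaded from the Internet and show the effect of the refinement. The sections below also compare the left/right (stereo) relative camera poses of the reconstruction without and with rig bundle adjustment.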

# Setup
We start by defining some output paths: where the intermediate files will be stored.

In [1]:
%load_ext autoreload
%autoreload 2
import tqdm, tqdm.notebook
tqdm.tqdm = tqdm.notebook.tqdm  # notebook-friendly progress bars
import os
import time
import sys
import numpy as np
from hloc import extract_features, match_features, reconstruction, pairs_from_exhaustive, visualization
from hloc.visualization import plot_images, read_image
from hloc.utils.viz_3d import init_figure, plot_points, plot_reconstruction, plot_camera_colmap
from pixsfm.util.visualize import init_image, plot_points2D
from pixsfm.refine_hloc import PixSfM
from pixsfm import ostream_redirect
from PIL import Image, ImageDraw
import pycolmap
from pathlib import Path
#import visualize_model
# Redirect the C++ stdout/stderr streams to the notebook cells.
# NOTE(review): the redirect context is entered manually and no matching
# `__exit__` call is visible in this part of the notebook, so it appears to
# stay active for the rest of the session -- confirm this is intended.
cpp_out = ostream_redirect(stderr=True, stdout=True)
cpp_out.__enter__()

In [2]:
import torch

# Report the installed PyTorch build and the CUDA architectures it lists,
# one item per output line.
print(torch.__version__, torch.cuda.get_arch_list(), sep="\n")

1.9.1+cu111
['sm_37', 'sm_50', 'sm_60', 'sm_70', 'sm_75', 'sm_80', 'sm_86']


In [3]:
# Root folder holding the intermediate pixsfm output files.
outputs = Path('../pixsfm_outputs/')
# Sub-folder with the sparse model that is analysed further down.
ref_dir_locked = outputs.joinpath("ref_locked")

### Helper functions for relative pose calculations

In [4]:
def calculate_relative_pose(e_lw: np.ndarray, e_rw: np.ndarray):
    """Return the transform ``e_rw @ inv(e_lw)`` as a flat 7-element pose.

    Parameters
    ----------
    e_lw : np.ndarray
        4x4 homogeneous extrinsic matrix of the left camera.
    e_rw : np.ndarray
        4x4 homogeneous extrinsic matrix of the right camera.

    Returns
    -------
    list
        ``[dx, dy, dz, qx, qy, qz, qw]``: the translation column of the
        relative transform followed by its rotation as a quaternion in the
        order returned by SciPy's ``Rotation.as_quat`` (scalar last).

    Notes
    -----
    If both inputs map world coordinates into the respective camera frame
    (which is what ``cam_extrinsics`` appears to build from COLMAP records --
    confirm), the result maps left-camera coordinates into the right-camera
    frame.
    """
    from scipy.spatial.transform import Rotation

    # Invert the left extrinsics once and reuse the result.
    e_wl = np.linalg.inv(e_lw)
    e_rl = e_rw @ e_wl

    dquat = Rotation.from_matrix(e_rl[:3, :3]).as_quat()
    # Translation components first, then the four quaternion components.
    return [*e_rl[:3, 3], *dquat]


def cam_extrinsics(img):
    """Assemble the 4x4 homogeneous matrix ``[[R, t], [0, 0, 0, 1]]`` of an image.

    ``R`` comes from ``img.qvec`` via ``qvec2rotmat`` and ``t`` is ``img.tvec``
    reshaped into a column.

    NOTE(review): for COLMAP image records this is presumably the
    world-to-camera transform -- confirm against the COLMAP documentation.
    """
    from read_write_model import qvec2rotmat

    rotation = qvec2rotmat(img.qvec)
    translation = img.tvec.reshape(3, -1)
    # Upper rows: rotation and translation side by side, i.e. [R | t].
    upper_rows = np.hstack([rotation, translation])
    # Bottom row that makes the matrix homogeneous.
    homogeneous_row = np.array([[0, 0, 0, 1]])
    return np.concatenate([upper_rows, homogeneous_row], axis=0)

def calculate_relative_pose_between(left_idx: int, right_idx: int, img_dict=None):
    """Relative pose between two images of a model, looked up by image ID.

    Parameters
    ----------
    left_idx : int
        Key of the left image in ``img_dict``.
    right_idx : int
        Key of the right image in ``img_dict``.
    img_dict : mapping, optional
        Image-ID -> image-record mapping to look the two images up in.
        Defaults to the notebook-level ``sparse_img_dict`` so existing
        two-argument calls keep working; pass another mapping (for example
        the rig bundle-adjusted one) to reuse the helper for other models.

    Returns
    -------
    list
        The 7-element pose returned by ``calculate_relative_pose``.
    """
    if img_dict is None:
        # Resolved at call time: the global is created in a later cell.
        img_dict = sparse_img_dict
    e_lw = cam_extrinsics(img_dict[left_idx])   # extrinsics of the left image
    e_rw = cam_extrinsics(img_dict[right_idx])  # extrinsics of the right image
    return calculate_relative_pose(e_lw, e_rw)


    

### Camera poses WITHOUT Rig Bundle Adjustment

In [5]:
from pathlib import Path

# The model analysed in this section lives in the `ref_locked` output folder.
sparse_dir = ref_dir_locked
print(f"sparse_dir: {sparse_dir.as_posix()}")

# Binary model files expected inside that folder.
sparse_images, sparse_points3D, sparse_cameras = (
    sparse_dir / file_name
    for file_name in ("images.bin", "points3D.bin", "cameras.bin")
)

sparse_dir: ../pixsfm_outputs/ref_locked


In [6]:
import sys

# `read_write_model` is imported from the COLMAP scripts folder, so that folder
# must be on sys.path. The membership check keeps the cell idempotent:
# re-running it no longer appends the same entry again.
colmap_scripts_dir = os.path.expandvars('$HOME/colmap/scripts/python')
if colmap_scripts_dir not in sys.path:
    sys.path.append(colmap_scripts_dir)
from read_write_model import read_images_binary

# Mapping image ID -> image record of the model without rig bundle adjustment.
sparse_img_dict = read_images_binary(sparse_images)
print(f"{len(sparse_img_dict.keys())} ==> {sparse_img_dict.keys()}")
print(f"min_key: {min(sparse_img_dict.keys())} mx_key: {max(sparse_img_dict.keys())}")

80 ==> dict_keys([80, 79, 78, 77, 76, 75, 74, 73, 72, 71, 70, 69, 68, 67, 66, 65, 64, 63, 62, 61, 60, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59])
min_key: 1 mx_key: 80


In [7]:
from read_write_model import read_cameras_binary

sparse_cam_dict = read_cameras_binary(sparse_cameras)
# Only the camera records are needed here. Iterating over the values avoids
# binding the key to `id`, which would shadow the builtin for the rest of the
# notebook session.
for cam in sparse_cam_dict.values():
    print(cam.params)

[1093.2768 1093.2768  964.989   569.276     0.        0.        0.
    0.    ]
[1093.2768 1093.2768  964.989   569.276     0.        0.        0.
    0.    ]


In [8]:
# for k, v in sparse_img_dict.items():
#     print(f"{k} => {v.name}")

In [9]:
# List every image in ascending image-ID order together with its file name.
sorted_keys = sorted(sparse_img_dict)
for image_id in sorted_keys:
    print(f"{image_id}: {sparse_img_dict[image_id].name}")

1: left/frame_0_.jpg
2: left/frame_10_.jpg
3: left/frame_11_.jpg
4: left/frame_12_.jpg
5: left/frame_13_.jpg
6: left/frame_14_.jpg
7: left/frame_15_.jpg
8: left/frame_16_.jpg
9: left/frame_17_.jpg
10: left/frame_18_.jpg
11: left/frame_19_.jpg
12: left/frame_1_.jpg
13: left/frame_20_.jpg
14: left/frame_21_.jpg
15: left/frame_22_.jpg
16: left/frame_23_.jpg
17: left/frame_24_.jpg
18: left/frame_25_.jpg
19: left/frame_26_.jpg
20: left/frame_27_.jpg
21: left/frame_28_.jpg
22: left/frame_29_.jpg
23: left/frame_2_.jpg
24: left/frame_30_.jpg
25: left/frame_31_.jpg
26: left/frame_32_.jpg
27: left/frame_33_.jpg
28: left/frame_34_.jpg
29: left/frame_35_.jpg
30: left/frame_36_.jpg
31: left/frame_37_.jpg
32: left/frame_38_.jpg
33: left/frame_39_.jpg
34: left/frame_3_.jpg
35: left/frame_4_.jpg
36: left/frame_5_.jpg
37: left/frame_6_.jpg
38: left/frame_7_.jpg
39: left/frame_8_.jpg
40: left/frame_9_.jpg
41: right/frame_0_.jpg
42: right/frame_10_.jpg
43: right/frame_11_.jpg
44: right/frame_12_.jpg
45: 

In [10]:
import numpy as np

# Pairing convention (see the ID/name listing above): image IDs 1..N/2 are the
# left frames and ID + N/2 is the right frame of the same capture.
num_images = len(sparse_img_dict)
assert num_images % 2 == 0, f"expected left/right pairs, got {num_images} images"
num_pairs = num_images // 2

rel_poses = []
for idx in range(1, num_pairs + 1):
    left_img = sparse_img_dict[idx]
    right_img = sparse_img_dict[idx + num_pairs]
    # Fail loudly instead of silently comparing unrelated frames if the ID
    # layout ever differs from the convention above.
    assert Path(left_img.name).name == Path(right_img.name).name, (
        f"image {idx} ({left_img.name}) is not paired with {right_img.name}"
    )
    e_lw = cam_extrinsics(left_img)   # 4x4 extrinsics of the left image
    e_rw = cam_extrinsics(right_img)  # 4x4 extrinsics of the right image
    e_rl = calculate_relative_pose(e_lw, e_rw)
    rel_poses.append(e_rl)

In [11]:
import pandas as pd

# Do not truncate cell contents and keep the HTML table rendering.
pd.set_option('display.max_colwidth', None)
pd.set_option('display.notebook_repr_html', True)

# One row per stereo pair: translation components followed by the quaternion.
pose_columns = ['dx', 'dy', 'dz', 'qx', 'qy', 'qz', 'qw']
df = pd.DataFrame(rel_poses, columns=pose_columns)
df.style

Unnamed: 0,dx,dy,dz,qx,qy,qz,qw
0,-0.280629,-0.001853,-0.007641,-3.3e-05,-0.000576,-3.4e-05,1.0
1,-0.281124,-0.001413,-0.005959,-6.3e-05,-0.000536,-6.9e-05,1.0
2,-0.282267,-0.001224,-0.006179,-2.6e-05,-0.000524,-4.9e-05,1.0
3,-0.282638,-4e-05,-0.00602,1e-05,-0.000504,-6.7e-05,1.0
4,-0.282806,-5e-05,-0.004445,2.9e-05,-0.000487,-1.3e-05,1.0
5,-0.283483,-0.002126,-0.005431,-7.4e-05,-0.000463,-8e-05,1.0
6,-0.283603,-0.000189,-0.004876,1.7e-05,-0.000469,-6e-05,1.0
7,-0.282768,-0.000167,-0.004959,2.3e-05,-0.00049,-5.8e-05,1.0
8,-0.281559,0.000635,-0.004554,5.5e-05,-0.000521,-3.5e-05,1.0
9,-0.28313,-7.6e-05,-0.004121,3.6e-05,-0.00048,-7.7e-05,1.0


### Camera poses with Rig Bundle Adjustment

In [12]:
# Sparse model used for the "with rig bundle adjustment" comparison.
# The location is machine-specific: set the RIG_BA_SPARSE_DIR environment
# variable to point somewhere else without editing the notebook. When it is
# unset, the path below is used.
rig_ba_sparse_dir = Path(os.environ.get(
    'RIG_BA_SPARSE_DIR',
    '/home/skumar/ssd/benchmark_pixsfm/rig_bundle_adjuster/output/',
))
rig_ba_sparse_images = rig_ba_sparse_dir / "images.bin"
rig_ba_sparse_points3D = rig_ba_sparse_dir / "points3D.bin"
rig_ba_sparse_cameras = rig_ba_sparse_dir / "cameras.bin"

In [13]:
import sys

# `read_write_model` is imported from the COLMAP scripts folder, so that folder
# must be on sys.path. The membership check keeps the cell idempotent:
# re-running it no longer appends the same entry again.
colmap_scripts_dir = os.path.expandvars('$HOME/colmap/scripts/python')
if colmap_scripts_dir not in sys.path:
    sys.path.append(colmap_scripts_dir)
from read_write_model import read_images_binary

# Mapping image ID -> image record of the rig bundle-adjusted model.
rig_ba_sparse_img_dict = read_images_binary(rig_ba_sparse_images)
print(f"{len(rig_ba_sparse_img_dict.keys())} => {rig_ba_sparse_img_dict.keys()}")

80 => dict_keys([59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 23, 24, 25, 26, 27, 28, 29, 60, 1, 61, 2, 62, 3, 63, 4, 64, 5, 65, 6, 66, 7, 67, 8, 22, 80, 21, 79, 20, 78, 19, 77, 18, 76, 17, 75, 16, 74, 15, 73, 14, 72, 13, 71, 12, 70, 11, 69, 10, 68, 9, 30, 31, 32, 33, 34, 35, 36, 37, 38])


In [14]:
import numpy as np

# Pairing convention: image IDs 1..N/2 are the left frames and ID + N/2 is the
# right frame of the same capture.
num_images = len(rig_ba_sparse_img_dict)
assert num_images % 2 == 0, f"expected left/right pairs, got {num_images} images"
num_pairs = num_images // 2

rig_ba_rel_poses = []
for idx in range(1, num_pairs + 1):
    left_img = rig_ba_sparse_img_dict[idx]
    right_img = rig_ba_sparse_img_dict[idx + num_pairs]
    if idx < 5:
        # Print a few pairs for a visual sanity check.
        print(f"left_img_name: {left_img.name} right_img_name: {right_img.name}")
    # Fail loudly instead of silently comparing unrelated frames if the ID
    # layout ever differs from the convention above.
    assert Path(left_img.name).name == Path(right_img.name).name, (
        f"image {idx} ({left_img.name}) is not paired with {right_img.name}"
    )
    e_lw = cam_extrinsics(left_img)   # 4x4 extrinsics of the left image
    e_rw = cam_extrinsics(right_img)  # 4x4 extrinsics of the right image
    rel_pose = calculate_relative_pose(e_lw, e_rw)
    rig_ba_rel_poses.append(rel_pose)

left_img_name: left/frame_0_.jpg right_img_name: right/frame_0_.jpg
left_img_name: left/frame_10_.jpg right_img_name: right/frame_10_.jpg
left_img_name: left/frame_11_.jpg right_img_name: right/frame_11_.jpg
left_img_name: left/frame_12_.jpg right_img_name: right/frame_12_.jpg


In [15]:
import pandas as pd

# Do not truncate cell contents and keep the HTML table rendering.
pd.set_option('display.max_colwidth', None)
pd.set_option('display.notebook_repr_html', True)

# One row per stereo pair of the rig bundle-adjusted model: translation
# components followed by the quaternion.
pose_columns = ['dx', 'dy', 'dz', 'qx', 'qy', 'qz', 'qw']
df = pd.DataFrame(rig_ba_rel_poses, columns=pose_columns)
df.style

Unnamed: 0,dx,dy,dz,qx,qy,qz,qw
0,-12.998243,0.002082,-0.209402,2.6e-05,-0.000514,-4.4e-05,1.0
1,-12.998243,0.002082,-0.209402,2.6e-05,-0.000514,-4.4e-05,1.0
2,-12.998243,0.002082,-0.209402,2.6e-05,-0.000514,-4.4e-05,1.0
3,-12.998243,0.002082,-0.209402,2.6e-05,-0.000514,-4.4e-05,1.0
4,-12.998243,0.002082,-0.209402,2.6e-05,-0.000514,-4.4e-05,1.0
5,-12.998243,0.002082,-0.209402,2.6e-05,-0.000514,-4.4e-05,1.0
6,-12.998243,0.002082,-0.209402,2.6e-05,-0.000514,-4.4e-05,1.0
7,-12.998243,0.002082,-0.209402,2.6e-05,-0.000514,-4.4e-05,1.0
8,-12.998243,0.002082,-0.209402,2.6e-05,-0.000514,-4.4e-05,1.0
9,-12.998243,0.002082,-0.209402,2.6e-05,-0.000514,-4.4e-05,1.0


In [16]:
from read_write_model import read_cameras_binary

rig_ba_sparse_cam_dict = read_cameras_binary(rig_ba_sparse_cameras)
# Only the camera records are needed here. Iterating over the values avoids
# binding the key to `id`, which would shadow the builtin for the rest of the
# notebook session.
for cam in rig_ba_sparse_cam_dict.values():
    print(cam.params)

[1093.2768 1093.2768  964.989   569.276     0.        0.        0.
    0.    ]
[1093.2768 1093.2768  964.989   569.276     0.        0.        0.
    0.    ]
