In this notebook, we will build a 3D map of a scene from a small set of images and refine it with featuremetric optimization. We then localize an image downloaded from the Internet and show the effect of the refinement.

# Setup
We start by defining some output paths: where the intermediate files will be stored.

In [11]:
%load_ext autoreload
%autoreload 2
import tqdm, tqdm.notebook
tqdm.tqdm = tqdm.notebook.tqdm  # notebook-friendly progress bars
import os
import time
import sys
import numpy as np
from hloc import extract_features, match_features, reconstruction, pairs_from_exhaustive, visualization
from hloc.visualization import plot_images, read_image
from hloc.utils.viz_3d import init_figure, plot_points, plot_reconstruction, plot_camera_colmap
from pixsfm.util.visualize import init_image, plot_points2D
from pixsfm.refine_hloc import PixSfM
from pixsfm import ostream_redirect
from PIL import Image, ImageDraw
import pycolmap
from pathlib import Path
#import visualize_model
# Redirect the C++ stdout/stderr streams to the notebook cells.
# NOTE(review): the redirect is entered manually and no matching __exit__ call
# is visible in this cell, so it presumably stays active for the rest of the
# kernel session -- confirm that this is intended.
cpp_out = ostream_redirect(stderr=True, stdout=True)
cpp_out.__enter__()

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [12]:
# Environment sanity check: print the installed PyTorch version and the list
# returned by torch.cuda.get_arch_list() for this build.
import torch 
print(torch.__version__)
print(torch.cuda.get_arch_list())

1.9.1+cu111
['sm_37', 'sm_50', 'sm_60', 'sm_70', 'sm_75', 'sm_80', 'sm_86']


In [13]:
# images = Path('datasets/monarch/')
# outputs = Path('outputs/monarch-demo/')
# sfm_pairs = outputs / 'pairs-sfm.txt'
# loc_pairs = outputs / 'pairs-loc.txt'
# features = outputs / 'features.h5'
# matches = outputs / 'matches.h5'
# raw_dir = outputs / "raw"
# ref_dir = outputs / "ref"
# ''' model location in case of intrinsics locked '''
# ref_dir_locked = outputs / "ref_locked"
# ''' model location in case of intrinsics not locked '''
# ref_dir_not_locked = outputs / "ref_dir_not_locked" 

### Helper functions for relative pose calculations

In [14]:
def calculate_relative_pose(e_lw: np.ndarray, e_rw: np.ndarray) -> list:
    """Compute the pose of the right camera relative to the left camera.

    The relative transform is ``e_rl = e_rw @ inv(e_lw)``. It is printed and
    then flattened into a 7-element list.

    Parameters
    ----------
    e_lw : np.ndarray
        4 x 4 homogeneous extrinsic matrix of the left camera with respect to
        the world.
    e_rw : np.ndarray
        4 x 4 homogeneous extrinsic matrix of the right camera with respect to
        the world.

    Returns
    -------
    list
        ``[dx, dy, dz, qx, qy, qz, qw]``: the translation column of ``e_rl``
        followed by its 3 x 3 rotation block converted to a quaternion with
        ``Rotation.as_quat()`` (scalar-last order by default in SciPy).

    Notes
    -----
    Side effect: the full 4 x 4 matrix ``e_rl`` is printed on every call.
    """
    from scipy.spatial.transform import Rotation

    # Invert the left extrinsics once and reuse the result (the inverse used
    # to be computed twice, with the first result thrown away).
    e_wl = np.linalg.inv(e_lw)
    e_rl = np.dot(e_rw, e_wl)  # right camera expressed in the left camera frame
    print(f"e_rl: \n{e_rl}")

    translation = e_rl[:3, 3]  # dx, dy, dz
    dquat = Rotation.from_matrix(e_rl[:3, :3]).as_quat()
    return [*translation, *dquat]


def cam_extrinsics(img):
    """Assemble the 4 x 4 homogeneous extrinsic matrix of an image record.

    ``img`` must expose ``qvec`` (converted to a rotation matrix with
    ``read_write_model.qvec2rotmat``) and ``tvec`` (reshaped to a column).
    The result is ``[[R, t], [0, 0, 0, 1]]``.

    NOTE(review): for COLMAP image records, qvec/tvec presumably describe the
    world-to-camera transform -- confirm against the COLMAP documentation.
    """
    from read_write_model import qvec2rotmat

    rotation = qvec2rotmat(img.qvec)
    translation = img.tvec.reshape(3, -1)  # column vector
    upper_rows = np.hstack((rotation, translation))
    homogeneous_row = np.array([0, 0, 0, 1])
    return np.vstack((upper_rows, homogeneous_row))  # 4 x 4 matrix

def calculate_relative_pose_between(left_idx: int, right_idx: int, img_dict=None):
    """Relative pose of the image at ``right_idx`` w.r.t. the image at ``left_idx``.

    Parameters
    ----------
    left_idx : int
        Key of the left-camera image in ``img_dict``.
    right_idx : int
        Key of the right-camera image in ``img_dict``.
    img_dict : mapping, optional
        Mapping from image key to image record (as accepted by
        ``cam_extrinsics``). Defaults to the notebook-level ``sparse_img_dict``
        so existing two-argument calls behave as before; pass another model's
        image dictionary to reuse the helper for it.

    Returns
    -------
    list
        The 7-element result of ``calculate_relative_pose``.
    """
    if img_dict is None:
        # Looked up at call time, so the global only has to exist once the
        # helper is actually used without an explicit dictionary.
        img_dict = sparse_img_dict
    e_lw = cam_extrinsics(img_dict[left_idx])   # left camera extrinsics
    e_rw = cam_extrinsics(img_dict[right_idx])  # right camera extrinsics
    return calculate_relative_pose(e_lw, e_rw)


    

### Camera poses WITHOUT Rig Bundle Adjustment

In [18]:
from pathlib import Path

# Directory holding the binary sparse model files (images / points3D / cameras)
# used for the comparison without rig bundle adjustment.
# NOTE(review): this is an absolute, machine-specific path.
sparse_dir = Path("/home/skumar/ext_ssd/workstation-sfm-setup/output-backend/sparse-reconstruction/vineyards/RJM/front_2024-06-06-09-26-19.svo/4_to_126/ref_locked")
print(f"sparse_dir: {sparse_dir.as_posix()}")

# Individual model files inside that directory.
sparse_cameras = sparse_dir.joinpath("cameras.bin")
sparse_points3D = sparse_dir.joinpath("points3D.bin")
sparse_images = sparse_dir.joinpath("images.bin")

sparse_dir: /home/skumar/ext_ssd/workstation-sfm-setup/output-backend/sparse-reconstruction/vineyards/RJM/front_2024-06-06-09-26-19.svo/4_to_126/ref_locked


In [19]:
# Make the Python helper scripts under $HOME/colmap/scripts/python importable,
# then load the image records of the sparse model.
import sys
sys.path.append(os.path.expandvars('$HOME/colmap/scripts/python'))
#sys.path.append("/home/skumar/colmap/scripts/python")
from read_write_model import read_images_binary 
# NOTE(review): presumably a dict mapping image id -> image record (the helpers
# in this notebook read .qvec, .tvec and .name from the values) -- confirm
# against read_write_model.
sparse_img_dict = read_images_binary(sparse_images)
# Report how many records were loaded, their keys, and the key range.
print(f"{len(sparse_img_dict.keys())} ==> {sparse_img_dict.keys()}")
print(f"min_key: {min(sparse_img_dict.keys())} mx_key: {max(sparse_img_dict.keys())}")

120 ==> dict_keys([120, 119, 118, 117, 116, 115, 114, 113, 112, 111, 110, 109, 108, 107, 106, 105, 104, 103, 102, 101, 100, 99, 98, 97, 96, 95, 94, 93, 92, 91, 90, 89, 88, 87, 86, 85, 84, 83, 82, 81, 80, 79, 78, 77, 76, 75, 74, 73, 72, 71, 70, 69, 68, 67, 66, 65, 64, 63, 62, 61, 60, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59])
min_key: 1 mx_key: 120


In [24]:
import numpy as np

# Pair image idx with image idx + half: the keys are assumed to run from 1 to
# num_images, with the left-camera frames in the first half and the matching
# right-camera frames in the second half.
num_images = len(sparse_img_dict)
half = num_images // 2
rel_poses = []
for idx in range(1, half + 1):
    left_img, right_img = sparse_img_dict[idx], sparse_img_dict[idx + half]
    e_lw, e_rw = cam_extrinsics(left_img), cam_extrinsics(right_img)  # extrinsics w.r.t. world
    # Despite its name, e_rl holds the flattened 7-element pose
    # [dx, dy, dz, qx, qy, qz, qw], not the 4 x 4 matrix.
    e_rl = calculate_relative_pose(e_lw, e_rw)
    rel_poses.append(e_rl)

e_rl: 
[[ 0.9998702  -0.01593678 -0.00236674 -0.02419441]
 [ 0.01580727  0.9987589  -0.04723114  0.08034733]
 [ 0.00311651  0.0471876   0.99888118  0.0300515 ]
 [ 0.          0.          0.          1.        ]]
e_rl: 
[[ 0.99976673 -0.01922949 -0.00983377 -0.01154362]
 [ 0.01888176  0.99923289 -0.03430901  0.0452137 ]
 [ 0.01048598  0.03411533  0.99936289  0.00867532]
 [ 0.          0.          0.          1.        ]]
e_rl: 
[[ 0.99997615 -0.0045439  -0.00520115 -0.01046154]
 [ 0.00419349  0.99784334 -0.06550631  0.09911693]
 [ 0.00548759  0.06548294  0.9978386   0.02450474]
 [ 0.          0.          0.          1.        ]]
e_rl: 
[[ 9.99948793e-01 -2.90615593e-04 -1.01157139e-02  1.48009222e-03]
 [-1.04439241e-04  9.99237989e-01 -3.90311588e-02  5.72238565e-02]
 [ 1.01193486e-02  3.90302166e-02  9.99186790e-01  1.15025936e-02]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00  1.00000000e+00]]
e_rl: 
[[ 0.99982709  0.00236794 -0.01844428  0.0142415 ]
 [-0.00268778  0.99984612 -0.0

In [25]:
import pandas as pd
# Display options: no truncation of cell contents, HTML rendering of frames.
pd.set_option('display.max_colwidth', None)
pd.set_option('display.notebook_repr_html', True)
# One row per left/right image pair: translation (dx, dy, dz) followed by the
# rotation quaternion (qx, qy, qz, qw), in the order produced by
# calculate_relative_pose.
df = pd.DataFrame(rel_poses, columns=['dx', 'dy', 'dz', 'qx' , 'qy', 'qz' , 'qw'])
# Last expression of the cell: the Styler object is what the notebook renders.
df.style

Unnamed: 0,dx,dy,dz,qx,qy,qz,qw
0,-0.024194,0.080347,0.030052,0.023612,-0.001371,0.007938,0.999689
1,-0.011544,0.045214,0.008675,0.01711,-0.005081,0.00953,0.999795
2,-0.010462,0.099117,0.024505,0.032765,-0.002674,0.002186,0.999457
3,0.00148,0.057224,0.011503,0.019519,-0.00506,4.7e-05,0.999797
4,0.014242,0.025991,-0.004859,0.00868,-0.009212,-0.001264,0.999919
5,-0.033215,-0.000467,-0.001284,-0.000151,-4.4e-05,-0.000112,1.0
6,0.014518,0.013235,-0.006797,0.004432,-0.00951,-0.001089,0.999944
7,0.013424,-0.006862,-0.009757,-0.003381,-0.009118,-0.001479,0.999952
8,-0.006957,0.032341,0.007312,0.013619,-0.002438,0.000557,0.999904
9,-0.004376,0.000139,0.000349,0.000269,-0.003887,-0.000376,0.999992


### Camera poses with Rig Bundle Adjustment

In [27]:
# Directory holding the binary sparse model files written by the rig bundle
# adjustment step.
# NOTE(review): this is an absolute, machine-specific path.
rig_ba_sparse_dir = Path("/home/skumar/ext_ssd/workstation-sfm-setup/output-backend/rig-bundle-adjustment/vineyards/RJM/front_2024-06-06-09-26-19.svo/4_to_126")

# Individual model files inside that directory.
rig_ba_sparse_cameras = rig_ba_sparse_dir.joinpath("cameras.bin")
rig_ba_sparse_points3D = rig_ba_sparse_dir.joinpath("points3D.bin")
rig_ba_sparse_images = rig_ba_sparse_dir.joinpath("images.bin")

In [28]:
# Load the image records of the rig-bundle-adjusted model with the same helper
# script used for the model without rig BA.
import sys
#sys.path.append("/home/skumar/colmap/scripts/python")
# NOTE(review): this appends the same directory to sys.path again if the
# earlier loading cell has already run; harmless but redundant.
sys.path.append(os.path.expandvars('$HOME/colmap/scripts/python'))
from read_write_model import read_images_binary 
rig_ba_sparse_img_dict = read_images_binary(rig_ba_sparse_images)
# Report how many records were loaded and their keys.
print(f"{len(rig_ba_sparse_img_dict.keys())} => {rig_ba_sparse_img_dict.keys()}")

120 => dict_keys([59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 60, 61, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 120, 119, 118, 117, 116, 115, 114, 113, 112, 111, 110, 109, 108, 91, 90, 89, 88, 87, 86, 85, 84, 83, 82, 81, 80, 79, 78, 77, 76, 75, 74, 73, 72, 71, 70, 69, 68, 67, 66, 65, 64, 63, 62])


In [29]:
# Optional debugging aid (disabled): print every key together with the name of
# its image record.
#  for k, v in rig_ba_sparse_img_dict.items():
#         print(f"{k} {v.name}")

# Print the keys of the rig-BA image dictionary.
print(rig_ba_sparse_img_dict.keys())

dict_keys([59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 60, 61, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 120, 119, 118, 117, 116, 115, 114, 113, 112, 111, 110, 109, 108, 91, 90, 89, 88, 87, 86, 85, 84, 83, 82, 81, 80, 79, 78, 77, 76, 75, 74, 73, 72, 71, 70, 69, 68, 67, 66, 65, 64, 63, 62])


In [30]:
# import numpy as np
# rig_ba_rel_poses = []
# num_images = len(rig_ba_sparse_img_dict.keys())
# for idx in range(1, num_images // 2):
#     left_img = rig_ba_sparse_img_dict[idx]
#     right_img = rig_ba_sparse_img_dict[idx + num_images // 2]
#     if idx < 5:
#         print(f"left_img_name: {left_img.name} right_img_name: {right_img.name}")
#     e_lw = cam_extrinsics(left_img)  #left camera pose w.r.t. world
#     e_rw = cam_extrinsics(right_img) #right camera pose w.r.t world
#     rel_pose = calculate_relative_pose(e_lw, e_rw)
#     rig_ba_rel_poses.append(rel_pose)

In [31]:
import numpy as np

# Relative pose of each right-camera image w.r.t. its left-camera partner for
# the rig-bundle-adjusted model. Image idx is paired with idx + num_pairs.
rig_ba_rel_poses = []
num_images = len(rig_ba_sparse_img_dict.keys())
num_pairs = num_images // 2
# The upper bound includes the last pair (idx == num_pairs), matching the loop
# used for the model without rig BA; range(1, num_pairs) skipped it.
for idx in range(1, num_pairs + 1):
    left_img = rig_ba_sparse_img_dict[idx]
    right_img = rig_ba_sparse_img_dict[idx + num_pairs]
    e_lw = cam_extrinsics(left_img)  # left camera extrinsics w.r.t. world
    e_rw = cam_extrinsics(right_img)  # right camera extrinsics w.r.t. world
    print(f"e_lw: \n{e_lw}")
    print(f"e_rw: \n{e_rw}\n")
    rel_pose = calculate_relative_pose(e_lw, e_rw)
    rig_ba_rel_poses.append(rel_pose)
    # Stop after the first six pairs to keep the printed output short; as a
    # consequence rig_ba_rel_poses holds at most six rows.
    if idx > 5:
        break
    
    

e_lw: 
[[-9.99592172e-01 -2.82828747e-02  3.94571384e-03 -3.47982173e+00]
 [ 2.85516224e-02 -9.92451510e-01  1.19267791e-01  3.38210159e+00]
 [ 5.42693684e-04  1.19331806e-01  9.92854282e-01  1.11311843e+01]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00  1.00000000e+00]]
e_rw: 
[[-9.99609316e-01 -2.79381233e-02  8.22139501e-04 -3.58074965e+00]
 [ 2.78375143e-02 -9.92508995e-01  1.18957840e-01  3.37658145e+00]
 [-2.50747793e-03  1.18934251e-01  9.92898966e-01  1.11216846e+01]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00  1.00000000e+00]]

e_rl: 
[[ 9.99995062e-01 -7.15180364e-04 -3.06012366e-03 -6.44634833e-02]
 [ 7.14219784e-04  9.99999695e-01 -3.14983654e-04  4.72383105e-04]
 [ 3.06034799e-03  3.12796497e-04  9.99995268e-01  1.44553898e-04]
 [ 0.00000000e+00  0.00000000e+00  0.00000000e+00  1.00000000e+00]]
e_lw: 
[[-9.99083769e-01 -4.01993158e-02  1.46845854e-02 -3.35015926e+00]
 [ 4.27969464e-02 -9.40091163e-01  3.38226296e-01  4.52731048e+00]
 [ 2.08383313e-04  3.38544858e

In [32]:
import pandas as pd
# Display options: no truncation of cell contents, HTML rendering of frames.
pd.set_option('display.max_colwidth', None)
pd.set_option('display.notebook_repr_html', True)
# One row per processed left/right pair of the rig-BA model: translation
# (dx, dy, dz) followed by the rotation quaternion (qx, qy, qz, qw).
# NOTE: this rebinds the name df used by the earlier table cell.
df = pd.DataFrame(rig_ba_rel_poses, columns=['dx', 'dy', 'dz', 'qx' , 'qy', 'qz' , 'qw'])
# Last expression of the cell: the Styler object is what the notebook renders.
df.style

Unnamed: 0,dx,dy,dz,qx,qy,qz,qw
0,-0.064463,0.000472,0.000145,0.000157,-0.00153,0.000357,0.999999
1,-0.064463,0.000472,0.000145,0.000157,-0.00153,0.000357,0.999999
2,-0.064463,0.000472,0.000145,0.000157,-0.00153,0.000357,0.999999
3,-0.064463,0.000472,0.000145,0.000157,-0.00153,0.000357,0.999999
4,-0.064463,0.000472,0.000145,0.000157,-0.00153,0.000357,0.999999
5,-0.064463,0.000472,0.000145,0.000157,-0.00153,0.000357,0.999999
