In this notebook, we will build a 3D map of a scene from a small set of images and refine it with the featuremetric optimization. We then localize an image downloaded from the Internet and show the effect of the refinement.

# Setup
We start by defining some output paths: where the intermediate files will be stored.

In [None]:
%load_ext autoreload
%autoreload 2
import tqdm, tqdm.notebook
tqdm.tqdm = tqdm.notebook.tqdm  # notebook-friendly progress bars
from pathlib import Path
import os
import time
import sys
from hloc import extract_features, match_features, reconstruction, pairs_from_exhaustive, visualization
from hloc.visualization import plot_images, read_image
from hloc.utils.viz_3d import init_figure, plot_points, plot_reconstruction, plot_camera_colmap

from pixsfm.util.visualize import init_image, plot_points2D
from pixsfm.refine_hloc import PixSfM
from pixsfm import ostream_redirect
from PIL import Image, ImageDraw
import pycolmap
#import visualize_model
# Redirect the C++ outputs (stdout and stderr) to notebook cells.
# The context manager is entered by hand so the redirection stays active for
# the cells that follow; no matching __exit__ call appears in this part of the
# notebook.
cpp_out = ostream_redirect(stderr=True, stdout=True)
cpp_out.__enter__()

In [None]:
# Report the installed PyTorch version and the CUDA architectures returned by
# torch.cuda.get_arch_list().
import torch 
print(torch.__version__)
print(torch.cuda.get_arch_list())

In [None]:
# Input image folder and the locations of every intermediate artifact.
import shutil

images = Path('datasets/monarch/')
outputs = Path('outputs/monarch-demo/')
# Start from a clean slate by dropping the results of any previous run.
# shutil.rmtree replaces the `!rm -rf $outputs` shell escape so the cell is
# plain, portable Python; ignore_errors=True keeps the "a missing directory is
# fine" behaviour of `rm -rf`.
shutil.rmtree(outputs, ignore_errors=True)
sfm_pairs = outputs / 'pairs-sfm.txt'
loc_pairs = outputs / 'pairs-loc.txt'
features = outputs / 'features.h5'
matches = outputs / 'matches.h5'
raw_dir = outputs / "raw"
ref_dir = outputs / "ref"
# Model location when the intrinsics are locked.
ref_dir_locked = outputs / "ref_locked"
# Model location when the intrinsics are not locked.
ref_dir_not_locked = outputs / "ref_dir_not_locked"

Here we will use SuperPoint local features with the SuperGlue matcher, but it's easy to switch to other features like SIFT or R2D2.

In [None]:
# Pick the feature-extraction and matching configurations by name from the
# `confs` dictionaries of hloc's extract_features and match_features modules.
feature_conf = extract_features.confs['superpoint_aachen']
matcher_conf = match_features.confs['superglue']

# 3D mapping and refinement
First we list the images used for mapping. These are the stereo frames stored in the `left/` and `right/` folders of the monarch dataset.

In [None]:
# '''masking of the tractor hood from the images '''
# ''' output => datasets/monarch/{target_folder}/image_name.jpg '''
# def draw_box_around_tractor_hood(image_path, target_folder): 
#     image = Image.open(image_path)
#     w, h = image.size
#     box_x1, box_y1 = 460, 770  # Top-left corner
#     box_x2, box_y2 = 1630, 1080  # Bottom-right corner
#     outline_color = (0, 0, 0)  # Black in RGB format
#     fill_color = (0, 0, 0)  # Black in RGB format
#     draw = ImageDraw.Draw(image)
#     draw.rectangle([box_x1, box_y1, box_x2, box_y2], outline=outline_color, fill=fill_color)
#     directory_path,filename = os.path.split(image_path)
#     parent_directory_path = os.path.dirname(directory_path)
#     target_directory = os.path.join(parent_directory_path, target_folder)
#     os.makedirs(target_directory, exist_ok = True)
#     target_image_path = os.path.join(target_directory,filename)
#     image.save(target_image_path)
#     return target_image_path 

In [None]:
# List every entry of the left/ and right/ folders as a path relative to
# `images` (e.g. "left/<name>"). iterdir() yields entries in arbitrary order,
# so both lists are unsorted at this point. The enumerate() index of the
# earlier version was never used and has been dropped.
references_left = [str(p.relative_to(images)) for p in (images / 'left/').iterdir()]
references_right = [str(p.relative_to(images)) for p in (images / 'right/').iterdir()]

In [None]:
# Number of entries found in the left/ folder.
print(len(references_left))

In [None]:
# Order both lists by the integer file stem, i.e. the text after the last '/'
# and before the first '.' of each relative path.
references_left = sorted(
    references_left,
    key=lambda rel: int(rel.rpartition('/')[2].partition('.')[0]),
)
references_right = sorted(
    references_right,
    key=lambda rel: int(rel.rpartition('/')[2].partition('.')[0]),
)

In [None]:
# Same count as before: sorting only reorders the list.
print(len(references_left))

In [None]:
# Keep the sorted entries at positions 40..81 of each side (42 per side,
# provided each list has at least 82 entries) and concatenate them, left block
# first, into a single list.
references_left = references_left[40:82] 
references_right = references_right[40:82]
references = references_left + references_right

In [None]:
# Sort the combined list by frame number so that each stereo pair is adjacent.
# sorted() is stable, so entries sharing a number keep their relative order
# from the input list.
references = sorted(
    references,
    key=lambda rel: int(rel.rpartition('/')[2].partition('.')[0]),
)

In [None]:
# Show the image list after the pair-wise sort.
print(references)

In [None]:
# ''' masking the tractor hood in all the images'''
# ''' returns list of path to the masked images '''
# start_time = time.time()
# target_folder = "masked_images"
# masked_references = [draw_box_around_tractor_hood(p, target_folder) for p in references]
# end_time = time.time()
# elapsed_time = end_time - start_time

# target_path = os.path.join(images, target_folder)

# ''' sorting masked_references sequentially '''
# ''' smf => sorted masked references '''
# #smf = sorted(masked_references, key = lambda x: int(((x.split("/")[-1]).split(".")[0]).split("_")[0]))

# print(f"type(masked_references): {type(masked_references)}")


In [None]:
# smf = []
# for i in range(0, len(references)//2 - 1): 
#     left  = "masked_images/" + str(i) + "_left.jpg"
#     right = "masked_images/" + str(i) + "_right.jpg"
#     smf.append(left)
#     smf.append(right)

In [None]:
# print(f"smf: {smf}")
# print(f"len(smf) : {len(smf)}")

In [None]:
# Print the image list again before feature extraction.
print(references)

In [None]:
# Display a single entry of the list (index 59).
references[59]

In [None]:
# Run feature extraction for the listed images with `features` as the target
# file. The return value is kept in `features_path_` and passed to
# list_h5_names / get_keypoints in later cells.
features_path_ = extract_features.main(feature_conf, images, image_list= references, feature_path=features)
#match_features.main(matcher_conf, sfm_pairs, features=features, matches=matches);

In [None]:
from hloc.extract_features import list_h5_names
# List the names stored in the feature file, then print how many there are
# and the first ten of them.
h5_feature_names = list_h5_names(features_path_)
print(f"len(h5_feature_names): {len(h5_feature_names)}")
print(h5_feature_names[:10])

Then we extract features and match them across image pairs. Since we deal with few images, we simply match all pairs exhaustively.

In [None]:
# features_path_ = extract_features.main(feature_conf, images, image_list=references_final, feature_path=features)
# #match_features.main(matcher_conf, sfm_pairs, features=features, matches=matches);

In [None]:
from hloc.utils.viz import plot_keypoints, save_plot
from hloc.utils.io import get_keypoints
import matplotlib.pyplot as plt
import numpy as np
import os

# Visual check of the extracted keypoints on the first four images.
ref_trim_ = references[:4]
plot_images([read_image(images / r) for r in ref_trim_], dpi=50, figsize=4.2)

# Load the keypoints of each displayed image from the feature file.
kps_list_ = []
for r in ref_trim_:
    kps = get_keypoints(features_path_, r)
    print(type(kps))
    kps_list_.append(kps)

# plot_keypoints is expected to draw on the figure opened by plot_images.
plot_keypoints(kps_list_, colors="red", ps=10)

current_path_ = os.getcwd()
print("current_path: ", current_path_)
print(type(current_path_))

# Save the figure as kps.png in the current working directory. os.path.join
# replaces the manual "/" concatenation so the separator always matches the
# platform.
final_path = os.path.join(current_path_, "kps.png")
save_plot(final_path)


In [None]:
# NOTE: this binds the name `collections` to `collections.abc`, not to the
# top-level `collections` module.
import collections.abc as collections
# Check that `references` is an iterable.
isinstance(references, collections.Iterable)

In [None]:
# Generate the image pairs for `references`, with `sfm_pairs` as the first
# (presumably output-file) argument.
# NOTE(review): `stereo_main` is not defined in this notebook; its pairing rule
# should be confirmed in the pairs_from_exhaustive module in use.
pairs_from_exhaustive.stereo_main(sfm_pairs, image_list=references)

In [None]:
#print("features: ", features)
#print("matches: ", matches)
# Run the matcher on the pairs in `sfm_pairs`, reading from `features` and
# using `matches` as the matches file. The trailing ';' suppresses the cell's
# output.
match_features.main(matcher_conf, sfm_pairs, features=features, matches=matches);

In [None]:
# Print every name stored in the matches file that mentions "right-52.jpg".
match_names = list_h5_names(matches)
for name in match_names:
    if "right-52.jpg" not in name:
        continue
    print(name)

In [None]:
# Plot the matches between the first two images of `references`.
from hloc.utils.viz import plot_matches
from hloc.utils.io import get_matches, get_keypoints

print(f"features: {features}")
kp1 = get_keypoints(features, references[0])
kp2 = get_keypoints(features, references[1])
print(f"kp1.shape: {kp1.shape}")

# `m` holds one row per match: column 0 indexes kp1, column 1 indexes kp2.
m, _ = get_matches(matches, references[0], references[1])
print(f"m.shape: {m.shape}")

# Gather the matched keypoints with NumPy integer-array indexing instead of a
# per-element Python loop; this also keeps the (N, 2) shape when N == 0.
m1 = kp1[m[:, 0]]
m2 = kp2[m[:, 1]]

plot_images([read_image(images / r) for r in references[:2]], dpi=50, figsize=4.2)
plot_matches(m1, m2)

Now we run the reconstruction with and without the featuremetric refinement. For this dataset, when computing the dense features, we resize the images such that they are not larger than 1024 pixels.

In [None]:
# Camera intrinsics (focal lengths and principal point) followed by four
# zeros, serialized as one comma-separated string. The zeros fill the
# remaining slots of the parameter list (presumably the distortion
# coefficients of COLMAP's OPENCV model; confirm against its documentation).
fx = fy = 1093.2768
cx, cy = 964.989, 569.276
opencv_camera_params = ','.join(str(value) for value in [fx, fy, cx, cy] + [0] * 4)

In [None]:
#sfm = PixSfM({"dense_features": {"max_edge": 1024}})


#conf1 = {"dense_features": {"max_edge": 1024}}

# PixSfM configuration: the three "BA" optimizer refine_* switches are turned
# off and the dense features use max_edge = 1024.
# NOTE(review): "refine_extrinsics": False presumably also freezes the camera
# poses during this BA stage; confirm that this is intended.
conf2 = {
    "BA": {"optimizer": {"refine_focal_length": False,"refine_extra_params": False, "refine_extrinsics": False}},
    "dense_features": {"max_edge":1024}
}

sfm = PixSfM(conf=conf2)



In [None]:
'''CASE 2 => INITIAL K IS PROVIDED + K IS LOCKED '''

# Cameras use the OPENCV model with the intrinsics string built earlier in the
# notebook.
image_options = dict(camera_model='OPENCV', 
                     camera_params=opencv_camera_params
                    )

# NOTE(review): mapper_options_one is not referenced in the visible part of
# the notebook and is identical to mapper_options_two.
mapper_options_one = dict(ba_refine_focal_length=False, 
                      ba_refine_extra_params=False,
                     ba_refine_principal_point=False)

# Mapper options with all three ba_refine_* switches disabled.
mapper_options_two = dict(ba_refine_focal_length=False, 
                      ba_refine_extra_params=False,
                     ba_refine_principal_point=False)

# NOTE(review): despite the "_not_locked" suffix, these arguments carry the
# mapper options with refinement disabled, and the model is written to
# ref_dir_locked below.
hloc_args_not_locked = dict(image_list=references,
                image_options=image_options,
                camera_mode="PER_FOLDER",
                mapper_options=mapper_options_two)

#hloc_args_not_locked = dict(image_list=references)

# Run the reconstruction; the two returned values are kept under the names
# K_locked and sfm_outputs_not_locked.
K_locked, sfm_outputs_not_locked = sfm.reconstruction(ref_dir_locked, images, sfm_pairs, features, matches, **hloc_args_not_locked)


In [None]:
def calculate_relative_pose(e_lw: np.ndarray, e_rw: np.ndarray):
    """Return the relative transform between two cameras as a flat list.

    The relative transform is ``e_rl = e_rw @ inv(e_lw)``.

    Args:
        e_lw: 4x4 homogeneous extrinsic matrix of the left camera.
        e_rw: 4x4 homogeneous extrinsic matrix of the right camera.

    Returns:
        A list ``[dx, dy, dz, qx, qy, qz, qw]``: the translation column of
        ``e_rl`` followed by its rotation block as a quaternion in SciPy's
        scalar-last order.
    """
    # Imported locally so the function only needs SciPy when it is called.
    from scipy.spatial.transform import Rotation

    # Invert the left extrinsics once and reuse the result (the inverse used
    # to be computed twice, with the first result left unused).
    e_wl = np.linalg.inv(e_lw)
    e_rl = e_rw @ e_wl

    translation = e_rl[:3, 3]
    quaternion = Rotation.from_matrix(e_rl[:3, :3]).as_quat()
    return [*translation, *quaternion]


def cam_extrinsics(img):
    """Assemble the 4x4 homogeneous matrix ``[[R, t], [0, 0, 0, 1]]`` of an image.

    ``R`` comes from ``img.qvec`` through ``qvec2rotmat`` and ``t`` is
    ``img.tvec`` reshaped into a column.
    """
    from read_write_model import qvec2rotmat

    rotation = qvec2rotmat(img.qvec)
    translation = img.tvec.reshape(3, -1)
    bottom_row = np.array([[0, 0, 0, 1]])
    # Top row of blocks is [R | t]; the homogeneous row goes underneath.
    return np.block([[rotation, translation], [bottom_row]])

def calculate_relative_pose_between(left_idx: int, right_idx: int):
    """Relative pose between two entries of the global ``sparse_img_dict``.

    Both keys are looked up in ``sparse_img_dict``, converted to 4x4 matrices
    with ``cam_extrinsics`` and handed to ``calculate_relative_pose``.
    """
    left_entry, right_entry = sparse_img_dict[left_idx], sparse_img_dict[right_idx]
    left_extrinsics = cam_extrinsics(left_entry)
    right_extrinsics = cam_extrinsics(right_entry)
    return calculate_relative_pose(left_extrinsics, right_extrinsics)


    

#### Camera positions WITHOUT Rig Bundle Adjustment

In [None]:
from pathlib import Path
#sparse_dir = Path("/home/skumar/stereo_colmap_cli_output/sparse/")
#sparse_dir = ref_dir_locked / "hloc"
# NOTE: hard-coded absolute path to a model directory on the author's machine;
# adjust it before running elsewhere.
sparse_dir = Path("/home/skumar/stereo_colmap_cli_output/")
print(f"sparse_dir: {sparse_dir.as_posix()}")
# Paths of the three binary model files inside sparse_dir.
sparse_images = sparse_dir / "images.bin"
sparse_points3D = sparse_dir / "points3D.bin"
sparse_cameras = sparse_dir / "cameras.bin"

In [None]:
import sys
# Make the helper scripts in this directory importable.
sys.path.append("/home/skumar/colmap/scripts/python")
from read_write_model import read_images_binary

# Load the images file and report its size and key range.
sparse_img_dict = read_images_binary(sparse_images)
print(f"{len(sparse_img_dict)} ==> {sparse_img_dict.keys()}")
print(f"min_key: {min(sparse_img_dict)} mx_key: {max(sparse_img_dict)}")

In [None]:
# Display the 4x4 matrix built for the image stored under key 1.
cam_extrinsics(sparse_img_dict[1])

In [None]:
import numpy as np

# Relative pose of every stereo pair in the model held in sparse_img_dict.
# NOTE(review): assumes the image ids run 1..N with the left images in the
# first half and their right counterparts in the second half; confirm against
# the image names.
rel_poses = []
num_images = len(sparse_img_dict)
# Id offset between a left image and its right counterpart. It is derived
# from the model size instead of the former hard-coded 42, so the loop bound
# and the offset can no longer disagree (identical result for 84 images).
num_pairs = num_images // 2
for idx in range(1, num_pairs + 1):
    left_img = sparse_img_dict[idx]
    right_img = sparse_img_dict[idx + num_pairs]
    e_lw = cam_extrinsics(left_img)   # 4x4 extrinsics of the left image
    e_rw = cam_extrinsics(right_img)  # 4x4 extrinsics of the right image
    e_rl = calculate_relative_pose(e_lw, e_rw)
    rel_poses.append(e_rl)

In [None]:
import pandas as pd
# Do not truncate column contents and use the HTML table representation.
pd.set_option('display.max_colwidth', None)
pd.set_option('display.notebook_repr_html', True)
# One row per entry of rel_poses; the column order follows the list returned
# by calculate_relative_pose (translation first, then the quaternion).
df = pd.DataFrame(rel_poses, columns=['dx', 'dy', 'dz', 'qx' , 'qy', 'qz' , 'qw'])
df.style

In [None]:
#dr = np.hstack((np.array(df['dx']).reshape(-1,1), np.array(df['dy']).reshape(-1,1), np.array(df['dz']).reshape(-1,1)))

In [None]:
#dr.shape

In [None]:
# x = np.linalg.norm(dr, axis=1, ord=2)
# plt.hist(x, 100)
# plt.show()

#### Camera poses with Rig Bundle Adjustment

In [None]:
# NOTE: hard-coded absolute path to a model directory on the author's machine;
# adjust it before running elsewhere.
rig_ba_sparse_dir = Path("/home/skumar/rig_dense/sparse/")
print(f"rig_ba_sparse_dir: {rig_ba_sparse_dir.as_posix()}")
# Paths of the three binary model files inside rig_ba_sparse_dir.
rig_ba_sparse_images = rig_ba_sparse_dir / "images.bin"
rig_ba_sparse_points3D = rig_ba_sparse_dir / "points3D.bin"
rig_ba_sparse_cameras = rig_ba_sparse_dir / "cameras.bin"

In [None]:
import sys
# Make the helper scripts in this directory importable.
sys.path.append("/home/skumar/colmap/scripts/python")
from read_write_model import read_images_binary

# Load the images file of the rig-BA model and report its size and keys.
rig_ba_sparse_img_dict = read_images_binary(rig_ba_sparse_images)
print(f"{len(rig_ba_sparse_img_dict)} => {rig_ba_sparse_img_dict.keys()}")

In [None]:
import numpy as np

# Relative pose of every stereo pair in the model held in
# rig_ba_sparse_img_dict.
# NOTE(review): assumes the image ids run 1..N with the left images in the
# first half and their right counterparts in the second half; the names
# printed for the first pairs let you check this.
rig_ba_rel_poses = []
num_images = len(rig_ba_sparse_img_dict)
# Id offset between a left image and its right counterpart. It is derived
# from the model size instead of the former hard-coded 42, so the loop bound
# and the offset can no longer disagree (identical result for 84 images).
num_pairs = num_images // 2
for idx in range(1, num_pairs + 1):
    left_img = rig_ba_sparse_img_dict[idx]
    right_img = rig_ba_sparse_img_dict[idx + num_pairs]
    if idx < 5:
        print(f"left_img_name: {left_img.name} right_img_name: {right_img.name}")
    e_lw = cam_extrinsics(left_img)   # 4x4 extrinsics of the left image
    e_rw = cam_extrinsics(right_img)  # 4x4 extrinsics of the right image
    rel_pose = calculate_relative_pose(e_lw, e_rw)
    rig_ba_rel_poses.append(rel_pose)

In [None]:
import pandas as pd
# Do not truncate column contents and use the HTML table representation.
pd.set_option('display.max_colwidth', None)
pd.set_option('display.notebook_repr_html', True)
# One row per entry of rig_ba_rel_poses. NOTE: this rebinds `df`, replacing
# the table built from rel_poses in the earlier cell.
df = pd.DataFrame(rig_ba_rel_poses, columns=['dx', 'dy', 'dz', 'qx' , 'qy', 'qz' , 'qw'])
df.style