In [1]:
%load_ext autoreload
%autoreload 2

from pathlib import Path

from hloc import (
    extract_features,
    match_features,
    reconstruction,
    visualization,
    pairs_from_retrieval,
)

from IPython.display import clear_output
import gc

## Setup
In this notebook, we will run SfM reconstruction from scratch on a set of images. We choose the [South-Building dataset](https://openaccess.thecvf.com/content_cvpr_2013/html/Hane_Joint_3D_Scene_2013_CVPR_paper.html) - we will download it later. First, we define some paths.

In [47]:
# Experiment configuration: input video / frame folder, retrieval breadth, and
# all derived output paths. Edit this cell to switch datasets or pipelines.

# Root folder that holds every captured dataset. The experiment name is built
# from the image folder's path *relative to this root*, so `images` must live
# underneath it (otherwise `relative_to` raises ValueError instead of silently
# producing a misleading name).
data_root = Path("/home/jennyw2/data")

# --- Alternative datasets (uncomment exactly one block) ---------------------
# images = Path("/home/jennyw2/data/small_engine_on_table_2/images_500_highres")
# outputs = Path("outputs/small_engine_on_table_2_500imgs_highres_10match")

# video_path is either a path or None
# video_path = Path("/home/jennyw2/data/container_scan_videos_20250903/20250903_143802000_iOS/20250903_143802000_iOS.MOV")
# images = Path("/home/jennyw2/data/container_scan_videos_20250903/20250903_143802000_iOS/images_500")
# num_matched = 20

# Doing the things before training splat
video_path = Path("/home/jennyw2/data/container_scan_videos_20250903/20250903_143802000_iOS/20250903_143802000_iOS.MOV")
images = Path("/home/jennyw2/data/container_scan_videos_20250903/20250903_143802000_iOS/images_500")
num_matched = 40  # number of retrieved neighbours per image passed to pairs_from_retrieval

# The thing training gaussian splat
# video_path = Path("/home/jennyw2/data/container_scan_videos_20250903/20250903_143627000_iOS/20250903_143627000_iOS.MOV")
# images = Path("/home/jennyw2/data/container_scan_videos_20250903/20250903_143627000_iOS/images_500")
# num_matched = 20

# Target frame height in pixels for the ffmpeg resize step; also encoded in
# the experiment name so runs at different resolutions do not collide.
image_height = 540

# Everything after the data root, joined with "_", plus the tunable settings.
experiment_name = "_".join(images.relative_to(data_root).parts) + f"_{num_matched}match" + f"_{image_height}height"
outputs = Path("/home/jennyw2/code/Hierarchical-Localization/outputs/") / experiment_name

# Text file listing the image pairs selected by retrieval.
sfm_pairs = outputs / "pairs-netvlad.txt"

# --- Alternative pipeline: SuperPoint + SuperGlue ---------------------------
# sfm_dir = outputs / "sfm_superpoint+superglue"
# retrieval_conf = extract_features.confs["netvlad"]
# feature_conf = extract_features.confs["superpoint_aachen"]
# matcher_conf = match_features.confs["superglue"]

# --- Active pipeline: DISK + LightGlue --------------------------------------
# The raw (distorted) model goes in "distorted"; the undistorted copy is later
# written to the sibling "undistorted" folder.
sfm_dir = outputs / "sfm_disk_disk+lightglue" / "distorted"
retrieval_conf = extract_features.confs["netvlad"]
feature_conf = extract_features.confs["disk"]
matcher_conf = match_features.confs["disk+lightglue"]

In [48]:
# Sanity check: echo the resolved input and output locations, one per line,
# before kicking off the long-running steps.
print(video_path, images, outputs, sep="\n")

/home/jennyw2/data/container_scan_videos_20250903/20250903_143802000_iOS/20250903_143802000_iOS.MOV
/home/jennyw2/data/container_scan_videos_20250903/20250903_143802000_iOS/images_500
/home/jennyw2/code/Hierarchical-Localization/outputs/container_scan_videos_20250903_20250903_143802000_iOS_images_500_40match_540height


# Preprocess the dataset

In [49]:
if video_path is not None:
    ! mkdir $images
    ! conda run --no-capture-output -n nerfstudio python ~/code/nerf_dataset_preprocessing_helper/01_filter_raw_data.py --input_path $video_path --output_path $images --target_count 500 --scalar 3 -y
else:
    print("Skipping video to image conversion because video_path is not provided")



mkdir: cannot create directory ‘/home/jennyw2/data/container_scan_videos_20250903/20250903_143802000_iOS/images_500’: File exists
frame= 1311 fps=145 q=1.0 Lsize=N/A time=00:00:43.70 bitrate=N/A speed=4.82x    
Found a total of 1311 images to work on.
Calculating image sharpness...
100%|███████████████████████████████████████| 1311/1311 [00:25<00:00, 51.73it/s]
Requested 500 out of 1311 images (38.1%, 1 in 2.6).
Selecting 500 images across 125 groups, with total ~10.5 images per group and selecting ~4.0 images per group (scalar 3).
Group layout:
 [▁█▁▅█▁█▅▁█▁▅█▁█▅▁█▁▅█▁█▅▁█▁▅█▁█▅▁█▁▅█▁█▅▁█▁▅█▁█▅▁█▁▅█▁█▅▁█▁▅█▁█▅▁█▁▅█▁█▅▁█▁▅█▁█▅▁█▁▅█▁█▅▁█▁▅█▁█▅▁█▁▅]

Distribution of to-be-deleted images:
 [▅▃▆▅▅▃▆▅▅▅▅▅▅▅▅▆▃▆▅▅▃▆▅▆▃▅▆▅▃▅▃▆▅▅▃▆▅▆▅▅▃▅▆▃▆▅▅▆▃▅▃▇▂▆▅▆▃▆▅▃▃▇▅▅▃▅▅▅▅▅▆▆▃▅▅▂▅▆▆▆▃▃▆▆▆▃▅▅▁▇▅▆▃▃█▃▂█▃▅]

Distribution of image quality:
 [▆▇▆▅▆▆▆▆▆▅▆▄▃▃▃▃▃▃▃▃▃▃▃▃▄▅▅▇▇██▇▇▇▇▆▅▄▅▅▅▆▅▆▅▅▅▄▄▃▃▃▄▃▃▃▃▄▅▅▄▄▅▆▅▅▅▅▄▃▄▆███▆▂▁▂▅▆▃▁▂▄▅▆▇▇▆▄▄▅▃▄▅▅▃▂▃]

Retained 500 sharpest images.


In [50]:
import os
import subprocess

# Resized copies are staged in a sub-folder so the originals stay intact
# until every frame has been converted successfully.
resized_image_dir = images / "resized_images"
resized_image_dir.mkdir(parents=True, exist_ok=True)

jpg_names = [name for name in os.listdir(images) if name.endswith(".jpg")]
for jpg_name in jpg_names:
    stem = os.path.splitext(jpg_name)[0]
    source_file = os.path.join(images, jpg_name)
    target_file = os.path.join(resized_image_dir, stem + ".jpg")
    # scale=-1:H lets ffmpeg choose the width that keeps the aspect ratio;
    # check=True aborts the cell on the first frame ffmpeg fails to convert.
    subprocess.run(
        ["ffmpeg", "-y", "-i", source_file, "-vf", f"scale=-1:{image_height}", target_file],
        check=True,
    )

# ffmpeg is very chatty; wipe its log from the cell output once all frames are done.
clear_output(wait=True)
gc.collect()

0

In [51]:
import shutil

# Swap the full-size frames for their resized copies, in place.
#
# Guard against re-running this cell on its own: once the staging folder has
# been consumed there is nothing to move back, and unconditionally deleting
# the *.jpg files in `images` would destroy the dataset.
if resized_image_dir.is_dir() and any(resized_image_dir.glob("*.jpg")):
    for f in images.glob("*.jpg"):
        f.unlink()  # delete unresized images
    for f in resized_image_dir.glob("*.jpg"):
        shutil.move(str(f), images / f.name)  # move resized images to the original locations
    shutil.rmtree(resized_image_dir)
else:
    print(f"No resized images found in {resized_image_dir}; leaving {images} untouched")


## Download the dataset
The dataset is simply a set of images. The intrinsic parameters will be extracted from the EXIF data and refined with SfM.

In [52]:
# if not images.exists():
#     !wget https://cvg-data.inf.ethz.ch/local-feature-evaluation-schoenberger2017/South-Building.zip -P datasets/
#     !unzip -q datasets/South-Building.zip -d datasets/

## Find image pairs via image retrieval
We extract global descriptors with NetVLAD and find for each image the most similar ones. For smaller datasets we can instead use exhaustive matching via `hloc/pairs_from_exhaustive.py`, which would find $\frac{n(n-1)}{2}$ image pairs.

In [53]:
# Step 1: extract global descriptors for every image using `retrieval_conf`
# (NetVLAD); the returned path points at the exported descriptor file.
# Step 2: for each image, retrieve its `num_matched` most similar images and
# write the resulting pair list to `sfm_pairs`.
retrieval_path = extract_features.main(retrieval_conf, images, outputs)
pairs_from_retrieval.main(retrieval_path, sfm_pairs, num_matched=num_matched) # leftover note: this value was previously 5

# Explicit collection after the heavy step; as the cell's last expression its
# return value is also echoed as the cell output.
gc.collect()

[2025/09/18 11:05:12 hloc INFO] Extracting local features with configuration:
{'model': {'name': 'netvlad'},
 'output': 'global-feats-netvlad',
 'preprocessing': {'resize_max': 1024}}
[2025/09/18 11:05:12 hloc INFO] Found 500 images in root /home/jennyw2/data/container_scan_videos_20250903/20250903_143802000_iOS/images_500.
100%|█████████████████████████████████████████| 500/500 [02:47<00:00,  2.99it/s]
[2025/09/18 11:08:04 hloc INFO] Finished exporting features.
[2025/09/18 11:08:04 hloc INFO] Extracting image pairs from a retrieval database.
[2025/09/18 11:08:05 hloc INFO] Found 20000 pairs.


0

## Extract and match local features

In [54]:
# Extract local features for every image with `feature_conf`, then match them
# across the pairs listed in `sfm_pairs` with `matcher_conf`.
# NOTE: matching is by far the slowest step; the recorded run below took
# roughly six hours for 500 images with num_matched = 40.
feature_path = extract_features.main(feature_conf, images, outputs)
match_path = match_features.main(
    matcher_conf, sfm_pairs, feature_conf["output"], outputs
)

# Explicit collection after the heavy step; the return value is echoed as the cell output.
gc.collect()

[2025/09/18 11:08:05 hloc INFO] Extracting local features with configuration:
{'model': {'max_keypoints': 5000, 'name': 'disk'},
 'output': 'feats-disk',
 'preprocessing': {'grayscale': False, 'resize_max': 1600}}
[2025/09/18 11:08:05 hloc INFO] Found 500 images in root /home/jennyw2/data/container_scan_videos_20250903/20250903_143802000_iOS/images_500.
100%|█████████████████████████████████████████| 500/500 [01:55<00:00,  4.34it/s]
[2025/09/18 11:10:00 hloc INFO] Finished exporting features.
[2025/09/18 11:10:00 hloc INFO] Matching local features with configuration:
{'model': {'features': 'disk', 'name': 'lightglue'},
 'output': 'matches-disk-lightglue'}
100%|███████████████████████████████████| 11537/11537 [6:12:36<00:00,  1.94s/it]
[2025/09/18 17:22:37 hloc INFO] Finished exporting matches.


10

In [55]:
# Display where the match file was written.
match_path

PosixPath('/home/jennyw2/code/Hierarchical-Localization/outputs/container_scan_videos_20250903_20250903_143802000_iOS_images_500_40match_540height/feats-disk_matches-disk-lightglue_pairs-netvlad.h5')

## 3D reconstruction
Run COLMAP on the features and matches.

In [None]:
# Run the SfM reconstruction from the exported features and matches.
# Results and COLMAP logs are written under `sfm_dir`; the returned `model`
# is kept in memory for the save and visualization cells that follow.
model = reconstruction.main(sfm_dir, images, sfm_pairs, feature_path, match_path)

gc.collect()

[2025/09/18 17:22:37 hloc INFO] Writing COLMAP logs to /home/jennyw2/code/Hierarchical-Localization/outputs/container_scan_videos_20250903_20250903_143802000_iOS_images_500_40match_540height/sfm_disk_disk+lightglue/distorted/colmap.LOG.*
[2025/09/18 17:22:37 hloc INFO] Creating an empty database...
[2025/09/18 17:22:37 hloc INFO] Importing images into the database...
[2025/09/18 17:22:38 hloc INFO] Importing features into the database...
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [00:00<00:00, 1510.65it/s]
[2025/09/18 17:22:39 hloc INFO] Importing matches into the database...
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20000/20000 [00:10<00:00, 1984.27it/s]
[2025/09/18 17:22:50 hloc INFO] Performing geometric verification of the matc

In [None]:
# Sometimes the reconstruction cell above does not leave the best model in the
# main folder. Preserve whatever it wrote there, then save the in-memory model.

# First, move the files currently sitting in sfm_dir into a side folder so
# nothing is overwritten.
# NOTE(review): this moves *every* regular file directly inside sfm_dir (not
# only model files, e.g. the colmap.LOG.* files as well) - confirm that is intended.
maybe_wrong_saved_model_dir = sfm_dir / "picked_model"
maybe_wrong_saved_model_dir.mkdir(parents=True, exist_ok=True)

# Materialise the listing before renaming: entries leave sfm_dir during the loop.
for maybe_wrong_model_file in sorted(sfm_dir.iterdir()):
    if maybe_wrong_model_file.is_file():
        print(maybe_wrong_model_file.name)
        maybe_wrong_model_file.rename(maybe_wrong_saved_model_dir / maybe_wrong_model_file.name)

# Save the model still in memory
model.write(sfm_dir)

# `model` is deliberately NOT deleted here: the visualization cells below still
# read it. TODO: `del model` (followed by gc.collect()) after the last
# visualization cell instead.

## Visualization
We visualize some of the registered images, and color their keypoints by visibility, track length, or triangulated depth.

In [None]:
# Plot 5 images from the reconstruction with keypoints colored by visibility.
visualization.visualize_sfm_2d(model, images, color_by="visibility", n=5)

In [None]:
# Same view, with keypoints colored by track length.
visualization.visualize_sfm_2d(model, images, color_by="track_length", n=5)

In [None]:
# Same view, with keypoints colored by triangulated depth.
visualization.visualize_sfm_2d(model, images, color_by="depth", n=5)

# Release what the plotting steps left behind before the next stage.
gc.collect()

# Prepare dataset for gaussian splat optimization

In [None]:
# Rebuild the undistorted dataset next to the distorted model.
# Line 1 removes any previous `undistorted/` folder; the message is only echoed
# when the removal succeeded (on a first run `rm` reports a harmless error).
# Line 2 runs COLMAP's image undistorter on the model stored in `sfm_dir`.
! rm -r $sfm_dir/../undistorted && echo Overwriting previous undistorted/ folder...
! colmap image_undistorter --image_path $images --input_path $sfm_dir --output_path $sfm_dir/../undistorted

gc.collect()

In [None]:
! mkdir $sfm_dir/../undistorted/sparse/0

In [None]:
# Move the model files written directly into `sparse/` down into `sparse/0`.
# The `*.*` glob only matches names containing a dot, so the `0` folder itself
# is not picked up as a source.
# NOTE(review): presumably `sparse/0` is the layout the training script expects - confirm.
! mv $sfm_dir/../undistorted/sparse/*.* $sfm_dir/../undistorted/sparse/0


# Train gaussian splat

In [45]:
# Print the shell commands for launching the training by hand in a terminal
# (an alternative to running the launch cell below from inside the notebook).
print("conda deactivate; conda activate gaussian_splatting; cd /home/jennyw2/code/gaussian-splatting-repo/gaussian_splatting")
print("python train.py -s {}/../undistorted --eval --model_path output/{}".format(sfm_dir, experiment_name))

conda deactivate; conda activate gaussian_splatting; cd /home/jennyw2/code/gaussian-splatting-repo/gaussian_splatting
python train.py -s /home/jennyw2/code/Hierarchical-Localization/outputs/container_scan_videos_20250903_20250903_143627000_iOS_images_500_20match_540height/sfm_disk_disk+lightglue/distorted/../undistorted --eval --model_path output/container_scan_videos_20250903_20250903_143627000_iOS_images_500_20match_540height


In [46]:
# Launch gaussian splatting training from the notebook: change into the
# training repo, then run train.py inside the `gaussian_splatting` conda
# environment on the undistorted dataset. Results go to
# output/<experiment_name>, relative to that repo folder.
! cd /home/jennyw2/code/gaussian-splatting-repo/gaussian_splatting; conda run --no-capture-output -n gaussian_splatting python train.py -s $sfm_dir/../undistorted --eval --model_path output/$experiment_name


gc.collect()

Optimizing output/container_scan_videos_20250903_20250903_143627000_iOS_images_500_20match_540height
Output folder: output/container_scan_videos_20250903_20250903_143627000_iOS_images_500_20match_540height [18/09 10:47:14]
Tensorboard not available: not logging progress [18/09 10:47:14]
------------LLFF HOLD------------- [18/09 10:47:15]
Reading camera 500/500 [18/09 10:47:15]
Converting point3d.bin to .ply, will happen only the first time you open the scene. [18/09 10:47:15]
Loading Training Cameras [18/09 10:47:15]
Loading Test Cameras [18/09 10:47:20]
Number of points at initialisation :  116494 [18/09 10:47:21]
Training progress:  23%|▏| 7000/30000 [14:38<1:13:13,  5.23it/s, Loss=0.0738051,
[ITER 7000] Evaluating test: L1 0.05474777930667476 PSNR 22.771764906625897 [18/09 11:02:03]

[ITER 7000] Evaluating train: L1 0.05195741280913353 PSNR 22.685344314575197 [18/09 11:02:04]

[ITER 7000] Saving Gaussians [18/09 11:02:04]
Training progress:  25%|▎| 7600/30000 [16:45<1:15:28,  4.95it

0