Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make colmapDataParser compatible with 360_v2 dataset format #2860

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 15 additions & 3 deletions nerfstudio/cameras/cameras.py
Original file line number Diff line number Diff line change
Expand Up @@ -984,12 +984,15 @@ def get_intrinsics_matrices(self) -> Float[Tensor, "*num_cameras 3 3"]:
return K

def rescale_output_resolution(
self, scaling_factor: Union[Shaped[Tensor, "*num_cameras"], Shaped[Tensor, "*num_cameras 1"], float, int]
self,
scaling_factor: Union[Shaped[Tensor, "*num_cameras"], Shaped[Tensor, "*num_cameras 1"], float, int],
scale_rounding_mode: str = "floor",
) -> None:
"""Rescale the output resolution of the cameras.

Args:
scaling_factor: Scaling factor to apply to the output resolution.
scale_rounding_mode: round down or round up when calculating the scaled image height and width
"""
if isinstance(scaling_factor, (float, int)):
scaling_factor = torch.tensor([scaling_factor]).to(self.device).broadcast_to((self.cx.shape))
Expand All @@ -1006,5 +1009,14 @@ def rescale_output_resolution(
self.fy = self.fy * scaling_factor
self.cx = self.cx * scaling_factor
self.cy = self.cy * scaling_factor
self.height = (self.height * scaling_factor).to(torch.int64)
self.width = (self.width * scaling_factor).to(torch.int64)
if scale_rounding_mode == "floor":
self.height = (self.height * scaling_factor).to(torch.int64)
self.width = (self.width * scaling_factor).to(torch.int64)
elif scale_rounding_mode == "round":
self.height = torch.floor(0.5 + (self.height * scaling_factor)).to(torch.int64)
self.width = torch.floor(0.5 + (self.width * scaling_factor)).to(torch.int64)
elif scale_rounding_mode == "ceil":
self.height = torch.ceil(self.height * scaling_factor).to(torch.int64)
self.width = torch.ceil(self.width * scaling_factor).to(torch.int64)
else:
raise ValueError("Scale rounding mode must be 'floor', 'round' or 'ceil'.")
43 changes: 38 additions & 5 deletions nerfstudio/data/dataparsers/colmap_dataparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

from __future__ import annotations

import math
import sys
from dataclasses import dataclass, field
from functools import partial
Expand Down Expand Up @@ -56,6 +57,8 @@ class ColmapDataParserConfig(DataParserConfig):
"""How much to scale the camera origins by."""
downscale_factor: Optional[int] = None
"""How much to downscale images. If not set, images are chosen such that the max dimension is <1600px."""
downscale_rounding_mode: Literal["floor", "round", "ceil"] = "floor"
"""How to round downscale image height and Image width."""
scene_scale: float = 1.0
"""How much to scale the region of interest by."""
orientation_method: Literal["pca", "up", "vertical", "none"] = "up"
Expand Down Expand Up @@ -355,7 +358,9 @@ def _generate_dataparser_outputs(self, split: str = "train", **kwargs):
camera_type=camera_type,
)

cameras.rescale_output_resolution(scaling_factor=1.0 / downscale_factor)
cameras.rescale_output_resolution(
scaling_factor=1.0 / downscale_factor, scale_rounding_mode=self.config.downscale_rounding_mode
)

if "applied_transform" in meta:
applied_transform = torch.tensor(meta["applied_transform"], dtype=transform_matrix.dtype)
Expand Down Expand Up @@ -452,18 +457,39 @@ def _load_3D_points(self, colmap_path: Path, transform_matrix: torch.Tensor, sca
out["points3D_points2D_xy"] = torch.stack(points3D_image_xy, dim=0)
return out

def _downscale_images(self, paths, get_fname, downscale_factor: int, nearest_neighbor: bool = False):
def _downscale_images(
self,
paths,
get_fname,
downscale_factor: int,
downscale_rounding_mode: str = "floor",
nearest_neighbor: bool = False,
):
def calculate_scaled_size(original_width, original_height, downscale_factor, mode="floor"):
    """Compute the pixel size of an image after downscaling.

    Args:
        original_width: Source image width in pixels.
        original_height: Source image height in pixels.
        downscale_factor: Factor each dimension is divided by.
        mode: Rounding mode, one of 'floor', 'round' (half-up), or 'ceil'.

    Returns:
        Tuple of (scaled_width, scaled_height) as ints.

    Raises:
        ValueError: If ``mode`` is not a supported rounding mode.
    """
    if mode == "floor":
        return math.floor(original_width / downscale_factor), math.floor(original_height / downscale_factor)
    elif mode == "round":
        # Use round-half-up (floor(x + 0.5)) rather than the built-in round(),
        # which rounds half to even (e.g. round(2.5) == 2). Half-up matches
        # Cameras.rescale_output_resolution, which computes
        # floor(0.5 + dim * scaling_factor), so the downscaled image size
        # stays consistent with the rescaled camera intrinsics when a
        # dimension lands exactly on a half pixel.
        return (
            math.floor(original_width / downscale_factor + 0.5),
            math.floor(original_height / downscale_factor + 0.5),
        )
    elif mode == "ceil":
        return math.ceil(original_width / downscale_factor), math.ceil(original_height / downscale_factor)
    else:
        raise ValueError("Invalid mode. Choose from 'floor', 'round', or 'ceil'.")

with status(msg="[bold yellow]Downscaling images...", spinner="growVertical"):
assert downscale_factor > 1
assert isinstance(downscale_factor, int)
filepath = next(iter(paths))
img = Image.open(filepath)
w, h = img.size
w_scaled, h_scaled = calculate_scaled_size(w, h, downscale_factor, downscale_rounding_mode)
# Using %05d ffmpeg commands appears to be unreliable (skips images).
for path in paths:
nn_flag = "" if not nearest_neighbor else ":flags=neighbor"
path_out = get_fname(path)
path_out.parent.mkdir(parents=True, exist_ok=True)
ffmpeg_cmd = [
f'ffmpeg -y -noautorotate -i "{path}" ',
f"-q:v 2 -vf scale=iw/{downscale_factor}:ih/{downscale_factor}{nn_flag} ",
f"-q:v 2 -vf scale={w_scaled}:{h_scaled}{nn_flag} ",
f'"{path_out}"',
]
ffmpeg_cmd = " ".join(ffmpeg_cmd)
Expand All @@ -488,7 +514,7 @@ def get_fname(parent: Path, filepath: Path) -> Path:
if self._downscale_factor is None:
if self.config.downscale_factor is None:
test_img = Image.open(filepath)
h, w = test_img.size
w, h = test_img.size
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for catching the error

max_res = max(h, w)
df = 0
while True:
Expand All @@ -508,12 +534,17 @@ def get_fname(parent: Path, filepath: Path) -> Path:
CONSOLE.print(
f"[bold red]Downscaled images do not exist for factor of {self._downscale_factor}.[/bold red]"
)
if Confirm.ask("\nWould you like to downscale the images now?", default=False, console=CONSOLE):
if Confirm.ask(
f"\nWould you like to downscale the images using '{self.config.downscale_rounding_mode}' rounding mode now?",
default=False,
console=CONSOLE,
):
# Install the method
self._downscale_images(
image_filenames,
partial(get_fname, self.config.data / self.config.images_path),
self._downscale_factor,
self.config.downscale_rounding_mode,
nearest_neighbor=False,
)
if len(mask_filenames) > 0:
Expand All @@ -522,6 +553,7 @@ def get_fname(parent: Path, filepath: Path) -> Path:
mask_filenames,
partial(get_fname, self.config.data / self.config.masks_path),
self._downscale_factor,
self.config.downscale_rounding_mode,
nearest_neighbor=True,
)
if len(depth_filenames) > 0:
Expand All @@ -530,6 +562,7 @@ def get_fname(parent: Path, filepath: Path) -> Path:
depth_filenames,
partial(get_fname, self.config.data / self.config.depths_path),
self._downscale_factor,
self.config.downscale_rounding_mode,
nearest_neighbor=True,
)
else:
Expand Down
Loading