In [1]:
from pathlib import Path
import json
import numpy as np
import functools

In [2]:
def load_json(path: Path):
    with open(path, "r") as f:
        return json.load(f)
    

In [3]:
def pretty_print(array: np.ndarray):
    print(np.array2string(array, separator=", ", formatter={"float_kind": lambda x: "%.4f" % x}))

In [4]:
def split_transforms(path: Path, splits: int):
    transforms = load_json(path)
    frames = transforms["frames"]
    split_frames = np.array_split(frames, splits)
    
    split_indexes = []
    split_transforms = []
    for split in split_frames:
        split_transforms.append(
            {
                "camera_model": transforms["camera_model"],
                "fl_x": transforms["fl_x"],
                "fl_y": transforms["fl_y"],
                "cx": transforms["cx"],
                "cy": transforms["cy"],
                "w": transforms["w"],
                "h": transforms["h"],
                "k1": transforms["k1"],
                "k2": transforms["k2"],
                "p1": transforms["p1"],
                "p2": transforms["p2"],
                "frames": split.tolist(),
            }
        )
        if (len(split_indexes) == 0):
            split_indexes.append(len(split))
        else:
            split_indexes.append(split_indexes[-1] + len(split))
    return split_transforms, split_indexes


In [5]:
transforms_path = Path.cwd() / "block_nerf" / "baseline_transforms.json"
split_transforms, split_indexes = split_transforms(transforms_path, 4)

In [6]:
split_indexes

[275, 550, 824, 1098]

In [7]:
# Train the split transforms by themselves

In [8]:
# Retrieve the original, non-scaled and non-centered transforms


In [9]:
# Use the dataparser_transforms to scale and rotate the transforms
dataparser_transforms_path = Path.cwd() / "block_nerf" / "dataparser_transforms.json"
dataparser_transforms = load_json(dataparser_transforms_path)

dp_t = np.array(dataparser_transforms["transform"])
dp_scale = dataparser_transforms["scale"]

In [10]:
# Use the dp_t and dp_scale to scale and rotate the camera_to_world matrices in the camera_path file such that they're comparable to the split_transforms

transforms_matrix = np.array(split_transforms[0]["frames"][0]["transform_matrix"])
pretty_print(transforms_matrix)

[[-0.0000, -1.0000, -0.0000, -3.4461],
 [0.9848, -0.0000, 0.1736, 106.3866],
 [-0.1736, -0.0000, 0.9848, -2.3626],
 [0.0000, 0.0000, 0.0000, 1.0000]]


In [105]:
camera_path_path = Path.cwd() / "block_nerf" / "camera_path_one_lap.json"
camera_path = load_json(camera_path_path)
camera_path_c2w = np.array(camera_path["camera_path"][0]["camera_to_world"]).reshape(4,4,)
pretty_print(camera_path_c2w)

[[0.0387, 0.0485, 0.9981, 0.4484],
 [0.9993, -0.0019, -0.0387, 0.9853],
 [-0.0000, 0.9988, -0.0485, -0.0018],
 [0.0000, 0.0000, 0.0000, 1.0000]]


In [12]:
a = dp_t @ camera_path_c2w
a[:3, 3] *= dp_scale
a = np.vstack((a, [0, 0, 0, 1]))

pretty_print(a)

[[-0.0002, 0.9988, -0.0485, 0.2364],
 [0.9992, -0.0017, -0.0388, -0.3360],
 [-0.0389, -0.0485, -0.9981, -0.0437],
 [0.0000, 0.0000, 0.0000, 1.0000]]


In [13]:
# Get the distance between the two matrices trasform_matrix[:3, 3] and camera_to_world[:3, 3]
np.linalg.norm(transforms_matrix[:3, 3] - a[:3, 3])
# transforms_matrix[:3, 3]

106.81123024696991

In [14]:
b = [0.03871940596928383,0.9992501550311972,-2.220446121113367e-16,0,0.04846804299786681,-0.0018780620887794875,0.9988229486229716,0,0.9980739592677991,-0.03867383019325542,-0.04850441561635006,0,0.4483894695235361,0.985300560475246,-0.0018124304538501418,1]
b = np.array(b).reshape(4,4,)
pretty_print(b)

[[0.0387, 0.9993, -0.0000, 0.0000],
 [0.0485, -0.0019, 0.9988, 0.0000],
 [0.9981, -0.0387, -0.0485, 0.0000],
 [0.4484, 0.9853, -0.0018, 1.0000]]


In [15]:
from nerfstudio.utils.eval_utils import eval_setup

In [99]:
# Config-path
load_config = Path("data/images/exp_combined_baseline_2/exp_combined_baseline_2/nerfacto/2023-04-10_140345/config.yml")

eval_num_rays_per_chunk = 1 << 15 # Same as 2^15

_, pipeline, _ = eval_setup(
    load_config,
    eval_num_rays_per_chunk=eval_num_rays_per_chunk,
    test_mode="inference",
)

In [100]:
len(camera_path["camera_path"])

240

In [101]:
outputs = pipeline.datamanager.train_dataparser_outputs
outputs.dataparser_transform

tensor([[-6.0797e-06, -1.6169e-04,  1.0000e+00,  1.8667e+01],
        [-1.6169e-04,  1.0000e+00,  1.6169e-04, -2.7510e+01],
        [-1.0000e+00, -1.6169e-04, -6.0797e-06, -2.9981e+00]])

In [102]:
outputs.cameras.camera_to_worlds

tensor([[[-1.7381e-01,  6.0797e-06,  9.8478e-01,  2.0630e-01],
         [ 9.8478e-01,  1.6165e-04,  1.7381e-01,  9.9909e-01],
         [-1.5813e-04,  1.0000e+00, -3.4057e-05,  5.4568e-03]],

        [[ 1.7349e-01,  6.0797e-06,  9.8484e-01,  2.0630e-01],
         [ 9.8484e-01,  1.6165e-04, -1.7349e-01,  9.9909e-01],
         [-1.6024e-04,  1.0000e+00,  2.2082e-05,  5.4568e-03]],

        [[-1.7381e-01,  6.0797e-06,  9.8478e-01,  2.0630e-01],
         [ 9.8478e-01,  1.6165e-04,  1.7381e-01,  9.9909e-01],
         [-1.5813e-04,  1.0000e+00, -3.4057e-05,  4.2312e-04]],

        ...,

        [[-1.4479e-01,  5.6213e-05,  9.8946e-01,  2.2548e-01],
         [ 9.8946e-01,  1.4931e-04,  1.4479e-01,  9.9805e-01],
         [-1.3960e-04,  1.0000e+00, -7.7214e-05, -1.7221e-04]],

        [[ 2.0235e-01,  5.9315e-05,  9.7931e-01,  2.2548e-01],
         [ 9.7931e-01,  1.4945e-04, -2.0235e-01,  9.9805e-01],
         [-1.5836e-04,  1.0000e+00, -2.7819e-05, -1.7221e-04]],

        [[ 2.0107e-01,  3.7726e

In [103]:
a = pipeline.datamanager.train_dataparser_outputs.transform_poses_to_original_space(outputs.cameras.camera_to_worlds).numpy()
b = a[200]
original = np.vstack((b, [0, 0, 0, 1]))
pretty_print(original)

[[0.0000, 1.0000, 0.0001, -3.0017],
 [-0.1361, 0.0001, -0.9907, 61.9833],
 [-0.9907, 0.0000, 0.1361, -64.2367],
 [0.0000, 0.0000, 0.0000, 1.0000]]


In [104]:
np.linalg.norm(transforms_matrix[:3, 3] - original[:3, 3])

76.15940801618345

# Test with exp_combined_baseline_block_nerf_2

In [30]:
exp_path = Path.cwd() / "data" / "images" / "exp_combined_baseline_block_nerf_2"
transforms_path = exp_path / "0" / "transforms.json"
transforms = load_json(transforms_path)


In [34]:
# Load the experiment's pipeline
load_config = exp_path / "0/exp_combined_baseline_block_nerf_2-0/nerfacto/2023-04-11_130124/config.yml"

eval_num_rays_per_chunk = 1 << 15 # Same as 2^15

_, pipeline, _ = eval_setup(
    load_config,
    eval_num_rays_per_chunk=eval_num_rays_per_chunk,
    test_mode="inference",
)



In [39]:
import torch

In [107]:
# pipeline.datamanager.train_dataparser_outputs.transform_poses_to_original_space(outputs.cameras.camera_to_worlds).numpy()

# Transform the camera_path to the original space
camera_path_c2w = np.array(camera_path["camera_path"][0]["camera_to_world"]).reshape(4,4,)
# camera_path_c2w = torch.tensor(camera_path_c2w)[:, :-1, :]
# camera_path_c2w = camera_path_c2w.float()

camera_path_c2w
# a = pipeline.datamanager.train_dataparser_outputs.transform_poses_to_original_space(camera_path_c2w)

array([[ 3.87194047e-02,  4.84680439e-02,  9.98073973e-01,
         4.48389470e-01],
       [ 9.99250123e-01, -1.87806213e-03, -3.86738307e-02,
         9.85300560e-01],
       [-2.01661604e-16,  9.98822968e-01, -4.85044163e-02,
        -1.81243045e-03],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
         1.00000000e+00]])

In [119]:
t = pipeline.datamanager.train_dataparser_outputs.dataparser_transform
s = pipeline.datamanager.train_dataparser_outputs.dataparser_scale
# camera_path_c2w
a = (t @ camera_path_c2w) * s
a = a.numpy()
pretty_print(a)

[[-0.0000, 0.0127, -0.0006, 0.2364],
 [0.0127, -0.0000, -0.0005, -0.3360],
 [-0.0005, -0.0006, -0.0126, -0.0437]]


In [120]:
# pretty_print(outputs.cameras.camera_to_worlds[0].numpy())

In [123]:
original_transforms = np.array(transforms["frames"][0]["transform_matrix"])
# pretty_print(original_transforms)

transformed_transforms = ((t @ original_transforms) * s).numpy()
pretty_print(transformed_transforms)

[[-0.0022, 0.0000, 0.0125, 0.2063],
 [0.0125, 0.0000, 0.0022, 0.9991],
 [-0.0000, 0.0127, -0.0000, 0.0055]]
