In [2]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [3]:
import sys
import os
import json
import torch
import yaml
import numpy as np
import cv2
from pathlib import Path
import matplotlib.pyplot as plt
from copy import deepcopy 
from PIL import Image
import torchvision.transforms as transforms
from scipy.spatial.transform import Rotation 
from nerfstudio.utils.eval_utils import eval_setup
from plane_nerf.plane_nerf_utils import transform_original_space_to_pose
from inerf.inerf_trainer import INerfTrainer
from inerf.inerf_utils import correct_pose, get_corrected_pose

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


In [4]:
os.chdir('/workspace')
#Path params
MODEL_PATH = "/workspace/outputs/jackal_training_data/plane-nerf/2024-01-12_101255"
DATA_PATH = "/stored_data/jackal_training_data"
INPUT_FRAME = 0
TARGET_FRAME = 6

TRANSFORM_PATH = os.path.join(DATA_PATH, "transforms.json")
with open(TRANSFORM_PATH) as f:
    TRANSFORM_JSON = json.load(f)

WIDTH = TRANSFORM_JSON["w"]
HEIGHT = TRANSFORM_JSON["h"]

print(TRANSFORM_JSON)
INPUT_PATH = {
    "tf" : TRANSFORM_JSON["frames"][INPUT_FRAME]["transform_matrix"],
    "images" : os.path.join(DATA_PATH, "images", f"{INPUT_FRAME}.png"),
    "masks" : os.path.join(DATA_PATH, "masks", f"{INPUT_FRAME}.png")
}
TARGET_PATH = {
    "tf" : TRANSFORM_JSON["frames"][TARGET_FRAME]["transform_matrix"],
    "images" : os.path.join(DATA_PATH, "images", f"{TARGET_FRAME}.png"),
    "masks" : os.path.join(DATA_PATH, "masks", f"{TARGET_FRAME}.png")
}

{'focal_length': 4.74, 'fov_x': 1.1519, 'w': 640, 'h': 420, 'pixel_width': 0.03326787022081392, 'pixel_height': 0.03326787022081392, 'fl_x': 142.47981516515705, 'fl_y': 142.47981516515705, 'cx': 320, 'cy': 210, 'camera_model': 'OPENCV', 'frames': [{'file_path': 'images/0.png', 'transform_matrix': [[0.9999999999999999, 0.0, 0.0, 0.0], [0.0, 0.7073882691671997, -0.706825181105366, -2.5], [0.0, 0.706825181105366, 0.7073882691671997, 2.0], [0.0, 0.0, 0.0, 1.0]], 'mask_path': 'masks/0.png'}, {'file_path': 'images/1.png', 'transform_matrix': [[0.9902680687415701, -0.09844941900277263, 0.09837105229109364, 0.3479327524001636], [0.1391731009600654, 0.7005040151586449, -0.6999464070311214, -2.475670171853926], [0.0, 0.7068251811053659, 0.7073882691671995, 1.9999999999999998], [0.0, 0.0, 0.0, 1.0]], 'mask_path': 'masks/1.png'}, {'file_path': 'images/2.png', 'transform_matrix': [[0.9612616959383189, -0.1949826320492106, 0.19482742394475464, 0.6890933895424979], [0.2756373558169991, 0.679985247306

In [5]:
config_path = os.path.join(MODEL_PATH, "config.yml")
config, pipeline, checkpoint_path, _ = eval_setup(
                        Path(config_path),
                        test_mode="interface",
                    )

Output()



In [6]:
print(INPUT_PATH["tf"])
ERROR = np.asarray(INPUT_PATH["tf"])
ERROR[0,3] += 0.05
ERROR[1,3] -= 0.05
print(ERROR)

[[0.9999999999999999, 0.0, 0.0, 0.0], [0.0, 0.7073882691671997, -0.706825181105366, -2.5], [0.0, 0.706825181105366, 0.7073882691671997, 2.0], [0.0, 0.0, 0.0, 1.0]]
[[ 1.          0.          0.          0.05      ]
 [ 0.          0.70738827 -0.70682518 -2.55      ]
 [ 0.          0.70682518  0.70738827  2.        ]
 [ 0.          0.          0.          1.        ]]


In [7]:
#Spoof dataparser with a new image and pose
custom_train_dataparser_outputs = deepcopy(pipeline.datamanager.train_dataparser_outputs)
custom_train_dataparser_outputs.image_filenames = [Path(INPUT_PATH["images"]).as_posix()]
custom_train_dataparser_outputs.mask_filenames = [Path(INPUT_PATH["masks"]).as_posix()]

#Load new pose through Camera object
custom_cameras = pipeline.datamanager.train_dataparser_outputs.cameras[0]
custom_camera_to_worlds = torch.tensor([TARGET_PATH["tf"]]).float()
#Convert from SE3 to SO3xR3 by removing last row in tensor
custom_camera_to_worlds = custom_camera_to_worlds[:,:3, :]
custom_cameras.camera_to_worlds = transform_original_space_to_pose(custom_camera_to_worlds,
                                                                   custom_train_dataparser_outputs.dataparser_transform,
                                                                   custom_train_dataparser_outputs.dataparser_scale,
                                                                   "opengl")
custom_train_dataparser_outputs.cameras = custom_cameras
print(custom_cameras)


Cameras(camera_to_worlds=tensor([[[ 6.6913e-01, -5.2569e-01,  5.2527e-01,  7.4314e-01],
         [ 7.4314e-01,  4.7334e-01, -4.7296e-01, -6.6913e-01],
         [ 4.7892e-09,  7.0683e-01,  7.0739e-01,  0.0000e+00]]]), fx=tensor([142.4798]), fy=tensor([142.4798]), cx=tensor([320.]), cy=tensor([210.]), width=tensor([640]), height=tensor([420]), distortion_params=tensor([0., 0., 0., 0., 0., 0.]), camera_type=tensor([1]), times=None, metadata=None)


In [8]:
pipeline.datamanager.train_dataparser_outputs = custom_train_dataparser_outputs
pipeline.datamanager.train_dataset = pipeline.datamanager.create_train_dataset()
pipeline.datamanager.setup_train()

Output()

In [9]:
custom_camera_optimizer = deepcopy(pipeline.model.camera_optimizer)
custom_camera_optimizer.num_cameras = 1

In [10]:
trainer = INerfTrainer(config)
trainer.pipeline = pipeline
trainer.pipeline.model.camera_optimizer = custom_camera_optimizer
trainer.setup("interface")

Loading latest Nerfstudio checkpoint from load_dir...


In [11]:
original_pose = np.asarray(INPUT_PATH["tf"])
R = Rotation.from_matrix(original_pose[:3, :3])
rpy = R.as_euler('xyz', degrees=True)
t = original_pose[:3, 3]
print("Translation: ", t)
print("Rotation: ", rpy)

Translation:  [ 0.  -2.5  2. ]
Rotation:  [44.97718692  0.          0.        ]


In [12]:
corrected_pose = get_corrected_pose(trainer)
R = corrected_pose.cpu().detach()[0,:3,:3]
t = corrected_pose.cpu().detach()[0,:3,3]
rpy = Rotation.from_matrix(R).as_euler('xyz', degrees=True)
print("Translation: ", t)
print("Rotation: ", rpy)

Translation:  tensor([ 1.8578, -1.6719,  1.9356])
Rotation:  [46.45167067 -0.34438827 48.00848676]


In [13]:
n = 1000
for i in range(10):
    for j in range(n):
        trainer.train_iteration_inerf(i*n + j,0.001)
    corrected_pose = get_corrected_pose(trainer)
    R = corrected_pose.cpu().detach()[0,:3,:3]
    t = corrected_pose.cpu().detach()[0,:3,3]
    rpy = Rotation.from_matrix(R).as_euler('xyz', degrees=True)
    print("Translation: ", t)
    print("Rotation: ", rpy)

ValueError: Sample larger than population or is negative