In [20]:
from pathlib import Path
import yaml
import json

import numpy as np
import torch
from nerfstudio.model_components.ray_generators import RayGenerator

torch.cuda.init()
import torch.nn as nn
import torchvision.transforms as transforms
from PIL import Image

from nerfstudio.configs.method_configs import all_methods
from nerfstudio.engine.trainer import TrainerConfig
from nerfstudio.pipelines.base_pipeline import Pipeline
from nerfstudio.cameras.cameras import Cameras, CameraType

In [4]:
def setup_trainable_pipeline(config_path: Path) -> Pipeline:
    config = yaml.load(config_path.read_text(), Loader=yaml.Loader)
    assert isinstance(config, TrainerConfig)
    
    config.pipeline.datamanager._target = all_methods[config.method_name].pipeline.datamanager._target
    config.load_dir = config.get_checkpoint_dir()
    
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    pipeline = config.pipeline.setup(device=device, test_mode="inference")
    assert isinstance(pipeline, Pipeline)
    
    return pipeline

def image_to_tensor(image_path, device) -> torch.Tensor:
    # Open the image using PIL
    image = Image.open(image_path).convert("RGB")

    # Define the transform to convert the image to a PyTorch tensor
    transform = transforms.ToTensor()  # This will convert to a tensor with shape (C, H, W)

    # Apply the transform
    tensor = transform(image)  # Shape will be (3, 512, 512)

    # Permute the tensor to get shape (512, 512, 3)
    tensor = tensor.permute(1, 2, 0).to(device)

    return tensor.detach().requires_grad_(False)

In [6]:
config_path = Path(
    "/n/home10/dpodolskyi/neural-registration/outputs/0_065_cat5_2/instant-ngp/2024-08-20_140044/config.yml")

checkpoint_path = "/n/home10/dpodolskyi/neural-registration/outputs/0_065_cat5_2/instant-ngp/2024-08-20_140044/nerfstudio_models/step-000018000.ckpt"

train_pipeline = setup_trainable_pipeline(config_path)
nerf_model = train_pipeline.model
nerf_device = nerf_model.device

In [None]:
train_pipeline.datamanager

In [16]:
torch.cuda.empty_cache()

In [7]:
# custom nerfstudio transformation
dataparser_transforms_path = Path(
    "/n/home10/dpodolskyi/neural-registration/outputs/0_065_cat5_2/instant-ngp/2024-08-20_140044/dataparser_transforms.json")

with open(dataparser_transforms_path, "r") as f:
    dataparser_transform = json.load(f)

dataparser_matrix = torch.tensor(dataparser_transform["transform"], dtype=torch.float32)
dataparser_matrix = np.vstack((dataparser_matrix, np.array([[0, 0, 0, 1]])))
dataparser_scale = dataparser_transform["scale"]


In [8]:
data_transform_matrix = np.array([
    [-0.16551750084030392, 0.17173068552312126, -0.9711398089695112, -80.73672706466026],
    [-0.9756847655052445, 0.11494663876681419, 0.18661861803471735, 15.51473463480075],
    [0.14367740002017088, 0.9784149640546033, 0.14852933325600515, 12.348141976499424],
    [0, 0, 0, 1],
])

camera_params = {
    "camera_angle_x": 0.5235987755982988,
    "camera_angle_y": 0.5235987755982988,
    "fl_x": 955.4050067376327,
    "fl_y": 955.4050067376327,
    "k1": 0,
    "k2": 0,
    "k3": 0,
    "k4": 0,
    "p1": 0,
    "p2": 0,
    "is_fisheye": False,
    "cx": 256.0,
    "cy": 256.0,
    "w": 512,
    "h": 512,
    "aabb_scale": 32,
}

final_matrix = np.dot(dataparser_matrix, data_transform_matrix)
final_matrix[:3, 3] = final_matrix[:3, 3] * dataparser_scale
final_matrix = nn.Parameter(torch.tensor(final_matrix[:3, :4], dtype=torch.float32).unsqueeze(0), requires_grad=True).to(nerf_device)
final_matrix, final_matrix.shape

# should look like this:
# actual_transform_matrix = np.array([
#     [-0.2205035537481308, -0.21815498173236847, -0.9506769180297852, -0.11759857088327408],
#     [-0.9667345285415649, 0.17840947210788727, 0.1832878142595291, 0.23202396929264069],
#     [0.129624605178833, 0.9594677686691284, -0.2502378523349762, -0.26915040612220764],
# ])

(tensor([[[-0.2205, -0.2182, -0.9507, -0.1176],
          [-0.9667,  0.1784,  0.1833,  0.2320],
          [ 0.1296,  0.9595, -0.2502, -0.2692]]], device='cuda:0',
        grad_fn=<ToCopyBackward0>),
 torch.Size([1, 3, 4]))

In [35]:
camera = Cameras(
    camera_to_worlds=final_matrix, # 1x3x4 tensor
    fx=camera_params["fl_x"],
    fy=camera_params["fl_y"],
    cx=camera_params["cx"],
    cy=camera_params["cy"],
    camera_type=CameraType.PERSPECTIVE,
    height=camera_params["h"],
    width=camera_params["w"],
).to(nerf_device)

In [36]:
rays_gen = RayGenerator(camera).to(nerf_device)

In [37]:
# indices = torch.tensor(0).to(nerf_device)
ray_indices = torch.tensor([[0, 0, 0]], device=nerf_device)
# ray_bundle = camera.generate_rays(0).to(nerf_device)
ray_bundle = rays_gen(ray_indices)

In [38]:
ray_bundle.origins.shape, ray_bundle.directions.shape, ray_bundle.shape, ray_bundle.origins.device, ray_bundle.directions.device

(torch.Size([1, 3]),
 torch.Size([1, 3]),
 torch.Size([1]),
 device(type='cuda', index=0),
 device(type='cuda', index=0))

In [39]:
nerf_model.get_outputs_for_camera(camera)

RuntimeError: cannot reshape tensor of 0 elements into shape [0, -1] because the unspecified dimension size -1 can be any value and is ambiguous

In [18]:
print(ray_bundle.origins.numel(), ray_bundle.directions.numel())

786432 786432


In [21]:
with torch.no_grad():
    outputs = nerf_model.get_outputs_for_camera(camera)
    generated_rgb = outputs["rgb"].cpu().numpy()

RuntimeError: cannot reshape tensor of 0 elements into shape [0, -1] because the unspecified dimension size -1 can be any value and is ambiguous

In [22]:
outputs = nerf_model.get_outputs_for_camera(camera)
generated_rgb = outputs["rgb"]

RuntimeError: CUDA error: invalid configuration argument
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [8]:
optimizer = torch.optim.Adam([final_matrix], lr=0.001)

In [9]:
target_image = image_to_tensor("/n/home10/dpodolskyi/neural-registration/data/datasets/0_065_cat5_2.0/images/4.png", device=nerf_model.device)

target_image.shape, target_image.requires_grad

(torch.Size([512, 512, 3]), False)

In [10]:
mse_loss = nn.MSELoss()

In [11]:
render_rays = camera.generate_rays(camera_indices=0, keep_shape=True)

In [12]:
rendered_image = nerf_model.get_outputs_for_camera_ray_bundle(render_rays)

RuntimeError: cannot reshape tensor of 0 elements into shape [0, -1] because the unspecified dimension size -1 can be any value and is ambiguous