In [None]:
import os

# Environment switches are set BEFORE torch / nerfstudio are imported.
# CUDA_VISIBLE_DEVICES pins this kernel to one physical GPU (machine-specific; adjust as needed).
# TORCHDYNAMO_DISABLE is read when TorchDynamo's config module is first imported, which can
# happen as a side effect of importing torch or nerfstudio, so setting it afterwards may be
# silently ignored.
os.environ['CUDA_VISIBLE_DEVICES'] = '7'
os.environ["TORCHDYNAMO_DISABLE"] = "1"

from nerfstudio.utils.eval_utils import eval_setup
from pathlib import Path
import matplotlib.pyplot as plt
from PIL import Image
import torch
import numpy as np
from f3rm.minimal.homography import Homography

In [None]:
# Trained-scene config to load; swap in the commented path to inspect the other scene.
# config_path = Path("outputs/ahgroom_colmap/f3rm/2025-04-14_190026/config.yml")
config_path = Path("outputs/stata_office/f3rm/2025-04-03_050224/config.yml")
eval_idx = 0  # index of the eval camera/image used throughout the notebook

# eval_setup returns four values; only the config and the pipeline are used here.
cfg, pipeline, _, _ = eval_setup(config_path=config_path)
model = pipeline.model

# Generate the rays of the chosen eval camera and move them to the model's device.
eval_dataset = pipeline.datamanager.eval_dataset
cameras = eval_dataset.cameras
eval_rays = cameras.generate_rays(camera_indices=eval_idx)
camera_ray_bundle = eval_rays.to(model.device)

In [None]:
# Render the whole eval view in one call, then display the shapes of the four outputs
# ("rgb", "depth", "feature", "feature_pca") that the following cells visualise.
outputs = model.get_outputs_for_camera_ray_bundle(camera_ray_bundle)
outputs["rgb"].shape, outputs["depth"].shape, outputs["feature"].shape, outputs["feature_pca"].shape

In [None]:
# Show the rendered colour image, scaled by 255 and cast to 8-bit for display.
rgb_uint8 = (outputs["rgb"].cpu().numpy() * 255).astype(np.uint8)

fig, ax = plt.subplots()
ax.imshow(rgb_uint8)
ax.set_title("RGB")
ax.axis("off")
plt.show()

In [None]:
# Min-max normalise the rendered depth to [0, 1] and show it with the "inferno" colormap.
depth = outputs["depth"].squeeze().cpu()
depth_lo, depth_hi = depth.min(), depth.max()
depth_norm = (depth - depth_lo) / (depth_hi - depth_lo)

fig, ax = plt.subplots()
ax.imshow(depth_norm.numpy(), cmap="inferno")
ax.set_title("Depth")
ax.axis("off")
plt.show()

In [None]:
# Min-max normalise the PCA projection of the rendered features so it can be shown as an image.
pca_raw = outputs["feature_pca"].cpu().numpy()
pca_lo, pca_hi = pca_raw.min(), pca_raw.max()
pca_img = (pca_raw - pca_lo) / (pca_hi - pca_lo)

fig, ax = plt.subplots()
ax.imshow(pca_img)
ax.set_title("Feature PCA")
ax.axis("off")
plt.show()

In [None]:
# Load the ground-truth image that belongs to the same eval index, as RGB.
gt_path = pipeline.datamanager.eval_dataset.image_filenames[eval_idx]
with Image.open(str(gt_path)) as gt_file:
    pil_img_gt = gt_file.convert("RGB")

fig, ax = plt.subplots()
ax.imshow(np.array(pil_img_gt))
ax.set_title("GT Image")
ax.axis("off")
plt.show()

In [None]:
# Origin of the ray at index [0][0]. Per the original note, all rays of this camera share the
# same origin: the camera centre in world coordinates.
camera_ray_bundle.origins[0][0]

In [None]:
# Print every attribute of the full-image ray bundle: its shape if it has one, else its value.
for field_name, field_value in vars(camera_ray_bundle).items():
    print(field_name, getattr(field_value, 'shape', field_value))

In [None]:
# Take the first chunk of rays (row-major order), sized by the model's eval chunk setting.
num_rays_per_chunk = model.config.eval_num_rays_per_chunk
ray_bundle = camera_ray_bundle.get_row_major_sliced_ray_bundle(0, num_rays_per_chunk)

# Pass the chunk through the model's collider when one is configured.
collider = model.collider
ray_bundle = ray_bundle if collider is None else collider(ray_bundle)

In [None]:
# Print every attribute of the chunked ray bundle: its shape if it has one, else its value.
for field_name, field_value in vars(ray_bundle).items():
    print(field_name, getattr(field_value, 'shape', field_value))

In [None]:
# Run the model's proposal sampler on the chunk, using the model's density functions.
# It returns three values, unpacked here as the ray samples, a weights list and a samples list.
ray_samples, weights_list, ray_samples_list = model.proposal_sampler(ray_bundle, density_fns=model.density_fns)

In [None]:
# Print every attribute of the sampled frustums: its shape if it has one, else its value.
for field_name, field_value in vars(ray_samples.frustums).items():
    print(field_name, getattr(field_value, 'shape', field_value))

In [None]:
# Shape of the sampled 3D positions for this chunk. Per the original note from the author's run:
# 16384 pixel positions of the original H x W image, each with a single ray origin and
# direction, and 48 proposal-sampler (x, y, z) points along each ray at different depths.
ray_samples.frustums.get_positions().shape

In [None]:
# Midpoint between each sample's start and end (the centre of the frustum), for the last ray.
((ray_samples.frustums.starts + ray_samples.frustums.ends) / 2).squeeze(-1)[-1]

In [None]:
# Per-sample start values along the first ray of the chunk.
ray_samples.frustums.starts.squeeze(-1)[0]

In [None]:
# Per-sample end values along the first ray of the chunk.
ray_samples.frustums.ends.squeeze(-1)[0]

In [None]:
# Query the model's main field at the sampled points; whether normals are computed follows
# the model config's predict_normals setting.
field_outputs = model.field.forward(ray_samples, compute_normals=model.config.predict_normals)

In [None]:
# List the keys of the field output.
field_outputs.keys()

In [None]:
# FieldHeadNames provides the keys used to index field_outputs; show the RGB and density shapes.
from nerfstudio.field_components.field_heads import FieldHeadNames
field_outputs[FieldHeadNames.RGB].shape, field_outputs[FieldHeadNames.DENSITY].shape

In [None]:
# Convert the field's density output into per-sample weights via ray_samples.get_weights.
weights = ray_samples.get_weights(field_outputs[FieldHeadNames.DENSITY])
weights.shape

In [None]:
# Render colour for the chunk from the per-sample RGB values and the weights.
rgb = model.renderer_rgb(rgb=field_outputs[FieldHeadNames.RGB], weights=weights)
rgb.shape

In [None]:
# Render depth for the chunk from the weights and the ray samples.
# NOTE: this rebinds `depth`, which an earlier cell used for the full-image depth map.
depth = model.renderer_depth(weights=weights, ray_samples=ray_samples)
depth.shape

In [None]:
# Query the model's feature field on the same ray samples and show the shape of its
# FEATURE head output.
from f3rm.feature_field import FeatureField, FeatureFieldHeadNames
ff_outputs = model.feature_field(ray_samples)
ff_outputs[FeatureFieldHeadNames.FEATURE].shape

In [None]:
# Render features for the chunk, reusing the weights computed from the main field's density.
features = model.renderer_feature(features=ff_outputs[FeatureFieldHeadNames.FEATURE], weights=weights)
features.shape

In [None]:
# Parameter counts of the main field and of the feature field.
def _count_parameters(module):
    """Return the total number of scalar elements across ``module.parameters()``."""
    return sum(param.numel() for param in module.parameters())


num_params_field = _count_parameters(model.field)
num_params_feature_field = _count_parameters(model.feature_field)
num_params_field, num_params_feature_field

# Custom Pose

In [None]:
from nerfstudio.cameras.cameras import Cameras

# Borrow the intrinsics of the first eval camera as a starting point for a custom camera
# (useful when a similar field of view is wanted).
eval_cams = pipeline.datamanager.eval_dataset.cameras
fx = eval_cams.fx[0]
fy = eval_cams.fy[0]
cx = eval_cams.cx[0]
cy = eval_cams.cy[0]
height = eval_cams.height[0]
width = eval_cams.width[0]
dist_params = eval_cams.distortion_params[0]
cam_type = eval_cams.camera_type[0]

In [None]:
# c2w of a known camera pose
c2w_known = eval_cams.camera_to_worlds[0].cpu().numpy()
# add 4th row for homogeneous coordinates
c2w_known = np.vstack((c2w_known, [0, 0, 0, 1]))
# translate camera 0.5m forward (ie, -0.5 in z in nerfstudio convention) wrt to known cam frame
c2w_delta = Homography.get_std_trans(cz=-0.5)  # this is the known to new transform
c2w_new = c2w_known @ np.linalg.inv(c2w_delta)
c2w_new = c2w_new[:3, :4]  # remove the last row
c2w_new = torch.from_numpy(c2w_new).float().to(model.device)[None, ...]

In [None]:
# c2w = torch.eye(4)[None, ...].to(model.device)
# c2w[..., 2, 3] = 0.5
# c2w = c2w[:, :3, :4]  # Remove last row

In [None]:
# Hard-coded 16-value pose (presumably copied from the viewer — confirm the source).
# The values are reshaped to 4x4 and transposed, i.e. the flat list is read column by column.
c2w_viewer = [-0.8025081587509822,0.5966412403996988,8.326673161838576e-17,0,-0.4149196340756713,-0.5580847736047082,0.7185981069914683,0,0.42874523659091934,0.5766808042959521,0.6954257016872606,0,0.1381675073877709,0.4818906926693194,0.31446781793079853,1]
c2w_viewer = np.array(c2w_viewer).reshape(4, 4).T[:3, :4]  # remove last row
# Convert to a float tensor on the model's device and add a leading batch dimension.
c2w_viewer = torch.from_numpy(c2w_viewer).float().to(model.device)[None, ...]
c2w_viewer.shape

In [None]:
# Display the intrinsics currently bound to these names.
fx, fy, cx, cy, height, width, dist_params, cam_type

In [None]:
# Override the dataset intrinsics with a square 336 x 336 camera without distortion.
width = torch.tensor([336])
height = torch.tensor([336])
# fx = 450 px at a width of 336 px is roughly a 41 degree horizontal field of view.
fx = fy = torch.tensor([450.0])
cx = width / 2                                 # 168: principal point at the image centre
cy = height / 2                                # 168
dist_params = torch.zeros(6)                   # all distortion coefficients set to zero
# The Cameras(...) cell reads `cam_type`, so the override must be bound to that name too;
# otherwise the camera type taken from the eval dataset is silently used instead.
camera_type = torch.tensor([1])                # Pinhole (presumably CameraType.PERSPECTIVE == 1 — confirm)
cam_type = camera_type

In [None]:
# Intrinsics for the custom camera, each given a leading batch dimension.
custom_intrinsics = dict(
    fx=fx[None],
    fy=fy[None],
    cx=cx[None],
    cy=cy[None],
    height=height[None],
    width=width[None],
    distortion_params=dist_params[None],
    camera_type=cam_type[None],
)

# Build the camera at the viewer pose and move it to the model's device.
custom_camera = Cameras(
    camera_to_worlds=c2w_viewer,
    times=None,
    metadata=None,
    **custom_intrinsics,
).to(model.device)

# Render the custom view. NOTE: this rebinds `ray_bundle` and `outputs` from earlier cells.
custom_rays = custom_camera.generate_rays(camera_indices=0)
ray_bundle = custom_rays.to(model.device)
outputs = model.get_outputs_for_camera_ray_bundle(ray_bundle)

In [None]:
# Show the colour image currently held in `outputs`, scaled by 255 and cast to 8-bit.
rgb_uint8 = (outputs["rgb"].cpu().numpy() * 255).astype(np.uint8)

fig, ax = plt.subplots()
ax.imshow(rgb_uint8)
ax.set_title("RGB")
ax.axis("off")
plt.show()

In [None]:
from einops import rearrange, reduce
# Split the rendered feature map into non-overlapping 14 x 14 pixel patches; this requires the
# image height and width to be multiples of 14 (336 = 24 * 14 for the custom camera).
patches = rearrange(outputs['feature'], '(h p1) (w p2) d -> h w p1 p2 d', p1=14, p2=14)
# Average the features inside each patch, leaving one feature vector per patch: (h, w, d).
patchified = reduce(patches, 'h w p1 p2 d -> h w d', 'mean')

In [None]:
# Flatten the patch grid to (num_patches, feature_dim). The sizes are derived from the tensor
# itself rather than hard-coded, so other image or feature sizes work and re-running this cell
# is a no-op. For a 24 x 24 grid of 768-dim features this is (576, 768), as before.
patchified = patchified.reshape(-1, patchified.shape[-1])

In [None]:
# Move the patch features to host memory and convert them to a NumPy array.
# NOTE: this rebinds `patchified` from a tensor to an array, so the cell cannot be re-run as is.
patchified = patchified.cpu().numpy()