In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
import sys
import os
import json
import torch
import yaml
import numpy as np
import cv2
import copy
from pathlib import Path
import matplotlib.pyplot as plt
from PIL import Image
from segment_anything import SamPredictor, sam_model_registry
from nerfstudio.utils.eval_utils import eval_setup
from plane_nerf.inerf_utils import get_intrinsic, get_extrinsic, get_footprint, get_footprint_mask



Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


In [3]:
os.chdir('/workspace/plane-nerf')
MODEL_PATH = "/workspace/plane-nerf/outputs/jackal_floor_training_data_1/plane-nerf/2024-03-11_145657"
DATA_PATH = "/workspace/plane-nerf/data/jackal_floor_evaluation_data"
if not os.path.exists(os.path.join(DATA_PATH,"footprint_masks")):
    os.makedirs(os.path.join(DATA_PATH,"footprint_masks"))
with open(f"{DATA_PATH}/transforms.json", "r") as f:
    transforms = json.load(f)

In [4]:
config_path = os.path.join(MODEL_PATH, "config.yml")
config, pipeline, checkpoint_path, _ = eval_setup(
                        Path(config_path),
                        test_mode="inference",
                    )

pipeline.eval()
pipeline.datamanager.setup_train()
for camera, batch in pipeline.datamanager.fixed_indices_train_dataloader:
    break




In [5]:
from segment_anything import SamPredictor, sam_model_registry
from plane_nerf.inerf_utils import get_intrinsic, get_extrinsic, get_footprint, get_image_with_footprint

sam = sam_model_registry["default"](checkpoint="/stored_data/sam_vit_h_4b8939.pth")
predictor = SamPredictor(sam)

footprint_camera =  copy.deepcopy(camera)
R_t = np.zeros((1,3,4))
R_t[0,0,0] = 1
R_t[0,1,1] = 1
R_t[0,2,2] = 1
R_t[0,2,0] = 0
footprint_camera.camera_to_worlds = torch.from_numpy(R_t).float().to(pipeline.device)

footprint_output = pipeline.model.get_outputs_for_camera(camera=footprint_camera)    
footprint_output = footprint_output["rgb"].reshape(camera.height, camera.width, 3).cpu().numpy()
footprint_output = (footprint_output * 255).astype(np.uint8)
H,W = footprint_output.shape[:2]
predictor.set_image(footprint_output)
input_point = np.array([[W//2, H//2]])
input_label = np.array([1])
masks, _, _ = predictor.predict(
    point_coords=input_point,
    point_labels=input_label,
    multimask_output=True,
)


In [6]:
intrinsic = get_intrinsic(pipeline,0)
footprint_extrinsic = get_extrinsic(footprint_camera,0,pipeline)
mask = masks[-1]
mask = mask.astype(np.uint8)
footprint = get_footprint(intrinsic,footprint_extrinsic,mask)

In [7]:
from plane_nerf.inerf_utils import transform_original_space_to_pose

new_frames = []

for i in range(len(transforms["frames"])):

    dup_f = copy.deepcopy(transforms["frames"][i])

    pose = transforms["frames"][i]["transform_matrix"]
    pose = pose[:3][:4]
    pose = np.array([pose])
    pose = torch.from_numpy(pose).float().to("cpu")

    pose = transform_original_space_to_pose(pose, 
                                            pipeline.datamanager.train_dataparser_outputs.dataparser_transform,
                                            pipeline.datamanager.train_dataparser_outputs.dataparser_scale,
                                            "opengl")
    
    print(pose)

    footprint_camera.camera_to_worlds = pose.unsqueeze(0)
    footprint_extrinsic = get_extrinsic(footprint_camera,0,pipeline)
    #Invert for blender convention
    blender_matrix = np.eye(4)
    blender_matrix[1,1] = -1
    blender_matrix[2,2] = -1
    blender_matrix = torch.from_numpy(blender_matrix).float().to(pipeline.device)
    footprint_extrinsic = blender_matrix @ torch.linalg.inv(footprint_extrinsic.float())

    footprint_pixels_bound = get_footprint_mask(pipeline, footprint_camera, intrinsic, footprint_extrinsic, footprint)
    footprint_pixels_bound = np.array(footprint_pixels_bound)

    footprint_mask = np.zeros((H,W),dtype=np.uint8)
    cv2.fillPoly(footprint_mask, [footprint_pixels_bound], 1)
    
    #Save footprint_mask
    footprint_mask_path = f"{DATA_PATH}/footprint_masks/{i}.png"
    cv2.imwrite(footprint_mask_path, footprint_mask*255)

    dup_f["mask_path"] = footprint_mask_path
    new_frames.append(dup_f)


tensor([[[ 7.7162e-01,  4.4966e-01, -4.4989e-01, -4.7102e-01],
         [-6.3608e-01,  5.4622e-01, -5.4502e-01, -4.7795e-01],
         [ 6.6004e-04,  7.0672e-01,  7.0749e-01,  6.1792e-05]]])
tensor([[[-6.5298e-01,  5.3546e-01, -5.3562e-01, -5.5888e-01],
         [-7.5737e-01, -4.6153e-01,  4.6193e-01,  4.4780e-01],
         [ 1.3643e-04,  7.0729e-01,  7.0692e-01, -4.7109e-04]]])
tensor([[[-6.7364e-01, -5.2310e-01,  5.2210e-01,  5.7623e-01],
         [ 7.3906e-01, -4.7614e-01,  4.7652e-01,  5.7796e-01],
         [-6.7479e-04,  7.0686e-01,  7.0735e-01, -7.1866e-05]]])
tensor([[[-8.6796e-02,  7.0443e-01, -7.0445e-01, -9.5243e-01],
         [-9.9623e-01, -6.1019e-02,  6.1729e-02,  4.9244e-02],
         [ 4.9851e-04,  7.0715e-01,  7.0706e-01, -4.1993e-04]]])
tensor([[[ 9.9116e-01,  9.3548e-02, -9.4058e-02, -1.9218e-01],
         [-1.3266e-01,  7.0152e-01, -7.0020e-01, -9.6013e-01],
         [ 4.8072e-04,  7.0649e-01,  7.0773e-01,  4.3564e-04]]])
tensor([[[-6.5518e-01,  5.3412e-01, -5.3428e-

In [8]:
transforms["frames"] = new_frames
with open(f"{DATA_PATH}/transforms_with_footprint.json", "w") as f:
    json.dump(transforms, f, indent=4)