In [None]:
# Copyright (c) Meta Platforms, Inc. and affiliates.

## 1. Imports and Model Loading

In [1]:
import os
import imageio
import uuid
from IPython.display import Image as ImageDisplay
from inference import Inference, ready_gaussian_for_video_rendering, render_video, load_image, load_mask, load_single_mask, display_image, make_scene, interactive_visualizer

  import pynvml  # type: ignore[import]


Warp 1.10.0 initialized:
   CUDA Toolkit 12.8, Driver 12.4
   Devices:
     "cpu"      : "x86_64"
     "cuda:0"   : "NVIDIA GeForce RTX 4090" (24 GiB, sm_89, mempool enabled)
     "cuda:1"   : "NVIDIA GeForce RTX 4090" (24 GiB, sm_89, mempool enabled)
   CUDA peer access:
     Not supported
   Kernel cache:
     /home/leelab/.cache/warp/1.10.0


[32m2025-11-25 05:59:23.250[0m | [1mINFO    [0m | [36msam3d_objects.pipeline.inference_pipeline[0m:[36mset_attention_backend[0m:[36m15[0m - [1mGPU name is NVIDIA GeForce RTX 4090[0m


Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


[32m2025-11-25 05:59:24.406[0m | [1mINFO    [0m | [36msam3d_objects.model.backbone.tdfy_dit.modules.sparse[0m:[36m__from_env[0m:[36m39[0m - [1m[SPARSE] Backend: spconv, Attention: sdpa[0m
[32m2025-11-25 05:59:26.688[0m | [1mINFO    [0m | [36msam3d_objects.model.backbone.tdfy_dit.modules.attention[0m:[36m__from_env[0m:[36m30[0m - [1m[ATTENTION] Using backend: sdpa[0m


[SPARSE][CONV] spconv algo: auto




In [2]:
# Everything below is resolved relative to the kernel's current working
# directory (assumed to be the notebook folder — TODO confirm when launching
# Jupyter from elsewhere).
PATH = os.getcwd()

# Name of the checkpoint sub-folder that holds pipeline.yaml.
TAG = "hf"
checkpoint_dir = f"{PATH}/../checkpoints/{TAG}"
config_path = f"{checkpoint_dir}/pipeline.yaml"

# Build the inference wrapper from the pipeline config; compile=False is
# passed through as-is (presumably skips model compilation — confirm in
# `Inference`).
inference = Inference(config_path, compile=False)

[32m2025-11-25 05:59:31.088[0m | [1mINFO    [0m | [36msam3d_objects.pipeline.inference_pipeline[0m:[36m__init__[0m:[36m98[0m - [1mself.device: cuda[0m
[32m2025-11-25 05:59:31.089[0m | [1mINFO    [0m | [36msam3d_objects.pipeline.inference_pipeline[0m:[36m__init__[0m:[36m99[0m - [1mCUDA_VISIBLE_DEVICES: None[0m
[32m2025-11-25 05:59:31.089[0m | [1mINFO    [0m | [36msam3d_objects.pipeline.inference_pipeline[0m:[36m__init__[0m:[36m100[0m - [1mActually using GPU: 0[0m
[32m2025-11-25 05:59:31.089[0m | [1mINFO    [0m | [36msam3d_objects.pipeline.inference_pipeline[0m:[36minit_pose_decoder[0m:[36m295[0m - [1mUsing pose decoder: ScaleShiftInvariant[0m
[32m2025-11-25 05:59:31.089[0m | [1mINFO    [0m | [36msam3d_objects.pipeline.inference_pipeline[0m:[36m__init__[0m:[36m131[0m - [1mLoading model weights...[0m
[32m2025-11-25 05:59:31.201[0m | [1mINFO    [0m | [36msam3d_objects.model.io[0m:[36mload_model_from_checkpoint[0m:[36m158

## 2. Load input image to lift to 3D (single object)

In [None]:
# Example input image. The name of its parent folder is reused later as the
# base name of the exported files.
IMAGE_PATH = f"{PATH}/images/shutterstock_stylish_kidsroom_1640806567/image.png"
image_dir = os.path.dirname(IMAGE_PATH)
IMAGE_NAME = os.path.basename(image_dir)

# Load the RGB image and the mask with index 14 from the same folder.
image = load_image(IMAGE_PATH)
mask = load_single_mask(image_dir, index=14)

# Last expression of the cell: show the image with the selected mask.
display_image(image, masks=[mask])

## 3. Generate Gaussian Splat (single view)

In [None]:
# Create the export folder before running the model so that a missing or
# unwritable directory is reported before the expensive inference step rather
# than after it. The multi-view cell and the visualization cell create their
# output folders the same way.
single_view_dir = f"{PATH}/gaussians/single"
os.makedirs(single_view_dir, exist_ok=True)

# run model (fixed seed so the result is reproducible across runs)
output = inference(image, mask, seed=42)

# export gaussian splat (as point cloud)
output["gs"].save_ply(f"{single_view_dir}/{IMAGE_NAME}.ply")

## 4. (Optional) Multi-view conditioning from an auxiliary directory
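Provide extra camera views in a directory and pass it to `inference` through the `aux_views` keyword argument, i.e. `inference(image, mask, aux_views=str(AUX_VIEWS_DIR))`.
The snippet below loads an example image/mask pair from `IMG_DIR` and expects the auxiliary views to already exist in its `aux/` subfolder (it does not create them); replace `IMG_DIR` and `AUX_VIEWS_DIR` with your own folder of RGBA/RGB images.
Note: the pipeline's `run()` must accept `aux_views`; otherwise the call fails with `TypeError: ... got an unexpected keyword argument 'aux_views'`.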


In [3]:
from pathlib import Path
from PIL import Image
import numpy as np

# NOTE(review): the saved output of this cell is
#   TypeError: InferencePipelinePointMap.run() got an unexpected keyword argument 'aux_views'
# so with the pipeline version used for that run the `aux_views` keyword is
# rejected and `output_multiview` is never assigned. Confirm the expected
# argument name against the installed pipeline before relying on this cell.
#
# NOTE(review): IMG_DIR is a machine-specific absolute path; point it at your
# own data before running.
IMG_DIR = '/home/leelab/sam3dnate/testImgs'
frame_stem = 'G1_run1_mouse-frame_synced_frame2230'

# These rebind the notebook-level `image` and `mask` variables.
image = load_image(os.path.join(IMG_DIR, frame_stem + '.jpg'))
mask = load_mask(os.path.join(IMG_DIR, frame_stem + '.png'))

# Folder holding the additional views; it is not created here (the mkdir call
# below is left disabled, as in the original cell).
AUX_VIEWS_DIR = Path(IMG_DIR) / "aux"
#AUX_VIEWS_DIR.mkdir(parents=True, exist_ok=True)

# The folder name ("aux") is used as the base name of the exported files.
MULTI_VIEW_NAME = AUX_VIEWS_DIR.name
output_multiview = inference(image, mask, aux_views=str(AUX_VIEWS_DIR), seed=7)

# Export the multi-view gaussian splat next to the single-view results.
multiview_dir = f"{PATH}/gaussians/multiview"
os.makedirs(multiview_dir, exist_ok=True)
output_multiview["gs"].save_ply(f"{multiview_dir}/{MULTI_VIEW_NAME}.ply")


TypeError: InferencePipelinePointMap.run() got an unexpected keyword argument 'aux_views'

In [None]:
# Pick the reconstruction to visualize: use the multi-view result whenever the
# multi-view cell has defined `output_multiview` in this kernel, otherwise fall
# back to the single-view `output`.
if 'output_multiview' in globals():
    visualization_output = output_multiview
    visualization_name = MULTI_VIEW_NAME
    visualization_dir = 'multiview'
else:
    visualization_output = output
    visualization_name = IMAGE_NAME
    visualization_dir = 'single'

# Make sure the folder the GIF will be written to exists.
os.makedirs(f"{PATH}/gaussians/{visualization_dir}", exist_ok=True)


## 5. Visualize Gaussian Splat

### a. Animated GIF

In [None]:
# Build a scene from the selected reconstruction and prepare it for rendering.
scene_gs = make_scene(visualization_output)
scene_gs = ready_gaussian_for_video_rendering(scene_gs)

# Camera / render settings handed to render_video; only the "color" entry of
# its result is kept.
render_settings = dict(
    r=1,
    fov=60,
    pitch_deg=15,
    yaw_start_deg=-45,
    resolution=512,
)
video = render_video(scene_gs, **render_settings)["color"]

# Path of the GIF relative to the working directory; used both for writing the
# file and for the display URL below.
gif_relpath = f"gaussians/{visualization_dir}/{visualization_name}.gif"

# save video as gif
imageio.mimsave(
    f"{PATH}/{gif_relpath}",
    video,
    format="GIF",
    # 1000 / 30 per frame — presumably milliseconds for 30 fps playback;
    # confirm the unit against the installed imageio version.
    duration=1000 / 30,
    loop=0,  # 0 means loop indefinitely
)

# notebook display; the random query parameter changes the URL on every run so
# a previously cached GIF is not shown.
ImageDisplay(url=f"{gif_relpath}?cache_invalidator={uuid.uuid4()}")


### b. Interactive Visualizer

In [None]:
# Open the exported point cloud of the selected reconstruction in the
# interactive viewer. It might take a while to load (black screen).
ply_path = f"{PATH}/gaussians/{visualization_dir}/{visualization_name}.ply"
interactive_visualizer(ply_path)
