In [1]:
import os
import sys
sys.path.append('../')
os.environ['CUDA_VISIBLE_DEVICES'] = '7'

import numpy as np
import torch
from scipy.sparse import lil_matrix
from scipy.optimize import least_squares

from vggt.models.vggt import VGGT
from vggt.utils.load_fn import load_and_preprocess_images

# Run on the GPU when one is visible, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
# bfloat16 is supported on Ampere GPUs (Compute Capability 8.0+).
# Only query the capability when CUDA is actually available: the call raises
# on a CPU-only machine, which would defeat the "cpu" fallback above.
if device == "cuda" and torch.cuda.get_device_capability()[0] >= 8:
    dtype = torch.bfloat16
else:
    dtype = torch.float16

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def save_ply(pcd_xyz, pcd_rgb, path='colored_point_cloud.ply'):
    """Write a coloured point cloud to a PLY file.

    Args:
        pcd_xyz: Array-like of shape (N, 3) with point coordinates. Stored as
            float32 in the properties ``x``, ``y``, ``z``.
        pcd_rgb: Array-like of shape (N, 3) with per-point colours. ``uint8``
            input is written unchanged. Any other dtype is interpreted as
            0-1 when its maximum is <= 1.0 (and scaled by 255), otherwise as
            0-255. Values are clipped to [0, 255] before the cast.
        path: Destination file. Defaults to ``'colored_point_cloud.ply'``,
            the name this function has always written to.

    Raises:
        AssertionError: If the inputs are not (N, 3) arrays with matching N.
    """
    # Imported here so that plyfile is only required when a cloud is exported.
    from plyfile import PlyData, PlyElement

    pcd_xyz = np.asarray(pcd_xyz)
    pcd_rgb = np.asarray(pcd_rgb)

    # ensure pcd_xyz and pcd_rgb have the correct shape
    assert pcd_xyz.shape[0] == pcd_rgb.shape[0], "The number of points in the point cloud should be the same"
    assert pcd_xyz.ndim == 2 and pcd_xyz.shape[1] == 3, "The point cloud coordinate should be of shape (N, 3)"
    assert pcd_rgb.ndim == 2 and pcd_rgb.shape[1] == 3, "The point cloud color should be of shape (N, 3)"

    # transform color to uint8 (assuming color values are in the range of 0-255 or 0-1)
    if pcd_rgb.dtype != np.uint8:
        # The size check keeps np.max from raising on an empty cloud.
        already_byte_range = pcd_rgb.size > 0 and np.max(pcd_rgb) > 1.0
        scaled = pcd_rgb if already_byte_range else pcd_rgb * 255
        # Clip first: casting out-of-range floats to uint8 wraps around
        # instead of saturating.
        pcd_rgb = np.clip(scaled, 0, 255).astype(np.uint8)

    # create a structured array with one record per point
    vertex_dtype = [('x', 'f4'), ('y', 'f4'), ('z', 'f4'),
                    ('red', 'u1'), ('green', 'u1'), ('blue', 'u1')]
    vertices = np.empty(pcd_xyz.shape[0], dtype=vertex_dtype)

    # fill in the structured array column by column
    for column, field in enumerate(('x', 'y', 'z')):
        vertices[field] = pcd_xyz[:, column].astype('f4')
    for column, field in enumerate(('red', 'green', 'blue')):
        vertices[field] = pcd_rgb[:, column]

    # create and save the ply file
    ply_element = PlyElement.describe(vertices, 'vertex')
    PlyData([ply_element]).write(path)

In [3]:
# Initialize the model and load the pretrained weights.
# This will automatically download the model weights the first time it's run, which may take a while.
# This notebook only runs inference, so put the module in eval mode explicitly
# rather than relying on whatever mode from_pretrained leaves it in.
model = VGGT.from_pretrained("facebook/VGGT-1B").to(device).eval()

In [None]:
# Load and preprocess example images (replace with your own image paths)
images_path = "../data/GauU_Scene/SMBU/images_3.4175"
max_images = 150  # upper bound on the number of frames sent through the model at once
image_extensions = ('.jpg', '.png', '.JPG', '.PNG')

# os.listdir returns entries in arbitrary order; sort so that the slice below
# selects the same frames on every run and on every machine.
image_names = sorted(
    os.path.join(images_path, name)
    for name in os.listdir(images_path)
    if name.endswith(image_extensions)
)
images = load_and_preprocess_images(image_names[:max_images]).to(device)

# torch.autocast(device_type="cuda", ...) is the current spelling of the
# deprecated torch.cuda.amp.autocast(...).
with torch.no_grad(), torch.autocast(device_type="cuda", dtype=dtype):
    # Predict attributes including cameras, depth maps, and point maps.
    predictions = model(images)

In [23]:
conf_threshold = 0.5       # points with confidence at or below this are dropped
downsample_interval = 10   # keep every N-th surviving point when writing the PLY

# Flatten the per-point confidence and build the keep-mask once, so the
# coordinates and the colours are filtered with exactly the same selection.
pcd_conf = predictions['world_points_conf'].reshape(-1)
keep_mask = pcd_conf > conf_threshold

# One row of three values per point. The image tensor has axis 2 moved to the
# end before flattening — presumably the channel axis, so that each row is an
# RGB triple aligned with the matching world point; TODO confirm the layout.
pcd_xyz = predictions['world_points'].reshape(-1, 3)[keep_mask].cpu().numpy()
pcd_rgb = predictions['images'].permute(0, 1, 3, 4, 2).reshape(-1, 3)[keep_mask].cpu().numpy()

# NOTE(review): the recorded output (27,195,000) divides evenly by the 150
# loaded frames (181,300 per frame), which may mean the threshold removed
# nothing — confirm the value range of 'world_points_conf'.
print(f"Number of points in the point cloud: {pcd_xyz.shape[0]}")
save_ply(pcd_xyz[::downsample_interval], pcd_rgb[::downsample_interval])

Number of points in the point cloud: 27195000
