In [2]:
import torch
# Report what PyTorch can see of the CUDA runtime. The per-device details are
# only queried when CUDA is actually usable.
cuda_ok = torch.cuda.is_available()
print(f"CUDA available: {cuda_ok}")
print(f"CUDA version: {torch.version.cuda}")
print(f"Number of CUDA devices: {torch.cuda.device_count()}")
if cuda_ok:
    active_index = torch.cuda.current_device()
    print(f"Current CUDA device: {active_index}")
    # Called without an argument, as in the original cell.
    print(f"Device name: {torch.cuda.get_device_name()}")

CUDA available: True
CUDA version: 12.8
Number of CUDA devices: 1
Current CUDA device: 0
Device name: NVIDIA GeForce RTX 3090


In [1]:
import torch
import os
# Select the compute device: CUDA when PyTorch reports it as available, else CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Pick the autocast dtype for mixed-precision inference.
if device.type == "cuda":
    try:
        major, _minor = torch.cuda.get_device_capability(device)
        # Ampere+ (8.x) supports bfloat16, otherwise go with float16
        autocast_dtype = torch.bfloat16 if major >= 8 else torch.float16
    except RuntimeError as e:
        # Protect against “CUDA unknown error … setting the available devices to be zero”
        print(f"[warning] couldn’t query CUDA capability: {e!r}")
        print(" falling back to float16 autocast")
        autocast_dtype = torch.float16
else:
    # No GPU: run in plain float32.
    # autocast_dtype = torch.bfloat16   # if you want to try cpu bf16 on 2.x
    autocast_dtype = torch.float32

# float32 is not a reduced-precision autocast target, so in that case autocast
# is switched off explicitly instead of being handed an unsupported dtype.
use_autocast = autocast_dtype is not torch.float32

print(f"device = {device}, autocast dtype = {autocast_dtype}")

# Project-local model and image loader.
from vggt.models.vggt import VGGT
from vggt.utils.load_fn import load_and_preprocess_images

model = VGGT.from_pretrained("facebook/VGGT-1B").to(device)

# List the directory and build the paths from the SAME location, so every path
# refers to a file that was actually found by the listing.
# NOTE(review): the file names were previously listed from "kitchen_me" but
# joined onto "examples/kitchen/images"; confirm "kitchen_me" is the intended scene.
image_dir = "examples/kitchen_me/images"
image_paths = sorted(
    os.path.join(image_dir, f)
    for f in os.listdir(image_dir)
    if f.endswith(".png")
)
images = load_and_preprocess_images(image_paths).to(device)

# Inference: no gradients are needed, and autocast is applied per device type.
with torch.no_grad():
    # note the explicit device_type arg
    with torch.amp.autocast(device.type, dtype=autocast_dtype, enabled=use_autocast):
        preds = model(images)

device = cuda, autocast dtype = torch.bfloat16


  with torch.cuda.amp.autocast(enabled=False):


In [2]:
import os
import cv2
import argparse
from glob import glob
import numpy as np

def convert_images_to_video(input_folder, output_file, fps=30, sort_method='numeric'):
    """
    Convert a folder of JPG images to an MP4 video.

    Frames whose size differs from the first image are resized to match it;
    unreadable frames are skipped with a message. Progress and errors are
    reported with ``print``; nothing is raised for missing or unreadable input.

    Args:
        input_folder: Path to folder containing JPG images (``*.jpg``)
        output_file: Path to save the output MP4 file; its parent directory
            is created if it does not exist yet
        fps: Frames per second for the output video
        sort_method: Method to sort images ('numeric' or 'alphabetic').
            'numeric' orders by the integer formed from all digits in each
            file name and falls back to alphabetic order when some file
            name contains no digits.

    Returns:
        None. The function returns early (without writing a video) when no
        images are found, when the first image cannot be read, or when the
        video writer cannot be opened.
    """
    # Get all jpg files in the folder
    image_files = glob(os.path.join(input_folder, "*.jpg"))

    if not image_files:
        print(f"No JPG images found in {input_folder}")
        return

    # Sort the files
    if sort_method == 'numeric':
        try:
            image_files.sort(key=lambda x: int(''.join(filter(str.isdigit, os.path.basename(x)))))
        except ValueError:
            # int('') fails for names without digits: fall back to alphabetical.
            # Only ValueError is caught so unrelated errors are not hidden.
            image_files.sort()
    else:
        # Sort alphabetically
        image_files.sort()

    # Read the first image to get the frame dimensions for the whole video
    first_img = cv2.imread(image_files[0])
    if first_img is None:
        print(f"Failed to read image: {image_files[0]}")
        return

    h, w = first_img.shape[:2]
    size = (w, h)  # OpenCV expects (width, height)

    # Make sure the destination directory exists before opening the writer,
    # which would otherwise fail to open the file.
    out_dir = os.path.dirname(output_file)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # Create VideoWriter object
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # MP4 codec
    out = cv2.VideoWriter(output_file, fourcc, fps, size)
    if not out.isOpened():
        # Do not claim success when the writer could not be opened.
        out.release()
        print(f"Failed to open video writer for {output_file}")
        return

    try:
        # Add each image to the video
        for i, image_file in enumerate(image_files):
            img = cv2.imread(image_file)
            if img is None:
                print(f"Skipping unreadable image: {image_file}")
                continue

            # Ensure image has the right dimensions
            if img.shape[0] != h or img.shape[1] != w:
                img = cv2.resize(img, size)

            out.write(img)

            # Print progress for the first, every 100th, and the last image
            if (i + 1) % 100 == 0 or i == 0 or i == len(image_files) - 1:
                print(f"Processing image {i+1}/{len(image_files)}")
    finally:
        # Release the VideoWriter even if reading or writing a frame raises
        out.release()

    print(f"Video saved to {output_file}")


# Build a 2 fps video from the horse example frames, ordered numerically.
input_folder = "examples/horse/images"
output_file = "examples/videos/horse.mp4"
convert_images_to_video(
    input_folder=input_folder,
    output_file=output_file,
    fps=2,
    sort_method='numeric',
)

Processing image 1/21
Processing image 21/21
Video saved to examples/videos/horse.mp4
