<a href="https://colab.research.google.com/github/ritwikraha/computer-needs-glasses/blob/master/image-3D/Stability_TSR.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# TripoSR: Fast FeedForward 3D Reconstruction

## Setup and Imports

```
- Fetch the repository
- Install requirements.txt
- Import packages
```



In [1]:
!git clone https://github.com/ritwikraha/TripoSR.git

Cloning into 'TripoSR'...
remote: Enumerating objects: 101, done.[K
remote: Counting objects: 100% (71/71), done.[K
remote: Compressing objects: 100% (57/57), done.[K
remote: Total 101 (delta 25), reused 41 (delta 14), pack-reused 30[K
Receiving objects: 100% (101/101), 36.69 MiB | 14.01 MiB/s, done.
Resolving deltas: 100% (25/25), done.


In [2]:
import sys
sys.path.append('/content/TripoSR/tsr')

In [6]:
%cd TripoSR

/content/TripoSR


In [12]:
!pip install -r requirements.txt -q

  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.5/79.5 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.6/3.6 MB[0m [31m23.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.6/44.6 kB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.9/7.9 MB[0m [31m59.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m688.5/688.5 kB[0m [31m45.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m117.0/117.0 kB[0m [31m15.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.8/3.8 MB[0m [31m77.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [13]:
# Import necessary libraries
import os
import time
from PIL import Image
import numpy as np
import torch
from IPython.display import Video
from tsr.system import TSR
from tsr.utils import remove_background, resize_foreground, save_video
import rembg

## Create the Timer Class

- Utility class for keeping track of GPU usage

In [14]:
# Define Timer class
class Timer:
    def __init__(self):
        self.items = {}
        self.time_scale = 1000.0  # ms
        self.time_unit = "ms"

    def start(self, name: str) -> None:
        if torch.cuda.is_available():
            torch.cuda.synchronize()
        self.items[name] = time.time()

    def end(self, name: str) -> float:
        if name not in self.items:
            return
        if torch.cuda.is_available():
            torch.cuda.synchronize()
        start_time = self.items.pop(name)
        delta = time.time() - start_time
        t = delta * self.time_scale
        print(f"{name} finished in {t:.2f}{self.time_unit}.")

timer = Timer()

## Get the Inputs

In [15]:
# Colab form fields for user input
image_paths = "/content/TripoSR/examples/horse.png" #@param {type:"string"}
device = "cuda:0" #@param ["cuda:0", "cpu"] {allow-input: true}
pretrained_model_name_or_path = "stabilityai/TripoSR" #@param {type:"string"}
chunk_size = 8192 #@param {type:"integer"}
no_remove_bg = True #@param {type:"boolean"}
foreground_ratio = 0.85 #@param {type:"slider", min:0, max:1, step:0.05}
output_dir = "output/" #@param {type:"string"}
model_save_format = "obj" #@param ["obj", "glb"]
render = True #@param {type:"boolean"}

## Preprocessing

In [16]:
# Prepare inputs
image_paths = image_paths.split(",")  # Split string input into list
output_dir = output_dir.strip()
os.makedirs(output_dir, exist_ok=True)

# Adjust device based on CUDA availability
if not torch.cuda.is_available():
    device = "cpu"

# Initialize model
timer.start("Initializing model")
model = TSR.from_pretrained(
    pretrained_model_name_or_path,
    config_name="config.yaml",
    weight_name="model.ckpt",
)
model.renderer.set_chunk_size(chunk_size)
model.to(device)
timer.end("Initializing model")

# Process images
timer.start("Processing images")
images = []

if no_remove_bg:
    rembg_session = None
else:
    rembg_session = rembg.new_session()

for i, image_path in enumerate(image_paths):
    image_path = image_path.strip()
    if no_remove_bg:
        image = np.array(Image.open(image_path).convert("RGB"))
    else:
        image = remove_background(Image.open(image_path), rembg_session)
        image = resize_foreground(image, foreground_ratio)
        image = np.array(image).astype(np.float32) / 255.0
        image = image[:, :, :3] * image[:, :, 3:4] + (1 - image[:, :, 3:4]) * 0.5
        image = Image.fromarray((image * 255.0).astype(np.uint8))
        image_dir = os.path.join(output_dir, str(i))
        os.makedirs(image_dir, exist_ok=True)
        image.save(os.path.join(image_dir, "input.png"))
    images.append(image)
timer.end("Processing images")

Downloading config.yaml:   0%|          | 0.00/987 [00:00<?, ?B/s]

Downloading model.ckpt:   0%|          | 0.00/1.68G [00:00<?, ?B/s]

The cache for model files in Transformers v4.22.0 has been updated. Migrating your old cache. This is a one-time only operation. You can interrupt this and resume the migration later on by calling `transformers.utils.move_cache()`.


0it [00:00, ?it/s]

Downloading config.json:   0%|          | 0.00/454 [00:00<?, ?B/s]

Initializing model finished in 13589.94ms.
Processing images finished in 9.78ms.


## Rendering

In [17]:
# Process each image
for i, image in enumerate(images):
    print(f"Running image {i + 1}/{len(images)} ...")

    # Run model
    timer.start("Running model")
    with torch.no_grad():
        scene_codes = model([image], device=device)
    timer.end("Running model")

    # Rendering
    if render:
        timer.start("Rendering")
        render_images = model.render(scene_codes, n_views=30, return_type="pil")
        for ri, render_image in enumerate(render_images[0]):
            render_image.save(os.path.join(output_dir, str(i), f"render_{ri:03d}.png"))
        save_video(
            render_images[0], os.path.join(output_dir, str(i), "render.mp4"), fps=30
        )
        timer.end("Rendering")

    # Export mesh
    timer.start("Exporting mesh")
    meshes = model.extract_mesh(scene_codes)
    mesh_file = os.path.join(output_dir, str(i), f"mesh.{model_save_format}")
    meshes[0].export(mesh_file)
    timer.end("Exporting mesh")

print("Processing complete.")

Running image 1/1 ...
Running model finished in 1647.18ms.


FileNotFoundError: [Errno 2] No such file or directory: 'output/0/render_000.png'

In [None]:
# Display the video
Video('output/render.mp4')
