In [None]:
# Print the NVIDIA driver / GPU status for this runtime before doing any GPU work.
!nvidia-smi

Clone threestudio repo

In [None]:
# Fetch the threestudio sources; git places them in ./threestudio under the current working directory.
!git clone https://github.com/threestudio-project/threestudio.git

In [None]:
# Make the clone the notebook's working directory; later cells use paths relative to it (launch.py, configs/).
%cd threestudio

Install dependencies

In [None]:
# %pip (rather than !pip) installs into the environment of the running kernel,
# even when a different pip executable comes first on PATH.
%pip install ninja
# Main Python requirements. "imageio[ffmpeg]" is quoted so the shell cannot treat the brackets as a glob pattern.
%pip install lightning==2.0.0 omegaconf==2.3.0 jaxtyping typeguard diffusers transformers accelerate opencv-python tensorboard matplotlib imageio "imageio[ffmpeg]" trimesh bitsandbytes sentencepiece safetensors huggingface_hub libigl xatlas networkx pysdf PyMCubes wandb torchmetrics controlnet_aux
# Extra packages for zero123.
%pip install einops kornia taming-transformers-rom1504 git+https://github.com/openai/CLIP.git
# Mesh visualization.
%pip install open3d plotly

Then build the remaining dependencies from source. This may take a while.

In [None]:
# These packages are installed straight from their Git repositories, so pip builds them locally.
# %pip (rather than !pip) installs into the environment of the running kernel.
%pip install git+https://github.com/ashawkey/envlight.git
%pip install git+https://github.com/KAIR-BAIR/nerfacc.git@v0.5.2
%pip install git+https://github.com/NVlabs/nvdiffrast.git
# "#subdirectory=..." selects the package directory inside the tiny-cuda-nn repository.
%pip install git+https://github.com/NVlabs/tiny-cuda-nn/#subdirectory=bindings/torch

Login to HuggingFace

In [None]:
from huggingface_hub import interpreter_login

# Log in to HuggingFace from inside the notebook.
# NOTE(review): presumably required so the DeepFloyd-IF guidance weights can be
# downloaded when training starts — confirm.
interpreter_login()

Now create your own 3D model from text prompts

Here we use the DreamFusion model with DeepFloyd-IF guidance. You may try other models by using different running commands given [here](https://github.com/threestudio-project/threestudio#supported-models).

In [None]:
# Text prompt describing the object to generate; the training command reads it through "$prompt".
prompt = "a zoomed out DSLR photo of a baby bunny sitting on top of a stack of pancakes"

In [None]:
# Train with the dreamfusion-if config on GPU 0 for 10000 steps.
# IPython replaces $prompt with the value of the Python variable `prompt` before the shell runs the line.
!python launch.py --config configs/dreamfusion-if.yaml --train --gpu 0 system.prompt_processor.prompt="$prompt" trainer.max_steps=10000 system.prompt_processor.spawn=false

Display the rendered video

In [None]:
from IPython.display import HTML
from base64 import b64encode
def display_video(video_path):
  """Return an inline HTML5 player for the MP4 file at ``video_path``.

  The whole file is read into memory and embedded in the markup as a base64
  ``data:`` URL, so the returned object carries the video bytes itself.

  Args:
    video_path: Path to an MP4 file on disk.

  Returns:
    The ``HTML`` display object wrapping a ``<video>`` element; leave it as the
    last expression of a cell to render it.

  Raises:
    OSError: If the file cannot be opened or read.
  """
  # The context manager closes the file handle even if read() raises.
  with open(video_path, 'rb') as video_file:
    mp4 = video_file.read()
  data_url = "data:video/mp4;base64," + b64encode(mp4).decode()
  return HTML("""
  <video width=1000 controls>
    <source src="%s" type="video/mp4">
  </video>
  """ % data_url)

In [None]:
# you will see the path to the saving directory at the end of the training logs
# replace save_dir below with that path
save_dir = 'path/to/save/dir'

import os
import glob

# Sort the matches so the same file is picked on every run if several videos exist.
test_videos = sorted(glob.glob(os.path.join(save_dir, "*-test.mp4")))
if not test_videos:
  # Fail with an actionable message instead of a bare IndexError from [0].
  raise FileNotFoundError(
    "No '*-test.mp4' file found in %r; set save_dir to the directory printed "
    "at the end of the training logs." % save_dir
  )
video_path = test_videos[0]
# Keep this call as the last expression so the notebook renders the player.
display_video(video_path)

Extract the object mesh.

Here we use an empirical threshold value. You can also first try `system.geometry.isosurface_threshold=auto` and visualize it. Then you can manually adjust the threshold according to the automatically determined value shown in the logs. Increase it if there are too many floaters and decrease it if the geometry is incomplete. 


The extraction process takes around 2 mins on T4.

In [None]:
# Export a mesh from the last checkpoint of the run that owns save_dir.
# IPython substitutes $save_dir before the shell runs; the paths are quoted so a
# save_dir containing spaces or other shell metacharacters is passed intact.
!python launch.py --config "$save_dir/../configs/parsed.yaml" --export --gpu 0 resume="$save_dir/../ckpts/last.ckpt" system.exporter_type=mesh-exporter system.exporter.context_type=cuda system.geometry.isosurface_threshold=15.0

Visualize the mesh. Alternatively, you can download the exported assets directly and use them locally.

In [None]:
import glob
import os  # imported here so this cell does not rely on an earlier cell's import

import numpy as np
import open3d as o3d
import plotly.graph_objects as go

# Locate the exported OBJ under save_dir; sorting keeps the choice deterministic.
mesh_candidates = sorted(glob.glob(os.path.join(save_dir, "*-export/model.obj")))
if not mesh_candidates:
  # Fail with an actionable message instead of a bare IndexError from [0].
  raise FileNotFoundError(
    "No '*-export/model.obj' found in %r; run the mesh export step first and "
    "check that save_dir points at the training run's save directory." % save_dir
  )
mesh_path = mesh_candidates[0]
mesh = o3d.io.read_triangle_mesh(mesh_path)

# Compute normals only when the loaded mesh does not already have them.
if not mesh.has_vertex_normals():
  mesh.compute_vertex_normals()
if not mesh.has_triangle_normals():
  mesh.compute_triangle_normals()

triangles = np.asarray(mesh.triangles)
vertices = np.asarray(mesh.vertices)

if mesh.has_triangle_normals():
  # Colour each face by its normal: 0.5 + 0.5 * n maps components from [-1, 1]
  # into [0, 1] (assuming unit-length normals — TODO confirm).
  colors = (0.5, 0.5, 0.5) + np.asarray(mesh.triangle_normals) * 0.5
  colors = tuple(map(tuple, colors))
else:
  # Fallback colour when no triangle normals are available.
  colors = (1.0, 0.0, 0.0)

fig = go.Figure(
  data=[
    go.Mesh3d(
      # Vertex coordinates, one array per axis.
      x=vertices[:,0],
      y=vertices[:,1],
      z=vertices[:,2],
      # Vertex indices of each triangle's three corners.
      i=triangles[:,0],
      j=triangles[:,1],
      k=triangles[:,2],
      facecolor=colors,
      opacity=0.50)
  ],
  layout=dict(
    # Hide all three axes so only the mesh is drawn.
    scene=dict(
      xaxis=dict(visible=False),
      yaxis=dict(visible=False),
      zaxis=dict(visible=False)
    )
  )
)
fig.show()
