<a href="https://colab.research.google.com/github/softmurata/colab_notebooks/blob/main/threed/zero123plus.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Installation

In [None]:
!pip install -q transformers accelerate diffusers==0.20.2 bitsandbytes
!pip install -q rembg

In [None]:
!pip install -q controlnet_aux

Get Demo Data

In [None]:
!wget https://img.freepik.com/free-photo/furniture-modern-studio-lifestyle-green_1122-1837.jpg -O /content/chair.jpg

In [None]:
import torch
import requests
from PIL import Image
from diffusers import DiffusionPipeline, EulerAncestralDiscreteScheduler

# Build the Zero123++ pipeline from the "sudo-ai/zero123plus-v1.1" checkpoint
# with the "sudo-ai/zero123plus-pipeline" custom pipeline code, in float16.
pipeline = DiffusionPipeline.from_pretrained(
    "sudo-ai/zero123plus-v1.1",
    custom_pipeline="sudo-ai/zero123plus-pipeline",
    torch_dtype=torch.float16,
)

# The scheduler can be tuned freely.
# Older `diffusers` releases do not support the `timestep_spacing` argument,
# which may degrade results; `diffusers==0.20.2` is the recommended version.
base_scheduler_config = pipeline.scheduler.config
pipeline.scheduler = EulerAncestralDiscreteScheduler.from_config(
    base_scheduler_config,
    timestep_spacing='trailing',
)

# Move the pipeline to the first CUDA device (left as the cell's last
# expression, so the returned pipeline is what the cell displays).
pipeline.to('cuda:0')

In [3]:
# Conditioning image: the local demo photo at /content/chair.jpg.
# Alternative sample image, fetched over HTTP instead:
# cond = Image.open(requests.get("https://d.skis.ltd/nrp/sample-data/lysol.png", stream=True).raw)
cond = Image.open("/content/chair.jpg")

# Step-count guidance:
#   - general real and synthetic images of general objects: around 28
#     inference steps is usually enough
#   - images with delicate details like faces (real or anime): 75-100 steps
#     may be needed for the details to construct
generation = pipeline(cond, num_inference_steps=75)
result = generation.images[0]

# Show the first generated image and write it to output.png.
result.show()
result.save("output.png")

  0%|          | 0/75 [00:00<?, ?it/s]

In [5]:
import rembg
# Run rembg on the generated image and display the processed image.
# NOTE: this rebinds `result` to rembg's output, so re-running this cell feeds
# the already-processed image back into rembg rather than the original
# pipeline output.
result = rembg.remove(result)
result.show()

Downloading data from 'https://github.com/danielgatis/rembg/releases/download/v0.0.0/u2net.onnx' to file '/root/.u2net/u2net.onnx'.
100%|████████████████████████████████████████| 176M/176M [00:00<00:00, 123GB/s]


In [6]:
# Write whatever `result` currently holds to output_bg.png (the rembg output,
# provided the background-removal cell was the last one to assign `result`).
result.save("output_bg.png")

Depth ControlNet Mode

In [None]:
import torch
import requests
from PIL import Image
from diffusers import DiffusionPipeline, EulerAncestralDiscreteScheduler, ControlNetModel

# Rebuild the Zero123++ pipeline (same checkpoint and custom pipeline code as
# the plain mode), in float16.
pipeline = DiffusionPipeline.from_pretrained(
    "sudo-ai/zero123plus-v1.1",
    custom_pipeline="sudo-ai/zero123plus-pipeline",
    torch_dtype=torch.float16,
)

# Load the depth ControlNet weights and attach them to the pipeline with a
# conditioning scale of 0.75.
depth_controlnet = ControlNetModel.from_pretrained(
    "sudo-ai/controlnet-zp11-depth-v1",
    torch_dtype=torch.float16,
)
pipeline.add_controlnet(depth_controlnet, conditioning_scale=0.75)

# The scheduler can be tuned freely.
base_scheduler_config = pipeline.scheduler.config
pipeline.scheduler = EulerAncestralDiscreteScheduler.from_config(
    base_scheduler_config,
    timestep_spacing='trailing',
)

# Move the pipeline to the first CUDA device.
pipeline.to('cuda:0')

In [4]:
# https://github.com/patrickvonplaten/controlnet_aux
# Only MidasDetector is used in this cell; the other detector classes are
# imported but not referenced here.
from controlnet_aux import HEDdetector, MidasDetector, MLSDdetector, OpenposeDetector, PidiNetDetector, NormalBaeDetector, LineartDetector, LineartAnimeDetector, CannyDetector, ContentShuffleDetector, ZoeDetector, MediapipeFaceDetector, SamDetector, LeresDetector, DWposeDetector
# Instantiate the Midas detector from the "lllyasviel/Annotators" repo id
# (the first call downloads the checkpoint, as the cell output shows).
midas = MidasDetector.from_pretrained("lllyasviel/Annotators")



Downloading (…)id-midas-501f0c75.pt:   0%|          | 0.00/493M [00:00<?, ?B/s]

  model = create_fn(


In [15]:
# Run the depth-conditioned pipeline on the local demo photo.
# Alternative sample pair (conditioning image plus a ready-made depth image),
# fetched over HTTP instead:
# cond = Image.open(requests.get("https://d.skis.ltd/nrp/sample-data/0_cond.png", stream=True).raw)
# depth = Image.open(requests.get("https://d.skis.ltd/nrp/sample-data/0_depth.png", stream=True).raw)

img = Image.open("/content/chair.jpg")

# Estimate a depth map from the input photo with Midas, then resize it to
# 640x960 (PIL size order is width, height).
# NOTE(review): the depth here comes from the single input view; confirm this
# matches the layout the depth ControlNet expects for `depth_image`.
estimated_depth = midas(img)
depth = estimated_depth.resize((640, 960))

generation = pipeline(img, depth_image=depth, num_inference_steps=36)
result = generation.images[0]

# Show the first generated image and write it to output.png (this overwrites
# any output.png saved earlier under the same name).
result.show()
result.save("output.png")

(640, 960)


  0%|          | 0/36 [00:00<?, ?it/s]