### Import 

In [34]:
from diffusers import StableDiffusionXLAdapterPipeline, T2IAdapter, EulerAncestralDiscreteScheduler, AutoencoderKL
from diffusers.utils import load_image, make_image_grid
from controlnet_aux.lineart import LineartDetector
from controlnet_aux.pidi import PidiNetDetector
from controlnet_aux.canny import CannyDetector
from controlnet_aux import OpenposeDetector
from controlnet_aux.midas import MidasDetector
from controlnet_aux import ZoeDetector

import torch
import numpy as np
from PIL import Image
#from IPython.display import Image
# Select the compute device: prefer CUDA, then Apple-Silicon MPS, and finally
# fall back to the CPU so the notebook still starts when no accelerator exists.
# NOTE: the cells below load float16 weights; half precision on the CPU may be
# slow or unsupported for some operations.
if torch.cuda.is_available():
    device = torch.device("cuda")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")


## Sketch

In [2]:
# Base SDXL checkpoint that the sketch adapter conditions.
model_id = 'stabilityai/stable-diffusion-xl-base-1.0'

# Sketch T2I-Adapter in half precision. The keyword is spelled `variant`;
# it requests the fp16 weight files of the adapter repository.
adapter = T2IAdapter.from_pretrained(
    "TencentARC/t2i-adapter-sketch-sdxl-1.0",
    torch_dtype=torch.float16,
    variant="fp16",
).to(device)

# Euler-ancestral scheduler built from the base model's "scheduler" subfolder.
euler_a = EulerAncestralDiscreteScheduler.from_pretrained(
    model_id,
    subfolder="scheduler",
)

# VAE from the `madebyollin/sdxl-vae-fp16-fix` repository, loaded in float16.
vae = AutoencoderKL.from_pretrained(
    "madebyollin/sdxl-vae-fp16-fix",
    torch_dtype=torch.float16,
)

# Assemble the SDXL adapter pipeline and move it to the selected device.
pipe = StableDiffusionXLAdapterPipeline.from_pretrained(
    model_id,
    vae=vae,
    adapter=adapter,
    scheduler=euler_a,
    torch_dtype=torch.float16,
    variant="fp16",
).to(device)

# xformers memory-efficient attention stays disabled because this notebook runs on a Mac.
# pipe.enable_xformers_memory_efficient_attention()

# PidiNet detector used to turn the input picture into a sketch map.
pidinet = PidiNetDetector.from_pretrained("lllyasviel/Annotators").to(device)

Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

### Conditional Image

In [33]:
# Download the reference picture and convert it into the sketch map
# that conditions the adapter (detected and returned at 1024 px).
url = "https://huggingface.co/Adapter/t2iadapter/resolve/main/figs_SDXLV1.0/org_sketch.png"
image = pidinet(
    load_image(url),
    detect_resolution=1024,
    image_resolution=1024,
    apply_filter=True,
)

### Generate images

In [4]:
# Text guidance for the sketch-conditioned generation.
prompt = "a robot, mount fuji in the background, 4k photo, highly detailed"
negative_prompt = "extra digit, fewer digits, cropped, worst quality, low quality, glitch, deformed, mutated, ugly, disfigured"

# Run the pipeline with the sketch map as the adapter input.
sketch_output = pipe(
    image=image,
    prompt=prompt,
    negative_prompt=negative_prompt,
    guidance_scale=7.5,
    adapter_conditioning_scale=0.9,
    num_inference_steps=30,
)

# Keep the first generated image and write it to disk.
gen_images = sketch_output.images[0]
gen_images.save('out_sketch.png')

  0%|          | 0/30 [00:00<?, ?it/s]

## Lineart

In [5]:
# Base SDXL checkpoint that the lineart adapter conditions.
model_id = 'stabilityai/stable-diffusion-xl-base-1.0'

# Lineart T2I-Adapter in half precision. The keyword is spelled `variant`;
# it requests the fp16 weight files of the adapter repository.
adapter = T2IAdapter.from_pretrained(
    "TencentARC/t2i-adapter-lineart-sdxl-1.0",
    torch_dtype=torch.float16,
    variant="fp16",
).to(device)

# Euler-ancestral scheduler built from the base model's "scheduler" subfolder.
euler_a = EulerAncestralDiscreteScheduler.from_pretrained(
    model_id,
    subfolder="scheduler",
)

# VAE from the `madebyollin/sdxl-vae-fp16-fix` repository, loaded in float16.
vae = AutoencoderKL.from_pretrained(
    "madebyollin/sdxl-vae-fp16-fix",
    torch_dtype=torch.float16,
)

# Assemble the SDXL adapter pipeline and move it to the selected device.
pipe = StableDiffusionXLAdapterPipeline.from_pretrained(
    model_id,
    vae=vae,
    adapter=adapter,
    scheduler=euler_a,
    torch_dtype=torch.float16,
    variant="fp16",
).to(device)

# xformers memory-efficient attention stays disabled because this notebook runs on a Mac.
# pipe.enable_xformers_memory_efficient_attention()

# Line-art detector used to extract the conditioning drawing.
line_detector = LineartDetector.from_pretrained("lllyasviel/Annotators").to(device)

Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

### Conditional Image

In [6]:
# Download the reference picture and extract its line art
# (detected at 384 px, returned at 1024 px).
url = "https://huggingface.co/Adapter/t2iadapter/resolve/main/figs_SDXLV1.0/org_lin.jpg"
image = line_detector(
    load_image(url),
    detect_resolution=384,
    image_resolution=1024,
)

### Generate images

In [7]:
# Text guidance for the lineart-conditioned generation.
prompt = "Ice dragon roar, 4k photo"
negative_prompt = "anime, cartoon, graphic, text, painting, crayon, graphite, abstract, glitch, deformed, mutated, ugly, disfigured"

# Run the pipeline with the line-art map as the adapter input.
lineart_output = pipe(
    image=image,
    prompt=prompt,
    negative_prompt=negative_prompt,
    guidance_scale=7.5,
    adapter_conditioning_scale=0.8,
    num_inference_steps=30,
)

# Keep the first generated image and write it to disk.
gen_images = lineart_output.images[0]
gen_images.save('out_lineart.png')

  0%|          | 0/30 [00:00<?, ?it/s]

## Canny

In [27]:
# Base SDXL checkpoint that the canny adapter conditions.
model_id = 'stabilityai/stable-diffusion-xl-base-1.0'

# Canny T2I-Adapter in half precision. The keyword is spelled `variant`;
# it requests the fp16 weight files of the adapter repository.
adapter = T2IAdapter.from_pretrained(
    "TencentARC/t2i-adapter-canny-sdxl-1.0",
    torch_dtype=torch.float16,
    variant="fp16",
).to(device)

# Euler-ancestral scheduler built from the base model's "scheduler" subfolder.
euler_a = EulerAncestralDiscreteScheduler.from_pretrained(
    model_id,
    subfolder="scheduler",
)

# VAE from the `madebyollin/sdxl-vae-fp16-fix` repository, loaded in float16.
vae = AutoencoderKL.from_pretrained(
    "madebyollin/sdxl-vae-fp16-fix",
    torch_dtype=torch.float16,
)

# Assemble the SDXL adapter pipeline and move it to the selected device.
pipe = StableDiffusionXLAdapterPipeline.from_pretrained(
    model_id,
    vae=vae,
    adapter=adapter,
    scheduler=euler_a,
    torch_dtype=torch.float16,
    variant="fp16",
).to(device)

# xformers memory-efficient attention stays disabled because this notebook runs on a Mac.
# pipe.enable_xformers_memory_efficient_attention()

# Canny edge detector; it is constructed directly, without pretrained weights.
canny_detector = CannyDetector()

Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

### Conditional Image

In [28]:
url = "https://huggingface.co/Adapter/t2iadapter/resolve/main/figs_SDXLV1.0/org_canny.jpg"

# The edge map is detected at a low resolution (384 px) to avoid
# high-frequency details, then returned at 1024 px.
image = canny_detector(
    load_image(url),
    detect_resolution=384,
    image_resolution=1024,
)


### Generate images

In [29]:
# Text guidance for the canny-conditioned generation.
prompt = "Mystical fairy in real, magic, 4k picture, high quality"
negative_prompt = "extra digit, fewer digits, cropped, worst quality, low quality, glitch, deformed, mutated, ugly, disfigured"

# Run the pipeline with the canny edge map as the adapter input.
canny_output = pipe(
    image=image,
    prompt=prompt,
    negative_prompt=negative_prompt,
    adapter_conditioning_scale=0.8,
    guidance_scale=7.5,
    num_inference_steps=30,
)

# Keep the first generated image and write it to disk.
gen_images = canny_output.images[0]
gen_images.save('out_canny.png')


  0%|          | 0/30 [00:00<?, ?it/s]

## OpenPose

In [11]:
# Base SDXL checkpoint that the OpenPose adapter conditions.
model_id = 'stabilityai/stable-diffusion-xl-base-1.0'

# OpenPose T2I-Adapter, loaded in half precision (no weight variant is requested here).
adapter = T2IAdapter.from_pretrained(
    "TencentARC/t2i-adapter-openpose-sdxl-1.0",
    torch_dtype=torch.float16,
).to(device)

# Euler-ancestral scheduler built from the base model's "scheduler" subfolder.
euler_a = EulerAncestralDiscreteScheduler.from_pretrained(
    model_id,
    subfolder="scheduler",
)

# VAE from the `madebyollin/sdxl-vae-fp16-fix` repository, loaded in float16.
vae = AutoencoderKL.from_pretrained(
    "madebyollin/sdxl-vae-fp16-fix",
    torch_dtype=torch.float16,
)

# Assemble the SDXL adapter pipeline and move it to the selected device.
pipe = StableDiffusionXLAdapterPipeline.from_pretrained(
    model_id,
    vae=vae,
    adapter=adapter,
    scheduler=euler_a,
    torch_dtype=torch.float16,
    variant="fp16",
).to(device)

# xformers memory-efficient attention stays disabled because this notebook runs on a Mac.
# pipe.enable_xformers_memory_efficient_attention()

# Pose detector; it is not moved to `device` in this cell.
open_pose = OpenposeDetector.from_pretrained("lllyasviel/Annotators")

Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

### Conditional Image

In [12]:
# `Image` (PIL) and `np` come from the notebook's import cell; they are not re-imported here.
url = "https://huggingface.co/Adapter/t2iadapter/resolve/main/people.jpg"

# Download the reference photo and extract the pose map
# (detected at 512 px, returned at 1024 px).
pose_map = open_pose(
    load_image(url),
    detect_resolution=512,
    image_resolution=1024,
)

# Reverse the channel axis of the pose map (RGB -> BGR, assuming the detector
# returns an RGB image) and wrap the result back into a PIL image.
pose_reversed = np.array(pose_map)[:, :, ::-1]
image = Image.fromarray(np.uint8(pose_reversed))

### Generate images

In [13]:
# Text guidance for the pose-conditioned generation.
prompt = "A couple, 4k photo, highly detailed"
negative_prompt = "anime, cartoon, graphic, text, painting, crayon, graphite, abstract, glitch, deformed, mutated, ugly, disfigured"

# Run the pipeline with the pose map as the adapter input.
openpose_output = pipe(
    image=image,
    prompt=prompt,
    negative_prompt=negative_prompt,
    guidance_scale=7.5,
    adapter_conditioning_scale=1,
    num_inference_steps=30,
)

# Keep the first generated image and write it to disk.
gen_images = openpose_output.images[0]
gen_images.save('out_openpose.png')

  0%|          | 0/30 [00:00<?, ?it/s]

## Depth-MiDaS

In [14]:
# Base SDXL checkpoint that the MiDaS depth adapter conditions.
model_id = 'stabilityai/stable-diffusion-xl-base-1.0'

# Depth (MiDaS) T2I-Adapter in half precision. The keyword is spelled `variant`;
# it requests the fp16 weight files of the adapter repository.
adapter = T2IAdapter.from_pretrained(
    "TencentARC/t2i-adapter-depth-midas-sdxl-1.0",
    torch_dtype=torch.float16,
    variant="fp16",
).to(device)

# Euler-ancestral scheduler built from the base model's "scheduler" subfolder.
euler_a = EulerAncestralDiscreteScheduler.from_pretrained(
    model_id,
    subfolder="scheduler",
)

# VAE from the `madebyollin/sdxl-vae-fp16-fix` repository, loaded in float16.
vae = AutoencoderKL.from_pretrained(
    "madebyollin/sdxl-vae-fp16-fix",
    torch_dtype=torch.float16,
)

# Assemble the SDXL adapter pipeline and move it to the selected device.
pipe = StableDiffusionXLAdapterPipeline.from_pretrained(
    model_id,
    vae=vae,
    adapter=adapter,
    scheduler=euler_a,
    torch_dtype=torch.float16,
    variant="fp16",
).to(device)

# xformers memory-efficient attention stays disabled because this notebook runs on a Mac.
# pipe.enable_xformers_memory_efficient_attention()

# MiDaS depth estimator loaded from the DPT-Large checkpoint file.
midas_depth = MidasDetector.from_pretrained(
    "valhalla/t2iadapter-aux-models",
    filename="dpt_large_384.pt",
    model_type="dpt_large",
).to(device)

Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

Downloading dpt_large_384.pt:   0%|          | 0.00/1.38G [00:00<?, ?B/s]

### Conditional Image

In [15]:
# Download the reference picture and estimate its depth map with MiDaS
# (detected at 512 px, returned at 1024 px).
url = "https://huggingface.co/Adapter/t2iadapter/resolve/main/figs_SDXLV1.0/org_mid.jpg"
image = midas_depth(
    load_image(url),
    detect_resolution=512,
    image_resolution=1024,
)

### Generate images

In [16]:
# Text guidance for the MiDaS-depth-conditioned generation.
prompt = "A photo of a room, 4k photo, highly detailed"
negative_prompt = "anime, cartoon, graphic, text, painting, crayon, graphite, abstract, glitch, deformed, mutated, ugly, disfigured"

# Run the pipeline with the depth map as the adapter input.
midas_output = pipe(
    image=image,
    prompt=prompt,
    negative_prompt=negative_prompt,
    guidance_scale=7.5,
    adapter_conditioning_scale=1,
    num_inference_steps=30,
)

# Keep the first generated image and write it to disk.
gen_images = midas_output.images[0]
gen_images.save('out_midas.png')

  0%|          | 0/30 [00:00<?, ?it/s]

## Depth-Zoe

In [43]:
# Base SDXL checkpoint that the Zoe depth adapter conditions.
model_id = 'stabilityai/stable-diffusion-xl-base-1.0'

# Depth (Zoe) T2I-Adapter in half precision. The keyword is spelled `variant`;
# it requests the fp16 weight files of the adapter repository.
adapter = T2IAdapter.from_pretrained(
    "TencentARC/t2i-adapter-depth-zoe-sdxl-1.0",
    torch_dtype=torch.float16,
    variant="fp16",
).to(device)

# Euler-ancestral scheduler built from the base model's "scheduler" subfolder.
euler_a = EulerAncestralDiscreteScheduler.from_pretrained(
    model_id,
    subfolder="scheduler",
)

# VAE from the `madebyollin/sdxl-vae-fp16-fix` repository, loaded in float16.
vae = AutoencoderKL.from_pretrained(
    "madebyollin/sdxl-vae-fp16-fix",
    torch_dtype=torch.float16,
)

# Assemble the SDXL adapter pipeline and move it to the selected device.
pipe = StableDiffusionXLAdapterPipeline.from_pretrained(
    model_id,
    vae=vae,
    adapter=adapter,
    scheduler=euler_a,
    torch_dtype=torch.float16,
    variant="fp16",
).to(device)

# xformers memory-efficient attention stays disabled because this notebook runs on a Mac.
# pipe.enable_xformers_memory_efficient_attention()

# ZoeDepth-NK estimator. The checkpoint has to match `model_type="zoedepth_nk"`:
# `dpt_large_384.pt` is the MiDaS DPT-Large file used in the Depth-MiDaS section,
# so the Zoe NK checkpoint `zoed_nk.pth` is loaded here instead.
zoe_depth = ZoeDetector.from_pretrained(
    "valhalla/t2iadapter-aux-models",
    filename="zoed_nk.pth",
    model_type="zoedepth_nk",
).to(device)

Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]



Using pretrained resource url::https://github.com/isl-org/ZoeDepth/releases/download/v1.0/ZoeD_M12_NK.pt


### Conditional Image

In [44]:
# Download the reference picture and estimate a gamma-corrected depth map
# with ZoeDepth (detected at 512 px, returned at 1024 px).
url = "https://huggingface.co/Adapter/t2iadapter/resolve/main/figs_SDXLV1.0/org_zeo.jpg"
image = zoe_depth(
    load_image(url),
    gamma_corrected=True,
    detect_resolution=512,
    image_resolution=1024,
)

### Generate images

In [38]:
# Text guidance for the Zoe-depth-conditioned generation.
prompt = "A photo of a orchid, 4k photo, highly detailed"
negative_prompt = "anime, cartoon, graphic, text, painting, crayon, graphite, abstract, glitch, deformed, mutated, ugly, disfigured"

# Run the pipeline with the depth map as the adapter input.
zoe_output = pipe(
    image=image,
    prompt=prompt,
    negative_prompt=negative_prompt,
    guidance_scale=7.5,
    adapter_conditioning_scale=1,
    num_inference_steps=30,
)

# Keep the first generated image and write it to disk.
gen_images = zoe_output.images[0]
gen_images.save('out_zoe.png')

  0%|          | 0/30 [00:00<?, ?it/s]