# **SDXL image generation**

In [10]:
# @title Install packages

!pip install -q invisible_watermark transformers accelerate safetensors diffusers --upgrade

In [4]:
# @title Import packages

import torch
from diffusers import StableDiffusionXLImg2ImgPipeline, DiffusionPipeline, KDPM2AncestralDiscreteScheduler, StableDiffusionXLPipeline, AutoencoderKL
import gc
from PIL import Image
import requests
from io import BytesIO
from IPython.display import display

In [5]:
# @title Load SDXL model

model_base = "stabilityai/stable-diffusion-xl-base-1.0"
# Replacement VAE so the pipeline can run in fp16 precision without generating NaNs.
v_autoencoder = "madebyollin/sdxl-vae-fp16-fix"

vae = AutoencoderKL.from_pretrained(v_autoencoder, torch_dtype=torch.float16)

# Base pipeline: fp16 safetensors weights, the VAE loaded above, no watermarker.
pipe = StableDiffusionXLPipeline.from_pretrained(
    model_base,
    vae=vae,
    torch_dtype=torch.float16,
    variant="fp16",
    use_safetensors=True,
    add_watermarker=False,
)

pipe.to("cuda")

Downloading (…)lve/main/config.json:   0%|          | 0.00/631 [00:00<?, ?B/s]

Downloading (…)ch_model.safetensors:   0%|          | 0.00/335M [00:00<?, ?B/s]

Downloading (…)ain/model_index.json:   0%|          | 0.00/609 [00:00<?, ?B/s]

Fetching 17 files:   0%|          | 0/17 [00:00<?, ?it/s]

Downloading (…)cheduler_config.json:   0%|          | 0.00/479 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/737 [00:00<?, ?B/s]

Downloading (…)ncoder_2/config.json:   0%|          | 0.00/575 [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/472 [00:00<?, ?B/s]

Downloading (…)tokenizer/merges.txt:   0%|          | 0.00/525k [00:00<?, ?B/s]

Downloading (…)_encoder/config.json:   0%|          | 0.00/565 [00:00<?, ?B/s]

Downloading model.fp16.safetensors:   0%|          | 0.00/1.39G [00:00<?, ?B/s]

Downloading model.fp16.safetensors:   0%|          | 0.00/246M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/460 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/725 [00:00<?, ?B/s]

Downloading (…)kenizer_2/vocab.json:   0%|          | 0.00/1.06M [00:00<?, ?B/s]

Downloading (…)76f/unet/config.json:   0%|          | 0.00/1.68k [00:00<?, ?B/s]

Downloading (…)del.fp16.safetensors:   0%|          | 0.00/5.14G [00:00<?, ?B/s]

Downloading (…)tokenizer/vocab.json:   0%|          | 0.00/1.06M [00:00<?, ?B/s]

Downloading (…)del.fp16.safetensors:   0%|          | 0.00/167M [00:00<?, ?B/s]

Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

StableDiffusionXLPipeline {
  "_class_name": "StableDiffusionXLPipeline",
  "_diffusers_version": "0.20.2",
  "_name_or_path": "stabilityai/stable-diffusion-xl-base-1.0",
  "force_zeros_for_empty_prompt": true,
  "scheduler": [
    "diffusers",
    "EulerDiscreteScheduler"
  ],
  "text_encoder": [
    "transformers",
    "CLIPTextModel"
  ],
  "text_encoder_2": [
    "transformers",
    "CLIPTextModelWithProjection"
  ],
  "tokenizer": [
    "transformers",
    "CLIPTokenizer"
  ],
  "tokenizer_2": [
    "transformers",
    "CLIPTokenizer"
  ],
  "unet": [
    "diffusers",
    "UNet2DConditionModel"
  ],
  "vae": [
    "diffusers",
    "AutoencoderKL"
  ]
}

In [6]:
# @title Load SDXL refiner model

model_refiner = "stabilityai/stable-diffusion-xl-refiner-1.0"

# Refiner (img2img) pipeline: fp16 safetensors weights, no watermarker.
pipe_refiner = StableDiffusionXLImg2ImgPipeline.from_pretrained(
    model_refiner,
    torch_dtype=torch.float16,
    variant="fp16",
    use_safetensors=True,
    add_watermarker=False,
)

# Model CPU offload is enabled here in place of `pipe_refiner.to("cuda")`.
pipe_refiner.enable_model_cpu_offload()

Downloading (…)ain/model_index.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

Fetching 13 files:   0%|          | 0/13 [00:00<?, ?it/s]

Downloading (…)ncoder_2/config.json:   0%|          | 0.00/575 [00:00<?, ?B/s]

Downloading (…)cheduler_config.json:   0%|          | 0.00/479 [00:00<?, ?B/s]

Downloading (…)del.fp16.safetensors:   0%|          | 0.00/4.52G [00:00<?, ?B/s]

Downloading (…)ef86/vae/config.json:   0%|          | 0.00/642 [00:00<?, ?B/s]

Downloading (…)del.fp16.safetensors:   0%|          | 0.00/167M [00:00<?, ?B/s]

Downloading (…)kenizer_2/merges.txt:   0%|          | 0.00/525k [00:00<?, ?B/s]

Downloading (…)kenizer_2/vocab.json:   0%|          | 0.00/1.06M [00:00<?, ?B/s]

Downloading (…)f86/unet/config.json:   0%|          | 0.00/1.71k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/725 [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/460 [00:00<?, ?B/s]

Downloading model.fp16.safetensors:   0%|          | 0.00/1.39G [00:00<?, ?B/s]

Downloading (…)del.fp16.safetensors:   0%|          | 0.00/167M [00:00<?, ?B/s]

Loading pipeline components...:   0%|          | 0/5 [00:00<?, ?it/s]

In [7]:
# @title Change the scheduler

# Swap the base pipeline's scheduler for KDPM2 ancestral, built from the
# configuration of the scheduler it currently holds.
pipe.scheduler = KDPM2AncestralDiscreteScheduler.from_config(
    pipe.scheduler.config,
    use_karras_sigmas=True,
)
# Seeding is currently disabled:
# generator = torch.Generator().manual_seed(42)

In [8]:
# @title Define draw_image function

def draw_image(
    prompt,
    file_path,
    prompt_2='',
    negative_prompt='',
    negative_prompt_2='',
    guidance_scale=9.0,
    num_inference_steps=25,
    high_noise_frac=0.8,
    seed=None,
):
  """Generate an image with the SDXL base + refiner pipelines and save it.

  The module-level `pipe` runs the first `high_noise_frac` of the denoising
  schedule and returns latents; the module-level `pipe_refiner` then resumes
  from that same fraction and produces the final image, which is written to
  `file_path`.

  Args:
    prompt: Text prompt passed to both pipelines.
    file_path: Destination passed to the resulting image's `save()`.
    prompt_2: Secondary prompt passed to both pipelines (default: empty).
    negative_prompt: Negative prompt passed to both pipelines (default: empty).
    negative_prompt_2: Secondary negative prompt (default: empty).
    guidance_scale: Guidance scale for the base pipeline only; the refiner
      keeps its own default, as before.
    num_inference_steps: Step count given to both pipelines.
    high_noise_frac: Fraction of the schedule handled by the base pipeline;
      used as `denoising_end` for the base and `denoising_start` for the
      refiner so the two always agree.
    seed: Optional integer seed. When given, a seeded `torch.Generator` is
      passed to both pipelines; when None, no generator is passed.

  Returns:
    The refiner's first output image (the same object that was saved).

  Raises:
    ValueError: If `high_noise_frac` is not strictly between 0 and 1.
  """
  if not 0.0 < high_noise_frac < 1.0:
    raise ValueError(
        f"high_noise_frac must be strictly between 0 and 1, got {high_noise_frac!r}"
    )

  # A CPU generator mirrors the notebook's original (disabled) seeding snippet.
  generator = None if seed is None else torch.Generator().manual_seed(seed)

  # Prompt arguments are identical for both stages.
  prompt_kwargs = dict(
      prompt=prompt,
      prompt_2=prompt_2,
      negative_prompt=negative_prompt,
      negative_prompt_2=negative_prompt_2,
  )

  # Stage 1: base model, stopped early and kept in latent space.
  image_base_latent = pipe(
      **prompt_kwargs,
      guidance_scale=guidance_scale,
      num_inference_steps=num_inference_steps,
      output_type="latent",
      denoising_end=high_noise_frac,
      generator=generator,
      ).images[0]

  # Release cached memory before the refiner runs.
  gc.collect()
  torch.cuda.empty_cache()

  # Stage 2: refiner resumes where the base stopped.
  # `strength` is intentionally not passed: diffusers documents it as ignored
  # once `denoising_start` is set, so the old `strength=0.3` had no effect.
  image_refiner = pipe_refiner(
      **prompt_kwargs,
      image=image_base_latent,
      num_inference_steps=num_inference_steps,
      denoising_start=high_noise_frac,
      generator=generator,
      ).images[0]

  gc.collect()
  torch.cuda.empty_cache()

  image_refiner.save(file_path)
  return image_refiner

In [9]:
# @title Run SDXL image generation function

# Generate one image for the prompt and write it to the given path.
draw_image(prompt="a cute toy", file_path="/content/output.png")

  0%|          | 0/39 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]