# Stable Diffusion

text-to-image generator

Official GitHub repository: https://github.com/CompVis/stable-diffusion

This notebook was copied from https://github.com/woctezuma/stable-diffusion-colab

In [None]:
# Run nvidia-smi to show the GPU(s) and driver visible to this runtime.
!nvidia-smi

In [None]:
# Run PyTorch's collect_env module to print its environment report.
!python -m torch.utils.collect_env

In [None]:
# Quietly install the third-party packages this notebook needs.
# NOTE(review): versions are unpinned — consider pinning them for reproducible runs.
%pip install -q accelerate diffusers transformers safetensors invisible-watermark scipy mediapy

Workaround needed to use `torch.compile`:

In [None]:
%%bash
# Set the locale and the NVIDIA/CUDA library search paths, then run ldconfig
# on the NVIDIA library directory.
# NOTE(review): variables exported inside a %%bash cell presumably do not
# persist into the Python kernel; only the ldconfig call is likely to have a
# lasting effect — confirm on the target runtime.
export LC_ALL="en_US.UTF-8"
export LD_LIBRARY_PATH="/usr/lib64-nvidia"
export LIBRARY_PATH="/usr/local/cuda/lib64/stubs"
ldconfig /usr/lib64-nvidia

In [None]:
import torch, mediapy
from diffusers import (
	StableDiffusionPipeline,
	StableDiffusionImg2ImgPipeline,
	StableDiffusionXLPipeline,
	StableDiffusionXLImg2ImgPipeline,

	DDIMScheduler,
	EulerDiscreteScheduler,
	DPMSolverSinglestepScheduler,
	DPMSolverMultistepScheduler
)

In [None]:
# Load Stable Diffusion 2.1 in half precision on the GPU, using a
# DPM-Solver multistep scheduler (SDE DPM-Solver++ with Karras sigmas).
model_id = "stabilityai/stable-diffusion-2-1"
scheduler = DPMSolverMultistepScheduler.from_pretrained(
	model_id,
	subfolder="scheduler",
	use_karras_sigmas=True,
	algorithm_type="sde-dpmsolver++",
)
pipe = StableDiffusionPipeline.from_pretrained(
	model_id,
	scheduler=scheduler,
	# `variant="fp16"` selects the half-precision weight files; it replaces the
	# deprecated `revision="fp16"` and matches how the SDXL cells load weights.
	variant="fp16",
	torch_dtype=torch.float16,
	safety_checker=None,
).to("cuda")
# Compile this pipeline's own UNet. The original assigned the result to
# `base.unet`, but `base` is only defined in a later cell, so a fresh
# Restart & Run All failed here with a NameError.
pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)

In [None]:
# Generate a batch of images with the Stable Diffusion 2.1 pipeline.
sd_output = pipe(
	prompt="a photo of an astronaut riding a horse on mars",
	# negative_prompt="",
	# height=768, width=768,
	num_images_per_prompt=5,
	guidance_scale=7.5, # should be between 0 and 20
	num_inference_steps=50, # should be between 10 and 150
)
images = sd_output.images

# Display every generated image in the cell output.
mediapy.show_images(images)
# images[0].save("output.jpg")

In [None]:
# Load the SDXL base pipeline (fp16 weights), move it to the GPU, and
# replace its UNet with a torch.compile'd version.
sdxl_base_id = "stabilityai/stable-diffusion-xl-base-1.0"
base = StableDiffusionXLPipeline.from_pretrained(
	sdxl_base_id,
	torch_dtype=torch.float16,
	variant="fp16",
)
base = base.to("cuda")
base.unet = torch.compile(base.unet, mode="reduce-overhead", fullgraph=True)

In [None]:
pos_prompt = "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k"
# neg_prompt = ""

# Run the SDXL base model with denoising_end=0.8 and keep the result as
# latents (output_type="latent") so a later stage can continue from them.
base_output = base(
	prompt=pos_prompt,
	# negative_prompt=neg_prompt,
	# height=1024, width=1024,
	num_inference_steps=40,
	guidance_scale=7.5,
	denoising_end=0.8,
	output_type="latent",
)
image = base_output.images

# Drop the base pipeline and ask PyTorch to release cached GPU memory.
# NOTE(review): `base` no longer exists after this cell, so later cells must
# not reference it.
del base
torch.cuda.empty_cache()

In [None]:
# Load the SDXL refiner pipeline (fp16 weights), move it to the GPU, and
# compile its UNet.
#
# The refiner loads its own second text encoder and VAE from its checkpoint.
# The original passed `base.text_encoder_2` and `base.vae`, but the previous
# cell runs `del base`, so this cell raised a NameError on Restart & Run All.
refiner = StableDiffusionXLImg2ImgPipeline.from_pretrained(
	"stabilityai/stable-diffusion-xl-refiner-1.0",
	torch_dtype=torch.float16,
	variant="fp16",
).to("cuda")
refiner.unet = torch.compile(refiner.unet, mode="reduce-overhead", fullgraph=True)

In [None]:
# Continue denoising the base model's latents with the refiner, starting at
# denoising_start=0.8 (the same fraction the base stage stopped at).
refiner_output = refiner(
	prompt=pos_prompt,
	# negative_prompt=neg_prompt,
	# height=1024, width=1024,
	num_inference_steps=10,
	guidance_scale=7.5,
	denoising_start=0.8,
	image=image,
)
image_bis = refiner_output.images

# Display the refined image(s) in the cell output.
mediapy.show_images(image_bis)
# image_bis[0].save("output.jpg")