In [None]:
# depth2img generally gives better results.

import gc
import random
import time

import diffusers
import torch
import PIL

# Pick the compute backend in order of preference: CUDA, then Apple MPS, then CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
elif torch.backends.mps.is_available():
    DEVICE = "mps"
else:
    DEVICE = "cpu"

# Half precision on the accelerator backends, full precision on CPU.
DTYPE = torch.float32 if DEVICE == "cpu" else torch.float16
print(f"Using {DEVICE}")

def get_generator(seed):
    """Return a torch.Generator seeded with `seed`.

    The generator is created on DEVICE when DEVICE is "cuda" or "mps";
    otherwise torch's default generator device is used.

    Args:
        seed: Integer seed passed to `manual_seed`.

    Returns:
        The seeded torch.Generator.
    """
    on_accelerator = DEVICE in ("cuda", "mps")
    rng = torch.Generator(DEVICE) if on_accelerator else torch.Generator()
    rng.manual_seed(seed)
    return rng

def load_pipe():
    """Build the Stable Diffusion 2.1 img2img pipeline and set it up for DEVICE.

    Loads the "fp16" revision of stabilityai/stable-diffusion-2-1 using the
    module-level DTYPE, replaces the scheduler with a
    DPMSolverMultistepScheduler built from the same repository's scheduler
    config, then applies the per-device setup below.

    Returns:
        The configured diffusers.StableDiffusionImg2ImgPipeline.
    """
    # There's no fp32 revision.
    # https://huggingface.co/stabilityai/stable-diffusion-2-1/tree/main
    # https://huggingface.co/stabilityai/stable-diffusion-2-1/resolve/fp16/model_index.json
    pipe = diffusers.StableDiffusionImg2ImgPipeline.from_pretrained(
        "stabilityai/stable-diffusion-2-1",
        revision="fp16",
        # Use the module-level DTYPE; the previous expression referenced an
        # undefined `use_cuda` name and raised NameError on every call.
        torch_dtype=DTYPE)
    pipe.scheduler = diffusers.DPMSolverMultistepScheduler.from_pretrained(
        "stabilityai/stable-diffusion-2-1", subfolder="scheduler")
    if DEVICE == "cuda":
        # NOTE(review): moving to DEVICE before enabling model CPU offload may
        # be redundant -- confirm against the diffusers memory docs.
        pipe = pipe.to(DEVICE)
        pipe.enable_model_cpu_offload()
    elif DEVICE == "mps":
        pipe = pipe.to(DEVICE)
        pipe.enable_attention_slicing()
    # On CPU the pipeline is returned as loaded, with no extra setup.
    return pipe

In [None]:
# Build the pipeline once in its own cell; inference() below reads this
# module-level `pipe`, so this cell must run before any generation cell.
pipe = load_pipe()

In [None]:
def inference(prompt, n_images, guidance, steps, width, height, seed, img, strength, neg_prompt):
    """Run the global img2img `pipe` on `img` and return the generated images.

    The input image is first scaled, keeping its aspect ratio, so that it fits
    inside a `width` x `height` box; `width` and `height` are used only for
    that resize and are not forwarded to the pipeline.

    Args:
        prompt: Text prompt.
        n_images: Passed as `num_images_per_prompt`.
        guidance: Passed as `guidance_scale`.
        steps: Passed as `num_inference_steps`.
        width: Width of the box the input image is fitted into.
        height: Height of the box the input image is fitted into.
        seed: Seed handed to get_generator().
        img: Source PIL image.
        strength: Passed as `strength`.
        neg_prompt: Passed as `negative_prompt`.

    Returns:
        The `.images` attribute of the pipeline output.
    """
    # Largest uniform scale at which both dimensions still fit in the box.
    scale = min(height / img.height, width / img.width)
    fitted_size = (int(img.width * scale), int(img.height * scale))
    fitted = img.resize(fitted_size, PIL.Image.Resampling.LANCZOS)

    rng = get_generator(seed)
    output = pipe(
        prompt,
        image=fitted,
        strength=strength,
        negative_prompt=neg_prompt,
        num_images_per_prompt=n_images,
        num_inference_steps=steps,
        guidance_scale=guidance,
        generator=rng)
    return output.images

def run(prompt, image, strength, seed=11):
    """Generate a single image with fixed settings and print the elapsed time.

    Args:
        prompt: Text prompt.
        image: Source PIL image handed to inference().
        strength: img2img strength, in [0, 1].
        seed: Generator seed; pass e.g. random.randint(0, 2147483647) for a
            different result on each call.

    Returns:
        The first image returned by inference().
    """
    began = time.time()
    images = inference(
        prompt=prompt,
        n_images=1,      # number of images
        guidance=7.5,    # 7.5 max = 15
        steps=25,        # [2, 100]
        width=768,       # [64, 1024] step=8
        height=768,      # [64, 1024] step=8
        seed=seed,
        img=image,
        strength=strength,
        neg_prompt="")
    elapsed = time.time() - began
    print("Took %.1fs" % elapsed)
    return images[0]

In [None]:
def getimg(name="teckel.webp", size=(768, 768)):
    """Open an image from the "out/" directory and resize it to `size`.

    Args:
        name: File name inside "out/".
        size: Target (width, height). The image is resized to exactly this
            size, so the aspect ratio is not preserved when it differs from
            the source image's.

    Returns:
        The PIL image, LANCZOS-resized when its size differed from `size`.
    """
    img = PIL.Image.open("out/" + name)
    # The earlier halve-until-<=1024 loop was dead code: its result was always
    # overwritten by the fixed target size, so only the fixed size is kept.
    size = tuple(size)  # img.size is a tuple; normalise so a list argument compares equal
    if size != img.size:
        print("Resized from", img.size, "to", size)
        img = img.resize(size, PIL.Image.Resampling.LANCZOS)
    return img

# Alternative prompts kept for quick experimentation:
#p = "a killer robot"
#p = "comic book, marvel, superflat, dc comics, graphic novel"
p = "flowers"
# Generate from the source image with an explicit seed so the result is repeatable.
img = run(p, getimg(), strength=0.75, seed=65)
# NOTE(review): the output name says "comic_book" while the active prompt is
# "flowers" -- confirm the file name is still the intended one.
img.save("out/comic_book.png")
# Bare last expression so the notebook renders the generated image.
img