In [None]:
# Source: https://github.com/qunash/stable-diffusion-2-gui/
# https://github.com/qunash/stable-diffusion-2-gui/blob/main/stable_diffusion_2_0.ipynb
# https://huggingface.co/stabilityai/stable-diffusion-2-1

# Requires:
#   accelerate, scipy, triton, ftfy, transformers
# pip install --upgrade git+https://github.com/huggingface/diffusers.git@main
# pip install --upgrade git+https://github.com/huggingface/transformers/

In [None]:
import gc

import torch

# Free memory left over from earlier runs of the notebook.
# Collect garbage first: tensors that are only kept alive by unreachable
# Python objects still occupy their CUDA blocks until they are collected,
# and empty_cache() can only release blocks that are no longer occupied.
gc.collect()
torch.cuda.empty_cache()

In [None]:
%matplotlib widget
from matplotlib import pyplot as plt

import random

import diffusers
import torch
from PIL import Image

# Global device switch read by every pipeline helper below: when True the
# pipelines use float16 and are moved to "cuda", otherwise they stay on the
# CPU in float32.
# NOTE(review): presumably meant to be overridden by hand (e.g. set to False)
# to compare GPU vs. CPU performance -- confirm.
use_cuda = torch.cuda.is_available()

def inference(inf_mode, prompt, n_images, guidance, steps, width=768, height=768, seed=0, img=None, strength=0.5, neg_prompt=""):
    """Run one Stable Diffusion job and return the generated images.

    Seeds a ``torch.Generator`` on the active device and dispatches to the
    helper that implements ``inf_mode``.

    Args:
        inf_mode: One of 'txt2img', 'img2img', 'inpaint', 'upscale4x' or
            'depth2img'.
        prompt: Text prompt forwarded to the selected helper.
        n_images: Number of images requested per prompt.
        guidance: Guidance scale forwarded to the pipeline.
        steps: Number of inference steps.
        width: Target width (forwarded to txt2img, img2img and inpaint).
        height: Target height (forwarded to txt2img, img2img and inpaint).
        seed: Seed for the generator; a falsy value (0 or None) selects a
            random seed, which is printed so the run can be reproduced.
        img: Input image payload; required by every mode except 'txt2img'.
            The helpers index it with 'image' (and 'mask' for inpainting).
        strength: Strength value forwarded to img2img.
        neg_prompt: Negative prompt forwarded to the selected helper.

    Returns:
        Whatever the selected helper returns (a list of images).

    Raises:
        ValueError: If ``inf_mode`` is not supported, or if ``img`` is None
            for a mode that needs an input image.
    """
    if not seed:
        # Start at 1 so the randomly chosen seed is never the "unset" value.
        seed = random.randint(1, 2147483647)
    print(f"Seed: {seed}")
    # The seed is always truthy here, so a generator is always created on
    # the device the pipelines will run on.
    generator = torch.Generator('cuda' if use_cuda else 'cpu').manual_seed(seed)

    if inf_mode == 'txt2img':
        return txt_to_img(prompt, n_images, neg_prompt, guidance, steps, width, height, generator, seed)

    # Modes that need an input image, with the name used in the error text.
    image_mode_labels = {
        'img2img': "Image to Image",
        'inpaint': "Inpainting",
        'upscale4x': "Upscale",
        'depth2img': "Depth to Image",
    }
    if inf_mode not in image_mode_labels:
        raise ValueError(f"Unsupported inference mode: {inf_mode!r}")
    if img is None:
        raise ValueError(f"Image is required for {image_mode_labels[inf_mode]} mode")

    if inf_mode == 'img2img':
        return img_to_img(prompt, n_images, neg_prompt, img, strength, guidance, steps, width, height, generator, seed)
    if inf_mode == 'inpaint':
        return inpaint(prompt, n_images, neg_prompt, img, guidance, steps, width, height, generator, seed)
    if inf_mode == 'upscale4x':
        return upscale(prompt, n_images, neg_prompt, img, guidance, steps, generator)
    return depth2img(prompt, n_images, neg_prompt, img, guidance, steps, generator, seed)
    

def txt_to_img(prompt, n_images, neg_prompt, guidance, steps, width, height, generator, seed):
    """Generate images from a text prompt with Stable Diffusion 2.1.

    Loads the pipeline on every call, swaps in a DPM-Solver multistep
    scheduler, and moves the pipeline to CUDA (with attention slicing) when
    ``use_cuda`` is set.  ``seed`` is accepted for signature parity with the
    other helpers but is not used; randomness comes from ``generator``.

    Returns:
        The ``images`` attribute of the pipeline output.
    """
    repo_id = "stabilityai/stable-diffusion-2-1"
    dtype = torch.float16 if use_cuda else torch.float32

    # There's no fp32 revision.
    # https://huggingface.co/stabilityai/stable-diffusion-2-1/tree/main
    # https://huggingface.co/stabilityai/stable-diffusion-2-1/resolve/fp16/model_index.json
    pipe = diffusers.StableDiffusionPipeline.from_pretrained(
        repo_id, revision="fp16", torch_dtype=dtype)
    pipe.scheduler = diffusers.DPMSolverMultistepScheduler.from_pretrained(
        repo_id, subfolder="scheduler")

    if use_cuda:
        pipe = pipe.to("cuda")
        pipe.enable_attention_slicing()

    call_options = {
        "num_images_per_prompt": n_images,
        "negative_prompt": neg_prompt,
        "num_inference_steps": steps,
        "guidance_scale": guidance,
        "width": width,
        "height": height,
        "generator": generator,
    }
    output = pipe(prompt, **call_options)
    return output.images

def img_to_img(prompt, n_images, neg_prompt, img, strength, guidance, steps, width, height, generator, seed):
    """Generate variations of an input image guided by a text prompt.

    Loads the Stable Diffusion 2.1 img2img pipeline on every call, swaps in a
    DPM-Solver multistep scheduler built from the pipeline's own scheduler
    config, and moves the pipeline to CUDA (with attention slicing) when
    ``use_cuda`` is set.

    Args:
        prompt: Text prompt.
        n_images: Number of images per prompt.
        neg_prompt: Negative prompt.
        img: Mapping whose 'image' entry is the input PIL image.
        strength: Strength value forwarded to the pipeline.
        guidance: Guidance scale.
        steps: Number of inference steps.
        width: Maximum width the input image is scaled to fit.
        height: Maximum height the input image is scaled to fit.
        generator: ``torch.Generator`` used for sampling.
        seed: Unused; accepted for signature parity with the other helpers.

    Returns:
        The ``images`` attribute of the pipeline output.
    """
    # The model id used to come from an undefined global; it is now local so
    # the function works on its own.
    model_id = "stabilityai/stable-diffusion-2-1"
    # This repo has no "fp32" revision, so the "fp16" branch is always
    # loaded; torch_dtype still selects the precision used at run time.
    pipe = diffusers.StableDiffusionImg2ImgPipeline.from_pretrained(
        model_id,
        revision="fp16",
        torch_dtype=torch.float16 if use_cuda else torch.float32)
    # from_config() takes a config object; from_pretrained() expects a repo
    # id or path and cannot be given the scheduler config.
    pipe.scheduler = diffusers.DPMSolverMultistepScheduler.from_config(
        pipe.scheduler.config)
    if use_cuda:
        pipe.to("cuda")
        pipe.enable_attention_slicing()
    img = img['image']
    # Scale the image to fit inside width x height, keeping its aspect ratio.
    ratio = min(height / img.height, width / img.width)
    img = img.resize((int(img.width * ratio), int(img.height * ratio)), Image.LANCZOS)
    return pipe(
        prompt,
        num_images_per_prompt=n_images,
        negative_prompt=neg_prompt,
        image=img,
        num_inference_steps=steps,
        strength=strength,
        guidance_scale=guidance,
        generator=generator).images

# TODO Currently supports only 512x512 images
def inpaint(prompt, n_images, neg_prompt, img, guidance, steps, width, height, generator, seed):
    """Repaint the masked region of an image according to a text prompt.

    Loads the Stable Diffusion 2 inpainting pipeline on every call, swaps in
    a DPM-Solver multistep scheduler, and moves the pipeline to CUDA (with
    attention slicing) when ``use_cuda`` is set.  The image and mask are
    padded to a square and then resized to 512x512 before being passed to
    the pipeline.

    Args:
        prompt: Text prompt.
        n_images: Number of images per prompt.
        neg_prompt: Negative prompt.
        img: Mapping with an 'image' entry (input PIL image) and a 'mask'
            entry (PIL mask image).
        guidance: Guidance scale.
        steps: Number of inference steps.
        width: Unused; the working size is fixed at 512x512.
        height: Unused; the working size is fixed at 512x512.
        generator: ``torch.Generator`` used for sampling.
        seed: Unused; accepted for signature parity with the other helpers.

    Returns:
        The ``images`` attribute of the pipeline output.
    """
    # Always load the "fp16" branch; torch_dtype still selects the precision
    # used at run time.
    # NOTE(review): the repo appears to publish only "main" and "fp16"
    # branches, so asking for "fp32" on CPU fails -- confirm on the model page.
    pipe = diffusers.DiffusionPipeline.from_pretrained(
        "stabilityai/stable-diffusion-2-inpainting",
        revision="fp16",
        torch_dtype=torch.float16 if use_cuda else torch.float32)
    pipe.scheduler = diffusers.DPMSolverMultistepScheduler.from_config(
        pipe.scheduler.config)
    if use_cuda:
        pipe.to("cuda")
        pipe.enable_attention_slicing()
    # Pad to a square first so the fixed 512x512 resize does not distort
    # the aspect ratio.
    inp_img = square_padding(img['image'])
    mask = square_padding(img['mask'])
    return pipe(
        prompt,
        image=inp_img.resize((512, 512)),
        mask_image=mask.resize((512, 512)),
        num_images_per_prompt=n_images,
        negative_prompt=neg_prompt,
        num_inference_steps=steps,
        guidance_scale=guidance,
        generator=generator).images

def depth2img(prompt, n_images, neg_prompt, img, guidance, steps, generator, seed):
    """Generate images from an input image with the depth-to-image pipeline.

    Loads the Stable Diffusion 2 depth pipeline on every call, swaps in a
    DPM-Solver multistep scheduler, and moves the pipeline to CUDA (with
    attention slicing) when ``use_cuda`` is set.

    Args:
        prompt: Text prompt.
        n_images: Number of images per prompt.
        neg_prompt: Negative prompt.
        img: Mapping whose 'image' entry is the input PIL image.
        guidance: Guidance scale.
        steps: Number of inference steps.
        generator: ``torch.Generator`` used for sampling.
        seed: Unused; accepted for signature parity with the other helpers.

    Returns:
        The ``images`` attribute of the pipeline output.
    """
    # Always load the "fp16" branch; torch_dtype still selects the precision
    # used at run time.
    # NOTE(review): the repo appears to publish only "main" and "fp16"
    # branches, so asking for "fp32" on CPU fails -- confirm on the model page.
    pipe = diffusers.StableDiffusionDepth2ImgPipeline.from_pretrained(
        "stabilityai/stable-diffusion-2-depth",
        revision="fp16",
        torch_dtype=torch.float16 if use_cuda else torch.float32)
    pipe.scheduler = diffusers.DPMSolverMultistepScheduler.from_config(
        pipe.scheduler.config)
    if use_cuda:
        pipe.to("cuda")
        pipe.enable_attention_slicing()
    return pipe(
        prompt,
        num_images_per_prompt=n_images,
        negative_prompt=neg_prompt,
        image=img['image'],
        num_inference_steps=steps,
        guidance_scale=guidance,
        generator=generator).images

def square_padding(img):
    """Return ``img`` centred on a black square canvas.

    An image that is already square is returned unchanged (the same
    object).  Otherwise a new RGB image whose side equals the longer edge of
    ``img`` is created and ``img`` is pasted into its centre.
    """
    w, h = img.size
    if w != h:
        side = max(w, h)
        canvas = Image.new('RGB', (side, side), (0, 0, 0, 255))
        # Integer offsets that centre the image along the shorter axis.
        offset = ((side - w) // 2, (side - h) // 2)
        canvas.paste(img, offset)
        return canvas
    return img

def upscale(prompt, n_images, neg_prompt, img, guidance, steps, generator):
    """Upscale an image with the Stable Diffusion x4 upscaler.

    Loads the upscale pipeline on every call, moves it to CUDA (with
    attention slicing) when ``use_cuda`` is set, and delegates the actual
    work to ``upscale_tiling``.  Unlike the other modes, the pipeline's
    default scheduler is kept.

    Args:
        prompt: Text prompt.
        n_images: Unused; ``upscale_tiling`` takes no image count.
        neg_prompt: Negative prompt.
        img: Mapping whose 'image' entry is the input PIL image.
        guidance: Guidance scale.
        steps: Number of inference steps.
        generator: ``torch.Generator`` used for sampling.

    Returns:
        The list of images returned by ``upscale_tiling``.
    """
    # Always load the "fp16" branch; torch_dtype still selects the precision
    # used at run time.
    # NOTE(review): the repo appears to publish only "main" and "fp16"
    # branches, so asking for "fp32" on CPU fails -- confirm on the model page.
    pipe = diffusers.StableDiffusionUpscalePipeline.from_pretrained(
        "stabilityai/stable-diffusion-x4-upscaler",
        revision="fp16",
        torch_dtype=torch.float16 if use_cuda else torch.float32)
    if use_cuda:
        pipe.to("cuda")
        pipe.enable_attention_slicing()
    return upscale_tiling(pipe, prompt, neg_prompt, img["image"], guidance, steps, generator)

def upscale_tiling(pipe, prompt, neg_prompt, img, guidance, steps, generator):
    """Upscale ``img`` with ``pipe``, processing large images as 128x128 tiles.

    An image that fits in a single 128x128 tile is upscaled in one pass with
    ``prompt`` and ``neg_prompt``.  A larger image is copied onto a white
    canvas whose sides are rounded up to multiples of 128, cut into 128x128
    tiles, and each tile is upscaled on its own with an empty prompt
    (``prompt`` and ``neg_prompt`` are not used on this path).  The upscaled
    tiles are assembled on a canvas four times the size of the original
    image, so the upscaled padding falls outside the canvas and is dropped.

    The caller's image is not modified.

    Args:
        pipe: Upscale pipeline; called with ``prompt``, ``image``,
            ``num_inference_steps``, ``guidance_scale`` and ``generator``.
        prompt: Text prompt (single-pass path only).
        neg_prompt: Negative prompt (single-pass path only).
        img: Input PIL image.
        guidance: Guidance scale.
        steps: Number of inference steps.
        generator: ``torch.Generator`` shared by all pipeline calls.

    Returns:
        A list of images: the pipeline's ``images`` for the single-pass
        path, or a one-element list holding the assembled image.
    """
    tile_size = 128
    width, height = img.size

    if width <= tile_size and height <= tile_size:
        return pipe(
            prompt=prompt,
            image=img,
            num_inference_steps=steps,
            guidance_scale=guidance,
            negative_prompt=neg_prompt,
            generator=generator).images

    # Round each dimension up to the next multiple of the tile size.
    padded_width = width + (-width % tile_size)
    padded_height = height + (-height % tile_size)

    # Build the padded copy on a fresh white canvas.  Pasting onto the input
    # cannot enlarge it (paste clips to the target's bounds), so the input
    # itself is left alone.
    padded_img = Image.new('RGB', (padded_width, padded_height), (255, 255, 255))
    padded_img.paste(img, (0, 0))

    num_tiles_x = padded_width // tile_size
    num_tiles_y = padded_height // tile_size
    # Sized from the original image; assumes the pipeline returns tiles at
    # 4x their input size.
    upscaled_img = Image.new('RGB', (width * 4, height * 4))
    for x in range(num_tiles_x):
        for y in range(num_tiles_y):
            print(f"Upscaling tile {x * num_tiles_y + y + 1}/{num_tiles_x * num_tiles_y}")
            tile = padded_img.crop((
                x * tile_size,
                y * tile_size,
                (x + 1) * tile_size,
                (y + 1) * tile_size))
            upscaled_tile = pipe(
                prompt="",
                image=tile,
                num_inference_steps=steps,
                guidance_scale=guidance,
                generator=generator).images[0]
            # Edge tiles extend past the canvas; paste clips the overflow.
            upscaled_img.paste(upscaled_tile, (x * upscaled_tile.size[0], y * upscaled_tile.size[1]))
    return [upscaled_img]

def run(prompt):
    """Generate one image for ``prompt`` with fixed settings and display it.

    Calls ``inference`` in txt2img mode, shows the first returned image in a
    new matplotlib figure with the axes hidden, and returns that image.
    """
    print("Processing:")
    settings = {
        "inf_mode": "txt2img",  # img2img, inpaint, upscale4x, depth2img
        "prompt": prompt,
        "n_images": 1,          # number of images
        "guidance": 7.5,        # max = 15
        "steps": 25,            # [2, 100]
        "width": 768,           # [64, 1024] step=8
        "height": 768,          # [64, 1024] step=8
        "seed": 10,             # a falsy seed makes inference() pick a random one
        "img": None,
        "strength": 0.5,        # [0, 1]
        "neg_prompt": "",
    }
    first_image = inference(**settings)[0]
    plt.figure()
    plt.imshow(first_image)
    plt.axis("off")
    return first_image

import os

# Image.save() does not create missing directories, so make sure the output
# folder exists before the (slow) generation result is written.
os.makedirs("out", exist_ok=True)
run("a squirrel cat hybrid").save("out/squicat3.png")
