In [None]:
# References:
# https://huggingface.co/spaces/Norod78/sd2-simpsons-blip/blob/main/app.py
# https://huggingface.co/Norod78/sd2-simpsons-blip

In [None]:
import gc

import torch

# Collect unreachable Python objects first so that any tensors they still
# hold are handed back to PyTorch's caching allocator, and only then ask
# PyTorch to release its unused cached blocks. Doing it the other way
# round leaves the memory freed by the collector sitting in the cache.
gc.collect()
torch.cuda.empty_cache()

In [None]:
%matplotlib widget
from matplotlib import pyplot as plt

import diffusers
import torch
from PIL import Image

# Hugging Face model repository loaded by both pipelines below.
model_id = "Norod78/sd2-simpsons-blip"
# One scheduler instance, passed to (and therefore shared by) both the
# text-to-image and the image-to-image pipeline.
# NOTE(review): the parameter values presumably mirror the referenced
# Space's app.py -- confirm before changing them.
scheduler = diffusers.DPMSolverMultistepScheduler(
    beta_start=0.00085,
    beta_end=0.012,
    beta_schedule="scaled_linear",
    num_train_timesteps=1000,
    trained_betas=None,
    prediction_type="epsilon",
    thresholding=False,
    algorithm_type="dpmsolver++",
    solver_type="midpoint",
    lower_order_final=True)

# Couldn't make it work with CUDA on an RTX 2060 / 6GB of VRAM.
# The trailing `and False` therefore forces `use_cuda` to False even when a
# GPU is available: both pipelines are loaded in float32 and the
# `if use_cuda:` branch below never runs. Remove `and False` to re-enable
# the CUDA path.
use_cuda = torch.cuda.is_available() and False
# Text-to-image pipeline (used by txt_to_img).
pipe = diffusers.StableDiffusionPipeline.from_pretrained(
    model_id,
    torch_dtype=torch.float16 if use_cuda else torch.float32,
    scheduler=scheduler)
# Image-to-image pipeline (used by img_to_img), loaded separately from the
# same model id.
pipe_i2i = diffusers.StableDiffusionImg2ImgPipeline.from_pretrained(
    model_id,
    torch_dtype=torch.float16 if use_cuda else torch.float32,
    scheduler=scheduler)
if use_cuda:
    # Move both pipelines to the GPU and turn on attention slicing for each.
    pipe.to("cuda")
    pipe.enable_attention_slicing()
    pipe_i2i.to("cuda")
    pipe_i2i.enable_attention_slicing()


def inference(prompt, guidance, steps, width=512, height=512, seed=0, img=None, strength=0.5, neg_prompt=""):
    """Generate one image from ``prompt``, optionally starting from ``img``.

    A falsy ``seed`` (including the default ``0``) leaves the generator
    unset, so the pipeline is called with ``generator=None``. Any other
    seed creates a ``torch.Generator`` on CUDA when ``use_cuda`` is set,
    otherwise a default-device one, and seeds it.

    When ``img`` is ``None`` the request goes to ``txt_to_img``; otherwise
    it goes to ``img_to_img`` together with ``strength``. The value
    returned by the chosen helper is returned unchanged.
    """
    if seed:
        rng = torch.Generator("cuda") if use_cuda else torch.Generator()
        rng.manual_seed(seed)
    else:
        rng = None

    if img is None:
        return txt_to_img(prompt, neg_prompt, guidance, steps, width, height, rng)
    return img_to_img(prompt, neg_prompt, img, strength, guidance, steps, width, height, rng)

def txt_to_img(prompt, neg_prompt, guidance, steps, width, height, generator):
    """Run the text-to-image pipeline ``pipe`` and return its first image.

    ``neg_prompt``, ``steps``, ``guidance``, ``width``, ``height`` and
    ``generator`` are forwarded to the pipeline call as
    ``negative_prompt``, ``num_inference_steps``, ``guidance_scale``,
    ``width``, ``height`` and ``generator`` respectively.
    """
    call_kwargs = {
        "negative_prompt": neg_prompt,
        "num_inference_steps": steps,
        "guidance_scale": guidance,
        "width": width,
        "height": height,
        "generator": generator,
    }
    output = pipe(prompt, **call_kwargs)
    return output.images[0]

def img_to_img(prompt, neg_prompt, img, strength, guidance, steps, width, height, generator):
    """Run the image-to-image pipeline ``pipe_i2i`` and return its first image.

    ``img`` is first scaled, preserving its aspect ratio, so that it fits
    inside a ``width`` x ``height`` box (LANCZOS resampling). The scaled
    image is then used as the starting point for generation.

    Args:
        prompt: Text prompt forwarded to the pipeline.
        neg_prompt: Negative prompt forwarded as ``negative_prompt``.
        img: Source image; must expose ``width``, ``height`` and
            ``resize`` (a PIL image).
        strength: Forwarded as ``strength``.
        guidance: Forwarded as ``guidance_scale``.
        steps: Forwarded as ``num_inference_steps``.
        width: Maximum width of the resized input image.
        height: Maximum height of the resized input image.
        generator: ``torch.Generator`` or ``None``, forwarded as-is.

    Returns:
        The first entry of the pipeline result's ``images``.
    """
    ratio = min(height / img.height, width / img.width)
    target_size = (int(img.width * ratio), int(img.height * ratio))
    img = img.resize(target_size, Image.LANCZOS)
    # The input image is passed as `image`: the older `init_image` keyword
    # was deprecated and then removed from the diffusers img2img pipeline.
    # `width`/`height` are intentionally not forwarded -- they are not
    # parameters of the img2img call (the output size follows the input
    # image) and are rejected by diffusers releases whose `__call__` has
    # no `**kwargs` catch-all.
    result = pipe_i2i(
        prompt,
        negative_prompt=neg_prompt,
        image=img,
        num_inference_steps=steps,
        strength=strength,
        guidance_scale=guidance,
        generator=generator)
    return result.images[0]

def run(prompt, *, guidance=7.5, steps=25, width=768, height=768, seed=10,
        image=None, strength=0.5, neg_prompt=""):
    """Generate an image for ``prompt``, show it with matplotlib, and return it.

    All settings are keyword-only and default to the values this notebook
    previously hard-coded, so ``run(prompt)`` behaves exactly as before.

    Args:
        prompt: Text prompt.
        guidance: Guidance scale (the original notes give a maximum of 15).
        steps: Number of inference steps (original notes: range [2, 75]).
        width: Requested width in pixels.
        height: Requested height in pixels.
        seed: Generator seed; a falsy value means no seeded generator.
        image: Optional source image; when given, ``inference`` takes the
            image-to-image path.
        strength: Image-to-image strength (original notes: range [0, 1]).
        neg_prompt: Negative prompt.

    Returns:
        The image returned by ``inference``.
    """
    image_out = inference(prompt, guidance, steps, width, height, seed, image, strength, neg_prompt)
    # Display the result in a fresh figure without axis decorations.
    plt.figure()
    plt.imshow(image_out)
    plt.axis("off")
    return image_out

# TODO: the generated image does not look Simpsons-styled at all.
run("A guy with a beard")