# Diffusion ToolBox

### Downloads and imports

In [None]:
import os
from PIL import Image, ImageDraw
import PIL
import cv2
import numpy as np
from IPython.display import HTML
from base64 import b64encode

import cv2 as cv
import torch
from torch import autocast
from torch.nn import functional as F
from diffusers import StableDiffusionPipeline, AutoencoderKL
from diffusers import UNet2DConditionModel, PNDMScheduler, LMSDiscreteScheduler
from diffusers.schedulers.scheduling_ddim import DDIMScheduler
from transformers import CLIPTextModel, CLIPTokenizer
from tqdm.auto import tqdm
from huggingface_hub import notebook_login

from SD_functions import *
from zoom import *
from batch import *

import matplotlib.pyplot as plt

# Run on the GPU when CUDA is available; otherwise fall back to the CPU so the
# `.to(device)` calls in the model-loading cell do not fail on CUDA-less hosts.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Interactive Hugging Face Hub login. The model-loading cell passes
# `use_auth_token=True`, so a token needs to be available before it runs.
notebook_login()

### Init models

In [None]:
# Hub repositories the individual components are pulled from.
_SD_REPO = 'CompVis/stable-diffusion-v1-4'
_CLIP_REPO = 'openai/clip-vit-large-patch14'

# Beta-schedule settings shared by both schedulers created below.
_beta_schedule = dict(
    beta_start=0.00085,
    beta_end=0.012,
    beta_schedule='scaled_linear',
)

# Autoencoder: used to decode the latents into image space.
vae = AutoencoderKL.from_pretrained(
    _SD_REPO, subfolder='vae', use_auth_token=True
).to(device)

# Tokenizer and text encoder: tokenize and encode the text prompt.
tokenizer = CLIPTokenizer.from_pretrained(_CLIP_REPO)
text_encoder = CLIPTextModel.from_pretrained(_CLIP_REPO).to(device)

# UNet: generates the latents.
unet = UNet2DConditionModel.from_pretrained(
    _SD_REPO, subfolder='unet', use_auth_token=True
).to(device)

# Scheduler used for regular inference.
scheduler = LMSDiscreteScheduler(num_train_timesteps=1000, **_beta_schedule)

# Separate DDIM scheduler, handed to the helpers as `scheduler_inpaint`;
# `set_format("pt")` is applied to the freshly built instance and its return
# value is what gets stored.
scheduler_inpaint = DDIMScheduler(
    clip_sample=False,
    set_alpha_to_one=False,
    **_beta_schedule,
).set_format("pt")

### Latent exploration utils

In [None]:
# Disabled example, kept for reference. Uncomment to start from an empty
# buffer and call `zoom_out` (presumably provided by `from zoom import *` —
# confirm), feeding the previous result back in through `previous_buffer`.
# old_run = []

# old_run = zoom_out([
#         (1, 'no'),
#     ], tokenizer=tokenizer, text_encoder=text_encoder,
#     scheduler=scheduler, scheduler_inpaint=scheduler_inpaint, 
#     vae=vae, unet=unet, device=device, previous_buffer=old_run, skip=0, width=512, height=768)

In [None]:
prompt = "digital painting of a house in a luxurious forest by Marc Simonetti"

# Generate images for the prompt with the models and schedulers loaded above.
# The result is displayed and saved as a single image below.
outs = image_batch(
    prompt,
    tokenizer=tokenizer,
    text_encoder=text_encoder,
    scheduler=scheduler,
    scheduler_inpaint=scheduler_inpaint,
    vae=vae,
    unet=unet,
    device=device,
    n_batch=5,
    batch_size=3,
    width=768,
)
plt.imshow(outs)

# `plt.imsave` does not create missing directories, so make sure the output
# folder exists before writing; otherwise a fresh checkout fails here with
# FileNotFoundError after the whole batch has already been generated.
out_dir = "out_ex"
os.makedirs(out_dir, exist_ok=True)

# File name is the prompt with whitespace runs replaced by underscores.
plt.imsave(os.path.join(out_dir, "_".join(prompt.split()) + ".jpg"), outs)