In [None]:
# Fetch the project sources and make the checkout the working directory, so
# that the relative paths used by the following setup cells
# (requirements.txt, ./download_models.py, ./process_models.py) resolve inside it.
!git clone https://github.com/kk-digital/kcg-ml-sd1p4
%cd kcg-ml-sd1p4

In [None]:
!pip install -r requirements.txt

In [None]:
# Run the repository's model download script.
# NOTE(review): presumably this fetches the pretrained weights used below — confirm in download_models.py.
!python3 ./download_models.py

In [None]:
# Run the repository's model processing script on the downloaded files.
# NOTE(review): presumably this produces the submodel files loaded further down — confirm in process_models.py.
!python3 ./process_models.py

In [None]:
import os
import shutil
import sys
import time
from os.path import join

import torch
import torchvision
from torchvision.transforms import ToPILImage

# Root directory: it is put on the import path and is also the prefix of the
# output folders defined below.
base_directory = "../"
sys.path.insert(0, base_directory)

output_base_dir = join(base_directory, "./output/sd2-notebook/")
output_directory = join(output_base_dir, "demo/")

# Start every run from an empty demo folder: remove whatever a previous run
# left behind (a failed removal, e.g. because the folder does not exist yet,
# is reported but tolerated) and then make sure the folder exists.
try:
    shutil.rmtree(output_directory)
except Exception as e:
    print(e, "\n", "Creating the path...")
os.makedirs(output_directory, exist_ok=True)


from stable_diffusion import StableDiffusion
from stable_diffusion.utils_backend import *
from stable_diffusion.utils_image import *
from stable_diffusion.utils_model import *
from utility.utils_logger import *

from stable_diffusion.constants import IODirectoryTree

device = get_device()


def to_pil(image):
    """Convert an image tensor to a PIL image.

    The tensor is mapped from the [-1, 1] range to [0, 1] via ``(x + 1) / 2``
    and clamped to [0, 1] (so out-of-range values saturate) before being
    passed to ``ToPILImage``.
    """
    unit_range_image = torch.clamp((image + 1.0) / 2.0, min=0.0, max=1.0)
    return ToPILImage()(unit_range_image)

In [None]:
# Build the directory tree helper rooted at `base_directory`.
pt = IODirectoryTree(base_directory)

# Sanity check of the resolved autoencoder checkpoint path; it
# should be `.../kcg-ml-sd1p4/input/model/autoencoder/autoencoder.ckpt`
print(pt.autoencoder)

In [None]:
# define the logistic distribution
def logistic_distribution(loc, scale):
    """Build a logistic distribution with the given location and scale.

    The distribution is obtained by transforming a Uniform(0, 1) base
    distribution: first through the inverse of the sigmoid (i.e. the logit),
    then through the affine map ``loc + scale * x``.

    Args:
        loc: location of the distribution (offset of the affine transform).
        scale: scale of the distribution (factor of the affine transform).

    Returns:
        A ``torch.distributions.TransformedDistribution``.
    """
    logit = torch.distributions.transforms.SigmoidTransform().inv
    shift_and_scale = torch.distributions.transforms.AffineTransform(
        loc=loc, scale=scale
    )
    return torch.distributions.TransformedDistribution(
        torch.distributions.Uniform(0, 1), [logit, shift_and_scale]
    )


def noise_fn(shape, device=device):
    """Sample logistic noise (loc=0.0, scale=0.5) of `shape` and move it to `device`.

    `device` defaults to the notebook-level device as it was when this
    function was defined.
    """
    noise_distribution = logistic_distribution(loc=0.0, scale=0.5)
    return noise_distribution.sample(shape).to(device)

In [None]:
# Create a StableDiffusion object on the selected device without loading
# anything into it yet, then report the memory status as a baseline for the
# load/unload checks in the following cells.
stable_diffusion = StableDiffusion(device=device)
get_memory_status()

In [None]:
# Initialize a latent diffusion model so that its submodels can be loaded from
# disk, load them from the locations listed in the directory tree, and report
# the memory status again to see the effect of the load.
stable_diffusion.quick_initialize().load_submodel_tree(
    **pt.latent_diffusion_submodels_tree
)
get_memory_status()

In [None]:
# Unload the latent diffusion submodels and check the memory usage to test
# whether unloading works. The CUDA cache is emptied first so that memory
# still held by PyTorch's caching allocator does not mask the result.
stable_diffusion.unload_model()
torch.cuda.empty_cache()
get_memory_status()

In [None]:
# Initialize a new stable diffusion model with an explicit `ddim_eta`
# parameter and load its submodel tree; this is the instance used by all the
# sampling cells below.
ddim_eta = 0.0
stable_diffusion = StableDiffusion(device=device, ddim_eta=ddim_eta)
stable_diffusion.quick_initialize().load_submodel_tree(
    **pt.latent_diffusion_submodels_tree
)
get_memory_status()

In [None]:
# Choose a temperature for the sampling (in general, higher means more
# diversity but less quality), generate an image, save it and show it.
# Temperature only makes a difference if `ddim_eta` is different from zero.
temperature = 1.0
prompt = "A woman with flowers in her hair in a courtyard, in the style of Frank Frazetta"

imgs = stable_diffusion.generate_images(
    prompt=prompt, seed=2982, noise_fn=noise_fn, temperature=temperature
)

# The file name records the settings the sample was generated with.
sample_name = f"test_sample_temp{temperature:.3f}_eta{stable_diffusion.ddim_eta:.3f}.png"
save_images(imgs, join(output_directory, sample_name))
to_pil(imgs[0])

In [None]:
# Change the `ddim_eta` parameter, generate another image with the same
# prompt and seed, then save it and show it.
stable_diffusion.ddim_eta = 0.1
temperature = 1.0
prompt = "A woman with flowers in her hair in a courtyard, in the style of Frank Frazetta"

imgs = stable_diffusion.generate_images(
    prompt=prompt, seed=2982, noise_fn=noise_fn, temperature=temperature
)

# The file name records the settings the sample was generated with.
sample_name = f"test_sample_temp{temperature:.3f}_eta{stable_diffusion.ddim_eta:.3f}.png"
save_images(imgs, join(output_directory, sample_name))
to_pil(imgs[0])

In [None]:
# Higher `ddim_eta` values imply higher noise levels: same prompt, seed and
# temperature as before, only `ddim_eta` is raised.
stable_diffusion.ddim_eta = 0.5
temperature = 1.0
prompt = "A woman with flowers in her hair in a courtyard, in the style of Frank Frazetta"

imgs = stable_diffusion.generate_images(
    prompt=prompt, seed=2982, noise_fn=noise_fn, temperature=temperature
)

# The file name records the settings the sample was generated with.
sample_name = f"test_sample_temp{temperature:.3f}_eta{stable_diffusion.ddim_eta:.3f}.png"
save_images(imgs, join(output_directory, sample_name))
to_pil(imgs[0])

In [None]:
# ...and so do higher temperatures: `ddim_eta` is kept at 0.5 and only the
# temperature is raised.
stable_diffusion.ddim_eta = 0.5
temperature = 1.8
prompt = "A woman with flowers in her hair in a courtyard, in the style of Frank Frazetta"

imgs = stable_diffusion.generate_images(
    prompt=prompt, seed=2982, noise_fn=noise_fn, temperature=temperature
)

# The file name records the settings the sample was generated with.
sample_name = f"test_sample_temp{temperature:.3f}_eta{stable_diffusion.ddim_eta:.3f}.png"
save_images(imgs, join(output_directory, sample_name))
to_pil(imgs[0])

In [None]:
# Check how the images change with the `ddim_eta` parameter: sample the same
# prompt/seed at a fixed temperature for `eta_steps` values of `ddim_eta`
# (eta_0, eta_0 + 0.1, ...) and show the results in a single grid image.
temperature = 1.0
images = []
eta_steps = 5
eta_0 = 0.0
prompt = "A woman with flowers in her hair in a courtyard, in the style of Frank Frazetta"
for i in range(eta_steps):
    ddim_eta = eta_0 + i * 0.1
    stable_diffusion.ddim_eta = ddim_eta
    imgs = stable_diffusion.generate_images(
        prompt=prompt,
        seed=2982,
        noise_fn=noise_fn,
        temperature=temperature,
    )
    print(imgs.shape)
    images.append(imgs)
images = torch.cat(images, dim=0)

# The grid is built without normalization (`to_pil` does the [-1, 1] -> [0, 1]
# mapping afterwards). The normalization-only arguments `range` and
# `scale_each` are therefore not passed: they have no effect when
# `normalize=False`, and the `range` keyword (renamed to `value_range`) is
# rejected with a TypeError by current torchvision releases.
grid = torchvision.utils.make_grid(images, normalize=False, pad_value=0)
grid_img = to_pil(grid)

# The file name records the temperature and the first/last `ddim_eta` used.
grid_img.save(
    join(
        output_directory,
        f"test_grid_temp{temperature:.3f}_eta{eta_0:.3f}-{stable_diffusion.ddim_eta:.3f}.png",
    )
)
grid_img

In [None]:
# Check how the images change with the temperature alone: `ddim_eta` is fixed
# and the same prompt/seed is sampled at `temp_steps` temperatures, each 0.1
# above the previous one (the first sample is drawn at 1.1).
temperature = 1.0
stable_diffusion.ddim_eta = 0.1
images = []
sampled_temperatures = []  # temperatures actually used, in sampling order
temp_steps = 5
prompt = "A woman with flowers in her hair in a courtyard, in the style of Frank Frazetta"
for i in range(temp_steps):
    temperature += 0.1
    sampled_temperatures.append(temperature)
    imgs = stable_diffusion.generate_images(
        prompt=prompt,
        seed=2982,
        noise_fn=noise_fn,
        temperature=temperature,
    )
    print(imgs.shape)
    images.append(imgs)
images = torch.cat(images, dim=0)

# The grid is built without normalization (`to_pil` does the [-1, 1] -> [0, 1]
# mapping afterwards). The normalization-only arguments `range` and
# `scale_each` are therefore not passed: they have no effect when
# `normalize=False`, and the `range` keyword (renamed to `value_range`) is
# rejected with a TypeError by current torchvision releases.
grid = torchvision.utils.make_grid(
    images,
    normalize=False,
    nrow=temp_steps,
    pad_value=0,
)
grid_img = to_pil(grid)

# Name the file after the first and last temperature that were really sampled.
# (Deriving the start as `temperature - temp_steps * 0.1` would report 1.000,
# a temperature at which no image in the grid was generated.)
grid_img.save(
    join(
        output_directory,
        f"test_grid_temp{sampled_temperatures[0]:.3f}-{sampled_temperatures[-1]:.3f}_eta{stable_diffusion.ddim_eta:.3f}.png",
    )
)
grid_img

In [None]:
# Vary both parameters simultaneously. The outer loop steps the temperature
# and the inner loop steps `ddim_eta`, and the grid places `grid_side + 1`
# images per row. So, assuming one image per `generate_images` call, each row
# shares one temperature (increasing from top to bottom, y-axis) and each
# column shares one `ddim_eta` (increasing from left to right, x-axis).
grid_side = 2
temperature = 1.0
ddim_eta = 0.1
prompt = "A woman with flowers in her hair in a courtyard, in the style of Frank Frazetta"
grid_images = []
for i in range(grid_side + 1):
    temperature += 0.2
    for j in range(grid_side + 1):
        stable_diffusion.ddim_eta = ddim_eta + j * 0.1
        imgs = stable_diffusion.generate_images(
            prompt=prompt,
            seed=2982,
            noise_fn=noise_fn,
            temperature=temperature,
        )
        grid_images.append(imgs)

tensor_grid = torch.cat(grid_images, dim=0)

# The grid is built without normalization (`to_pil` does the [-1, 1] -> [0, 1]
# mapping afterwards). The normalization-only arguments `range` and
# `scale_each` are therefore not passed: they have no effect when
# `normalize=False`, and the `range` keyword (renamed to `value_range`) is
# rejected with a TypeError by current torchvision releases.
grid = torchvision.utils.make_grid(
    tensor_grid,
    nrow=grid_side + 1,
    normalize=False,
    pad_value=0,
)
grid_img = to_pil(grid)

# The file name records the first/last temperature and first/last `ddim_eta`.
grid_img.save(
    join(
        output_directory,
        f"test_grid_temp{temperature-grid_side*0.2:.3f}-{temperature:.3f}_eta{ddim_eta:.3f}-{stable_diffusion.ddim_eta:.3f}.png",
    )
)
grid_img