In [None]:
import os
import sys

# Path of the repository root relative to this notebook; later cells reuse it
# to locate config.ini and to build the I/O directory tree.
base_directory = "../"
# Put the repository root at the front of sys.path so the `stable_diffusion`
# package can be imported. Any earlier occurrence is dropped first, so
# re-running this cell does not pile up duplicate entries. The slice assignment
# mutates the existing list object instead of rebinding sys.path.
sys.path[:] = [base_directory] + [entry for entry in sys.path if entry != base_directory]
# Print the resolved absolute path as a quick sanity check.
print(os.path.abspath(base_directory))

import json
import torch
import configparser
import safetensors
from stable_diffusion.utils_backend import *
from stable_diffusion.utils_image import *
from stable_diffusion.utils_model import *
from stable_diffusion.utils_logger import *

from stable_diffusion.constants import IODirectoryTree


In [None]:
# Device handle passed to every model constructor below
# (presumably a torch device chosen by get_device — confirm in utils_backend).
DEVICE = get_device()

# ExtendedInterpolation enables ${section:option} references inside config.ini.
config = configparser.ConfigParser(interpolation=configparser.ExtendedInterpolation())
config_path = os.path.join(base_directory, "config.ini")
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed. Fail here with a clear message instead of
# hitting an opaque KeyError on config['BASE'] further down.
if not config.read(config_path):
    raise FileNotFoundError(f"Could not read configuration file: {os.path.abspath(config_path)}")
# Override the base directory so paths resolve relative to this notebook.
# Option names are case-insensitive by default, so this is stored as `base_directory`.
config['BASE']['BASE_DIRECTORY'] = base_directory

In [None]:
# Build the I/O directory-tree helper from the configured prefix and the
# repository root.
pt = IODirectoryTree(
    base_io_directory_prefix=config["BASE"].get('base_io_directory_prefix'),
    base_directory=base_directory,
)
# Bare expression: the notebook displays the object's repr.
pt

In [None]:
# Ask the directory-tree helper to create its folders on disk
# (presumably skips ones that already exist — confirm in IODirectoryTree).
pt.create_directory_tree_folders()

Here we'll build the `StableDiffusion` class by loading the submodels from disk.

In [None]:
from transformers import CLIPTextConfig, CLIPTextModel
from stable_diffusion.model.clip_text_embedder import CLIPTextEmbedder
# tokenizer = CLIPTokenizer.from_pretrained('openai/clip-vit-large-patch14')

Instantiate a text embedder:

In [None]:
# Create the CLIP text embedder on the selected device; its weights are loaded
# by the following cells.
text_embedder = CLIPTextEmbedder(device=DEVICE)

Load its weights:

In [None]:
# Load the text embedder from the path provided by the directory tree.
text_embedder.load(
    text_embedder_path=pt.text_embedder_path,
)

Load its submodels' weights:

In [None]:
# Load the embedder's two sub-models: the tokenizer and the text transformer.
text_embedder.load_submodels(
    tokenizer_path=pt.tokenizer_path,
    transformer_path=pt.text_model_path,
)

In [None]:
from stable_diffusion.model.vae import Autoencoder

In [None]:
# Create the VAE autoencoder on the selected device; its weights are loaded by
# the following cells.
autoencoder = Autoencoder(device=DEVICE)

In [None]:
# Load the autoencoder from the path provided by the directory tree.
autoencoder.load(
    autoencoder_path=pt.autoencoder_path,
)

In [None]:
# Load the autoencoder's two sub-models: the encoder and the decoder.
autoencoder.load_submodels(
    encoder_path=pt.encoder_path,
    decoder_path=pt.decoder_path,
)

In [None]:
from stable_diffusion.model.unet import UNetModel
# from stable_diffusion.utils.model import initialize_unet

In [None]:
# Create the U-Net on the selected device; its weights are loaded in the next cell.
unet_model = UNetModel(device=DEVICE)

In [None]:
# Load the U-Net from the path provided by the directory tree.
unet_model.load(
    unet_path=pt.unet_path,
)

In [None]:
# Check memory after the three sub-models have been loaded
# (presumably reports current memory usage — confirm what get_memory_status returns/prints).
get_memory_status()

Now we need to build a model with the same structure as the checkpoint we are going to use (by default, `runwayml/stable-diffusion-v1-5`), so that the weights get mapped properly. This model is called `LatentDiffusion`. We also have an `initialize_latent_diffusion` function, which I will omit since it's a bit longer than the others.

In [None]:
from stable_diffusion import LatentDiffusion
# from stable_diffusion.utils.model import initialize_latent_diffusion

In [None]:
# Assemble the latent-diffusion model from the three sub-models created above:
# the autoencoder, the CLIP text embedder and the U-Net.
latent_diffusion = LatentDiffusion(
    autoencoder=autoencoder,
    clip_embedder=text_embedder,
    unet_model=unet_model,
    device=DEVICE,
)

In [None]:
# Load the latent-diffusion model from the path provided by the directory tree.
latent_diffusion.load(latent_diffusion_path=pt.latent_diffusion_path)

In [None]:
from stable_diffusion import StableDiffusion

In [None]:
# Wrap the assembled latent-diffusion model in the StableDiffusion front end.
# ddim_steps=20 is passed through as given (presumably the number of DDIM
# sampling steps — confirm in StableDiffusion).
stable_diffusion = StableDiffusion(
    device=DEVICE,
    model=latent_diffusion,
    ddim_steps=20,
)

In [None]:
# Text prompt passed to generate_images in the sampling cell.
prompt = 'A cat'

In [None]:
# Generate inside the `section` context manager labelled 'sampling...'
# (presumably logs or times the enclosed block — confirm in the utils modules).
# The seed is fixed, presumably so repeated runs produce the same image.
with section('sampling...'):
    image_tensor = stable_diffusion.generate_images(
        prompt=prompt,
        seed=1,
    )

In [None]:
# Convert the generated tensor for display as the cell's output.
# NOTE(review): squeeze() without a dim removes every size-1 dimension; this
# assumes a single-image batch — confirm the shape generate_images returns.
to_pil(image_tensor.squeeze())