In [1]:
# Shell escape: print each GPU's name, total memory and free memory as header-less CSV.
!nvidia-smi --query-gpu=name,memory.total,memory.free --format=csv,noheader

NVIDIA A100-PCIE-40GB, 40960 MiB, 31264 MiB
NVIDIA A100-PCIE-40GB, 40960 MiB, 40510 MiB
NVIDIA A100-PCIE-40GB, 40960 MiB, 40510 MiB


In [17]:
import torch
import os
from diffusers import StableDiffusionPipeline
from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer

# --- Run configuration -------------------------------------------------------
SEED = 512
# NOTE(review): the GPU index is hard-coded to 2, so this needs at least three
# visible CUDA devices; otherwise moving the pipeline to DEVICE will fail.
DEVICE = torch.device("cuda:2" if torch.cuda.is_available() else "cpu")

pretrained_model_name_or_path = "/root/autodl-tmp/stable_diffusion/stable-diffusion-v1-5"
learned_embeds_path = "/root/autodl-tmp/textual_inversion/merged_embeddings/custom_chair&custom_cat/learned_embeds_factor=0.12&learned_embeds_factor=0.5.bin"

# --- Names derived from the embedding path -----------------------------------
# The parent directory's name is split on "&" to obtain one dataset name per entry.
all_embedding_path = os.path.dirname(learned_embeds_path)
all_dataset_name = os.path.basename(all_embedding_path)
dataset_name_list = all_dataset_name.split("&")

# Embedding file name without its extension (used later to label saved images).
full_embedding_path = os.path.splitext(learned_embeds_path)[0]
embedding_name = os.path.basename(full_embedding_path)

# --- Tokenizer and text encoder from the base checkpoint ----------------------
tokenizer = CLIPTokenizer.from_pretrained(pretrained_model_name_or_path, subfolder="tokenizer")
text_encoder = CLIPTextModel.from_pretrained(
    pretrained_model_name_or_path,
    subfolder="text_encoder",
    torch_dtype=torch.float16,
)

In [18]:
# Load the learned placeholder embeddings and register each one with the
# tokenizer / text encoder.
# NOTE(review): torch.load unpickles the file, so only load embeddings that come
# from a trusted source.
loaded_learned_embeds = torch.load(learned_embeds_path, map_location="cpu")
trained_token_list = list(loaded_learned_embeds.keys())
dtype = text_encoder.get_input_embeddings().weight.dtype
for i, trained_token in enumerate(trained_token_list):
    # Register the placeholder token first and fail *before* touching the
    # encoder weights: if the token already exists, writing its row would
    # silently overwrite an existing embedding ahead of the error.
    num_added_tokens = tokenizer.add_tokens(trained_token)
    if num_added_tokens == 0:
        raise ValueError(f"The tokenizer already contains the token {trained_token}. "
                         "Please pass a different `token` that is not already in the tokenizer.")
    # Tensor.to returns a new tensor rather than casting in place, so the
    # result has to be kept for the cast to take effect.
    embeds = loaded_learned_embeds[trained_token].to(dtype)
    # Grow the embedding matrix so the new token id has a row to write into.
    text_encoder.resize_token_embeddings(len(tokenizer))
    # Look up the id assigned to the new token and store its learned embedding.
    token_id = tokenizer.convert_tokens_to_ids(trained_token)
    text_encoder.get_input_embeddings().weight.data[token_id] = embeds
    # Dataset names come from the directory name; tolerate a count mismatch
    # instead of raising IndexError after the encoder was already modified.
    dataset_name = dataset_name_list[i] if i < len(dataset_name_list) else "<unknown>"
    print(f"placeholder token for dataset {dataset_name}: {trained_token}")

# Build the pipeline from the base checkpoint in float16, substituting the
# tokenizer and text encoder prepared above, then move it to the chosen device.
pipe = StableDiffusionPipeline.from_pretrained(
    pretrained_model_name_or_path,
    tokenizer=tokenizer,
    text_encoder=text_encoder,
    torch_dtype=torch.float16,
)
pipe = pipe.to(DEVICE)

placeholder token for dataset custom_chair: <custom_chair>
placeholder token for dataset custom_cat: <custom_cat>


In [19]:
prompt = "a <custom_cat> sitting on a <custom_chair>"

# Use a seeded generator on the inference device when a seed is configured;
# otherwise pass None to the pipeline.
if SEED is None:
    generator = None
else:
    generator = torch.Generator(device=DEVICE).manual_seed(SEED)

pipeline_output = pipe(
    prompt,
    num_inference_steps=50,
    guidance_scale=7.5,
    generator=generator,
)
image = pipeline_output.images[0]

# Save under an "images" folder next to the embedding file; the file name is
# built from the prompt, the embedding name and the seed.
image_dir = os.path.join(all_embedding_path, "images")
os.makedirs(image_dir, exist_ok=True)
image_path = os.path.join(image_dir, f"{prompt}_{embedding_name}_{SEED}.png")
image.save(image_path)

100%|██████████| 50/50 [00:04<00:00, 12.08it/s]
