In [1]:
# Environment check: print one header-less CSV row per GPU with its name, total memory and free memory.
!nvidia-smi --query-gpu=name,memory.total,memory.free --format=csv,noheader

NVIDIA A100-PCIE-40GB, 40960 MiB, 17206 MiB
NVIDIA A100-PCIE-40GB, 40960 MiB, 17206 MiB
NVIDIA A100-PCIE-40GB, 40960 MiB, 17206 MiB


In [7]:
import torch
import os
from diffusers import StableDiffusionPipeline
from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer

# ---- Run-wide settings ----
SEED = None
DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# ---- Locations on disk ----
# The path handling below implies the layout
#   <...>/<dataset_name>/<run_dir>/learned_embeds.bin
# with initialization_word.txt stored alongside the embedding file.
pretrained_model_name_or_path = "/root/autodl-tmp/stable_diffusion/stable-diffusion-v1-5"
learned_embeds_path = "/root/autodl-tmp/textual_inversion/trained_embeddings/custom_chair/original/learned_embeds.bin"

all_embedding_path = os.path.dirname(learned_embeds_path)   # directory holding the embedding file
dataset_path = os.path.dirname(all_embedding_path)          # parent of that directory
dataset_name = os.path.basename(dataset_path)               # last path component names the dataset
initialization_word_path = f"{all_embedding_path}/initialization_word.txt"

# ---- Tokenizer and text encoder from the pretrained checkpoint ----
tokenizer = CLIPTokenizer.from_pretrained(pretrained_model_name_or_path, subfolder="tokenizer")
text_encoder = CLIPTextModel.from_pretrained(
    pretrained_model_name_or_path,
    subfolder="text_encoder",
    torch_dtype=torch.float16,  # encoder weights are loaded in half precision
)

In [8]:
# Load the learned embedding file onto the CPU.
# NOTE(review): torch.load unpickles its input, so only point this at trusted files.
loaded_learned_embeds = torch.load(learned_embeds_path, map_location="cpu")

# separate token and the embeds (the first entry of the loaded mapping is used)
trained_token = list(loaded_learned_embeds.keys())[0]
embeds = loaded_learned_embeds[trained_token]
print("placeholder token for dataset {}:".format(dataset_name), trained_token)

# cast to dtype of text_encoder
# Tensor.to() returns a new tensor instead of converting in place, so the result
# has to be re-bound; otherwise the cast is silently dropped.
dtype = text_encoder.get_input_embeddings().weight.dtype
embeds = embeds.to(dtype)

# get the initial embedding
with open(initialization_word_path, "r") as f:
    # Strip surrounding whitespace (e.g. a trailing newline written by an editor)
    # so it cannot leak into the token lookup below.
    initialization_word = f.read().strip()
print("Initialization token for dataset {}:".format(dataset_name), initialization_word)

# Run the word through the tokenizer rather than looking the raw string up in the
# vocabulary: convert_tokens_to_ids() does no tokenization, so a plain word may map
# to a different vocabulary entry (or the unknown token) than the one the tokenizer
# actually produces for it.
initialization_token_ids = tokenizer.encode(initialization_word, add_special_tokens=False)
if len(initialization_token_ids) != 1:
    raise ValueError(
        "Initialization word {!r} must map to exactly one token, got ids {}".format(
            initialization_word, initialization_token_ids
        )
    )
# Kept as a scalar id so the looked-up embedding is a single row of the embedding matrix.
initialization_word_ids = initialization_token_ids[0]
initialization_embedding = text_encoder.get_input_embeddings().weight.data[initialization_word_ids]

placeholder token for dataset custom_chair: <custom_chair>
Initialization token for dataset custom_chair: chair


In [9]:
# Linear interpolation between the initialization embedding (factor 0.0) and the
# learned embedding (factor 1.0), sampled at 0.00, 0.01, ..., 1.00.
interpolation_factor_list = [step / 100 for step in range(101)]
for interpolation_factor in interpolation_factor_list:
    # Weighted contributions of the two endpoints.
    learned_part = interpolation_factor * embeds
    initial_part = (1 - interpolation_factor) * initialization_embedding
    interpolation_embedding = learned_part + initial_part

    # One file per factor, keyed by the placeholder token like the source file.
    output_embeddings_path = f"{all_embedding_path}/learned_embeds_factor={interpolation_factor}.bin"
    learned_embeddings_dict = {trained_token: interpolation_embedding.detach().cpu()}
    torch.save(learned_embeddings_dict, output_embeddings_path)