In [1]:
# Shell escape: ask nvidia-smi for each GPU's name, total memory and free
# memory, printed as CSV rows without a header line.
!nvidia-smi --query-gpu=name,memory.total,memory.free --format=csv,noheader

NVIDIA A100-PCIE-40GB, 40960 MiB, 40513 MiB
NVIDIA A100-PCIE-40GB, 40960 MiB, 40513 MiB
NVIDIA A100-PCIE-40GB, 40960 MiB, 40513 MiB


In [1]:
import torch
import os
import random
from diffusers import StableDiffusionPipeline
from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer

# ---- Configuration -------------------------------------------------------
# NOTE(review): SEED is not consumed by this cell; None presumably means
# "run unseeded" -- confirm against the cells that use it.
SEED = None
# First CUDA device when one is available, otherwise the CPU.
DEVICE = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

pretrained_model_name_or_path = "/root/autodl-tmp/stable_diffusion/stable-diffusion-2"
learned_embeds_path = "/root/autodl-tmp/textual_inversion/trained_embeddings/custom_cat_v2/original/learned_embeds.bin"

# Everything below is derived from learned_embeds_path, which is laid out as
#   <root>/<dataset_name>/<sub_directory>/learned_embeds.bin
all_embedding_path = os.path.dirname(learned_embeds_path)
# dirname/basename are used instead of unpacking os.path.split() into a
# throwaway `_`: binding `_` at notebook top level stops IPython from
# updating its last-output variables (`_`, `__`, `___`).
dataset_path = os.path.dirname(all_embedding_path)
dataset_name = os.path.basename(dataset_path)
# Text file that sits next to the learned embeddings and holds the
# initialization word.
initialization_word_path = f"{all_embedding_path}/initialization_word.txt"
interpolation_factor = 0.8 # modify it to control the strength

# ---- Model components ----------------------------------------------------
# Tokenizer and text encoder are read from the "tokenizer" and "text_encoder"
# sub-folders of the pretrained checkpoint. Neither is moved to DEVICE here.
tokenizer = CLIPTokenizer.from_pretrained(
    pretrained_model_name_or_path,
    subfolder="tokenizer",
)
text_encoder = CLIPTextModel.from_pretrained(
    pretrained_model_name_or_path,
    subfolder="text_encoder",
)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the learned textual-inversion embedding and register its placeholder
# token with the tokenizer and the text encoder.
# NOTE(review): torch.load unpickles the file, which can execute arbitrary
# code -- only point learned_embeds_path at checkpoints you trust.
loaded_learned_embeds = torch.load(learned_embeds_path, map_location="cpu")

# separate token and the embeds (only the first entry of the mapping is used)
trained_token = next(iter(loaded_learned_embeds))
embeds = loaded_learned_embeds[trained_token]
print("placeholder token for dataset {}:".format(dataset_name), trained_token)

# cast to dtype of text_encoder
# Tensor.to() returns a new tensor instead of converting in place, so the
# result has to be rebound for the cast to take effect.
dtype = text_encoder.get_input_embeddings().weight.dtype
embeds = embeds.to(dtype)

# get the initial embedding
with open(initialization_word_path, "r") as f:
    # Strip surrounding whitespace: a trailing newline in the file would
    # otherwise become part of the word and break the vocabulary lookup.
    initialization_word = f.read().strip()
print("Initialization token for dataset {}:".format(dataset_name), initialization_word)

# Run the word through the tokenizer instead of a raw vocabulary lookup
# (convert_tokens_to_ids), so the id is the one the tokenizer actually emits
# for this word -- the same id the later `tokenizer([initialization_word])`
# call produces. A raw lookup skips the tokenizer's word-level processing and
# can silently return a different (or the unknown-token) id.
initialization_token_ids = tokenizer.encode(initialization_word, add_special_tokens=False)
if len(initialization_token_ids) != 1:
    raise ValueError(
        f"The initialization word {initialization_word!r} must map to exactly one token, "
        f"but the tokenizer produced {len(initialization_token_ids)}."
    )
initialization_word_ids = initialization_token_ids[0]
# clone() so this row is an independent tensor and does not keep the whole
# pre-resize embedding matrix alive after resize_token_embeddings() below.
initialization_embedding = text_encoder.get_input_embeddings().weight.data[initialization_word_ids].clone()

# add the token in tokenizer
num_added_tokens = tokenizer.add_tokens(trained_token)
if num_added_tokens == 0:
    raise ValueError(f"The tokenizer already contains the token {trained_token}. "
                     "Please pass a different `token` that is not already in the tokenizer.")

# resize the token embeddings
text_encoder.resize_token_embeddings(len(tokenizer))

# get the id for the token and assign the embeds
token_id = tokenizer.convert_tokens_to_ids(trained_token)
text_encoder.get_input_embeddings().weight.data[token_id] = embeds

placeholder token for dataset custom_cat_v2: <custom_cat>
Initialization token for dataset custom_cat_v2: cat


In [3]:
# Tokenize the placeholder token and the initialization word, each as its own
# single-item batch with identical settings: no padding, truncation to the
# tokenizer's maximum length, PyTorch tensors returned. Only the input ids
# are kept.
input_id_learned, input_id_initial = (
    tokenizer(
        [word],
        padding=False,
        truncation=True,
        max_length=tokenizer.model_max_length,
        return_tensors="pt",
    ).input_ids
    for word in (trained_token, initialization_word)
)

In [11]:
# project embeddings to text space
# For each word, take the id at sequence position 1, reshape it to a (1, 1)
# batch, run the text encoder on it, and keep the vector at
# [first output][first batch item][first position].
# NOTE(review): position 1 is presumably the word itself, preceded by a
# start-of-text id at position 0 -- confirm against the tokenizer.
# The forward passes are inference only, so autograd is switched off: the
# results carry no graph and do not require grad.
with torch.no_grad():
    hidden_states_learned = text_encoder(input_id_learned[:, 1].unsqueeze(0))[0][0][0]
    hidden_states_initial = text_encoder(input_id_initial[:, 1].unsqueeze(0))[0][0][0]

In [12]:
# Sanity check: the projected vector for the learned token should be 1-D.
print(hidden_states_learned.size())

torch.Size([1024])
