In [1]:
!nvidia-smi --query-gpu=name,memory.total,memory.free --format=csv,noheader

NVIDIA A100-PCIE-40GB, 40960 MiB, 34166 MiB
NVIDIA A100-PCIE-40GB, 40960 MiB, 40510 MiB
NVIDIA A100-PCIE-40GB, 40960 MiB, 40510 MiB


In [69]:
# compute CLIP-space cosine-similarity distance
import torch
import os
import clip
import pathlib
import tqdm
import numpy as np
import warnings
import json
import sklearn.preprocessing
from diffusers import StableDiffusionPipeline
from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer
from PIL import Image
from torchvision.transforms import Compose, Resize, CenterCrop, ToTensor, Normalize
from packaging import version
from shutil import rmtree

SEED = 512
DEVICE = torch.device("cuda:2") if torch.cuda.is_available() else torch.device("cpu")

pretrained_model_name_or_path = "/root/autodl-tmp/stable_diffusion/stable-diffusion-v1-5"
learned_embeds_path = "/root/autodl-tmp/textual_inversion/trained_embeddings/custom_chair/original/learned_embeds_factor=0.18.bin"  
# can also use downloaded embeddings 
all_embedding_path, embeds_suffix = os.path.split(learned_embeds_path)
embeds_name, _ = os.path.splitext(embeds_suffix)
dataset_path, _ = os.path.split(all_embedding_path)
_, dataset_name = os.path.split(dataset_path)
initialization_word_path = f"{all_embedding_path}/initialization_word.txt"

tokenizer = CLIPTokenizer.from_pretrained(
    pretrained_model_name_or_path,
    subfolder="tokenizer",
)
text_encoder = CLIPTextModel.from_pretrained(
    pretrained_model_name_or_path, subfolder="text_encoder", torch_dtype=torch.float16
)

In [70]:
loaded_learned_embeds = torch.load(learned_embeds_path, map_location="cpu")
  
# separate token and the embeds
trained_token = list(loaded_learned_embeds.keys())[0]
embeds = loaded_learned_embeds[trained_token]
print("placeholder token for dataset {}:".format(dataset_name), trained_token)

# cast to dtype of text_encoder
dtype = text_encoder.get_input_embeddings().weight.dtype
embeds.to(dtype)

# get the initial embedding
with open(initialization_word_path, "r") as f:
    initialization_word  = f.read()
print("Initialization token for dataset {}:".format(dataset_name), initialization_word)

# add the token in tokenizer
num_added_tokens = tokenizer.add_tokens(trained_token)
if num_added_tokens == 0:
    raise ValueError(f"The tokenizer already contains the token {trained_token}. "
                     "Please pass a different `token` that is not already in the tokenizer.")

# resize the token embeddings
text_encoder.resize_token_embeddings(len(tokenizer))

# get the id for the token and assign the embeds
token_id = tokenizer.convert_tokens_to_ids(trained_token)
text_encoder.get_input_embeddings().weight.data[token_id] = embeds

pipe = StableDiffusionPipeline.from_pretrained(pretrained_model_name_or_path,
                                               torch_dtype=torch.float16, 
                                               text_encoder=text_encoder, 
                                               tokenizer=tokenizer).to(DEVICE)

placeholder token for dataset custom_chair: <custom_chair>
Initialization token for dataset custom_chair: chair


In [71]:
# Explicitly seperate "object" and "style"
type_embedding_path = f"{all_embedding_path}/type_of_concept.txt"
with open(type_embedding_path, "r") as f:
    embedding_type = f.read()
    
if embedding_type == "object":
    prompt_list = [
        # background modifications
        "a photo of {}".format(trained_token),
        "a photo of {} on the beach".format(trained_token),
        "a photo of {} on the moon".format(trained_token),
        "a photo of {} on the table".format(trained_token),
    ]
elif embedding_type == "style": 
    prompt_list = [
        # style changes
        "a painting in the style of {}".format(trained_token),
        "an oil painting in the style of {}".format(trained_token),
        "a dirty painting in the style of {}".format(trained_token),
        "a dark painting in the style of {}".format(trained_token),
    ]
else:
    raise ValueError("Embedding type should be either 'object' or 'style'")

In [72]:
class CLIPImageDataset(torch.utils.data.Dataset):
    def __init__(self, data):
        self.data = data
        # only 224x224 ViT-B/32 supported for now
        self.preprocess = self._transform_test(224)

    def _transform_test(self, n_px):
        return Compose([
            Resize(n_px, interpolation=Image.BICUBIC),
            CenterCrop(n_px),
            lambda image: image.convert("RGB"),
            ToTensor(),
            Normalize((0.48145466, 0.4578275, 0.40821073), (0.26862954, 0.26130258, 0.27577711)),
        ])

    def __getitem__(self, idx):
        c_data = self.data[idx]
        image = Image.open(c_data)
        image = self.preprocess(image)
        return {'image': image}

    def __len__(self):
        return len(self.data)

    
class CLIPCapDataset(torch.utils.data.Dataset):
    def __init__(self, data):
        self.data = data

    def __getitem__(self, idx):
        c_data = self.data[idx]
        c_data = clip.tokenize(c_data, truncate=True).squeeze()
        return {'caption': c_data}

    def __len__(self):
        return len(self.data)

In [73]:
def extract_all_images(images, model, device, batch_size=64, num_workers=8):
    data = torch.utils.data.DataLoader(
        CLIPImageDataset(images),
        batch_size=batch_size, num_workers=num_workers, shuffle=False)
    all_image_features = []
    with torch.no_grad():
        for b in tqdm.tqdm(data):
            b = b['image'].to(device)
            b = b.to(torch.float16)
            all_image_features.append(model.encode_image(b).cpu().numpy())
    all_image_features = np.vstack(all_image_features)
    return all_image_features

def extract_all_captions(captions, model, device, batch_size=64, num_workers=8):
    data = torch.utils.data.DataLoader(
        CLIPCapDataset(captions),
        batch_size=batch_size, num_workers=num_workers, shuffle=False)
    all_text_features = []
    with torch.no_grad():
        for b in tqdm.tqdm(data):
            b = b['caption'].to(device)
            all_text_features.append(model.encode_text(b).cpu().numpy())
    all_text_features = np.vstack(all_text_features)
    return all_text_features

def get_clip_score(model, clip_images, text_features, device, w=2.5):
    if isinstance(clip_images, list):
        # need to extract image features
        clip_images = extract_all_images(clip_images, model, device)
    
    # as of numpy 1.21, normalize doesn't work properly for float16
    if version.parse(np.__version__) < version.parse('1.21'):
        clip_images = sklearn.preprocessing.normalize(clip_images, axis=1)
        original_images = sklearn.preprocessing.normalize(original_images, axis=1)
    else:
        warnings.warn(
            'due to a numerical instability, new numpy normalization is slightly different than' 
            'paper results. To exactly replicate paper results, please use numpy version less' 
            'than 1.21, e.g., 1.20.3.')
        clip_images = clip_images / np.sqrt(np.sum(clip_images ** 2, axis=1, keepdims=True))
        text_features = text_features / np.sqrt(np.sum(text_features ** 2, axis=1, keepdims=True))
    
    per = w * np.clip(np.sum(clip_images * text_features, axis=1), 0, None)
    return np.mean(per)

In [74]:
# calculate clip score for each prompt and average over it
generator = None if SEED is None else torch.Generator(
            device=DEVICE).manual_seed(SEED)
N = 16  # number of random generated images
clip_image_dir = os.path.join(all_embedding_path, "clip_images_temp")
total_score = 0

clip_model, clip_transform = clip.load("ViT-B/32", device=DEVICE, jit=False)
clip_model.eval()

for i, prompt in enumerate(prompt_list):
    os.makedirs(clip_image_dir, exist_ok=True)
    print(f"compute clip score for {i + 1}/{len(prompt_list)} prompt", end="\r")
    
    for n in range(N):
        image_n = pipe(prompt, num_inference_steps=50, guidance_scale=7.5, 
                       generator=generator).images[0]
        image_n_path = os.path.join(clip_image_dir, "{}_{}.png".format(prompt, n + 1))
        image_n.save(image_n_path)
    
    clip_images_path_list = [os.path.join(clip_image_dir, path) for path in os.listdir(
                             clip_image_dir) if path.endswith(('.png', '.jpg', '.jpeg', '.tiff'))]  
    clip_features = extract_all_images(clip_images_path_list, clip_model, DEVICE, batch_size=N, 
                                   num_workers=8)
    
    # get text features
    text_candidates = [prompt] * N # .replace(trained_token, initialization_word)
    text_features = extract_all_captions(text_candidates, clip_model, DEVICE, batch_size=N, num_workers=8)
    
    # compute Clip-space cosine similarity
    once_score = get_clip_score(clip_model, clip_features, text_features, DEVICE)
    total_score += once_score
    
    # empty the clip_image_dir
    rmtree(clip_image_dir)

# compute and save the final score
final_score = total_score / len(prompt_list)
clip_score_dir = f"{all_embedding_path}/i2t_score"
os.makedirs(clip_score_dir, exist_ok=True)
clip_score_path = f"{clip_score_dir}/{embeds_name}_i2t_score.txt"
with open(clip_score_path, "w") as f:
    f.write("CLIP image2text score: {}".format(final_score))
print("END!!! CLIP image2text score for {} is: {}".format(embeds_name, final_score))

compute clip score for 1/4 prompt

100%|██████████| 50/50 [00:02<00:00, 17.42it/s]
100%|██████████| 50/50 [00:02<00:00, 17.32it/s]
100%|██████████| 50/50 [00:02<00:00, 17.71it/s]
100%|██████████| 50/50 [00:02<00:00, 18.05it/s]
100%|██████████| 50/50 [00:02<00:00, 18.04it/s]
100%|██████████| 50/50 [00:02<00:00, 17.97it/s]
100%|██████████| 50/50 [00:02<00:00, 17.98it/s]
100%|██████████| 50/50 [00:02<00:00, 17.92it/s]
100%|██████████| 50/50 [00:02<00:00, 17.79it/s]
100%|██████████| 50/50 [00:02<00:00, 17.83it/s]
100%|██████████| 50/50 [00:02<00:00, 17.93it/s]
100%|██████████| 50/50 [00:02<00:00, 17.99it/s]
100%|██████████| 50/50 [00:02<00:00, 18.16it/s]
100%|██████████| 50/50 [00:02<00:00, 18.07it/s]
100%|██████████| 50/50 [00:02<00:00, 17.75it/s]
100%|██████████| 50/50 [00:02<00:00, 17.86it/s]
  Resize(n_px, interpolation=Image.BICUBIC),
  0%|          | 0/1 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

100%|██████████| 1/1 [00:01<00:00,  1.41s/it]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


100%|██████████| 1/1 [00:01<00:00,  1.63s/it]
  0%|          | 0/1 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

100%|██████████| 1/1 [00:01<00:00,  1.25s/it]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


100%|██████████| 1/1 [00:01<00:00,  1.47s/it]


compute clip score for 2/4 prompt

100%|██████████| 50/50 [00:02<00:00, 18.01it/s]
100%|██████████| 50/50 [00:02<00:00, 18.00it/s]
100%|██████████| 50/50 [00:02<00:00, 18.24it/s]
100%|██████████| 50/50 [00:02<00:00, 18.11it/s]
100%|██████████| 50/50 [00:02<00:00, 18.06it/s]
100%|██████████| 50/50 [00:02<00:00, 18.13it/s]
100%|██████████| 50/50 [00:02<00:00, 17.95it/s]
100%|██████████| 50/50 [00:02<00:00, 17.97it/s]
100%|██████████| 50/50 [00:02<00:00, 18.17it/s]
100%|██████████| 50/50 [00:02<00:00, 18.17it/s]
100%|██████████| 50/50 [00:02<00:00, 18.06it/s]
100%|██████████| 50/50 [00:02<00:00, 17.94it/s]
100%|██████████| 50/50 [00:02<00:00, 17.91it/s]
100%|██████████| 50/50 [00:02<00:00, 18.20it/s]
100%|██████████| 50/50 [00:02<00:00, 18.10it/s]
100%|██████████| 50/50 [00:02<00:00, 18.12it/s]
  0%|          | 0/1 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

100%|██████████| 1/1 [00:01<00:00,  1.39s/it]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


100%|██████████| 1/1 [00:01<00:00,  1.61s/it]
  0%|          | 0/1 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

100%|██████████| 1/1 [00:01<00:00,  1.35s/it]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


100%|██████████| 1/1 [00:01<00:00,  1.57s/it]


compute clip score for 3/4 prompt

100%|██████████| 50/50 [00:02<00:00, 17.66it/s]
100%|██████████| 50/50 [00:02<00:00, 18.11it/s]
100%|██████████| 50/50 [00:02<00:00, 18.20it/s]
100%|██████████| 50/50 [00:02<00:00, 18.12it/s]
100%|██████████| 50/50 [00:02<00:00, 18.13it/s]
100%|██████████| 50/50 [00:02<00:00, 17.90it/s]
100%|██████████| 50/50 [00:03<00:00, 16.10it/s]
100%|██████████| 50/50 [00:02<00:00, 17.65it/s]
100%|██████████| 50/50 [00:02<00:00, 18.24it/s]
100%|██████████| 50/50 [00:02<00:00, 18.05it/s]
100%|██████████| 50/50 [00:02<00:00, 17.75it/s]
100%|██████████| 50/50 [00:02<00:00, 18.11it/s]
100%|██████████| 50/50 [00:02<00:00, 18.18it/s]
100%|██████████| 50/50 [00:02<00:00, 18.21it/s]
100%|██████████| 50/50 [00:02<00:00, 18.04it/s]
100%|██████████| 50/50 [00:02<00:00, 18.14it/s]
  0%|          | 0/1 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

100%|██████████| 1/1 [00:01<00:00,  1.43s/it]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


100%|██████████| 1/1 [00:01<00:00,  1.65s/it]
  0%|          | 0/1 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

100%|██████████| 1/1 [00:01<00:00,  1.20s/it]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


100%|██████████| 1/1 [00:01<00:00,  1.41s/it]


compute clip score for 4/4 prompt

100%|██████████| 50/50 [00:02<00:00, 17.72it/s]
100%|██████████| 50/50 [00:02<00:00, 18.20it/s]
100%|██████████| 50/50 [00:02<00:00, 18.15it/s]
100%|██████████| 50/50 [00:02<00:00, 18.10it/s]
100%|██████████| 50/50 [00:02<00:00, 17.98it/s]
100%|██████████| 50/50 [00:02<00:00, 17.92it/s]
100%|██████████| 50/50 [00:02<00:00, 18.02it/s]
100%|██████████| 50/50 [00:02<00:00, 18.00it/s]
100%|██████████| 50/50 [00:02<00:00, 18.06it/s]
100%|██████████| 50/50 [00:02<00:00, 17.76it/s]
100%|██████████| 50/50 [00:02<00:00, 17.86it/s]
100%|██████████| 50/50 [00:02<00:00, 17.96it/s]
100%|██████████| 50/50 [00:02<00:00, 17.94it/s]
100%|██████████| 50/50 [00:02<00:00, 17.97it/s]
100%|██████████| 50/50 [00:02<00:00, 17.76it/s]
100%|██████████| 50/50 [00:02<00:00, 17.62it/s]
  0%|          | 0/1 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

100%|██████████| 1/1 [00:01<00:00,  1.45s/it]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


100%|██████████| 1/1 [00:01<00:00,  1.68s/it]
  0%|          | 0/1 [00:00<?, ?it/s]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

100%|██████████| 1/1 [00:01<00:00,  1.24s/it]

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


100%|██████████| 1/1 [00:01<00:00,  1.64s/it]

END!!! CLIP image2text score for learned_embeds_factor=0.18 is: 0.6361083984375





In [75]:
from torchmetrics.multimodal import CLIPScore

image_k = pipe("a photo of {}".format(trained_token), num_inference_steps=50, guidance_scale=7.5, 
                       generator=generator).images[0]
rgb2tensor = Compose([ToTensor()])
tensor_k = rgb2tensor(image_k)
prompt_k = "a photo of {}".format(trained_token).replace(trained_token, initialization_word)
metric = CLIPScore(model_name_or_path="openai/clip-vit-base-patch16")
score_k = metric(tensor_k, prompt_k).item() / 100 * 2.5

print(score_k)

100%|██████████| 50/50 [00:02<00:00, 17.47it/s]


0.5651378154754639
