In [1]:
# Ask nvidia-smi for each GPU's name, total memory and free memory,
# printed as CSV without a header row.
!nvidia-smi --query-gpu=name,memory.total,memory.free --format=csv,noheader

NVIDIA A100-PCIE-40GB, 40960 MiB, 40396 MiB


In [2]:
import torch
import os
from diffusers import StableDiffusionPipeline
from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer
from huggingface_hub import hf_hub_download, snapshot_download
from shutil import rmtree

# --- Configuration -----------------------------------------------------------
# SEED = None leaves generation unseeded; set an int for reproducible images.
SEED = None
DEVICE = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

pretrained_model_name_or_path = "runwayml/stable-diffusion-v1-5"
dataset_name = "midjourney-style"
downloaded_embedding_folder = f"/root/autodl-tmp/textual_inversion/downloaded_embeddings/{dataset_name}/original"
downloaded_state_path = f"{downloaded_embedding_folder}/downloaded_state.txt"
# Bound unconditionally so the name exists even when the download is skipped.
repo_id_embeds = f"sd-concepts-library/{dataset_name}"


def _write_download_state(state_path, completed):
    """Persist the download marker: the text "True" when complete, else "False"."""
    with open(state_path, "w") as f:
        f.write("True" if completed else "False")


def _read_download_state(state_path):
    """Return the stored marker text with surrounding whitespace removed.

    A missing marker file is reported as "False": the folder exists but no
    download is known to have completed, so the caller should start over
    instead of crashing with FileNotFoundError.
    """
    if not os.path.exists(state_path):
        return "False"
    with open(state_path, "r") as f:
        return f.read().strip()


def _download_concept(repo_id, target_folder, state_path):
    """Download the whole concept repo into ``target_folder``.

    The marker is written as "False" before the download starts and flipped to
    "True" only after ``snapshot_download`` returns, so an interrupted run is
    detectable the next time this cell executes.
    """
    os.makedirs(target_folder, exist_ok=True)
    _write_download_state(state_path, False)
    snapshot_download(repo_id=repo_id, local_dir=target_folder)
    _write_download_state(state_path, True)


# --- Fetch the learned embedding (skipped when a complete copy exists) -------
if not os.path.exists(downloaded_embedding_folder):
    _download_concept(repo_id_embeds, downloaded_embedding_folder, downloaded_state_path)
else:
    downloaded_state = _read_download_state(downloaded_state_path)
    if downloaded_state == "True":
        print("File already downloaded in the correct path")
    elif downloaded_state == "False":
        # Incomplete or unknown state: wipe the folder and download from scratch.
        rmtree(downloaded_embedding_folder)
        _download_concept(repo_id_embeds, downloaded_embedding_folder, downloaded_state_path)
    else:
        raise ValueError("downloaded_state in downloaded_state.txt should be only True or False")

# Alternative kept for reference: fetch only the embedding file instead of the
# whole repository.
# embeds_path = hf_hub_download(repo_id=repo_id_embeds, filename="learned_embeds.bin",
#                               local_dir=downloaded_embedding_folder)
learned_embeds_path = f"{downloaded_embedding_folder}/learned_embeds.bin"

  from .autonotebook import tqdm as notebook_tqdm
Fetching 9 files:   0%|          | 0/9 [00:00<?, ?it/s]
Downloading (…)bcce2/.gitattributes: 100%|██████████| 1.38k/1.38k [00:00<00:00, 159kB/s]
Fetching 9 files:  11%|█         | 1/9 [00:01<00:15,  1.93s/it]
Downloading (…)oncept_images/2.jpeg:   0%|          | 0.00/168k [00:00<?, ?B/s][A
Downloading (…)oncept_images/2.jpeg: 100%|██████████| 168k/168k [00:00<00:00, 241kB/s][A

Downloading (…)oncept_images/1.jpeg:   0%|          | 0.00/171k [00:00<?, ?B/s][A
Downloading (…)oncept_images/1.jpeg: 100%|██████████| 171k/171k [00:00<00:00, 244kB/s][A

Downloading (…)/type_of_concept.txt: 100%|██████████| 5.00/5.00 [00:00<00:00, 681B/s]

Downloading learned_embeds.bin: 100%|██████████| 3.82k/3.82k [00:00<00:00, 504kB/s]

Downloading (…)oncept_images/0.jpeg:   0%|          | 0.00/96.7k [00:00<?, ?B/s][A

Downloading (…)d0374bcce2/README.md: 100%|██████████| 1.11k/1.11k [00:00<00:00, 114kB/s]


Downloading (…)token_identifier.txt: 100%|███

In [3]:
# Tokenizer stored under the "tokenizer" subfolder of the base checkpoint.
tokenizer = CLIPTokenizer.from_pretrained(pretrained_model_name_or_path, subfolder="tokenizer")

# Text encoder from the "text_encoder" subfolder, requested in half precision.
text_encoder = CLIPTextModel.from_pretrained(
    pretrained_model_name_or_path,
    subfolder="text_encoder",
    torch_dtype=torch.float16,
)

In [4]:
# Load the learned textual-inversion embedding onto the CPU.
# NOTE(review): torch.load unpickles the file, and learned_embeds.bin comes from
# a downloaded repository -- only load concepts you trust (consider
# weights_only=True if the installed torch version supports it).
loaded_learned_embeds = torch.load(learned_embeds_path, map_location="cpu")

# separate token and the embeds (only the first entry of the mapping is used)
trained_token = list(loaded_learned_embeds.keys())[0]
embeds = loaded_learned_embeds[trained_token]
print("placeholder token for dataset {}:".format(dataset_name), trained_token)

# cast to dtype of text_encoder
# Tensor.to returns a new tensor rather than converting in place, so the result
# has to be rebound; otherwise the cast is silently dropped.
dtype = text_encoder.get_input_embeddings().weight.dtype
embeds = embeds.to(dtype)

# add the token in tokenizer
num_added_tokens = tokenizer.add_tokens(trained_token)
if num_added_tokens == 0:
    raise ValueError(f"The tokenizer already contains the token {trained_token}. "
                     "Please pass a different `token` that is not already in the tokenizer.")

# resize the token embeddings so the embedding matrix has a row for the new token
text_encoder.resize_token_embeddings(len(tokenizer))

# get the id for the token and assign the embeds
token_id = tokenizer.convert_tokens_to_ids(trained_token)
text_encoder.get_input_embeddings().weight.data[token_id] = embeds

# Build the pipeline around the patched tokenizer / text encoder and move it to DEVICE.
pipe = StableDiffusionPipeline.from_pretrained(pretrained_model_name_or_path,
                                               torch_dtype=torch.float16,
                                               text_encoder=text_encoder,
                                               tokenizer=tokenizer).to(DEVICE)

placeholder token for dataset midjourney-style: <midjourney-style>




In [6]:
# Prompt that references the learned placeholder token.
prompt = "a library in the style of {}".format(trained_token)

# With SEED unset, generator=None is passed to the pipeline; otherwise a
# generator on DEVICE is seeded with SEED.
if SEED is None:
    generator = None
else:
    generator = torch.Generator(device=DEVICE).manual_seed(SEED)

# Run the pipeline and keep the first returned image.
image = pipe(
    prompt,
    num_inference_steps=50,
    guidance_scale=7.5,
    generator=generator,
).images[0]

# Save under <downloaded_embedding_folder>/images, using the prompt text as the file name.
image_dir = os.path.join(downloaded_embedding_folder, "images")
os.makedirs(image_dir, exist_ok=True)
image_path = os.path.join(image_dir, "{}.png".format(prompt))
image.save(image_path)

100%|██████████| 50/50 [00:04<00:00, 12.06it/s]
