In [1]:
from diffusers import LCMScheduler
from diffusers import DiffusionPipeline
import torch



In [2]:
device = "cuda" if torch.cuda.is_available() else "cpu"

# 将 model_path 设置为 Stable-Diffusion-v1.5 的模型路径或名字
model_or_name = "/root/autodl-tmp/LCM"

scheduler = LCMScheduler.from_pretrained(model_or_name, subfolder="scheduler")
model = DiffusionPipeline.from_pretrained(
    model_or_name, scheduler=scheduler
)

# 设置随机种子
seed = 0
generator = torch.Generator(device=device)
generator.manual_seed(seed)

model.generator = generator

Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

OSError: /root/autodl-tmp/LCM does not appear to have a file named preprocessor_config.json. Checkout 'https://huggingface.co//root/autodl-tmp/LCM/None' for available files.

In [None]:
# 设置一些路径和参数
from inversion_free import gen_inversion_free
from utils import img2latent, latent2img
from datetime import datetime
import os

# 加载图片
# 第一级目录
img_dir_src_1 = "./DVCT/examples"
img_dir_tar_1 = img_dir_src_1
# 第二级目录
img_dir_src_2 = "dog"
src_img_dir = os.path.join(img_dir_src_1, img_dir_src_2)
tar_img_dir_2 = "cat_hat"
tar_img_dir = os.path.join(img_dir_tar_1, tar_img_dir_2)

def get_image_file(path):
    img_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.gif')
    img_files = [f for f in os.listdir(path) if f.lower().endswith(img_extensions)]
    return img_files[0]

src_img_file = get_image_file(src_img_dir)
tar_img_file = get_image_file(tar_img_dir)

src_latent = img2latent(os.path.join(src_img_dir, src_img_file), model)
tar_latent = img2latent(os.path.join(tar_img_dir, tar_img_file), model)

# 加载嵌入向量
# 第一级目录
embed_dir_src_1 = "./DVCT/output/"
embed_dir_tar_1 = embed_dir_src_1

# 第二级目录
embed_dir_src_2 = "dog"
embed_dir_src_2 = os.path.join(embed_dir_src_1, embed_dir_src_2)
tar_embed_dir_2 = "cat_hat"
tar_embed_dir_2 = os.path.join(embed_dir_tar_1, tar_embed_dir_2)

# 嵌入向量名字
src_embed_dir = "dog_05_08_2024_1919"
src_embed_dir = os.path.join(embed_dir_src_2, src_embed_dir)
tar_embed_dir = "cat_hat_05_08_2024_2045"
tar_embed_dir = os.path.join(tar_embed_dir_2, tar_embed_dir)

# 嵌入向量训练步数
src_steps = 1000
tar_steps = src_steps

src_embedding = torch.load(os.path.join(src_embed_dir, f"{src_steps}.bin"))
tar_embedding = torch.load(os.path.join(tar_embed_dir, f"{tar_steps}.bin"))

In [None]:
from utils import load_multitoken_tokenizer
from multi_token_clip import MultiTokenCLIPTokenizer
from transformers.models.clip.modeling_clip import CLIPTextModel

src_placeholders = "<src>"
tar_placeholders = "<tar>"

text_encoder = CLIPTextModel.from_pretrained(model_or_name, subfolder="text_encoder", revision=False)
tokenizer = MultiTokenCLIPTokenizer.from_pretrained(model_or_name, subfolder="tokenizer")

placeholder_dict = {
    "<src>": src_embedding[0][:3],
    "<tar>": tar_embedding[0][:3],
}

load_multitoken_tokenizer(tokenizer, text_encoder, placeholder_dict, tar_placeholders)
load_multitoken_tokenizer(tokenizer, text_encoder, placeholder_dict, src_placeholders)

model.text_encoder = text_encoder
model.tokenizer = tokenizer

In [None]:
from attention_control import make_controller

# 设置输出目录并生成
date = datetime.now().strftime("%Y-%m-%d")
output_dir = f"./output_img/{str(date)}-{img_dir_src_2}-to-{tar_img_dir_2}-LCM/"
os.makedirs(output_dir, exist_ok=True)

time = datetime.now().strftime("%H-%M-%S")
num_inference_steps = 20
save_all = False

for attn in [False]:
    for cfg in [0.7, 1.7, 2.7]:
        # 是否使用注意力控制器
        use_attention = attn
        
        # 选择权重的类型
        inclination = "none-tar"
        mode = "cosine"
        cfg_guidance_scale = cfg
        
        # src 和 tar 方向的系数
        src_coefficient = 0
        tar_coefficient = 0
        
        output_img_name = f"{time}_{src_img_file[:-4]}2{tar_img_file[:-4]}_{num_inference_steps}steps_{inclination}_{mode[:3]}_cfg{cfg_guidance_scale}_src{src_coefficient}_tar{tar_coefficient}_attn{use_attention}"
        
        if use_attention:
            # 设置注意力控制器
            placeholder = [src_placeholders, tar_placeholders]
            cross_injection_ratio = 0.2
            self_injection_ratio = 0.7
            eq_param = {
                'words' : (placeholder[-1],),
                'values' : (0.5,),
            }
            controller = make_controller(
                prompts=placeholder,
                tokenizer=tokenizer,
                is_replace_controller=False,
                cross_replace_steps={'default_': cross_injection_ratio},
                self_replace_steps=self_injection_ratio,
                equilizer_params=eq_param,
            )
            model_2 = None
        else:
            controller = None
            model_2 = None
            
        # 生成图片
        latents = gen_inversion_free(
            model.to(device), src_latent, tar_latent, src_embedding, tar_embedding,
            num_inference_steps=num_inference_steps, mode=mode, inclination=inclination,
            cfg_guidance=cfg_guidance_scale, src_coef=src_coefficient, tar_coef=tar_coefficient,
            controller=controller, model_2=model_2, return_all=save_all,
        )
        
        
        if save_all:
            for i, latent in enumerate(latents):
                img = latent2img(latent[0].detach(), model)
                print(f"Saving {output_img_name}_{i}.png in {output_dir}")
                img.save(os.path.join(output_dir, f"{output_img_name}_{i}.png"))
        else:
            img = latent2img(latents[-1].detach(), model)
            print(f"Saving {output_img_name}.png in {output_dir}")
            img.save(os.path.join(output_dir, f"{output_img_name}.png"))