In [None]:
!pip install transformers diffusers accelerate peft

In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
from huggingface_hub import hf_hub_download

realistic_vision_path = hf_hub_download(repo_id="SG161222/Realistic_Vision_V5.1_noVAE", filename="Realistic_Vision_V5.1-inpainting.safetensors")
vae_path = hf_hub_download(repo_id="stabilityai/sd-vae-ft-mse-original", filename="vae-ft-mse-840000-ema-pruned.safetensors")

In [4]:
import sys
sys.path.append('../src')

In [5]:
from config import DatasetConfig, Config, ModelConfig, WandbConfig, EvaluationConfig, TrainConfig

dataset_config = DatasetConfig(
    roboflow_api_key='HNXIsW3WwnidNDQZHexX',
    roboflow_workspace='arked',
    project_name='facades-flzke',
    dataset_version=11,
    data_root='facades_data',
    image_size=512,
)

model_config = ModelConfig(
    model_path=realistic_vision_path,
    vae_path=vae_path,
)

wandb_config = WandbConfig(
    project_name='facades',
)

eval_config=EvaluationConfig(
    prompts=['white facade', 'brick facade'],
)

train_config=TrainConfig(
    checkpoint_folder = wandb_config.project_name + "_checkpoints",
    train_batch_size = 4,
    unet_lr=1e-4,
    text_encoder_lr=1e-4,
    scheduler_num_cycles=4,
    total_steps=1000,
)

config = Config(
    dataset=dataset_config,
    model=model_config,
    wandb=wandb_config,
    eval=eval_config,
    train=train_config,
)

In [6]:
from model import get_models

text_encoder, vae, unet, tokenizer, noise_scheduler = get_models(
    config.model.model_path,
    config.model.vae_path,
    device=config.device,
    load_from_safetensor=True,
)

loading VAE...
loading model...


`text_config_dict` is provided which will be used to initialize `CLIPTextConfig`. The value `text_config["id2label"]` will be overriden.


In [7]:
from peft import LoraConfig, LoraModel

unet_peft = LoraConfig(
    r=config.lora.rank,
    lora_alpha=config.lora.alpha,
    target_modules=config.lora.unet_target_modules,
    lora_dropout=0.1,
    bias='none',
)

text_encoder_peft = LoraConfig(
    r=config.lora.rank,
    lora_alpha=config.lora.alpha,
    target_modules=config.lora.text_encoder_target_modules,
    lora_dropout=0.1,
    bias='none',
)


Welcome to bitsandbytes. For bug reports, please submit your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
For effortless bug reporting copy-paste your error into this form: https://docs.google.com/forms/d/e/1FAIpQLScPB8emS3Thkp66nvqwmjTEgxp8Y9ufuWTzFyr9kJ5AoI47dQ/viewform?usp=sf_link
CUDA SETUP: Loading binary c:\Users\Facundo\AppData\Local\Programs\Python\Python310\lib\site-packages\bitsandbytes\libbitsandbytes_cuda116.dll...


In [8]:
"""from peft import get_peft_model 
unet = get_peft_model(unet, unet_peft)
text_encoder = get_peft_model(text_encoder, text_encoder_peft)"""

'from peft import get_peft_model \nunet = get_peft_model(unet, unet_peft)\ntext_encoder = get_peft_model(text_encoder, text_encoder_peft)'

In [9]:
text_encoder = LoraModel(text_encoder, text_encoder_peft, config.lora.text_encoder_adapter_name)
unet = LoraModel(unet, unet_peft, config.lora.unet_adapter_name)

In [10]:
from diffusers import (
    StableDiffusionInpaintPipeline,
    StableDiffusionControlNetInpaintPipeline,
    ControlNetModel,
)

In [11]:
pipe = StableDiffusionInpaintPipeline(
    vae=vae,
    text_encoder=text_encoder,
    unet=unet,
    tokenizer=tokenizer,
    scheduler=noise_scheduler,
    safety_checker=None,
    feature_extractor=None,
)

You have disabled the safety checker for <class 'diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_inpaint.StableDiffusionInpaintPipeline'> by passing `safety_checker=None`. Ensure that you abide to the conditions of the Stable Diffusion license and do not expose unfiltered results in services or applications open to the public. Both the diffusers team and Hugging Face strongly recommend to keep the safety filter enabled in all public facing circumstances, disabling it only for use-cases that involve analyzing network behavior or auditing its results. For more information, please have a look at https://github.com/huggingface/diffusers/pull/254 .


In [14]:
unet.peft_config

{'lora_unet': LoraConfig(peft_type=<PeftType.LORA: 'LORA'>, auto_mapping=None, base_model_name_or_path=None, revision=None, task_type=None, inference_mode=False, r=8, target_modules={'to_q', 'to_v', 'to_out.0', 'to_k', 'ff.net.0.proj'}, lora_alpha=32.0, lora_dropout=0.1, fan_in_fan_out=False, bias='none', modules_to_save=None, init_lora_weights=True, layers_to_transform=None, layers_pattern=None, rank_pattern={}, alpha_pattern={})}

In [15]:
from peft import get_peft_model_state_dict

unet_dict = get_peft_model_state_dict(unet, adapter_name='lora_unet')
text_encoder_dict = get_peft_model_state_dict(text_encoder, adapter_name=config.lora.text_encoder_adapter_name)

In [16]:
unet_dict

{'model.down_blocks.0.attentions.0.transformer_blocks.0.attn1.to_q.lora_A.weight': tensor([[ 0.0257,  0.0014, -0.0247,  ...,  0.0340, -0.0498,  0.0105],
         [ 0.0188, -0.0514, -0.0259,  ...,  0.0064,  0.0222, -0.0384],
         [-0.0454, -0.0540, -0.0294,  ...,  0.0307, -0.0272, -0.0105],
         ...,
         [-0.0284,  0.0247,  0.0417,  ..., -0.0535, -0.0033,  0.0310],
         [-0.0226, -0.0498, -0.0053,  ..., -0.0096,  0.0083,  0.0530],
         [-0.0302, -0.0213, -0.0420,  ...,  0.0283, -0.0543, -0.0325]],
        device='cuda:0'),
 'model.down_blocks.0.attentions.0.transformer_blocks.0.attn1.to_q.lora_B.weight': tensor([[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]], device='cuda:0'),
 'model.down_blocks.0.attentions.0.transformer_blocks.0.attn1.to_k.lora_A.weight': tensor(

In [17]:
pipe.save_lora_weights("peft_pipe", unet_dict, text_encoder_dict)

In [10]:
unet.save_pretrained("output_dir_TEST_PEFT")

In [11]:
# if pushing to Hub
from huggingface_hub import notebook_login

notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
unet

In [12]:
unet.push_to_hub("my_awesome_peft_model")

Upload 1 LFS files:   0%|          | 0/1 [00:00<?, ?it/s]

diffusion_pytorch_model.safetensors:   0%|          | 0.00/3.45G [00:00<?, ?B/s]

In [9]:
from utils import print_trainable_parameters

print_trainable_parameters(text_encoder, 'Text Encoder')
print_trainable_parameters(unet, 'UNet')

Model: Text Encoder
trainable params: 1327104 || all params: 124387584 || trainable%: 1.0669103437204794
Model: UNet
trainable params: 2492928 || all params: 862028292 || trainable%: 0.28919329250970804


In [10]:
import itertools

params_to_optimize = [
    {
        "params": itertools.chain(unet.parameters()),
        "lr": config.train.unet_lr,
    },
]

params_to_optimize += [
    {
        "params": itertools.chain(text_encoder.parameters()),
        "lr": config.train.text_encoder_lr,
    }
]

In [11]:
from torch.optim import AdamW

optimizer_lora = AdamW(
    params_to_optimize,
    lr=config.train.learning_rate,
    weight_decay=config.train.weight_decay,
)

In [12]:
text_encoder.peft_config

{'lora_te': LoraConfig(peft_type=<PeftType.LORA: 'LORA'>, auto_mapping=None, base_model_name_or_path=None, revision=None, task_type=None, inference_mode=False, r=8, target_modules={'mlp.fc1', 'mlp.fc2', 'k_proj', 'v_proj', 'out_proj', 'q_proj'}, lora_alpha=32.0, lora_dropout=0.1, fan_in_fan_out=False, bias='none', modules_to_save=None, init_lora_weights=True, layers_to_transform=None, layers_pattern=None, rank_pattern={}, alpha_pattern={})}

In [21]:
from utils import save_loras

save_loras(
    unet=unet, 
    text_encoder=text_encoder, 
    save_path='te.safetensors',
    config=config
)

In [22]:
from safetensors import safe_open

file = safe_open('te.safetensors', framework="pt", device=0)
file.keys()

['lora_te_text_model_encoder_layers_0_mlp_fc1.alpha',
 'lora_te_text_model_encoder_layers_0_mlp_fc1.lora_down.weight',
 'lora_te_text_model_encoder_layers_0_mlp_fc1.lora_up.weight',
 'lora_te_text_model_encoder_layers_0_mlp_fc2.alpha',
 'lora_te_text_model_encoder_layers_0_mlp_fc2.lora_down.weight',
 'lora_te_text_model_encoder_layers_0_mlp_fc2.lora_up.weight',
 'lora_te_text_model_encoder_layers_0_self_attn_k_proj.alpha',
 'lora_te_text_model_encoder_layers_0_self_attn_k_proj.lora_down.weight',
 'lora_te_text_model_encoder_layers_0_self_attn_k_proj.lora_up.weight',
 'lora_te_text_model_encoder_layers_0_self_attn_out_proj.alpha',
 'lora_te_text_model_encoder_layers_0_self_attn_out_proj.lora_down.weight',
 'lora_te_text_model_encoder_layers_0_self_attn_out_proj.lora_up.weight',
 'lora_te_text_model_encoder_layers_0_self_attn_q_proj.alpha',
 'lora_te_text_model_encoder_layers_0_self_attn_q_proj.lora_down.weight',
 'lora_te_text_model_encoder_layers_0_self_attn_q_proj.lora_up.weight',
 'l

In [19]:
from safetensors import safe_open

file = safe_open('test_lora.safetensors', framework="pt", device=0)

In [20]:
file.keys()

['lora_te_text_model_encoder_layers_0_self_attn_k_proj.alpha',
 'lora_te_text_model_encoder_layers_0_self_attn_k_proj.lora_down.weight',
 'lora_te_text_model_encoder_layers_0_self_attn_k_proj.lora_up.weight',
 'lora_te_text_model_encoder_layers_0_self_attn_out_proj.alpha',
 'lora_te_text_model_encoder_layers_0_self_attn_out_proj.lora_down.weight',
 'lora_te_text_model_encoder_layers_0_self_attn_out_proj.lora_up.weight',
 'lora_te_text_model_encoder_layers_0_self_attn_q_proj.alpha',
 'lora_te_text_model_encoder_layers_0_self_attn_q_proj.lora_down.weight',
 'lora_te_text_model_encoder_layers_0_self_attn_q_proj.lora_up.weight',
 'lora_te_text_model_encoder_layers_0_self_attn_v_proj.alpha',
 'lora_te_text_model_encoder_layers_0_self_attn_v_proj.lora_down.weight',
 'lora_te_text_model_encoder_layers_0_self_attn_v_proj.lora_up.weight',
 'lora_te_text_model_encoder_layers_10_self_attn_k_proj.alpha',
 'lora_te_text_model_encoder_layers_10_self_attn_k_proj.lora_down.weight',
 'lora_te_text_mode

In [None]:
text_encoder_lora.peft_config['text_encoder_lora'].lora_alpha

In [None]:
file.get_tensor('lora_te_text_model_encoder_layers_0_self_attn_k_proj.alpha')