In [None]:
!pip install -qU diffusers peft transformers huggingface_hub

In [None]:
from huggingface_hub import notebook_login
# Log in to the Hugging Face Hub from inside the notebook; the `push_to_hub`
# call later in this notebook uploads the fused pipeline to the Hub.
notebook_login()

# Merge LoRAs

Load a Stable Diffusion XL (SDXL) checkpoint and the `ostris/ikea-instructions-lora-sdxl` and `lordjia/by-feng-zikai` LoRAs.

In [None]:
import torch
from diffusers import DiffusionPipeline

# Base SDXL checkpoint, loaded in half precision and moved to the GPU.
pipeline = DiffusionPipeline.from_pretrained(
    'stabilityai/stable-diffusion-xl-base-1.0', torch_dtype=torch.float16
)
pipeline = pipeline.to('cuda')

# Register each LoRA under its own adapter name so the following cells can
# refer to them individually ('ikea' first, then 'feng').
for repo_id, weight_file, adapter in (
    ('ostris/ikea-instructions-lora-sdxl', 'ikea_instructions_xl_v1_5.safetensors', 'ikea'),
    ('lordjia/by-feng-zikai', 'fengzikai_v1.0_XL.safetensors', 'feng'),
):
    pipeline.load_lora_weights(repo_id, weight_name=weight_file, adapter_name=adapter)

## `set_adapters`

`set_adapters()` merges LoRA adapters by concatenating their weighted matrices.

In [None]:
# Activate both adapters: 'ikea' weighted 0.7, 'feng' weighted 0.8.
pipeline.set_adapters(['ikea', 'feng'], adapter_weights=[0.7, 0.8])

prompt = 'A bowl of ramen shaped like a cute kawaii bear, by Feng Zikai'
generator = torch.manual_seed(111)  # fixed seed for this generation

# Generate with a LoRA `scale` of 1.0 passed through `cross_attention_kwargs`.
output = pipeline(prompt, generator=generator, cross_attention_kwargs={'scale': 1.0})
image = output.images[0]
image

In [None]:
# Re-seed before sampling. The previous pipeline call already drew random
# numbers from `generator`, so reusing it unchanged would start from different
# noise: the scale=1.0 vs scale=0.6 comparison would not be like-for-like and
# re-running this cell would give a different image each time.
generator = torch.manual_seed(111)

# Same prompt and seed as the previous cell, with the LoRA `scale` lowered to 0.6.
image = pipeline(
    prompt,
    generator=generator,
    cross_attention_kwargs={'scale': 0.6}
).images[0]
image

In [None]:
# Same two adapters with a different balance: 'ikea' at 0.8, 'feng' at 0.6.
pipeline.set_adapters(['ikea', 'feng'], adapter_weights=[0.8, 0.6])

prompt = 'A bowl of ramen shaped like a cute kawaii bear, by Feng Zikai'
generator = torch.manual_seed(111)  # same seed as the earlier generations

# Generate with a LoRA `scale` of 1.0 passed through `cross_attention_kwargs`.
output = pipeline(prompt, generator=generator, cross_attention_kwargs={'scale': 1.0})
image = output.images[0]
image

## `add_weighted_adapter`

`add_weighted_adapter()` provides access to more efficient merging methods such as TIES and DARE.

Make sure we have the latest stable version of Diffusers and PEFT installed.

Three steps to merge LoRAs with the `add_weighted_adapter` method:
1. Create a `PeftModel` from the underlying model and LoRA checkpoint.
2. Load a base UNet model and the LoRA adapters.
3. Merge the adapters using the `add_weighted_adapter` and the merging method of our choice.

1. Load a UNet that corresponds to the UNet in the LoRA checkpoint. In our case, both LoRAs use the SDXL UNet as their base model.

In [None]:
import torch
from diffusers import UNet2DConditionModel

# Load only the `unet` subfolder of the SDXL base checkpoint (fp16 variant,
# safetensors files) and place it on the GPU.
unet = UNet2DConditionModel.from_pretrained(
    'stabilityai/stable-diffusion-xl-base-1.0',
    subfolder='unet',
    variant='fp16',
    use_safetensors=True,
    torch_dtype=torch.float16,
)
unet = unet.to('cuda')

Load the SDXL pipeline and the first LoRA checkpoint:

In [None]:
from diffusers import DiffusionPipeline

# Build the SDXL pipeline around the UNet loaded in the previous cell
# instead of letting the pipeline load its own copy.
pipeline = DiffusionPipeline.from_pretrained(
    'stabilityai/stable-diffusion-xl-base-1.0',
    unet=unet,
    variant='fp16',
    torch_dtype=torch.float16,
)
pipeline = pipeline.to('cuda')

# Load the IKEA-instructions LoRA under the adapter name 'ikea'.
pipeline.load_lora_weights(
    'ostris/ikea-instructions-lora-sdxl',
    adapter_name='ikea',
    weight_name='ikea_instructions_xl_v1_5.safetensors',
)

Now we create a `PeftModel` from the loaded LoRA checkpoint by combining the SDXL UNet and the LoRA UNet from the pipeline.

In [None]:
import copy
from peft import get_peft_model, LoraConfig

# Wrap a deep copy of `unet` as a PEFT model, reusing the 'ikea' LoRA config
# that the pipeline's UNet holds.
sdxl_unet = copy.deepcopy(unet)
ikea_config = pipeline.unet.peft_config['ikea']
ikea_peft_model = get_peft_model(sdxl_unet, ikea_config, adapter_name='ikea')

# Prefix every key of the pipeline UNet's state dict with 'base_model.model.'
# before loading it into the wrapped model with strict key matching.
original_state_dict = {
    'base_model.model.' + name: tensor
    for name, tensor in pipeline.unet.state_dict().items()
}
ikea_peft_model.load_state_dict(original_state_dict, strict=True)

Repeat this process to create a `PeftModel` from the `lordjia/by-feng-zikai` LoRA:

In [None]:
# Remove the 'ikea' adapter from both the pipeline and the copied UNet so the
# 'feng' adapter can be set up on its own.
# (Fixed typo: the method is `delete_adapters`, not `detele_adapters`.)
pipeline.delete_adapters('ikea')
sdxl_unet.delete_adapters('ikea')

# Fixed repo id: 'lordjia/by-feng-zikai' is the repository used by the other
# cells of this notebook ('lorajia/...' was a typo).
pipeline.load_lora_weights(
    'lordjia/by-feng-zikai',
    weight_name='fengzikai_v1.0_XL.safetensors',
    adapter_name='feng'
)
pipeline.set_adapters(adapter_names='feng')

# Wrap the copied UNet as a PEFT model using the 'feng' LoRA config that the
# pipeline's UNet now holds.
feng_peft_model = get_peft_model(
    sdxl_unet,
    pipeline.unet.peft_config['feng'],
    adapter_name='feng'
)

# Prefix every key of the pipeline UNet's state dict with 'base_model.model.'
# before loading it into the wrapped model with strict key matching.
original_state_dict = {
    f"base_model.model.{k}": v
    for k,v in pipeline.unet.state_dict().items()
}

feng_peft_model.load_state_dict(original_state_dict, strict=True)

2. Load a base UNet model and then load the adapters onto it:

In [None]:
from peft import PeftModel

# Fresh fp16 SDXL UNet that will host both adapters.
base_unet = UNet2DConditionModel.from_pretrained(
    'stabilityai/stable-diffusion-xl-base-1.0',
    torch_dtype=torch.float16,
    use_safetensors=True,
    variant='fp16',
    subfolder='unet'
).to('cuda')

# Wrap the base UNet with the 'ikea' adapter from the Hub.
# (Fixed typo: the keyword is `use_safetensors`, spelled the same way as in
# the `load_adapter` call below; it was `use_safetenosrs`.)
model = PeftModel.from_pretrained(
    base_unet,
    'stevhliu/ikea_peft_model',
    use_safetensors=True,
    subfolder='ikea',
    adapter_name='ikea'
)
# Add the 'feng' adapter to the same wrapped model.
model.load_adapter(
    'stevhliu/feng_peft_model',
    use_safetensors=True,
    subfolder='feng',
    adapter_name='feng'
)

3. Merge the adapters using the `add_weighted_adapter` method and the merging method of our choice. In this example, we use the `"dare_linear"` method to merge the LoRAs.

Note that **LoRAs need to have the same rank to be merged!**

In [None]:
# Combine 'ikea' and 'feng' (weight 1.0 each) into a new adapter named
# 'ikea-feng' using the 'dare_linear' combination type.
model.add_weighted_adapter(
    adapter_name='ikea-feng',
    combination_type='dare_linear',
    adapters=['ikea', 'feng'],
    weights=[1.0, 1.0],
)
# Make the merged adapter the one in use.
model.set_adapters('ikea-feng')

Now we can generate an image with the merged LoRA:

In [None]:
# Put the merged PEFT model on the GPU in half precision.
model = model.to(device='cuda', dtype=torch.float16)

# Build a new SDXL pipeline that uses the merged model as its UNet.
pipeline = DiffusionPipeline.from_pretrained(
    'stabilityai/stable-diffusion-xl-base-1.0',
    torch_dtype=torch.float16,
    variant='fp16',
    unet=model,
)
pipeline = pipeline.to('cuda')

In [None]:
# Sample one image from the merged-LoRA pipeline with a fixed seed (111).
output = pipeline(
    'A bowl of ramen shaped like a cute kawaii bear, by Feng Zikai',
    generator=torch.manual_seed(111),
)
image = output.images[0]
image

## `fuse_lora`

Both `set_adapters()` and `add_weighted_adapter()` methods require loading the base model and the LoRA adapters separately which incurs some overhead.

The `fuse_lora` method allows us to fuse the LoRA weights directly with the original weights of the underlying model, so we only load the model once, which can increase inference speed and lower memory usage.

For example, if we have a base model and adapters loaded and set as active with the following adapter weights:

In [None]:
import torch
from diffusers import DiffusionPipeline

# Base SDXL checkpoint in half precision on the GPU.
pipeline = DiffusionPipeline.from_pretrained(
    'stabilityai/stable-diffusion-xl-base-1.0', torch_dtype=torch.float16
)
pipeline = pipeline.to('cuda')

# adapter name -> (Hub repo id, weight file); loaded in insertion order.
lora_sources = {
    'ikea': ('ostris/ikea-instructions-lora-sdxl', 'ikea_instructions_xl_v1_5.safetensors'),
    'feng': ("lordjia/by-feng-zikai", "fengzikai_v1.0_XL.safetensors"),
}
for lora_name, (lora_repo, lora_file) in lora_sources.items():
    pipeline.load_lora_weights(lora_repo, weight_name=lora_file, adapter_name=lora_name)

# Activate both adapters: 'ikea' weighted 0.7, 'feng' weighted 0.8.
pipeline.set_adapters(['ikea', 'feng'], adapter_weights=[0.7, 0.8])

In [None]:
# Fuse the 'ikea' and 'feng' adapters into the underlying model weights,
# with a LoRA scale of 1.0.
pipeline.fuse_lora(lora_scale=1.0, adapter_names=['ikea', 'feng'])

`lora_scale` controls how strongly the fused LoRA weights influence the output.

We can now use `unload_lora_weights()` to unload the LoRA weights since they have already been fused with the underlying base model.

Additionally, we can call `save_pretrained()` to save the fused pipeline locally.

In [None]:
# The LoRA weights are already fused into the base model, so the separate
# adapter weights can be unloaded.
# (Fixed typo: `pipline` -> `pipeline`; the misspelled name raised NameError.)
pipeline.unload_lora_weights()

# save locally
pipeline.save_pretrained('fused_pipeline')
# save to the hub
pipeline.push_to_hub('fused-ikea-feng')

Now we can quickly load the fused pipeline and use it for inference without needing to separately load the LoRA adapters.

In [None]:
# Reload from the 'fused_pipeline' path written by `save_pretrained`,
# in half precision, and move it to the GPU.
pipeline = DiffusionPipeline.from_pretrained('fused_pipeline', torch_dtype=torch.float16)
pipeline = pipeline.to('cuda')

In [None]:
# Sample one image from the reloaded fused pipeline with a fixed seed (0).
output = pipeline(
    "A bowl of ramen shaped like a cute kawaii bear, by Feng Zikai",
    generator=torch.manual_seed(0),
)
image = output.images[0]
image

We can also call `unfuse_lora` to restore the original model's weights. However, this only works if we have only fused one LoRA adapter to the original model. If we have fused multiple LoRAs, we need to reload the model.
```python
pipeline.unfuse_lora()
```

### `torch.compile`

`torch.compile` can speed up our pipeline even more, but the LoRA weights must be fused first and then unloaded. Typically, the UNet is compiled because it is such a computationally intensive component of the pipeline.

In [None]:
import torch
from diffusers import DiffusionPipeline

# Start again from the base SDXL checkpoint (fp16, on the GPU).
pipeline = DiffusionPipeline.from_pretrained(
    'stabilityai/stable-diffusion-xl-base-1.0', torch_dtype=torch.float16
)
pipeline = pipeline.to('cuda')

# Load the two LoRAs under the adapter names 'ikea' and 'feng'.
pipeline.load_lora_weights(
    'ostris/ikea-instructions-lora-sdxl',
    adapter_name='ikea',
    weight_name='ikea_instructions_xl_v1_5.safetensors',
)
pipeline.load_lora_weights(
    "lordjia/by-feng-zikai",
    adapter_name="feng",
    weight_name="fengzikai_v1.0_XL.safetensors",
)

# Activate both adapters: 'ikea' weighted 0.7, 'feng' weighted 0.8.
pipeline.set_adapters(['ikea', 'feng'], adapter_weights=[0.7, 0.8])

In [None]:
# Fuse both adapters into the model weights (LoRA scale 1.0), then unload the
# separate LoRA weights before compiling.
pipeline.fuse_lora(lora_scale=1.0, adapter_names=['ikea', 'feng'])
pipeline.unload_lora_weights()

# Switch the UNet to channels-last memory format, then replace it with its
# torch.compile'd version.
pipeline.unet.to(memory_format=torch.channels_last)
pipeline.unet = torch.compile(pipeline.unet, fullgraph=True, mode='reduce-overhead')

In [None]:
# Sample one image from the pipeline with the compiled UNet, fixed seed (0).
output = pipeline(
    "A bowl of ramen shaped like a cute kawaii bear, by Feng Zikai",
    generator=torch.manual_seed(0),
)
image = output.images[0]
image